Coverage for src/pyhrp/algos.py: 100%

1"""Portfolio optimization algorithms for hierarchical risk parity.

3This module implements various portfolio optimization algorithms:

4- risk_parity: The main hierarchical risk parity algorithm

5- schur_risk_parity: Schur Complementary Allocation (Cotton, arXiv:2411.05807)

6- one_over_n: A simple equal-weight allocation strategy

8Allocator contract

9------------------

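10All three allocators work from a ``Cluster`` tree (``root``) plus the asset names

11(``one_over_n`` takes them directly; the risk-based pair reads them from the columns of ``cov``). The risk-based allocators write portfolios onto the tree in place and ``one_over_n`` leaves it untouched, but weights are always

12rebuilt from scratch, so every allocator is idempotent and a tree can be reused.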

13``risk_parity`` and ``schur_risk_parity`` share the recursive ``_allocate_with``

14scaffolding and each return the fully weighted root ``Cluster``. ``one_over_n``

15intentionally differs in its *output*: it is a generator that yields the

16equal-weight portfolio one tree level at a time (see its docstring), because its

17purpose is to expose the allocation as it deepens rather than a single final

18result.

19"""

21from __future__ import annotations

23from collections.abc import Callable, Generator

24from copy import deepcopy

26import numpy as np

27import polars as pl

29from .cluster import Cluster, Portfolio

31__all__ = ["one_over_n", "risk_parity", "schur_risk_parity"]

def risk_parity(root: Cluster, cov: pl.DataFrame) -> Cluster:
    """Assign hierarchical risk parity (HRP) weights to a cluster tree.

    The tree is processed recursively; at every internal node the weight is
    shared between the two children according to the plain variance of each
    child's sub-portfolio.

    Note:
        Portfolios are written onto the nodes of ``root`` in place. Each
        node's portfolio is rebuilt from scratch rather than accumulated, so
        calling the function again (also with another covariance matrix) on
        the same tree is safe and idempotent.

    Args:
        root (Cluster): Root node of the cluster tree.
        cov (pl.DataFrame): Covariance matrix of asset returns; its columns
            supply the asset names.

    Returns:
        Cluster: ``root``, now carrying portfolio weights.

    Examples:
        >>> import polars as pl
        >>> from pyhrp.cluster import Cluster
        >>> from pyhrp.algos import risk_parity
        >>> cov = pl.DataFrame({"A": [4.0, 0.0], "B": [0.0, 1.0]})
        >>> root = Cluster(2, left=Cluster(0), right=Cluster(1))
        >>> cluster = risk_parity(root=root, cov=cov)
        >>> round(cluster.portfolio["B"], 1)
        0.8
    """

    def plain_variances(
        left: Cluster, right: Cluster, cov_np: np.ndarray, index: dict[str, int]
    ) -> tuple[float, float]:
        """Return the un-augmented block variance of the left and right child."""
        v_left = _block_variance(left.portfolio, cov_np, index)
        v_right = _block_variance(right.portfolio, cov_np, index)
        return v_left, v_right

    return _allocate_with(root, cov, plain_variances)

def schur_risk_parity(root: Cluster, cov: pl.DataFrame, gamma: float = 0.5) -> Cluster:
    """Compute Schur Complementary Allocation weights for a cluster tree.

    An extension of HRP introduced by Peter Cotton (arXiv:2411.05807) that augments
    sub-covariance matrices with off-diagonal block information via Schur complements.
    At gamma=0 this recovers standard HRP; at gamma=1 it recovers the minimum-variance
    portfolio through the same recursive structure.

    Note:
        The tree is modified in place: the portfolio of every node is rebuilt
        from scratch, so the function is idempotent and a tree can be reused
        with a different covariance matrix or gamma.

    Args:
        root (Cluster): The root node of the cluster tree
        cov (pl.DataFrame): Covariance matrix of asset returns
        gamma (float): Interpolation parameter in [0, 1]. 0 = HRP, 1 = minimum variance.

    Returns:
        Cluster: The root node with portfolio weights assigned

    Raises:
        ValueError: If gamma is outside the interval [0, 1].

    Examples:
        >>> import polars as pl
        >>> from pyhrp.cluster import Cluster
        >>> from pyhrp.algos import schur_risk_parity
        >>> cov = pl.DataFrame({"A": [4.0, 0.0], "B": [0.0, 1.0]})
        >>> root = Cluster(2, left=Cluster(0), right=Cluster(1))
        >>> cluster = schur_risk_parity(root=root, cov=cov, gamma=0.5)
        >>> round(cluster.portfolio["B"], 1)
        0.8
    """
    # Written as a negated chained comparison so that NaN is rejected as well.
    if not 0.0 <= gamma <= 1.0:
        msg = f"gamma must be in [0, 1], got {gamma}"
        raise ValueError(msg)

    def node_variances(left: Cluster, right: Cluster, cov_np: np.ndarray, index: dict[str, int]) -> tuple[float, float]:
        """Schur-augmented block variance of each child, conditioned on the other."""
        if gamma == 0.0:
            # No augmentation requested: use the plain block variances directly.
            # This skips two linear solves whose result would be multiplied by
            # zero, and keeps a non-finite solve result on an ill-conditioned
            # block from turning the variances into NaN.
            return (
                _block_variance(left.portfolio, cov_np, index),
                _block_variance(right.portfolio, cov_np, index),
            )

        li = [index[a] for a in left.portfolio.assets]
        ri = [index[a] for a in right.portfolio.assets]

        # Partition the joint covariance of the two children into its blocks.
        a_mat = cov_np[np.ix_(li, li)]
        b_mat = cov_np[np.ix_(li, ri)]
        d_mat = cov_np[np.ix_(ri, ri)]

        w_left = np.array([left.portfolio[a] for a in left.portfolio.assets])
        w_right = np.array([right.portfolio[a] for a in right.portfolio.assets])

        # Schur-augmented blocks: condition each group on the other
        a_aug = a_mat - gamma * (b_mat @ _solve(d_mat, b_mat.T))
        d_aug = d_mat - gamma * (b_mat.T @ _solve(a_mat, b_mat))

        v_left = float(w_left @ a_aug @ w_left)
        v_right = float(w_right @ d_aug @ w_right)
        return v_left, v_right

    return _allocate_with(root, cov, node_variances)

133

134

# Signature of the per-node risk rule handed to ``_allocate_with``: it receives
# a node's left child, its right child, the precomputed covariance array and
# the column->row index, and returns the ``(v_left, v_right)`` pair used to
# split that node.
NodeVariances = Callable[
    [Cluster, Cluster, np.ndarray, dict[str, int]],
    tuple[float, float],
]

138

139

def _allocate_with(root: Cluster, cov: pl.DataFrame, node_variances: NodeVariances) -> Cluster:
    """Run a recursive risk-based allocation driven by a per-node variance rule.

    The covariance frame is converted to a numpy array and a name->position
    lookup exactly once. The tree is then allocated via ``_allocate``; each
    internal node is split by ``_split`` using the ``(v_left, v_right)`` pair
    that ``node_variances`` reports for its two children. The variance rule is
    the only part that differs between ``risk_parity`` and
    ``schur_risk_parity``.

    Args:
        root (Cluster): The root node of the cluster tree.
        cov (pl.DataFrame): Covariance matrix of asset returns.
        node_variances (NodeVariances): Rule mapping a node's left and right
            children, the covariance array and the column index to the
            ``(v_left, v_right)`` risk pair for that node.

    Returns:
        Cluster: The root node with portfolio weights assigned.
    """
    columns = cov.columns
    position = dict(zip(columns, range(len(columns))))
    matrix = cov.to_numpy()

    def split_node(node: Cluster) -> Cluster:
        """Split ``node`` between its children using the supplied variance rule."""
        left_child, right_child = _children(node)
        risk_left, risk_right = node_variances(left_child, right_child, matrix, position)
        return _split(node, risk_left, risk_right)

    return _allocate(root, columns, split_node)

170

171

def _allocate(root: Cluster, assets: list[str], combine: Callable[[Cluster], Cluster]) -> Cluster:
    """Allocate a (sub)tree: children first, then the node itself.

    A leaf receives a fresh single-asset portfolio with weight 1.0. An internal
    node has both subtrees allocated and is then handed to ``combine``.
    Portfolios are always replaced, never added to, so allocating the same
    tree repeatedly gives the same result.

    Args:
        root (Cluster): The (sub)tree to allocate weights for.
        assets (list[str]): Asset names; a leaf's value indexes into this list.
        combine (Callable[[Cluster], Cluster]): Builds a node's portfolio from
            the portfolios of its two children.

    Returns:
        Cluster: The leaf itself, or whatever ``combine`` returns for an
        internal node.
    """
    if not root.is_leaf:
        lower_left, lower_right = _children(root)
        root.left = _allocate(lower_left, assets, combine)
        root.right = _allocate(lower_right, assets, combine)
        return combine(root)

    # Leaf: start from an empty portfolio and put the full weight on its asset.
    root.portfolio = Portfolio()
    asset_name = assets[int(root.value)]
    root.portfolio[asset_name] = 1.0
    return root

196

197

def _children(cluster: Cluster) -> tuple[Cluster, Cluster]:
    """Fetch both children of a non-leaf cluster, checking that each is a ``Cluster``.

    Raises:
        TypeError: If the left or the right child is not a ``Cluster``; the
            left child is checked first.
    """
    left_child = cluster.left
    if not isinstance(left_child, Cluster):
        raise TypeError("Expected left child to be a Cluster")

    right_child = cluster.right
    if not isinstance(right_child, Cluster):
        raise TypeError("Expected right child to be a Cluster")

    return left_child, right_child

205

206

def _block_variance(portfolio: Portfolio, cov_np: np.ndarray, index: dict[str, int]) -> float:
    """Return the quadratic form ``w' C w`` for a portfolio's own assets.

    Args:
        portfolio (Portfolio): Portfolio whose assets and weights are read.
        cov_np (np.ndarray): Covariance matrix as a numpy array.
        index (dict[str, int]): Maps an asset name to its row/column in ``cov_np``.

    Returns:
        float: Variance of the portfolio on the matching covariance block.
    """
    names = portfolio.assets
    rows = [index[name] for name in names]
    weights = np.array([portfolio[name] for name in names])
    # Restrict the covariance to the rows/columns of the portfolio's assets.
    block = cov_np[np.ix_(rows, rows)]
    return float(weights @ block @ weights)

213

214

def _split(cluster: Cluster, v_left: float, v_right: float) -> Cluster:
    """Build a node's portfolio by sharing weight between its children inversely to risk.

    The shares obey ``v_left * alpha_left == v_right * alpha_right`` and
    ``alpha_left + alpha_right == 1``. When the summed variance is not
    positive (for instance two riskless sub-portfolios), each child gets half.

    Args:
        cluster (Cluster): The parent cluster with left and right children.
        v_left (float): Variance of the left sub-portfolio.
        v_right (float): Variance of the right sub-portfolio.

    Returns:
        Cluster: The parent cluster, carrying its freshly built portfolio.
    """
    left_child, right_child = _children(cluster)

    risk_sum = v_left + v_right
    if risk_sum > 0:
        # The riskier side receives the smaller share.
        share_left = v_right / risk_sum
    else:
        share_left = 0.5
    share_right = 1.0 - share_left

    # Start from an empty portfolio so nothing from an earlier run survives.
    cluster.portfolio = Portfolio()
    for child, share in ((left_child, share_left), (right_child, share_right)):
        for name, weight in child.portfolio.weights.items():
            cluster.portfolio[name] = share * weight

    return cluster

242

243

def _solve(m: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return ``x`` with ``m @ x = b``, using least squares when ``m`` is singular.

    Covariance blocks of collinear assets are singular, so the exact solver
    raises for them. The least-squares solution is used in that case so the
    Schur augmentation stays well-defined.
    """
    try:
        solution = np.linalg.solve(m, b)
    except np.linalg.LinAlgError:
        # lstsq returns (solution, residuals, rank, singular values).
        fitted = np.linalg.lstsq(m, b, rcond=None)
        solution = np.asarray(fitted[0])
    return solution

254

255

def one_over_n(root: Cluster, assets: list[str]) -> Generator[tuple[int, Portfolio]]:
    """Generate 1/N (equal-weight) portfolios one tree level at a time.

    This implements a hierarchical 1/N strategy where weights are distributed
    equally among the leaves of each cluster, and the weight budget halves at
    each successive level of the tree.

    Unlike :func:`risk_parity` and :func:`schur_risk_parity` — which rebuild a
    single final allocation and return the root ``Cluster`` — this allocator is
    intentionally a **generator**: its purpose is to expose the equal-weight
    allocation as the tree deepens, yielding one portfolio per level. It does
    not mutate the tree: weights accumulate in a local buffer, so a leaf that
    terminates at a shallow level keeps its weight in the deeper levels (each
    yielded portfolio is therefore a complete allocation over all assets),
    and re-running on the same tree yields an identical sequence.

    Args:
        root (Cluster): The root node of the cluster tree.
        assets (list[str]): Asset names; a leaf's value indexes into this list.

    Yields:
        tuple[int, Portfolio]: The level number and the (cumulative) equal-weight
        portfolio at that level.

    Examples:
        >>> import polars as pl
        >>> from pyhrp.hrp import build_tree
        >>> from pyhrp.algos import one_over_n
        >>> cor = pl.DataFrame({"A": [1.0, 0.3], "B": [0.3, 1.0]})
        >>> dg = build_tree(cor, method="ward")
        >>> levels = list(one_over_n(dg.root, dg.assets))
        >>> len(levels) > 0
        True
    """
    # Accumulate into a local buffer so the input tree is never mutated.
    portfolio = Portfolio()

    # Weight budget available to each node of the current level.
    w: float = 1.0

    for n, level in enumerate(root.levels):
        for node in level:
            # Every leaf under this node gets the same share; compute it once
            # per node instead of once per leaf.
            share = w / node.leaf_count
            for leaf in node.leaves:
                # Cast to int, as the recursive allocators do, so leaf ids
                # stored as floats still index the asset list.
                portfolio[assets[int(leaf.value)]] = share

        # Halve the budget for the next, deeper level.
        w *= 0.5

        # Yield a deep copy so later levels cannot alter what was already yielded.
        yield n, deepcopy(portfolio)