Coverage for src / jquantstats / _stats / _performance.py: 100%

147 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-26 18:44 +0000

1"""Performance and risk-adjusted return metrics for financial data.""" 

2 

3from __future__ import annotations 

4 

5from typing import TYPE_CHECKING, cast 

6 

7import numpy as np 

8import polars as pl 

9from scipy.stats import norm 

10 

11from ._core import columnwise_stat, to_frame 

12 

13# ── Performance statistics mixin ───────────────────────────────────────────── 

14 

15 

16class _PerformanceStatsMixin: 

17 """Mixin providing performance, drawdown, and benchmark/factor metrics. 

18 

19 Covers: Sharpe ratio, Sortino ratio, adjusted Sortino, drawdown series, 

20 max drawdown, prices, R-squared, information ratio, and Greeks (alpha/beta). 

21 

22 Attributes (provided by the concrete subclass): 

23 data: The :class:`~jquantstats._data.Data` object. 

24 all: Combined DataFrame for efficient column selection. 

25 """ 

26 

27 if TYPE_CHECKING: 

28 from ._protocol import DataLike 

29 

30 data: DataLike 

31 all: pl.DataFrame | None 

32 

33 # ── Sharpe & Sortino ────────────────────────────────────────────────────── 

34 

@columnwise_stat
def sharpe(self, series: pl.Series, periods: int | float | None = None) -> float:
    """Compute the annualised Sharpe ratio of a returns series.

    The ratio is the mean return divided by the sample standard deviation
    (``ddof=1``), scaled by the square root of the annualisation factor.

    Args:
        series (pl.Series): The returns to evaluate.
        periods (int | float, optional): Number of periods per year. When
            omitted (or falsy) ``self.data._periods_per_year`` is used; if that
            is falsy as well, a factor of 1 (no annualisation) is applied.

    Returns:
        float: The Sharpe ratio, or NaN when the standard deviation is missing
        or negligibly small relative to the mean.

    """
    annualisation = periods or self.data._periods_per_year or 1

    raw_std = cast(float, series.std(ddof=1))
    raw_mean = cast(float, series.mean())
    sigma = 0.0 if raw_std is None else raw_std
    mu = 0.0 if raw_mean is None else raw_mean

    # Relative tolerance: a dispersion this small is treated as zero rather
    # than used as a divisor.
    machine_eps = np.finfo(np.float64).eps
    tolerance = machine_eps * max(abs(mu), machine_eps) * 10
    if sigma <= tolerance:
        return float("nan")

    return float((mu / sigma) * np.sqrt(annualisation))

61 

@columnwise_stat
def sharpe_variance(self, series: pl.Series, periods: int | float | None = None) -> float:
    r"""Calculate the asymptotic variance of the Sharpe ratio.

    .. math::
        \text{Var}(SR) = \frac{1 + \frac{S \cdot SR}{2} + \frac{(K - 3) \cdot SR^2}{4}}{T}

    where:
        - \(S\) is the sample skewness of returns
        - \(K\) is the sample kurtosis of returns in Pearson's convention
          (3 for a normal distribution), so \(K - 3\) is the excess kurtosis
        - \(SR\) is the Sharpe ratio (unannualized)
        - \(T\) is the number of observations

    Args:
        series (pl.Series): The series to calculate Sharpe ratio variance for.
        periods (int | float, optional): Number of periods per year. When
            omitted (or falsy) ``self.data._periods_per_year`` is used; if that
            is falsy as well, a factor of 1 is applied.

    Returns:
        float: The variance from the formula above multiplied by the number of
        periods. NaN when the mean, standard deviation, skewness or kurtosis
        is unavailable, or when the standard deviation is negligibly small
        relative to the mean.

    """
    t = series.count()
    mean_val = cast(float, series.mean())
    std_val = cast(float, series.std(ddof=1))
    if mean_val is None or std_val is None:
        return float(np.nan)

    # Same relative tolerance as ``sharpe``: a near-zero dispersion would
    # otherwise produce an astronomically large ratio instead of NaN.
    eps = np.finfo(np.float64).eps
    if std_val <= eps * max(abs(mean_val), eps) * 10:
        return float(np.nan)
    sr = mean_val / std_val

    skew_val = series.skew(bias=False)
    # Request Pearson (non-excess) kurtosis explicitly: the formula subtracts 3
    # itself, so using the Fisher default would subtract it twice.
    kurt_val = series.kurtosis(fisher=False, bias=False)
    if skew_val is None or kurt_val is None:
        return float(np.nan)

    # Base variance using the unannualized Sharpe ratio:
    # (1 + skew*SR/2 + (kurt-3)*SR²/4) / T
    base_variance = (1 + (float(skew_val) * sr) / 2 + ((float(kurt_val) - 3) / 4) * sr**2) / t

    # Annualize by scaling with the number of periods.
    factor = periods or self.data._periods_per_year or 1
    return float(base_variance * factor)

103 

@columnwise_stat
def prob_sharpe_ratio(self, series: pl.Series, benchmark_sr: float) -> float:
    r"""Calculate the probabilistic Sharpe ratio (PSR).

    The observed (unannualized) Sharpe ratio is compared with ``benchmark_sr``
    and the difference is standardised by

    .. math::
        \sigma^2 = \frac{1 + \frac{S \cdot SR^*}{2} + \frac{(K - 3) \cdot (SR^*)^2}{4}}{T}

    where \(S\) is the sample skewness, \(K\) the sample kurtosis in Pearson's
    convention (3 for a normal distribution), \(SR^*\) the benchmark Sharpe
    ratio and \(T\) the number of observations.

    Args:
        series (pl.Series): The series to calculate probabilistic Sharpe ratio for.
        benchmark_sr (float): The target Sharpe ratio to compare against. This
            should be unannualized.

    Returns:
        float: ``norm.cdf((observed_sr - benchmark_sr) / sqrt(variance))``.
        NaN when the mean, standard deviation, skewness or kurtosis is
        unavailable, when the standard deviation is negligibly small relative
        to the mean, or when the variance term is not positive.

    Note:
        PSR is the probability that the observed Sharpe ratio is greater than a
        given benchmark Sharpe ratio.

    """
    t = series.count()

    mean_val = cast(float, series.mean())
    std_val = cast(float, series.std(ddof=1))
    if mean_val is None or std_val is None:
        return float(np.nan)

    # Same relative tolerance as ``sharpe``: do not divide by a dispersion that
    # is indistinguishable from rounding noise.
    eps = np.finfo(np.float64).eps
    if std_val <= eps * max(abs(mean_val), eps) * 10:
        return float(np.nan)

    # Unannualized observed Sharpe ratio.
    observed_sr = mean_val / std_val

    skew_val = series.skew(bias=False)
    # Request Pearson (non-excess) kurtosis explicitly: the formula subtracts 3
    # itself, so using the Fisher default would subtract it twice.
    kurt_val = series.kurtosis(fisher=False, bias=False)
    if skew_val is None or kurt_val is None:
        return float(np.nan)

    # Variance evaluated at the unannualized benchmark Sharpe ratio.
    var_bench_sr = (1 + (float(skew_val) * benchmark_sr) / 2 + ((float(kurt_val) - 3) / 4) * benchmark_sr**2) / t

    # A non-positive variance has no real square root.
    if var_bench_sr <= 0:
        return float(np.nan)
    return float(norm.cdf((observed_sr - benchmark_sr) / np.sqrt(var_bench_sr)))

142 

@columnwise_stat
def hhi_positive(self, series: pl.Series) -> float:
    r"""Compute the Herfindahl-Hirschman Index (HHI) of the positive returns.

    The index measures how concentrated the gains of a series are.

    .. math::
        w^{+} = \frac{r_{t}^{+}}{\sum{r_{t}^{+}}} \\
        HHI^{+} = \frac{N_{+} \sum{(w^{+})^2} - 1}{N_{+} - 1}

    where:
        - \(r_{t}^{+}\) are the positive returns
        - \(N_{+}\) is the number of positive returns
        - \(w^{+}\) are the weights of positive returns

    Args:
        series (pl.Series): The returns series to evaluate.

    Returns:
        float: The HHI of the positive returns, or NaN when fewer than 3
        positive returns are present.

    Note:
        Values range from 0 (perfectly diversified gains) to 1 (all gains
        concentrated in a single period).
    """
    # Keep strictly positive entries, then discard any NaN that remains.
    gains = series.filter(series > 0).drop_nans()
    n_gains = gains.len()
    if n_gains < 3:
        return float(np.nan)

    shares = gains / gains.sum()
    sum_of_squares = (shares**2).sum()
    return float((n_gains * sum_of_squares - 1) / (n_gains - 1))

174 

@columnwise_stat
def hhi_negative(self, series: pl.Series) -> float:
    r"""Compute the Herfindahl-Hirschman Index (HHI) of the negative returns.

    The index measures how concentrated the losses of a series are.

    .. math::
        w^{-} = \frac{r_{t}^{-}}{\sum{r_{t}^{-}}} \\
        HHI^{-} = \frac{N_{-} \sum{(w^{-})^2} - 1}{N_{-} - 1}

    where:
        - \(r_{t}^{-}\) are the negative returns
        - \(N_{-}\) is the number of negative returns
        - \(w^{-}\) are the weights of negative returns

    Args:
        series (pl.Series): The returns series to evaluate.

    Returns:
        float: The HHI of the negative returns, or NaN when fewer than 3
        negative returns are present.

    Note:
        Values range from 0 (perfectly diversified losses) to 1 (all losses
        concentrated in a single period).
    """
    # Keep strictly negative entries, then discard any NaN that remains.
    losses = series.filter(series < 0).drop_nans()
    n_losses = losses.len()
    if n_losses < 3:
        return float(np.nan)

    # Dividing by the (negative) total turns the losses into positive weights.
    shares = losses / losses.sum()
    sum_of_squares = (shares**2).sum()
    return float((n_losses * sum_of_squares - 1) / (n_losses - 1))

206 

@columnwise_stat
def sortino(self, series: pl.Series, periods: int | float | None = None) -> float:
    """Calculate the Sortino ratio.

    The Sortino ratio is the mean return divided by downside deviation, where
    the downside deviation is the root of the sum of squared negative returns
    divided by the total number of observations. Based on Red Rock Capital's
    Sortino ratio paper.

    Args:
        series (pl.Series): The series to calculate Sortino ratio for.
        periods (int | float, optional): Number of periods per year. When
            omitted (or falsy) ``self.data._periods_per_year`` is used; if that
            is falsy as well, a factor of 1 (no annualisation) is applied.

    Returns:
        float: The annualised Sortino ratio. When there is no downside
        deviation the result is ``inf`` for a positive mean and NaN otherwise.
        NaN is also returned for a series without any observations.

    """
    n_obs = series.count()
    if n_obs == 0:
        # Nothing to average over; avoid dividing by a zero observation count.
        return float("nan")

    # Fall back to 1 so a missing period count yields the unannualised ratio,
    # matching the behaviour of ``sharpe``.
    factor = periods or self.data._periods_per_year or 1

    downside_sum = (series.filter(series < 0) ** 2).sum()
    downside_deviation = float(np.sqrt(float(downside_sum) / n_obs))

    mean_val = cast(float, series.mean())
    mean_f = mean_val if mean_val is not None else 0.0

    if downside_deviation == 0.0:
        if mean_f > 0:
            return float("inf")
        elif mean_f < 0:  # pragma: no cover  # unreachable: no negatives ⟹ mean ≥ 0
            return float("-inf")
        else:
            return float("nan")

    ratio = mean_f / downside_deviation
    return float(ratio * np.sqrt(factor))

236 

237 # ── Drawdown ────────────────────────────────────────────────────────────── 

238 

@to_frame
def drawdown(self, series: pl.Series) -> pl.Series:
    """Build the drawdown series of a returns series.

    The returns are compounded into an equity curve via ``prices``; each point
    is then expressed as its relative shortfall from the running maximum.

    Args:
        series (pl.Series): The returns to evaluate.

    Returns:
        pl.Series: The drawdown at every step, sign-flipped so that a decline
        below the running peak is reported as a positive number.

    """
    equity = self.prices(series)
    running_peak = equity.cum_max()
    return -(equity / running_peak - 1)

253 

@staticmethod
def prices(series: pl.Series) -> pl.Series:
    """Compound a returns series into a price (equity) series.

    Args:
        series (pl.Series): The returns to compound.

    Returns:
        pl.Series: The cumulative product of ``1 + return``.

    """
    growth_factors = 1.0 + series
    return growth_factors.cum_prod()

266 

@staticmethod
def max_drawdown_single_series(series: pl.Series) -> float:
    """Return the maximum drawdown of a single returns series.

    Args:
        series: A Polars Series of returns values.

    Returns:
        float: The largest decline from a running peak as a positive fraction
        (e.g. 0.2 for 20%), or 0.0 when no minimum drawdown is available.
    """
    equity = _PerformanceStatsMixin.prices(series)
    underwater = equity / equity.cum_max() - 1
    trough = cast(float, underwater.min())
    if trough is None:
        return 0.0
    return -trough

282 

@columnwise_stat
def max_drawdown(self, series: pl.Series) -> float:
    """Compute the maximum drawdown of a returns series.

    Delegates to ``max_drawdown_single_series``.

    Args:
        series (pl.Series): The returns to evaluate.

    Returns:
        float: The maximum drawdown value.

    """
    worst_decline = _PerformanceStatsMixin.max_drawdown_single_series(series)
    return worst_decline

295 

def adjusted_sortino(self, periods: int | float | None = None) -> dict[str, float]:
    """Compute Jack Schwager's adjusted Sortino ratio.

    Each Sortino ratio is divided by the square root of two, which allows a
    direct comparison with the Sharpe ratio.
    See: https://archive.is/wip/2rwFW.

    Args:
        periods (int | float, optional): Number of periods per year; forwarded
            unchanged to ``sortino``.

    Returns:
        dict[str, float]: Dictionary mapping asset names to adjusted Sortino ratios.

    """
    root_two = np.sqrt(2)
    per_asset = self.sortino(periods=periods)
    return {name: ratio / root_two for name, ratio in per_asset.items()}

311 

312 # ── Benchmark & factor ──────────────────────────────────────────────────── 

313 

@columnwise_stat
def r_squared(self, series: pl.Series, benchmark: str | None = None) -> float:
    """Compute the squared correlation between a series and the benchmark.

    Both columns are selected from ``self.all``, rows containing nulls are
    dropped, and the off-diagonal entry of ``np.corrcoef`` is squared.

    Args:
        series (pl.Series): The series to calculate R-squared for.
        benchmark (str, optional): The benchmark column name. Defaults to the
            first column of ``self.data.benchmark``.

    Returns:
        float: The R-squared value.

    Raises:
        AttributeError: If no benchmark data is available.

    """
    bench_frame = self.data.benchmark
    if bench_frame is None:
        raise AttributeError("No benchmark data available")  # noqa: TRY003

    bench_name = benchmark or bench_frame.columns[0]

    # Pair the strategy column with the benchmark and keep complete rows only.
    combined = cast(pl.DataFrame, self.all)
    paired = combined.select([series, pl.col(bench_name).alias("benchmark")]).drop_nulls()

    values = paired.to_numpy()
    correlation = np.corrcoef(values[:, 0], values[:, 1])[0, 1]
    return float(correlation**2)

350 

def r2(self) -> dict[str, float]:
    """Alias for ``r_squared()`` called with its default arguments.

    Returns:
        dict[str, float]: Dictionary mapping asset names to R-squared values.

    """
    per_asset = self.r_squared()
    return per_asset

359 

@columnwise_stat
def information_ratio(
    self, series: pl.Series, periods_per_year: int | float | None = None, benchmark: str | None = None
) -> float:
    """Calculate the information ratio.

    The ratio is the mean of the active returns (series minus benchmark)
    divided by their standard deviation, scaled by the square root of the
    number of periods per year.

    Args:
        series (pl.Series): The series to calculate information ratio for.
        periods_per_year (int | float, optional): Number of periods per year.
            When omitted (or falsy) ``self.data._periods_per_year`` is used; if
            that is falsy as well, a factor of 1 (no annualisation) is applied.
        benchmark (str, optional): The benchmark column name. Defaults to the
            first column of ``self.data.benchmark``.

    Returns:
        float: The information ratio value, or 0.0 when the active returns
        have zero standard deviation.

    Raises:
        AttributeError: If no benchmark data is available.

    """
    benchmark_data = self.data.benchmark
    if benchmark_data is None:
        # Fail with the same explicit message as ``r_squared`` instead of an
        # opaque attribute lookup on None.
        raise AttributeError("No benchmark data available")  # noqa: TRY003

    # Fall back to 1 so a missing period count yields the unannualised ratio,
    # matching the behaviour of ``sharpe``.
    ppy = periods_per_year or self.data._periods_per_year or 1

    benchmark_col = benchmark or benchmark_data.columns[0]
    active = series - benchmark_data[benchmark_col]

    mean_val = cast(float, active.mean())
    std_val = cast(float, active.std())

    # A missing mean counts as 0.0 and a missing deviation as 1.0.
    mean_f = mean_val if mean_val is not None else 0.0
    std_f = std_val if std_val is not None else 1.0

    if std_f == 0:
        # No dispersion in the active returns: report 0.0 rather than dividing.
        return 0.0
    return float((mean_f / std_f) * (ppy**0.5))

393 

@columnwise_stat
def greeks(
    self, series: pl.Series, periods_per_year: int | float | None = None, benchmark: str | None = None
) -> dict[str, float]:
    """Calculate alpha and beta of the portfolio relative to the benchmark.

    Beta is the covariance between the series and the benchmark divided by the
    benchmark variance. Alpha is ``mean(series) - beta * mean(benchmark)``
    multiplied by the number of periods per year.

    Args:
        series (pl.Series): The series to calculate greeks for.
        periods_per_year (int | float, optional): Number of periods per year.
            When omitted (or falsy) ``self.data._periods_per_year`` is used; if
            that is falsy as well, a factor of 1 (no annualisation) is applied.
        benchmark (str, optional): The benchmark column name. Defaults to the
            first column of ``self.data.benchmark``.

    Returns:
        dict[str, float]: Dictionary with the keys ``"alpha"`` and ``"beta"``.
        Both are NaN when the benchmark variance is zero.

    Raises:
        AttributeError: If no benchmark data is available.

    """
    benchmark_data = self.data.benchmark
    if benchmark_data is None:
        # Fail with the same explicit message as ``r_squared`` instead of an
        # opaque attribute lookup on None.
        raise AttributeError("No benchmark data available")  # noqa: TRY003

    # Fall back to 1 so a missing period count yields the unannualised alpha,
    # matching the behaviour of ``sharpe``.
    ppy = periods_per_year or self.data._periods_per_year or 1

    benchmark_col = benchmark or benchmark_data.columns[0]

    # Pair the strategy column with the benchmark and keep complete rows only.
    all_data = cast(pl.DataFrame, self.all)
    dframe = all_data.select([series, pl.col(benchmark_col).alias("benchmark")]).drop_nulls()
    matrix = dframe.to_numpy()

    strategy_np = matrix[:, 0]
    benchmark_np = matrix[:, 1]

    # 2x2 covariance matrix: [[var_strategy, cov], [cov, var_benchmark]]
    cov_matrix = np.cov(strategy_np, benchmark_np)
    cov = cov_matrix[0, 1]
    var_benchmark = cov_matrix[1, 1]

    beta = float(cov / var_benchmark) if var_benchmark != 0 else float("nan")
    alpha = float(np.mean(strategy_np) - beta * np.mean(benchmark_np))

    return {"alpha": float(alpha * ppy), "beta": beta}