Coverage for src/jquantstats/_stats/

1"""Performance and risk-adjusted return metrics for financial data."""

3from __future__ import annotations

5from typing import TYPE_CHECKING, cast

7import numpy as np

8import polars as pl

9from scipy.stats import norm

11from ._core import columnwise_stat, to_frame

13# ── Performance statistics mixin ─────────────────────────────────────────────

class _PerformanceStatsMixin:
    """Mixin providing performance, drawdown, and benchmark/factor metrics.

    Covers: Sharpe ratio, Sortino ratio, adjusted Sortino, drawdown series,
    max drawdown, prices, R-squared, information ratio, and Greeks (alpha/beta).

    Attributes (provided by the concrete subclass):
        data: The :class:`~jquantstats._data.Data` object.
        all: Combined DataFrame for efficient column selection.
    """

    if TYPE_CHECKING:
        from ._protocol import DataLike

        data: DataLike
        all: pl.DataFrame | None

    # ── Shared helpers ────────────────────────────────────────────────────────

    def _resolve_periods(self, periods: int | float | None) -> int | float:
        """Return the annualisation factor: explicit value, else the data's, else 1."""
        return periods or self.data._periods_per_year or 1

    def _benchmark_column(self, benchmark: str | None) -> str:
        """Return the benchmark column name, raising if no benchmark data exists."""
        if self.data.benchmark is None:
            raise AttributeError("No benchmark data available")  # noqa: TRY003
        return benchmark or self.data.benchmark.columns[0]

    def _paired_with_benchmark(self, series: pl.Series, benchmark_col: str) -> tuple[np.ndarray, np.ndarray]:
        """Return (strategy, benchmark) arrays restricted to rows where both are non-null."""
        all_data = cast(pl.DataFrame, self.all)
        dframe = all_data.select([series, pl.col(benchmark_col).alias("benchmark")]).drop_nulls()
        matrix = dframe.to_numpy()
        return matrix[:, 0], matrix[:, 1]

    # ── Sharpe & Sortino ──────────────────────────────────────────────────────

    @columnwise_stat
    def sharpe(self, series: pl.Series, periods: int | float | None = None) -> float:
        """Calculate the Sharpe ratio of asset returns.

        Args:
            series (pl.Series): The series to calculate Sharpe ratio for.
            periods (int | float, optional): Number of periods per year. Defaults to
                the periods per year of the underlying data; if that is unavailable
                the ratio is returned unannualized.

        Returns:
            float: The Sharpe ratio value, or NaN when the standard deviation is
            numerically indistinguishable from zero.

        """
        factor = self._resolve_periods(periods)

        std_val = cast(float, series.std(ddof=1))
        mean_val = cast(float, series.mean())
        divisor = std_val if std_val is not None else 0.0
        mean_f = mean_val if mean_val is not None else 0.0

        # Treat a standard deviation at the level of floating-point noise
        # (relative to the mean) as zero rather than dividing by it.
        _eps = np.finfo(np.float64).eps
        if divisor <= _eps * max(abs(mean_f), _eps) * 10:
            return float("nan")

        return float((mean_f / divisor) * np.sqrt(factor))

    @columnwise_stat
    def sharpe_variance(self, series: pl.Series, periods: int | float | None = None) -> float:
        r"""Calculate the asymptotic variance of the Sharpe Ratio.

        .. math::
            \text{Var}(SR) = \frac{1 + \frac{S \cdot SR}{2} + \frac{(K - 3) \cdot SR^2}{4}}{T}

        where:
            - \(S\) is the skewness of returns
            - \(K\) is the (non-excess) kurtosis of returns
            - \(SR\) is the Sharpe ratio (unannualized)
            - \(T\) is the number of observations

        Args:
            series (pl.Series): The series to calculate Sharpe ratio variance for.
            periods (int | float, optional): Number of periods per year. Defaults to data periods.

        Returns:
            float: The asymptotic variance of the Sharpe ratio, or NaN when the
            mean, standard deviation, skewness or kurtosis is unavailable or the
            standard deviation is zero.
            If number of periods per year is provided or inferred from the data, the result is annualized.

        """
        t = series.count()
        mean_val = cast(float, series.mean())
        std_val = cast(float, series.std(ddof=1))
        if mean_val is None or std_val is None or std_val == 0:
            return float(np.nan)
        sr = mean_val / std_val

        skew_val = series.skew(bias=False)
        # fisher=False yields Pearson kurtosis (normal == 3); the formula subtracts
        # 3 itself, so the default excess kurtosis would be reduced twice.
        kurt_val = series.kurtosis(fisher=False, bias=False)
        if skew_val is None or kurt_val is None:
            return float(np.nan)

        # NOTE(review): published variants of this variance use a different skew
        # term — confirm that the form documented above is the intended one.
        base_variance = (1 + (float(skew_val) * sr) / 2 + ((float(kurt_val) - 3) / 4) * sr**2) / t

        # Annualize by scaling with the number of periods
        return float(base_variance * self._resolve_periods(periods))

    @columnwise_stat
    def prob_sharpe_ratio(self, series: pl.Series, benchmark_sr: float) -> float:
        r"""Calculate the probabilistic sharpe ratio (PSR).

        Args:
            series (pl.Series): The series to calculate probabilistic Sharpe ratio for.
            benchmark_sr (float): The target Sharpe ratio to compare against. This should be unannualized.

        Returns:
            float: Probabilistic Sharpe Ratio, or NaN when the required moments are
            unavailable, the standard deviation is zero, or the estimated variance
            is not positive.

        Note:
            PSR is the probability that the observed Sharpe ratio is greater than a
            given benchmark Sharpe ratio. The variance term is evaluated at
            ``benchmark_sr``.

        """
        t = series.count()

        mean_val = cast(float, series.mean())
        std_val = cast(float, series.std(ddof=1))
        if mean_val is None or std_val is None or std_val == 0:
            return float(np.nan)
        # Unannualized observed Sharpe ratio
        observed_sr = mean_val / std_val

        skew_val = series.skew(bias=False)
        # Pearson (non-excess) kurtosis: the "- 3" below converts it to excess.
        kurt_val = series.kurtosis(fisher=False, bias=False)
        if skew_val is None or kurt_val is None:
            return float(np.nan)

        var_bench_sr = (1 + (float(skew_val) * benchmark_sr) / 2 + ((float(kurt_val) - 3) / 4) * benchmark_sr**2) / t
        if var_bench_sr <= 0:
            return float(np.nan)
        return float(norm.cdf((observed_sr - benchmark_sr) / np.sqrt(var_bench_sr)))

    @columnwise_stat
    def hhi_positive(self, series: pl.Series) -> float:
        r"""Calculate the Herfindahl-Hirschman Index (HHI) for positive returns.

        This quantifies how concentrated the positive returns are in a series.

        .. math::
            w^{+} = \frac{r_{t}^{+}}{\sum{r_{t}^{+}}} \\
            HHI^{+} = \frac{N_{+} \sum{(w^{+})^2} - 1}{N_{+} - 1}

        where:
            - \(r_{t}^{+}\) are the positive returns
            - \(N_{+}\) is the number of positive returns
            - \(w^{+}\) are the weights of positive returns

        Args:
            series (pl.Series): The series to calculate HHI for.

        Returns:
            float: The HHI value for positive returns. Returns NaN if fewer than 3
            positive returns are present.

        Note:
            Values range from 0 (perfectly diversified gains) to 1 (all gains
            concentrated in a single period).
        """
        positive_returns = series.filter(series > 0).drop_nans()
        if positive_returns.len() <= 2:
            return float(np.nan)
        weight = positive_returns / positive_returns.sum()
        n = weight.len()
        return float((n * (weight**2).sum() - 1) / (n - 1))

    @columnwise_stat
    def hhi_negative(self, series: pl.Series) -> float:
        r"""Calculate the Herfindahl-Hirschman Index (HHI) for negative returns.

        This quantifies how concentrated the negative returns are in a series.

        .. math::
            w^{-} = \frac{r_{t}^{-}}{\sum{r_{t}^{-}}} \\
            HHI^{-} = \frac{N_{-} \sum{(w^{-})^2} - 1}{N_{-} - 1}

        where:
            - \(r_{t}^{-}\) are the negative returns
            - \(N_{-}\) is the number of negative returns
            - \(w^{-}\) are the weights of negative returns

        Args:
            series (pl.Series): The returns series to calculate HHI for.

        Returns:
            float: The HHI value for negative returns. Returns NaN if fewer than 3
            negative returns are present.

        Note:
            Values range from 0 (perfectly diversified losses) to 1 (all losses
            concentrated in a single period).
        """
        negative_returns = series.filter(series < 0).drop_nans()
        if negative_returns.len() <= 2:
            return float(np.nan)
        weight = negative_returns / negative_returns.sum()
        n = weight.len()
        return float((n * (weight**2).sum() - 1) / (n - 1))

    @columnwise_stat
    def sortino(self, series: pl.Series, periods: int | float | None = None) -> float:
        """Calculate the Sortino ratio.

        The Sortino ratio is the mean return divided by downside deviation.
        Based on Red Rock Capital's Sortino ratio paper.

        Args:
            series (pl.Series): The series to calculate Sortino ratio for.
            periods (int | float, optional): Number of periods per year. Defaults to
                the periods per year of the underlying data; if that is unavailable
                the ratio is returned unannualized.

        Returns:
            float: The Sortino ratio value. ``inf`` when there are no negative
            returns and the mean is positive; NaN when the series has no
            observations or when both the downside deviation and the mean are zero.

        """
        n_obs = series.count()
        if n_obs == 0:
            # Nothing to average over; avoid dividing by a zero count.
            return float("nan")

        factor = self._resolve_periods(periods)
        downside_sum = (series.filter(series < 0) ** 2).sum()
        # Downside deviation is normalised by the full observation count,
        # not only by the number of negative returns.
        downside_deviation = float(np.sqrt(float(downside_sum) / n_obs))
        mean_val = cast(float, series.mean())
        mean_f = mean_val if mean_val is not None else 0.0

        if downside_deviation == 0.0:
            # No negative returns, so the mean cannot be negative here.
            return float("inf") if mean_f > 0 else float("nan")

        return float(mean_f / downside_deviation * np.sqrt(factor))

    # ── Drawdown ──────────────────────────────────────────────────────────────

    @to_frame
    def drawdown(self, series: pl.Series) -> pl.Series:
        """Calculate the drawdown series for returns.

        Args:
            series (pl.Series): The series to calculate drawdown for.

        Returns:
            pl.Series: The drawdown series, expressed as non-negative fractions
            below the running peak of the compounded price path.

        """
        equity = self.prices(series)
        # equity / peak - 1 is <= 0; negate so drawdowns are reported as positive.
        return -((equity / equity.cum_max()) - 1)

    @staticmethod
    def prices(series: pl.Series) -> pl.Series:
        """Convert returns series to price series.

        Args:
            series (pl.Series): The returns series to convert.

        Returns:
            pl.Series: The price series, i.e. the cumulative product of ``1 + r``.

        """
        return (1.0 + series).cum_prod()

    @staticmethod
    def max_drawdown_single_series(series: pl.Series) -> float:
        """Compute the maximum drawdown for a single returns series.

        Args:
            series: A Polars Series of returns values.

        Returns:
            float: The maximum drawdown as a positive fraction (e.g. 0.2 for 20%).
            Returns 0.0 when no minimum can be computed.
        """
        price = _PerformanceStatsMixin.prices(series)
        drawdown = price / price.cum_max() - 1
        dd_min = cast(float, drawdown.min())
        return -dd_min if dd_min is not None else 0.0

    @columnwise_stat
    def max_drawdown(self, series: pl.Series) -> float:
        """Calculate the maximum drawdown for each column.

        Args:
            series (pl.Series): The series to calculate maximum drawdown for.

        Returns:
            float: The maximum drawdown value.

        """
        return _PerformanceStatsMixin.max_drawdown_single_series(series)

    def adjusted_sortino(self, periods: int | float | None = None) -> dict[str, float]:
        """Calculate Jack Schwager's adjusted Sortino ratio.

        This adjustment allows for direct comparison to Sharpe ratio.
        See: https://archive.is/wip/2rwFW.

        Args:
            periods (int | float, optional): Number of periods per year. Forwarded
                unchanged to :meth:`sortino`.

        Returns:
            dict[str, float]: Dictionary mapping asset names to adjusted Sortino ratios
            (the Sortino ratio divided by the square root of two).

        """
        sortino_data = self.sortino(periods=periods)
        scale = np.sqrt(2)
        return {k: float(v / scale) for k, v in sortino_data.items()}

    # ── Benchmark & factor ────────────────────────────────────────────────────

    @columnwise_stat
    def r_squared(self, series: pl.Series, benchmark: str | None = None) -> float:
        """Calculate the squared correlation between a series and the benchmark.

        Rows where either the series or the benchmark is null are dropped first.

        Args:
            series (pl.Series): The series to calculate R-squared for.
            benchmark (str, optional): The benchmark column name. Defaults to the
                first benchmark column.

        Returns:
            float: The R-squared value.

        Raises:
            AttributeError: If no benchmark data is available.

        """
        benchmark_col = self._benchmark_column(benchmark)
        strategy_np, benchmark_np = self._paired_with_benchmark(series, benchmark_col)

        r = np.corrcoef(strategy_np, benchmark_np)[0, 1]
        return float(r**2)

    def r2(self) -> dict[str, float]:
        """Shorthand for r_squared().

        Returns:
            dict[str, float]: Dictionary mapping asset names to R-squared values.

        """
        return self.r_squared()

    @columnwise_stat
    def information_ratio(
        self, series: pl.Series, periods_per_year: int | float | None = None, benchmark: str | None = None
    ) -> float:
        """Calculate the information ratio.

        This is the mean of the active return (series minus benchmark) divided by
        the standard deviation of the active return, scaled by the square root of
        the number of periods per year.

        Args:
            series (pl.Series): The series to calculate information ratio for.
            periods_per_year (int | float, optional): Number of periods per year.
                Defaults to the periods per year of the underlying data; if that is
                unavailable the ratio is returned unannualized.
            benchmark (str, optional): The benchmark column name. Defaults to the
                first benchmark column.

        Returns:
            float: The information ratio value, or 0.0 when the active return has
            zero standard deviation.

        Raises:
            AttributeError: If no benchmark data is available.

        """
        ppy = self._resolve_periods(periods_per_year)
        benchmark_col = self._benchmark_column(benchmark)
        benchmark_data = cast(pl.DataFrame, self.data.benchmark)

        active = series - benchmark_data[benchmark_col]

        mean_val = cast(float, active.mean())
        std_val = cast(float, active.std())
        mean_f = mean_val if mean_val is not None else 0.0
        std_f = std_val if std_val is not None else 1.0

        # Zero dispersion in the active return is reported as 0.0 rather than raising.
        if std_f == 0:
            return 0.0
        return float((mean_f / std_f) * (ppy**0.5))

    @columnwise_stat
    def greeks(
        self, series: pl.Series, periods_per_year: int | float | None = None, benchmark: str | None = None
    ) -> dict[str, float]:
        """Calculate alpha and beta of the portfolio.

        Beta is the covariance of the series with the benchmark divided by the
        benchmark variance; alpha is the mean residual return scaled by the number
        of periods per year. Rows where either input is null are dropped first.

        Args:
            series (pl.Series): The series to calculate greeks for.
            periods_per_year (int | float, optional): Number of periods per year.
                Defaults to the periods per year of the underlying data; if that is
                unavailable alpha is returned unannualized.
            benchmark (str, optional): The benchmark column name. Defaults to the
                first benchmark column.

        Returns:
            dict[str, float]: Dictionary containing alpha and beta values. Both are
            NaN when the benchmark variance is zero.

        Raises:
            AttributeError: If no benchmark data is available.

        """
        ppy = self._resolve_periods(periods_per_year)
        benchmark_col = self._benchmark_column(benchmark)
        strategy_np, benchmark_np = self._paired_with_benchmark(series, benchmark_col)

        # 2x2 covariance matrix: [[var_strategy, cov], [cov, var_benchmark]]
        cov_matrix = np.cov(strategy_np, benchmark_np)
        cov = cov_matrix[0, 1]
        var_benchmark = cov_matrix[1, 1]

        beta = float(cov / var_benchmark) if var_benchmark != 0 else float("nan")
        alpha = float(np.mean(strategy_np) - beta * np.mean(benchmark_np))

        return {"alpha": float(alpha * ppy), "beta": beta}

Coverage for src / jquantstats / _stats / _performance.py: 100%

147 statements