Coverage for src / jquantstats / _stats.py: 97%

278 statements  

« prev     ^ index     » next       coverage.py v7.13.2, created at 2026-02-03 02:21 +0000

1from __future__ import annotations 

2 

3import dataclasses 

4from collections.abc import Callable, Iterable 

5from functools import wraps 

6from typing import TYPE_CHECKING, Any, cast 

7 

8import numpy as np 

9import polars as pl 

10from scipy.stats import norm 

11 

12if TYPE_CHECKING: 

13 from ._data import Data 

14 

15 

16@dataclasses.dataclass(frozen=True) 

17class Stats: 

18 """Statistical analysis tools for financial returns data. 

19 

20 This class provides a comprehensive set of methods for calculating various 

21 financial metrics and statistics on returns data, including: 

22 

23 - Basic statistics (mean, skew, kurtosis) 

24 - Risk metrics (volatility, value-at-risk, drawdown) 

25 - Performance ratios (Sharpe, Sortino, information ratio) 

26 - Win/loss metrics (win rate, profit factor, payoff ratio) 

27 - Rolling calculations (rolling volatility, rolling Sharpe) 

28 - Factor analysis (alpha, beta, R-squared) 

29 

30 The class is designed to work with the _Data class and operates on Polars DataFrames 

31 for efficient computation. 

32 

33 Attributes: 

34 data: The _Data object containing returns and benchmark data. 

35 all: A DataFrame combining all data (index, returns, benchmark) for easy access. 

36 

37 """ 

38 

39 data: Data 

40 all: pl.DataFrame | None = None # Default is None; will be set in __post_init__ 

41 

42 def __post_init__(self) -> None: 

43 object.__setattr__(self, "all", self.data.all) 

44 

45 @staticmethod 

46 def _mean_positive_expr(series: pl.Series) -> float: 

47 return cast(float, series.filter(series > 0).mean()) 

48 

49 @staticmethod 

50 def _mean_negative_expr(series: pl.Series) -> float: 

51 return cast(float, series.filter(series < 0).mean()) 

52 

53 @staticmethod 

54 def columnwise_stat(func: Callable[..., Any]) -> Callable[..., dict[str, float]]: 

55 """Apply a column-wise statistical function to all numeric columns. 

56 

57 Args: 

58 func (Callable): The function to decorate. 

59 

60 Returns: 

61 Callable: The decorated function. 

62 

63 """ 

64 

65 @wraps(func) 

66 def wrapper(self: Stats, *args: Any, **kwargs: Any) -> dict[str, float]: 

67 return {col: func(self, series, *args, **kwargs) for col, series in self.data.items()} 

68 

69 return wrapper 

70 

71 @staticmethod 

72 def to_frame(func: Callable[..., Any]) -> Callable[..., pl.DataFrame]: 

73 """Apply per-column expressions and evaluates with .with_columns(...). 

74 

75 Args: 

76 func (Callable): The function to decorate. 

77 

78 Returns: 

79 Callable: The decorated function. 

80 

81 """ 

82 

83 @wraps(func) 

84 def wrapper(self: Stats, *args: Any, **kwargs: Any) -> pl.DataFrame: 

85 return cast(pl.DataFrame, self.all).select( 

86 [pl.col(name) for name in self.data.date_col] 

87 + [func(self, series, *args, **kwargs).alias(col) for col, series in self.data.items()] 

88 ) 

89 

90 return wrapper 

91 

92 @columnwise_stat 

93 def skew(self, series: pl.Series) -> int | float | None: 

94 """Calculate skewness (asymmetry) for each numeric column. 

95 

96 Args: 

97 series (pl.Series): The series to calculate skewness for. 

98 

99 Returns: 

100 float: The skewness value. 

101 

102 """ 

103 return cast("int | float | None", series.skew(bias=False)) 

104 

105 @columnwise_stat 

106 def kurtosis(self, series: pl.Series) -> int | float | None: 

107 """Calculate the kurtosis of returns. 

108 

109 The degree to which a distribution peak compared to a normal distribution. 

110 

111 Args: 

112 series (pl.Series): The series to calculate kurtosis for. 

113 

114 Returns: 

115 float: The kurtosis value. 

116 

117 """ 

118 return cast("int | float | None", series.kurtosis(bias=False)) 

119 

120 @columnwise_stat 

121 def avg_return(self, series: pl.Series) -> float: 

122 """Calculate average return per non-zero, non-null value. 

123 

124 Args: 

125 series (pl.Series): The series to calculate average return for. 

126 

127 Returns: 

128 float: The average return value. 

129 

130 """ 

131 return cast(float, series.filter(series.is_not_null() & (series != 0)).mean()) 

132 

133 @columnwise_stat 

134 def avg_win(self, series: pl.Series) -> float: 

135 """Calculate the average winning return/trade for an asset. 

136 

137 Args: 

138 series (pl.Series): The series to calculate average win for. 

139 

140 Returns: 

141 float: The average winning return. 

142 

143 """ 

144 return self._mean_positive_expr(series) 

145 

146 @columnwise_stat 

147 def avg_loss(self, series: pl.Series) -> float: 

148 """Calculate the average loss return/trade for a period. 

149 

150 Args: 

151 series (pl.Series): The series to calculate average loss for. 

152 

153 Returns: 

154 float: The average loss return. 

155 

156 """ 

157 return self._mean_negative_expr(series) 

158 

159 @columnwise_stat 

160 def volatility(self, series: pl.Series, periods: int | float | None = None, annualize: bool = True) -> float: 

161 """Calculate the volatility of returns. 

162 

163 - Std dev of returns 

164 - Annualized by sqrt(periods) if `annualize` is True. 

165 

166 Args: 

167 series (pl.Series): The series to calculate volatility for. 

168 periods (int, optional): Number of periods per year. Defaults to 252. 

169 annualize (bool, optional): Whether to annualize the result. Defaults to True. 

170 

171 Returns: 

172 float: The volatility value. 

173 

174 """ 

175 raw_periods = periods or self.data._periods_per_year 

176 

177 # Ensure it's numeric 

178 if not isinstance(raw_periods, int | float): 

179 raise TypeError(f"Expected int or float for periods, got {type(raw_periods).__name__}") # noqa: TRY003 

180 

181 factor = float(np.sqrt(raw_periods)) if annualize else 1.0 

182 std_val = cast(float, series.std()) 

183 return (std_val if std_val is not None else 0.0) * factor 

184 

185 # periods = periods or self.data._periods_per_year 

186 # factor = np.sqrt(periods) if annualize else 1 

187 # return series.std() * factor 

188 

189 @columnwise_stat 

190 def payoff_ratio(self, series: pl.Series) -> float: 

191 """Measure the payoff ratio. 

192 

193 The payoff ratio is calculated as average win / abs(average loss). 

194 

195 Args: 

196 series (pl.Series): The series to calculate payoff ratio for. 

197 

198 Returns: 

199 float: The payoff ratio value. 

200 

201 """ 

202 avg_win = cast(float, series.filter(series > 0).mean()) 

203 avg_loss = float(np.abs(cast(float, series.filter(series < 0).mean()))) 

204 return avg_win / avg_loss 

205 

206 def win_loss_ratio(self) -> dict[str, float]: 

207 """Shorthand for payoff_ratio(). 

208 

209 Returns: 

210 dict[str, float]: Dictionary mapping asset names to win/loss ratios. 

211 

212 """ 

213 return self.payoff_ratio() 

214 

215 @columnwise_stat 

216 def profit_ratio(self, series: pl.Series) -> float: 

217 """Measure the profit ratio. 

218 

219 The profit ratio is calculated as win ratio / loss ratio. 

220 

221 Args: 

222 series (pl.Series): The series to calculate profit ratio for. 

223 

224 Returns: 

225 float: The profit ratio value. 

226 

227 """ 

228 wins = series.filter(series >= 0) 

229 losses = series.filter(series < 0) 

230 

231 try: 

232 win_mean = cast(float, wins.mean()) 

233 loss_mean = cast(float, losses.mean()) 

234 win_ratio = float(np.abs(win_mean / wins.count())) 

235 loss_ratio = float(np.abs(loss_mean / losses.count())) 

236 

237 return win_ratio / loss_ratio 

238 

239 except TypeError: 

240 return float(np.nan) 

241 

242 @columnwise_stat 

243 def profit_factor(self, series: pl.Series) -> float: 

244 """Measure the profit factor. 

245 

246 The profit factor is calculated as wins / loss. 

247 

248 Args: 

249 series (pl.Series): The series to calculate profit factor for. 

250 

251 Returns: 

252 float: The profit factor value. 

253 

254 """ 

255 wins = series.filter(series > 0) 

256 losses = series.filter(series < 0) 

257 wins_sum = cast(float, wins.sum()) 

258 losses_sum = cast(float, losses.sum()) 

259 

260 return float(np.abs(wins_sum / losses_sum)) 

261 

262 @columnwise_stat 

263 def value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float: 

264 """Calculate the daily value-at-risk. 

265 

266 Uses variance-covariance calculation with confidence level. 

267 

268 Args: 

269 series (pl.Series): The series to calculate value at risk for. 

270 alpha (float, optional): Confidence level. Defaults to 0.05. 

271 sigma (float, optional): Standard deviation multiplier. Defaults to 1.0. 

272 

273 Returns: 

274 float: The value at risk. 

275 

276 """ 

277 mean_val = cast(float, series.mean()) 

278 std_val = cast(float, series.std()) 

279 mu = mean_val if mean_val is not None else 0.0 

280 sigma *= std_val if std_val is not None else 0.0 

281 

282 return float(norm.ppf(alpha, mu, sigma)) 

283 

284 @columnwise_stat 

285 def conditional_value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float: 

286 """Calculate the conditional value-at-risk. 

287 

288 Also known as CVaR or expected shortfall, calculated for each numeric column. 

289 

290 Args: 

291 series (pl.Series): The series to calculate conditional value at risk for. 

292 alpha (float, optional): Confidence level. Defaults to 0.05. 

293 sigma (float, optional): Standard deviation multiplier. Defaults to 1.0. 

294 

295 Returns: 

296 float: The conditional value at risk. 

297 

298 """ 

299 mean_val = cast(float, series.mean()) 

300 std_val = cast(float, series.std()) 

301 mu = mean_val if mean_val is not None else 0.0 

302 sigma *= std_val if std_val is not None else 0.0 

303 

304 var = norm.ppf(alpha, mu, sigma) 

305 

306 # Compute mean of returns less than or equal to VaR 

307 # Cast to Any or pl.Series to suppress Ty error 

308 # Cast the mask to pl.Expr to satisfy type checker 

309 mask = cast(Iterable[bool], series < var) 

310 return cast(float, series.filter(mask).mean()) 

311 

312 # filtered_series = cast(pl.Series, series.filter(series < var)) 

313 # return filtered_series.mean() 

314 

315 @columnwise_stat 

316 def win_rate(self, series: pl.Series) -> float: 

317 """Calculate the win ratio for a period. 

318 

319 Args: 

320 series (pl.Series): The series to calculate win rate for. 

321 

322 Returns: 

323 float: The win rate value. 

324 

325 """ 

326 num_pos = series.filter(series > 0).count() 

327 num_nonzero = series.filter(series != 0).count() 

328 return float(num_pos / num_nonzero) 

329 

330 @columnwise_stat 

331 def gain_to_pain_ratio(self, series: pl.Series) -> float: 

332 """Calculate Jack Schwager's Gain-to-Pain Ratio. 

333 

334 The ratio is calculated as total return / sum of losses (in absolute value). 

335 

336 Args: 

337 series (pl.Series): The series to calculate gain to pain ratio for. 

338 

339 Returns: 

340 float: The gain to pain ratio value. 

341 

342 """ 

343 total_gain = series.sum() 

344 total_pain = series.filter(series < 0).abs().sum() 

345 try: 

346 return float(total_gain / total_pain) 

347 except ZeroDivisionError: 

348 return float(np.nan) 

349 

350 @columnwise_stat 

351 def risk_return_ratio(self, series: pl.Series) -> float: 

352 """Calculate the return/risk ratio. 

353 

354 This is equivalent to the Sharpe ratio without a risk-free rate. 

355 

356 Args: 

357 series (pl.Series): The series to calculate risk return ratio for. 

358 

359 Returns: 

360 float: The risk return ratio value. 

361 

362 """ 

363 mean_val = cast(float, series.mean()) 

364 std_val = cast(float, series.std()) 

365 return (mean_val if mean_val is not None else 0.0) / (std_val if std_val is not None else 1.0) 

366 

367 def kelly_criterion(self) -> dict[str, float]: 

368 """Calculate the optimal capital allocation per column. 

369 

370 Uses the Kelly Criterion formula: f* = [(b * p) - q] / b 

371 where: 

372 - b = payoff ratio 

373 - p = win rate 

374 - q = 1 - p. 

375 

376 Returns: 

377 dict[str, float]: Dictionary mapping asset names to Kelly criterion values. 

378 

379 """ 

380 b = self.payoff_ratio() 

381 p = self.win_rate() 

382 

383 return { 

384 col: ((b[col] * p[col]) - (1 - p[col])) / b[col] 

385 # if b[col] not in (None, 0) and p[col] is not None else None 

386 for col in b 

387 } 

388 

389 @columnwise_stat 

390 def best(self, series: pl.Series) -> float | None: 

391 """Find the maximum return per column (best period). 

392 

393 Args: 

394 series (pl.Series): The series to find the best return for. 

395 

396 Returns: 

397 float: The maximum return value. 

398 

399 """ 

400 val = cast(float, series.max()) 

401 return val if val is not None else None 

402 

403 @columnwise_stat 

404 def worst(self, series: pl.Series) -> float | None: 

405 """Find the minimum return per column (worst period). 

406 

407 Args: 

408 series (pl.Series): The series to find the worst return for. 

409 

410 Returns: 

411 float: The minimum return value. 

412 

413 """ 

414 val = cast(float, series.min()) 

415 return val if val is not None else None 

416 

417 @columnwise_stat 

418 def exposure(self, series: pl.Series) -> float: 

419 """Calculate the market exposure time (returns != 0). 

420 

421 Args: 

422 series (pl.Series): The series to calculate exposure for. 

423 

424 Returns: 

425 float: The exposure value. 

426 

427 """ 

428 all_data = cast(pl.DataFrame, self.all) 

429 return float(np.round((series.filter(series != 0).count() / all_data.height), decimals=2)) 

430 

431 @columnwise_stat 

432 def sharpe(self, series: pl.Series, periods: int | float | None = None) -> float: 

433 """Calculate the Sharpe ratio of asset returns. 

434 

435 Args: 

436 series (pl.Series): The series to calculate Sharpe ratio for. 

437 periods (int, optional): Number of periods per year. Defaults to 252. 

438 

439 Returns: 

440 float: The Sharpe ratio value. 

441 

442 """ 

443 periods = periods or self.data._periods_per_year 

444 

445 std_val = cast(float, series.std(ddof=1)) 

446 mean_val = cast(float, series.mean()) 

447 divisor = std_val if std_val is not None else 1.0 

448 

449 res = (mean_val if mean_val is not None else 0.0) / divisor 

450 factor = periods or 1 

451 return float(res * np.sqrt(factor)) 

452 

453 @columnwise_stat 

454 def sharpe_variance(self, series: pl.Series, periods: int | float | None = None) -> float: 

455 r"""Calculate the asymptotic variance of the Sharpe Ratio. 

456 

457 .. math:: 

458 \text{Var}(SR) = \frac{1 + \frac{S \cdot SR}{2} + \frac{(K - 3) \cdot SR^2}{4}}{T} 

459 

460 where: 

461 - \(S\) is the skewness of returns 

462 - \(K\) is the kurtosis of returns 

463 - \(SR\) is the Sharpe ratio (unannualized) 

464 - \(T\) is the number of observations 

465 

466 Args: 

467 series (pl.Series): The series to calculate Sharpe ratio variance for. 

468 periods (int | float, optional): Number of periods per year. Defaults to data periods. 

469 

470 Returns: 

471 float: The asymptotic variance of the Sharpe ratio. 

472 If number of periods per year is provided or inferred from the data, the result is annualized. 

473 

474 """ 

475 t = series.count() 

476 mean_val = cast(float, series.mean()) 

477 std_val = cast(float, series.std(ddof=1)) 

478 if mean_val is None or std_val is None or std_val == 0: 

479 return float(np.nan) 

480 sr = mean_val / std_val 

481 

482 skew_val = series.skew(bias=False) 

483 kurt_val = series.kurtosis(bias=False) 

484 

485 if skew_val is None or kurt_val is None: 

486 return float(np.nan) 

487 # Base variance calculation using unannualized Sharpe ratio 

488 # Formula: (1 + skew*SR/2 + (kurt-3)*SR²/4) / T 

489 base_variance = (1 + (float(skew_val) * sr) / 2 + ((float(kurt_val) - 3) / 4) * sr**2) / t 

490 # Annualize by scaling with the number of periods 

491 periods = periods or self.data._periods_per_year 

492 factor = periods or 1 

493 return float(base_variance * factor) 

494 

495 @columnwise_stat 

496 def prob_sharpe_ratio(self, series: pl.Series, benchmark_sr: float) -> float: 

497 r"""Calculate the probabilistic sharpe ratio (PSR). 

498 

499 Args: 

500 series (pl.Series): The series to calculate probabilistic Sharpe ratio for. 

501 benchmark_sr (float): The target Sharpe ratio to compare against. This should be unannualized. 

502 

503 Returns: 

504 float: Probabilistic Sharpe Ratio. 

505 

506 Note: 

507 PSR is the probability that the observed Sharpe ratio is greater than a 

508 given benchmark Sharpe ratio. 

509 

510 """ 

511 t = series.count() 

512 

513 # Calculate observed unannualized Sharpe ratio 

514 mean_val = cast(float, series.mean()) 

515 std_val = cast(float, series.std(ddof=1)) 

516 if mean_val is None or std_val is None or std_val == 0: 

517 return float(np.nan) 

518 # Unannualized observed Sharpe ratio 

519 observed_sr = mean_val / std_val 

520 

521 skew_val = series.skew(bias=False) 

522 kurt_val = series.kurtosis(bias=False) 

523 

524 if skew_val is None or kurt_val is None: 

525 return float(np.nan) 

526 

527 # Calculate variance using unannualized benchmark Sharpe ratio 

528 var_bench_sr = (1 + (float(skew_val) * benchmark_sr) / 2 + ((float(kurt_val) - 3) / 4) * benchmark_sr**2) / t 

529 

530 if var_bench_sr <= 0: 

531 return float(np.nan) 

532 return float(norm.cdf((observed_sr - benchmark_sr) / np.sqrt(var_bench_sr))) 

533 

534 @columnwise_stat 

535 def hhi_positive(self, series: pl.Series) -> float: 

536 r"""Calculate the Herfindahl-Hirschman Index (HHI) for positive returns. 

537 

538 This quantifies how concentrated the positive returns are in a series. 

539 

540 .. math:: 

541 w^{\plus} = \frac{r_{t}^{\plus}}{\sum{r_{t}^{\plus}}} \\ 

542 HHI^{\plus} = \frac{N_{\plus} \sum{(w^{\plus})^2} - 1}{N_{\plus} - 1} 

543 

544 where: 

545 - \(r_{t}^{\plus}\) are the positive returns 

546 - \(N_{\plus}\) is the number of positive returns 

547 - \(w^{\plus}\) are the weights of positive returns 

548 

549 Args: 

550 series (pl.Series): The series to calculate HHI for. 

551 

552 Returns: 

553 float: The HHI value for positive returns. Returns NaN if fewer than 3 

554 positive returns are present. 

555 

556 Note: 

557 Values range from 0 (perfectly diversified gains) to 1 (all gains 

558 concentrated in a single period). 

559 """ 

560 positive_returns = series.filter(series > 0).drop_nans() 

561 if positive_returns.len() <= 2: 

562 return float(np.nan) 

563 weight = positive_returns / positive_returns.sum() 

564 return float((weight.len() * (weight**2).sum() - 1) / (weight.len() - 1)) 

565 

566 @columnwise_stat 

567 def hhi_negative(self, series: pl.Series) -> float: 

568 r"""Calculate the Herfindahl-Hirschman Index (HHI) for negative returns. 

569 

570 This quantifies how concentrated the negative returns are in a series. 

571 

572 .. math:: 

573 w^{\minus} = \frac{r_{t}^{\minus}}{\sum{r_{t}^{\minus}}} \\ 

574 HHI^{\minus} = \frac{N_{\minus} \sum{(w^{\minus})^2} - 1}{N_{\minus} - 1} 

575 

576 where: 

577 - \(r_{t}^{\minus}\) are the negative returns 

578 - \(N_{\minus}\) is the number of negative returns 

579 - \(w^{\minus}\) are the weights of negative returns 

580 

581 Args: 

582 series (pl.Series): The returns series to calculate HHI for. 

583 

584 Returns: 

585 float: The HHI value for negative returns. Returns NaN if fewer than 3 

586 negative returns are present. 

587 

588 Note: 

589 Values range from 0 (perfectly diversified losses) to 1 (all losses 

590 concentrated in a single period). 

591 """ 

592 negative_returns = series.filter(series < 0).drop_nans() 

593 if negative_returns.len() <= 2: 

594 return float(np.nan) 

595 weight = negative_returns / negative_returns.sum() 

596 return float((weight.len() * (weight**2).sum() - 1) / (weight.len() - 1)) 

597 

598 @columnwise_stat 

599 def sortino(self, series: pl.Series, periods: int | float | None = None) -> float: 

600 """Calculate the Sortino ratio. 

601 

602 The Sortino ratio is the mean return divided by downside deviation. 

603 Based on Red Rock Capital's Sortino ratio paper. 

604 

605 Args: 

606 series (pl.Series): The series to calculate Sortino ratio for. 

607 periods (int, optional): Number of periods per year. Defaults to 252. 

608 

609 Returns: 

610 float: The Sortino ratio value. 

611 

612 """ 

613 periods = periods or self.data._periods_per_year 

614 downside_sum = cast(float, ((series.filter(series < 0)) ** 2).sum()) 

615 downside_deviation = float(np.sqrt(downside_sum / series.count())) 

616 mean_val = cast(float, series.mean()) 

617 ratio = (mean_val if mean_val is not None else 0.0) / downside_deviation 

618 return float(ratio * np.sqrt(periods)) 

619 

620 @to_frame 

621 def rolling_sortino( 

622 self, series: pl.Expr, rolling_period: int = 126, periods_per_year: int | float | None = None 

623 ) -> pl.Expr: 

624 """Calculate the rolling Sortino ratio. 

625 

626 Args: 

627 series (pl.Expr): The expression to calculate rolling Sortino ratio for. 

628 rolling_period (int, optional): The rolling window size. Defaults to 126. 

629 periods_per_year (int, optional): Number of periods per year. Defaults to 252. 

630 

631 Returns: 

632 pl.Expr: The rolling Sortino ratio expression. 

633 

634 """ 

635 ppy = periods_per_year or self.data._periods_per_year 

636 

637 mean_ret = series.rolling_mean(window_size=rolling_period) 

638 

639 # Rolling downside deviation (squared negative returns averaged over window) 

640 downside = series.map_elements(lambda x: x**2 if x < 0 else 0.0, return_dtype=pl.Float64).rolling_mean( 

641 window_size=rolling_period 

642 ) 

643 

644 # Avoid division by zero 

645 sortino = mean_ret / downside.sqrt().fill_nan(0).fill_null(0) 

646 return cast(pl.Expr, sortino * (ppy**0.5)) 

647 

648 @to_frame 

649 def rolling_sharpe( 

650 self, series: pl.Expr, rolling_period: int = 126, periods_per_year: int | float | None = None 

651 ) -> pl.Expr: 

652 """Calculate the rolling Sharpe ratio. 

653 

654 Args: 

655 series (pl.Expr): The expression to calculate rolling Sharpe ratio for. 

656 rolling_period (int, optional): The rolling window size. Defaults to 126. 

657 periods_per_year (int, optional): Number of periods per year. Defaults to 252. 

658 

659 Returns: 

660 pl.Expr: The rolling Sharpe ratio expression. 

661 

662 """ 

663 ppy = periods_per_year or self.data._periods_per_year 

664 res = series.rolling_mean(window_size=rolling_period) / series.rolling_std(window_size=rolling_period) 

665 return cast(pl.Expr, res * np.sqrt(ppy)) 

666 

667 @to_frame 

668 def rolling_volatility( 

669 self, series: pl.Expr, rolling_period: int = 126, periods_per_year: int | float | None = None 

670 ) -> pl.Expr: 

671 """Calculate the rolling volatility of returns. 

672 

673 Args: 

674 series (pl.Expr): The expression to calculate rolling volatility for. 

675 rolling_period (int, optional): The rolling window size. Defaults to 126. 

676 periods_per_year (float, optional): Number of periods per year. Defaults to None. 

677 

678 Returns: 

679 pl.Expr: The rolling volatility expression. 

680 

681 """ 

682 ppy = periods_per_year or self.data._periods_per_year 

683 return cast(pl.Expr, series.rolling_std(window_size=rolling_period) * np.sqrt(ppy)) 

684 

685 @to_frame 

686 def drawdown(self, series: pl.Series) -> pl.Series: 

687 """Calculate the drawdown series for returns. 

688 

689 Args: 

690 series (pl.Series): The series to calculate drawdown for. 

691 

692 Returns: 

693 pl.Series: The drawdown series. 

694 

695 """ 

696 equity = self.prices(series) 

697 d = (equity / equity.cum_max()) - 1 

698 return -d 

699 

700 @staticmethod 

701 def prices(series: pl.Series) -> pl.Series: 

702 """Convert returns series to price series. 

703 

704 Args: 

705 series (pl.Series): The returns series to convert. 

706 

707 Returns: 

708 pl.Series: The price series. 

709 

710 """ 

711 return (1.0 + series).cum_prod() 

712 

713 @staticmethod 

714 def max_drawdown_single_series(series: pl.Series) -> float: 

715 price = Stats.prices(series) 

716 peak = price.cum_max() 

717 drawdown = price / peak - 1 

718 dd_min = cast(float, drawdown.min()) 

719 return -dd_min if dd_min is not None else 0.0 

720 

721 @columnwise_stat 

722 def max_drawdown(self, series: pl.Series) -> float: 

723 """Calculate the maximum drawdown for each column. 

724 

725 Args: 

726 series (pl.Series): The series to calculate maximum drawdown for. 

727 

728 Returns: 

729 float: The maximum drawdown value. 

730 

731 """ 

732 return Stats.max_drawdown_single_series(series) 

733 

734 def adjusted_sortino(self, periods: int | float | None = None) -> dict[str, float]: 

735 """Calculate Jack Schwager's adjusted Sortino ratio. 

736 

737 This adjustment allows for direct comparison to Sharpe ratio. 

738 See: https://archive.is/wip/2rwFW. 

739 

740 Args: 

741 periods (int, optional): Number of periods per year. Defaults to 252. 

742 

743 Returns: 

744 dict[str, float]: Dictionary mapping asset names to adjusted Sortino ratios. 

745 

746 """ 

747 sortino_data = self.sortino(periods=periods) 

748 return {k: v / np.sqrt(2) for k, v in sortino_data.items()} 

749 

750 @columnwise_stat 

751 def r_squared(self, series: pl.Series, benchmark: str | None = None) -> float: 

752 """Measure the straight line fit of the equity curve. 

753 

754 Args: 

755 series (pl.Series): The series to calculate R-squared for. 

756 benchmark (str, optional): The benchmark column name. Defaults to None. 

757 

758 Returns: 

759 float: The R-squared value. 

760 

761 Raises: 

762 AttributeError: If no benchmark data is available. 

763 

764 """ 

765 if self.data.benchmark is None: 

766 raise AttributeError("No benchmark data available") # noqa: TRY003 

767 

768 benchmark_col = benchmark or self.data.benchmark.columns[0] 

769 

770 # Evaluate both series and benchmark as Series 

771 all_data = cast(pl.DataFrame, self.all) 

772 dframe = all_data.select([series, pl.col(benchmark_col).alias("benchmark")]) 

773 

774 # Drop nulls 

775 dframe = dframe.drop_nulls() 

776 

777 matrix = dframe.to_numpy() 

778 # Get actual Series 

779 

780 strategy_np = matrix[:, 0] 

781 benchmark_np = matrix[:, 1] 

782 

783 corr_matrix = np.corrcoef(strategy_np, benchmark_np) 

784 r = corr_matrix[0, 1] 

785 return float(r**2) 

786 

787 def r2(self) -> dict[str, float]: 

788 """Shorthand for r_squared(). 

789 

790 Returns: 

791 dict[str, float]: Dictionary mapping asset names to R-squared values. 

792 

793 """ 

794 return self.r_squared() 

795 

796 @columnwise_stat 

797 def information_ratio( 

798 self, series: pl.Series, periods_per_year: int | float | None = None, benchmark: str | None = None 

799 ) -> float: 

800 """Calculate the information ratio. 

801 

802 This is essentially the risk return ratio of the net profits. 

803 

804 Args: 

805 series (pl.Series): The series to calculate information ratio for. 

806 periods_per_year (int, optional): Number of periods per year. Defaults to 252. 

807 benchmark (str, optional): The benchmark column name. Defaults to None. 

808 

809 Returns: 

810 float: The information ratio value. 

811 

812 """ 

813 ppy = periods_per_year or self.data._periods_per_year 

814 

815 benchmark_data = cast(pl.DataFrame, self.data.benchmark) 

816 benchmark_col = benchmark or benchmark_data.columns[0] 

817 

818 active = series - benchmark_data[benchmark_col] 

819 

820 mean_val = cast(float, active.mean()) 

821 std_val = cast(float, active.std()) 

822 

823 try: 

824 mean_f = mean_val if mean_val is not None else 0.0 

825 std_f = std_val if std_val is not None else 1.0 

826 return float((mean_f / std_f) * (ppy**0.5)) 

827 except ZeroDivisionError: 

828 return 0.0 

829 

830 @columnwise_stat 

831 def greeks( 

832 self, series: pl.Series, periods_per_year: int | float | None = None, benchmark: str | None = None 

833 ) -> dict[str, float]: 

834 """Calculate alpha and beta of the portfolio. 

835 

836 Args: 

837 series (pl.Series): The series to calculate greeks for. 

838 periods_per_year (int, optional): Number of periods per year. Defaults to 252. 

839 benchmark (str, optional): The benchmark column name. Defaults to None. 

840 

841 Returns: 

842 dict[str, float]: Dictionary containing alpha and beta values. 

843 

844 """ 

845 ppy = periods_per_year or self.data._periods_per_year 

846 

847 benchmark_data = cast(pl.DataFrame, self.data.benchmark) 

848 benchmark_col = benchmark or benchmark_data.columns[0] 

849 

850 # Evaluate both series and benchmark as Series 

851 all_data = cast(pl.DataFrame, self.all) 

852 dframe = all_data.select([series, pl.col(benchmark_col).alias("benchmark")]) 

853 

854 # Drop nulls 

855 dframe = dframe.drop_nulls() 

856 matrix = dframe.to_numpy() 

857 

858 # Get actual Series 

859 strategy_np = matrix[:, 0] 

860 benchmark_np = matrix[:, 1] 

861 

862 # 2x2 covariance matrix: [[var_strategy, cov], [cov, var_benchmark]] 

863 cov_matrix = np.cov(strategy_np, benchmark_np) 

864 

865 cov = cov_matrix[0, 1] 

866 var_benchmark = cov_matrix[1, 1] 

867 

868 beta = float(cov / var_benchmark) if var_benchmark != 0 else float("nan") 

869 alpha = float(np.mean(strategy_np) - beta * np.mean(benchmark_np)) 

870 

871 return {"alpha": float(alpha * ppy), "beta": beta}