Coverage for src / jquantstats / _stats.py: 97%
278 statements
« prev ^ index » next coverage.py v7.13.2, created at 2026-02-03 02:21 +0000
« prev ^ index » next coverage.py v7.13.2, created at 2026-02-03 02:21 +0000
1from __future__ import annotations
3import dataclasses
4from collections.abc import Callable, Iterable
5from functools import wraps
6from typing import TYPE_CHECKING, Any, cast
8import numpy as np
9import polars as pl
10from scipy.stats import norm
12if TYPE_CHECKING:
13 from ._data import Data
@dataclasses.dataclass(frozen=True)
class Stats:
    """Statistical analysis tools for financial returns data.

    The methods of this class compute financial metrics on returns data:

    - Basic statistics (mean, skew, kurtosis)
    - Risk metrics (volatility, value-at-risk, drawdown)
    - Performance ratios (Sharpe, Sortino, information ratio)
    - Win/loss metrics (win rate, profit factor, payoff ratio)
    - Rolling calculations (rolling volatility, rolling Sharpe)
    - Factor analysis (alpha, beta, R-squared)

    The class wraps a ``Data`` object and works on Polars DataFrames.

    Attributes:
        data: The ``Data`` object holding returns and benchmark data.
        all: The combined frame exposed by ``data.all``; it is filled in by
            ``__post_init__`` regardless of the value passed to the constructor.

    """

    data: Data
    all: pl.DataFrame | None = None  # placeholder; replaced by data.all in __post_init__

    def __post_init__(self) -> None:
        """Store ``self.data.all`` on the instance as ``all``.

        The dataclass is frozen, so the attribute is written through
        ``object.__setattr__`` instead of a plain assignment.
        """
        combined = self.data.all
        object.__setattr__(self, "all", combined)
@staticmethod
def _mean_positive_expr(series: pl.Series) -> float:
    """Return the mean of the strictly positive values of ``series``."""
    gains = series.filter(series > 0)
    return cast(float, gains.mean())
@staticmethod
def _mean_negative_expr(series: pl.Series) -> float:
    """Return the mean of the strictly negative values of ``series``."""
    losses = series.filter(series < 0)
    return cast(float, losses.mean())
53 @staticmethod
54 def columnwise_stat(func: Callable[..., Any]) -> Callable[..., dict[str, float]]:
55 """Apply a column-wise statistical function to all numeric columns.
57 Args:
58 func (Callable): The function to decorate.
60 Returns:
61 Callable: The decorated function.
63 """
65 @wraps(func)
66 def wrapper(self: Stats, *args: Any, **kwargs: Any) -> dict[str, float]:
67 return {col: func(self, series, *args, **kwargs) for col, series in self.data.items()}
69 return wrapper
@staticmethod
def to_frame(func: Callable[..., Any]) -> Callable[..., pl.DataFrame]:
    """Turn a per-column computation into one that returns a DataFrame.

    The decorated method is called for every ``(name, series)`` pair from
    ``self.data.items()``; each result is aliased to the column name and
    selected from ``self.all`` together with the columns listed in
    ``self.data.date_col``.

    Args:
        func (Callable): The function to decorate. It is called as
            ``func(self, series, *args, **kwargs)``.

    Returns:
        Callable: A method returning the selected ``pl.DataFrame``.

    """

    @wraps(func)
    def wrapper(self: Stats, *args: Any, **kwargs: Any) -> pl.DataFrame:
        frame = cast(pl.DataFrame, self.all)
        date_columns = [pl.col(name) for name in self.data.date_col]
        computed = [func(self, series, *args, **kwargs).alias(col) for col, series in self.data.items()]
        return frame.select(date_columns + computed)

    return wrapper
@columnwise_stat
def skew(self, series: pl.Series) -> int | float | None:
    """Calculate the skewness (asymmetry) of each column.

    Args:
        series (pl.Series): The series to calculate skewness for.

    Returns:
        float: The value of ``series.skew(bias=False)``.

    """
    unbiased_skew = series.skew(bias=False)
    return cast("int | float | None", unbiased_skew)
@columnwise_stat
def kurtosis(self, series: pl.Series) -> int | float | None:
    """Calculate the kurtosis of each column.

    Kurtosis describes how peaked a distribution is compared to a normal
    distribution.

    Args:
        series (pl.Series): The series to calculate kurtosis for.

    Returns:
        float: The value of ``series.kurtosis(bias=False)``.

    """
    unbiased_kurtosis = series.kurtosis(bias=False)
    return cast("int | float | None", unbiased_kurtosis)
@columnwise_stat
def avg_return(self, series: pl.Series) -> float:
    """Calculate the mean over the non-null, non-zero values of each column.

    Args:
        series (pl.Series): The series to calculate average return for.

    Returns:
        float: The average return value.

    """
    keep = series.is_not_null() & (series != 0)
    selected = series.filter(keep)
    return cast(float, selected.mean())
@columnwise_stat
def avg_win(self, series: pl.Series) -> float:
    """Calculate the average winning (strictly positive) return of each column.

    Args:
        series (pl.Series): The series to calculate average win for.

    Returns:
        float: The mean of the positive values, as computed by
        ``_mean_positive_expr``.

    """
    mean_of_gains = self._mean_positive_expr(series)
    return mean_of_gains
@columnwise_stat
def avg_loss(self, series: pl.Series) -> float:
    """Calculate the average losing (strictly negative) return of each column.

    Args:
        series (pl.Series): The series to calculate average loss for.

    Returns:
        float: The mean of the negative values, as computed by
        ``_mean_negative_expr``.

    """
    mean_of_losses = self._mean_negative_expr(series)
    return mean_of_losses
@columnwise_stat
def volatility(self, series: pl.Series, periods: int | float | None = None, annualize: bool = True) -> float:
    """Calculate the volatility (standard deviation) of returns.

    The standard deviation is multiplied by ``sqrt(periods)`` when
    ``annualize`` is True.

    Args:
        series (pl.Series): The series to calculate volatility for.
        periods (int | float, optional): Number of periods per year. When
            omitted (or falsy), ``self.data._periods_per_year`` is used.
        annualize (bool, optional): Whether to annualize the result. Defaults to True.

    Returns:
        float: The volatility value; a missing standard deviation counts as 0.0.

    Raises:
        TypeError: If the resolved number of periods is not an int or float.

    """
    periods_per_year = periods if periods else self.data._periods_per_year

    # The fallback comes from the data object, so validate it before use.
    if not isinstance(periods_per_year, int | float):
        raise TypeError(f"Expected int or float for periods, got {type(periods_per_year).__name__}")  # noqa: TRY003

    scale = float(np.sqrt(periods_per_year)) if annualize else 1.0
    deviation = cast(float, series.std())
    if deviation is None:
        deviation = 0.0
    return deviation * scale
@columnwise_stat
def payoff_ratio(self, series: pl.Series) -> float:
    """Measure the payoff ratio.

    The payoff ratio is calculated as average win / abs(average loss), where
    wins are the strictly positive and losses the strictly negative returns.

    Args:
        series (pl.Series): The series to calculate payoff ratio for.

    Returns:
        float: The payoff ratio value, or NaN when the series contains no
        winning or no losing returns (the ratio is undefined in that case).

    """
    avg_win = cast("float | None", series.filter(series > 0).mean())
    avg_loss = cast("float | None", series.filter(series < 0).mean())

    # mean() of an empty selection is None; dividing by it would raise TypeError.
    if avg_win is None or avg_loss is None:
        return float(np.nan)

    return avg_win / float(np.abs(avg_loss))
def win_loss_ratio(self) -> dict[str, float]:
    """Return the result of ``payoff_ratio()`` under an alternative name.

    Returns:
        dict[str, float]: Dictionary mapping asset names to win/loss ratios.

    """
    ratios = self.payoff_ratio()
    return ratios
@columnwise_stat
def profit_ratio(self, series: pl.Series) -> float:
    """Measure the profit ratio.

    The profit ratio is calculated as win ratio / loss ratio, where each ratio
    is the absolute value of (mean of the group / number of values in the
    group). Wins are the returns ``>= 0`` and losses the returns ``< 0``.

    Args:
        series (pl.Series): The series to calculate profit ratio for.

    Returns:
        float: The profit ratio value, or NaN when either group is empty.

    """
    wins = series.filter(series >= 0)
    losses = series.filter(series < 0)

    win_mean = cast("float | None", wins.mean())
    loss_mean = cast("float | None", losses.mean())

    # An empty group has no mean; test for that directly instead of catching
    # the TypeError that dividing None would raise.
    if win_mean is None or loss_mean is None:
        return float(np.nan)

    win_ratio = float(np.abs(win_mean / wins.count()))
    loss_ratio = float(np.abs(loss_mean / losses.count()))
    return win_ratio / loss_ratio
@columnwise_stat
def profit_factor(self, series: pl.Series) -> float:
    """Measure the profit factor.

    The profit factor is calculated as abs(sum of wins / sum of losses), where
    wins are the strictly positive and losses the strictly negative returns.

    Args:
        series (pl.Series): The series to calculate profit factor for.

    Returns:
        float: The profit factor value, or NaN when the losses sum to zero
        (for example when the series has no losing returns).

    """
    wins_sum = cast(float, series.filter(series > 0).sum())
    losses_sum = cast(float, series.filter(series < 0).sum())

    # Without losses the denominator is zero; report NaN rather than raising
    # ZeroDivisionError.
    if not losses_sum:
        return float(np.nan)

    return float(np.abs(wins_sum / losses_sum))
@columnwise_stat
def value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
    """Calculate the value-at-risk of each column.

    Uses the variance-covariance approach: the ``alpha`` quantile of a normal
    distribution whose location is the mean of the series and whose scale is
    ``sigma`` times its standard deviation.

    Args:
        series (pl.Series): The series to calculate value at risk for.
        sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
        alpha (float, optional): Probability passed to ``norm.ppf``. Defaults to 0.05.

    Returns:
        float: The value at risk. A missing mean or standard deviation is
        treated as 0.0.

    """
    location = cast(float, series.mean())
    if location is None:
        location = 0.0

    deviation = cast(float, series.std())
    if deviation is None:
        deviation = 0.0

    scale = sigma * deviation
    return float(norm.ppf(alpha, location, scale))
@columnwise_stat
def conditional_value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
    """Calculate the conditional value-at-risk of each column.

    Also known as CVaR or expected shortfall: the mean of the returns that lie
    strictly below the parametric value-at-risk threshold.

    Args:
        series (pl.Series): The series to calculate conditional value at risk for.
        sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
        alpha (float, optional): Probability passed to ``norm.ppf``. Defaults to 0.05.

    Returns:
        float: The conditional value at risk.

    """
    location = cast(float, series.mean())
    if location is None:
        location = 0.0

    deviation = cast(float, series.std())
    if deviation is None:
        deviation = 0.0

    threshold = norm.ppf(alpha, location, sigma * deviation)

    # The cast only narrows the comparison result for the type checker.
    below_threshold = cast(Iterable[bool], series < threshold)
    tail = series.filter(below_threshold)
    return cast(float, tail.mean())
@columnwise_stat
def win_rate(self, series: pl.Series) -> float:
    """Calculate the win ratio for a period.

    The win rate is the number of strictly positive returns divided by the
    number of non-zero returns.

    Args:
        series (pl.Series): The series to calculate win rate for.

    Returns:
        float: The win rate value, or NaN when the series contains no
        non-zero returns.

    """
    num_pos = series.filter(series > 0).count()
    num_nonzero = series.filter(series != 0).count()

    # A series without any non-zero return has no defined win rate; avoid the
    # ZeroDivisionError the plain division would raise.
    if num_nonzero == 0:
        return float(np.nan)

    return float(num_pos / num_nonzero)
@columnwise_stat
def gain_to_pain_ratio(self, series: pl.Series) -> float:
    """Calculate Jack Schwager's Gain-to-Pain Ratio.

    The ratio is the sum of all returns divided by the sum of the absolute
    values of the negative returns.

    Args:
        series (pl.Series): The series to calculate gain to pain ratio for.

    Returns:
        float: The gain to pain ratio value, or NaN when the division raises
        ``ZeroDivisionError``.

    """
    net_return = series.sum()
    loss_magnitude = series.filter(series < 0).abs().sum()
    try:
        ratio = net_return / loss_magnitude
    except ZeroDivisionError:
        return float(np.nan)
    return float(ratio)
@columnwise_stat
def risk_return_ratio(self, series: pl.Series) -> float:
    """Calculate the return/risk ratio.

    This is the mean return divided by the standard deviation of returns,
    i.e. a Sharpe ratio without a risk-free rate and without annualization.

    Args:
        series (pl.Series): The series to calculate risk return ratio for.

    Returns:
        float: The risk return ratio value, or NaN when the standard deviation
        is zero. A missing mean is treated as 0.0 and a missing standard
        deviation as 1.0.

    """
    mean_val = cast("float | None", series.mean())
    std_val = cast("float | None", series.std())

    numerator = mean_val if mean_val is not None else 0.0
    denominator = std_val if std_val is not None else 1.0

    # A constant series has zero dispersion; the ratio is undefined there and
    # the division would raise ZeroDivisionError.
    if denominator == 0:
        return float(np.nan)

    return numerator / denominator
def kelly_criterion(self) -> dict[str, float]:
    """Calculate the optimal capital allocation per column.

    Applies the Kelly Criterion formula ``f* = [(b * p) - q] / b`` with

    - b = payoff ratio (from ``payoff_ratio()``)
    - p = win rate (from ``win_rate()``)
    - q = 1 - p.

    Returns:
        dict[str, float]: Dictionary mapping asset names to Kelly criterion values.

    """
    payoff = self.payoff_ratio()
    win_prob = self.win_rate()

    fractions: dict[str, float] = {}
    for col, b in payoff.items():
        p = win_prob[col]
        fractions[col] = ((b * p) - (1 - p)) / b
    return fractions
@columnwise_stat
def best(self, series: pl.Series) -> float | None:
    """Find the maximum return per column (best period).

    Args:
        series (pl.Series): The series to find the best return for.

    Returns:
        float: The maximum return value, or None when ``series.max()`` is None.

    """
    highest = series.max()
    return cast("float | None", highest)
@columnwise_stat
def worst(self, series: pl.Series) -> float | None:
    """Find the minimum return per column (worst period).

    Args:
        series (pl.Series): The series to find the worst return for.

    Returns:
        float: The minimum return value, or None when ``series.min()`` is None.

    """
    lowest = series.min()
    return cast("float | None", lowest)
@columnwise_stat
def exposure(self, series: pl.Series) -> float:
    """Calculate the market exposure time (returns != 0).

    The count of non-zero values in the series is divided by the number of
    rows of ``self.all`` and rounded to two decimals.

    Args:
        series (pl.Series): The series to calculate exposure for.

    Returns:
        float: The exposure value.

    """
    total_rows = cast(pl.DataFrame, self.all).height
    active_rows = series.filter(series != 0).count()
    share = active_rows / total_rows
    return float(np.round(share, decimals=2))
@columnwise_stat
def sharpe(self, series: pl.Series, periods: int | float | None = None) -> float:
    """Calculate the Sharpe ratio of asset returns.

    The ratio is mean / sample standard deviation (``ddof=1``), scaled by the
    square root of the number of periods per year.

    Args:
        series (pl.Series): The series to calculate Sharpe ratio for.
        periods (int | float, optional): Number of periods per year. When
            omitted (or falsy), ``self.data._periods_per_year`` is used; if
            that is falsy too, no scaling is applied.

    Returns:
        float: The Sharpe ratio value, or NaN when the standard deviation is
        zero. A missing mean is treated as 0.0 and a missing standard
        deviation as 1.0.

    """
    periods = periods or self.data._periods_per_year

    std_val = cast("float | None", series.std(ddof=1))
    mean_val = cast("float | None", series.mean())
    divisor = std_val if std_val is not None else 1.0

    # A constant series has zero dispersion: return NaN, as sharpe_variance
    # does, instead of raising ZeroDivisionError.
    if divisor == 0:
        return float(np.nan)

    res = (mean_val if mean_val is not None else 0.0) / divisor
    factor = periods or 1
    return float(res * np.sqrt(factor))
@columnwise_stat
def sharpe_variance(self, series: pl.Series, periods: int | float | None = None) -> float:
    r"""Calculate the asymptotic variance of the Sharpe Ratio.

    .. math::
        \text{Var}(SR) = \frac{1 + \frac{S \cdot SR}{2} + \frac{(K - 3) \cdot SR^2}{4}}{T}

    where:
        - \(S\) is the skewness of returns
        - \(K\) is the (Pearson) kurtosis of returns, which equals 3 for a normal distribution
        - \(SR\) is the Sharpe ratio (unannualized)
        - \(T\) is the number of observations

    Args:
        series (pl.Series): The series to calculate Sharpe ratio variance for.
        periods (int | float, optional): Number of periods per year. Defaults to data periods.

    Returns:
        float: The asymptotic variance of the Sharpe ratio, or NaN when the
        mean, standard deviation, skewness or kurtosis is unavailable or the
        standard deviation is zero.
        If number of periods per year is provided or inferred from the data, the result is annualized.

    """
    t = series.count()
    mean_val = cast(float, series.mean())
    std_val = cast(float, series.std(ddof=1))
    if mean_val is None or std_val is None or std_val == 0:
        return float(np.nan)
    sr = mean_val / std_val

    skew_val = series.skew(bias=False)
    # The formula subtracts 3 itself, so request Pearson kurtosis explicitly;
    # the library default (Fisher) is already excess kurtosis and would make
    # the subtraction happen twice.
    kurt_val = series.kurtosis(fisher=False, bias=False)

    if skew_val is None or kurt_val is None:
        return float(np.nan)

    # Base variance using the unannualized Sharpe ratio:
    # (1 + skew*SR/2 + (kurt-3)*SR²/4) / T
    base_variance = (1 + (float(skew_val) * sr) / 2 + ((float(kurt_val) - 3) / 4) * sr**2) / t

    # Annualize by scaling with the number of periods
    periods = periods or self.data._periods_per_year
    factor = periods or 1
    return float(base_variance * factor)
@columnwise_stat
def prob_sharpe_ratio(self, series: pl.Series, benchmark_sr: float) -> float:
    r"""Calculate the probabilistic sharpe ratio (PSR).

    Args:
        series (pl.Series): The series to calculate probabilistic Sharpe ratio for.
        benchmark_sr (float): The target Sharpe ratio to compare against. This should be unannualized.

    Returns:
        float: Probabilistic Sharpe Ratio, or NaN when the mean, standard
        deviation, skewness or kurtosis is unavailable, the standard deviation
        is zero, or the estimated variance is not positive.

    Note:
        PSR is the probability that the observed Sharpe ratio is greater than a
        given benchmark Sharpe ratio.

    """
    t = series.count()

    # Observed unannualized Sharpe ratio
    mean_val = cast(float, series.mean())
    std_val = cast(float, series.std(ddof=1))
    if mean_val is None or std_val is None or std_val == 0:
        return float(np.nan)
    observed_sr = mean_val / std_val

    skew_val = series.skew(bias=False)
    # The variance formula subtracts 3 itself, so request Pearson kurtosis
    # explicitly; the library default (Fisher) is already excess kurtosis and
    # would make the subtraction happen twice.
    kurt_val = series.kurtosis(fisher=False, bias=False)

    if skew_val is None or kurt_val is None:
        return float(np.nan)

    # Variance evaluated at the unannualized benchmark Sharpe ratio
    var_bench_sr = (1 + (float(skew_val) * benchmark_sr) / 2 + ((float(kurt_val) - 3) / 4) * benchmark_sr**2) / t

    if var_bench_sr <= 0:
        return float(np.nan)
    return float(norm.cdf((observed_sr - benchmark_sr) / np.sqrt(var_bench_sr)))
@columnwise_stat
def hhi_positive(self, series: pl.Series) -> float:
    r"""Calculate the Herfindahl-Hirschman Index (HHI) for positive returns.

    The index measures how concentrated the positive returns of a series are.

    .. math::
        w^{+} = \frac{r_{t}^{+}}{\sum{r_{t}^{+}}} \\
        HHI^{+} = \frac{N_{+} \sum{(w^{+})^2} - 1}{N_{+} - 1}

    where:
        - \(r_{t}^{+}\) are the positive returns
        - \(N_{+}\) is the number of positive returns
        - \(w^{+}\) are the weights of positive returns

    Args:
        series (pl.Series): The series to calculate HHI for.

    Returns:
        float: The HHI value for positive returns. Returns NaN if fewer than 3
        positive returns are present.

    Note:
        Values range from 0 (perfectly diversified gains) to 1 (all gains
        concentrated in a single period).
    """
    gains = series.filter(series > 0).drop_nans()
    n_gains = gains.len()
    if n_gains <= 2:
        return float(np.nan)

    weights = gains / gains.sum()
    sum_of_squares = (weights**2).sum()
    return float((n_gains * sum_of_squares - 1) / (n_gains - 1))
@columnwise_stat
def hhi_negative(self, series: pl.Series) -> float:
    r"""Calculate the Herfindahl-Hirschman Index (HHI) for negative returns.

    The index measures how concentrated the negative returns of a series are.

    .. math::
        w^{-} = \frac{r_{t}^{-}}{\sum{r_{t}^{-}}} \\
        HHI^{-} = \frac{N_{-} \sum{(w^{-})^2} - 1}{N_{-} - 1}

    where:
        - \(r_{t}^{-}\) are the negative returns
        - \(N_{-}\) is the number of negative returns
        - \(w^{-}\) are the weights of negative returns

    Args:
        series (pl.Series): The returns series to calculate HHI for.

    Returns:
        float: The HHI value for negative returns. Returns NaN if fewer than 3
        negative returns are present.

    Note:
        Values range from 0 (perfectly diversified losses) to 1 (all losses
        concentrated in a single period).
    """
    losses = series.filter(series < 0).drop_nans()
    n_losses = losses.len()
    if n_losses <= 2:
        return float(np.nan)

    weights = losses / losses.sum()
    sum_of_squares = (weights**2).sum()
    return float((n_losses * sum_of_squares - 1) / (n_losses - 1))
@columnwise_stat
def sortino(self, series: pl.Series, periods: int | float | None = None) -> float:
    """Calculate the Sortino ratio.

    The Sortino ratio is the mean return divided by downside deviation, where
    the downside deviation is the square root of the sum of squared negative
    returns divided by the number of observations. The result is scaled by the
    square root of the number of periods per year.
    Based on Red Rock Capital's Sortino ratio paper.

    Args:
        series (pl.Series): The series to calculate Sortino ratio for.
        periods (int | float, optional): Number of periods per year. When
            omitted (or falsy), ``self.data._periods_per_year`` is used.

    Returns:
        float: The Sortino ratio value, or NaN when the series has no
        observations or no negative returns (zero downside deviation).

    """
    periods = periods or self.data._periods_per_year

    n_obs = series.count()
    # Nothing to average over: avoid dividing by a zero observation count.
    if n_obs == 0:
        return float(np.nan)

    downside_sum = cast(float, ((series.filter(series < 0)) ** 2).sum())
    downside_deviation = float(np.sqrt(downside_sum / n_obs))

    # Without losses the downside deviation is zero and the ratio is
    # undefined; the plain division would raise ZeroDivisionError.
    if downside_deviation == 0:
        return float(np.nan)

    mean_val = cast("float | None", series.mean())
    ratio = (mean_val if mean_val is not None else 0.0) / downside_deviation
    return float(ratio * np.sqrt(periods))
@to_frame
def rolling_sortino(
    self, series: pl.Expr, rolling_period: int = 126, periods_per_year: int | float | None = None
) -> pl.Expr:
    """Calculate the rolling Sortino ratio.

    Args:
        series (pl.Expr): The expression to calculate rolling Sortino ratio for.
        rolling_period (int, optional): The rolling window size. Defaults to 126.
        periods_per_year (int | float, optional): Number of periods per year.
            When omitted (or falsy), ``self.data._periods_per_year`` is used.

    Returns:
        pl.Expr: The rolling Sortino ratio expression.

    """
    ppy = periods_per_year or self.data._periods_per_year

    window_mean = series.rolling_mean(window_size=rolling_period)

    # Squared negative returns (0.0 for everything else), averaged over the window.
    squared_losses = series.map_elements(lambda x: x**2 if x < 0 else 0.0, return_dtype=pl.Float64)
    downside_variance = squared_losses.rolling_mean(window_size=rolling_period)

    # NaN and null entries of the denominator are replaced by 0.
    denominator = downside_variance.sqrt().fill_nan(0).fill_null(0)

    ratio = window_mean / denominator
    return cast(pl.Expr, ratio * (ppy**0.5))
@to_frame
def rolling_sharpe(
    self, series: pl.Expr, rolling_period: int = 126, periods_per_year: int | float | None = None
) -> pl.Expr:
    """Calculate the rolling Sharpe ratio.

    The rolling mean is divided by the rolling standard deviation and scaled
    by the square root of the number of periods per year.

    Args:
        series (pl.Expr): The expression to calculate rolling Sharpe ratio for.
        rolling_period (int, optional): The rolling window size. Defaults to 126.
        periods_per_year (int | float, optional): Number of periods per year.
            When omitted (or falsy), ``self.data._periods_per_year`` is used.

    Returns:
        pl.Expr: The rolling Sharpe ratio expression.

    """
    ppy = periods_per_year or self.data._periods_per_year
    window_mean = series.rolling_mean(window_size=rolling_period)
    window_std = series.rolling_std(window_size=rolling_period)
    ratio = window_mean / window_std
    return cast(pl.Expr, ratio * np.sqrt(ppy))
@to_frame
def rolling_volatility(
    self, series: pl.Expr, rolling_period: int = 126, periods_per_year: int | float | None = None
) -> pl.Expr:
    """Calculate the rolling volatility of returns.

    The rolling standard deviation is scaled by the square root of the number
    of periods per year.

    Args:
        series (pl.Expr): The expression to calculate rolling volatility for.
        rolling_period (int, optional): The rolling window size. Defaults to 126.
        periods_per_year (int | float, optional): Number of periods per year.
            When omitted (or falsy), ``self.data._periods_per_year`` is used.

    Returns:
        pl.Expr: The rolling volatility expression.

    """
    ppy = periods_per_year or self.data._periods_per_year
    window_std = series.rolling_std(window_size=rolling_period)
    return cast(pl.Expr, window_std * np.sqrt(ppy))
@to_frame
def drawdown(self, series: pl.Series) -> pl.Series:
    """Calculate the drawdown series for returns.

    Returns are compounded into a price series via ``prices``; the drawdown is
    the negated relative distance of each price from its running maximum.

    Args:
        series (pl.Series): The series to calculate drawdown for.

    Returns:
        pl.Series: The drawdown series.

    """
    equity = self.prices(series)
    running_peak = equity.cum_max()
    below_peak = (equity / running_peak) - 1
    return -below_peak
@staticmethod
def prices(series: pl.Series) -> pl.Series:
    """Convert a returns series to a price series.

    Args:
        series (pl.Series): The returns series to convert.

    Returns:
        pl.Series: The cumulative product of ``1.0 + series``.

    """
    growth_factors = 1.0 + series
    return growth_factors.cum_prod()
@staticmethod
def max_drawdown_single_series(series: pl.Series) -> float:
    """Calculate the maximum drawdown of a single returns series.

    Args:
        series (pl.Series): The returns series to evaluate.

    Returns:
        float: The negated minimum of ``price / running peak - 1``, where the
        price series comes from ``Stats.prices``; 0.0 when that minimum is None.

    """
    equity = Stats.prices(series)
    relative_to_peak = equity / equity.cum_max() - 1
    deepest = cast(float, relative_to_peak.min())
    if deepest is None:
        return 0.0
    return -deepest
@columnwise_stat
def max_drawdown(self, series: pl.Series) -> float:
    """Calculate the maximum drawdown for each column.

    Args:
        series (pl.Series): The series to calculate maximum drawdown for.

    Returns:
        float: The maximum drawdown value, as computed by
        ``Stats.max_drawdown_single_series``.

    """
    deepest_drawdown = Stats.max_drawdown_single_series(series)
    return deepest_drawdown
def adjusted_sortino(self, periods: int | float | None = None) -> dict[str, float]:
    """Calculate Jack Schwager's adjusted Sortino ratio.

    Each Sortino ratio is divided by sqrt(2), which allows for direct
    comparison to the Sharpe ratio.
    See: https://archive.is/wip/2rwFW.

    Args:
        periods (int | float, optional): Number of periods per year, passed
            through to ``sortino``.

    Returns:
        dict[str, float]: Dictionary mapping asset names to adjusted Sortino ratios.

    """
    raw_ratios = self.sortino(periods=periods)
    root_two = np.sqrt(2)

    adjusted: dict[str, float] = {}
    for name, ratio in raw_ratios.items():
        adjusted[name] = ratio / root_two
    return adjusted
@columnwise_stat
def r_squared(self, series: pl.Series, benchmark: str | None = None) -> float:
    """Calculate the squared correlation between a column and the benchmark.

    Args:
        series (pl.Series): The series to calculate R-squared for.
        benchmark (str, optional): The benchmark column name. When omitted, the
            first column of ``self.data.benchmark`` is used.

    Returns:
        float: The R-squared value.

    Raises:
        AttributeError: If no benchmark data is available.

    """
    benchmark_frame = self.data.benchmark
    if benchmark_frame is None:
        raise AttributeError("No benchmark data available")  # noqa: TRY003

    benchmark_col = benchmark if benchmark else benchmark_frame.columns[0]

    # Pair the series with the benchmark column and keep only complete rows.
    combined = cast(pl.DataFrame, self.all)
    paired = combined.select([series, pl.col(benchmark_col).alias("benchmark")]).drop_nulls()

    values = paired.to_numpy()
    correlation = np.corrcoef(values[:, 0], values[:, 1])[0, 1]
    return float(correlation**2)
def r2(self) -> dict[str, float]:
    """Return the result of ``r_squared()`` under a shorter name.

    Returns:
        dict[str, float]: Dictionary mapping asset names to R-squared values.

    """
    values = self.r_squared()
    return values
@columnwise_stat
def information_ratio(
    self, series: pl.Series, periods_per_year: int | float | None = None, benchmark: str | None = None
) -> float:
    """Calculate the information ratio.

    This is the risk return ratio of the active returns (series minus
    benchmark), scaled by the square root of the number of periods per year.

    Args:
        series (pl.Series): The series to calculate information ratio for.
        periods_per_year (int | float, optional): Number of periods per year.
            When omitted (or falsy), ``self.data._periods_per_year`` is used.
        benchmark (str, optional): The benchmark column name. When omitted, the
            first column of ``self.data.benchmark`` is used.

    Returns:
        float: The information ratio value; 0.0 when the standard deviation of
        the active returns is zero.

    Raises:
        AttributeError: If no benchmark data is available.

    """
    ppy = periods_per_year or self.data._periods_per_year

    benchmark_data = self.data.benchmark
    # Fail with the same explicit error r_squared uses instead of an opaque
    # NoneType error further down.
    if benchmark_data is None:
        raise AttributeError("No benchmark data available")  # noqa: TRY003
    benchmark_col = benchmark or benchmark_data.columns[0]

    active = series - benchmark_data[benchmark_col]

    mean_val = cast("float | None", active.mean())
    std_val = cast("float | None", active.std())
    mean_f = mean_val if mean_val is not None else 0.0
    std_f = std_val if std_val is not None else 1.0

    # Zero tracking error: keep the historical result of 0.0 rather than
    # letting the division raise.
    if std_f == 0:
        return 0.0

    return float((mean_f / std_f) * (ppy**0.5))
@columnwise_stat
def greeks(
    self, series: pl.Series, periods_per_year: int | float | None = None, benchmark: str | None = None
) -> dict[str, float]:
    """Calculate alpha and beta of the portfolio.

    Beta is the covariance between the series and the benchmark divided by the
    benchmark variance; alpha is the mean of the series minus beta times the
    mean of the benchmark, multiplied by the number of periods per year.

    Args:
        series (pl.Series): The series to calculate greeks for.
        periods_per_year (int | float, optional): Number of periods per year.
            When omitted (or falsy), ``self.data._periods_per_year`` is used.
        benchmark (str, optional): The benchmark column name. When omitted, the
            first column of ``self.data.benchmark`` is used.

    Returns:
        dict[str, float]: Dictionary containing alpha and beta values. Beta is
        NaN when the benchmark variance is zero.

    Raises:
        AttributeError: If ``benchmark`` is not given and no benchmark data is
            available.

    """
    ppy = periods_per_year or self.data._periods_per_year

    if benchmark:
        benchmark_col = benchmark
    else:
        benchmark_data = self.data.benchmark
        # Only the default column needs the benchmark frame; fail with an
        # explicit message instead of an opaque NoneType error.
        if benchmark_data is None:
            raise AttributeError("No benchmark data available")  # noqa: TRY003
        benchmark_col = benchmark_data.columns[0]

    # Pair the series with the benchmark column and keep only complete rows.
    all_data = cast(pl.DataFrame, self.all)
    dframe = all_data.select([series, pl.col(benchmark_col).alias("benchmark")]).drop_nulls()
    matrix = dframe.to_numpy()

    strategy_np = matrix[:, 0]
    benchmark_np = matrix[:, 1]

    # 2x2 covariance matrix: [[var_strategy, cov], [cov, var_benchmark]]
    cov_matrix = np.cov(strategy_np, benchmark_np)
    cov = cov_matrix[0, 1]
    var_benchmark = cov_matrix[1, 1]

    beta = float(cov / var_benchmark) if var_benchmark != 0 else float("nan")
    alpha = float(np.mean(strategy_np) - beta * np.mean(benchmark_np))

    return {"alpha": float(alpha * ppy), "beta": beta}