Coverage for src / jquantstats / _stats / _basic.py: 100%
113 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-26 18:44 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-26 18:44 +0000
1"""Basic statistical metrics for financial returns data."""
3from __future__ import annotations
5from collections.abc import Iterable
6from typing import TYPE_CHECKING, cast
8import numpy as np
9import polars as pl
10from scipy.stats import norm
12from ._core import columnwise_stat
14# ── Basic statistics mixin ───────────────────────────────────────────────────
class _BasicStatsMixin:
    """Mixin providing basic return/risk and win/loss financial statistics.

    Covers: basic statistics (skew, kurtosis, avg return/win/loss), volatility,
    win/loss metrics (payoff ratio, profit factor), and risk metrics (VaR, CVaR,
    win rate, kelly criterion, best/worst, exposure).

    Methods decorated with ``columnwise_stat`` are written against a single
    ``pl.Series``. Callers invoke them without a series (see
    :meth:`win_loss_ratio` and :meth:`kelly_criterion`) and receive a mapping
    with one entry per column.

    Ratios whose denominator is empty or zero (no losses, no non-zero returns,
    ...) are reported as NaN instead of raising, mirroring
    :meth:`gain_to_pain_ratio` and :meth:`profit_ratio`.

    Attributes (provided by the concrete subclass):
        data: The :class:`~jquantstats._data.Data` object.
        all: Combined DataFrame for efficient column selection.
    """

    if TYPE_CHECKING:
        from ._protocol import DataLike

        data: DataLike
        all: pl.DataFrame | None

    @staticmethod
    def _mean_positive_expr(series: pl.Series) -> float | None:
        """Return the mean of all strictly positive values in *series*.

        Returns:
            float | None: The mean, or ``None`` when no positive values exist
            (polars reports the mean of an empty selection as ``None``, not NaN).
        """
        return cast(float | None, series.filter(series > 0).mean())

    @staticmethod
    def _mean_negative_expr(series: pl.Series) -> float | None:
        """Return the mean of all strictly negative values in *series*.

        Returns:
            float | None: The mean, or ``None`` when no negative values exist
            (polars reports the mean of an empty selection as ``None``, not NaN).
        """
        return cast(float | None, series.filter(series < 0).mean())

    # ── Basic statistics ──────────────────────────────────────────────────────

    @columnwise_stat
    def skew(self, series: pl.Series) -> int | float | None:
        """Calculate skewness (asymmetry) for each numeric column.

        Uses the bias-corrected estimator (``bias=False``).

        Args:
            series (pl.Series): The series to calculate skewness for.

        Returns:
            float: The skewness value.

        """
        return series.skew(bias=False)

    @columnwise_stat
    def kurtosis(self, series: pl.Series) -> int | float | None:
        """Calculate the kurtosis of returns.

        Kurtosis describes how peaked (heavy-tailed) a distribution is compared
        to a normal distribution. Uses the bias-corrected estimator
        (``bias=False``).

        Args:
            series (pl.Series): The series to calculate kurtosis for.

        Returns:
            float: The kurtosis value.

        """
        return series.kurtosis(bias=False)

    @columnwise_stat
    def avg_return(self, series: pl.Series) -> float:
        """Calculate average return per non-zero, non-null value.

        Args:
            series (pl.Series): The series to calculate average return for.

        Returns:
            float: The mean over all values that are neither null nor zero.

        """
        active = series.is_not_null() & (series != 0)
        return cast(float, series.filter(active).mean())

    @columnwise_stat
    def avg_win(self, series: pl.Series) -> float | None:
        """Calculate the average winning return/trade for an asset.

        Args:
            series (pl.Series): The series to calculate average win for.

        Returns:
            float | None: The mean of the strictly positive returns, or ``None``
            when there are none.

        """
        return self._mean_positive_expr(series)

    @columnwise_stat
    def avg_loss(self, series: pl.Series) -> float | None:
        """Calculate the average loss return/trade for a period.

        Args:
            series (pl.Series): The series to calculate average loss for.

        Returns:
            float | None: The mean of the strictly negative returns (a negative
            number), or ``None`` when there are none.

        """
        return self._mean_negative_expr(series)

    # ── Volatility & risk ─────────────────────────────────────────────────────

    @columnwise_stat
    def volatility(self, series: pl.Series, periods: int | float | None = None, annualize: bool = True) -> float:
        """Calculate the volatility of returns.

        - Std dev of returns
        - Annualized by sqrt(periods) if `annualize` is True.

        Args:
            series (pl.Series): The series to calculate volatility for.
            periods (int | float, optional): Number of periods per year. When
                omitted (or falsy, e.g. ``0``) ``self.data._periods_per_year``
                is used.
            annualize (bool, optional): Whether to annualize the result. Defaults to True.

        Returns:
            float: The volatility value; ``0.0`` when the standard deviation is
            undefined (``None``).

        Raises:
            TypeError: If the resolved number of periods is not an int or float.

        """
        raw_periods = periods or self.data._periods_per_year

        # Ensure it's numeric
        if not isinstance(raw_periods, int | float):
            raise TypeError(f"Expected int or float for periods, got {type(raw_periods).__name__}")  # noqa: TRY003

        factor = float(np.sqrt(raw_periods)) if annualize else 1.0
        std_val = cast(float, series.std())
        return (std_val if std_val is not None else 0.0) * factor

    # ── Win / loss metrics ────────────────────────────────────────────────────

    @columnwise_stat
    def payoff_ratio(self, series: pl.Series) -> float:
        """Measure the payoff ratio.

        The payoff ratio is calculated as average win / abs(average loss).

        Args:
            series (pl.Series): The series to calculate payoff ratio for.

        Returns:
            float: The payoff ratio value, or NaN when the series has no wins
            or no losses (the ratio is undefined in that case).

        """
        avg_win = self._mean_positive_expr(series)
        avg_loss = self._mean_negative_expr(series)
        # Either mean is None when its selection is empty; dividing would raise.
        if avg_win is None or avg_loss is None:
            return float(np.nan)
        return avg_win / float(np.abs(avg_loss))

    def win_loss_ratio(self) -> dict[str, float]:
        """Shorthand for payoff_ratio().

        Returns:
            dict[str, float]: Dictionary mapping asset names to win/loss ratios.

        """
        return self.payoff_ratio()

    @columnwise_stat
    def profit_ratio(self, series: pl.Series) -> float:
        """Measure the profit ratio.

        The profit ratio is calculated as win ratio / loss ratio, where the win
        ratio is ``abs(mean(returns >= 0) / count(returns >= 0))`` and the loss
        ratio is ``abs(mean(returns < 0) / count(returns < 0))``.

        Args:
            series (pl.Series): The series to calculate profit ratio for.

        Returns:
            float: The profit ratio value, or NaN when either side is empty.

        """
        wins = series.filter(series >= 0)
        losses = series.filter(series < 0)

        win_mean = wins.mean()
        loss_mean = losses.mean()
        # An empty side has no mean (None); the ratio is undefined.
        if win_mean is None or loss_mean is None:
            return float(np.nan)

        win_ratio = float(np.abs(cast(float, win_mean) / wins.count()))
        loss_ratio = float(np.abs(cast(float, loss_mean) / losses.count()))
        return win_ratio / loss_ratio

    @columnwise_stat
    def profit_factor(self, series: pl.Series) -> float:
        """Measure the profit factor.

        The profit factor is calculated as abs(sum of wins / sum of losses).

        Args:
            series (pl.Series): The series to calculate profit factor for.

        Returns:
            float: The profit factor value, or NaN when the losses sum to zero
            (e.g. the series contains no losing periods).

        """
        wins_sum = float(series.filter(series > 0).sum())
        losses_sum = float(series.filter(series < 0).sum())
        # Guard the division: plain float division by zero raises in Python.
        if losses_sum == 0.0:
            return float(np.nan)
        return float(np.abs(wins_sum / losses_sum))

    # ── Risk metrics ──────────────────────────────────────────────────────────

    @columnwise_stat
    def value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
        """Calculate the daily value-at-risk.

        Uses variance-covariance calculation with confidence level: the
        ``alpha`` quantile of a normal distribution with the series' mean and
        ``sigma`` times its standard deviation.

        Args:
            series (pl.Series): The series to calculate value at risk for.
            sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
            alpha (float, optional): Confidence level. Defaults to 0.05.

        Returns:
            float: The value at risk.

        """
        mean_val = cast(float, series.mean())
        std_val = cast(float, series.std())
        mu = mean_val if mean_val is not None else 0.0
        # An undefined standard deviation collapses the scale to zero.
        scale = sigma * (std_val if std_val is not None else 0.0)

        return float(norm.ppf(alpha, mu, scale))

    @columnwise_stat
    def conditional_value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
        """Calculate the conditional value-at-risk.

        Also known as CVaR or expected shortfall, calculated for each numeric column.
        It is the mean of the returns strictly below the parametric VaR
        threshold (see :meth:`value_at_risk` for how the threshold is derived).

        Args:
            series (pl.Series): The series to calculate conditional value at risk for.
            sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
            alpha (float, optional): Confidence level. Defaults to 0.05.

        Returns:
            float: The conditional value at risk.

        """
        mean_val = cast(float, series.mean())
        std_val = cast(float, series.std())
        mu = mean_val if mean_val is not None else 0.0
        scale = sigma * (std_val if std_val is not None else 0.0)

        var = norm.ppf(alpha, mu, scale)

        # Mean of the returns strictly below VaR. The cast only narrows the
        # comparison result for the type checker; it has no runtime effect.
        mask = cast(Iterable[bool], series < var)
        return cast(float, series.filter(mask).mean())

    @columnwise_stat
    def win_rate(self, series: pl.Series) -> float:
        """Calculate the win ratio for a period.

        The win rate is the number of positive returns divided by the number of
        non-zero returns.

        Args:
            series (pl.Series): The series to calculate win rate for.

        Returns:
            float: The win rate value, or NaN when there are no non-zero returns.

        """
        num_pos = series.filter(series > 0).count()
        num_nonzero = series.filter(series != 0).count()
        # No active periods: the rate is undefined rather than an error.
        if num_nonzero == 0:
            return float(np.nan)
        return float(num_pos / num_nonzero)

    @columnwise_stat
    def gain_to_pain_ratio(self, series: pl.Series) -> float:
        """Calculate Jack Schwager's Gain-to-Pain Ratio.

        The ratio is calculated as total return / sum of losses (in absolute value).

        Args:
            series (pl.Series): The series to calculate gain to pain ratio for.

        Returns:
            float: The gain to pain ratio value, or NaN when the losses sum to zero.

        """
        total_gain = float(series.sum())
        total_pain = float(series.filter(series < 0).abs().sum())
        if total_pain == 0.0:
            return float(np.nan)
        return total_gain / total_pain

    @columnwise_stat
    def risk_return_ratio(self, series: pl.Series) -> float:
        """Calculate the return/risk ratio.

        This is equivalent to the Sharpe ratio without a risk-free rate.

        Args:
            series (pl.Series): The series to calculate risk return ratio for.

        Returns:
            float: The mean divided by the standard deviation. An undefined mean
            counts as ``0.0`` and an undefined standard deviation as ``1.0``.

        """
        mean_val = cast(float, series.mean())
        std_val = cast(float, series.std())
        numerator = mean_val if mean_val is not None else 0.0
        denominator = std_val if std_val is not None else 1.0
        return numerator / denominator

    def kelly_criterion(self) -> dict[str, float]:
        """Calculate the optimal capital allocation per column.

        Uses the Kelly Criterion formula: f* = [(b * p) - q] / b
        where:
            - b = payoff ratio
            - p = win rate
            - q = 1 - p.

        Returns:
            dict[str, float]: Dictionary mapping asset names to Kelly criterion values.
            A column whose payoff ratio or win rate is NaN yields NaN.

        """
        payoff = self.payoff_ratio()
        win_prob = self.win_rate()

        return {col: ((payoff[col] * win_prob[col]) - (1 - win_prob[col])) / payoff[col] for col in payoff}

    @columnwise_stat
    def best(self, series: pl.Series) -> float | None:
        """Find the maximum return per column (best period).

        Args:
            series (pl.Series): The series to find the best return for.

        Returns:
            float | None: The maximum return value, or ``None`` if the series
            has no maximum.

        """
        return cast(float | None, series.max())

    @columnwise_stat
    def worst(self, series: pl.Series) -> float | None:
        """Find the minimum return per column (worst period).

        Args:
            series (pl.Series): The series to find the worst return for.

        Returns:
            float | None: The minimum return value, or ``None`` if the series
            has no minimum.

        """
        return cast(float | None, series.min())

    @columnwise_stat
    def exposure(self, series: pl.Series) -> float:
        """Calculate the market exposure time (returns != 0).

        The number of non-zero returns in *series* is divided by the height of
        the combined ``self.all`` frame and rounded to two decimals.

        Args:
            series (pl.Series): The series to calculate exposure for.

        Returns:
            float: The exposure value.

        """
        all_data = cast(pl.DataFrame, self.all)
        active_periods = series.filter(series != 0).count()
        return float(np.round(active_periods / all_data.height, decimals=2))