Coverage for src / jquantstats / _stats / _basic.py: 100%

113 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-26 18:44 +0000

1"""Basic statistical metrics for financial returns data.""" 

2 

3from __future__ import annotations 

4 

5from collections.abc import Iterable 

6from typing import TYPE_CHECKING, cast 

7 

8import numpy as np 

9import polars as pl 

10from scipy.stats import norm 

11 

12from ._core import columnwise_stat 

13 

14# ── Basic statistics mixin ─────────────────────────────────────────────────── 

15 

16 

17class _BasicStatsMixin: 

18 """Mixin providing basic return/risk and win/loss financial statistics. 

19 

20 Covers: basic statistics (skew, kurtosis, avg return/win/loss), volatility, 

21 win/loss metrics (payoff ratio, profit factor), and risk metrics (VaR, CVaR, 

22 win rate, kelly criterion, best/worst, exposure). 

23 

24 Attributes (provided by the concrete subclass): 

25 data: The :class:`~jquantstats._data.Data` object. 

26 all: Combined DataFrame for efficient column selection. 

27 """ 

28 

29 if TYPE_CHECKING: 

30 from ._protocol import DataLike 

31 

32 data: DataLike 

33 all: pl.DataFrame | None 

34 

@staticmethod
def _mean_positive_expr(series: pl.Series) -> float:
    """Average the strictly positive entries of *series*.

    Args:
        series (pl.Series): Values to average; only entries ``> 0`` are kept.

    Returns:
        float: Whatever ``mean()`` yields for the kept entries.
            NOTE(review): when nothing is positive polars appears to return
            ``None`` here rather than NaN - confirm before relying on it.

    """
    positives = series.filter(series > 0)
    return cast(float, positives.mean())

39 

@staticmethod
def _mean_negative_expr(series: pl.Series) -> float:
    """Average the strictly negative entries of *series*.

    Args:
        series (pl.Series): Values to average; only entries ``< 0`` are kept.

    Returns:
        float: Whatever ``mean()`` yields for the kept entries.
            NOTE(review): when nothing is negative polars appears to return
            ``None`` here rather than NaN - confirm before relying on it.

    """
    negatives = series.filter(series < 0)
    return cast(float, negatives.mean())

44 

45 # ── Basic statistics ────────────────────────────────────────────────────── 

46 

@columnwise_stat
def skew(self, series: pl.Series) -> int | float | None:
    """Measure the asymmetry of the values in *series*.

    Args:
        series (pl.Series): The series to measure.

    Returns:
        int | float | None: Result of ``Series.skew`` called with
            ``bias=False`` (bias correction enabled).

    """
    skewness = series.skew(bias=False)
    return skewness

59 

@columnwise_stat
def kurtosis(self, series: pl.Series) -> int | float | None:
    """Measure the kurtosis of the values in *series*.

    Kurtosis describes how peaked or heavy-tailed a distribution is compared
    with a normal distribution.

    Args:
        series (pl.Series): The series to measure.

    Returns:
        int | float | None: Result of ``Series.kurtosis`` called with
            ``bias=False`` (bias correction enabled).

    """
    tail_weight = series.kurtosis(bias=False)
    return tail_weight

74 

@columnwise_stat
def avg_return(self, series: pl.Series) -> float:
    """Average the entries of *series* that are neither null nor zero.

    Args:
        series (pl.Series): The series to average.

    Returns:
        float: Mean of the remaining entries, as returned by ``mean()``.

    """
    # Null and exactly-zero entries are excluded from the average.
    keep = series.is_not_null() & (series != 0)
    active = series.filter(keep)
    return cast(float, active.mean())

87 

@columnwise_stat
def avg_win(self, series: pl.Series) -> float:
    """Average the winning (strictly positive) entries of *series*.

    Args:
        series (pl.Series): The series to evaluate.

    Returns:
        float: Mean of the positive entries, as computed by
            ``_mean_positive_expr``.

    """
    mean_gain = self._mean_positive_expr(series)
    return mean_gain

100 

@columnwise_stat
def avg_loss(self, series: pl.Series) -> float:
    """Average the losing (strictly negative) entries of *series*.

    Args:
        series (pl.Series): The series to evaluate.

    Returns:
        float: Mean of the negative entries, as computed by
            ``_mean_negative_expr``.

    """
    mean_drop = self._mean_negative_expr(series)
    return mean_drop

113 

114 # ── Volatility & risk ───────────────────────────────────────────────────── 

115 

@columnwise_stat
def volatility(self, series: pl.Series, periods: int | float | None = None, annualize: bool = True) -> float:
    """Compute the standard deviation of *series*, optionally annualized.

    The result is ``std(series) * sqrt(periods)`` when ``annualize`` is True
    and plain ``std(series)`` otherwise.

    Args:
        series (pl.Series): The series to measure.
        periods (int | float, optional): Number of periods per year. Any falsy
            value (``None`` or ``0``) falls back to
            ``self.data._periods_per_year``.
        annualize (bool, optional): Whether to scale by ``sqrt(periods)``.
            Defaults to True.

    Returns:
        float: The volatility; a missing (``None``) standard deviation is
            treated as ``0.0``.

    Raises:
        TypeError: If the resolved period count is not an int or float.

    """
    raw_periods = periods or self.data._periods_per_year

    # Validate before the value reaches the square root below.
    if not isinstance(raw_periods, int | float):
        raise TypeError(f"Expected int or float for periods, got {type(raw_periods).__name__}")  # noqa: TRY003

    if annualize:
        scale = float(np.sqrt(raw_periods))
    else:
        scale = 1.0

    dispersion = cast(float, series.std())
    if dispersion is None:
        dispersion = 0.0
    return dispersion * scale

141 

142 # ── Win / loss metrics ──────────────────────────────────────────────────── 

143 

@columnwise_stat
def payoff_ratio(self, series: pl.Series) -> float:
    """Measure the payoff ratio: average win / abs(average loss).

    Args:
        series (pl.Series): The series to calculate payoff ratio for.

    Returns:
        float: The payoff ratio, or NaN when the series has no strictly
            positive or no strictly negative entries.

    """
    mean_win = self._mean_positive_expr(series)
    mean_loss = self._mean_negative_expr(series)

    # The mean of an empty selection comes back as None; feeding that into
    # np.abs / division used to raise TypeError for one-sided series.
    if mean_win is None or mean_loss is None:
        return float(np.nan)

    return mean_win / float(np.abs(mean_loss))

160 

def win_loss_ratio(self) -> dict[str, float]:
    """Return the payoff ratio under its alternative name.

    This is a thin alias that forwards to ``payoff_ratio()``.

    Returns:
        dict[str, float]: Dictionary mapping asset names to win/loss ratios.

    """
    ratios = self.payoff_ratio()
    return ratios

169 

@columnwise_stat
def profit_ratio(self, series: pl.Series) -> float:
    """Measure the profit ratio: win ratio / loss ratio.

    Each side's ratio is ``abs(mean / count)``. The win side uses the
    non-negative entries (zeros included) and the loss side uses the strictly
    negative entries.

    Args:
        series (pl.Series): The series to calculate profit ratio for.

    Returns:
        float: The profit ratio, or NaN when computing either side raises
            ``TypeError`` (e.g. a side whose mean is ``None``).

    """
    non_negative = series.filter(series >= 0)
    negative = series.filter(series < 0)

    try:
        gain_mean = cast(float, non_negative.mean())
        drop_mean = cast(float, negative.mean())
        per_win = float(np.abs(gain_mean / non_negative.count()))
        per_loss = float(np.abs(drop_mean / negative.count()))
    except TypeError:
        return float(np.nan)

    return per_win / per_loss

196 

@columnwise_stat
def profit_factor(self, series: pl.Series) -> float:
    """Measure the profit factor: abs(sum of wins / sum of losses).

    Args:
        series (pl.Series): The series to calculate profit factor for.

    Returns:
        float: The profit factor, or NaN when the losses sum to zero (for
            example when the series contains no negative entries).

    """
    gross_profit = float(series.filter(series > 0).sum())
    gross_loss = float(series.filter(series < 0).sum())

    # A zero denominator used to raise ZeroDivisionError; report NaN instead,
    # the same convention gain_to_pain_ratio uses.
    if gross_loss == 0:
        return float(np.nan)

    return float(np.abs(gross_profit / gross_loss))

216 

217 # ── Risk metrics ────────────────────────────────────────────────────────── 

218 

@columnwise_stat
def value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
    """Calculate the parametric (variance-covariance) value-at-risk.

    The result is the ``alpha`` quantile of a normal distribution whose
    location is the series mean and whose scale is ``sigma * std(series)``.

    Args:
        series (pl.Series): The series to calculate value at risk for.
        sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
        alpha (float, optional): Probability passed to ``norm.ppf``.
            Defaults to 0.05.

    Returns:
        float: The value at risk. A missing (``None``) mean or standard
            deviation is treated as ``0.0``.

    """
    loc = cast(float, series.mean())
    spread = cast(float, series.std())

    if loc is None:
        loc = 0.0
    if spread is None:
        spread = 0.0

    scale = sigma * spread
    return float(norm.ppf(alpha, loc, scale))

240 

@columnwise_stat
def conditional_value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
    """Calculate the conditional value-at-risk (CVaR / expected shortfall).

    A parametric VaR threshold is computed first: the ``alpha`` quantile of a
    normal distribution with the series mean and ``sigma * std(series)``.
    The result is the mean of the entries strictly below that threshold.

    Args:
        series (pl.Series): The series to calculate conditional value at risk for.
        sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
        alpha (float, optional): Probability passed to ``norm.ppf``.
            Defaults to 0.05.

    Returns:
        float: Mean of the entries below the threshold, as returned by
            ``mean()``.

    """
    loc = cast(float, series.mean())
    spread = cast(float, series.std())

    if loc is None:
        loc = 0.0
    if spread is None:
        spread = 0.0

    scale = sigma * spread
    threshold = norm.ppf(alpha, loc, scale)

    # Keep only the observations strictly below the threshold; the cast is
    # there purely for the static type checker.
    tail_mask = cast(Iterable[bool], series < threshold)
    tail = series.filter(tail_mask)
    return cast(float, tail.mean())

268 

@columnwise_stat
def win_rate(self, series: pl.Series) -> float:
    """Calculate the share of non-zero entries that are wins.

    Args:
        series (pl.Series): The series to calculate win rate for.

    Returns:
        float: Count of strictly positive entries divided by the count of
            non-zero entries, or NaN when there are no non-zero entries.

    """
    wins = series.filter(series > 0).count()
    active = series.filter(series != 0).count()

    # An empty or all-zero series used to raise ZeroDivisionError here.
    if active == 0:
        return float(np.nan)

    return float(wins / active)

283 

@columnwise_stat
def gain_to_pain_ratio(self, series: pl.Series) -> float:
    """Calculate Jack Schwager's Gain-to-Pain Ratio.

    The ratio is the sum of all entries divided by the absolute sum of the
    strictly negative entries.

    Args:
        series (pl.Series): The series to calculate gain to pain ratio for.

    Returns:
        float: The gain to pain ratio, or NaN when the loss total is zero.

    """
    net_total = float(series.sum())
    loss_total = float(series.filter(series < 0).abs().sum())

    # Dividing by a zero loss total is undefined; report NaN.
    if loss_total == 0:
        return float(np.nan)

    return float(net_total / loss_total)

303 

@columnwise_stat
def risk_return_ratio(self, series: pl.Series) -> float:
    """Calculate the return/risk ratio: mean / standard deviation.

    This is equivalent to the Sharpe ratio without a risk-free rate.

    Args:
        series (pl.Series): The series to calculate risk return ratio for.

    Returns:
        float: The ratio, or NaN when the standard deviation is exactly zero.
            A missing (``None``) mean is treated as ``0.0`` and a missing
            standard deviation as ``1.0``.

    """
    mean_val = cast(float, series.mean())
    std_val = cast(float, series.std())

    numerator = mean_val if mean_val is not None else 0.0
    denominator = std_val if std_val is not None else 1.0

    # A zero-variance series used to raise ZeroDivisionError here.
    if denominator == 0:
        return float(np.nan)

    return numerator / denominator

320 

def kelly_criterion(self) -> dict[str, float]:
    """Calculate the Kelly-optimal capital fraction for each column.

    Applies ``f* = (b * p - q) / b`` per column, where ``b`` is the payoff
    ratio, ``p`` is the win rate and ``q = 1 - p``.

    Returns:
        dict[str, float]: Dictionary mapping asset names to Kelly criterion values.

    """
    payoff = self.payoff_ratio()
    hit_rate = self.win_rate()

    fractions = {}
    for col in payoff:
        fractions[col] = ((payoff[col] * hit_rate[col]) - (1 - hit_rate[col])) / payoff[col]
    return fractions

338 

@columnwise_stat
def best(self, series: pl.Series) -> float | None:
    """Find the largest value in *series* (the best period).

    Args:
        series (pl.Series): The series to find the best return for.

    Returns:
        float | None: The result of ``series.max()``, passed through unchanged.

    """
    return cast(float, series.max())

352 

@columnwise_stat
def worst(self, series: pl.Series) -> float | None:
    """Find the smallest value in *series* (the worst period).

    Args:
        series (pl.Series): The series to find the worst return for.

    Returns:
        float | None: The result of ``series.min()``, passed through unchanged.

    """
    return cast(float, series.min())

366 

@columnwise_stat
def exposure(self, series: pl.Series) -> float:
    """Calculate the market exposure time (share of entries that are != 0).

    Args:
        series (pl.Series): The series to calculate exposure for.

    Returns:
        float: Count of non-zero entries divided by the row count of
            ``self.all``, rounded to two decimals.

    """
    frame = cast(pl.DataFrame, self.all)
    # The denominator is the height of the combined frame, not len(series).
    active_rows = series.filter(series != 0).count()
    share = active_rows / frame.height
    return float(np.round(share, decimals=2))