Coverage for src / jsharpe / sharpe.py: 100%

185 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-31 05:40 +0000

1"""Sharpe-related utilities for statistical analysis and hypothesis testing. 

2 

3This module provides comprehensive tools for Sharpe ratio analysis, including: 

4 - Variance estimation under non-Gaussian returns 

5 - Statistical significance testing 

6 - Multiple testing corrections (FDR, FWER) 

7 - Portfolio optimization utilities 

8""" 

9# ruff: noqa: N802, N803, N806, S101, TRY003 

10 

11import math 

12import warnings 

13from collections.abc import Callable 

14 

15import numpy as np 

16import scipy 

17 

18 

19def ppoints(n: int, a: float | None = None) -> np.ndarray: 

20 """Generate probability points for Q-Q plots and distribution fitting. 

21 

22 Creates n equidistant points in the interval (0, 1), suitable for 

23 probability plotting positions. The boundaries 0 and 1 are excluded. 

24 This function mirrors the behavior of R's `ppoints()` function. 

25 

26 Args: 

27 n: Number of probability points to generate. 

28 a: Plotting position parameter in [0, 1]. Defaults to 0.5 if n > 10, 

29 otherwise 3/8 (following R's convention). 

30 

31 Returns: 

32 Array of n equidistant probability points in (0, 1). 

33 

34 Raises: 

35 ValueError: If a is not in [0, 1]. 

36 

37 Example: 

38 >>> import numpy as np 

39 >>> pts = ppoints(5) 

40 >>> len(pts) 

41 5 

42 >>> np.all((pts > 0) & (pts < 1)) 

43 np.True_ 

44 >>> ppoints(5, a=0.5) 

45 array([0.1, 0.3, 0.5, 0.7, 0.9]) 

46 """ 

47 if a is None: 

48 a = 0.5 if n > 10 else 3 / 8 

49 if not (0 <= a <= 1): 

50 raise ValueError(f"the offset should be in [0,1], got {a}") 

51 return np.linspace(1 - a, n - a, n) / (n + 1 - 2 * a) 

52 

53 

def robust_covariance_inverse(V: np.ndarray) -> np.ndarray:
    r"""Invert a constant-correlation covariance matrix via Sherman-Morrison.

    Assumes the covariance has the structure
    $V = \rho \sigma \sigma' + (1-\rho) \text{diag}(\sigma^2)$
    (one common correlation $\rho$ for every pair of assets), which makes
    the inverse available in closed form:
    $V^{-1} = A^{-1} - \dfrac{A^{-1} \rho \sigma \sigma' A^{-1}}
    {1 + \rho \sigma' A^{-1} \sigma}$
    where $A = (1-\rho)\,\text{diag}(\sigma^2)$.

    Args:
        V: Covariance matrix with constant off-diagonal correlations.
            Shape (n, n).

    Returns:
        Inverse of the covariance matrix. Shape (n, n).
    """
    stds = np.sqrt(np.diag(V))
    # Recover the correlation matrix, then take the average off-diagonal
    # correlation as the common rho.
    corr = V / np.outer(stds, stds)
    avg_rho = float(np.mean(corr[np.triu_indices_from(corr, 1)]))
    # A^{-1} for the diagonal part A = (1 - rho) * diag(sigma^2).
    A_inv = np.diag(1.0 / stds**2) / (1.0 - avg_rho)
    s = stds[:, np.newaxis]
    # Sherman-Morrison rank-1 update; A_inv is symmetric, so the
    # numerator factors as (A_inv s)(A_inv s)'.
    u = A_inv @ s
    denom = 1.0 + avg_rho * (s.T @ u)
    result: np.ndarray = A_inv - avg_rho * (u @ u.T) / denom
    return result

92 

93 

def minimum_variance_weights_for_correlated_assets(V: np.ndarray) -> np.ndarray:
    """Weights of the minimum-variance portfolio under constant correlation.

    Solves the fully-invested minimum-variance problem (weights sum to 1),
    using the closed-form inverse for a constant-correlation covariance:
    w ∝ V^{-1} 1.

    Args:
        V: Covariance matrix of asset returns. Shape (n, n).

    Returns:
        Portfolio weights summing to 1. Shape (n,).
    """
    V_inv = robust_covariance_inverse(V)
    # Unnormalized solution V^{-1} 1, then rescale to the budget constraint.
    raw = V_inv @ np.ones(V.shape[0])
    return raw / np.sum(raw)

121 

122 

def effective_rank(C: np.ndarray) -> float:
    """Effective rank of a positive semi-definite matrix.

    Computes exp(H) where H is the Shannon entropy of the normalized
    spectrum. The result is a continuous dimensionality measure in
    [1, n]: 1 for a perfectly correlated (rank-one) matrix, n for the
    identity.

    Args:
        C: Positive semi-definite matrix (e.g. a correlation matrix).
            Shape (n, n).

    Returns:
        Effective rank, a value in [1, n].

    References:
        Roy, O. and Vetterli, M. (2007). "The effective rank: a measure of
        effective dimensionality." EURASIP Journal on Advances in Signal
        Processing.
    """
    eigenvalues = np.linalg.eigvalsh(C)
    # Drop zero (and any numerically negative) eigenvalues before
    # normalizing into a probability distribution.
    positive = eigenvalues[eigenvalues > 0]
    weights = positive / positive.sum()
    entropy = float(np.sum(-weights * np.log(weights)))
    return math.exp(entropy)

165 

166 

def moments_Mk(k: int, *, rho: float = 0) -> tuple[float, float, float]:
    """Compute moments of the maximum of k standard normal random variables.

    Computes E[M_k], E[M_k^2], and Var[M_k] where M_k = max(Z_1, ..., Z_k)
    and Z_i are standard normal. Supports both independent (rho=0) and
    equi-correlated (rho > 0) cases.

    For the correlated case with equi-correlation rho:
        Z_i = sqrt(rho) * X + sqrt(1-rho) * Y_i
        M = sqrt(rho) * X + sqrt(1-rho) * max(Y_i)

    Args:
        k: Number of standard normal random variables.
        rho: Equi-correlation coefficient in [0, 1). Default is 0 (independent).

    Returns:
        Tuple of (Ez, Ez2, var):
            - Ez: Expected value E[M_k]
            - Ez2: Second moment E[M_k^2]
            - var: Variance Var[M_k]

    Example:
        >>> Ez, Ez2, var = moments_Mk(1)
        >>> bool(abs(Ez) < 1e-10)  # E[max of one N(0,1)] = 0
        True
        >>> bool(abs(var - 1.0) < 1e-10)  # Var[max of one N(0,1)] = 1
        True
    """
    Phi = scipy.stats.norm.cdf
    # Moments of max(Y_1..Y_k) for iid N(0,1): the maximum has density
    # k * phi(z) * Phi(z)^(k-1), so E[g(M)] = E_Z[k * g(Z) * Phi(Z)^(k-1)].
    Ez = E_under_normal(lambda z: k * z * (Phi(z) ** (k - 1)))
    Ez2 = E_under_normal(lambda z: k * z**2 * (Phi(z) ** (k - 1)))
    var = Ez2 - Ez**2

    # Equi-correlated case: M = sqrt(rho)*X + sqrt(1-rho)*max(Y_i), hence
    #   E[M]   = sqrt(1-rho) * E[max Y]
    #   Var[M] = rho + (1-rho) * Var[max Y]
    # Bug fix: the mean was previously scaled by (1-rho) instead of
    # sqrt(1-rho), contradicting the decomposition above (no effect at the
    # default rho=0).
    Ez = math.sqrt(1 - rho) * Ez
    var = rho + (1 - rho) * var
    Ez2 = var + Ez**2

    return Ez, Ez2, var

205 

206 

def sharpe_ratio_variance(
    SR: float,
    T: int,
    *,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    K: int = 1,
) -> float:
    """Asymptotic variance of the Sharpe ratio estimator.

    Accounts for skewness, kurtosis and first-order autocorrelation of the
    returns; with Gaussian iid returns (gamma3=0, gamma4=3, rho=0) this
    reduces to (1 + SR^2/2) / T. When K > 1 the variance is scaled by the
    variance of the maximum of K standard normals.

    Args:
        SR: Sharpe ratio value (annualized or per-period).
        T: Number of observations (time periods).
        gamma3: Skewness of returns. Default 0 (symmetric).
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: First-order autocorrelation of returns. Default 0 (iid).
        K: Number of strategies for multiple testing adjustment. Default 1.

    Returns:
        Variance of the Sharpe ratio estimator.
    """
    # Autocorrelation adjustments; all three collapse to 1 when rho = 0.
    B = rho / (1 - rho)
    C_term = rho**2 / (1 - rho**2)
    coef_const = 1 + 2 * B
    coef_skew = 1 + B + C_term
    coef_kurt = 1 + 2 * C_term
    base = (coef_const - coef_skew * gamma3 * SR + coef_kurt * (gamma4 - 1) / 4 * SR**2) / T
    # Multiple-testing inflation: variance of the max of K standard normals.
    return float(base * moments_Mk(K)[2])

248 

249 

def variance_of_the_maximum_of_k_Sharpe_ratios(number_of_trials: int, variance: float) -> float:
    """Variance of the maximum Sharpe ratio selected among K strategies.

    Selecting the best of K strategies inflates the uncertainty of the
    selected estimate; this applies a logarithmic inflation factor that
    grows monotonically with K.

    Args:
        number_of_trials: Number of strategies (K >= 1).
        variance: Base variance of individual Sharpe ratio estimates.

    Returns:
        Inflated variance accounting for selection from K strategies.
    """
    trials = max(1, int(number_of_trials))
    # 1 + log(K): equals 1 for K = 1 and increases without bound in K.
    inflation = 1.0 + math.log(trials)
    return float(variance * inflation)

274 

275 

def control_for_FDR(
    q: float,
    *,
    SR0: float = 0,
    SR1: float = 0.5,
    p_H1: float = 0.05,
    T: int = 24,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    K: int = 1,
) -> tuple[float, float, float, float]:
    """Critical value and error rates for testing Sharpe ratios under FDR control.

    Finds the critical Sharpe ratio threshold that controls the False
    Discovery Rate at level q under a two-point mixture model for the
    true Sharpe ratio (SR0 with probability 1-p_H1, SR1 with p_H1).

    Args:
        q: Desired False Discovery Rate level in (0, 1).
        SR0: Sharpe ratio under null hypothesis H0. Default 0.
        SR1: Sharpe ratio under alternative hypothesis H1. Default 0.5.
        p_H1: Prior probability that H1 is true. Default 0.05.
        T: Number of observations. Default 24.
        gamma3: Skewness of returns. Default 0.
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: Autocorrelation of returns. Default 0.
        K: Number of strategies (K=1 for FDR; K>1 for FWER-FDR). Default 1.

    Returns:
        Tuple of (alpha, beta, SR_c, q_hat):
            - alpha: Significance level P[SR > SR_c | H0]
            - beta: Type II error P[SR <= SR_c | H1]; power is 1 - beta
            - SR_c: Critical Sharpe ratio threshold
            - q_hat: Estimated FDR (should be close to q)
    """
    norm_cdf = scipy.stats.norm.cdf

    # Standard deviations of the SR estimator under each hypothesis.
    sigma0 = math.sqrt(sharpe_ratio_variance(SR0, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K))
    sigma1 = math.sqrt(sharpe_ratio_variance(SR1, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K))
    SR_c = FDR_critical_value(q, SR0, SR1, sigma0, sigma1, p_H1)

    beta = norm_cdf((SR_c - SR1) / sigma1)
    prior_odds = p_H1 / (1 - p_H1)
    # Solve the FDR identity q = alpha(1-p) / (alpha(1-p) + (1-beta)p) for alpha,
    # then recompute the implied FDR as a consistency check.
    alpha = q / (1 - q) * prior_odds * (1 - beta)
    q_hat = 1 / (1 + (1 - beta) / alpha * prior_odds)

    return alpha, beta, SR_c, q_hat

330 

331 

def expected_maximum_sharpe_ratio(number_of_trials: int, variance: float, SR0: float = 0) -> float:
    """Compute expected maximum Sharpe ratio under multiple testing.

    Estimates E[max(SR_1, ..., SR_K)] assuming K independent Sharpe ratio
    estimates with common variance, using the Gumbel approximation for the
    expected maximum of K normals.

    Args:
        number_of_trials: Number of strategies (K >= 1).
        variance: Variance of individual Sharpe ratio estimates.
        SR0: Baseline Sharpe ratio to add. Default 0.

    Returns:
        Expected value of the maximum Sharpe ratio across K strategies.

    Example:
        >>> e1 = expected_maximum_sharpe_ratio(1, 0.1)
        >>> e10 = expected_maximum_sharpe_ratio(10, 0.1)
        >>> bool(e10 > e1)
        True
    """
    if number_of_trials <= 1:
        # Bug fix: for K = 1 the Gumbel formula below evaluates
        # norm.ppf(1 - 1/1) = norm.ppf(0) = -inf. The expected maximum of a
        # single zero-mean estimate is simply the baseline SR0.
        return float(SR0)
    # Gumbel approximation for E[max of K iid normals]:
    # blend of the (1 - 1/K) and (1 - 1/(K e)) quantiles weighted by the
    # Euler-Mascheroni constant.
    gumbel_blend = (1 - np.euler_gamma) * scipy.stats.norm.ppf(
        1 - 1 / number_of_trials
    ) + np.euler_gamma * scipy.stats.norm.ppf(1 - 1 / number_of_trials / np.exp(1))
    return float(SR0 + np.sqrt(variance) * gumbel_blend)

364 

365 

def minimum_track_record_length(
    SR: float,
    SR0: float,
    *,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    alpha: float = 0.05,
) -> float:
    """Minimum number of observations for a significant Sharpe ratio.

    Returns the smallest T such that an observed Sharpe ratio SR is
    significantly greater than SR0 at confidence level 1 - alpha.

    Args:
        SR: Observed Sharpe ratio.
        SR0: Sharpe ratio under null hypothesis H0.
        gamma3: Skewness of returns. Default 0.
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: Autocorrelation of returns. Default 0.
        alpha: Significance level. Default 0.05.

    Returns:
        Minimum track record length (number of periods) required.
    """
    # Per-period variance of the SR estimator under H0 (T = 1), then solve
    # SR - SR0 = z_alpha * sqrt(var / T) for T.
    per_period_var = sharpe_ratio_variance(SR0, T=1, gamma3=gamma3, gamma4=gamma4, rho=rho, K=1)
    z_alpha = scipy.stats.norm.ppf(1 - alpha)
    return float(per_period_var * (z_alpha / (SR - SR0)) ** 2)

401 

402 

def make_expectation_gh(
    n_nodes: int = 200,
) -> Callable[[Callable[[np.ndarray], np.ndarray]], float]:
    """Build an E[g(Z)] operator for Z ~ N(0,1) via Gauss-Hermite quadrature.

    Uses the change of variables z = sqrt(2) t so that
    E[g(Z)] ≈ (1/sqrt(pi)) * sum_i w_i g(sqrt(2) t_i),
    where (t_i, w_i) are the Gauss-Hermite nodes and weights.

    Args:
        n_nodes: Number of quadrature nodes; more nodes means higher
            accuracy. Default 200.

    Returns:
        Function E(g) computing E[g(Z)] for Z ~ N(0,1).
    """
    # Nodes/weights and constants are computed once and captured by the
    # closure, so repeated expectations are cheap.
    t_nodes, weights = np.polynomial.hermite.hermgauss(n_nodes)
    sample_points = np.sqrt(2.0) * t_nodes
    inv_sqrt_pi = 1.0 / np.sqrt(np.pi)

    def expectation(g: Callable[[np.ndarray], np.ndarray]) -> float:
        """Approximate E[g(Z)] for Z ~ N(0,1) by Gauss-Hermite quadrature.

        Args:
            g: Function to integrate against the standard normal density.

        Returns:
            Approximation of E[g(Z)].
        """
        return float(inv_sqrt_pi * np.dot(weights, g(sample_points)))

    return expectation

448 

449 

# Shared module-level expectation operator E[g(Z)], Z ~ N(0,1), used by
# moments_Mk; built once at import time (200 Gauss-Hermite nodes).
E_under_normal = make_expectation_gh(n_nodes=200)

451 

452 

def adjusted_p_values_bonferroni(ps: np.ndarray) -> np.ndarray:
    """Bonferroni-adjust p-values for family-wise error rate control.

    Each p-value is multiplied by the number of tests and capped at 1;
    this is the most conservative multiple-testing correction.

    Args:
        ps: Array of unadjusted p-values.

    Returns:
        Array of Bonferroni-adjusted p-values, each in [0, 1].
    """
    n_tests = len(ps)
    adjusted: np.ndarray = np.minimum(1, n_tests * ps)
    return adjusted

474 

475 

def adjusted_p_values_sidak(ps: np.ndarray) -> np.ndarray:
    """Šidák-adjust p-values for family-wise error rate control.

    Applies 1 - (1 - p)^M with M the number of tests; slightly less
    conservative than Bonferroni when the tests are independent.

    Args:
        ps: Array of unadjusted p-values.

    Returns:
        Array of Šidák-adjusted p-values, each in [0, 1].
    """
    n_tests = len(ps)
    survival = (1 - ps) ** n_tests
    return 1 - survival

496 

497 

def adjusted_p_values_holm(ps: np.ndarray, *, variant: str = "bonferroni") -> np.ndarray:
    """Holm step-down adjustment of p-values for FWER control.

    Processes p-values from smallest to largest, applying a decreasing
    correction factor (M, M-1, ..., 1) and enforcing monotonicity of the
    adjusted values. Uniformly more powerful than plain Bonferroni.

    Args:
        ps: Array of unadjusted p-values.
        variant: Per-step correction, either "bonferroni" (default) or
            "sidak".

    Returns:
        Array of Holm-adjusted p-values, each in [0, 1].

    Raises:
        AssertionError: If variant is not "bonferroni" or "sidak".
    """
    assert variant in ["bonferroni", "sidak"]
    n_tests = len(ps)
    order = np.argsort(ps)
    adjusted = np.zeros(n_tests)
    running_max = 0.0
    for rank, idx in enumerate(order):
        remaining = n_tests - rank
        if variant == "bonferroni":
            step_value = min(1, ps[idx] * remaining)
        else:
            step_value = 1 - (1 - ps[idx]) ** remaining
        # Enforce monotone non-decreasing adjusted values in sorted order.
        running_max = max(running_max, step_value)
        adjusted[idx] = running_max
    return adjusted

531 

532 

def FDR_critical_value(q: float, SR0: float, SR1: float, sigma0: float, sigma1: float, p_H1: float) -> float:
    """Critical value c such that P[H=0 | X > c] = q in a two-point mixture.

    Under the mixture model H ~ Bernoulli(p_H1) with X | H=0 ~ N(SR0, sigma0^2)
    and X | H=1 ~ N(SR1, sigma1^2), finds the threshold whose posterior false
    discovery probability equals q, by root-finding on [-10, 10].

    Args:
        q: Desired False Discovery Rate in (0, 1).
        SR0: Mean under null hypothesis (must be < SR1).
        SR1: Mean under alternative hypothesis.
        sigma0: Standard deviation under null hypothesis (must be > 0).
        sigma1: Standard deviation under alternative hypothesis (must be > 0).
        p_H1: Prior probability of alternative hypothesis in (0, 1).

    Returns:
        Critical value c. Returns -inf if the solution lies left of the
        search interval [-10, 10], or nan if no solution exists.

    Raises:
        AssertionError: If parameters are out of valid ranges.
    """
    assert SR0 < SR1
    assert 0 < q < 1
    assert 0 < p_H1 < 1
    assert sigma0 > 0
    assert sigma1 > 0

    with warnings.catch_warnings():
        # Far in the tails both survival functions underflow to 0; the
        # resulting 0/0 or x/0 is handled explicitly below, so silence the
        # numpy warnings it triggers.
        warnings.filterwarnings("ignore", message="invalid value encountered in scalar divide")
        warnings.filterwarnings("ignore", message="divide by zero encountered in scalar divide")

        def posterior_H0(c: float) -> float:
            """Posterior false discovery probability P[H=0 | X > c].

            Args:
                c: Candidate critical value.

            Returns:
                Posterior probability of the null given X exceeds c.
            """
            tail_ratio = scipy.stats.norm.sf((c - SR1) / sigma1) / scipy.stats.norm.sf((c - SR0) / sigma0)
            value = 1 / (1 + tail_ratio * p_H1 / (1 - p_H1))
            # Non-finite values (tail underflow) mean H1 dominates: report 0.
            return float(np.where(np.isfinite(value), value, 0))

        low = posterior_H0(-10)
        if low < q:  # Root lies left of the search interval
            return float(-np.inf)

        if (low - q) * (posterior_H0(10) - q) > 0:  # No sign change => no root (e.g. sigma0 >> sigma1, small q)
            return float(np.nan)

        return float(scipy.optimize.brentq(lambda c: posterior_H0(c) - q, -10, 10))

592 

593 

def critical_sharpe_ratio(
    SR0: float,
    T: int,
    *,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    alpha: float = 0.05,
    K: int = 1,
) -> float:
    """Critical Sharpe ratio for the one-sided test H0: SR = SR0 vs H1: SR > SR0.

    Reject H0 at level alpha when the observed Sharpe ratio exceeds the
    returned threshold.

    Args:
        SR0: Sharpe ratio under null hypothesis.
        T: Number of observations.
        gamma3: Skewness of returns. Default 0.
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: Autocorrelation of returns. Default 0.
        alpha: Significance level. Default 0.05.
        K: Number of strategies for variance adjustment. Default 1.

    Returns:
        Critical Sharpe ratio threshold.
    """
    std = math.sqrt(sharpe_ratio_variance(SR0, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K))
    z_alpha = scipy.stats.norm.ppf(1 - alpha)
    return float(SR0 + z_alpha * std)

629 

630 

631def probabilistic_sharpe_ratio( 

632 SR: float, 

633 SR0: float, 

634 *, 

635 variance: float | None = None, 

636 T: int | None = None, 

637 gamma3: float = 0.0, 

638 gamma4: float = 3.0, 

639 rho: float = 0.0, 

640 K: int = 1, 

641) -> float: 

642 """Compute the Probabilistic Sharpe Ratio (PSR). 

643 

644 The PSR is 1 - p, where p is the p-value of testing H0: SR = SR0 vs 

645 H1: SR > SR0. It can be interpreted as a "Sharpe ratio on a probability 

646 scale", i.e., mapping the SR to [0, 1]. 

647 

648 Args: 

649 SR: Observed Sharpe ratio. 

650 SR0: Sharpe ratio under null hypothesis. 

651 variance: Variance of SR estimator. Provide this OR (T, gamma3, ...). 

652 T: Number of observations (if variance not provided). 

653 gamma3: Skewness of returns. Default 0. 

654 gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian). 

655 rho: Autocorrelation of returns. Default 0. 

656 K: Number of strategies for variance adjustment. Default 1. 

657 

658 Returns: 

659 Probabilistic Sharpe Ratio in [0, 1]. Values near 1 indicate 

660 strong evidence that the true SR exceeds SR0. 

661 

662 Raises: 

663 AssertionError: If both variance and T are provided. 

664 

665 Example: 

666 >>> psr = probabilistic_sharpe_ratio(SR=0.5, SR0=0, T=24) 

667 >>> bool(0 < psr < 1) 

668 True 

669 >>> # Higher observed SR gives higher PSR 

670 >>> psr_high = probabilistic_sharpe_ratio(SR=1.0, SR0=0, T=24) 

671 >>> bool(psr_high > psr) 

672 True 

673 """ 

674 if variance is None: 

675 assert T is not None, "T must be provided if variance is not provided" 

676 variance = sharpe_ratio_variance(SR0, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K) 

677 else: 

678 assert T is None, "Provide either the variance or (T, gamma3, gamma4, rho)" 

679 return float(scipy.stats.norm.cdf((SR - SR0) / math.sqrt(variance))) 

680 

681 

def sharpe_ratio_power(
    SR0: float,
    SR1: float,
    T: int,
    *,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    alpha: float = 0.05,
    K: int = 1,
) -> float:
    """Statistical power of the one-sided Sharpe ratio test.

    Power = 1 - beta = P[reject H0 | true SR = SR1] for the test
    H0: SR = SR0 vs H1: SR = SR1. Equivalent to recall in classification
    terms: TP / (TP + FN).

    Args:
        SR0: Sharpe ratio under null hypothesis.
        SR1: Sharpe ratio under alternative hypothesis (should be > SR0).
        T: Number of observations.
        gamma3: Skewness of returns. Default 0.
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: Autocorrelation of returns. Default 0.
        alpha: Significance level. Default 0.05.
        K: Number of strategies for variance adjustment. Default 1.

    Returns:
        Statistical power in [0, 1].
    """
    # Rejection threshold under H0, then probability the estimator (centered
    # at SR1 with its own variance) falls below it (type II error).
    threshold = critical_sharpe_ratio(SR0, T, gamma3=gamma3, gamma4=gamma4, rho=rho, alpha=alpha)
    var_alt = sharpe_ratio_variance(SR1, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K)
    type_II = scipy.stats.norm.cdf((threshold - SR1) / math.sqrt(var_alt))
    return float(1 - type_II)

726 

727 

def generate_autocorrelated_non_gaussian_data(
    N: int,
    n: int,
    SR0: float = 0,
    name: str = "gaussian",
    rho: float | None = None,
    gaussian_autocorrelation: float = 0,
) -> np.ndarray:
    """Generate autocorrelated non-Gaussian return data for simulation.

    Creates a matrix of simulated returns with specified autocorrelation
    and marginal distribution characteristics (skewness/kurtosis).

    Uses a copula-like approach:
        1. Generate AR(1) Gaussian processes
        2. Transform to uniform via Gaussian CDF
        3. Transform to target marginals via inverse CDF

    Args:
        N: Number of time periods (rows).
        n: Number of assets/strategies (columns).
        SR0: Target Sharpe ratio. Default 0.
        name: Distribution type. One of "gaussian", "mild", "moderate",
            "severe". Default "gaussian".
        rho: Autocorrelation coefficient. If None, uses gaussian_autocorrelation.
        gaussian_autocorrelation: Autocorrelation for Gaussian case. Default 0.

    Returns:
        Array of shape (N, n) containing simulated returns.

    Example:
        >>> np.random.seed(42)
        >>> X = generate_autocorrelated_non_gaussian_data(100, 2, SR0=0.1, name="mild")
        >>> X.shape
        (100, 2)
    """
    if rho is None:
        # With the distributions we consider the autocorrelation is almost the same.
        rho = gaussian_autocorrelation

    shape = (N, n)

    # Marginal distribution: build an empirical ppf (inverse CDF) by sorting
    # a large sample from the target mixture and interpolating it against
    # equidistant probability points.
    R = 10_000
    marginal = generate_non_gaussian_data(R, 1, SR0=SR0, name=name)[:, 0]
    ppf = scipy.interpolate.interp1d(ppoints(R), sorted(marginal), fill_value="extrapolate")

    # AR(1) processes: the sqrt(1 - rho^2) innovation scaling keeps each
    # column's marginal variance at 1, so the Gaussian CDF below is applied
    # to (approximately) standard normal values.
    X = np.random.normal(size=shape)
    for i in range(1, shape[0]):
        X[i, :] = rho * X[i - 1, :] + np.sqrt(1 - rho**2) * X[i, :]

    # Convert the margins to uniform, with the Gaussian cdf
    X = scipy.stats.norm.cdf(X)

    # Convert the uniforms to the target margins, using the ppf; the rank
    # (and hence autocorrelation) structure is preserved by this monotone map.
    result: np.ndarray = ppf(X)

    return result

787 

788 

def get_random_correlation_matrix(
    number_of_trials: int = 100,
    effective_number_of_trials: int = 10,
    number_of_observations: int = 200,
    noise: float = 0.1,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate a random correlation matrix with block structure.

    Creates a correlation matrix representing clustered strategies, where
    strategies within the same cluster are highly correlated and strategies
    across clusters have lower correlation.

    Args:
        number_of_trials: Number of time series (strategies). Default 100.
        effective_number_of_trials: Number of clusters. Default 10.
        number_of_observations: Number of time periods to simulate. Default 200.
        noise: Noise level added to each series. Default 0.1.

    Returns:
        Tuple of (C, X, clusters):
            - C: Correlation matrix of shape (number_of_trials, number_of_trials)
            - X: Data matrix of shape (number_of_observations, number_of_trials)
            - clusters: Cluster assignment for each strategy

    Example:
        >>> np.random.seed(42)
        >>> C, X, clusters = get_random_correlation_matrix(
        ...     number_of_trials=20, effective_number_of_trials=4
        ... )
        >>> C.shape
        (20, 20)
        >>> np.allclose(np.diag(C), 1)  # Diagonal is all ones
        True
    """
    # Draw random block boundaries; rejection loop retries until every block
    # is non-empty (np.random.choice with replace=True can produce duplicate
    # or zero positions, which would give zero-width blocks).
    while True:
        block_positions = [
            0,
            *sorted(np.random.choice(number_of_trials, effective_number_of_trials - 1, replace=True)),
            number_of_trials,
        ]
        block_sizes = np.diff(block_positions)
        if np.all(block_sizes > 0):
            break

    # Expand block sizes into a per-strategy cluster label vector.
    clusters = np.array([block_number for block_number, size in enumerate(block_sizes) for _ in range(size)])
    # One common factor per cluster; each strategy is its cluster's factor
    # plus independent noise, so within-cluster correlation is high.
    X0 = np.random.normal(size=(number_of_observations, effective_number_of_trials))
    X = np.zeros(shape=(number_of_observations, number_of_trials))
    for i, cluster in enumerate(clusters):
        X[:, i] = X0[:, cluster] + noise * np.random.normal(size=number_of_observations)
    C = np.asarray(np.corrcoef(X, rowvar=False))
    np.fill_diagonal(C, 1)  # rounding errors
    C = np.clip(C, -1, 1)
    return C, X, clusters

842 

843 

def generate_non_gaussian_data(
    nr: int,
    nc: int,
    *,
    SR0: float = 0,
    name: str = "severe",
) -> np.ndarray:
    """Generate non-Gaussian return data with specified characteristics.

    Creates a matrix of simulated returns from a two-component Gaussian
    mixture (a "core" component plus a rarer "tail" component) that
    exhibits the specified skewness and kurtosis characteristics while
    maintaining the target Sharpe ratio.

    Args:
        nr: Number of rows (observations/time periods).
        nc: Number of columns (assets/strategies).
        SR0: Target Sharpe ratio. Default 0.
        name: Distribution severity. One of:
            - "gaussian": No skewness or kurtosis
            - "mild": Slight negative skew and excess kurtosis
            - "moderate": Moderate negative skew and excess kurtosis
            - "severe": Strong negative skew and excess kurtosis
            Default "severe".

    Returns:
        Array of shape (nr, nc) containing simulated returns.

    Raises:
        AssertionError: If name is not a valid distribution type.

    Example:
        >>> np.random.seed(42)
        >>> X = generate_non_gaussian_data(1000, 1, SR0=0.2, name="mild")
        >>> X.shape
        (1000, 1)
    """
    # Mixture configurations: (p_tail, mu_tail, sigma_tail, sigma_core).
    # "gaussian" has p_tail = 0, i.e. a pure normal core component.
    configs = {
        "gaussian": (0, 0, 0.015, 0.010),
        "mild": (0.04, -0.03, 0.015, 0.010),
        "moderate": (0.03, -0.045, 0.020, 0.010),
        "severe": (0.02, -0.060, 0.025, 0.010),
    }
    assert name in configs

    def mixture_variance(
        p_tail: float,
        mu_tail: float,
        sigma_tail: float,
        mu_core: float,
        sigma_core: float,
    ) -> float:
        """Compute the variance of a two-component Gaussian mixture.

        Args:
            p_tail: Mixing weight of the tail component.
            mu_tail: Mean of the tail component.
            sigma_tail: Standard deviation of the tail component.
            mu_core: Mean of the core component.
            sigma_core: Standard deviation of the core component.

        Returns:
            Variance of the mixture distribution.
        """
        # Var = E[X^2] - E[X]^2, with both moments mixed across components.
        w = 1.0 - p_tail
        mu = w * mu_core + p_tail * mu_tail
        m2 = w * (sigma_core**2 + mu_core**2) + p_tail * (sigma_tail**2 + mu_tail**2)
        return float(m2 - mu**2)

    def gen_with_true_SR0(reps: int, T: int, cfg: tuple[float, float, float, float], SR0: float) -> np.ndarray:
        """Generate mixture returns scaled to a target population Sharpe ratio.

        Args:
            reps: Number of independent return series to generate.
            T: Length of each return series.
            cfg: Mixture config tuple (p_tail, mu_tail, sigma_tail, sigma_core).
            SR0: Target population Sharpe ratio.

        Returns:
            Array of shape (reps, T) with non-Gaussian returns at the given Sharpe ratio.
        """
        p, mu_tail, sig_tail, sig_core = cfg
        # Zero-mean baseline mixture (choose mu_core so mean=0)
        mu_core0 = -p * mu_tail / (1.0 - p)
        std0 = np.sqrt(mixture_variance(p, mu_tail, sig_tail, mu_core0, sig_core))
        mu_shift = SR0 * std0  # sets population Sharpe to SR0, preserves skew/kurt
        # Bernoulli mask selects which observations come from the tail
        # component; the rest are drawn from the shifted core component.
        mask = np.random.uniform(size=(reps, T)) < p
        X = np.random.normal(mu_core0 + mu_shift, sig_core, size=(reps, T))
        X[mask] = np.random.normal(mu_tail + mu_shift, sig_tail, size=mask.sum())
        return X

    return gen_with_true_SR0(nr, nc, configs[name], SR0)

935 

936 

def autocorrelation(X: np.ndarray) -> float:
    """Return the average lag-1 autocorrelation over the columns of X.

    For every column, the Pearson correlation between the series and its
    one-step-lagged copy is computed; the result is the arithmetic mean
    of these per-column coefficients.

    Args:
        X: Data matrix of shape (n_observations, n_series).

    Returns:
        Mean first-order autocorrelation coefficient across all columns.

    Example:
        >>> np.random.seed(42)
        >>> # Generate AR(1) process with rho=0.5
        >>> n = 1000
        >>> X = np.zeros((n, 1))
        >>> X[0] = np.random.normal()
        >>> for i in range(1, n):
        ...     X[i] = 0.5 * X[i-1] + np.sqrt(1-0.25) * np.random.normal()
        >>> ac = autocorrelation(X)
        >>> bool(0.4 < ac < 0.6)  # Should be close to 0.5
        True
    """
    # Iterating X.T yields each column; pair it with its one-step lag.
    lag1_coeffs = [np.corrcoef(col[1:], col[:-1])[0, 1] for col in X.T]
    return float(np.mean(lag1_coeffs))

966 

967 

def pFDR(
    p_H1: float,
    alpha: float,
    beta: float,
) -> float:
    """Posterior probability of H0 conditional on the test rejecting.

    Evaluates P[H0 | SR > SR_c] via Bayes' rule: among rejections, false
    positives arrive at rate alpha from the H0 population and true
    positives at rate (1 - beta) from the H1 population. This is the
    "predictive" FDR, conditioned on crossing the critical value rather
    than on the exact observed value.

    Args:
        p_H1: Prior probability that H1 is true.
        alpha: Significance level (Type I error rate).
        beta: Type II error rate (1 - power).

    Returns:
        Posterior probability of H0 given rejection.

    Example:
        >>> # With 5% prior on H1 and 5% significance
        >>> fdr = pFDR(p_H1=0.05, alpha=0.05, beta=0.3)
        >>> 0 < fdr < 1
        True
    """
    prior_H0 = 1 - p_H1
    # Posterior odds of H1 over H0 given rejection: (1-beta)*p1 / (alpha*p0).
    rejection_odds = (1 - beta) * p_H1 / alpha / prior_H0
    return 1 / (1 + rejection_odds)

996 

997 

def oFDR(
    SR: float,
    SR0: float,
    SR1: float,
    T: int,
    p_H1: float,
    *,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    K: int = 1,
) -> float:
    """Compute the observed FDR conditioned on the realized Sharpe ratio.

    Evaluates P[H0 | SR > SR_obs] by Bayes' rule, weighting the tail
    probabilities of the observed Sharpe under each hypothesis by the
    corresponding prior. Unlike the "predictive" FDR, this conditions on
    the actual observation instead of the critical value alone.

    Args:
        SR: Observed Sharpe ratio.
        SR0: Sharpe ratio under the null hypothesis.
        SR1: Sharpe ratio under the alternative hypothesis.
        T: Number of observations.
        p_H1: Prior probability that H1 is true.
        gamma3: Skewness of returns. Default 0.
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: Autocorrelation of returns. Default 0.
        K: Number of strategies for variance adjustment. Default 1.

    Returns:
        Posterior probability of H0 given the observed SR.

    Example:
        >>> # Higher observed SR should give lower probability of H0
        >>> fdr_low = oFDR(SR=0.3, SR0=0, SR1=0.5, T=24, p_H1=0.1)
        >>> fdr_high = oFDR(SR=0.8, SR0=0, SR1=0.5, T=24, p_H1=0.1)
        >>> bool(fdr_high < fdr_low)
        True
    """
    # Tail probabilities P[SR' > SR_obs | H] under null and alternative,
    # using the same moment/autocorrelation adjustments for both.
    nuisance = {"T": T, "gamma3": gamma3, "gamma4": gamma4, "rho": rho, "K": K}
    tail_H0 = 1 - probabilistic_sharpe_ratio(SR, SR0, **nuisance)
    tail_H1 = 1 - probabilistic_sharpe_ratio(SR, SR1, **nuisance)
    prior_H0 = 1 - p_H1
    # Bayes' rule: posterior mass on H0 among outcomes at least this extreme.
    evidence = tail_H0 * prior_H0 + tail_H1 * p_H1
    return tail_H0 * prior_H0 / evidence