Coverage for src / jsharpe / sharpe.py: 100%
185 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-31 05:40 +0000
1"""Sharpe-related utilities for statistical analysis and hypothesis testing.
3This module provides comprehensive tools for Sharpe ratio analysis, including:
4 - Variance estimation under non-Gaussian returns
5 - Statistical significance testing
6 - Multiple testing corrections (FDR, FWER)
7 - Portfolio optimization utilities
8"""
9# ruff: noqa: N802, N803, N806, S101, TRY003
11import math
12import warnings
13from collections.abc import Callable
15import numpy as np
16import scipy
19def ppoints(n: int, a: float | None = None) -> np.ndarray:
20 """Generate probability points for Q-Q plots and distribution fitting.
22 Creates n equidistant points in the interval (0, 1), suitable for
23 probability plotting positions. The boundaries 0 and 1 are excluded.
24 This function mirrors the behavior of R's `ppoints()` function.
26 Args:
27 n: Number of probability points to generate.
28 a: Plotting position parameter in [0, 1]. Defaults to 0.5 if n > 10,
29 otherwise 3/8 (following R's convention).
31 Returns:
32 Array of n equidistant probability points in (0, 1).
34 Raises:
35 ValueError: If a is not in [0, 1].
37 Example:
38 >>> import numpy as np
39 >>> pts = ppoints(5)
40 >>> len(pts)
41 5
42 >>> np.all((pts > 0) & (pts < 1))
43 np.True_
44 >>> ppoints(5, a=0.5)
45 array([0.1, 0.3, 0.5, 0.7, 0.9])
46 """
47 if a is None:
48 a = 0.5 if n > 10 else 3 / 8
49 if not (0 <= a <= 1):
50 raise ValueError(f"the offset should be in [0,1], got {a}")
51 return np.linspace(1 - a, n - a, n) / (n + 1 - 2 * a)
def robust_covariance_inverse(V: np.ndarray) -> np.ndarray:
    r"""Invert a constant-correlation covariance matrix via Sherman-Morrison.

    Assumes the covariance has the structure
    $V = \rho \sigma \sigma' + (1-\rho) \text{diag}(\sigma^2)$
    (identical correlation $\rho$ for every pair of assets), where $\rho$
    is estimated as the mean of the empirical off-diagonal correlations.

    With $A = \text{diag}(\sigma^2)/(1-\rho)$, the Sherman-Morrison formula
    gives
    $V^{-1} = A^{-1} - \dfrac{A^{-1} \rho \sigma \sigma' A^{-1}}
    {1 + \rho \sigma' A^{-1} \sigma}$.

    Args:
        V: Covariance matrix with (approximately) constant off-diagonal
            correlations. Shape (n, n).

    Returns:
        Inverse of the covariance matrix. Shape (n, n).

    Example:
        >>> import numpy as np
        >>> sigma = np.array([1.0, 2.0, 1.5])
        >>> C = 0.5 * np.ones((3, 3))
        >>> np.fill_diagonal(C, 1)
        >>> V = C * np.outer(sigma, sigma)
        >>> np.allclose(V @ robust_covariance_inverse(V), np.eye(3), atol=1e-10)
        True
    """
    vols = np.sqrt(np.diag(V))
    corr = V / np.outer(vols, vols)
    # Average off-diagonal correlation = the constant rho of the model.
    avg_rho = np.mean(corr[np.triu_indices_from(corr, 1)])
    # A^{-1} for the diagonal part of the model.
    A_inv = np.diag(1 / vols**2) / (1 - avg_rho)
    u = A_inv @ vols  # A^{-1} sigma (A_inv is diagonal, hence symmetric)
    denom = 1 + avg_rho * (vols @ u)
    inverse: np.ndarray = A_inv - avg_rho * np.outer(u, u) / denom
    return inverse
def minimum_variance_weights_for_correlated_assets(V: np.ndarray) -> np.ndarray:
    """Weights of the minimum-variance portfolio under constant correlation.

    Solves min w'Vw subject to sum(w) = 1; the unnormalized solution is
    V^{-1} 1, rescaled to sum to one. The inverse is computed with the
    constant-correlation shortcut from `robust_covariance_inverse`.

    Args:
        V: Covariance matrix of asset returns. Shape (n, n).

    Returns:
        Portfolio weights summing to 1. Shape (n,).

    Example:
        >>> import numpy as np
        >>> V = np.array([[0.04, 0.01], [0.01, 0.09]])
        >>> w = minimum_variance_weights_for_correlated_assets(V)
        >>> np.isclose(w.sum(), 1.0)
        np.True_
    """
    ones = np.ones(V.shape[0])
    raw = robust_covariance_inverse(V) @ ones
    return raw / np.sum(raw)
def effective_rank(C: np.ndarray) -> float:
    """Effective rank of a positive semi-definite matrix.

    Defined as exp(H), where H is the Shannon entropy of the normalized
    positive eigenvalues. It is a continuous dimensionality measure in
    [1, n]: 1 for a perfectly correlated (rank-one) matrix, n for the
    identity.

    Steps: eigenvalues via `eigvalsh` (symmetric solver), drop the
    non-positive ones, normalize into a probability vector, compute
    H = -sum p_i log p_i, return exp(H).

    Args:
        C: Positive semi-definite matrix (e.g., a correlation matrix).
            Shape (n, n).

    Returns:
        Effective rank in [1, n].

    References:
        Roy, O. and Vetterli, M. (2007). "The effective rank: a measure of
        effective dimensionality." EURASIP Journal on Advances in Signal
        Processing. http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.177.2721

    Example:
        >>> import numpy as np
        >>> abs(effective_rank(np.eye(3)) - 3.0) < 1e-10
        True
        >>> abs(effective_rank(np.ones((3, 3))) - 1.0) < 1e-10
        True
    """
    eigenvalues = np.linalg.eigvalsh(C)
    positive = eigenvalues[eigenvalues > 0]
    probs = positive / np.sum(positive)
    entropy = float(np.sum(-probs * np.log(probs)))
    return math.exp(entropy)
def moments_Mk(k: int, *, rho: float = 0) -> tuple[float, float, float]:
    """Compute moments of the maximum of k standard normal random variables.

    Computes E[M_k], E[M_k^2], and Var[M_k] where M_k = max(Z_1, ..., Z_k)
    and Z_i are standard normal. Supports both independent (rho=0) and
    equi-correlated (rho > 0) cases.

    For the correlated case with equi-correlation rho:
        Z_i = sqrt(rho) * X + sqrt(1-rho) * Y_i
        M = sqrt(rho) * X + sqrt(1-rho) * max(Y_i)

    Args:
        k: Number of standard normal random variables.
        rho: Equi-correlation coefficient in [0, 1). Default is 0 (independent).

    Returns:
        Tuple of (Ez, Ez2, var):
            - Ez: Expected value E[M_k]
            - Ez2: Second moment E[M_k^2]
            - var: Variance Var[M_k]

    Example:
        >>> Ez, Ez2, var = moments_Mk(1)
        >>> bool(abs(Ez) < 1e-10)  # E[max of one N(0,1)] = 0
        True
        >>> bool(abs(var - 1.0) < 1e-10)  # Var[max of one N(0,1)] = 1
        True
    """
    Phi = scipy.stats.norm.cdf
    # Independent case: the density of M_k is k * phi(z) * Phi(z)^(k-1),
    # so E[M_k^m] = E[k * Z^m * Phi(Z)^(k-1)] for Z ~ N(0,1).
    Ez = E_under_normal(lambda z: k * z * (Phi(z) ** (k - 1)))
    Ez2 = E_under_normal(lambda z: k * z**2 * (Phi(z) ** (k - 1)))
    var = Ez2 - Ez**2

    # Equi-correlated case: M = sqrt(rho)*X + sqrt(1-rho)*max(Y_i) with
    # E[X] = 0, so the mean scales by sqrt(1-rho) (bug fix: previously
    # scaled by (1-rho), inconsistent with the model stated above and with
    # the variance line below) and Var[M] = rho + (1-rho) * Var[max(Y_i)].
    Ez = math.sqrt(1 - rho) * Ez
    var = rho + (1 - rho) * var
    Ez2 = var + Ez**2

    return Ez, Ez2, var
def sharpe_ratio_variance(
    SR: float,
    T: int,
    *,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    K: int = 1,
) -> float:
    """Asymptotic variance of the Sharpe ratio estimator.

    Handles non-Gaussian returns (skewness gamma3, kurtosis gamma4) and
    AR(1)-style autocorrelation rho. With gamma3=0, gamma4=3 and rho=0 the
    expression collapses to the Gaussian iid result (1 + SR^2/2) / T.
    When K > 1 the variance is scaled by Var[max of K standard normals]
    to account for strategy selection.

    Args:
        SR: Sharpe ratio value (annualized or per-period).
        T: Number of observations (time periods).
        gamma3: Skewness of returns. Default 0 (symmetric).
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: First-order autocorrelation of returns. Default 0 (iid).
        K: Number of strategies for multiple testing adjustment. Default 1.

    Returns:
        Variance of the Sharpe ratio estimator.

    Example:
        >>> v_gauss = sharpe_ratio_variance(SR=0.5, T=24)
        >>> v_fat = sharpe_ratio_variance(SR=0.5, T=24, gamma4=6.0)
        >>> bool(v_fat > v_gauss)  # heavier tails inflate the variance
        True
    """
    # Autocorrelation adjustment terms (all vanish when rho == 0).
    serial = rho / (1 - rho)
    serial_sq = rho**2 / (1 - rho**2)
    coef_const = 1 + 2 * serial
    coef_skew = 1 + serial + serial_sq
    coef_kurt = 1 + 2 * serial_sq
    base = (coef_const - coef_skew * gamma3 * SR + coef_kurt * (gamma4 - 1) / 4 * SR**2) / T
    # Scale by Var[max of K standard normals] for the selection effect.
    return float(base * moments_Mk(K)[2])
def variance_of_the_maximum_of_k_Sharpe_ratios(number_of_trials: int, variance: float) -> float:
    """Variance of the maximum Sharpe ratio selected from K strategies.

    Picking the best of K strategies adds selection uncertainty; this is
    modeled by a logarithmic inflation factor 1 + log(K), which equals 1
    at K = 1 and grows monotonically with K.

    Args:
        number_of_trials: Number of strategies (K >= 1).
        variance: Base variance of individual Sharpe ratio estimates.

    Returns:
        Inflated variance accounting for selection from K strategies.

    Example:
        >>> v1 = variance_of_the_maximum_of_k_Sharpe_ratios(1, 0.1)
        >>> v10 = variance_of_the_maximum_of_k_Sharpe_ratios(10, 0.1)
        >>> bool(v10 > v1)
        True
    """
    # Clamp to K >= 1 so the factor is always >= 1.
    K = max(1, int(number_of_trials))
    inflation_factor = 1.0 + math.log(K)
    return float(variance * inflation_factor)
def control_for_FDR(
    q: float,
    *,
    SR0: float = 0,
    SR1: float = 0.5,
    p_H1: float = 0.05,
    T: int = 24,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    K: int = 1,
) -> tuple[float, float, float, float]:
    """Critical value and error rates for Sharpe testing under FDR control.

    Finds the critical Sharpe ratio SR_c such that the False Discovery
    Rate of the one-sided test is q, given a two-point mixture model
    (H0 with Sharpe SR0, H1 with Sharpe SR1, prior P[H1] = p_H1).

    Args:
        q: Desired False Discovery Rate level in (0, 1).
        SR0: Sharpe ratio under the null hypothesis H0. Default 0.
        SR1: Sharpe ratio under the alternative H1. Default 0.5.
        p_H1: Prior probability that H1 is true. Default 0.05.
        T: Number of observations. Default 24.
        gamma3: Skewness of returns. Default 0.
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: Autocorrelation of returns. Default 0.
        K: Number of strategies (K=1 for FDR; K>1 for FWER-FDR). Default 1.

    Returns:
        Tuple of (alpha, beta, SR_c, q_hat):
            - alpha: Significance level P[SR > SR_c | H0]
            - beta: Type II error P[SR <= SR_c | H1]; power is 1 - beta
            - SR_c: Critical Sharpe ratio threshold
            - q_hat: Estimated FDR (should be close to q)

    Example:
        >>> alpha, beta, SR_c, q_hat = control_for_FDR(q=0.25, T=24)
        >>> bool(0 < alpha < 1) and bool(SR_c > 0)
        True
    """
    cdf = scipy.stats.norm.cdf

    # Standard deviations of the SR estimator under each hypothesis.
    sd0 = math.sqrt(sharpe_ratio_variance(SR0, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K))
    sd1 = math.sqrt(sharpe_ratio_variance(SR1, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K))
    SRc = FDR_critical_value(q, SR0, SR1, sd0, sd1, p_H1)

    beta = cdf((SRc - SR1) / sd1)
    alpha = q / (1 - q) * p_H1 / (1 - p_H1) * (1 - beta)
    # Sanity check: the implied FDR at (alpha, beta) should recover q.
    q_hat = 1 / (1 + (1 - beta) / alpha * p_H1 / (1 - p_H1))

    return alpha, beta, SRc, q_hat
def expected_maximum_sharpe_ratio(number_of_trials: int, variance: float, SR0: float = 0) -> float:
    """Expected maximum Sharpe ratio across K independent strategies.

    Approximates E[max(SR_1, ..., SR_K)] for K independent estimates with
    common variance, using the Gumbel (extreme-value) approximation for
    the expected maximum of iid normals, shifted by the baseline SR0.

    Args:
        number_of_trials: Number of strategies (K).
        variance: Variance of individual Sharpe ratio estimates.
        SR0: Baseline Sharpe ratio to add. Default 0.

    Returns:
        Expected value of the maximum Sharpe ratio across K strategies.
        Note: for K = 1 the Gumbel formula involves ppf(0) = -inf.

    Example:
        >>> e2 = expected_maximum_sharpe_ratio(2, 0.1)
        >>> e10 = expected_maximum_sharpe_ratio(10, 0.1)
        >>> bool(e10 > e2)
        True
    """
    ppf = scipy.stats.norm.ppf
    K = number_of_trials
    # Gumbel approximation: a weighted blend of two normal quantiles,
    # weighted by the Euler-Mascheroni constant.
    blend = (1 - np.euler_gamma) * ppf(1 - 1 / K) + np.euler_gamma * ppf(1 - 1 / K / np.exp(1))
    return float(SR0 + np.sqrt(variance) * blend)
def minimum_track_record_length(
    SR: float,
    SR0: float,
    *,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    alpha: float = 0.05,
) -> float:
    """Minimum number of periods for SR to significantly beat SR0.

    Inverts the one-sided test H0: SR = SR0 vs H1: SR > SR0: returns the
    smallest T such that the observed SR is significant at level alpha,
    i.e. T = var_1 * (z_{1-alpha} / (SR - SR0))^2 where var_1 is the
    per-period variance of the SR estimator under H0.

    Args:
        SR: Observed Sharpe ratio.
        SR0: Sharpe ratio under the null hypothesis H0.
        gamma3: Skewness of returns. Default 0.
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: Autocorrelation of returns. Default 0.
        alpha: Significance level. Default 0.05.

    Returns:
        Minimum track record length (number of periods) required.

    Example:
        >>> short = minimum_track_record_length(SR=1.0, SR0=0)
        >>> long = minimum_track_record_length(SR=0.5, SR0=0)
        >>> bool(short < long)  # stronger signal needs less data
        True
    """
    # Per-period variance of the SR estimator (T=1), no selection effect.
    per_period_var = sharpe_ratio_variance(SR0, T=1, gamma3=gamma3, gamma4=gamma4, rho=rho, K=1)
    z_alpha = scipy.stats.norm.ppf(1 - alpha)
    return float(per_period_var * (z_alpha / (SR - SR0)) ** 2)
def make_expectation_gh(
    n_nodes: int = 200,
) -> Callable[[Callable[[np.ndarray], np.ndarray]], float]:
    """Build an operator computing E[g(Z)], Z ~ N(0,1), by quadrature.

    Uses Gauss-Hermite quadrature with the change of variables
    E[g(Z)] ≈ (1/√π) Σ_i w_i g(√2 t_i), where (t_i, w_i) are the
    Gauss-Hermite nodes and weights.

    Args:
        n_nodes: Number of quadrature nodes; more nodes, higher accuracy.
            Default 200.

    Returns:
        A function mapping g to an approximation of E[g(Z)].

    Example:
        >>> E = make_expectation_gh(n_nodes=100)
        >>> bool(abs(E(lambda z: z**2) - 1.0) < 1e-6)
        True
    """
    nodes, weights = np.polynomial.hermite.hermgauss(n_nodes)
    # Pre-scale nodes (sqrt(2) from the change of variables) and fold the
    # 1/sqrt(pi) normalization into a constant, both hoisted out of E.
    evaluation_points = np.sqrt(2.0) * nodes
    normalization = 1.0 / np.sqrt(np.pi)

    def expectation(g: Callable[[np.ndarray], np.ndarray]) -> float:
        """Approximate E[g(Z)] for Z ~ N(0,1) at the precomputed nodes."""
        return float(normalization * np.dot(weights, g(evaluation_points)))

    return expectation
# Shared module-level expectation operator E[g(Z)] for Z ~ N(0,1), built once
# with 200 Gauss-Hermite nodes; used by moments_Mk.
E_under_normal = make_expectation_gh(n_nodes=200)
def adjusted_p_values_bonferroni(ps: np.ndarray) -> np.ndarray:
    """Bonferroni-adjusted p-values for FWER control.

    Each p-value is multiplied by the number of tests and capped at 1;
    the most conservative of the standard multiple-testing corrections.

    Args:
        ps: Array of unadjusted p-values.

    Returns:
        Array of Bonferroni-adjusted p-values, each in [0, 1].

    Example:
        >>> adj = adjusted_p_values_bonferroni(np.array([0.01, 0.03, 0.05]))
        >>> np.allclose(adj, [0.03, 0.09, 0.15])
        True
    """
    n_tests = len(ps)
    scaled: np.ndarray = np.minimum(ps * n_tests, 1)
    return scaled
def adjusted_p_values_sidak(ps: np.ndarray) -> np.ndarray:
    """Šidák-adjusted p-values for FWER control.

    Applies 1 - (1 - p)^M with M the number of tests; slightly less
    conservative than Bonferroni when the tests are independent.

    Args:
        ps: Array of unadjusted p-values.

    Returns:
        Array of Šidák-adjusted p-values, each in [0, 1].

    Example:
        >>> adj = adjusted_p_values_sidak(np.array([0.01, 0.03]))
        >>> np.allclose(adj, [1 - 0.99**2, 1 - 0.97**2])
        True
    """
    n_tests = len(ps)
    complement = (1 - ps) ** n_tests
    return 1 - complement
def adjusted_p_values_holm(ps: np.ndarray, *, variant: str = "bonferroni") -> np.ndarray:
    """Holm step-down adjusted p-values for FWER control.

    Visits the p-values from smallest to largest, applies the per-step
    correction (Bonferroni or Šidák) with a shrinking multiplicity, and
    enforces monotonicity by carrying forward the running maximum.
    Uniformly more powerful than plain Bonferroni.

    Args:
        ps: Array of unadjusted p-values.
        variant: Per-step correction, "bonferroni" (default) or "sidak".

    Returns:
        Array of Holm-adjusted p-values, each in [0, 1].

    Raises:
        AssertionError: If variant is not "bonferroni" or "sidak".

    Example:
        >>> adj = adjusted_p_values_holm(np.array([0.01, 0.04, 0.03]))
        >>> float(adj[0])
        0.03
    """
    assert variant in ["bonferroni", "sidak"]
    order = np.argsort(ps)
    n_tests = len(ps)
    adjusted = np.zeros(n_tests)
    running_max = 0
    for rank, idx in enumerate(order):
        multiplicity = n_tests - rank
        if variant == "bonferroni":
            step = min(1, ps[idx] * multiplicity)
        else:
            step = 1 - (1 - ps[idx]) ** multiplicity
        # Step-down monotonicity: never below an earlier adjusted value.
        running_max = max(running_max, step)
        adjusted[idx] = running_max
    return adjusted
def FDR_critical_value(q: float, SR0: float, SR1: float, sigma0: float, sigma1: float, p_H1: float) -> float:
    """Critical value c such that P[H0 | X > c] = q in a two-point mixture.

    The model: H ~ Bernoulli(p_H1) selects between X ~ N(SR0, sigma0^2)
    (null) and X ~ N(SR1, sigma1^2) (alternative). The root of the
    posterior-equals-q equation is searched on the bracket [-10, 10].

    Args:
        q: Desired False Discovery Rate in (0, 1).
        SR0: Mean under the null hypothesis (must be < SR1).
        SR1: Mean under the alternative hypothesis.
        sigma0: Standard deviation under the null (must be > 0).
        sigma1: Standard deviation under the alternative (must be > 0).
        p_H1: Prior probability of the alternative, in (0, 1).

    Returns:
        Critical value c. Returns -inf if the solution lies below the
        bracket, or nan if no root exists inside it.

    Raises:
        AssertionError: If parameters are out of valid ranges.

    Example:
        >>> c = FDR_critical_value(q=0.2, SR0=0, SR1=0.5, sigma0=0.2, sigma1=0.3, p_H1=0.1)
        >>> c > 0
        True
    """
    assert SR0 < SR1
    assert 0 < q < 1
    assert 0 < p_H1 < 1
    assert sigma0 > 0
    assert sigma1 > 0

    with warnings.catch_warnings():
        # Extreme thresholds drive both survival functions to 0; silence the
        # resulting 0/0 and x/0 warnings (handled via the isfinite fallback).
        warnings.filterwarnings("ignore", message="invalid value encountered in scalar divide")
        warnings.filterwarnings("ignore", message="divide by zero encountered in scalar divide")

        sf = scipy.stats.norm.sf
        prior_odds = p_H1 / (1 - p_H1)

        def posterior_H0(c: float) -> float:
            """Posterior false discovery probability P[H0 | X > c]."""
            likelihood_ratio = sf((c - SR1) / sigma1) / sf((c - SR0) / sigma0)
            value = 1 / (1 + likelihood_ratio * prior_odds)
            # Non-finite means both tails vanished: treat as posterior 0.
            return float(np.where(np.isfinite(value), value, 0))

        at_low = posterior_H0(-10)
        at_high = posterior_H0(10)

        if at_low < q:  # Solution lies outside (below) the search interval
            return float(-np.inf)

        if (at_low - q) * (at_high - q) > 0:  # No sign change, e.g. σ₀≫σ₁ with small q
            return float(np.nan)

        return float(scipy.optimize.brentq(lambda c: posterior_H0(c) - q, -10, 10))
def critical_sharpe_ratio(
    SR0: float,
    T: int,
    *,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    alpha: float = 0.05,
    K: int = 1,
) -> float:
    """Critical Sharpe ratio threshold for a one-sided significance test.

    For H0: SR = SR0 vs H1: SR > SR0 at level alpha, returns
    SR_c = SR0 + z_{1-alpha} * sd(SR_hat); reject H0 when the observed
    SR exceeds SR_c.

    Args:
        SR0: Sharpe ratio under the null hypothesis.
        T: Number of observations.
        gamma3: Skewness of returns. Default 0.
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: Autocorrelation of returns. Default 0.
        alpha: Significance level. Default 0.05.
        K: Number of strategies for variance adjustment. Default 1.

    Returns:
        Critical Sharpe ratio threshold.

    Example:
        >>> SR_c = critical_sharpe_ratio(SR0=0, T=24, alpha=0.05)
        >>> bool(SR_c > 0)
        True
    """
    estimator_variance = sharpe_ratio_variance(SR0, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K)
    z_alpha = scipy.stats.norm.ppf(1 - alpha)
    return float(SR0 + z_alpha * math.sqrt(estimator_variance))
631def probabilistic_sharpe_ratio(
632 SR: float,
633 SR0: float,
634 *,
635 variance: float | None = None,
636 T: int | None = None,
637 gamma3: float = 0.0,
638 gamma4: float = 3.0,
639 rho: float = 0.0,
640 K: int = 1,
641) -> float:
642 """Compute the Probabilistic Sharpe Ratio (PSR).
644 The PSR is 1 - p, where p is the p-value of testing H0: SR = SR0 vs
645 H1: SR > SR0. It can be interpreted as a "Sharpe ratio on a probability
646 scale", i.e., mapping the SR to [0, 1].
648 Args:
649 SR: Observed Sharpe ratio.
650 SR0: Sharpe ratio under null hypothesis.
651 variance: Variance of SR estimator. Provide this OR (T, gamma3, ...).
652 T: Number of observations (if variance not provided).
653 gamma3: Skewness of returns. Default 0.
654 gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
655 rho: Autocorrelation of returns. Default 0.
656 K: Number of strategies for variance adjustment. Default 1.
658 Returns:
659 Probabilistic Sharpe Ratio in [0, 1]. Values near 1 indicate
660 strong evidence that the true SR exceeds SR0.
662 Raises:
663 AssertionError: If both variance and T are provided.
665 Example:
666 >>> psr = probabilistic_sharpe_ratio(SR=0.5, SR0=0, T=24)
667 >>> bool(0 < psr < 1)
668 True
669 >>> # Higher observed SR gives higher PSR
670 >>> psr_high = probabilistic_sharpe_ratio(SR=1.0, SR0=0, T=24)
671 >>> bool(psr_high > psr)
672 True
673 """
674 if variance is None:
675 assert T is not None, "T must be provided if variance is not provided"
676 variance = sharpe_ratio_variance(SR0, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K)
677 else:
678 assert T is None, "Provide either the variance or (T, gamma3, gamma4, rho)"
679 return float(scipy.stats.norm.cdf((SR - SR0) / math.sqrt(variance)))
def sharpe_ratio_power(
    SR0: float,
    SR1: float,
    T: int,
    *,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    alpha: float = 0.05,
    K: int = 1,
) -> float:
    """Power of the one-sided Sharpe ratio test H0: SR = SR0 vs H1: SR = SR1.

    Power = 1 - beta = P[reject H0 | true SR is SR1]: the probability of
    correctly detecting a strategy with Sharpe SR1. Equivalent to recall
    in classification terms: TP / (TP + FN).

    Args:
        SR0: Sharpe ratio under the null hypothesis.
        SR1: Sharpe ratio under the alternative (should exceed SR0).
        T: Number of observations.
        gamma3: Skewness of returns. Default 0.
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: Autocorrelation of returns. Default 0.
        alpha: Significance level. Default 0.05.
        K: Number of strategies for variance adjustment. Default 1.

    Returns:
        Statistical power in [0, 1].

    Example:
        >>> p12 = sharpe_ratio_power(SR0=0, SR1=0.5, T=12)
        >>> p48 = sharpe_ratio_power(SR0=0, SR1=0.5, T=48)
        >>> bool(p48 > p12)  # longer samples are more powerful
        True
    """
    threshold = critical_sharpe_ratio(SR0, T, gamma3=gamma3, gamma4=gamma4, rho=rho, alpha=alpha)
    sd1 = math.sqrt(sharpe_ratio_variance(SR1, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K))
    # Type II error: probability of landing below the threshold under H1.
    type_II = scipy.stats.norm.cdf((threshold - SR1) / sd1)
    return float(1 - type_II)
def generate_autocorrelated_non_gaussian_data(
    N: int,
    n: int,
    SR0: float = 0,
    name: str = "gaussian",
    rho: float | None = None,
    gaussian_autocorrelation: float = 0,
) -> np.ndarray:
    """Simulate autocorrelated returns with non-Gaussian marginals.

    Copula-style construction: Gaussian AR(1) drivers are mapped to
    uniforms with the normal cdf, then to the target marginal with an
    empirical quantile function estimated from a large iid sample of
    `generate_non_gaussian_data`.

    Args:
        N: Number of time periods (rows).
        n: Number of assets/strategies (columns).
        SR0: Target Sharpe ratio. Default 0.
        name: Marginal distribution type: "gaussian", "mild", "moderate",
            or "severe". Default "gaussian".
        rho: Autocorrelation coefficient; if None, falls back to
            `gaussian_autocorrelation`.
        gaussian_autocorrelation: Autocorrelation of the Gaussian driver.
            Default 0.

    Returns:
        Array of shape (N, n) containing simulated returns.

    Example:
        >>> np.random.seed(42)
        >>> generate_autocorrelated_non_gaussian_data(100, 2, SR0=0.1, name="mild").shape
        (100, 2)
    """
    if rho is None:
        # For the distributions considered here, the autocorrelation of the
        # transformed series is almost the same as that of the driver.
        rho = gaussian_autocorrelation

    # Empirical quantile function of the target marginal, from a large
    # iid sample sorted against equidistant probability points.
    sample_size = 10_000
    sample = generate_non_gaussian_data(sample_size, 1, SR0=SR0, name=name)[:, 0]
    ppf = scipy.interpolate.interp1d(ppoints(sample_size), sorted(sample), fill_value="extrapolate")

    # Gaussian AR(1) drivers, one per column, with unit marginal variance.
    Z = np.random.normal(size=(N, n))
    for t in range(1, N):
        Z[t, :] = rho * Z[t - 1, :] + np.sqrt(1 - rho**2) * Z[t, :]

    # Normal cdf -> uniform margins; empirical ppf -> target margins.
    U = scipy.stats.norm.cdf(Z)
    out: np.ndarray = ppf(U)

    return out
def get_random_correlation_matrix(
    number_of_trials: int = 100,
    effective_number_of_trials: int = 10,
    number_of_observations: int = 200,
    noise: float = 0.1,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Random correlation matrix with a cluster (block) structure.

    Strategies are grouped into random, non-empty clusters; each strategy
    is its cluster's common factor plus idiosyncratic noise, so
    within-cluster correlations are high and across-cluster ones low.

    Args:
        number_of_trials: Number of time series (strategies). Default 100.
        effective_number_of_trials: Number of clusters. Default 10.
        number_of_observations: Number of simulated periods. Default 200.
        noise: Idiosyncratic noise level per series. Default 0.1.

    Returns:
        Tuple of (C, X, clusters):
            - C: Correlation matrix, shape (number_of_trials, number_of_trials)
            - X: Data matrix, shape (number_of_observations, number_of_trials)
            - clusters: Cluster index of each strategy

    Example:
        >>> np.random.seed(42)
        >>> C, X, clusters = get_random_correlation_matrix(
        ...     number_of_trials=20, effective_number_of_trials=4
        ... )
        >>> C.shape
        (20, 20)
    """
    # Draw random block boundaries until every cluster is non-empty.
    while True:
        cuts = sorted(np.random.choice(number_of_trials, effective_number_of_trials - 1, replace=True))
        boundaries = [0, *cuts, number_of_trials]
        sizes = np.diff(boundaries)
        if np.all(sizes > 0):
            break

    clusters = np.repeat(np.arange(len(sizes)), sizes)
    factors = np.random.normal(size=(number_of_observations, effective_number_of_trials))
    X = np.zeros(shape=(number_of_observations, number_of_trials))
    for col, cluster in enumerate(clusters):
        X[:, col] = factors[:, cluster] + noise * np.random.normal(size=number_of_observations)
    C = np.asarray(np.corrcoef(X, rowvar=False))
    np.fill_diagonal(C, 1)  # guard against rounding errors on the diagonal
    C = np.clip(C, -1, 1)
    return C, X, clusters
def generate_non_gaussian_data(
    nr: int,
    nc: int,
    *,
    SR0: float = 0,
    name: str = "severe",
) -> np.ndarray:
    """Simulate returns from a two-component Gaussian mixture.

    Draws from a core/tail Gaussian mixture whose severity setting fixes
    the skewness and kurtosis, then shifts the whole sample so that the
    population Sharpe ratio equals SR0 (a location shift leaves skewness
    and kurtosis unchanged).

    Args:
        nr: Number of rows (observations/time periods).
        nc: Number of columns (assets/strategies).
        SR0: Target Sharpe ratio. Default 0.
        name: Distribution severity, one of:
            - "gaussian": no skewness or excess kurtosis
            - "mild": slight negative skew and excess kurtosis
            - "moderate": moderate negative skew and excess kurtosis
            - "severe": strong negative skew and excess kurtosis
            Default "severe".

    Returns:
        Array of shape (nr, nc) containing simulated returns.

    Raises:
        AssertionError: If name is not a valid distribution type.

    Example:
        >>> np.random.seed(42)
        >>> generate_non_gaussian_data(1000, 1, SR0=0.2, name="mild").shape
        (1000, 1)
    """
    # (p_tail, mu_tail, sigma_tail, sigma_core) per severity level.
    configs = {
        "gaussian": (0, 0, 0.015, 0.010),
        "mild": (0.04, -0.03, 0.015, 0.010),
        "moderate": (0.03, -0.045, 0.020, 0.010),
        "severe": (0.02, -0.060, 0.025, 0.010),
    }
    assert name in configs
    p_tail, mu_tail, sigma_tail, sigma_core = configs[name]

    # Choose the core mean so the baseline (unshifted) mixture has mean 0.
    mu_core = -p_tail * mu_tail / (1.0 - p_tail)

    # Baseline mixture standard deviation via E[X^2] - (E[X])^2.
    core_weight = 1.0 - p_tail
    mean0 = core_weight * mu_core + p_tail * mu_tail
    second0 = core_weight * (sigma_core**2 + mu_core**2) + p_tail * (sigma_tail**2 + mu_tail**2)
    std0 = np.sqrt(second0 - mean0**2)

    # Location shift that sets the population Sharpe ratio to SR0 while
    # preserving skewness and kurtosis.
    shift = SR0 * std0

    tail_mask = np.random.uniform(size=(nr, nc)) < p_tail
    X = np.random.normal(mu_core + shift, sigma_core, size=(nr, nc))
    X[tail_mask] = np.random.normal(mu_tail + shift, sigma_tail, size=tail_mask.sum())
    return X
def autocorrelation(X: np.ndarray) -> float:
    """Mean lag-1 autocorrelation across the columns of X.

    For every column, correlates the series against its one-period lag
    (X[1:] vs X[:-1]) and averages the resulting coefficients.

    Args:
        X: Data matrix of shape (n_observations, n_series).

    Returns:
        Mean first-order autocorrelation coefficient across columns.

    Example:
        >>> import numpy as np
        >>> X = np.arange(1.0, 5.0).reshape(-1, 1)  # perfectly linear
        >>> bool(abs(autocorrelation(X) - 1.0) < 1e-12)
        True
    """
    n_series = X.shape[1]
    lag1 = [np.corrcoef(X[1:, col], X[:-1, col])[0, 1] for col in range(n_series)]
    return float(np.mean(lag1))
def pFDR(
    p_H1: float,
    alpha: float,
    beta: float,
) -> float:
    """Posterior probability of H0 given a rejection at the critical value.

    Computes P[H0 | SR > SR_c] from Bayes' rule using the prior P[H1],
    the significance level alpha (= P[reject | H0]) and the power
    1 - beta (= P[reject | H1]). This is the "predictive" FDR, based on
    the critical value rather than the observed statistic.

    Args:
        p_H1: Prior probability that H1 is true.
        alpha: Significance level (Type I error rate).
        beta: Type II error rate (1 - power).

    Returns:
        Posterior probability of H0 given rejection.

    Example:
        >>> fdr = pFDR(p_H1=0.05, alpha=0.05, beta=0.3)
        >>> 0 < fdr < 1
        True
    """
    p_H0 = 1 - p_H1
    # Posterior odds of H1 vs H0 given a rejection.
    posterior_odds_H1 = (1 - beta) * p_H1 / alpha / p_H0
    return 1 / (1 + posterior_odds_H1)
def oFDR(
    SR: float,
    SR0: float,
    SR1: float,
    T: int,
    p_H1: float,
    *,
    gamma3: float = 0.0,
    gamma4: float = 3.0,
    rho: float = 0.0,
    K: int = 1,
) -> float:
    """Observed FDR: P[H0 | SR > SR_obs] for the actual observation.

    Bayes' rule applied to the tail probabilities of exceeding the
    observed Sharpe ratio under each hypothesis; conditions on the
    observation itself rather than on the critical value (contrast with
    `pFDR`).

    Args:
        SR: Observed Sharpe ratio.
        SR0: Sharpe ratio under the null hypothesis.
        SR1: Sharpe ratio under the alternative hypothesis.
        T: Number of observations.
        p_H1: Prior probability that H1 is true.
        gamma3: Skewness of returns. Default 0.
        gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
        rho: Autocorrelation of returns. Default 0.
        K: Number of strategies for variance adjustment. Default 1.

    Returns:
        Posterior probability of H0 given the observed SR.

    Example:
        >>> lo = oFDR(SR=0.8, SR0=0, SR1=0.5, T=24, p_H1=0.1)
        >>> hi = oFDR(SR=0.3, SR0=0, SR1=0.5, T=24, p_H1=0.1)
        >>> bool(lo < hi)  # stronger observation, less likely H0
        True
    """
    # Tail probabilities P[SR_hat > SR | H] = 1 - PSR under each hypothesis.
    tail_H0 = 1 - probabilistic_sharpe_ratio(SR, SR0, T=T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K)
    tail_H1 = 1 - probabilistic_sharpe_ratio(SR, SR1, T=T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K)
    p_H0 = 1 - p_H1
    return tail_H0 * p_H0 / (tail_H0 * p_H0 + tail_H1 * p_H1)