Coverage for src/jsharpe/sharpe/corrections.py: 100%
57 statements
« prev ^ index » next coverage.py v7.14.3, created at 2026-06-29 13:57 +0000
« prev ^ index » next coverage.py v7.14.3, created at 2026-06-29 13:57 +0000
1"""Multiple-testing corrections and false-discovery-rate control.
3This module groups the family-wise error rate corrections (Bonferroni,
4Šidák, Holm) and the false-discovery-rate routines (critical value, FDR
5control, predictive pFDR and observed oFDR) used when screening many
6candidate strategies.
7"""
8# ruff: noqa: N802, N803, N806, S101
10import math
11import warnings
13import numpy as np
14import scipy
16from .psr import probabilistic_sharpe_ratio, sharpe_ratio_variance
19def adjusted_p_values_bonferroni(ps: np.ndarray) -> np.ndarray:
20 """Adjust p-values using Bonferroni correction for FWER control.
22 Multiplies each p-value by the number of tests M, capping at 1.
23 This is the most conservative multiple testing correction.
25 Args:
26 ps: Array of unadjusted p-values.
28 Returns:
29 Array of Bonferroni-adjusted p-values, each in [0, 1].
31 Example:
32 >>> p_vals = np.array([0.01, 0.03, 0.05])
33 >>> adj_p = adjusted_p_values_bonferroni(p_vals)
34 >>> np.allclose(adj_p, [0.03, 0.09, 0.15])
35 True
36 """
37 M = len(ps)
38 result: np.ndarray = np.minimum(1, M * ps)
39 return result
42def adjusted_p_values_sidak(ps: np.ndarray) -> np.ndarray:
43 """Adjust p-values using Šidák correction for FWER control.
45 Uses the formula: 1 - (1 - p)^M, which is slightly less conservative
46 than Bonferroni when tests are independent.
48 Args:
49 ps: Array of unadjusted p-values.
51 Returns:
52 Array of Šidák-adjusted p-values, each in [0, 1].
54 Example:
55 >>> p_vals = np.array([0.01, 0.03, 0.05])
56 >>> adj_p = adjusted_p_values_sidak(p_vals)
57 >>> np.all(adj_p <= adjusted_p_values_bonferroni(p_vals))
58 np.True_
59 """
60 M = len(ps)
61 return 1 - (1 - ps) ** M
64def adjusted_p_values_holm(ps: np.ndarray, *, variant: str = "bonferroni") -> np.ndarray:
65 """Adjust p-values using Holm's step-down procedure for FWER control.
67 A step-down procedure that is uniformly more powerful than Bonferroni
68 while still controlling the Family-Wise Error Rate.
70 Args:
71 ps: Array of unadjusted p-values.
72 variant: Correction method for each step. Either "bonferroni"
73 (default) or "sidak".
75 Returns:
76 Array of Holm-adjusted p-values, each in [0, 1].
78 Raises:
79 AssertionError: If variant is not "bonferroni" or "sidak".
81 Example:
82 >>> p_vals = np.array([0.01, 0.04, 0.03])
83 >>> adj_p = adjusted_p_values_holm(p_vals)
84 >>> float(adj_p[0]) # Smallest p-value adjusted most
85 0.03
86 """
87 assert variant in ["bonferroni", "sidak"]
88 i = np.argsort(ps)
89 M = len(ps)
90 p_adjusted = np.zeros(M)
91 previous = 0
92 for j, idx in enumerate(i):
93 candidate = min(1, ps[idx] * (M - j)) if variant == "bonferroni" else 1 - (1 - ps[idx]) ** (M - j)
94 p_adjusted[idx] = max(previous, candidate)
95 previous = p_adjusted[idx]
96 return p_adjusted
99def FDR_critical_value(q: float, SR0: float, SR1: float, sigma0: float, sigma1: float, p_H1: float) -> float:
100 """Compute critical value for FDR control in hypothesis testing.
102 Given a mixture model where H ~ Bernoulli(p_H1) determines whether
103 X follows N(SR0, sigma0^2) or N(SR1, sigma1^2), finds the critical
104 value c such that P[H=0 | X > c] = q.
106 Args:
107 q: Desired False Discovery Rate in (0, 1).
108 SR0: Mean under null hypothesis (must be < SR1).
109 SR1: Mean under alternative hypothesis.
110 sigma0: Standard deviation under null hypothesis (must be > 0).
111 sigma1: Standard deviation under alternative hypothesis (must be > 0).
112 p_H1: Prior probability of alternative hypothesis in (0, 1).
114 Returns:
115 Critical value c. Returns -inf if solution is outside [-10, 10],
116 or nan if no solution exists.
118 Raises:
119 AssertionError: If parameters are out of valid ranges.
121 Example:
122 >>> c = FDR_critical_value(q=0.2, SR0=0, SR1=0.5, sigma0=0.2, sigma1=0.3, p_H1=0.1)
123 >>> c > 0 # Critical value should be positive
124 True
125 """
126 assert SR0 < SR1
127 assert 0 < q < 1
128 assert 0 < p_H1 < 1
129 assert sigma0 > 0
130 assert sigma1 > 0
132 with warnings.catch_warnings():
133 warnings.filterwarnings("ignore", message="invalid value encountered in scalar divide")
134 warnings.filterwarnings("ignore", message="divide by zero encountered in scalar divide")
136 def f(c: float) -> float:
137 """Compute the posterior probability P[H=0 | X > c].
139 Args:
140 c: Candidate critical value.
142 Returns:
143 Posterior false discovery probability at threshold c.
144 """
145 a = 1 / (
146 1
147 + scipy.stats.norm.sf((c - SR1) / sigma1) / scipy.stats.norm.sf((c - SR0) / sigma0) * p_H1 / (1 - p_H1)
148 )
149 return float(np.where(np.isfinite(a), a, 0))
151 if f(-10) < q: # Solution outside of the search interval
152 return float(-np.inf)
154 if (f(-10) - q) * (f(10) - q) > 0: # No solution, for instance if σ₀≫σ₁ and q small
155 return float(np.nan)
157 return float(scipy.optimize.brentq(lambda c: f(c) - q, -10, 10))
160def control_for_FDR(
161 q: float,
162 *,
163 SR0: float = 0,
164 SR1: float = 0.5,
165 p_H1: float = 0.05,
166 T: int = 24,
167 gamma3: float = 0.0,
168 gamma4: float = 3.0,
169 rho: float = 0.0,
170 K: int = 1,
171) -> tuple[float, float, float, float]:
172 """Compute critical value to test multiple Sharpe ratios controlling FDR.
174 Determines the critical Sharpe ratio threshold and associated error rates
175 to control the False Discovery Rate at level q when testing multiple
176 strategies.
178 Args:
179 q: Desired False Discovery Rate level in (0, 1).
180 SR0: Sharpe ratio under null hypothesis H0. Default 0.
181 SR1: Sharpe ratio under alternative hypothesis H1. Default 0.5.
182 p_H1: Prior probability that H1 is true. Default 0.05.
183 T: Number of observations. Default 24.
184 gamma3: Skewness of returns. Default 0.
185 gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
186 rho: Autocorrelation of returns. Default 0.
187 K: Number of strategies (K=1 for FDR; K>1 for FWER-FDR). Default 1.
189 Returns:
190 Tuple of (alpha, beta, SR_c, q_hat):
191 - alpha: Significance level P[SR > SR_c | H0]
192 - beta: Type II error P[SR <= SR_c | H1]; power is 1 - beta
193 - SR_c: Critical Sharpe ratio threshold
194 - q_hat: Estimated FDR (should be close to q)
196 Example:
197 >>> alpha, beta, SR_c, q_hat = control_for_FDR(q=0.25, T=24)
198 >>> bool(0 < alpha < 1)
199 True
200 >>> bool(SR_c > 0) # Critical value is positive
201 True
202 """
203 Z = scipy.stats.norm.cdf
205 s0 = math.sqrt(sharpe_ratio_variance(SR0, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K))
206 s1 = math.sqrt(sharpe_ratio_variance(SR1, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K))
207 SRc = FDR_critical_value(q, SR0, SR1, s0, s1, p_H1)
209 beta = Z((SRc - SR1) / s1)
210 alpha = q / (1 - q) * p_H1 / (1 - p_H1) * (1 - beta)
211 q_hat = 1 / (1 + (1 - beta) / alpha * p_H1 / (1 - p_H1))
213 return alpha, beta, SRc, q_hat
216def pFDR(
217 p_H1: float,
218 alpha: float,
219 beta: float,
220) -> float:
221 """Compute posterior FDR given test outcome exceeds critical value.
223 Calculates P[H0 | SR > SR_c], the probability that the null hypothesis
224 is true given that the observed Sharpe ratio exceeds the critical value.
225 This is the "predictive" FDR based on the critical value, not the
226 observed value.
228 Args:
229 p_H1: Prior probability that H1 is true.
230 alpha: Significance level (Type I error rate).
231 beta: Type II error rate (1 - power).
233 Returns:
234 Posterior probability of H0 given rejection.
236 Example:
237 >>> # With 5% prior on H1 and 5% significance
238 >>> fdr = pFDR(p_H1=0.05, alpha=0.05, beta=0.3)
239 >>> 0 < fdr < 1
240 True
241 """
242 p_H0 = 1 - p_H1
243 return 1 / (1 + (1 - beta) * p_H1 / alpha / p_H0)
246def oFDR(
247 SR: float,
248 SR0: float,
249 SR1: float,
250 T: int,
251 p_H1: float,
252 *,
253 gamma3: float = 0.0,
254 gamma4: float = 3.0,
255 rho: float = 0.0,
256 K: int = 1,
257) -> float:
258 """Compute observed FDR given the observed Sharpe ratio.
260 Calculates P[H0 | SR > SR_obs], the probability that the null hypothesis
261 is true given the observed Sharpe ratio value. This is the "observed"
262 FDR which conditions on the actual observation rather than just the
263 critical value.
265 Args:
266 SR: Observed Sharpe ratio.
267 SR0: Sharpe ratio under null hypothesis.
268 SR1: Sharpe ratio under alternative hypothesis.
269 T: Number of observations.
270 p_H1: Prior probability that H1 is true.
271 gamma3: Skewness of returns. Default 0.
272 gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).
273 rho: Autocorrelation of returns. Default 0.
274 K: Number of strategies for variance adjustment. Default 1.
276 Returns:
277 Posterior probability of H0 given the observed SR.
279 Example:
280 >>> # Higher observed SR should give lower probability of H0
281 >>> fdr_low = oFDR(SR=0.3, SR0=0, SR1=0.5, T=24, p_H1=0.1)
282 >>> fdr_high = oFDR(SR=0.8, SR0=0, SR1=0.5, T=24, p_H1=0.1)
283 >>> bool(fdr_high < fdr_low)
284 True
285 """
286 p0 = 1 - probabilistic_sharpe_ratio(SR, SR0, T=T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K)
287 p1 = 1 - probabilistic_sharpe_ratio(SR, SR1, T=T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K)
288 p_H0 = 1 - p_H1
289 return p0 * p_H0 / (p0 * p_H0 + p1 * p_H1)