Coverage for src/jsharpe/sharpe/corrections.py: 100%

57 statements  

« prev     ^ index     » next       coverage.py v7.14.3, created at 2026-06-29 13:57 +0000

1"""Multiple-testing corrections and false-discovery-rate control. 

2 

3This module groups the family-wise error rate corrections (Bonferroni, 

4Šidák, Holm) and the false-discovery-rate routines (critical value, FDR 

5control, predictive pFDR and observed oFDR) used when screening many 

6candidate strategies. 

7""" 

8# ruff: noqa: N802, N803, N806, S101 

9 

10import math 

11import warnings 

12 

13import numpy as np 

14import scipy 

15 

16from .psr import probabilistic_sharpe_ratio, sharpe_ratio_variance 

17 

18 

19def adjusted_p_values_bonferroni(ps: np.ndarray) -> np.ndarray: 

20 """Adjust p-values using Bonferroni correction for FWER control. 

21 

22 Multiplies each p-value by the number of tests M, capping at 1. 

23 This is the most conservative multiple testing correction. 

24 

25 Args: 

26 ps: Array of unadjusted p-values. 

27 

28 Returns: 

29 Array of Bonferroni-adjusted p-values, each in [0, 1]. 

30 

31 Example: 

32 >>> p_vals = np.array([0.01, 0.03, 0.05]) 

33 >>> adj_p = adjusted_p_values_bonferroni(p_vals) 

34 >>> np.allclose(adj_p, [0.03, 0.09, 0.15]) 

35 True 

36 """ 

37 M = len(ps) 

38 result: np.ndarray = np.minimum(1, M * ps) 

39 return result 

40 

41 

42def adjusted_p_values_sidak(ps: np.ndarray) -> np.ndarray: 

43 """Adjust p-values using Šidák correction for FWER control. 

44 

45 Uses the formula: 1 - (1 - p)^M, which is slightly less conservative 

46 than Bonferroni when tests are independent. 

47 

48 Args: 

49 ps: Array of unadjusted p-values. 

50 

51 Returns: 

52 Array of Šidák-adjusted p-values, each in [0, 1]. 

53 

54 Example: 

55 >>> p_vals = np.array([0.01, 0.03, 0.05]) 

56 >>> adj_p = adjusted_p_values_sidak(p_vals) 

57 >>> np.all(adj_p <= adjusted_p_values_bonferroni(p_vals)) 

58 np.True_ 

59 """ 

60 M = len(ps) 

61 return 1 - (1 - ps) ** M 

62 

63 

64def adjusted_p_values_holm(ps: np.ndarray, *, variant: str = "bonferroni") -> np.ndarray: 

65 """Adjust p-values using Holm's step-down procedure for FWER control. 

66 

67 A step-down procedure that is uniformly more powerful than Bonferroni 

68 while still controlling the Family-Wise Error Rate. 

69 

70 Args: 

71 ps: Array of unadjusted p-values. 

72 variant: Correction method for each step. Either "bonferroni" 

73 (default) or "sidak". 

74 

75 Returns: 

76 Array of Holm-adjusted p-values, each in [0, 1]. 

77 

78 Raises: 

79 AssertionError: If variant is not "bonferroni" or "sidak". 

80 

81 Example: 

82 >>> p_vals = np.array([0.01, 0.04, 0.03]) 

83 >>> adj_p = adjusted_p_values_holm(p_vals) 

84 >>> float(adj_p[0]) # Smallest p-value adjusted most 

85 0.03 

86 """ 

87 assert variant in ["bonferroni", "sidak"] 

88 i = np.argsort(ps) 

89 M = len(ps) 

90 p_adjusted = np.zeros(M) 

91 previous = 0 

92 for j, idx in enumerate(i): 

93 candidate = min(1, ps[idx] * (M - j)) if variant == "bonferroni" else 1 - (1 - ps[idx]) ** (M - j) 

94 p_adjusted[idx] = max(previous, candidate) 

95 previous = p_adjusted[idx] 

96 return p_adjusted 

97 

98 

99def FDR_critical_value(q: float, SR0: float, SR1: float, sigma0: float, sigma1: float, p_H1: float) -> float: 

100 """Compute critical value for FDR control in hypothesis testing. 

101 

102 Given a mixture model where H ~ Bernoulli(p_H1) determines whether 

103 X follows N(SR0, sigma0^2) or N(SR1, sigma1^2), finds the critical 

104 value c such that P[H=0 | X > c] = q. 

105 

106 Args: 

107 q: Desired False Discovery Rate in (0, 1). 

108 SR0: Mean under null hypothesis (must be < SR1). 

109 SR1: Mean under alternative hypothesis. 

110 sigma0: Standard deviation under null hypothesis (must be > 0). 

111 sigma1: Standard deviation under alternative hypothesis (must be > 0). 

112 p_H1: Prior probability of alternative hypothesis in (0, 1). 

113 

114 Returns: 

115 Critical value c. Returns -inf if solution is outside [-10, 10], 

116 or nan if no solution exists. 

117 

118 Raises: 

119 AssertionError: If parameters are out of valid ranges. 

120 

121 Example: 

122 >>> c = FDR_critical_value(q=0.2, SR0=0, SR1=0.5, sigma0=0.2, sigma1=0.3, p_H1=0.1) 

123 >>> c > 0 # Critical value should be positive 

124 True 

125 """ 

126 assert SR0 < SR1 

127 assert 0 < q < 1 

128 assert 0 < p_H1 < 1 

129 assert sigma0 > 0 

130 assert sigma1 > 0 

131 

132 with warnings.catch_warnings(): 

133 warnings.filterwarnings("ignore", message="invalid value encountered in scalar divide") 

134 warnings.filterwarnings("ignore", message="divide by zero encountered in scalar divide") 

135 

136 def f(c: float) -> float: 

137 """Compute the posterior probability P[H=0 | X > c]. 

138 

139 Args: 

140 c: Candidate critical value. 

141 

142 Returns: 

143 Posterior false discovery probability at threshold c. 

144 """ 

145 a = 1 / ( 

146 1 

147 + scipy.stats.norm.sf((c - SR1) / sigma1) / scipy.stats.norm.sf((c - SR0) / sigma0) * p_H1 / (1 - p_H1) 

148 ) 

149 return float(np.where(np.isfinite(a), a, 0)) 

150 

151 if f(-10) < q: # Solution outside of the search interval 

152 return float(-np.inf) 

153 

154 if (f(-10) - q) * (f(10) - q) > 0: # No solution, for instance if σ₀≫σ₁ and q small 

155 return float(np.nan) 

156 

157 return float(scipy.optimize.brentq(lambda c: f(c) - q, -10, 10)) 

158 

159 

160def control_for_FDR( 

161 q: float, 

162 *, 

163 SR0: float = 0, 

164 SR1: float = 0.5, 

165 p_H1: float = 0.05, 

166 T: int = 24, 

167 gamma3: float = 0.0, 

168 gamma4: float = 3.0, 

169 rho: float = 0.0, 

170 K: int = 1, 

171) -> tuple[float, float, float, float]: 

172 """Compute critical value to test multiple Sharpe ratios controlling FDR. 

173 

174 Determines the critical Sharpe ratio threshold and associated error rates 

175 to control the False Discovery Rate at level q when testing multiple 

176 strategies. 

177 

178 Args: 

179 q: Desired False Discovery Rate level in (0, 1). 

180 SR0: Sharpe ratio under null hypothesis H0. Default 0. 

181 SR1: Sharpe ratio under alternative hypothesis H1. Default 0.5. 

182 p_H1: Prior probability that H1 is true. Default 0.05. 

183 T: Number of observations. Default 24. 

184 gamma3: Skewness of returns. Default 0. 

185 gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian). 

186 rho: Autocorrelation of returns. Default 0. 

187 K: Number of strategies (K=1 for FDR; K>1 for FWER-FDR). Default 1. 

188 

189 Returns: 

190 Tuple of (alpha, beta, SR_c, q_hat): 

191 - alpha: Significance level P[SR > SR_c | H0] 

192 - beta: Type II error P[SR <= SR_c | H1]; power is 1 - beta 

193 - SR_c: Critical Sharpe ratio threshold 

194 - q_hat: Estimated FDR (should be close to q) 

195 

196 Example: 

197 >>> alpha, beta, SR_c, q_hat = control_for_FDR(q=0.25, T=24) 

198 >>> bool(0 < alpha < 1) 

199 True 

200 >>> bool(SR_c > 0) # Critical value is positive 

201 True 

202 """ 

203 Z = scipy.stats.norm.cdf 

204 

205 s0 = math.sqrt(sharpe_ratio_variance(SR0, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K)) 

206 s1 = math.sqrt(sharpe_ratio_variance(SR1, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K)) 

207 SRc = FDR_critical_value(q, SR0, SR1, s0, s1, p_H1) 

208 

209 beta = Z((SRc - SR1) / s1) 

210 alpha = q / (1 - q) * p_H1 / (1 - p_H1) * (1 - beta) 

211 q_hat = 1 / (1 + (1 - beta) / alpha * p_H1 / (1 - p_H1)) 

212 

213 return alpha, beta, SRc, q_hat 

214 

215 

216def pFDR( 

217 p_H1: float, 

218 alpha: float, 

219 beta: float, 

220) -> float: 

221 """Compute posterior FDR given test outcome exceeds critical value. 

222 

223 Calculates P[H0 | SR > SR_c], the probability that the null hypothesis 

224 is true given that the observed Sharpe ratio exceeds the critical value. 

225 This is the "predictive" FDR based on the critical value, not the 

226 observed value. 

227 

228 Args: 

229 p_H1: Prior probability that H1 is true. 

230 alpha: Significance level (Type I error rate). 

231 beta: Type II error rate (1 - power). 

232 

233 Returns: 

234 Posterior probability of H0 given rejection. 

235 

236 Example: 

237 >>> # With 5% prior on H1 and 5% significance 

238 >>> fdr = pFDR(p_H1=0.05, alpha=0.05, beta=0.3) 

239 >>> 0 < fdr < 1 

240 True 

241 """ 

242 p_H0 = 1 - p_H1 

243 return 1 / (1 + (1 - beta) * p_H1 / alpha / p_H0) 

244 

245 

246def oFDR( 

247 SR: float, 

248 SR0: float, 

249 SR1: float, 

250 T: int, 

251 p_H1: float, 

252 *, 

253 gamma3: float = 0.0, 

254 gamma4: float = 3.0, 

255 rho: float = 0.0, 

256 K: int = 1, 

257) -> float: 

258 """Compute observed FDR given the observed Sharpe ratio. 

259 

260 Calculates P[H0 | SR > SR_obs], the probability that the null hypothesis 

261 is true given the observed Sharpe ratio value. This is the "observed" 

262 FDR which conditions on the actual observation rather than just the 

263 critical value. 

264 

265 Args: 

266 SR: Observed Sharpe ratio. 

267 SR0: Sharpe ratio under null hypothesis. 

268 SR1: Sharpe ratio under alternative hypothesis. 

269 T: Number of observations. 

270 p_H1: Prior probability that H1 is true. 

271 gamma3: Skewness of returns. Default 0. 

272 gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian). 

273 rho: Autocorrelation of returns. Default 0. 

274 K: Number of strategies for variance adjustment. Default 1. 

275 

276 Returns: 

277 Posterior probability of H0 given the observed SR. 

278 

279 Example: 

280 >>> # Higher observed SR should give lower probability of H0 

281 >>> fdr_low = oFDR(SR=0.3, SR0=0, SR1=0.5, T=24, p_H1=0.1) 

282 >>> fdr_high = oFDR(SR=0.8, SR0=0, SR1=0.5, T=24, p_H1=0.1) 

283 >>> bool(fdr_high < fdr_low) 

284 True 

285 """ 

286 p0 = 1 - probabilistic_sharpe_ratio(SR, SR0, T=T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K) 

287 p1 = 1 - probabilistic_sharpe_ratio(SR, SR1, T=T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K) 

288 p_H0 = 1 - p_H1 

289 return p0 * p_H0 / (p0 * p_H0 + p1 * p_H1)