Coverage for src/jsharpe/sharpe/corrections.py: 100%

1"""Multiple-testing corrections and false-discovery-rate control.

3This module groups the family-wise error rate corrections (Bonferroni,

4Šidák, Holm) and the false-discovery-rate routines (critical value, FDR

5control, predictive pFDR and observed oFDR) used when screening many

6candidate strategies.

7"""

8# ruff: noqa: N802, N803, N806, S101

10import math

11import warnings

13import numpy as np

14import scipy

16from .psr import probabilistic_sharpe_ratio, sharpe_ratio_variance

19def adjusted_p_values_bonferroni(ps: np.ndarray) -> np.ndarray:

20 """Adjust p-values using Bonferroni correction for FWER control.

22 Multiplies each p-value by the number of tests M, capping at 1.

23 This is the most conservative multiple testing correction.

25 Args:

26 ps: Array of unadjusted p-values.

28 Returns:

29 Array of Bonferroni-adjusted p-values, each in [0, 1].

31 Example:

32 >>> p_vals = np.array([0.01, 0.03, 0.05])

33 >>> adj_p = adjusted_p_values_bonferroni(p_vals)

34 >>> np.allclose(adj_p, [0.03, 0.09, 0.15])

35 True

36 """

37 M = len(ps)

38 result: np.ndarray = np.minimum(1, M * ps)

39 return result

42def adjusted_p_values_sidak(ps: np.ndarray) -> np.ndarray:

43 """Adjust p-values using Šidák correction for FWER control.

45 Uses the formula: 1 - (1 - p)^M, which is slightly less conservative

46 than Bonferroni when tests are independent.

48 Args:

49 ps: Array of unadjusted p-values.

51 Returns:

52 Array of Šidák-adjusted p-values, each in [0, 1].

54 Example:

55 >>> p_vals = np.array([0.01, 0.03, 0.05])

56 >>> adj_p = adjusted_p_values_sidak(p_vals)

57 >>> np.all(adj_p <= adjusted_p_values_bonferroni(p_vals))

58 np.True_

59 """

60 M = len(ps)

61 return 1 - (1 - ps) ** M

64def adjusted_p_values_holm(ps: np.ndarray, *, variant: str = "bonferroni") -> np.ndarray:

65 """Adjust p-values using Holm's step-down procedure for FWER control.

67 A step-down procedure that is uniformly more powerful than Bonferroni

68 while still controlling the Family-Wise Error Rate.

70 Args:

71 ps: Array of unadjusted p-values.

72 variant: Correction method for each step. Either "bonferroni"

73 (default) or "sidak".

75 Returns:

76 Array of Holm-adjusted p-values, each in [0, 1].

78 Raises:

79 AssertionError: If variant is not "bonferroni" or "sidak".

81 Example:

82 >>> p_vals = np.array([0.01, 0.04, 0.03])

83 >>> adj_p = adjusted_p_values_holm(p_vals)

84 >>> float(adj_p[0]) # Smallest p-value adjusted most

85 0.03

86 """

87 assert variant in ["bonferroni", "sidak"]

88 i = np.argsort(ps)

89 M = len(ps)

90 p_adjusted = np.zeros(M)

91 previous = 0

92 for j, idx in enumerate(i):

93 candidate = min(1, ps[idx] * (M - j)) if variant == "bonferroni" else 1 - (1 - ps[idx]) ** (M - j)

94 p_adjusted[idx] = max(previous, candidate)

95 previous = p_adjusted[idx]

96 return p_adjusted

99def FDR_critical_value(q: float, SR0: float, SR1: float, sigma0: float, sigma1: float, p_H1: float) -> float:

100 """Compute critical value for FDR control in hypothesis testing.

101

102 Given a mixture model where H ~ Bernoulli(p_H1) determines whether

103 X follows N(SR0, sigma0^2) or N(SR1, sigma1^2), finds the critical

104 value c such that P[H=0 | X > c] = q.

105

106 Args:

107 q: Desired False Discovery Rate in (0, 1).

108 SR0: Mean under null hypothesis (must be < SR1).

109 SR1: Mean under alternative hypothesis.

110 sigma0: Standard deviation under null hypothesis (must be > 0).

111 sigma1: Standard deviation under alternative hypothesis (must be > 0).

112 p_H1: Prior probability of alternative hypothesis in (0, 1).

113

114 Returns:

115 Critical value c. Returns -inf if solution is outside [-10, 10],

116 or nan if no solution exists.

117

118 Raises:

119 AssertionError: If parameters are out of valid ranges.

120

121 Example:

122 >>> c = FDR_critical_value(q=0.2, SR0=0, SR1=0.5, sigma0=0.2, sigma1=0.3, p_H1=0.1)

123 >>> c > 0 # Critical value should be positive

124 True

125 """

126 assert SR0 < SR1

127 assert 0 < q < 1

128 assert 0 < p_H1 < 1

129 assert sigma0 > 0

130 assert sigma1 > 0

131

132 with warnings.catch_warnings():

133 warnings.filterwarnings("ignore", message="invalid value encountered in scalar divide")

134 warnings.filterwarnings("ignore", message="divide by zero encountered in scalar divide")

135

136 def f(c: float) -> float:

137 """Compute the posterior probability P[H=0 | X > c].

138

139 Args:

140 c: Candidate critical value.

141

142 Returns:

143 Posterior false discovery probability at threshold c.

144 """

145 a = 1 / (

146 1

147 + scipy.stats.norm.sf((c - SR1) / sigma1) / scipy.stats.norm.sf((c - SR0) / sigma0) * p_H1 / (1 - p_H1)

148 )

149 return float(np.where(np.isfinite(a), a, 0))

150

151 if f(-10) < q: # Solution outside of the search interval

152 return float(-np.inf)

153

154 if (f(-10) - q) * (f(10) - q) > 0: # No solution, for instance if σ₀≫σ₁ and q small

155 return float(np.nan)

156

157 return float(scipy.optimize.brentq(lambda c: f(c) - q, -10, 10))

158

159

160def control_for_FDR(

161 q: float,

162 *,

163 SR0: float = 0,

164 SR1: float = 0.5,

165 p_H1: float = 0.05,

166 T: int = 24,

167 gamma3: float = 0.0,

168 gamma4: float = 3.0,

169 rho: float = 0.0,

170 K: int = 1,

171) -> tuple[float, float, float, float]:

172 """Compute critical value to test multiple Sharpe ratios controlling FDR.

173

174 Determines the critical Sharpe ratio threshold and associated error rates

175 to control the False Discovery Rate at level q when testing multiple

176 strategies.

177

178 Args:

179 q: Desired False Discovery Rate level in (0, 1).

180 SR0: Sharpe ratio under null hypothesis H0. Default 0.

181 SR1: Sharpe ratio under alternative hypothesis H1. Default 0.5.

182 p_H1: Prior probability that H1 is true. Default 0.05.

183 T: Number of observations. Default 24.

184 gamma3: Skewness of returns. Default 0.

185 gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).

186 rho: Autocorrelation of returns. Default 0.

187 K: Number of strategies (K=1 for FDR; K>1 for FWER-FDR). Default 1.

188

189 Returns:

190 Tuple of (alpha, beta, SR_c, q_hat):

191 - alpha: Significance level P[SR > SR_c | H0]

192 - beta: Type II error P[SR <= SR_c | H1]; power is 1 - beta

193 - SR_c: Critical Sharpe ratio threshold

194 - q_hat: Estimated FDR (should be close to q)

195

196 Example:

197 >>> alpha, beta, SR_c, q_hat = control_for_FDR(q=0.25, T=24)

198 >>> bool(0 < alpha < 1)

199 True

200 >>> bool(SR_c > 0) # Critical value is positive

201 True

202 """

203 Z = scipy.stats.norm.cdf

204

205 s0 = math.sqrt(sharpe_ratio_variance(SR0, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K))

206 s1 = math.sqrt(sharpe_ratio_variance(SR1, T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K))

207 SRc = FDR_critical_value(q, SR0, SR1, s0, s1, p_H1)

208

209 beta = Z((SRc - SR1) / s1)

210 alpha = q / (1 - q) * p_H1 / (1 - p_H1) * (1 - beta)

211 q_hat = 1 / (1 + (1 - beta) / alpha * p_H1 / (1 - p_H1))

212

213 return alpha, beta, SRc, q_hat

214

215

216def pFDR(

217 p_H1: float,

218 alpha: float,

219 beta: float,

220) -> float:

221 """Compute posterior FDR given test outcome exceeds critical value.

222

223 Calculates P[H0 | SR > SR_c], the probability that the null hypothesis

224 is true given that the observed Sharpe ratio exceeds the critical value.

225 This is the "predictive" FDR based on the critical value, not the

226 observed value.

227

228 Args:

229 p_H1: Prior probability that H1 is true.

230 alpha: Significance level (Type I error rate).

231 beta: Type II error rate (1 - power).

232

233 Returns:

234 Posterior probability of H0 given rejection.

235

236 Example:

237 >>> # With 5% prior on H1 and 5% significance

238 >>> fdr = pFDR(p_H1=0.05, alpha=0.05, beta=0.3)

239 >>> 0 < fdr < 1

240 True

241 """

242 p_H0 = 1 - p_H1

243 return 1 / (1 + (1 - beta) * p_H1 / alpha / p_H0)

244

245

246def oFDR(

247 SR: float,

248 SR0: float,

249 SR1: float,

250 T: int,

251 p_H1: float,

252 *,

253 gamma3: float = 0.0,

254 gamma4: float = 3.0,

255 rho: float = 0.0,

256 K: int = 1,

257) -> float:

258 """Compute observed FDR given the observed Sharpe ratio.

259

260 Calculates P[H0 | SR > SR_obs], the probability that the null hypothesis

261 is true given the observed Sharpe ratio value. This is the "observed"

262 FDR which conditions on the actual observation rather than just the

263 critical value.

264

265 Args:

266 SR: Observed Sharpe ratio.

267 SR0: Sharpe ratio under null hypothesis.

268 SR1: Sharpe ratio under alternative hypothesis.

269 T: Number of observations.

270 p_H1: Prior probability that H1 is true.

271 gamma3: Skewness of returns. Default 0.

272 gamma4: Kurtosis of returns (non-excess). Default 3 (Gaussian).

273 rho: Autocorrelation of returns. Default 0.

274 K: Number of strategies for variance adjustment. Default 1.

275

276 Returns:

277 Posterior probability of H0 given the observed SR.

278

279 Example:

280 >>> # Higher observed SR should give lower probability of H0

281 >>> fdr_low = oFDR(SR=0.3, SR0=0, SR1=0.5, T=24, p_H1=0.1)

282 >>> fdr_high = oFDR(SR=0.8, SR0=0, SR1=0.5, T=24, p_H1=0.1)

283 >>> bool(fdr_high < fdr_low)

284 True

285 """

286 p0 = 1 - probabilistic_sharpe_ratio(SR, SR0, T=T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K)

287 p1 = 1 - probabilistic_sharpe_ratio(SR, SR1, T=T, gamma3=gamma3, gamma4=gamma4, rho=rho, K=K)

288 p_H0 = 1 - p_H1

289 return p0 * p_H0 / (p0 * p_H0 + p1 * p_H1)