Coverage for src/jsharpe/sharpe/generators.py: 100%
55 statements
« prev ^ index » next coverage.py v7.14.3, created at 2026-06-29 13:57 +0000
« prev ^ index » next coverage.py v7.14.3, created at 2026-06-29 13:57 +0000
1"""Synthetic return-data generators and autocorrelation estimation.
3This module groups the simulation helpers used to generate
4(autocorrelated) non-Gaussian return series and block-structured random
5correlation matrices, plus the mean first-order autocorrelation estimator.
6"""
7# ruff: noqa: N802, N803, N806, S101
9import numpy as np
10import scipy
12from .linalg import ppoints
def generate_autocorrelated_non_gaussian_data(
    N: int,
    n: int,
    SR0: float = 0,
    name: str = "gaussian",
    rho: float | None = None,
    gaussian_autocorrelation: float = 0,
) -> np.ndarray:
    """Simulate serially correlated returns with non-Gaussian marginals.

    The construction is copula-like:

    1. Draw a large reference sample from the target marginal and turn it
       into an empirical quantile function (inverse CDF) by interpolation.
    2. Simulate one stationary Gaussian AR(1) path per column.
    3. Map the Gaussian paths to uniforms with the standard normal CDF.
    4. Map the uniforms to the target marginal with the quantile function.

    Args:
        N: Number of time periods (rows).
        n: Number of assets/strategies (columns).
        SR0: Target Sharpe ratio of the marginal distribution. Default 0.
        name: Marginal distribution type, forwarded to
            ``generate_non_gaussian_data``. One of "gaussian", "mild",
            "moderate", "severe". Default "gaussian".
        rho: AR(1) coefficient of the latent Gaussian process. If None,
            ``gaussian_autocorrelation`` is used instead.
        gaussian_autocorrelation: Fallback AR(1) coefficient used when
            ``rho`` is None. Default 0.

    Returns:
        Array of shape (N, n) containing simulated returns.

    Example:
        >>> np.random.seed(42)
        >>> X = generate_autocorrelated_non_gaussian_data(100, 2, SR0=0.1, name="mild")
        >>> X.shape
        (100, 2)
    """
    # For the distributions considered here the autocorrelation of the output
    # is almost the same as that of the latent Gaussian process, so the two
    # parameters are treated as interchangeable.
    phi = gaussian_autocorrelation if rho is None else rho

    # Empirical quantile function of the target marginal, built from a large
    # reference sample. ``ppoints`` supplies the abscissae (presumably
    # plotting-position probabilities in (0, 1) - confirm in ``.linalg``).
    reference_size = 10_000
    reference_sample = generate_non_gaussian_data(reference_size, 1, SR0=SR0, name=name)[:, 0]
    quantile_function = scipy.interpolate.interp1d(
        ppoints(reference_size),
        np.sort(reference_sample),
        fill_value="extrapolate",
    )

    # Latent Gaussian AR(1) paths. Row 0 is left as N(0, 1); scaling each
    # innovation by sqrt(1 - phi^2) keeps the later rows at unit variance.
    latent = np.random.normal(size=(N, n))
    for t in range(1, N):
        latent[t] = phi * latent[t - 1] + np.sqrt(1 - phi**2) * latent[t]

    # Gaussian -> uniform -> target marginal.
    uniforms = scipy.stats.norm.cdf(latent)
    simulated: np.ndarray = quantile_function(uniforms)
    return simulated
def get_random_correlation_matrix(
    number_of_trials: int = 100,
    effective_number_of_trials: int = 10,
    number_of_observations: int = 200,
    noise: float = 0.1,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate a random correlation matrix with block structure.

    The series are split into contiguous, non-empty clusters of random size.
    Every cluster shares one standard-normal factor; each series is that
    factor plus independent Gaussian noise scaled by ``noise``. The returned
    matrix is the sample correlation of the simulated series, so series in
    the same cluster are highly correlated and series in different clusters
    are only spuriously correlated.

    Args:
        number_of_trials: Number of time series (strategies). Default 100.
        effective_number_of_trials: Number of clusters. Must satisfy
            ``1 <= effective_number_of_trials <= number_of_trials``.
            Default 10.
        number_of_observations: Number of time periods to simulate. Default 200.
        noise: Scale of the idiosyncratic noise added to each series.
            Default 0.1.

    Returns:
        Tuple of (C, X, clusters):
            - C: Correlation matrix of shape (number_of_trials, number_of_trials)
            - X: Data matrix of shape (number_of_observations, number_of_trials)
            - clusters: Cluster index (0-based, non-decreasing) of each series

    Raises:
        ValueError: If the number of clusters is smaller than 1 or larger
            than the number of series; no partition into non-empty clusters
            exists in that case.

    Example:
        >>> np.random.seed(42)
        >>> C, X, clusters = get_random_correlation_matrix(
        ...     number_of_trials=20, effective_number_of_trials=4
        ... )
        >>> C.shape
        (20, 20)
        >>> np.allclose(np.diag(C), 1)  # Diagonal is all ones
        True
    """
    # Without this guard the rejection loop below can never succeed (more
    # clusters than series, or no series at all) and would spin forever.
    if not 1 <= effective_number_of_trials <= number_of_trials:
        msg = (
            "effective_number_of_trials must be between 1 and number_of_trials "
            f"(got effective_number_of_trials={effective_number_of_trials}, "
            f"number_of_trials={number_of_trials})"
        )
        raise ValueError(msg)

    # Rejection-sample the cut points until every cluster is non-empty. The
    # draw is kept exactly as before so that seeded runs reproduce the same
    # partitions; note it can be slow when the number of clusters approaches
    # the number of series.
    while True:
        cut_points = np.random.choice(number_of_trials, effective_number_of_trials - 1, replace=True)
        block_positions = [0, *sorted(cut_points), number_of_trials]
        block_sizes = np.diff(block_positions)
        if np.all(block_sizes > 0):
            break

    # Label of the cluster each series belongs to: 0,0,...,1,1,...,k-1.
    clusters = np.repeat(np.arange(effective_number_of_trials), block_sizes)

    # One common factor per cluster.
    X0 = np.random.normal(size=(number_of_observations, effective_number_of_trials))

    # Idiosyncratic noise, drawn series-by-series (one row per series, then
    # transposed) so the random stream is consumed in the same order as a
    # per-column loop would consume it.
    idiosyncratic = np.random.normal(size=(number_of_trials, number_of_observations)).T
    X = np.ascontiguousarray(X0[:, clusters] + noise * idiosyncratic)

    # ``np.corrcoef`` collapses to a scalar for a single series; promote it
    # back to a 1x1 matrix so the diagonal fix-up below works.
    C = np.atleast_2d(np.asarray(np.corrcoef(X, rowvar=False)))
    np.fill_diagonal(C, 1)  # rounding errors
    C = np.clip(C, -1, 1)
    return C, X, clusters
def generate_non_gaussian_data(
    nr: int,
    nc: int,
    *,
    SR0: float = 0,
    name: str = "severe",
) -> np.ndarray:
    """Generate non-Gaussian return data with specified characteristics.

    Each entry is drawn independently from a two-component Gaussian mixture:
    a "core" component and, with small probability, a "tail" component with
    a negative mean and larger standard deviation. The core mean is chosen so
    the baseline mixture has zero mean; the whole distribution is then shifted
    so that its population Sharpe ratio (mean / standard deviation) is
    ``SR0``. A pure shift leaves skewness and kurtosis unchanged.

    Args:
        nr: Number of rows (observations/time periods).
        nc: Number of columns (assets/strategies).
        SR0: Target Sharpe ratio. Default 0.
        name: Distribution severity. One of:
            - "gaussian": No skewness or kurtosis
            - "mild": Slight negative skew and excess kurtosis
            - "moderate": Moderate negative skew and excess kurtosis
            - "severe": Strong negative skew and excess kurtosis
            Default "severe".

    Returns:
        Array of shape (nr, nc) containing simulated returns.

    Raises:
        AssertionError: If name is not a valid distribution type.

    Example:
        >>> np.random.seed(42)
        >>> X = generate_non_gaussian_data(1000, 1, SR0=0.2, name="mild")
        >>> X.shape
        (1000, 1)
    """
    # Mixture parameters per severity: (p_tail, mu_tail, sigma_tail, sigma_core).
    configs = {
        "gaussian": (0, 0, 0.015, 0.010),
        "mild": (0.04, -0.03, 0.015, 0.010),
        "moderate": (0.03, -0.045, 0.020, 0.010),
        "severe": (0.02, -0.060, 0.025, 0.010),
    }
    if name not in configs:
        # Raised explicitly rather than through an ``assert`` statement so the
        # documented AssertionError is still produced under ``python -O``.
        msg = f"Unknown distribution name {name!r}; expected one of {sorted(configs)}"
        raise AssertionError(msg)
    p_tail, mu_tail, sigma_tail, sigma_core = configs[name]

    # Core mean that makes the baseline mixture zero-mean.
    mu_core = -p_tail * mu_tail / (1.0 - p_tail)

    # Variance of the baseline mixture: E[X^2] - E[X]^2.
    w_core = 1.0 - p_tail
    mixture_mean = w_core * mu_core + p_tail * mu_tail
    second_moment = w_core * (sigma_core**2 + mu_core**2) + p_tail * (sigma_tail**2 + mu_tail**2)
    baseline_std = np.sqrt(float(second_moment - mixture_mean**2))

    # Shifting both components by the same amount sets the population Sharpe
    # ratio to SR0 while preserving skewness and kurtosis.
    mu_shift = SR0 * baseline_std

    # Draw order (tail mask, core values, tail values) is kept fixed so seeded
    # runs stay reproducible.
    shape = (nr, nc)
    is_tail = np.random.uniform(size=shape) < p_tail
    X = np.random.normal(mu_core + mu_shift, sigma_core, size=shape)
    X[is_tail] = np.random.normal(mu_tail + mu_shift, sigma_tail, size=is_tail.sum())
    return X
def autocorrelation(X: np.ndarray) -> float:
    """Return the average lag-1 autocorrelation of the columns of ``X``.

    For every column, the Pearson correlation between the series and the same
    series shifted by one observation is computed; the result is the plain
    mean of those per-column coefficients.

    Args:
        X: Data matrix of shape (n_observations, n_series).

    Returns:
        Mean autocorrelation coefficient across all columns.

    Example:
        >>> np.random.seed(42)
        >>> # Generate AR(1) process with rho=0.5
        >>> n = 1000
        >>> X = np.zeros((n, 1))
        >>> X[0] = np.random.normal()
        >>> for i in range(1, n):
        ...     X[i] = 0.5 * X[i-1] + np.sqrt(1-0.25) * np.random.normal()
        >>> ac = autocorrelation(X)
        >>> bool(0.4 < ac < 0.6)  # Should be close to 0.5
        True
    """
    _, n_series = X.shape
    # Off-diagonal entry of the 2x2 correlation matrix between each column
    # without its first observation and the same column without its last.
    lag_one = np.array(
        [np.corrcoef(X[1:, j], X[:-1, j])[0, 1] for j in range(n_series)],
        dtype=float,
    )
    return float(lag_one.mean())