Coverage for src / jquantstats / _data.py: 99%

78 statements  

« prev     ^ index     » next       coverage.py v7.13.2, created at 2026-02-03 02:21 +0000

1from __future__ import annotations 

2 

3import dataclasses 

4from collections.abc import Iterator 

5from datetime import timedelta 

6from typing import TYPE_CHECKING, cast 

7 

8import polars as pl 

9 

10if TYPE_CHECKING: 

11 from ._plots import Plots 

12 from ._reports import Reports 

13 from ._stats import Stats 

14 

15 

@dataclasses.dataclass(frozen=True)
class Data:
    """A container for financial returns data and an optional benchmark.

    This class provides methods for analyzing and manipulating financial returns data,
    including resampling to different time periods, copying, and slicing. It also
    provides access to statistical metrics through the ``stats`` property,
    visualization through the ``plots`` property and reporting through the
    ``reports`` property.

    Attributes:
        returns (pl.DataFrame): DataFrame containing returns data with assets as columns.
        index (pl.DataFrame): DataFrame containing the date index for the returns data.
            Its first column is the one validated and used for resampling.
        benchmark (pl.DataFrame, optional): DataFrame containing benchmark returns data.
            Defaults to None.

    """

    returns: pl.DataFrame
    index: pl.DataFrame
    benchmark: pl.DataFrame | None = None

    def __post_init__(self) -> None:
        """Validate that index, returns and benchmark are mutually consistent.

        Raises:
            ValueError: If the index has fewer than two rows, if the first index
                column is not sorted in ascending order, or if the row count of
                returns (or of the benchmark, when given) differs from the index.

        """
        row_count = self.index.shape[0]

        # You need at least two points
        if row_count < 2:
            raise ValueError("Index must contain at least two timestamps.")  # noqa: TRY003

        # Only the first index column is inspected for ordering
        datetime_col = self.index[self.index.columns[0]]
        if not datetime_col.is_sorted():
            raise ValueError("Index must be monotonically increasing.")  # noqa: TRY003

        # Check row count matches returns
        if self.returns.shape[0] != row_count:
            raise ValueError("Returns and index must have the same number of rows.")  # noqa: TRY003

        # Check row count matches benchmark (if provided)
        if self.benchmark is not None and self.benchmark.shape[0] != row_count:
            raise ValueError("Benchmark and index must have the same number of rows.")  # noqa: TRY003

    @property
    def plots(self) -> Plots:
        """Provides access to visualization methods for the financial data.

        Returns:
            Plots: An instance of the Plots class initialized with this data.

        """
        # Imported lazily; at module level the name is only imported for type checking.
        from ._plots import Plots

        return Plots(self)

    @property
    def stats(self) -> Stats:
        """Provides access to statistical analysis methods for the financial data.

        Returns:
            Stats: An instance of the Stats class initialized with this data.

        """
        # Imported lazily; at module level the name is only imported for type checking.
        from ._stats import Stats

        return Stats(self)

    @property
    def reports(self) -> Reports:
        """Provides access to reporting methods for the financial data.

        Returns:
            Reports: An instance of the Reports class initialized with this data.

        """
        # Imported lazily; at module level the name is only imported for type checking.
        from ._reports import Reports

        return Reports(self)

    @property
    def date_col(self) -> list[str]:
        """Return the column names of the index DataFrame.

        Returns:
            list[str]: List of column names in the index DataFrame, typically containing
                the date column name.

        """
        return list(self.index.columns)

    @property
    def assets(self) -> list[str]:
        """Return the combined list of asset column names from returns and benchmark.

        Returns:
            list[str]: Column names of ``returns`` followed by the column names of
                ``benchmark`` (if available).

        """
        names = list(self.returns.columns)
        if self.benchmark is not None:
            names += list(self.benchmark.columns)
        return names

    @property
    def all(self) -> pl.DataFrame:
        """Combine index, returns, and benchmark data into a single DataFrame.

        This property provides a convenient way to access all data in a single DataFrame,
        which is useful for analysis and visualization.

        Returns:
            pl.DataFrame: A DataFrame containing the index, all returns data, and benchmark data
                (if available) combined horizontally, in that order.

        """
        frames = [self.index, self.returns]
        if self.benchmark is not None:
            frames.append(self.benchmark)
        return pl.concat(frames, how="horizontal")

    def resample(self, every: str = "1mo") -> Data:
        """Resamples returns and benchmark to a different frequency using Polars.

        Returns inside each window are always compounded, i.e. every output value is
        ``prod(1 + r) - 1`` over the rows of that window. Windows are built on the
        first index column, closed on the right and labelled by their right edge.

        Args:
            every (str, optional): Resampling frequency (e.g., '1mo', '1y'). It is used
                both as the window step and the window length. Defaults to '1mo'.

        Returns:
            Data: Resampled data.

        Raises:
            ValueError: If the resampled data fails the validation performed when the
                new ``Data`` object is built (e.g. fewer than two windows remain).

        """
        date_name = self.index.columns[0]

        def resample_frame(dframe: pl.DataFrame) -> pl.DataFrame:
            dframe = self.index.hstack(dframe)  # Add the date column for resampling

            # Compound every non-date column within each window
            compounded = [
                ((pl.col(col) + 1.0).product() - 1.0).alias(col)
                for col in dframe.columns
                if col != date_name
            ]
            return dframe.group_by_dynamic(
                index_column=date_name, every=every, period=every, closed="right", label="right"
            ).agg(compounded)

        resampled_returns = resample_frame(self.returns)
        resampled_benchmark = resample_frame(self.benchmark) if self.benchmark is not None else None

        # The new index is the window-label column produced while resampling the returns
        resampled_index = resampled_returns.select(date_name)

        return Data(
            returns=resampled_returns.drop(date_name),
            benchmark=resampled_benchmark.drop(date_name) if resampled_benchmark is not None else None,
            index=resampled_index,
        )

    def copy(self) -> Data:
        """Create a copy of the Data object.

        Every frame is duplicated with ``DataFrame.clone()``.

        Returns:
            Data: A new Data object with clones of the returns, the index and
                the benchmark (if available).

        """
        benchmark_copy = self.benchmark.clone() if self.benchmark is not None else None
        return Data(returns=self.returns.clone(), benchmark=benchmark_copy, index=self.index.clone())

    def head(self, n: int = 5) -> Data:
        """Return the first n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the first n rows of the combined data.

        Raises:
            ValueError: If fewer than two rows remain, because the new object is
                validated on construction.

        """
        benchmark_head = self.benchmark.head(n) if self.benchmark is not None else None
        return Data(returns=self.returns.head(n), benchmark=benchmark_head, index=self.index.head(n))

    def tail(self, n: int = 5) -> Data:
        """Return the last n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the last n rows of the combined data.

        Raises:
            ValueError: If fewer than two rows remain, because the new object is
                validated on construction.

        """
        benchmark_tail = self.benchmark.tail(n) if self.benchmark is not None else None
        return Data(returns=self.returns.tail(n), benchmark=benchmark_tail, index=self.index.tail(n))

    @property
    def _periods_per_year(self) -> float:
        """Estimate the number of periods per year based on average frequency in the index.

        Assumes `self.index` is a Polars DataFrame whose first column holds the
        timestamps. The estimate is the number of seconds in a 365-day year
        divided by the mean gap between consecutive timestamps.

        Returns:
            float: Estimated number of observations per year.

        Raises:
            ZeroDivisionError: If the mean gap between timestamps is zero, e.g.
                when every timestamp in the index is identical.

        """
        # Extract the datetime column (the first index column)
        datetime_col = self.index[self.index.columns[0]]

        # Sort, take consecutive differences, and drop the leading null
        diffs = datetime_col.sort().diff().drop_nulls()

        # Mean difference (Duration)
        mean_diff = diffs.mean()

        # Convert Duration (timedelta) to seconds
        if isinstance(mean_diff, timedelta):
            seconds = mean_diff.total_seconds()
        else:
            # Should not happen for datetime diff, but handle gracefully
            seconds = cast(float, mean_diff) if mean_diff is not None else 1.0

        if seconds == 0:
            # Same exception type the bare division used to raise, but with a
            # message that points at the real cause.
            raise ZeroDivisionError(  # noqa: TRY003
                "Cannot estimate periods per year: index timestamps span a zero-length interval."
            )

        return (365 * 24 * 60 * 60) / seconds

    def items(self) -> Iterator[tuple[str, pl.Series]]:
        """Iterate over all assets and their corresponding data series.

        This method provides a convenient way to iterate over all assets in the data,
        yielding each asset name and its corresponding data series. Returns columns
        come first, followed by benchmark columns (if available).

        Yields:
            tuple[str, pl.Series]: A tuple containing the asset name and its data series.

        """
        matrix = self.all

        for col in self.assets:
            yield col, matrix.get_column(col)
242 

243 for col in self.assets: 

244 yield col, matrix.get_column(col)