Coverage for src/jquantstats/_data.py: 100%

74 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-05 07:23 +0000

1import dataclasses 

2from collections.abc import Iterator 

3from datetime import timedelta 

4 

5import polars as pl 

6 

7from ._plots import Plots 

8from ._reports import Reports 

9from ._stats import Stats 

10 

11 

@dataclasses.dataclass(frozen=True)
class Data:
    """A container for financial returns data and an optional benchmark.

    This class provides methods for analyzing and manipulating financial returns data,
    such as resampling to a different frequency and slicing the first or last rows.
    It also provides access to statistical metrics through the ``stats`` property,
    visualization through the ``plots`` property and reporting through the
    ``reports`` property.

    Attributes:
        returns (pl.DataFrame): DataFrame containing returns data with assets as columns.
        index (pl.DataFrame): DataFrame containing the date index for the returns data.
            Its first column is the one used for validation and resampling.
        benchmark (pl.DataFrame, optional): DataFrame containing benchmark returns data.
            Defaults to None.

    Raises:
        ValueError: On construction, if the index has fewer than two rows, is not
            sorted in increasing order, or if the row counts of ``returns`` or
            ``benchmark`` differ from that of ``index``.

    """

    returns: pl.DataFrame
    index: pl.DataFrame
    benchmark: pl.DataFrame | None = None

    def __post_init__(self) -> None:
        """Validate that index, returns and benchmark are mutually consistent.

        Raises:
            ValueError: If any of the consistency checks below fails.

        """
        n_rows = self.index.shape[0]

        # You need at least two points
        if n_rows < 2:
            raise ValueError("Index must contain at least two timestamps.")

        # Check index is monotonically increasing (first column of the index frame)
        datetime_col = self.index[self.index.columns[0]]
        if not datetime_col.is_sorted():
            raise ValueError("Index must be monotonically increasing.")

        # Check row count matches returns
        if self.returns.shape[0] != n_rows:
            raise ValueError("Returns and index must have the same number of rows.")

        # Check row count matches benchmark (if provided)
        if self.benchmark is not None and self.benchmark.shape[0] != n_rows:
            raise ValueError("Benchmark and index must have the same number of rows.")

    @property
    def plots(self) -> "Plots":
        """Provides access to visualization methods for the financial data.

        Returns:
            Plots: An instance of the Plots class initialized with this data.

        """
        return Plots(self)

    @property
    def stats(self) -> "Stats":
        """Provides access to statistical analysis methods for the financial data.

        Returns:
            Stats: An instance of the Stats class initialized with this data.

        """
        return Stats(self)

    @property
    def reports(self) -> "Reports":
        """Provides access to reporting methods for the financial data.

        Returns:
            Reports: An instance of the Reports class initialized with this data.

        """
        return Reports(self)

    @property
    def date_col(self) -> list[str]:
        """Return the column names of the index DataFrame.

        Returns:
            list[str]: List of column names in the index DataFrame, typically containing
                the date column name.

        """
        return self.index.columns

    @property
    def assets(self) -> list[str]:
        """Return the combined list of asset column names from returns and benchmark.

        Returns:
            list[str]: The column names of ``returns``, followed by the column names
                of ``benchmark`` when a benchmark is present.

        """
        # Explicit None check instead of catching AttributeError, so that an
        # unrelated AttributeError is never silently swallowed.
        if self.benchmark is None:
            return self.returns.columns
        return self.returns.columns + self.benchmark.columns

    @property
    def all(self) -> pl.DataFrame:
        """Combine index, returns, and benchmark data into a single DataFrame.

        This property provides a convenient way to access all data in a single DataFrame,
        which is useful for analysis and visualization.

        Returns:
            pl.DataFrame: A DataFrame containing the index, all returns data, and benchmark data
                (if available) combined horizontally.

        """
        frames = [self.index, self.returns]
        if self.benchmark is not None:
            frames.append(self.benchmark)
        return pl.concat(frames, how="horizontal")

    def resample(self, every: str = "1mo") -> "Data":
        """Resample returns and benchmark to a different frequency using Polars.

        Within each window the returns are always compounded, i.e. every column is
        aggregated as ``prod(1 + r) - 1``.

        Args:
            every (str, optional): Resampling frequency (e.g., '1mo', '1y'). It is used
                both as the window step and as the window length. Defaults to '1mo'.

        Returns:
            Data: Resampled data. The benchmark is resampled too when present.

        Raises:
            ValueError: If the resampled data fails the validation performed when
                the new ``Data`` object is constructed (for example when fewer
                than two windows remain).

        """
        date_name = self.index.columns[0]

        def resample_frame(dframe: pl.DataFrame) -> pl.DataFrame:
            # Add the date column so the frame can be windowed by time.
            dated = self.index.hstack(dframe)

            # Windows are closed on, and labelled by, their right edge.
            return dated.group_by_dynamic(
                index_column=date_name, every=every, period=every, closed="right", label="right"
            ).agg([((pl.col(col) + 1.0).product() - 1.0).alias(col) for col in dated.columns if col != date_name])

        resampled_returns = resample_frame(self.returns)
        resampled_benchmark = resample_frame(self.benchmark) if self.benchmark is not None else None

        # The new index is taken from the resampled returns frame.
        return Data(
            returns=resampled_returns.drop(date_name),
            benchmark=resampled_benchmark.drop(date_name) if resampled_benchmark is not None else None,
            index=resampled_returns.select(date_name),
        )

    def copy(self) -> "Data":
        """Create a new Data object built from clones of the underlying frames.

        Returns:
            Data: A new Data object holding ``clone()``s of the returns, the index
                and (when present) the benchmark.

        """
        benchmark = self.benchmark.clone() if self.benchmark is not None else None
        return Data(returns=self.returns.clone(), benchmark=benchmark, index=self.index.clone())

    def head(self, n: int = 5) -> "Data":
        """Return the first n rows of the returns, index and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the first n rows. The benchmark
                stays None when this object has no benchmark.

        Raises:
            ValueError: If the truncated data fails the construction checks,
                e.g. when fewer than two rows remain.

        """
        # The benchmark is optional; slicing it unconditionally would fail on None.
        benchmark = self.benchmark.head(n) if self.benchmark is not None else None
        return Data(returns=self.returns.head(n), benchmark=benchmark, index=self.index.head(n))

    def tail(self, n: int = 5) -> "Data":
        """Return the last n rows of the returns, index and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the last n rows. The benchmark
                stays None when this object has no benchmark.

        Raises:
            ValueError: If the truncated data fails the construction checks,
                e.g. when fewer than two rows remain.

        """
        # The benchmark is optional; slicing it unconditionally would fail on None.
        benchmark = self.benchmark.tail(n) if self.benchmark is not None else None
        return Data(returns=self.returns.tail(n), benchmark=benchmark, index=self.index.tail(n))

    @property
    def _periods_per_year(self) -> float:
        """Estimate the number of periods per year based on average frequency in the index.

        The estimate is the number of seconds in a 365-day year divided by the mean
        spacing (in seconds) between consecutive index values.

        Assumes `self.index` is a Polars DataFrame with a single datetime column.
        """
        # Extract the datetime column (assuming only one)
        datetime_col = self.index[self.index.columns[0]]

        # Ensure it's sorted
        sorted_dt = datetime_col.sort()

        # Compute differences; the first difference is null and is dropped
        diffs = sorted_dt.diff().drop_nulls()

        # Mean difference (Duration)
        mean_diff = diffs.mean()

        # Convert Duration (timedelta) to seconds
        # NOTE(review): the fallback branch divides by a timedelta, so it presumably
        # expects a timedelta-like value -- confirm what non-timedelta means occur.
        seconds = mean_diff.total_seconds() if isinstance(mean_diff, timedelta) else mean_diff / timedelta(seconds=1)

        # NOTE(review): a mean spacing of zero would make this division fail.
        return (365 * 24 * 60 * 60) / seconds

    def items(self) -> Iterator[tuple[str, pl.Series]]:
        """Iterate over all assets and their corresponding data series.

        This method provides a convenient way to iterate over all assets in the data,
        yielding each asset name and its corresponding data series.

        Yields:
            tuple[str, pl.Series]: A tuple containing the asset name and its data series.

        """
        # Build the combined frame once rather than once per asset.
        matrix = self.all

        for col in self.assets:
            yield col, matrix.get_column(col)

230 for col in self.assets: 

231 yield col, matrix.get_column(col)