Coverage for src/jquantstats/_data.py

1import dataclasses

2from collections.abc import Iterator

3from datetime import timedelta

5import polars as pl

7from ._plots import Plots

8from ._reports import Reports

9from ._stats import Stats

@dataclasses.dataclass(frozen=True)
class Data:
    """A container for financial returns data and an optional benchmark.

    This class provides methods for analyzing and manipulating financial returns data,
    including combining the frames, resampling to a different time period and slicing
    the first/last rows. It also provides access to statistical metrics through the
    ``stats`` property, visualization through the ``plots`` property and reporting
    through the ``reports`` property.

    Attributes:
        returns (pl.DataFrame): DataFrame containing returns data with assets as columns.
        index (pl.DataFrame): DataFrame containing the date index for the returns data.
            Its first column is used as the time axis.
        benchmark (pl.DataFrame, optional): DataFrame containing benchmark returns data.
            Defaults to None.

    Raises:
        ValueError: If the index has fewer than two rows, is not sorted, or its row
            count differs from that of ``returns`` or ``benchmark``.

    """

    returns: pl.DataFrame
    index: pl.DataFrame
    benchmark: pl.DataFrame | None = None

    def __post_init__(self) -> None:
        """Validate that index, returns and benchmark are mutually consistent."""
        n_rows = self.index.shape[0]

        # You need at least two points
        if n_rows < 2:
            raise ValueError("Index must contain at least two timestamps.")

        # The first index column is the time axis; it must be sorted in ascending order
        datetime_col = self.index[self.index.columns[0]]
        if not datetime_col.is_sorted():
            raise ValueError("Index must be monotonically increasing.")

        # Check row count matches returns
        if self.returns.shape[0] != n_rows:
            raise ValueError("Returns and index must have the same number of rows.")

        # Check row count matches benchmark (if provided)
        if self.benchmark is not None and self.benchmark.shape[0] != n_rows:
            raise ValueError("Benchmark and index must have the same number of rows.")

    @property
    def plots(self) -> "Plots":
        """Provides access to visualization methods for the financial data.

        Returns:
            Plots: An instance of the Plots class initialized with this data.

        """
        return Plots(self)

    @property
    def stats(self) -> "Stats":
        """Provides access to statistical analysis methods for the financial data.

        Returns:
            Stats: An instance of the Stats class initialized with this data.

        """
        return Stats(self)

    @property
    def reports(self) -> "Reports":
        """Provides access to reporting methods for the financial data.

        Returns:
            Reports: An instance of the Reports class initialized with this data.

        """
        return Reports(self)

    @property
    def date_col(self) -> list[str]:
        """Return the column names of the index DataFrame.

        Returns:
            list[str]: List of column names in the index DataFrame, typically containing
                the date column name.

        """
        return self.index.columns

    @property
    def assets(self) -> list[str]:
        """Return the combined list of asset column names from returns and benchmark.

        Returns:
            list[str]: Column names of ``returns`` followed by those of ``benchmark``
                (if available).

        """
        # Explicit None check: a try/except AttributeError here would also hide
        # unrelated attribute errors.
        if self.benchmark is None:
            return self.returns.columns
        return self.returns.columns + self.benchmark.columns

    @property
    def all(self) -> pl.DataFrame:
        """Combine index, returns, and benchmark data into a single DataFrame.

        This property provides a convenient way to access all data in a single DataFrame,
        which is useful for analysis and visualization.

        Returns:
            pl.DataFrame: A DataFrame containing the index, all returns data, and benchmark
                data (if available) combined horizontally.

        """
        frames = [self.index, self.returns]
        if self.benchmark is not None:
            frames.append(self.benchmark)
        return pl.concat(frames, how="horizontal")

    def resample(self, every: str = "1mo") -> "Data":
        """Resample returns and benchmark to a different frequency using Polars.

        Returns inside each window are always compounded, i.e. aggregated as
        ``prod(1 + r) - 1``. Windows are closed and labelled on their right edge.

        Args:
            every (str, optional): Resampling frequency (e.g., '1mo', '1y'). It is used
                both as the window step and the window length. Defaults to '1mo'.

        Returns:
            Data: Resampled data whose index is taken from the resampled returns.

        """
        date_col = self.index.columns[0]

        def resample_frame(dframe: pl.DataFrame) -> pl.DataFrame:
            # Add the date column so the frame can be grouped into time windows
            dated = self.index.hstack(dframe)

            return dated.group_by_dynamic(
                index_column=date_col, every=every, period=every, closed="right", label="right"
            ).agg(
                [
                    # Compound the simple returns that fall into each window
                    ((pl.col(col) + 1.0).product() - 1.0).alias(col)
                    for col in dated.columns
                    if col != date_col
                ]
            )

        resampled_returns = resample_frame(self.returns)
        resampled_benchmark = resample_frame(self.benchmark) if self.benchmark is not None else None
        resampled_index = resampled_returns.select(date_col)

        return Data(
            returns=resampled_returns.drop(date_col),
            benchmark=resampled_benchmark.drop(date_col) if resampled_benchmark is not None else None,
            index=resampled_index,
        )

    def copy(self) -> "Data":
        """Create a copy of the Data object.

        Each frame is duplicated with its ``clone()`` method; a missing benchmark
        stays ``None``.

        Returns:
            Data: A new Data object with clones of the returns, index and benchmark.

        """
        benchmark = None if self.benchmark is None else self.benchmark.clone()
        return Data(returns=self.returns.clone(), benchmark=benchmark, index=self.index.clone())

    def head(self, n: int = 5) -> "Data":
        """Return the first n rows of the returns, index and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the first n rows of each frame.

        Raises:
            ValueError: If the result has fewer than two rows, because the new object
                is validated on construction.

        """
        # benchmark is optional; only slice it when it exists
        benchmark = None if self.benchmark is None else self.benchmark.head(n)
        return Data(returns=self.returns.head(n), benchmark=benchmark, index=self.index.head(n))

    def tail(self, n: int = 5) -> "Data":
        """Return the last n rows of the returns, index and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the last n rows of each frame.

        Raises:
            ValueError: If the result has fewer than two rows, because the new object
                is validated on construction.

        """
        # benchmark is optional; only slice it when it exists
        benchmark = None if self.benchmark is None else self.benchmark.tail(n)
        return Data(returns=self.returns.tail(n), benchmark=benchmark, index=self.index.tail(n))

    @property
    def _periods_per_year(self) -> float:
        """Estimate the number of periods per year based on average frequency in the index.

        The estimate is the number of seconds in a 365-day year divided by the mean
        spacing (in seconds) between consecutive values of the first index column.

        Assumes `self.index` is a Polars DataFrame with a single datetime column.
        """
        # Extract the datetime column (assuming only one)
        datetime_col = self.index[self.index.columns[0]]

        # Ensure it's sorted
        sorted_dt = datetime_col.sort()

        # Differences between consecutive timestamps; the first entry is null
        diffs = sorted_dt.diff().drop_nulls()

        # Mean difference (Duration)
        mean_diff = diffs.mean()

        # Convert Duration (timedelta) to seconds
        seconds = mean_diff.total_seconds() if isinstance(mean_diff, timedelta) else mean_diff / timedelta(seconds=1)

        return (365 * 24 * 60 * 60) / seconds

    def items(self) -> Iterator[tuple[str, pl.Series]]:
        """Iterate over all assets and their corresponding data series.

        This method provides a convenient way to iterate over all assets in the data,
        yielding each asset name and its corresponding data series.

        Yields:
            tuple[str, pl.Series]: A tuple containing the asset name and its data series.

        """
        # Build the combined frame once and pull each asset column from it
        matrix = self.all

        for col in self.assets:
            yield col, matrix.get_column(col)

Coverage for src/jquantstats/_data.py: 100%

74 statements