Coverage for src/jquantstats/_data.py: 100%
74 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-05 07:23 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-05 07:23 +0000
1import dataclasses
2from collections.abc import Iterator
3from datetime import timedelta
5import polars as pl
7from ._plots import Plots
8from ._reports import Reports
9from ._stats import Stats
@dataclasses.dataclass(frozen=True)
class Data:
    """A container for financial returns data and an optional benchmark.

    This class provides methods for analyzing and manipulating financial returns data,
    such as resampling to a different time period, slicing the first/last rows and
    iterating over the individual asset series. It also provides access to statistical
    metrics through the stats property, visualization through the plots property and
    reporting through the reports property.

    Attributes:
        returns (pl.DataFrame): DataFrame containing returns data with assets as columns.
        index (pl.DataFrame): DataFrame containing the date index for the returns data.
            Its first column is used as the datetime column.
        benchmark (pl.DataFrame, optional): DataFrame containing benchmark returns data.
            Defaults to None.

    """

    returns: pl.DataFrame
    index: pl.DataFrame
    benchmark: pl.DataFrame | None = None

    def __post_init__(self):
        """Validate that index, returns and benchmark are mutually consistent.

        Raises:
            ValueError: If the index has fewer than two rows, if its first column is
                not sorted, or if the row count of returns (or of the benchmark,
                when provided) differs from the row count of the index.

        """
        # You need at least two points
        if self.index.shape[0] < 2:
            raise ValueError("Index must contain at least two timestamps.")

        # Check index is monotonically increasing (sortedness of the first index column)
        datetime_col = self.index[self.index.columns[0]]
        if not datetime_col.is_sorted():
            raise ValueError("Index must be monotonically increasing.")

        # Check row count matches returns
        if self.returns.shape[0] != self.index.shape[0]:
            raise ValueError("Returns and index must have the same number of rows.")

        # Check row count matches benchmark (if provided)
        if self.benchmark is not None and self.benchmark.shape[0] != self.index.shape[0]:
            raise ValueError("Benchmark and index must have the same number of rows.")

    @property
    def plots(self) -> "Plots":
        """Provides access to visualization methods for the financial data.

        Returns:
            Plots: An instance of the Plots class initialized with this data.

        """
        return Plots(self)

    @property
    def stats(self) -> "Stats":
        """Provides access to statistical analysis methods for the financial data.

        Returns:
            Stats: An instance of the Stats class initialized with this data.

        """
        return Stats(self)

    @property
    def reports(self) -> "Reports":
        """Provides access to reporting methods for the financial data.

        Returns:
            Reports: An instance of the Reports class initialized with this data.

        """
        return Reports(self)

    @property
    def date_col(self) -> list[str]:
        """Return the column names of the index DataFrame.

        Returns:
            list[str]: List of column names in the index DataFrame, typically containing
                the date column name.

        """
        return self.index.columns

    @property
    def assets(self) -> list[str]:
        """Return the combined list of asset column names from returns and benchmark.

        Returns:
            list[str]: List of all asset column names from both returns and benchmark
                (if available). Returns columns come first.

        """
        # Explicit None check instead of catching AttributeError, so that unrelated
        # attribute errors are not silently swallowed.
        if self.benchmark is None:
            return self.returns.columns
        return self.returns.columns + self.benchmark.columns

    @property
    def all(self) -> pl.DataFrame:
        """Combine index, returns, and benchmark data into a single DataFrame.

        This property provides a convenient way to access all data in a single DataFrame,
        which is useful for analysis and visualization.

        Returns:
            pl.DataFrame: A DataFrame containing the index, all returns data, and benchmark data
                (if available) combined horizontally.

        """
        frames = [self.index, self.returns]
        if self.benchmark is not None:
            frames.append(self.benchmark)
        return pl.concat(frames, how="horizontal")

    def resample(self, every: str = "1mo") -> "Data":
        """Resamples returns and benchmark to a different frequency using Polars.

        Returns within each window are compounded, i.e. aggregated as
        ``prod(1 + r) - 1``. Windows are closed and labelled on their right edge.

        Args:
            every (str, optional): Resampling frequency (e.g., '1mo', '1y'). Defaults to '1mo'.

        Returns:
            Data: Resampled data.

        """
        date_name = self.index.columns[0]

        def resample_frame(dframe: pl.DataFrame) -> pl.DataFrame:
            dframe = self.index.hstack(dframe)  # Add the date column for resampling

            return dframe.group_by_dynamic(
                index_column=date_name, every=every, period=every, closed="right", label="right"
            ).agg(
                [
                    ((pl.col(col) + 1.0).product() - 1.0).alias(col)
                    for col in dframe.columns
                    if col != date_name
                ]
            )

        resampled_returns = resample_frame(self.returns)
        resampled_benchmark = resample_frame(self.benchmark) if self.benchmark is not None else None
        # The new index is the window-label column produced by the resampling
        resampled_index = resampled_returns.select(date_name)

        return Data(
            returns=resampled_returns.drop(date_name),
            benchmark=resampled_benchmark.drop(date_name) if resampled_benchmark is not None else None,
            index=resampled_index,
        )

    def copy(self) -> "Data":
        """Create a copy of the Data object.

        Returns:
            Data: A new Data object built from clones of the returns, the index and
                (if available) the benchmark.

        """
        # Explicit None check instead of catching AttributeError, so that unrelated
        # attribute errors are not silently swallowed.
        benchmark = self.benchmark.clone() if self.benchmark is not None else None
        return Data(returns=self.returns.clone(), benchmark=benchmark, index=self.index.clone())

    def head(self, n: int = 5) -> "Data":
        """Return the first n rows of the returns, benchmark and index.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the first n rows of the data. The
                benchmark stays None when this object has no benchmark.

        Raises:
            ValueError: If the resulting object fails validation, e.g. when fewer
                than two rows remain.

        """
        # benchmark is optional; calling .head() on None would raise AttributeError
        benchmark = self.benchmark.head(n) if self.benchmark is not None else None
        return Data(returns=self.returns.head(n), benchmark=benchmark, index=self.index.head(n))

    def tail(self, n: int = 5) -> "Data":
        """Return the last n rows of the returns, benchmark and index.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the last n rows of the data. The
                benchmark stays None when this object has no benchmark.

        Raises:
            ValueError: If the resulting object fails validation, e.g. when fewer
                than two rows remain.

        """
        # benchmark is optional; calling .tail() on None would raise AttributeError
        benchmark = self.benchmark.tail(n) if self.benchmark is not None else None
        return Data(returns=self.returns.tail(n), benchmark=benchmark, index=self.index.tail(n))

    @property
    def _periods_per_year(self) -> float:
        """Estimate the number of periods per year based on average frequency in the index.

        Assumes `self.index` is a Polars DataFrame with a single datetime column.

        Returns:
            float: Number of seconds in a 365-day year divided by the mean spacing
                (in seconds) between consecutive index entries.

        """
        # Extract the datetime column (assuming only one)
        datetime_col = self.index[self.index.columns[0]]

        # Ensure it's sorted
        sorted_dt = datetime_col.sort()

        # Compute differences
        diffs = sorted_dt.diff().drop_nulls()

        # Mean difference (Duration)
        mean_diff = diffs.mean()

        # Convert Duration (timedelta) to seconds
        # NOTE(review): the fallback branch presumes mean_diff supports division by a
        # timedelta when it is not itself a timedelta - confirm against the Polars version in use.
        seconds = mean_diff.total_seconds() if isinstance(mean_diff, timedelta) else mean_diff / timedelta(seconds=1)

        return (365 * 24 * 60 * 60) / seconds

    def items(self) -> Iterator[tuple[str, pl.Series]]:
        """Iterate over all assets and their corresponding data series.

        This method provides a convenient way to iterate over all assets in the data,
        yielding each asset name and its corresponding data series.

        Yields:
            tuple[str, pl.Series]: A tuple containing the asset name and its data series.

        """
        # Build the combined frame once, then pull each asset column out of it
        matrix = self.all

        for col in self.assets:
            yield col, matrix.get_column(col)