Coverage for src / jquantstats / _data.py: 99%
78 statements
« prev ^ index » next coverage.py v7.13.2, created at 2026-02-03 02:21 +0000
« prev ^ index » next coverage.py v7.13.2, created at 2026-02-03 02:21 +0000
1from __future__ import annotations
3import dataclasses
4from collections.abc import Iterator
5from datetime import timedelta
6from typing import TYPE_CHECKING, cast
8import polars as pl
10if TYPE_CHECKING:
11 from ._plots import Plots
12 from ._reports import Reports
13 from ._stats import Stats
@dataclasses.dataclass(frozen=True)
class Data:
    """A container for financial returns data and an optional benchmark.

    This class provides methods for analyzing and manipulating financial returns data,
    such as combining everything into one frame, resampling to a different frequency
    and slicing the first or last rows. Statistics, plots and reports are reachable
    through the ``stats``, ``plots`` and ``reports`` properties.

    Attributes:
        returns (pl.DataFrame): DataFrame containing returns data with assets as columns.
        index (pl.DataFrame): DataFrame containing the date index for the returns data.
            Its first column is the one used for validation and resampling.
        benchmark (pl.DataFrame, optional): DataFrame containing benchmark returns data.
            Defaults to None.

    Raises:
        ValueError: If the index has fewer than two rows, if its first column is not
            sorted, or if ``returns`` / ``benchmark`` do not have as many rows as the index.

    """

    returns: pl.DataFrame
    index: pl.DataFrame
    benchmark: pl.DataFrame | None = None

    def __post_init__(self) -> None:
        """Validate that index, returns and benchmark are mutually consistent."""
        n_rows = self.index.shape[0]

        # You need at least two points
        if n_rows < 2:
            raise ValueError("Index must contain at least two timestamps.")  # noqa: TRY003

        # Check index is monotonically increasing (only the first index column is inspected)
        datetime_col = self.index[self.index.columns[0]]
        if not datetime_col.is_sorted():
            raise ValueError("Index must be monotonically increasing.")  # noqa: TRY003

        # Check row count matches returns
        if self.returns.shape[0] != n_rows:
            raise ValueError("Returns and index must have the same number of rows.")  # noqa: TRY003

        # Check row count matches benchmark (if provided)
        if self.benchmark is not None and self.benchmark.shape[0] != n_rows:
            raise ValueError("Benchmark and index must have the same number of rows.")  # noqa: TRY003

    @property
    def plots(self) -> Plots:
        """Provides access to visualization methods for the financial data.

        Returns:
            Plots: An instance of the Plots class initialized with this data.

        """
        # Imported lazily: at module level the name is only imported under TYPE_CHECKING.
        from ._plots import Plots

        return Plots(self)

    @property
    def stats(self) -> Stats:
        """Provides access to statistical analysis methods for the financial data.

        Returns:
            Stats: An instance of the Stats class initialized with this data.

        """
        # Imported lazily: at module level the name is only imported under TYPE_CHECKING.
        from ._stats import Stats

        return Stats(self)

    @property
    def reports(self) -> Reports:
        """Provides access to reporting methods for the financial data.

        Returns:
            Reports: An instance of the Reports class initialized with this data.

        """
        # Imported lazily: at module level the name is only imported under TYPE_CHECKING.
        from ._reports import Reports

        return Reports(self)

    @property
    def date_col(self) -> list[str]:
        """Return the column names of the index DataFrame.

        Returns:
            list[str]: List of column names in the index DataFrame, typically containing
                the date column name.

        """
        return list(self.index.columns)

    @property
    def assets(self) -> list[str]:
        """Return the combined list of asset column names from returns and benchmark.

        Returns:
            list[str]: Column names of ``returns`` followed by the column names of
                ``benchmark`` (if available).

        """
        names = list(self.returns.columns)
        if self.benchmark is not None:
            names += list(self.benchmark.columns)
        return names

    @property
    def all(self) -> pl.DataFrame:
        """Combine index, returns, and benchmark data into a single DataFrame.

        Returns:
            pl.DataFrame: The index, the returns and the benchmark (if available)
                concatenated horizontally, in that order.

        """
        frames = [self.index, self.returns]
        if self.benchmark is not None:
            frames.append(self.benchmark)
        return pl.concat(frames, how="horizontal")

    def resample(self, every: str = "1mo") -> Data:
        """Resample returns and benchmark to a different frequency using Polars.

        Rows are grouped with ``group_by_dynamic`` on the first index column into
        right-closed, right-labelled windows of length ``every``. Within each window
        the returns are always compounded, i.e. ``prod(1 + r) - 1``.

        Args:
            every (str, optional): Resampling frequency (e.g., '1mo', '1y'). Defaults to '1mo'.

        Returns:
            Data: Resampled data whose index is taken from the resampled returns.

        """
        time_col = self.index.columns[0]

        def resample_frame(dframe: pl.DataFrame) -> pl.DataFrame:
            stacked = self.index.hstack(dframe)  # Add the date column for resampling
            compounded = [
                ((pl.col(col) + 1.0).product() - 1.0).alias(col) for col in stacked.columns if col != time_col
            ]
            return stacked.group_by_dynamic(
                index_column=time_col, every=every, period=every, closed="right", label="right"
            ).agg(compounded)

        resampled_returns = resample_frame(self.returns)
        resampled_benchmark = resample_frame(self.benchmark) if self.benchmark is not None else None

        return Data(
            returns=resampled_returns.drop(time_col),
            benchmark=resampled_benchmark.drop(time_col) if resampled_benchmark is not None else None,
            index=resampled_returns.select(time_col),
        )

    def copy(self) -> Data:
        """Create a copy of the Data object.

        Returns:
            Data: A new Data object built from clones of the returns, the index and
                the benchmark (if available).

        """
        benchmark_clone = self.benchmark.clone() if self.benchmark is not None else None
        return Data(returns=self.returns.clone(), benchmark=benchmark_clone, index=self.index.clone())

    def head(self, n: int = 5) -> Data:
        """Return the first n rows of the returns, benchmark and index.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the first n rows.

        Raises:
            ValueError: If the slice has fewer than two rows, because the new
                object is validated like any other Data instance.

        """
        benchmark_head = self.benchmark.head(n) if self.benchmark is not None else None
        return Data(returns=self.returns.head(n), benchmark=benchmark_head, index=self.index.head(n))

    def tail(self, n: int = 5) -> Data:
        """Return the last n rows of the returns, benchmark and index.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the last n rows.

        Raises:
            ValueError: If the slice has fewer than two rows, because the new
                object is validated like any other Data instance.

        """
        benchmark_tail = self.benchmark.tail(n) if self.benchmark is not None else None
        return Data(returns=self.returns.tail(n), benchmark=benchmark_tail, index=self.index.tail(n))

    @property
    def _periods_per_year(self) -> float:
        """Estimate the number of periods per year based on average frequency in the index.

        Only the first column of ``self.index`` is used.

        Raises:
            ValueError: If the average spacing between consecutive index values is
                not positive (for example when all timestamps are identical).

        """
        datetime_col = self.index[self.index.columns[0]]

        # Mean of the consecutive differences; the leading null produced by diff() is dropped.
        mean_diff = datetime_col.sort().diff().drop_nulls().mean()

        return self._periods_from_spacing(mean_diff)

    @staticmethod
    def _periods_from_spacing(mean_diff: timedelta | float | None) -> float:
        """Convert an average spacing between observations into periods per year.

        Args:
            mean_diff: Average spacing. A ``timedelta`` is converted to seconds, any
                other non-None value is used as a number of seconds, and ``None``
                falls back to one second.

        Returns:
            float: Number of seconds in a 365-day year divided by the spacing in seconds.

        Raises:
            ValueError: If the spacing is zero or negative.

        """
        if isinstance(mean_diff, timedelta):
            seconds = mean_diff.total_seconds()
        elif mean_diff is None:
            # Should not happen for a validated index, but handle gracefully
            seconds = 1.0
        else:
            seconds = cast(float, mean_diff)

        # A zero spacing would otherwise surface as an unexplained ZeroDivisionError.
        if seconds <= 0:
            raise ValueError("Average spacing between index values must be positive.")  # noqa: TRY003

        return (365 * 24 * 60 * 60) / seconds

    def items(self) -> Iterator[tuple[str, pl.Series]]:
        """Iterate over all assets and their corresponding data series.

        Yields:
            tuple[str, pl.Series]: The asset name and its column taken from ``self.all``,
                in the order given by ``self.assets``.

        """
        matrix = self.all

        for col in self.assets:
            yield col, matrix.get_column(col)