Coverage for src/jquantstats/api.py: 97%
34 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-05 07:23 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-05 07:23 +0000
1# QuantStats: Portfolio analytics for quants
2# https://github.com/tschm/jquantstats
3#
4# Copyright 2019-2024 Ran Aroussi
5# Copyright 2025 Thomas Schmelzer
6#
7# Licensed under the Apache License, Version 2.0 (the "License");
8# you may not use this file except in compliance with the License.
9# You may obtain a copy of the License at
10#
11# http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS,
15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
19"""jQuantStats API module.
21This module provides the core API for the jQuantStats library, including the Data class
22for handling financial returns data and benchmarks.
24Overview
25--------
26The main entry point is the `build_data` function, which creates a Data object from
27returns and optional benchmark data. The Data class provides methods for analyzing and
28manipulating financial returns data, including accessing statistical metrics through
29the `stats` property and visualization through the `plots` property.
31Features
32--------
33- Support for both pandas and polars DataFrames as input
34- Automatic conversion to polars for efficient data processing
35- Handling of risk-free rate adjustments
36- Benchmark comparison capabilities
37- Date alignment between returns and benchmark data
39Example:
40-------
41```python
42import polars as pl
43from jquantstats.api import build_data
45# Create a Data object from returns
46returns = pl.DataFrame({
47 "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
48 "Asset1": [0.01, -0.02, 0.03]
49}).with_columns(pl.col("Date").str.to_date())
51data = build_data(returns=returns)
53# With benchmark and risk-free rate
54benchmark = pl.DataFrame({
55 "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
56 "Market": [0.005, -0.01, 0.02]
57}).with_columns(pl.col("Date").str.to_date())
59data = build_data(
60 returns=returns,
61 benchmark=benchmark,
62 rf=0.0002, # risk-free rate (e.g., 0.02% per day)
63)
64```
66"""
68import pandas as pd
69import polars as pl
71from ._data import Data
def build_data(
    returns: pl.DataFrame | pd.DataFrame | pd.Series,
    rf: float | pl.DataFrame | pd.DataFrame | pd.Series = 0.0,
    benchmark: pl.DataFrame | pd.DataFrame | pd.Series | None = None,
    date_col: str = "Date",
) -> Data:
    """Build a Data object from returns and optional benchmark using Polars.

    This function is the main entry point for creating a Data object, which is the core
    container for financial returns data in jQuantStats.

    Description
    -----------
    The `build_data` function converts pandas DataFrames and Series to polars
    DataFrames, aligns dates between returns and benchmark data, and subtracts
    the risk-free rate to calculate excess returns.

    Parameters
    ----------
    returns : pl.DataFrame | pd.DataFrame | pd.Series
        Financial returns data.

        - If pl.DataFrame: must contain the `date_col` column; all other columns are asset returns.
        - If pd.DataFrame: Index can be dates (will be included) or a date column should be present.
        - If pd.Series: Index should be dates, values are returns for a single asset.

    rf : float | pl.DataFrame | pd.DataFrame | pd.Series, optional
        Risk-free rate. Default is 0.0 (no risk-free rate adjustment).

        - If a number (float or int): Constant risk-free rate applied to all dates.
        - If DataFrame/Series: Time-varying risk-free rate. The frame must contain
          the `date_col` column, and its second column is used as the rate. Dates
          that are missing from the risk-free data are dropped from the result
          (inner join).

    benchmark : pl.DataFrame | pd.DataFrame | pd.Series, optional
        Benchmark returns. Default is None (no benchmark).

        - If pl.DataFrame: must contain the `date_col` column; all other columns are benchmark returns.
        - If pd.DataFrame: Index can be dates (will be included) or a date column should be present.
        - If pd.Series: Index should be dates, values are returns for a single benchmark.

    date_col : str, optional
        Name of the date column in the DataFrames. Default is "Date".

    Returns
    -------
    Data
        Object built from the excess returns, the excess benchmark returns
        (or None when no benchmark is given) and a one-column date index that
        matches the rows of the excess returns.

    Raises
    ------
    ValueError
        If there are no overlapping dates between returns and benchmark.
    TypeError
        If `rf` is neither a number nor a DataFrame/Series.

    Examples
    --------
    Basic usage with polars DataFrame:

    ```python
    import polars as pl
    from jquantstats.api import build_data

    returns = pl.DataFrame({
        "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
        "Asset1": [0.01, -0.02, 0.03],
        "Asset2": [0.02, 0.01, -0.01]
    }).with_columns(pl.col("Date").str.to_date())

    data = build_data(returns=returns)
    ```

    With pandas DataFrame:

    ```python
    import pandas as pd
    from jquantstats.api import build_data

    returns_pd = pd.DataFrame({
        "Date": pd.to_datetime(["2023-01-01", "2023-01-02", "2023-01-03"]),
        "Asset1": [0.01, -0.02, 0.03],
        "Asset2": [0.02, 0.01, -0.01]
    })

    data = build_data(returns=returns_pd)
    ```

    With benchmark and risk-free rate:

    ```python
    benchmark = pl.DataFrame({
        "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
        "Market": [0.005, -0.01, 0.02]
    }).with_columns(pl.col("Date").str.to_date())

    data = build_data(returns=returns, benchmark=benchmark, rf=0.0002)
    ```

    """
    # Internal name for the joined risk-free column; chosen so that it cannot
    # realistically clash with a user-supplied asset column (e.g. one named "rf").
    rf_col = "__rf__"

    def to_polars(obj):
        """Convert a pandas Series/DataFrame to polars; pass anything else through."""
        if isinstance(obj, pd.Series):
            obj = obj.to_frame()
        if isinstance(obj, pd.DataFrame):
            # include_index=True keeps a date index as a regular column.
            return pl.from_pandas(obj, include_index=True)
        return obj

    def subtract_risk_free(dframe: pl.DataFrame, rf: float | pl.DataFrame, date_col: str) -> pl.DataFrame:
        """Subtract the risk-free rate from every non-date column of the DataFrame.

        Parameters
        ----------
        dframe : pl.DataFrame
            DataFrame containing the date column and one or more return columns.

        rf : float | pl.DataFrame
            Risk-free rate to subtract from returns.

            - If a number: a constant rate subtracted from every row.
            - If pl.DataFrame: a frame containing the date column; its second
              column holds the time-varying rate.

        date_col : str
            Name of the date column, used as the join key when rf is a DataFrame.

        Returns
        -------
        pl.DataFrame
            The date column followed by the original return columns (same names),
            each with the risk-free rate subtracted.

        Raises
        ------
        TypeError
            If rf is neither a number nor a pl.DataFrame.

        Notes
        -----
        When rf is a DataFrame an inner join is performed, so only dates present
        in both frames appear in the result.

        """
        # Fix the set of asset columns *before* any join so that columns coming
        # from the risk-free frame can never be mistaken for assets.
        asset_cols = [col for col in dframe.columns if col != date_col]

        # Scalar rate: subtract a literal directly, no join required.
        # bool is excluded explicitly because it is a subclass of int.
        if isinstance(rf, (int, float)) and not isinstance(rf, bool):
            rate = pl.lit(float(rf))
            return dframe.select(
                [pl.col(date_col)] + [(pl.col(col) - rate).alias(col) for col in asset_cols]
            )

        if not isinstance(rf, pl.DataFrame):
            raise TypeError("rf must be a float or DataFrame")

        # Keep only the join key and the rate column (second column of rf).
        rf_dframe = rf.select([pl.col(date_col), pl.col(rf.columns[1]).alias(rf_col)])

        joined = dframe.join(rf_dframe, on=date_col, how="inner")
        return joined.select(
            [pl.col(date_col)] + [(pl.col(col) - pl.col(rf_col)).alias(col) for col in asset_cols]
        )

    # Normalise all pandas inputs to polars.
    returns = to_polars(returns)
    rf = to_polars(rf)
    benchmark = to_polars(benchmark)

    # Align returns and benchmark if both provided
    if benchmark is not None:
        joined_dates = returns.join(benchmark, on=date_col, how="inner").select(date_col)
        if joined_dates.is_empty():
            raise ValueError("No overlapping dates between returns and benchmark.")
        returns = returns.join(joined_dates, on=date_col, how="inner")
        benchmark = benchmark.join(joined_dates, on=date_col, how="inner")

    # Subtract risk-free rate. The index is taken from the *result* so that it
    # stays row-aligned with the excess returns even when a time-varying rf
    # drops dates through the inner join.
    excess_with_dates = subtract_risk_free(returns, rf, date_col)
    index = excess_with_dates.select(date_col)
    excess_returns = excess_with_dates.drop(date_col)

    excess_benchmark = None
    if benchmark is not None:
        excess_benchmark = subtract_risk_free(benchmark, rf, date_col).drop(date_col)

    return Data(returns=excess_returns, benchmark=excess_benchmark, index=index)