Coverage for src/jquantstats/api.py: 97%

34 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-05 07:23 +0000

1# QuantStats: Portfolio analytics for quants 

2# https://github.com/tschm/jquantstats 

3# 

4# Copyright 2019-2024 Ran Aroussi 

5# Copyright 2025 Thomas Schmelzer 

6# 

7# Licensed under the Apache License, Version 2.0 (the "License"); 

8# you may not use this file except in compliance with the License. 

9# You may obtain a copy of the License at 

10# 

11# http://www.apache.org/licenses/LICENSE-2.0 

12# 

13# Unless required by applicable law or agreed to in writing, software 

14# distributed under the License is distributed on an "AS IS" BASIS, 

15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

16# See the License for the specific language governing permissions and 

17# limitations under the License. 

18 

19"""jQuantStats API module. 

20 

21This module provides the core API for the jQuantStats library, including the Data class 

22for handling financial returns data and benchmarks. 

23 

24Overview 

25-------- 

26The main entry point is the `build_data` function, which creates a Data object from 

27returns and optional benchmark data. The Data class provides methods for analyzing and 

28manipulating financial returns data, including accessing statistical metrics through 

29the `stats` property and visualization through the `plots` property. 

30 

31Features 

32-------- 

33- Support for both pandas and polars DataFrames as input 

34- Automatic conversion to polars for efficient data processing 

35- Handling of risk-free rate adjustments 

36- Benchmark comparison capabilities 

37- Date alignment between returns and benchmark data 

38 

39Example: 

40------- 

41```python 

42import polars as pl 

43from jquantstats.api import build_data 

44 

45# Create a Data object from returns 

46returns = pl.DataFrame({ 

47 "Date": ["2023-01-01", "2023-01-02", "2023-01-03"], 

48 "Asset1": [0.01, -0.02, 0.03] 

49}).with_columns(pl.col("Date").str.to_date()) 

50 

51data = build_data(returns=returns) 

52 

53# With benchmark and risk-free rate 

54benchmark = pl.DataFrame({ 

55 "Date": ["2023-01-01", "2023-01-02", "2023-01-03"], 

56 "Market": [0.005, -0.01, 0.02] 

57}).with_columns(pl.col("Date").str.to_date()) 

58 

59data = build_data( 

60 returns=returns, 

61 benchmark=benchmark, 

62 rf=0.0002, # risk-free rate (e.g., 0.02% per day) 

63) 

64``` 

65 

66""" 

67 

68import pandas as pd 

69import polars as pl 

70 

71from ._data import Data 

72 

73 

def _to_polars(
    obj: pl.DataFrame | pd.DataFrame | pd.Series | float | None,
) -> pl.DataFrame | float | None:
    """Convert pandas input to a polars DataFrame; pass anything else through unchanged.

    A pandas Series is first turned into a one-column frame. The pandas index is
    carried over as a regular column (``include_index=True``).
    """
    if isinstance(obj, pd.Series):
        obj = obj.to_frame()
    if isinstance(obj, pd.DataFrame):
        return pl.from_pandas(obj, include_index=True)
    return obj


def _subtract_risk_free(dframe: pl.DataFrame, rf: float | pl.DataFrame, date_col: str) -> pl.DataFrame:
    """Subtract the risk-free rate from every non-date column of ``dframe``.

    Parameters
    ----------
    dframe : pl.DataFrame
        Frame with a ``date_col`` column plus one column per asset.

    rf : float | pl.DataFrame
        Risk-free rate.

        - If a number (int or float, not bool): subtracted from every row directly.
        - If pl.DataFrame: must contain ``date_col``; its second column is taken
          as the rate and matched to ``dframe`` by an inner join on ``date_col``.

    date_col : str
        Name of the date column.

    Returns
    -------
    pl.DataFrame
        ``date_col`` followed by the adjusted columns, under their original names.
        With a time-series ``rf`` only dates present in both frames are kept.

    Raises
    ------
    TypeError
        If ``rf`` is neither a numeric scalar nor a polars DataFrame.

    """
    # Scalar case: no join needed, so rows can neither be dropped nor duplicated.
    # bool is excluded explicitly because it is a subclass of int.
    if isinstance(rf, (int, float)) and not isinstance(rf, bool):
        # Typed Float64 literal keeps the same result dtype a materialised
        # float column would produce.
        rate = pl.lit(float(rf), dtype=pl.Float64)
        value_cols = [col for col in dframe.columns if col != date_col]
        return dframe.select([pl.col(date_col)] + [(pl.col(col) - rate).alias(col) for col in value_cols])

    if not isinstance(rf, pl.DataFrame):
        raise TypeError("rf must be a float or DataFrame")

    # Normalise the name of the rate column (assumed to be the second column).
    rate_col = rf.columns[1]
    rf_dframe = rf if rate_col == "rf" else rf.rename({rate_col: "rf"})

    joined = dframe.join(rf_dframe, on=date_col, how="inner")
    return joined.select(
        [pl.col(date_col)]
        + [(pl.col(col) - pl.col("rf")).alias(col) for col in joined.columns if col not in {date_col, "rf"}]
    )


def build_data(
    returns: pl.DataFrame | pd.DataFrame | pd.Series,
    rf: float | pl.DataFrame | pd.DataFrame | pd.Series = 0.0,
    benchmark: pl.DataFrame | pd.DataFrame | pd.Series | None = None,
    date_col: str = "Date",
) -> Data:
    """Build a Data object from returns and optional benchmark using Polars.

    Description
    -----------
    Pandas inputs are converted to polars DataFrames, returns and benchmark are
    restricted to their common dates, and the risk-free rate is subtracted from
    both to obtain excess returns.

    Parameters
    ----------
    returns : pl.DataFrame | pd.DataFrame | pd.Series
        Financial returns data. After conversion the frame must contain a column
        named ``date_col``; every other column is treated as an asset.

        - If pd.DataFrame / pd.Series: the pandas index is carried over as a column.

    rf : float | pl.DataFrame | pd.DataFrame | pd.Series, optional
        Risk-free rate. Default is 0.0 (no risk-free rate adjustment).

        - If a number: constant rate applied to all dates (ints are accepted).
        - If DataFrame/Series: time-varying rate; only dates that also appear in
          ``rf`` are kept.

    benchmark : pl.DataFrame | pd.DataFrame | pd.Series, optional
        Benchmark returns, same layout as ``returns``. Default is None.

    date_col : str, optional
        Name of the date column in the DataFrames. Default is "Date".

    Returns
    -------
    Data
        Built from the excess returns, the excess benchmark (or None) and the
        date index. The index is taken from the excess returns, so it always has
        the same number of rows as the returns passed to ``Data``.

    Raises
    ------
    ValueError
        If there are no overlapping dates between returns and benchmark.
    TypeError
        If ``rf`` is neither a numeric scalar nor a DataFrame/Series.

    Examples
    --------
    ```python
    import polars as pl
    from jquantstats.api import build_data

    returns = pl.DataFrame({
        "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
        "Asset1": [0.01, -0.02, 0.03],
    }).with_columns(pl.col("Date").str.to_date())

    benchmark = pl.DataFrame({
        "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
        "Market": [0.005, -0.01, 0.02],
    }).with_columns(pl.col("Date").str.to_date())

    data = build_data(returns=returns, benchmark=benchmark, rf=0.0002)
    ```

    """
    returns = _to_polars(returns)
    rf = _to_polars(rf)
    benchmark = _to_polars(benchmark)

    # Align returns and benchmark on their common dates if both are provided.
    if benchmark is not None:
        joined_dates = returns.join(benchmark, on=date_col, how="inner").select(date_col)
        if joined_dates.is_empty():
            raise ValueError("No overlapping dates between returns and benchmark.")
        returns = returns.join(joined_dates, on=date_col, how="inner")
        benchmark = benchmark.join(joined_dates, on=date_col, how="inner")

    # Subtract the risk-free rate. The index is read from the result because a
    # time-series rf is inner-joined and may drop dates.
    excess = _subtract_risk_free(returns, rf, date_col)
    index = excess.select(date_col)
    excess_returns = excess.drop(date_col)

    excess_benchmark = None
    if benchmark is not None:
        excess_benchmark = _subtract_risk_free(benchmark, rf, date_col).drop(date_col)

    return Data(returns=excess_returns, benchmark=excess_benchmark, index=index)