Coverage for src/jquantstats/api.py: 97%

1# QuantStats: Portfolio analytics for quants

2# https://github.com/tschm/jquantstats

7# Licensed under the Apache License, Version 2.0 (the "License");

8# you may not use this file except in compliance with the License.

9# You may obtain a copy of the License at

10#

11# http://www.apache.org/licenses/LICENSE-2.0

12#

13# Unless required by applicable law or agreed to in writing, software

14# distributed under the License is distributed on an "AS IS" BASIS,

15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

16# See the License for the specific language governing permissions and

17# limitations under the License.

"""jQuantStats API module.

This module provides the core API for the jQuantStats library, including the Data class
for handling financial returns data and benchmarks.

Overview
--------
The main entry point is the `build_data` function, which creates a Data object from
returns and optional benchmark data. The Data class provides methods for analyzing and
manipulating financial returns data, including accessing statistical metrics through
the `stats` property and visualization through the `plots` property.

Features
--------
- Support for both pandas and polars DataFrames as input
- Automatic conversion to polars for efficient data processing
- Handling of risk-free rate adjustments
- Benchmark comparison capabilities
- Date alignment between returns and benchmark data

Example:
-------
```python
import polars as pl
from jquantstats.api import build_data

# Create a Data object from returns
returns = pl.DataFrame({
    "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
    "Asset1": [0.01, -0.02, 0.03]
}).with_columns(pl.col("Date").str.to_date())

data = build_data(returns=returns)

# With benchmark and risk-free rate
benchmark = pl.DataFrame({
    "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
    "Market": [0.005, -0.01, 0.02]
}).with_columns(pl.col("Date").str.to_date())

data = build_data(
    returns=returns,
    benchmark=benchmark,
    rf=0.0002,  # risk-free rate (e.g., 0.02% per day)
)
```

"""

68import pandas as pd

69import polars as pl

71from ._data import Data

def build_data(
    returns: pl.DataFrame | pd.DataFrame | pd.Series,
    rf: float | pl.DataFrame | pd.DataFrame | pd.Series = 0.0,
    benchmark: pl.DataFrame | pd.DataFrame | pd.Series | None = None,
    date_col: str = "Date",
) -> Data:
    """Build a Data object from returns and optional benchmark using Polars.

    This function is the main entry point for creating a Data object, which is the core
    container for financial returns data in jQuantStats.

    Description
    -----------
    The `build_data` function converts pandas DataFrames and Series to polars
    DataFrames, restricts returns and benchmark to their common dates, and subtracts
    the risk-free rate to obtain excess returns.

    Parameters
    ----------
    returns : pl.DataFrame | pd.DataFrame | pd.Series
        Financial returns data.

        - If pl.DataFrame: Must contain the `date_col` column; remaining columns are asset returns.
        - If pd.DataFrame: Index can be dates (will be included) or a date column should be present.
        - If pd.Series: Index should be dates, values are returns for a single asset.

    rf : float | pl.DataFrame | pd.DataFrame | pd.Series, optional
        Risk-free rate. Default is 0.0 (no risk-free rate adjustment).

        - If a scalar (float or int): Constant risk-free rate applied to all dates.
        - If DataFrame/Series: Time-varying risk-free rate. It is inner-joined on
          `date_col`, and its second column is used as the rate.

    benchmark : pl.DataFrame | pd.DataFrame | pd.Series, optional
        Benchmark returns. Default is None (no benchmark).

        - If pl.DataFrame: Must contain the `date_col` column; remaining columns are benchmark returns.
        - If pd.DataFrame: Index can be dates (will be included) or a date column should be present.
        - If pd.Series: Index should be dates, values are returns for a single benchmark.

    date_col : str, optional
        Name of the date column in the DataFrames. Default is "Date".

    Returns
    -------
    Data
        Object built from the excess returns, the excess benchmark (or None) and an
        index frame holding the dates of the excess returns.

    Raises
    ------
    ValueError
        If there are no overlapping dates between returns and benchmark.
    TypeError
        If `rf` is neither a scalar number nor a DataFrame/Series.

    Examples
    --------
    Basic usage with polars DataFrame:

    ```python
    import polars as pl
    from jquantstats.api import build_data

    returns = pl.DataFrame({
        "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
        "Asset1": [0.01, -0.02, 0.03],
        "Asset2": [0.02, 0.01, -0.01]
    }).with_columns(pl.col("Date").str.to_date())

    data = build_data(returns=returns)
    ```

    With pandas DataFrame:

    ```python
    import pandas as pd
    from jquantstats.api import build_data

    returns_pd = pd.DataFrame({
        "Date": pd.to_datetime(["2023-01-01", "2023-01-02", "2023-01-03"]),
        "Asset1": [0.01, -0.02, 0.03],
        "Asset2": [0.02, 0.01, -0.01]
    })

    data = build_data(returns=returns_pd)
    ```

    With benchmark and risk-free rate:

    ```python
    benchmark = pl.DataFrame({
        "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
        "Market": [0.005, -0.01, 0.02]
    }).with_columns(pl.col("Date").str.to_date())

    data = build_data(returns=returns, benchmark=benchmark, rf=0.0002)
    ```

    """

    def to_polars(obj):
        """Convert a pandas Series/DataFrame to polars, keeping the index as a column.

        Anything that is not a pandas object (polars frames, scalars, None) is
        returned unchanged.
        """
        if isinstance(obj, pd.Series):
            obj = obj.to_frame()
        if isinstance(obj, pd.DataFrame):
            return pl.from_pandas(obj, include_index=True)
        return obj

    def subtract_risk_free(dframe: pl.DataFrame, rf: float | pl.DataFrame, date_col: str) -> pl.DataFrame:
        """Subtract the risk-free rate from every return column of the DataFrame.

        Description
        -----------
        A scalar rate is expanded into a constant "rf" column keyed by the dates of
        `dframe`. A DataFrame rate has its second column renamed to "rf". In both
        cases the rate is inner-joined onto `dframe` on `date_col` and subtracted
        from each remaining column.

        Parameters
        ----------
        dframe : pl.DataFrame
            DataFrame containing a date column and one or more return columns.

        rf : float | pl.DataFrame
            Risk-free rate to subtract from returns.

            - If a scalar (float or int): A constant rate applied to all dates.
            - If pl.DataFrame: A DataFrame with the date column and a second column
              containing time-varying risk-free rates.

        date_col : str
            Name of the date column used as the join key.

        Returns
        -------
        pl.DataFrame
            The date column followed by the adjusted return columns, which keep
            their original names.

        Raises
        ------
        TypeError
            If `rf` is neither a scalar number nor a polars DataFrame.

        Notes
        -----
        - The join is an inner join, so only dates present in both frames survive.
        - Columns are selected by name only (everything except `date_col` and "rf");
          no dtype filtering is performed, so all such columns are expected to be numeric.

        """
        if isinstance(rf, (int, float)):
            # Accept integer scalars such as ``rf=0``; cast so the literal is always a float.
            rf_dframe = dframe.select([pl.col(date_col), pl.lit(float(rf)).alias("rf")])
        elif isinstance(rf, pl.DataFrame):
            rate_col = rf.columns[1]
            rf_dframe = rf if rate_col == "rf" else rf.rename({rate_col: "rf"})
        else:
            raise TypeError("rf must be a float or DataFrame")

        joined = dframe.join(rf_dframe, on=date_col, how="inner")
        excluded = {date_col, "rf"}
        return joined.select(
            [pl.col(date_col)]
            + [(pl.col(col) - pl.col("rf")).alias(col) for col in joined.columns if col not in excluded]
        )

    returns = to_polars(returns)
    rf = to_polars(rf)
    benchmark = to_polars(benchmark)

    # Align returns and benchmark on their common dates if both are provided
    if benchmark is not None:
        joined_dates = returns.join(benchmark, on=date_col, how="inner").select(date_col)
        if joined_dates.is_empty():
            raise ValueError("No overlapping dates between returns and benchmark.")
        returns = returns.join(joined_dates, on=date_col, how="inner")
        benchmark = benchmark.join(joined_dates, on=date_col, how="inner")

    # Subtract the risk-free rate. The index is taken from the joined result rather
    # than from the input so that it always matches the rows of the excess returns,
    # even when a time-varying rf does not cover every date.
    excess = subtract_risk_free(returns, rf, date_col)
    index = excess.select(date_col)
    excess_returns = excess.drop(date_col)
    excess_benchmark = subtract_risk_free(benchmark, rf, date_col).drop(date_col) if benchmark is not None else None

    return Data(returns=excess_returns, benchmark=excess_benchmark, index=index)