Coverage for src/ifunnel/models/main.py: 0%

275 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-12 09:14 +0000

1from functools import lru_cache 

2from itertools import cycle 

3from math import ceil 

4from pathlib import Path 

5 

6import numpy as np 

7import pandas as pd 

8import plotly.express as px 

9import plotly.graph_objects as go 

10import plotly.io as pio 

11from loguru import logger 

12from plotly.subplots import make_subplots 

13from scipy.stats import gaussian_kde 

14 

15from .Clustering import cluster, pick_cluster 

16from .CVaRmodel import cvar_model 

17from .CVaRtargets import get_cvar_targets 

18from .dataAnalyser import final_stats, mean_an_returns 

19from .lifecycle.glidePathCreator import generate_risk_profiles 

20from .lifecycle.MVOlifecycleModel import ( 

21 get_port_allocations, 

22 riskadjust_model_scen, 

23) 

24from .MST import minimum_spanning_tree 

25from .MVOmodel import mvo_model 

26from .MVOtargets import get_mvo_targets 

27from .ScenarioGeneration import MomentGenerator, ScenarioGenerator 

28 

29pio.renderers.default = "browser" 

30 

31# that's unfortunate but will be addressed later 

32# ROOT_DIR = Path(__file__).parent.parent 

33# Load our data 

34# weekly_returns = pd.read_parquet(os.path.join(ROOT_DIR, "financial_data/all_etfs_rets.parquet.gzip")) 

35# tickers = [pair[0] for pair in weekly_returns.columns.values] 

36# names = [pair[1] for pair in weekly_returns.columns.values] 

37 

38 

@lru_cache(maxsize=1)  # memoise: a repeated call with the same argument reuses the bot
def initialize_bot(file=None):
    """Load the weekly-returns parquet file and build the trading bot from it.

    Args:
        file: Location of the parquet file to read. When ``None``, the file
            ``financial_data/all_etfs_rets.parquet.gzip`` located two directory
            levels above this module is used. Because of ``lru_cache`` the
            argument has to be hashable.

    Returns:
        A ``_TradeBot`` built from the loaded returns. Each column label of the
        file is read as a pair: element 0 is used as the ticker, element 1 as
        the name. The frame's columns are relabelled with the tickers only.
    """
    if file is None:
        package_root = Path(__file__).parent.parent
        file = package_root / "financial_data" / "all_etfs_rets.parquet.gzip"

    returns = pd.read_parquet(file)

    # Split every (ticker, name) column label into two parallel lists.
    tickers, names = [], []
    for label in returns.columns.values:
        tickers.append(label[0])
        names.append(label[1])

    returns.columns = tickers
    return _TradeBot(tickers, names, returns)

52 

53 

54class _TradeBot: 

55 """ 

56 Python class analysing financial products and based on machine learning algorithms and mathematical 

57 optimization suggesting optimal portfolio of assets. 

58 """ 

59 

60 def __init__(self, tickers, names, weekly_returns): 

61 self.tickers = tickers 

62 self.names = names 

63 self.weeklyReturns = weekly_returns 

64 self.min_date = str(weekly_returns.index[0]) 

65 self.max_date = str(weekly_returns.index[-1]) 

66 

67 weekly_returns.columns = tickers 

68 

69 @staticmethod 

70 def __plot_backtest( 

71 performance: pd.DataFrame, 

72 performance_benchmark: pd.DataFrame, 

73 composition: pd.DataFrame, 

74 names: list, 

75 tickers: list, 

76 ) -> tuple[px.line, go.Figure]: 

77 """METHOD TO PLOT THE BACKTEST RESULTS""" 

78 

79 performance.index = pd.to_datetime(performance.index.values, utc=True) 

80 

81 # ** PERFORMANCE GRAPH ** 

82 try: 

83 df_to_plot = pd.concat([performance, performance_benchmark], axis=1) 

84 except Exception: 

85 logger.warning("⚠️ Old data format.") 

86 performance.index = [date.date() for date in performance.index] # needed for old data 

87 df_to_plot = pd.concat([performance, performance_benchmark], axis=1) 

88 

89 color_discrete_map = { 

90 "Portfolio_Value": "#21304f", 

91 "Benchmark_Value": "#f58f02", 

92 } 

93 fig = px.line( 

94 df_to_plot, 

95 x=df_to_plot.index, 

96 y=df_to_plot.columns, 

97 title="Comparison of different strategies", 

98 color_discrete_map=color_discrete_map, 

99 ) 

100 fig_performance = fig 

101 

102 # ** COMPOSITION GRAPH ** 

103 # change ISIN to NAMES in allocation df 

104 composition_names = [] 

105 for ticker in composition.columns: 

106 ticker_index = list(tickers).index(ticker) 

107 composition_names.append(list(names)[ticker_index]) 

108 composition.columns = composition_names 

109 

110 composition = composition.loc[:, (composition != 0).any(axis=0)] 

111 data = [] 

112 idx_color = 0 

113 composition_color = ( 

114 px.colors.sequential.turbid 

115 + px.colors.sequential.Brwnyl 

116 + px.colors.sequential.YlOrBr 

117 + px.colors.sequential.gray 

118 + px.colors.sequential.Mint 

119 + px.colors.sequential.dense 

120 + px.colors.sequential.Plasma 

121 + px.colors.sequential.Viridis 

122 + px.colors.sequential.Cividis 

123 ) 

124 for isin in composition.columns: 

125 trace = go.Bar( 

126 x=composition.index, 

127 y=composition[isin], 

128 name=str(isin), 

129 marker_color=composition_color[idx_color % len(composition_color)], # custom color 

130 ) 

131 data.append(trace) 

132 idx_color += 1 

133 

134 layout = go.Layout(barmode="stack") 

135 fig = go.Figure(data=data, layout=layout) 

136 fig.update_layout( 

137 title="Portfolio Composition", 

138 xaxis_title="Number of the Investment Period", 

139 yaxis_title="Composition", 

140 legend_title="Name of the Fund", 

141 ) 

142 fig.layout.yaxis.tickformat = ",.1%" 

143 fig_composition = fig 

144 

145 # Show figure if needed 

146 # fig.show() 

147 

148 return fig_performance, fig_composition 

149 

150 @staticmethod 

151 def __plot_portfolio_densities( 

152 portfolio_performance_dict: dict, 

153 compositions: dict[str, pd.DataFrame], 

154 tickers: list, 

155 names: list, 

156 ) -> tuple[go.Figure, dict[str, go.Figure], go.Figure]: 

157 """METHOD TO PLOT THE LIFECYCLE SIMULATION RESULTS""" 

158 

159 # Define colors 

160 colors = [ 

161 "#99A4AE", # gray50 

162 "#3b4956", # dark 

163 "#b7ada5", # secondary 

164 "#4099da", # blue 

165 "#8ecdc8", # aqua 

166 "#e85757", # coral 

167 "#fdd779", # sun 

168 "#644c76", # eggplant 

169 "#D8D1CA", # warmGray50 

170 ] 

171 

172 color_cycle = cycle(colors) # To cycle through colors 

173 fig = go.Figure() 

174 

175 max_density_across_all_datasets = 0 # Initialize max density tracker 

176 

177 for label, df in portfolio_performance_dict.items(): 

178 # Kernel Density Estimation for each dataset 

179 kde = gaussian_kde(df["Terminal Wealth"]) 

180 

181 # Generating a range of values to evaluate the KDE 

182 x_min = df["Terminal Wealth"].min() 

183 x_max = df["Terminal Wealth"].max() 

184 x = np.linspace(x_min, x_max, 1000) 

185 

186 # Evaluate the KDE 

187 density = kde(x) 

188 

189 # Update max density if current density peak is higher 

190 max_density_across_all_datasets = max(max_density_across_all_datasets, max(density)) 

191 

192 # Create line plot trace for this dataset 

193 fig.add_trace( 

194 go.Scatter( 

195 x=x, 

196 y=density, 

197 mode="lines", 

198 name=label, # Use the dictionary key as the label 

199 line=dict(width=2.5, color=next(color_cycle)), # Assign color from Orsted-Colors 

200 ) 

201 ) 

202 

203 # Add a dashed vertical line at x=0 

204 fig.add_shape( 

205 type="line", 

206 x0=0, 

207 y0=0, 

208 x1=0, 

209 y1=max_density_across_all_datasets, # Use the max density across all datasets 

210 line=dict( 

211 color="Black", 

212 width=3, 

213 dash="dash", # Define dash pattern 

214 ), 

215 ) 

216 """ 

217 # Update the layout 

218 fig.update_layout( 

219 title_text='Density function(s) of terminal wealth for risk classes in 1000 different scenarios.', 

220 xaxis_title='Terminal Wealth', 

221 yaxis_title='Density', 

222 legend_title='Risk Class', 

223 template='plotly_white' 

224 ) 

225 """ 

226 # Update the layout with larger fonts 

227 fig.update_layout( 

228 title_text="Density function(s) of the end portfolio value for various glide paths.", 

229 title_font=dict(size=24), # Increase title font size 

230 xaxis_title="Target date portfolio value", 

231 xaxis_title_font=dict(size=18), # Increase x-axis title font size 

232 xaxis_tickfont=dict(size=16), # Increase x-axis tick label font size 

233 yaxis_title="Density", 

234 yaxis_title_font=dict(size=18), # Increase y-axis title font size 

235 yaxis_tickfont=dict(size=16), # Increase y-axis tick label font size 

236 legend_title="Risb Budget glide path", 

237 legend_title_font=dict(size=18), # Increase legend title font size 

238 legend_font=dict(size=16), # Increase legend text font size 

239 template="plotly_white", 

240 ) 

241 

242 # Show the figure in a browser 

243 # fig.show(renderer="browser") 

244 

245 composition_figures = {} 

246 filtered_compositions = {name: comp for name, comp in compositions.items() if "reverse" not in name} 

247 num_portfolios = len(filtered_compositions) 

248 cols = 2 if num_portfolios > 1 else 1 

249 rows = ceil( 

250 num_portfolios / cols 

251 ) # Calculate the number of rows needed based on the total number of compositions 

252 

253 subplot_titles = [f"Portfolio Composition: {name}" for name in filtered_compositions.keys()] 

254 fig_subplots = make_subplots( 

255 rows=rows, 

256 cols=cols, 

257 subplot_titles=subplot_titles, 

258 vertical_spacing=0.1, 

259 horizontal_spacing=0.05, 

260 ) 

261 

262 tickers_in_legend = set() 

263 current_plot = 1 # Keep track of the current plot index to correctly calculate row and col 

264 

265 for portfolio_name, composition in filtered_compositions.items(): 

266 composition_names = [] 

267 for ticker in composition.columns[:-1]: 

268 ticker_index = list(tickers).index(ticker) 

269 composition_names.append(list(names)[ticker_index]) 

270 if "Cash" not in composition_names: 

271 composition_names.append("Cash") 

272 composition.columns = composition_names 

273 composition = composition.loc[:, (composition != 0).any(axis=0)] 

274 

275 idx_color = 0 

276 composition_color = ( 

277 px.colors.sequential.turbid 

278 + px.colors.sequential.Brwnyl 

279 + px.colors.sequential.YlOrBr 

280 + px.colors.sequential.gray 

281 + px.colors.sequential.Mint 

282 + px.colors.sequential.dense 

283 + px.colors.sequential.Plasma 

284 + px.colors.sequential.Viridis 

285 + px.colors.sequential.Cividis 

286 ) 

287 

288 # Create an individual figure for the current portfolio 

289 individual_fig = go.Figure() 

290 

291 for isin in composition.columns: 

292 show_legend = isin not in tickers_in_legend 

293 tickers_in_legend.add(isin) 

294 

295 trace = go.Bar( 

296 x=composition.index, 

297 y=composition[isin], 

298 name=str(isin), 

299 marker_color=composition_color[idx_color % len(composition_color)], 

300 showlegend=show_legend, 

301 ) 

302 

303 # Add trace to both the subplot and the individual figure 

304 row, col = divmod(current_plot - 1, cols) 

305 fig_subplots.add_trace(trace, row=row + 1, col=col + 1) 

306 individual_fig.add_trace(trace) 

307 

308 idx_color += 1 

309 

310 # Configure the individual figure layout 

311 individual_fig.update_layout( 

312 title=f"Portfolio Composition: {portfolio_name}", 

313 plot_bgcolor="white", 

314 barmode="stack", 

315 ) 

316 individual_fig["layout"]["yaxis"].tickformat = ",.1%" 

317 

318 # Store the individual figure in the dictionary 

319 composition_figures[portfolio_name] = individual_fig 

320 

321 current_plot += 1 

322 

323 fig_subplots.update_layout( 

324 title="Portfolio Compositions", 

325 height=500 * rows, 

326 width=1000 * cols, 

327 plot_bgcolor="white", 

328 barmode="stack", 

329 ) 

330 # Update y-axis tick format for all subplots 

331 for i in range(1, cols * rows + 1): 

332 fig_subplots["layout"][f"yaxis{i}"].tickformat = ",.1%" 

333 

334 # fig_subplots.show() 

335 

336 return fig, composition_figures, fig_subplots 

337 

338 def get_stat(self, start_date: str, end_date: str) -> pd.DataFrame: 

339 """METHOD COMPUTING ANNUAL RETURNS, ANNUAL STD. DEV. & SHARPE RATIO OF ASSETS""" 

340 

341 # ANALYZE THE DATA for a given time period 

342 weekly_data = self.weeklyReturns[ 

343 (self.weeklyReturns.index >= start_date) & (self.weeklyReturns.index <= end_date) 

344 ].copy() 

345 

346 # Create table with summary statistics 

347 mu_ga = mean_an_returns(weekly_data) # Annualised geometric mean of returns 

348 std_dev_a = weekly_data.std(axis=0) * np.sqrt(52) # Annualised standard deviation of returns 

349 sharpe = round(mu_ga / std_dev_a, 2) # Sharpe ratio of each financial product 

350 

351 # Write all results into a data frame 

352 stat_df = pd.concat([mu_ga, std_dev_a, sharpe], axis=1) 

353 stat_df.columns = [ 

354 "Average Annual Returns", 

355 "Standard Deviation of Returns", 

356 "Sharpe Ratio", 

357 ] 

358 stat_df["ISIN"] = stat_df.index # Add names into the table 

359 stat_df["Name"] = self.names 

360 stat_df["Size"] = 1 

361 stat_df["Type"] = "ETF" 

362 

363 return stat_df 

364 

365 def get_top_performing_assets(self, time_periods: list[tuple[str, str]], top_percent: float = 0.2) -> list[str]: 

366 stats_for_periods = {f"period_{i}": self.get_stat(*period) for i, period in enumerate(time_periods, 1)} 

367 

368 # Create 'Risk class' column where the value is 

369 # 'Risk Class 1' if Standard Deviation of Returns <= 0.005 

370 # 'Risk Class 2' if > 0.005 and < 0.02 

371 # 'Risk Class 3' if > 0.02 and < 0.05 

372 # 'Risk Class 4' if > 0.05 and < 0.1 

373 # 'Risk Class 5' if > 0.1 and < 0.15 

374 # 'Risk Class 6' if > 0.15 and < 0.25 then 

375 # 'Risk Class 7' if > 0.25 

376 risk_level = { 

377 "Risk Class 1": 0.005, 

378 "Risk Class 2": 0.02, 

379 "Risk Class 3": 0.05, 

380 "Risk Class 4": 0.10, 

381 "Risk Class 5": 0.15, 

382 "Risk Class 6": 0.25, 

383 "Risk Class 7": 1, 

384 } 

385 for data in stats_for_periods.values(): 

386 data["Risk Class"] = pd.cut( 

387 data["Standard Deviation of Returns"], 

388 bins=[-1] + list(risk_level.values()), 

389 labels=list(risk_level.keys()), 

390 right=True, 

391 ) 

392 # For each data_period and each risk class, find the top 20% best performing assets 

393 # mark them as True in column 'Top Performer' 

394 for data in stats_for_periods.values(): 

395 for risk_class in risk_level.keys(): 

396 data.loc[ 

397 data["Risk Class"] == risk_class, 

398 "Top Performer", 

399 ] = data.loc[data["Risk Class"] == risk_class, "Sharpe Ratio"].rank(pct=True) > (1 - top_percent) 

400 # for each period, save the pandas dataframe into excel files 

401 # for index, data in enumerate(stats_for_periods.values()): 

402 # data.to_excel(f"top_performers_{time_periods[index]}.xlsx") 

403 

404 # ISIN codes for assets which were top performers in all n periods 

405 top_isins = stats_for_periods["period_1"].loc[stats_for_periods["period_1"]["Top Performer"], "ISIN"].values 

406 for data in stats_for_periods.values(): 

407 top_isins = np.intersect1d(top_isins, data.loc[data["Top Performer"], "ISIN"].values) 

408 

409 top_names = [self.names[self.tickers.index(isin)] for isin in top_isins] 

410 

411 return top_names 

412 

413 def plot_dots( 

414 self, 

415 start_date: str, 

416 end_date: str, 

417 ml: str = "", 

418 ml_subset: list | pd.DataFrame = None, 

419 fund_set: list | None = None, 

420 top_performers: list | None = None, 

421 optimal_portfolio: list | None = None, 

422 benchmark: list | None = None, 

423 ) -> px.scatter: 

424 """METHOD TO PLOT THE OVERVIEW OF THE FINANCIAL PRODUCTS IN TERMS OF RISK AND RETURNS""" 

425 fund_set = fund_set if fund_set else [] 

426 top_performers = top_performers if top_performers else [] 

427 

428 # Get statistics for a given time period 

429 data = self.get_stat(start_date, end_date) 

430 

431 # Add data about the optimal portfolio and benchmark for plotting 

432 if optimal_portfolio: 

433 data.loc[optimal_portfolio[4]] = optimal_portfolio 

434 if benchmark: 

435 data.loc[benchmark[4]] = benchmark 

436 

437 # IF WE WANT TO HIGHLIGHT THE SUBSET OF ASSETS BASED ON ML 

438 if ml == "MST": 

439 data.loc[:, "Type"] = "Funds" 

440 for fund in ml_subset: 

441 data.loc[fund, "Type"] = "MST subset" 

442 if ml == "Clustering": 

443 data.loc[:, "Type"] = ml_subset.loc[:, "Cluster"] 

444 

445 # If selected any fund for comparison 

446 for fund in fund_set: 

447 isin_idx = list(self.names).index(fund) 

448 data.loc[self.tickers[isin_idx], "Type"] = str(data.loc[self.tickers[isin_idx], "Name"]) 

449 data.loc[self.tickers[isin_idx], "Size"] = 3 

450 

451 for fund in top_performers: 

452 isin_idx = list(self.names).index(fund) 

453 data.loc[self.tickers[isin_idx], "Type"] = "Top Performer" 

454 data.loc[self.tickers[isin_idx], "Size"] = 3 

455 

456 # PLOTTING Data 

457 color_discrete_map = { 

458 "ETF": "#21304f", 

459 "Mutual Fund": "#f58f02", 

460 "Funds": "#21304f", 

461 "MST subset": "#f58f02", 

462 "Top Performer": "#f58f02", 

463 "Cluster 1": "#21304f", 

464 "Cluster 2": "#f58f02", 

465 "Benchmark Portfolio": "#f58f02", 

466 "Optimal Portfolio": "olive", 

467 } 

468 fig = px.scatter( 

469 data, 

470 x="Standard Deviation of Returns", 

471 y="Average Annual Returns", 

472 color="Type", 

473 size="Size", 

474 size_max=8, 

475 hover_name="Name", 

476 hover_data={"Sharpe Ratio": True, "ISIN": True, "Size": False}, 

477 color_discrete_map=color_discrete_map, 

478 title="Annual Returns and Standard Deviation of Returns from " + start_date[:10] + " to " + end_date[:10], 

479 ) 

480 

481 # AXIS IN PERCENTAGES 

482 fig.layout.yaxis.tickformat = ",.1%" 

483 fig.layout.xaxis.tickformat = ",.1%" 

484 

485 # RISK LEVEL MARKER 

486 min_risk = data["Standard Deviation of Returns"].min() 

487 max_risk = data["Standard Deviation of Returns"].max() 

488 risk_level = { 

489 "Risk Class 1": 0.005, 

490 "Risk Class 2": 0.02, 

491 "Risk Class 3": 0.05, 

492 "Risk Class 4": 0.10, 

493 "Risk Class 5": 0.15, 

494 "Risk Class 6": 0.25, 

495 "Risk Class 7": max_risk, 

496 } 

497 # Initialize dynamic risk levels 

498 actual_risk_level = set() 

499 for i in range(1, 8): 

500 k = "Risk Class " + str(i) 

501 if (risk_level[k] >= min_risk) and (risk_level[k] <= max_risk): 

502 actual_risk_level.add(i) 

503 

504 if max(actual_risk_level) < 7: 

505 actual_risk_level.add(max(actual_risk_level) + 1) # Add the final risk level 

506 

507 for level in actual_risk_level: 

508 k = "Risk Class " + str(level) 

509 fig.add_vline( 

510 x=risk_level[k], line_width=1, line_dash="dash", line_color="#7c90a0" 

511 ) # annotation_text=k, annotation_position="top left") 

512 fig.add_annotation( 

513 x=risk_level[k] - 0.01, 

514 y=max(data["Average Annual Returns"]), 

515 text=k, 

516 textangle=-90, 

517 showarrow=False, 

518 ) 

519 

520 # RETURN LEVEL MARKER 

521 fig.add_hline(y=0, line_width=1.5, line_color="rgba(233, 30, 99, 0.5)") 

522 

523 # TITLES 

524 fig.update_annotations(font_color="#000000") 

525 fig.update_layout( 

526 xaxis_title="Annualised standard deviation of returns (Risk)", 

527 yaxis_title="Annualised average returns", 

528 ) 

529 # Position of legend 

530 fig.update_layout(legend=dict(yanchor="bottom", y=0.01, xanchor="left", x=0.01)) 

531 # fig.show() 

532 return fig 

533 

534 def mst(self, start_date: str, end_date: str, n_mst_runs: int, plot: bool = False): 

535 """METHOD TO RUN MST METHOD AND PRINT RESULTS""" 

536 fig, subset_mst = None, [] 

537 

538 # Starting subset of data for MST 

539 subset_mst_df = self.weeklyReturns[ 

540 (self.weeklyReturns.index >= start_date) & (self.weeklyReturns.index <= end_date) 

541 ].copy() 

542 

543 for i in range(n_mst_runs): 

544 subset_mst, subset_mst_df, corr_mst_avg, pdi_mst = minimum_spanning_tree(subset_mst_df) 

545 

546 # PLOTTING RESULTS 

547 if plot and len(subset_mst) > 0: 

548 end_df_date = str(subset_mst_df.index.date[-1]) 

549 fig = self.plot_dots( 

550 start_date=start_date, 

551 end_date=end_df_date, 

552 ml="MST", 

553 ml_subset=subset_mst, 

554 ) 

555 

556 return fig, subset_mst 

557 

558 def clustering( 

559 self, 

560 start_date: str, 

561 end_date: str, 

562 n_clusters: int, 

563 n_assets: int, 

564 plot: bool = False, 

565 ): 

566 """ 

567 METHOD TO RUN MST METHOD AND PRINT RESULTS 

568 """ 

569 fig = None 

570 dataset = self.weeklyReturns[ 

571 (self.weeklyReturns.index >= start_date) & (self.weeklyReturns.index <= end_date) 

572 ].copy() 

573 # CLUSTER DATA 

574 clusters = cluster(dataset, n_clusters) 

575 

576 # SELECT ASSETS 

577 end_dataset_date = str(dataset.index.date[-1]) 

578 clustering_stats = self.get_stat(start_date, end_dataset_date) 

579 subset_clustering, subset_clustering_df = pick_cluster( 

580 data=dataset, stat=clustering_stats, ml=clusters, n_assets=n_assets 

581 ) # Number of assets from each cluster 

582 

583 # PLOTTING DATA 

584 if plot: 

585 fig = self.plot_dots( 

586 start_date=start_date, 

587 end_date=end_dataset_date, 

588 ml="Clustering", 

589 ml_subset=clusters, 

590 ) 

591 

592 # fig.show() 

593 

594 return fig, subset_clustering 

595 

596 def backtest( 

597 self, 

598 start_train_date: str, 

599 start_test_date: str, 

600 end_test_date: str, 

601 subset_of_assets: list, 

602 benchmarks: list, 

603 scenarios_type: str, 

604 n_simulations: int, 

605 model: str, 

606 solver: str = "CLARABEL", 

607 lower_bound: int = 0, 

608 ) -> tuple[pd.DataFrame, pd.DataFrame, px.line, go.Figure]: 

609 """METHOD TO COMPUTE THE BACKTEST""" 

610 

611 # Find Benchmarks' ISIN codes 

612 benchmark_isin = [self.tickers[list(self.names).index(name)] for name in benchmarks] 

613 

614 # Get train and testing datasets 

615 whole_dataset = self.weeklyReturns[ 

616 (self.weeklyReturns.index >= start_train_date) & (self.weeklyReturns.index <= end_test_date) 

617 ].copy() 

618 test_dataset = self.weeklyReturns[ 

619 (self.weeklyReturns.index > start_test_date) & (self.weeklyReturns.index <= end_test_date) 

620 ].copy() 

621 

622 # SCENARIO GENERATION 

623 # --------------------------------------------------------------------------------------------------- 

624 # Create scenario generator 

625 sg = ScenarioGenerator(np.random.default_rng()) 

626 

627 if model == "Markowitz model" or scenarios_type == "MonteCarlo": 

628 sigma_lst, mu_lst = MomentGenerator.generate_sigma_mu_for_test_periods( 

629 data=whole_dataset[subset_of_assets], n_test=len(test_dataset.index) 

630 ) 

631 

632 if scenarios_type == "MonteCarlo": 

633 scenarios = sg.monte_carlo( 

634 data=whole_dataset[subset_of_assets], # subsetMST_df or subsetCLUST_df 

635 n_simulations=n_simulations, 

636 n_test=len(test_dataset.index), 

637 sigma_lst=sigma_lst, 

638 mu_lst=mu_lst, 

639 ) 

640 else: 

641 scenarios = sg.bootstrapping( 

642 data=whole_dataset[subset_of_assets], # subsetMST or subsetCLUST 

643 n_simulations=n_simulations, # number of scenarios per period 

644 n_test=len(test_dataset.index), 

645 ) # number of periods 

646 

647 # TARGETS GENERATION 

648 # --------------------------------------------------------------------------------------------------- 

649 start_of_test_dataset = str(test_dataset.index.date[0]) 

650 if model == "Markowitz model": 

651 targets, benchmark_port_val = get_mvo_targets( 

652 test_date=start_of_test_dataset, 

653 benchmark=benchmark_isin, 

654 budget=100, 

655 data=whole_dataset, 

656 ) 

657 

658 else: 

659 targets, benchmark_port_val = get_cvar_targets( 

660 test_date=start_of_test_dataset, 

661 benchmark=benchmark_isin, 

662 budget=100, 

663 cvar_alpha=0.05, 

664 data=whole_dataset, 

665 scgen=sg, 

666 n_simulations=n_simulations, 

667 ) 

668 

669 # MATHEMATICAL MODELING 

670 # --------------------------------------------------------------------------------------------------- 

671 if model == "Markowitz model": 

672 port_allocation, port_value, port_cvar = mvo_model( 

673 test_ret=test_dataset[subset_of_assets], 

674 mu_lst=mu_lst, 

675 sigma_lst=sigma_lst, 

676 targets=targets, 

677 budget=100, 

678 trans_cost=0.001, 

679 max_weight=1, 

680 solver=solver, 

681 lower_bound=lower_bound, 

682 ) 

683 # inaccurate=inaccurate_solution) 

684 

685 else: 

686 port_allocation, port_value, port_cvar = cvar_model( 

687 test_ret=test_dataset[subset_of_assets], 

688 scenarios=scenarios, # Scenarios 

689 targets=targets, # Target 

690 budget=100, 

691 cvar_alpha=0.05, 

692 trans_cost=0.001, 

693 max_weight=1, 

694 solver=solver, 

695 lower_bound=lower_bound, 

696 ) 

697 # inaccurate=inaccurate_solution) 

698 

699 # PLOTTING 

700 # ------------------------------------------------------------------ 

701 fig_performance, fig_composition = self.__plot_backtest( 

702 performance=port_value.copy(), 

703 performance_benchmark=benchmark_port_val.copy(), 

704 composition=port_allocation, 

705 names=self.names, 

706 tickers=self.tickers, 

707 ) 

708 

709 # RETURN STATISTICS 

710 # ------------------------------------------------------------------ 

711 optimal_portfolio_stat = final_stats(port_value) 

712 benchmark_stat = final_stats(benchmark_port_val) 

713 

714 return optimal_portfolio_stat, benchmark_stat, fig_performance, fig_composition 

715 

716 def lifecycle_scenario_analysis( 

717 self, 

718 subset_of_assets: list, 

719 scenarios_type: str, 

720 n_simulations: int, 

721 end_year: int, 

722 withdrawals: int, 

723 initial_risk_appetite: float, 

724 initial_budget: int, 

725 rng_seed=0, 

726 test_split: float = False, 

727 ) -> tuple[dict, pd.DataFrame, go.Figure, go.Figure, dict, dict, go.Figure]: 

728 """METHOD TO COMPUTE THE LIFECYCLE SCENARIO ANALYSIS""" 

729 

730 # ------------------------------- INITIALIZE FUNCTION ------------------------------- 

731 n_periods = end_year - 2023 

732 withdrawal_lst = [withdrawals * (1 + 0.04) ** i for i in range(n_periods)] 

733 

734 # ------------------------------- PARAMETER INITIALIZATION ------------------------------- 

735 if test_split != 0: 

736 sampling_set, estimating_set = MomentGenerator.split_dataset( 

737 data=self.weeklyReturns[subset_of_assets], sampling_ratio=test_split 

738 ) 

739 

740 _, _, sigma_weekly, mu_weekly = MomentGenerator.generate_annual_sigma_mu_with_risk_free(data=sampling_set) 

741 

742 sigma, mu, _, _ = MomentGenerator.generate_annual_sigma_mu_with_risk_free(data=estimating_set) 

743 else: 

744 sigma, mu, sigma_weekly, mu_weekly = MomentGenerator.generate_annual_sigma_mu_with_risk_free( 

745 data=self.weeklyReturns[subset_of_assets] 

746 ) 

747 

748 # ------------------------------- SCENARIO GENERATION ------------------------------- 

749 if rng_seed == 0: 

750 sg = ScenarioGenerator(np.random.default_rng()) 

751 else: 

752 sg = ScenarioGenerator(np.random.default_rng(rng_seed)) 

753 

754 if scenarios_type == "MonteCarlo": 

755 scenarios = sg.MC_simulation_annual_from_weekly( 

756 weekly_mu=mu_weekly, 

757 weekly_sigma=sigma_weekly, 

758 n_simulations=n_simulations, 

759 n_years=n_periods, 

760 ) 

761 

762 elif scenarios_type == "Bootstrap": 

763 scenarios = sg.bootstrap_simulation_annual_from_weekly( 

764 historical_weekly_returns=self.weeklyReturns[subset_of_assets], 

765 n_simulations=n_simulations, 

766 n_years=n_periods, 

767 ) 

768 

769 else: 

770 raise ValueError( 

771 "It appears that a scenario method other than MonteCarlo or Bootstrap has been chosen. " 

772 "Please check for spelling mistakes." 

773 ) 

774 

775 # ------------------------------- Allocation Target Generation ------------------------------- 

776 glide_paths_df, fig_glidepaths = generate_risk_profiles( 

777 n_periods=n_periods, initial_risk=initial_risk_appetite, minimum_risk=0.01 

778 ) 

779 

780 allocation_targets = {} 

781 for r in glide_paths_df.columns: 

782 targets = get_port_allocations( 

783 mu_lst=mu, 

784 sigma_lst=sigma, 

785 targets=glide_paths_df[r], 

786 max_weight=1 / 4, 

787 solver="CLARABEL", 

788 ) 

789 allocation_targets[f"{r}"] = targets 

790 

791 # ------------------------------- MATHEMATICAL MODELING ------------------------------- 

792 exhibition_summary = pd.DataFrame() 

793 terminal_wealth_dict = {} 

794 

795 for key, df in allocation_targets.items(): 

796 logger.info( 

797 f"Optimizing portfolio for {key} over {n_simulations} scenarios. An info message will " 

798 f"appear, when we are halfway through the scenarios for the current strategy." 

799 ) 

800 portfolio_df, mean_allocations_df, analysis_metrics = riskadjust_model_scen( 

801 scen=scenarios[:, :, :], 

802 targets=df, 

803 budget=initial_budget, 

804 trans_cost=0.002, 

805 withdrawal_lst=withdrawal_lst, 

806 interest_rate=0.04, 

807 ) 

808 

809 # Add the analysis_metrics DataFrame as a new column in the storage DataFrame 

810 exhibition_summary[key] = analysis_metrics.squeeze() 

811 

812 portfolio_df["Terminal Wealth"] = pd.to_numeric(portfolio_df["Terminal Wealth"], errors="coerce") 

813 terminal_wealth_dict[f"{key}"] = portfolio_df 

814 

815 # ------------------------------- PLOTTING ------------------------------- 

816 fig_performance, fig_compositions, fig_compositions_all = self.__plot_portfolio_densities( 

817 portfolio_performance_dict=terminal_wealth_dict, 

818 compositions=allocation_targets, 

819 tickers=self.tickers, 

820 names=self.names, 

821 ) 

822 

823 # ------------------------------- RETURN STATISTICS ------------------------------- 

824 return ( 

825 terminal_wealth_dict, 

826 exhibition_summary, 

827 fig_performance, 

828 fig_glidepaths, 

829 allocation_targets, 

830 fig_compositions, 

831 fig_compositions_all, 

832 ) 

833 

834 

if __name__ == "__main__":
    # Demo run: build the bot from the bundled default data set and exercise every feature.
    algo = initialize_bot()

    # Assets that are top performers in each of three consecutive sub-periods
    review_periods = [
        (algo.min_date, "2017-01-01"),
        ("2017-01-02", "2020-01-01"),
        ("2020-01-02", algo.max_date),
    ]
    top_assets = algo.get_top_performing_assets(time_periods=review_periods, top_percent=0.2)

    # Interactive risk/return overview with the top performers highlighted
    algo.plot_dots(
        start_date=algo.min_date,
        end_date=algo.max_date,
        top_performers=top_assets,
    )

    # Minimum spanning tree selection (figure not requested, so only the subset is kept)
    _, mst_subset_of_assets = algo.mst(
        start_date="2000-01-01",
        end_date="2024-01-01",
        n_mst_runs=5,
        plot=False,
    )

    # Clustering selection
    _, clustering_subset_of_assets = algo.clustering(
        start_date="2015-12-23",
        end_date="2017-07-01",
        n_clusters=3,
        n_assets=10,
        plot=True,
    )

    # Lifecycle simulation on the MST subset
    lifecycle = algo.lifecycle_scenario_analysis(
        subset_of_assets=mst_subset_of_assets,
        scenarios_type="MonteCarlo",
        n_simulations=1000,
        end_year=2050,
        withdrawals=51000,
        initial_risk_appetite=0.15,
        initial_budget=137000,
    )

    # Backtest of the Markowitz model on the MST subset
    backtest = algo.backtest(
        start_train_date="2015-12-23",
        start_test_date="2018-09-24",
        end_test_date="2019-09-01",
        subset_of_assets=mst_subset_of_assets,
        benchmarks=["BankInvest Danske Aktier W"],
        scenarios_type="Bootstrapping",
        n_simulations=500,
        model="Markowitz model",
        lower_bound=0,
    )