Coverage for src/ifunnel/models/main.py: 0%

1from functools import lru_cache

2from itertools import cycle

3from math import ceil

4from pathlib import Path

6import numpy as np

7import pandas as pd

8import plotly.express as px

9import plotly.graph_objects as go

10import plotly.io as pio

11from loguru import logger

12from plotly.subplots import make_subplots

13from scipy.stats import gaussian_kde

15from .Clustering import cluster, pick_cluster

16from .CVaRmodel import cvar_model

17from .CVaRtargets import get_cvar_targets

18from .dataAnalyser import final_stats, mean_an_returns

19from .lifecycle.glidePathCreator import generate_risk_profiles

20from .lifecycle.MVOlifecycleModel import (

21 get_port_allocations,

22 riskadjust_model_scen,

23)

24from .MST import minimum_spanning_tree

25from .MVOmodel import mvo_model

26from .MVOtargets import get_mvo_targets

27from .ScenarioGeneration import MomentGenerator, ScenarioGenerator

# Module-level side effect: importing this module makes "browser" plotly's default renderer.
pio.renderers.default = "browser"

31# that's unfortunate but will be addressed later

32# ROOT_DIR = Path(__file__).parent.parent

33# Load our data

34# weekly_returns = pd.read_parquet(os.path.join(ROOT_DIR, "financial_data/all_etfs_rets.parquet.gzip"))

35# tickers = [pair[0] for pair in weekly_returns.columns.values]

36# names = [pair[1] for pair in weekly_returns.columns.values]

@lru_cache(maxsize=1)  # Cache the result of this function
def initialize_bot(file=None):
    """Build the trading bot from a parquet file of weekly returns.

    Args:
        file: Location of the parquet file to read. When ``None``, the file
            ``financial_data/all_etfs_rets.parquet.gzip`` two directory levels
            above this module is used.

    Returns:
        A ``_TradeBot`` wrapping the loaded returns. Each column label of the
        file is read as a pair: element 0 is taken as the ticker and element 1
        as the display name; the frame's columns are then relabelled with the
        tickers only.

    Because of the ``lru_cache`` decorator, repeated calls with the same
    argument return the same bot instance instead of re-reading the file.
    """
    data_path = file
    if data_path is None:
        package_root = Path(__file__).parent.parent
        data_path = package_root / "financial_data" / "all_etfs_rets.parquet.gzip"

    returns = pd.read_parquet(data_path)

    # Split the (ticker, name) column labels into two parallel lists.
    label_pairs = returns.columns.values
    tickers = [label[0] for label in label_pairs]
    names = [label[1] for label in label_pairs]

    returns.columns = tickers
    return _TradeBot(tickers, names, returns)

class _TradeBot:
    """
    Analyse financial products and, based on machine-learning algorithms and mathematical
    optimization, suggest an optimal portfolio of assets.

    The bot keeps a frame of weekly returns (one column per ticker) together with the
    parallel ``tickers`` / ``names`` sequences. Its public methods compute summary
    statistics, select asset subsets (MST, clustering), run a backtest and run a
    lifecycle scenario analysis; most of them also return plotly figures.
    """

    # Upper bounds, in annualised standard deviation of returns, of risk classes 1-6.
    # "Risk Class 7" is the open-ended top class; each user of this table supplies
    # its own upper bound for it.
    _RISK_CLASS_BOUNDS = {
        "Risk Class 1": 0.005,
        "Risk Class 2": 0.02,
        "Risk Class 3": 0.05,
        "Risk Class 4": 0.10,
        "Risk Class 5": 0.15,
        "Risk Class 6": 0.25,
    }

    def __init__(self, tickers, names, weekly_returns):
        """Store the asset universe and its weekly returns.

        Args:
            tickers: Asset identifiers, one per column of ``weekly_returns``.
            names: Display names, positionally parallel to ``tickers``.
            weekly_returns: Frame of weekly returns indexed by date. Its first
                and last index values become ``min_date`` / ``max_date``.
        """
        self.tickers = tickers
        self.names = names
        self.weeklyReturns = weekly_returns
        self.min_date = str(weekly_returns.index[0])
        self.max_date = str(weekly_returns.index[-1])

        # NOTE: no copy is taken, so this relabels the caller's frame in place.
        weekly_returns.columns = tickers

    # ------------------------------------------------------------------ helpers

    def _returns_between(self, start_date: str, end_date: str, include_start: bool = True) -> pd.DataFrame:
        """Return a copy of the weekly returns whose index lies in the given window.

        The end date is always inclusive; the start date is inclusive unless
        ``include_start`` is False.
        """
        index = self.weeklyReturns.index
        after_start = index >= start_date if include_start else index > start_date
        return self.weeklyReturns[after_start & (index <= end_date)].copy()

    @staticmethod
    def _display_names(columns, tickers, names) -> list:
        """Translate ticker labels into display names, keeping the order of ``columns``.

        Raises:
            ValueError: If a label is not among ``tickers`` (the same exception type
                the previous ``list.index`` based lookup raised).
        """
        lookup = {}
        for ticker, name in zip(tickers, names):
            lookup.setdefault(ticker, name)  # first occurrence wins, like list.index

        for column in columns:
            if column not in lookup:
                raise ValueError(f"{column!r} is not in list")
        return [lookup[column] for column in columns]

    @staticmethod
    def _composition_palette() -> list:
        """Return the colour sequence used for the stacked composition bars."""
        sequential = px.colors.sequential
        return (
            sequential.turbid
            + sequential.Brwnyl
            + sequential.YlOrBr
            + sequential.gray
            + sequential.Mint
            + sequential.dense
            + sequential.Plasma
            + sequential.Viridis
            + sequential.Cividis
        )

    # ----------------------------------------------------------------- plotting

    @staticmethod
    def __plot_backtest(
        performance: pd.DataFrame,
        performance_benchmark: pd.DataFrame,
        composition: pd.DataFrame,
        names: list,
        tickers: list,
    ) -> tuple[go.Figure, go.Figure]:
        """Plot the backtest results.

        Args:
            performance: Portfolio value over time. Its index is converted to
                UTC timestamps in place, so pass a copy if the original matters.
            performance_benchmark: Benchmark value over time, joined column-wise
                with ``performance``.
            composition: Portfolio allocation with one column per ticker. It is
                copied before its columns are renamed, so the caller's frame is
                left untouched.
            names: Display names, parallel to ``tickers``.
            tickers: Asset identifiers used as ``composition`` column labels.

        Returns:
            ``(fig_performance, fig_composition)``: a line chart comparing the
            strategies and a stacked bar chart of the non-zero allocations.
        """
        performance.index = pd.to_datetime(performance.index.values, utc=True)

        # ** PERFORMANCE GRAPH **
        try:
            df_to_plot = pd.concat([performance, performance_benchmark], axis=1)
        except Exception:
            # Best-effort fallback kept from the original: retry with plain dates.
            logger.warning("⚠️ Old data format.")
            performance.index = [date.date() for date in performance.index]  # needed for old data
            df_to_plot = pd.concat([performance, performance_benchmark], axis=1)

        fig_performance = px.line(
            df_to_plot,
            x=df_to_plot.index,
            y=df_to_plot.columns,
            title="Comparison of different strategies",
            color_discrete_map={
                "Portfolio_Value": "#21304f",
                "Benchmark_Value": "#f58f02",
            },
        )

        # ** COMPOSITION GRAPH **
        # Change ISIN to names on a copy, then drop assets that are never held.
        composition = composition.copy()
        composition.columns = _TradeBot._display_names(composition.columns, tickers, names)
        composition = composition.loc[:, (composition != 0).any(axis=0)]

        palette = _TradeBot._composition_palette()
        bars = [
            go.Bar(
                x=composition.index,
                y=composition[fund],
                name=str(fund),
                marker_color=palette[position % len(palette)],  # custom color
            )
            for position, fund in enumerate(composition.columns)
        ]

        fig_composition = go.Figure(data=bars, layout=go.Layout(barmode="stack"))
        fig_composition.update_layout(
            title="Portfolio Composition",
            xaxis_title="Number of the Investment Period",
            yaxis_title="Composition",
            legend_title="Name of the Fund",
        )
        fig_composition.layout.yaxis.tickformat = ",.1%"

        return fig_performance, fig_composition

    @staticmethod
    def __plot_portfolio_densities(
        portfolio_performance_dict: dict,
        compositions: dict[str, pd.DataFrame],
        tickers: list,
        names: list,
    ) -> tuple[go.Figure, dict[str, go.Figure], go.Figure]:
        """Plot the lifecycle simulation results.

        Args:
            portfolio_performance_dict: Maps a label to a frame that has a
                "Terminal Wealth" column; one density curve is drawn per entry.
            compositions: Maps a portfolio name to its allocation frame. Entries
                whose name contains "reverse" are not plotted. The plotted frames
                have their columns relabelled IN PLACE (see the note in the loop).
            tickers: Asset identifiers used as allocation column labels.
            names: Display names, parallel to ``tickers``.

        Returns:
            ``(fig, composition_figures, fig_subplots)``: the density figure, one
            stacked-bar figure per plotted portfolio keyed by portfolio name, and
            a single figure holding all compositions as subplots.
        """
        # Define colors
        colors = [
            "#99A4AE",  # gray50
            "#3b4956",  # dark
            "#b7ada5",  # secondary
            "#4099da",  # blue
            "#8ecdc8",  # aqua
            "#e85757",  # coral
            "#fdd779",  # sun
            "#644c76",  # eggplant
            "#D8D1CA",  # warmGray50
        ]
        color_cycle = cycle(colors)  # To cycle through colors
        fig = go.Figure()

        max_density_across_all_datasets = 0  # Highest density peak seen so far

        for label, df in portfolio_performance_dict.items():
            terminal_wealth = df["Terminal Wealth"]

            # Kernel density estimate evaluated on 1000 points spanning the data range
            kde = gaussian_kde(terminal_wealth)
            x = np.linspace(terminal_wealth.min(), terminal_wealth.max(), 1000)
            density = kde(x)

            max_density_across_all_datasets = max(max_density_across_all_datasets, density.max())

            fig.add_trace(
                go.Scatter(
                    x=x,
                    y=density,
                    mode="lines",
                    name=label,  # Use the dictionary key as the label
                    line=dict(width=2.5, color=next(color_cycle)),
                )
            )

        # Dashed vertical line at x=0, as tall as the highest density peak
        fig.add_shape(
            type="line",
            x0=0,
            y0=0,
            x1=0,
            y1=max_density_across_all_datasets,
            line=dict(
                color="Black",
                width=3,
                dash="dash",
            ),
        )

        # Layout with larger fonts
        fig.update_layout(
            title_text="Density function(s) of the end portfolio value for various glide paths.",
            title_font=dict(size=24),
            xaxis_title="Target date portfolio value",
            xaxis_title_font=dict(size=18),
            xaxis_tickfont=dict(size=16),
            yaxis_title="Density",
            yaxis_title_font=dict(size=18),
            yaxis_tickfont=dict(size=16),
            legend_title="Risk Budget glide path",
            legend_title_font=dict(size=18),
            legend_font=dict(size=16),
            template="plotly_white",
        )

        composition_figures = {}
        filtered_compositions = {name: comp for name, comp in compositions.items() if "reverse" not in name}
        num_portfolios = len(filtered_compositions)
        cols = 2 if num_portfolios > 1 else 1
        # At least one row, so an empty input yields an empty figure instead of a
        # make_subplots error.
        rows = max(1, ceil(num_portfolios / cols))

        fig_subplots = make_subplots(
            rows=rows,
            cols=cols,
            subplot_titles=[f"Portfolio Composition: {name}" for name in filtered_compositions],
            vertical_spacing=0.1,
            horizontal_spacing=0.05,
        )

        palette = _TradeBot._composition_palette()
        tickers_in_legend = set()  # each fund gets a legend entry only once

        for plot_index, (portfolio_name, composition) in enumerate(filtered_compositions.items()):
            # Every column but the last is looked up as a ticker; the last column is
            # labelled "Cash" (presumably the cash position; confirm against the
            # allocation model).
            composition_names = _TradeBot._display_names(composition.columns[:-1], tickers, names)
            if "Cash" not in composition_names:
                composition_names.append("Cash")
            # NOTE: this renames the frame stored in ``compositions`` in place. The
            # caller hands the same dict back to its own caller, so the rename is
            # visible outside; kept as-is because downstream code may rely on it.
            composition.columns = composition_names
            composition = composition.loc[:, (composition != 0).any(axis=0)]

            # Subplots are filled row by row
            row, col = divmod(plot_index, cols)

            # Create an individual figure for the current portfolio
            individual_fig = go.Figure()

            for color_index, fund in enumerate(composition.columns):
                show_legend = fund not in tickers_in_legend
                tickers_in_legend.add(fund)

                trace = go.Bar(
                    x=composition.index,
                    y=composition[fund],
                    name=str(fund),
                    marker_color=palette[color_index % len(palette)],
                    showlegend=show_legend,
                )

                # Add trace to both the subplot and the individual figure
                fig_subplots.add_trace(trace, row=row + 1, col=col + 1)
                individual_fig.add_trace(trace)

            individual_fig.update_layout(
                title=f"Portfolio Composition: {portfolio_name}",
                plot_bgcolor="white",
                barmode="stack",
            )
            individual_fig["layout"]["yaxis"].tickformat = ",.1%"

            composition_figures[portfolio_name] = individual_fig

        fig_subplots.update_layout(
            title="Portfolio Compositions",
            height=500 * rows,
            width=1000 * cols,
            plot_bgcolor="white",
            barmode="stack",
        )
        # Percentage tick format on the y-axis of every subplot
        fig_subplots.update_yaxes(tickformat=",.1%")

        return fig, composition_figures, fig_subplots

    # --------------------------------------------------------------- statistics

    def get_stat(self, start_date: str, end_date: str) -> pd.DataFrame:
        """Compute annual returns, annual standard deviation and Sharpe ratio of the assets.

        Args:
            start_date: First date of the window (inclusive).
            end_date: Last date of the window (inclusive).

        Returns:
            A frame indexed by ticker with the columns "Average Annual Returns",
            "Standard Deviation of Returns", "Sharpe Ratio", "ISIN", "Name",
            "Size" (always 1) and "Type" (always "ETF").
        """
        # ANALYZE THE DATA for a given time period
        weekly_data = self._returns_between(start_date, end_date)

        mu_ga = mean_an_returns(weekly_data)  # Annualised geometric mean of returns
        std_dev_a = weekly_data.std(axis=0) * np.sqrt(52)  # Weekly std. dev. scaled by sqrt(52)
        sharpe = round(mu_ga / std_dev_a, 2)  # Return over risk; no risk-free rate is subtracted

        # Write all results into a data frame
        stat_df = pd.concat([mu_ga, std_dev_a, sharpe], axis=1)
        stat_df.columns = [
            "Average Annual Returns",
            "Standard Deviation of Returns",
            "Sharpe Ratio",
        ]
        stat_df["ISIN"] = stat_df.index
        stat_df["Name"] = self.names  # assigned by position, so names must follow column order
        stat_df["Size"] = 1
        stat_df["Type"] = "ETF"

        return stat_df

    def get_top_performing_assets(self, time_periods: list[tuple[str, str]], top_percent: float = 0.2) -> list[str]:
        """Return the names of assets that were top performers in every given period.

        For each period the assets are binned into seven risk classes by their
        annualised standard deviation of returns:

            Risk Class 1: <= 0.005        Risk Class 5: (0.10, 0.15]
            Risk Class 2: (0.005, 0.02]   Risk Class 6: (0.15, 0.25]
            Risk Class 3: (0.02, 0.05]    Risk Class 7: > 0.25
            Risk Class 4: (0.05, 0.10]

        Within each class, an asset is a top performer when the percentile rank
        of its Sharpe ratio exceeds ``1 - top_percent``. Assets with no usable
        standard deviation fall into no class and are never top performers.

        Args:
            time_periods: ``(start_date, end_date)`` pairs, each passed to ``get_stat``.
            top_percent: Share of each risk class counted as top performers.

        Returns:
            Names of the assets that were top performers in all periods, in the
            sorted order of their ISIN; an empty list if no period is given.
        """
        if not time_periods:
            return []

        labels = [*self._RISK_CLASS_BOUNDS, "Risk Class 7"]
        # The top class is open-ended, so very volatile assets still get a class.
        bins = [-1, *self._RISK_CLASS_BOUNDS.values(), np.inf]
        threshold = 1 - top_percent

        winners_per_period = []
        for period in time_periods:
            data = self.get_stat(*period)
            data["Risk Class"] = pd.cut(
                data["Standard Deviation of Returns"],
                bins=bins,
                labels=labels,
                right=True,
            )
            # Start from an all-False boolean column so unclassified assets cannot
            # leave NaN behind and break the boolean mask below.
            data["Top Performer"] = False
            for risk_class in labels:
                in_class = data["Risk Class"] == risk_class
                data.loc[in_class, "Top Performer"] = data.loc[in_class, "Sharpe Ratio"].rank(pct=True) > threshold
            winners_per_period.append(data.loc[data["Top Performer"].astype(bool), "ISIN"].values)

        # ISIN codes for assets which were top performers in all n periods
        top_isins = winners_per_period[0]
        for winners in winners_per_period:
            top_isins = np.intersect1d(top_isins, winners)

        return self._display_names(top_isins, self.tickers, self.names)

    def plot_dots(
        self,
        start_date: str,
        end_date: str,
        ml: str = "",
        ml_subset: list | pd.DataFrame | None = None,
        fund_set: list | None = None,
        top_performers: list | None = None,
        optimal_portfolio: list | None = None,
        benchmark: list | None = None,
    ) -> go.Figure:
        """Plot an overview of the financial products in terms of risk and returns.

        Args:
            start_date: First date of the analysed window (inclusive).
            end_date: Last date of the analysed window (inclusive).
            ml: "MST" or "Clustering" to colour the assets by ``ml_subset``;
                any other value leaves the default colouring.
            ml_subset: For "MST", an iterable of tickers to highlight; for
                "Clustering", a frame with a "Cluster" column.
            fund_set: Names of funds to highlight individually.
            top_performers: Names of funds to highlight as "Top Performer".
            optimal_portfolio: A full row of statistics (same column order as
                ``get_stat``) added to the plot; element 4 is used as row label.
            benchmark: Same as ``optimal_portfolio`` but for the benchmark.

        Returns:
            The scatter figure with dashed risk-class markers and a zero-return line.
        """
        fund_set = fund_set or []
        top_performers = top_performers or []

        # Get statistics for a given time period
        data = self.get_stat(start_date, end_date)

        # Add data about the optimal portfolio and benchmark for plotting
        if optimal_portfolio:
            data.loc[optimal_portfolio[4]] = optimal_portfolio
        if benchmark:
            data.loc[benchmark[4]] = benchmark

        # Highlight the subset of assets chosen by the ML method, if any
        if ml == "MST":
            data.loc[:, "Type"] = "Funds"
            for fund in ml_subset:
                data.loc[fund, "Type"] = "MST subset"
        elif ml == "Clustering":
            data.loc[:, "Type"] = ml_subset.loc[:, "Cluster"]

        names_list = list(self.names)

        # If selected any fund for comparison
        for fund in fund_set:
            ticker = self.tickers[names_list.index(fund)]
            data.loc[ticker, "Type"] = str(data.loc[ticker, "Name"])
            data.loc[ticker, "Size"] = 3

        for fund in top_performers:
            ticker = self.tickers[names_list.index(fund)]
            data.loc[ticker, "Type"] = "Top Performer"
            data.loc[ticker, "Size"] = 3

        # PLOTTING Data
        color_discrete_map = {
            "ETF": "#21304f",
            "Mutual Fund": "#f58f02",
            "Funds": "#21304f",
            "MST subset": "#f58f02",
            "Top Performer": "#f58f02",
            "Cluster 1": "#21304f",
            "Cluster 2": "#f58f02",
            "Benchmark Portfolio": "#f58f02",
            "Optimal Portfolio": "olive",
        }
        fig = px.scatter(
            data,
            x="Standard Deviation of Returns",
            y="Average Annual Returns",
            color="Type",
            size="Size",
            size_max=8,
            hover_name="Name",
            hover_data={"Sharpe Ratio": True, "ISIN": True, "Size": False},
            color_discrete_map=color_discrete_map,
            title=f"Annual Returns and Standard Deviation of Returns from {start_date[:10]} to {end_date[:10]}",
        )

        # AXIS IN PERCENTAGES
        fig.layout.yaxis.tickformat = ",.1%"
        fig.layout.xaxis.tickformat = ",.1%"

        # RISK LEVEL MARKER
        min_risk = data["Standard Deviation of Returns"].min()
        max_risk = data["Standard Deviation of Returns"].max()
        # The top class ends at the riskiest asset on the plot
        risk_level = {**self._RISK_CLASS_BOUNDS, "Risk Class 7": max_risk}

        # Keep only the class bounds that fall inside the plotted risk range
        actual_risk_level = {
            level for level in range(1, 8) if min_risk <= risk_level[f"Risk Class {level}"] <= max_risk
        }
        # Guarded so that data without any usable risk value draws no markers
        # instead of failing on max() of an empty set.
        if actual_risk_level and max(actual_risk_level) < 7:
            actual_risk_level.add(max(actual_risk_level) + 1)  # Add the final risk level

        for level in sorted(actual_risk_level):
            k = f"Risk Class {level}"
            fig.add_vline(x=risk_level[k], line_width=1, line_dash="dash", line_color="#7c90a0")
            fig.add_annotation(
                x=risk_level[k] - 0.01,
                y=max(data["Average Annual Returns"]),
                text=k,
                textangle=-90,
                showarrow=False,
            )

        # RETURN LEVEL MARKER
        fig.add_hline(y=0, line_width=1.5, line_color="rgba(233, 30, 99, 0.5)")

        # TITLES
        fig.update_annotations(font_color="#000000")
        fig.update_layout(
            xaxis_title="Annualised standard deviation of returns (Risk)",
            yaxis_title="Annualised average returns",
        )
        # Position of legend
        fig.update_layout(legend=dict(yanchor="bottom", y=0.01, xanchor="left", x=0.01))
        return fig

    # ---------------------------------------------------------- asset selection

    def mst(self, start_date: str, end_date: str, n_mst_runs: int, plot: bool = False):
        """Run the minimum-spanning-tree selection and optionally plot the result.

        Args:
            start_date: First date of the window (inclusive).
            end_date: Last date of the window (inclusive).
            n_mst_runs: Number of times ``minimum_spanning_tree`` is applied; each
                run is fed the data frame returned by the previous one.
            plot: Whether to build the overview figure for the selected subset.

        Returns:
            ``(fig, subset_mst)``: the figure (``None`` unless ``plot`` is set and
            the subset is non-empty) and the subset returned by the last run
            (an empty list when ``n_mst_runs`` is 0).
        """
        fig, subset_mst = None, []

        # Starting subset of data for MST
        subset_mst_df = self._returns_between(start_date, end_date)

        for _ in range(n_mst_runs):
            subset_mst, subset_mst_df, _corr_mst_avg, _pdi_mst = minimum_spanning_tree(subset_mst_df)

        # PLOTTING RESULTS
        if plot and len(subset_mst) > 0:
            end_df_date = str(subset_mst_df.index.date[-1])
            fig = self.plot_dots(
                start_date=start_date,
                end_date=end_df_date,
                ml="MST",
                ml_subset=subset_mst,
            )

        return fig, subset_mst

    def clustering(
        self,
        start_date: str,
        end_date: str,
        n_clusters: int,
        n_assets: int,
        plot: bool = False,
    ):
        """Run the clustering selection and optionally plot the result.

        Args:
            start_date: First date of the window (inclusive).
            end_date: Last date of the window (inclusive).
            n_clusters: Number of clusters passed to ``cluster``.
            n_assets: Number of assets passed to ``pick_cluster``
                (per the original comment: assets picked from each cluster).
            plot: Whether to build the overview figure coloured by cluster.

        Returns:
            ``(fig, subset_clustering)``: the figure (``None`` unless ``plot`` is
            set) and the subset returned by ``pick_cluster``.
        """
        fig = None
        dataset = self._returns_between(start_date, end_date)

        # CLUSTER DATA
        clusters = cluster(dataset, n_clusters)

        # SELECT ASSETS
        end_dataset_date = str(dataset.index.date[-1])
        clustering_stats = self.get_stat(start_date, end_dataset_date)
        subset_clustering, _subset_clustering_df = pick_cluster(
            data=dataset, stat=clustering_stats, ml=clusters, n_assets=n_assets
        )

        # PLOTTING DATA
        if plot:
            fig = self.plot_dots(
                start_date=start_date,
                end_date=end_dataset_date,
                ml="Clustering",
                ml_subset=clusters,
            )

        return fig, subset_clustering

    # ----------------------------------------------------------------- backtest

    def backtest(
        self,
        start_train_date: str,
        start_test_date: str,
        end_test_date: str,
        subset_of_assets: list,
        benchmarks: list,
        scenarios_type: str,
        n_simulations: int,
        model: str,
        solver: str = "CLARABEL",
        lower_bound: int = 0,
    ) -> tuple[pd.DataFrame, pd.DataFrame, go.Figure, go.Figure]:
        """Compute the backtest.

        Args:
            start_train_date: First date of the whole data window (inclusive).
            start_test_date: The test window holds the dates strictly after this one.
            end_test_date: Last date of both windows (inclusive).
            subset_of_assets: Tickers the portfolio may invest in.
            benchmarks: Names of the benchmark funds; translated to tickers here.
            scenarios_type: "MonteCarlo" for Monte Carlo scenarios; any other
                value selects bootstrapping.
            n_simulations: Number of scenarios per period.
            model: "Markowitz model" for the mean-variance model; any other value
                selects the CVaR model.
            solver: Solver name forwarded to the optimisation model.
            lower_bound: Forwarded to the optimisation model.

        Returns:
            ``(optimal_portfolio_stat, benchmark_stat, fig_performance,
            fig_composition)``: ``final_stats`` of the portfolio and of the
            benchmark, followed by the two backtest figures.

        Budget (100), transaction cost (0.001), CVaR alpha (0.05) and maximum
        weight (1) are fixed inside this method.
        """
        # Find Benchmarks' ISIN codes
        names_list = list(self.names)
        benchmark_isin = [self.tickers[names_list.index(name)] for name in benchmarks]

        # Get train and testing datasets
        whole_dataset = self._returns_between(start_train_date, end_test_date)
        test_dataset = self._returns_between(start_test_date, end_test_date, include_start=False)
        n_test = len(test_dataset.index)

        # SCENARIO GENERATION
        # ---------------------------------------------------------------------------------------------------
        sg = ScenarioGenerator(np.random.default_rng())

        # Moments are needed by the Markowitz model and by the Monte Carlo generator
        if model == "Markowitz model" or scenarios_type == "MonteCarlo":
            sigma_lst, mu_lst = MomentGenerator.generate_sigma_mu_for_test_periods(
                data=whole_dataset[subset_of_assets], n_test=n_test
            )

        # NOTE(review): scenarios are only consumed by the CVaR model below, yet they
        # are generated for the Markowitz model too. Left unchanged because the
        # generators are not visible from here.
        if scenarios_type == "MonteCarlo":
            scenarios = sg.monte_carlo(
                data=whole_dataset[subset_of_assets],
                n_simulations=n_simulations,
                n_test=n_test,
                sigma_lst=sigma_lst,
                mu_lst=mu_lst,
            )
        else:
            scenarios = sg.bootstrapping(
                data=whole_dataset[subset_of_assets],
                n_simulations=n_simulations,  # number of scenarios per period
                n_test=n_test,  # number of periods
            )

        # TARGETS GENERATION
        # ---------------------------------------------------------------------------------------------------
        start_of_test_dataset = str(test_dataset.index.date[0])
        if model == "Markowitz model":
            targets, benchmark_port_val = get_mvo_targets(
                test_date=start_of_test_dataset,
                benchmark=benchmark_isin,
                budget=100,
                data=whole_dataset,
            )
        else:
            targets, benchmark_port_val = get_cvar_targets(
                test_date=start_of_test_dataset,
                benchmark=benchmark_isin,
                budget=100,
                cvar_alpha=0.05,
                data=whole_dataset,
                scgen=sg,
                n_simulations=n_simulations,
            )

        # MATHEMATICAL MODELING
        # ---------------------------------------------------------------------------------------------------
        if model == "Markowitz model":
            port_allocation, port_value, _port_cvar = mvo_model(
                test_ret=test_dataset[subset_of_assets],
                mu_lst=mu_lst,
                sigma_lst=sigma_lst,
                targets=targets,
                budget=100,
                trans_cost=0.001,
                max_weight=1,
                solver=solver,
                lower_bound=lower_bound,
            )
        else:
            port_allocation, port_value, _port_cvar = cvar_model(
                test_ret=test_dataset[subset_of_assets],
                scenarios=scenarios,
                targets=targets,
                budget=100,
                cvar_alpha=0.05,
                trans_cost=0.001,
                max_weight=1,
                solver=solver,
                lower_bound=lower_bound,
            )

        # PLOTTING
        # ------------------------------------------------------------------
        # Copies are passed because the plotting helper rewrites the index in place.
        fig_performance, fig_composition = self.__plot_backtest(
            performance=port_value.copy(),
            performance_benchmark=benchmark_port_val.copy(),
            composition=port_allocation,
            names=self.names,
            tickers=self.tickers,
        )

        # RETURN STATISTICS
        # ------------------------------------------------------------------
        optimal_portfolio_stat = final_stats(port_value)
        benchmark_stat = final_stats(benchmark_port_val)

        return optimal_portfolio_stat, benchmark_stat, fig_performance, fig_composition

    # ---------------------------------------------------------------- lifecycle

    def lifecycle_scenario_analysis(
        self,
        subset_of_assets: list,
        scenarios_type: str,
        n_simulations: int,
        end_year: int,
        withdrawals: int,
        initial_risk_appetite: float,
        initial_budget: int,
        rng_seed=0,
        test_split: float = 0.0,
        start_year: int = 2023,
    ) -> tuple[dict, pd.DataFrame, go.Figure, go.Figure, dict, dict, go.Figure]:
        """Compute the lifecycle scenario analysis.

        Args:
            subset_of_assets: Tickers the portfolios may invest in.
            scenarios_type: "MonteCarlo" or "Bootstrap".
            n_simulations: Number of simulated scenarios.
            end_year: Final year; the horizon is ``end_year - start_year`` periods.
            withdrawals: First-period withdrawal; it grows by 4% per period.
            initial_risk_appetite: Initial risk passed to the glide-path generator.
            initial_budget: Starting budget passed to the simulation model.
            rng_seed: Seed of the random generator; 0 means unseeded.
            test_split: When non-zero, the data is split with this sampling ratio:
                the weekly moments come from the sampling set and the annual
                moments from the estimating set. When 0, both come from all data.
            start_year: First year of the horizon (defaults to 2023, the value
                that was previously hard-coded).

        Returns:
            ``(terminal_wealth_dict, exhibition_summary, fig_performance,
            fig_glidepaths, allocation_targets, fig_compositions,
            fig_compositions_all)``.

        Raises:
            ValueError: If ``scenarios_type`` is neither "MonteCarlo" nor "Bootstrap".
        """
        # ------------------------------- INITIALIZE FUNCTION -------------------------------
        n_periods = end_year - start_year
        withdrawal_lst = [withdrawals * (1 + 0.04) ** i for i in range(n_periods)]

        # ------------------------------- PARAMETER INITIALIZATION -------------------------------
        if test_split != 0:
            sampling_set, estimating_set = MomentGenerator.split_dataset(
                data=self.weeklyReturns[subset_of_assets], sampling_ratio=test_split
            )
            _, _, sigma_weekly, mu_weekly = MomentGenerator.generate_annual_sigma_mu_with_risk_free(data=sampling_set)
            sigma, mu, _, _ = MomentGenerator.generate_annual_sigma_mu_with_risk_free(data=estimating_set)
        else:
            sigma, mu, sigma_weekly, mu_weekly = MomentGenerator.generate_annual_sigma_mu_with_risk_free(
                data=self.weeklyReturns[subset_of_assets]
            )

        # ------------------------------- SCENARIO GENERATION -------------------------------
        # A seed of 0 is treated as "no seed": default_rng(None) draws fresh entropy.
        sg = ScenarioGenerator(np.random.default_rng(rng_seed if rng_seed != 0 else None))

        if scenarios_type == "MonteCarlo":
            scenarios = sg.MC_simulation_annual_from_weekly(
                weekly_mu=mu_weekly,
                weekly_sigma=sigma_weekly,
                n_simulations=n_simulations,
                n_years=n_periods,
            )
        elif scenarios_type == "Bootstrap":
            scenarios = sg.bootstrap_simulation_annual_from_weekly(
                historical_weekly_returns=self.weeklyReturns[subset_of_assets],
                n_simulations=n_simulations,
                n_years=n_periods,
            )
        else:
            raise ValueError(
                "It appears that a scenario method other than MonteCarlo or Bootstrap has been chosen. "
                "Please check for spelling mistakes."
            )

        # ------------------------------- Allocation Target Generation -------------------------------
        glide_paths_df, fig_glidepaths = generate_risk_profiles(
            n_periods=n_periods, initial_risk=initial_risk_appetite, minimum_risk=0.01
        )

        # One set of allocation targets per glide path
        allocation_targets = {}
        for r in glide_paths_df.columns:
            allocation_targets[f"{r}"] = get_port_allocations(
                mu_lst=mu,
                sigma_lst=sigma,
                targets=glide_paths_df[r],
                max_weight=1 / 4,
                solver="CLARABEL",
            )

        # ------------------------------- MATHEMATICAL MODELING -------------------------------
        exhibition_summary = pd.DataFrame()
        terminal_wealth_dict = {}

        for key, df in allocation_targets.items():
            logger.info(
                f"Optimizing portfolio for {key} over {n_simulations} scenarios. An info message will "
                f"appear, when we are halfway through the scenarios for the current strategy."
            )
            portfolio_df, _mean_allocations_df, analysis_metrics = riskadjust_model_scen(
                scen=scenarios[:, :, :],
                targets=df,
                budget=initial_budget,
                trans_cost=0.002,
                withdrawal_lst=withdrawal_lst,
                interest_rate=0.04,
            )

            # Add the analysis metrics as a new column in the summary frame
            exhibition_summary[key] = analysis_metrics.squeeze()

            # Non-numeric terminal wealth values become NaN
            portfolio_df["Terminal Wealth"] = pd.to_numeric(portfolio_df["Terminal Wealth"], errors="coerce")
            terminal_wealth_dict[f"{key}"] = portfolio_df

        # ------------------------------- PLOTTING -------------------------------
        # NOTE: the plotting helper relabels the frames in ``allocation_targets`` in place.
        fig_performance, fig_compositions, fig_compositions_all = self.__plot_portfolio_densities(
            portfolio_performance_dict=terminal_wealth_dict,
            compositions=allocation_targets,
            tickers=self.tickers,
            names=self.names,
        )

        # ------------------------------- RETURN STATISTICS -------------------------------
        return (
            terminal_wealth_dict,
            exhibition_summary,
            fig_performance,
            fig_glidepaths,
            allocation_targets,
            fig_compositions,
            fig_compositions_all,
        )

833

834

if __name__ == "__main__":
    # Build the bot from the bundled data file.
    algo = initialize_bot()

    # Assets that are top performers in each of three consecutive periods.
    review_periods = [
        (algo.min_date, "2017-01-01"),
        ("2017-01-02", "2020-01-01"),
        ("2020-01-02", algo.max_date),
    ]
    top_assets = algo.get_top_performing_assets(time_periods=review_periods, top_percent=0.2)

    # Risk/return overview with the top performers highlighted.
    algo.plot_dots(start_date=algo.min_date, end_date=algo.max_date, top_performers=top_assets)

    # Asset selection via the minimum spanning tree method.
    _, mst_subset_of_assets = algo.mst(
        start_date="2000-01-01",
        end_date="2024-01-01",
        n_mst_runs=5,
        plot=False,
    )

    # Asset selection via clustering.
    _, clustering_subset_of_assets = algo.clustering(
        start_date="2015-12-23",
        end_date="2017-07-01",
        n_clusters=3,
        n_assets=10,
        plot=True,
    )

    # Lifecycle scenario analysis on the MST subset.
    lifecycle = algo.lifecycle_scenario_analysis(
        subset_of_assets=mst_subset_of_assets,
        scenarios_type="MonteCarlo",
        n_simulations=1000,
        end_year=2050,
        withdrawals=51000,
        initial_risk_appetite=0.15,
        initial_budget=137000,
    )

    # Backtest on the MST subset.
    backtest = algo.backtest(
        start_train_date="2015-12-23",
        start_test_date="2018-09-24",
        end_test_date="2019-09-01",
        subset_of_assets=mst_subset_of_assets,
        benchmarks=["BankInvest Danske Aktier W"],
        scenarios_type="Bootstrapping",
        n_simulations=500,
        model="Markowitz model",
        lower_bound=0,
    )