Coverage for src/ifunnel/models/main.py: 0%
275 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-12 09:14 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-12 09:14 +0000
1from functools import lru_cache
2from itertools import cycle
3from math import ceil
4from pathlib import Path
6import numpy as np
7import pandas as pd
8import plotly.express as px
9import plotly.graph_objects as go
10import plotly.io as pio
11from loguru import logger
12from plotly.subplots import make_subplots
13from scipy.stats import gaussian_kde
15from .Clustering import cluster, pick_cluster
16from .CVaRmodel import cvar_model
17from .CVaRtargets import get_cvar_targets
18from .dataAnalyser import final_stats, mean_an_returns
19from .lifecycle.glidePathCreator import generate_risk_profiles
20from .lifecycle.MVOlifecycleModel import (
21 get_port_allocations,
22 riskadjust_model_scen,
23)
24from .MST import minimum_spanning_tree
25from .MVOmodel import mvo_model
26from .MVOtargets import get_mvo_targets
27from .ScenarioGeneration import MomentGenerator, ScenarioGenerator
# Module-level side effect: importing this module sets plotly's default renderer to "browser".
pio.renderers.default = "browser"
31# that's unfortunate but will be addressed later
32# ROOT_DIR = Path(__file__).parent.parent
33# Load our data
34# weekly_returns = pd.read_parquet(os.path.join(ROOT_DIR, "financial_data/all_etfs_rets.parquet.gzip"))
35# tickers = [pair[0] for pair in weekly_returns.columns.values]
36# names = [pair[1] for pair in weekly_returns.columns.values]
@lru_cache(maxsize=1)  # Remember the bot built for the most recent ``file`` argument
def initialize_bot(file=None):
    """
    Build the trading bot from a parquet file of weekly returns.

    The column labels of the file are read as pairs: element 0 is used as the ticker and element 1 as
    the display name. The columns of the loaded table are then relabelled with the tickers only.

    Args:
        file: Location of the parquet file. When ``None``, the file
            ``financial_data/all_etfs_rets.parquet.gzip`` two directories above this module is used.

    Returns:
        A ``_TradeBot`` holding the tickers, the names and the relabelled returns table. Because of the
        cache decorator, calling again with the same argument returns the very same object.
    """
    if file is not None:
        source = file
    else:
        source = Path(__file__).parent.parent / "financial_data" / "all_etfs_rets.parquet.gzip"

    returns = pd.read_parquet(source)

    # Split the (ticker, name) column labels into two parallel lists
    tickers, names = [], []
    for label in returns.columns.values:
        tickers.append(label[0])
        names.append(label[1])

    returns.columns = tickers
    return _TradeBot(tickers, names, returns)
class _TradeBot:
    """
    Python class analysing financial products and, based on machine learning algorithms and mathematical
    optimization, suggesting an optimal portfolio of assets.

    An instance wraps one table of weekly returns whose columns are labelled with ``tickers``;
    ``names`` holds the display name of each ticker at the same position.
    """

    # Upper bound of the annualised standard deviation of returns for every bounded risk class.
    _RISK_CLASS_BOUNDS = {
        "Risk Class 1": 0.005,
        "Risk Class 2": 0.02,
        "Risk Class 3": 0.05,
        "Risk Class 4": 0.10,
        "Risk Class 5": 0.15,
        "Risk Class 6": 0.25,
    }
    # Open-ended class for everything above the last bound.
    _TOP_RISK_CLASS = "Risk Class 7"

    def __init__(self, tickers, names, weekly_returns):
        """
        Store the data set and remember the first and last date it covers.

        Args:
            tickers: Column identifiers, one per column of ``weekly_returns``.
            names: Display names, positionally aligned with ``tickers``.
            weekly_returns: Table of returns indexed by date. Its columns are relabelled in place with
                ``tickers`` and the object itself (not a copy) is kept on the instance.
        """
        self.tickers = tickers
        self.names = names
        self.weeklyReturns = weekly_returns
        self.min_date = str(weekly_returns.index[0])
        self.max_date = str(weekly_returns.index[-1])

        weekly_returns.columns = tickers

    @staticmethod
    def _composition_palette() -> list:
        """Return the long colour list used to colour the stacked composition bars."""
        sequential = px.colors.sequential
        return (
            sequential.turbid
            + sequential.Brwnyl
            + sequential.YlOrBr
            + sequential.gray
            + sequential.Mint
            + sequential.dense
            + sequential.Plasma
            + sequential.Viridis
            + sequential.Cividis
        )

    def _returns_between(self, start_date: str, end_date: str) -> pd.DataFrame:
        """Return a copy of the weekly returns with index between both dates (inclusive)."""
        returns = self.weeklyReturns
        return returns[(returns.index >= start_date) & (returns.index <= end_date)].copy()

    @staticmethod
    def __plot_backtest(
        performance: pd.DataFrame,
        performance_benchmark: pd.DataFrame,
        composition: pd.DataFrame,
        names: list,
        tickers: list,
    ) -> tuple[go.Figure, go.Figure]:
        """
        Plot the backtest results.

        Args:
            performance: Portfolio value over time; its index is converted in place to UTC timestamps.
            performance_benchmark: Benchmark value over time, concatenated column-wise to ``performance``.
            composition: Portfolio allocation per period with tickers as columns (left unmodified).
            names: Display names aligned with ``tickers``.
            tickers: All known tickers.

        Returns:
            The performance line chart and the stacked-bar composition chart.

        Raises:
            ValueError: If a column of ``composition`` is not contained in ``tickers``.
        """
        performance.index = pd.to_datetime(performance.index.values, utc=True)

        # ** PERFORMANCE GRAPH **
        try:
            df_to_plot = pd.concat([performance, performance_benchmark], axis=1)
        except Exception:
            # Deliberate best-effort fallback kept from the original implementation:
            # retry with plain dates in the index when the first concat fails.
            logger.warning("⚠️ Old data format.")
            performance.index = [date.date() for date in performance.index]  # needed for old data
            df_to_plot = pd.concat([performance, performance_benchmark], axis=1)

        color_discrete_map = {
            "Portfolio_Value": "#21304f",
            "Benchmark_Value": "#f58f02",
        }
        fig_performance = px.line(
            df_to_plot,
            x=df_to_plot.index,
            y=df_to_plot.columns,
            title="Comparison of different strategies",
            color_discrete_map=color_discrete_map,
        )

        # ** COMPOSITION GRAPH **
        # Change ISIN to NAMES on a copy, so the caller's allocation table keeps its ticker columns
        ticker_list, name_list = list(tickers), list(names)
        composition = composition.copy()
        composition.columns = [name_list[ticker_list.index(ticker)] for ticker in composition.columns]

        # Drop assets that are never held
        composition = composition.loc[:, (composition != 0).any(axis=0)]

        palette = _TradeBot._composition_palette()
        bars = [
            go.Bar(
                x=composition.index,
                y=composition[fund],
                name=str(fund),
                marker_color=palette[position % len(palette)],  # custom color
            )
            for position, fund in enumerate(composition.columns)
        ]

        fig_composition = go.Figure(data=bars, layout=go.Layout(barmode="stack"))
        fig_composition.update_layout(
            title="Portfolio Composition",
            xaxis_title="Number of the Investment Period",
            yaxis_title="Composition",
            legend_title="Name of the Fund",
        )
        fig_composition.layout.yaxis.tickformat = ",.1%"

        return fig_performance, fig_composition

    @staticmethod
    def __plot_portfolio_densities(
        portfolio_performance_dict: dict,
        compositions: dict[str, pd.DataFrame],
        tickers: list,
        names: list,
    ) -> tuple[go.Figure, dict[str, go.Figure], go.Figure]:
        """
        Plot the lifecycle simulation results.

        Args:
            portfolio_performance_dict: Maps a label to a table with a "Terminal Wealth" column.
            compositions: Maps a portfolio name to its allocation table. Entries whose name contains
                "reverse" are not plotted. The last column of every plotted table is treated as cash.
                NOTE: the columns of every plotted table are renamed in place from tickers to fund
                names (plus "Cash"), so the caller sees the renamed tables afterwards.
            tickers: All known tickers.
            names: Display names aligned with ``tickers``.

        Returns:
            The density figure, a dict with one composition figure per plotted portfolio, and one figure
            holding all compositions as subplots.

        Raises:
            ValueError: If a non-cash column of a plotted table is not contained in ``tickers``.
        """
        # Define colors
        colors = [
            "#99A4AE",  # gray50
            "#3b4956",  # dark
            "#b7ada5",  # secondary
            "#4099da",  # blue
            "#8ecdc8",  # aqua
            "#e85757",  # coral
            "#fdd779",  # sun
            "#644c76",  # eggplant
            "#D8D1CA",  # warmGray50
        ]
        color_cycle = cycle(colors)  # To cycle through colors
        fig = go.Figure()

        max_density_across_all_datasets = 0  # Highest density peak seen so far

        for label, df in portfolio_performance_dict.items():
            terminal_wealth = df["Terminal Wealth"]

            # Kernel Density Estimation for each dataset, evaluated on 1000 points over the observed range
            kde = gaussian_kde(terminal_wealth)
            x = np.linspace(terminal_wealth.min(), terminal_wealth.max(), 1000)
            density = kde(x)

            max_density_across_all_datasets = max(max_density_across_all_datasets, density.max())

            fig.add_trace(
                go.Scatter(
                    x=x,
                    y=density,
                    mode="lines",
                    name=label,  # Use the dictionary key as the label
                    line=dict(width=2.5, color=next(color_cycle)),
                )
            )

        # Add a dashed vertical line at x=0, as tall as the highest density peak
        fig.add_shape(
            type="line",
            x0=0,
            y0=0,
            x1=0,
            y1=max_density_across_all_datasets,
            line=dict(color="Black", width=3, dash="dash"),
        )

        # Update the layout with larger fonts
        fig.update_layout(
            title_text="Density function(s) of the end portfolio value for various glide paths.",
            title_font=dict(size=24),
            xaxis_title="Target date portfolio value",
            xaxis_title_font=dict(size=18),
            xaxis_tickfont=dict(size=16),
            yaxis_title="Density",
            yaxis_title_font=dict(size=18),
            yaxis_tickfont=dict(size=16),
            legend_title="Risk Budget glide path",
            legend_title_font=dict(size=18),
            legend_font=dict(size=16),
            template="plotly_white",
        )

        composition_figures = {}
        filtered_compositions = {name: comp for name, comp in compositions.items() if "reverse" not in name}
        num_portfolios = len(filtered_compositions)
        cols = 2 if num_portfolios > 1 else 1
        # At least one row, so an empty input yields an empty figure instead of a make_subplots error
        rows = max(1, ceil(num_portfolios / cols))

        fig_subplots = make_subplots(
            rows=rows,
            cols=cols,
            subplot_titles=[f"Portfolio Composition: {name}" for name in filtered_compositions],
            vertical_spacing=0.1,
            horizontal_spacing=0.05,
        )

        ticker_list, name_list = list(tickers), list(names)
        palette = _TradeBot._composition_palette()
        tickers_in_legend = set()

        for plot_number, (portfolio_name, composition) in enumerate(filtered_compositions.items()):
            # Every column except the last one is a ticker; the last one is the cash position
            composition_names = [name_list[ticker_list.index(ticker)] for ticker in composition.columns[:-1]]
            if "Cash" not in composition_names:
                composition_names.append("Cash")
            composition.columns = composition_names  # in place on purpose, see docstring
            composition = composition.loc[:, (composition != 0).any(axis=0)]

            row, col = divmod(plot_number, cols)

            # Create an individual figure for the current portfolio
            individual_fig = go.Figure()

            for position, fund in enumerate(composition.columns):
                # A fund gets a legend entry only the first time it appears across all portfolios.
                # NOTE(review): the same flag is used for the individual figure, so its legend can miss
                # funds already shown for an earlier portfolio - confirm whether that is intended.
                show_legend = fund not in tickers_in_legend
                tickers_in_legend.add(fund)

                trace = go.Bar(
                    x=composition.index,
                    y=composition[fund],
                    name=str(fund),
                    marker_color=palette[position % len(palette)],
                    showlegend=show_legend,
                )

                # Add trace to both the subplot and the individual figure
                fig_subplots.add_trace(trace, row=row + 1, col=col + 1)
                individual_fig.add_trace(trace)

            individual_fig.update_layout(
                title=f"Portfolio Composition: {portfolio_name}",
                plot_bgcolor="white",
                barmode="stack",
            )
            individual_fig["layout"]["yaxis"].tickformat = ",.1%"

            composition_figures[portfolio_name] = individual_fig

        fig_subplots.update_layout(
            title="Portfolio Compositions",
            height=500 * rows,
            width=1000 * cols,
            plot_bgcolor="white",
            barmode="stack",
        )
        # Update y-axis tick format for all subplots
        for i in range(1, cols * rows + 1):
            fig_subplots["layout"][f"yaxis{i}"].tickformat = ",.1%"

        return fig, composition_figures, fig_subplots

    def get_stat(self, start_date: str, end_date: str) -> pd.DataFrame:
        """
        Compute annual returns, annual standard deviation and Sharpe ratio of all assets.

        Args:
            start_date: First date of the analysed window (inclusive).
            end_date: Last date of the analysed window (inclusive).

        Returns:
            One row per ticker with the columns "Average Annual Returns", "Standard Deviation of Returns",
            "Sharpe Ratio", "ISIN", "Name", "Size" (always 1) and "Type" (always "ETF").
        """
        weekly_data = self._returns_between(start_date, end_date)

        mu_ga = mean_an_returns(weekly_data)  # Annualised geometric mean of returns
        std_dev_a = weekly_data.std(axis=0) * np.sqrt(52)  # Annualised standard deviation of returns
        sharpe = round(mu_ga / std_dev_a, 2)  # Sharpe ratio of each financial product

        stat_df = pd.concat([mu_ga, std_dev_a, sharpe], axis=1)
        stat_df.columns = [
            "Average Annual Returns",
            "Standard Deviation of Returns",
            "Sharpe Ratio",
        ]
        stat_df["ISIN"] = stat_df.index
        stat_df["Name"] = self.names
        stat_df["Size"] = 1
        stat_df["Type"] = "ETF"

        return stat_df

    def get_top_performing_assets(self, time_periods: list[tuple[str, str]], top_percent: float = 0.2) -> list[str]:
        """
        Find the assets that rank among the best of their risk class in every given period.

        For each period the assets are binned into risk classes by their annualised standard deviation
        (bounds in ``_RISK_CLASS_BOUNDS``; everything above the last bound is "Risk Class 7"). Within
        each class an asset is a top performer when the percentile rank of its Sharpe ratio exceeds
        ``1 - top_percent``. Assets without a risk class or Sharpe ratio (e.g. no data in the period)
        are never top performers.

        Args:
            time_periods: ``(start_date, end_date)`` pairs passed to ``get_stat``.
            top_percent: Share of each risk class considered top performing.

        Returns:
            Names of the assets that are top performers in all periods, ordered by sorted ISIN.
            Empty when ``time_periods`` is empty.
        """
        if not time_periods:
            return []

        labels = [*self._RISK_CLASS_BOUNDS, self._TOP_RISK_CLASS]
        # Open-ended outer edges, so every finite standard deviation lands in some class
        bins = [-np.inf, *self._RISK_CLASS_BOUNDS.values(), np.inf]
        threshold = 1 - top_percent

        isins_per_period = []
        for period in time_periods:
            data = self.get_stat(*period)
            data["Risk Class"] = pd.cut(
                data["Standard Deviation of Returns"],
                bins=bins,
                labels=labels,
                right=True,
            )

            # Start from an all-False boolean column: rows that match no class must stay False rather
            # than NaN, otherwise the boolean mask below cannot be applied.
            data["Top Performer"] = False
            for risk_class in labels:
                in_class = data["Risk Class"] == risk_class
                if not in_class.any():
                    continue
                data.loc[in_class, "Top Performer"] = (
                    data.loc[in_class, "Sharpe Ratio"].rank(pct=True) > threshold
                )

            isins_per_period.append(data.loc[data["Top Performer"], "ISIN"].values)

        # ISIN codes of assets which were top performers in all n periods (intersect1d sorts them)
        top_isins = isins_per_period[0]
        for period_isins in isins_per_period:
            top_isins = np.intersect1d(top_isins, period_isins)

        ticker_list = list(self.tickers)
        return [self.names[ticker_list.index(isin)] for isin in top_isins]

    def plot_dots(
        self,
        start_date: str,
        end_date: str,
        ml: str = "",
        ml_subset: list | pd.DataFrame | None = None,
        fund_set: list | None = None,
        top_performers: list | None = None,
        optimal_portfolio: list | None = None,
        benchmark: list | None = None,
    ) -> go.Figure:
        """
        Plot the overview of the financial products in terms of risk and returns.

        Args:
            start_date: First date of the analysed window (inclusive).
            end_date: Last date of the analysed window (inclusive).
            ml: "MST" or "Clustering" to colour the assets by that method's result; anything else
                keeps the default colouring.
            ml_subset: For "MST" an iterable of tickers to highlight; for "Clustering" a table with a
                "Cluster" column.
            fund_set: Fund names to highlight individually.
            top_performers: Fund names to highlight as "Top Performer".
            optimal_portfolio: Full statistics row to add; element 4 is used as the row label.
            benchmark: Full statistics row to add; element 4 is used as the row label.

        Returns:
            Scatter plot of annualised return against annualised standard deviation, with dashed
            vertical markers at the risk-class boundaries.
        """
        fund_set = fund_set or []
        top_performers = top_performers or []

        # Get statistics for a given time period
        data = self.get_stat(start_date, end_date)

        # Add data about the optimal portfolio and benchmark for plotting
        if optimal_portfolio:
            data.loc[optimal_portfolio[4]] = optimal_portfolio
        if benchmark:
            data.loc[benchmark[4]] = benchmark

        # IF WE WANT TO HIGHLIGHT THE SUBSET OF ASSETS BASED ON ML
        if ml == "MST":
            data.loc[:, "Type"] = "Funds"
            for fund in ml_subset:
                data.loc[fund, "Type"] = "MST subset"
        if ml == "Clustering":
            data.loc[:, "Type"] = ml_subset.loc[:, "Cluster"]

        name_list = list(self.names)

        # If selected any fund for comparison
        for fund in fund_set:
            isin = self.tickers[name_list.index(fund)]
            data.loc[isin, "Type"] = str(data.loc[isin, "Name"])
            data.loc[isin, "Size"] = 3

        for fund in top_performers:
            isin = self.tickers[name_list.index(fund)]
            data.loc[isin, "Type"] = "Top Performer"
            data.loc[isin, "Size"] = 3

        # PLOTTING Data
        color_discrete_map = {
            "ETF": "#21304f",
            "Mutual Fund": "#f58f02",
            "Funds": "#21304f",
            "MST subset": "#f58f02",
            "Top Performer": "#f58f02",
            "Cluster 1": "#21304f",
            "Cluster 2": "#f58f02",
            "Benchmark Portfolio": "#f58f02",
            "Optimal Portfolio": "olive",
        }
        fig = px.scatter(
            data,
            x="Standard Deviation of Returns",
            y="Average Annual Returns",
            color="Type",
            size="Size",
            size_max=8,
            hover_name="Name",
            hover_data={"Sharpe Ratio": True, "ISIN": True, "Size": False},
            color_discrete_map=color_discrete_map,
            title="Annual Returns and Standard Deviation of Returns from " + start_date[:10] + " to " + end_date[:10],
        )

        # AXIS IN PERCENTAGES
        fig.layout.yaxis.tickformat = ",.1%"
        fig.layout.xaxis.tickformat = ",.1%"

        # RISK LEVEL MARKER
        min_risk = data["Standard Deviation of Returns"].min()
        max_risk = data["Standard Deviation of Returns"].max()
        if pd.notna(min_risk) and pd.notna(max_risk):
            # The open-ended top class has no fixed bound, so it is drawn at the riskiest asset
            boundaries = {**self._RISK_CLASS_BOUNDS, self._TOP_RISK_CLASS: max_risk}

            # Boundaries that fall inside the plotted range ...
            marked = [
                label for label, bound in self._RISK_CLASS_BOUNDS.items() if min_risk <= bound <= max_risk
            ]
            # ... plus the boundary closing the class that contains the riskiest asset
            closing = next(
                (label for label, bound in self._RISK_CLASS_BOUNDS.items() if bound >= max_risk),
                self._TOP_RISK_CLASS,
            )
            if closing not in marked:
                marked.append(closing)

            label_height = data["Average Annual Returns"].max()
            for label in marked:
                boundary = boundaries[label]
                fig.add_vline(x=boundary, line_width=1, line_dash="dash", line_color="#7c90a0")
                fig.add_annotation(
                    x=boundary - 0.01,
                    y=label_height,
                    text=label,
                    textangle=-90,
                    showarrow=False,
                )

        # RETURN LEVEL MARKER
        fig.add_hline(y=0, line_width=1.5, line_color="rgba(233, 30, 99, 0.5)")

        # TITLES
        fig.update_annotations(font_color="#000000")
        fig.update_layout(
            xaxis_title="Annualised standard deviation of returns (Risk)",
            yaxis_title="Annualised average returns",
        )
        # Position of legend
        fig.update_layout(legend=dict(yanchor="bottom", y=0.01, xanchor="left", x=0.01))
        return fig

    def mst(self, start_date: str, end_date: str, n_mst_runs: int, plot: bool = False):
        """
        Run the minimum spanning tree method repeatedly and optionally plot the result.

        Each run feeds the data subset returned by the previous run back into ``minimum_spanning_tree``.

        Args:
            start_date: First date of the analysed window (inclusive).
            end_date: Last date of the analysed window (inclusive).
            n_mst_runs: Number of consecutive MST runs.
            plot: Whether to build the risk/return scatter plot highlighting the selected subset.

        Returns:
            ``(fig, subset_mst)``; ``fig`` is ``None`` unless ``plot`` is set and the subset is non-empty,
            ``subset_mst`` is the subset from the last run (``[]`` when no run was made).
        """
        fig, subset_mst = None, []

        # Starting subset of data for MST
        subset_mst_df = self._returns_between(start_date, end_date)

        for _ in range(n_mst_runs):
            subset_mst, subset_mst_df, corr_mst_avg, pdi_mst = minimum_spanning_tree(subset_mst_df)

        # PLOTTING RESULTS
        if plot and len(subset_mst) > 0:
            end_df_date = str(subset_mst_df.index.date[-1])
            fig = self.plot_dots(
                start_date=start_date,
                end_date=end_df_date,
                ml="MST",
                ml_subset=subset_mst,
            )

        return fig, subset_mst

    def clustering(
        self,
        start_date: str,
        end_date: str,
        n_clusters: int,
        n_assets: int,
        plot: bool = False,
    ):
        """
        Run the clustering method and optionally plot the result.

        Args:
            start_date: First date of the analysed window (inclusive).
            end_date: Last date of the analysed window (inclusive).
            n_clusters: Number of clusters passed to ``cluster``.
            n_assets: Number of assets picked from each cluster.
            plot: Whether to build the risk/return scatter plot coloured by cluster.

        Returns:
            ``(fig, subset_clustering)``; ``fig`` is ``None`` unless ``plot`` is set.
        """
        fig = None
        dataset = self._returns_between(start_date, end_date)

        # CLUSTER DATA
        clusters = cluster(dataset, n_clusters)

        # SELECT ASSETS
        end_dataset_date = str(dataset.index.date[-1])
        clustering_stats = self.get_stat(start_date, end_dataset_date)
        subset_clustering, subset_clustering_df = pick_cluster(
            data=dataset, stat=clustering_stats, ml=clusters, n_assets=n_assets
        )

        # PLOTTING DATA
        if plot:
            fig = self.plot_dots(
                start_date=start_date,
                end_date=end_dataset_date,
                ml="Clustering",
                ml_subset=clusters,
            )

        return fig, subset_clustering

    def backtest(
        self,
        start_train_date: str,
        start_test_date: str,
        end_test_date: str,
        subset_of_assets: list,
        benchmarks: list,
        scenarios_type: str,
        n_simulations: int,
        model: str,
        solver: str = "CLARABEL",
        lower_bound: int = 0,
    ) -> tuple[pd.DataFrame, pd.DataFrame, go.Figure, go.Figure]:
        """
        Compute the backtest.

        Args:
            start_train_date: First date of the whole data window (inclusive).
            start_test_date: Test data starts strictly after this date.
            end_test_date: Last date of the test window (inclusive).
            subset_of_assets: Tickers the portfolio may invest in.
            benchmarks: Fund names forming the benchmark.
            scenarios_type: "MonteCarlo" for Monte Carlo scenarios, anything else selects bootstrapping.
                Only used by the CVaR model.
            n_simulations: Number of scenarios per period.
            model: "Markowitz model" for mean-variance optimisation, anything else selects the CVaR model.
            solver: Solver name handed to the optimisation model.
            lower_bound: Lower bound handed to the optimisation model.

        Returns:
            Statistics of the optimal portfolio, statistics of the benchmark, the performance figure and
            the composition figure.

        Raises:
            ValueError: If a benchmark name is unknown.
        """
        # Find Benchmarks' ISIN codes
        name_list = list(self.names)
        benchmark_isin = [self.tickers[name_list.index(name)] for name in benchmarks]

        # Get train and testing datasets
        whole_dataset = self._returns_between(start_train_date, end_test_date)
        test_dataset = self.weeklyReturns[
            (self.weeklyReturns.index > start_test_date) & (self.weeklyReturns.index <= end_test_date)
        ].copy()
        n_test = len(test_dataset.index)

        sg = ScenarioGenerator(np.random.default_rng())
        use_markowitz = model == "Markowitz model"

        # Moments are needed by the Markowitz model and by Monte Carlo scenario generation
        if use_markowitz or scenarios_type == "MonteCarlo":
            sigma_lst, mu_lst = MomentGenerator.generate_sigma_mu_for_test_periods(
                data=whole_dataset[subset_of_assets], n_test=n_test
            )

        start_of_test_dataset = str(test_dataset.index.date[0])

        if use_markowitz:
            # The Markowitz model works on moments only, so no scenarios are generated for it
            targets, benchmark_port_val = get_mvo_targets(
                test_date=start_of_test_dataset,
                benchmark=benchmark_isin,
                budget=100,
                data=whole_dataset,
            )
            port_allocation, port_value, _port_cvar = mvo_model(
                test_ret=test_dataset[subset_of_assets],
                mu_lst=mu_lst,
                sigma_lst=sigma_lst,
                targets=targets,
                budget=100,
                trans_cost=0.001,
                max_weight=1,
                solver=solver,
                lower_bound=lower_bound,
            )
        else:
            # SCENARIO GENERATION
            if scenarios_type == "MonteCarlo":
                scenarios = sg.monte_carlo(
                    data=whole_dataset[subset_of_assets],
                    n_simulations=n_simulations,
                    n_test=n_test,
                    sigma_lst=sigma_lst,
                    mu_lst=mu_lst,
                )
            else:
                scenarios = sg.bootstrapping(
                    data=whole_dataset[subset_of_assets],
                    n_simulations=n_simulations,  # number of scenarios per period
                    n_test=n_test,  # number of periods
                )

            # TARGETS GENERATION
            targets, benchmark_port_val = get_cvar_targets(
                test_date=start_of_test_dataset,
                benchmark=benchmark_isin,
                budget=100,
                cvar_alpha=0.05,
                data=whole_dataset,
                scgen=sg,
                n_simulations=n_simulations,
            )

            # MATHEMATICAL MODELING
            port_allocation, port_value, _port_cvar = cvar_model(
                test_ret=test_dataset[subset_of_assets],
                scenarios=scenarios,
                targets=targets,
                budget=100,
                cvar_alpha=0.05,
                trans_cost=0.001,
                max_weight=1,
                solver=solver,
                lower_bound=lower_bound,
            )

        # PLOTTING
        fig_performance, fig_composition = self.__plot_backtest(
            performance=port_value.copy(),
            performance_benchmark=benchmark_port_val.copy(),
            composition=port_allocation,
            names=self.names,
            tickers=self.tickers,
        )

        # RETURN STATISTICS
        optimal_portfolio_stat = final_stats(port_value)
        benchmark_stat = final_stats(benchmark_port_val)

        return optimal_portfolio_stat, benchmark_stat, fig_performance, fig_composition

    def lifecycle_scenario_analysis(
        self,
        subset_of_assets: list,
        scenarios_type: str,
        n_simulations: int,
        end_year: int,
        withdrawals: int,
        initial_risk_appetite: float,
        initial_budget: int,
        rng_seed=0,
        test_split: float = 0.0,
        start_year: int = 2023,
    ) -> tuple[dict, pd.DataFrame, go.Figure, go.Figure, dict, dict, go.Figure]:
        """
        Compute the lifecycle scenario analysis.

        Args:
            subset_of_assets: Tickers the portfolios may invest in.
            scenarios_type: "MonteCarlo" or "Bootstrap".
            n_simulations: Number of simulated scenarios.
            end_year: Final year of the investment horizon.
            withdrawals: Withdrawal of the first period; later periods grow by 4% per period.
            initial_risk_appetite: Initial risk of the generated glide paths.
            initial_budget: Starting budget of every portfolio.
            rng_seed: Seed of the random generator; ``0`` means unseeded (non-reproducible).
            test_split: When non-zero, passed as ``sampling_ratio`` to split the data into a sampling
                set (weekly moments) and an estimating set (annual moments).
            start_year: First year of the investment horizon; the horizon is ``end_year - start_year``.

        Returns:
            Terminal-wealth tables per glide path, the summary table, the density figure, the glide path
            figure, the allocation targets per glide path, one composition figure per plotted glide
            path, and the figure with all compositions as subplots.

        Raises:
            ValueError: If the horizon is not positive or ``scenarios_type`` is unknown.
        """
        # ------------------------------- INITIALIZE FUNCTION -------------------------------
        n_periods = end_year - start_year
        if n_periods <= 0:
            raise ValueError(f"end_year ({end_year}) must be later than start_year ({start_year}).")
        withdrawal_lst = [withdrawals * (1 + 0.04) ** i for i in range(n_periods)]

        # ------------------------------- PARAMETER INITIALIZATION -------------------------------
        if test_split != 0:
            sampling_set, estimating_set = MomentGenerator.split_dataset(
                data=self.weeklyReturns[subset_of_assets], sampling_ratio=test_split
            )
            _, _, sigma_weekly, mu_weekly = MomentGenerator.generate_annual_sigma_mu_with_risk_free(data=sampling_set)
            sigma, mu, _, _ = MomentGenerator.generate_annual_sigma_mu_with_risk_free(data=estimating_set)
        else:
            sigma, mu, sigma_weekly, mu_weekly = MomentGenerator.generate_annual_sigma_mu_with_risk_free(
                data=self.weeklyReturns[subset_of_assets]
            )

        # ------------------------------- SCENARIO GENERATION -------------------------------
        # default_rng(None) draws fresh entropy, which is what the seed value 0 stands for here
        seed = None if rng_seed == 0 else rng_seed
        sg = ScenarioGenerator(np.random.default_rng(seed))

        if scenarios_type == "MonteCarlo":
            scenarios = sg.MC_simulation_annual_from_weekly(
                weekly_mu=mu_weekly,
                weekly_sigma=sigma_weekly,
                n_simulations=n_simulations,
                n_years=n_periods,
            )
        elif scenarios_type == "Bootstrap":
            scenarios = sg.bootstrap_simulation_annual_from_weekly(
                historical_weekly_returns=self.weeklyReturns[subset_of_assets],
                n_simulations=n_simulations,
                n_years=n_periods,
            )
        else:
            raise ValueError(
                "It appears that a scenario method other than MonteCarlo or Bootstrap has been chosen. "
                "Please check for spelling mistakes."
            )

        # ------------------------------- Allocation Target Generation -------------------------------
        glide_paths_df, fig_glidepaths = generate_risk_profiles(
            n_periods=n_periods, initial_risk=initial_risk_appetite, minimum_risk=0.01
        )

        allocation_targets = {}
        for r in glide_paths_df.columns:
            allocation_targets[f"{r}"] = get_port_allocations(
                mu_lst=mu,
                sigma_lst=sigma,
                targets=glide_paths_df[r],
                max_weight=1 / 4,
                solver="CLARABEL",
            )

        # ------------------------------- MATHEMATICAL MODELING -------------------------------
        exhibition_summary = pd.DataFrame()
        terminal_wealth_dict = {}

        for key, df in allocation_targets.items():
            logger.info(
                f"Optimizing portfolio for {key} over {n_simulations} scenarios. An info message will "
                f"appear, when we are halfway through the scenarios for the current strategy."
            )
            portfolio_df, mean_allocations_df, analysis_metrics = riskadjust_model_scen(
                scen=scenarios[:, :, :],
                targets=df,
                budget=initial_budget,
                trans_cost=0.002,
                withdrawal_lst=withdrawal_lst,
                interest_rate=0.04,
            )

            # Add the analysis metrics as a new column in the summary table
            exhibition_summary[key] = analysis_metrics.squeeze()

            portfolio_df["Terminal Wealth"] = pd.to_numeric(portfolio_df["Terminal Wealth"], errors="coerce")
            terminal_wealth_dict[f"{key}"] = portfolio_df

        # ------------------------------- PLOTTING -------------------------------
        # NOTE: this renames the columns of the plotted allocation_targets tables in place
        fig_performance, fig_compositions, fig_compositions_all = self.__plot_portfolio_densities(
            portfolio_performance_dict=terminal_wealth_dict,
            compositions=allocation_targets,
            tickers=self.tickers,
            names=self.names,
        )

        # ------------------------------- RETURN STATISTICS -------------------------------
        return (
            terminal_wealth_dict,
            exhibition_summary,
            fig_performance,
            fig_glidepaths,
            allocation_targets,
            fig_compositions,
            fig_compositions_all,
        )
if __name__ == "__main__":
    # Demo run: build the bot from the bundled data set and exercise every analysis once.
    bot = initialize_bot()

    # Assets that rank among the top 20% of their risk class in each of three consecutive periods
    periods = [
        (bot.min_date, "2017-01-01"),
        ("2017-01-02", "2020-01-01"),
        ("2020-01-02", bot.max_date),
    ]
    best_funds = bot.get_top_performing_assets(time_periods=periods, top_percent=0.2)

    # Risk/return overview over the whole data set with the top performers highlighted
    bot.plot_dots(start_date=bot.min_date, end_date=bot.max_date, top_performers=best_funds)

    # Asset selection with the minimum spanning tree method (figure not requested)
    mst_result = bot.mst(start_date="2000-01-01", end_date="2024-01-01", n_mst_runs=5, plot=False)
    mst_assets = mst_result[1]

    # Asset selection with the clustering method
    clustering_result = bot.clustering(
        start_date="2015-12-23",
        end_date="2017-07-01",
        n_clusters=3,
        n_assets=10,
        plot=True,
    )
    clustering_assets = clustering_result[1]

    # Lifecycle simulation on the MST selection
    lifecycle_result = bot.lifecycle_scenario_analysis(
        subset_of_assets=mst_assets,
        scenarios_type="MonteCarlo",
        n_simulations=1000,
        end_year=2050,
        withdrawals=51000,
        initial_risk_appetite=0.15,
        initial_budget=137000,
    )

    # Backtest of the MST selection against a single benchmark fund
    backtest_result = bot.backtest(
        start_train_date="2015-12-23",
        start_test_date="2018-09-24",
        end_test_date="2019-09-01",
        subset_of_assets=mst_assets,
        benchmarks=["BankInvest Danske Aktier W"],
        scenarios_type="Bootstrapping",
        n_simulations=500,
        model="Markowitz model",
        lower_bound=0,
    )