Coverage for src/ifunnel/financial_data_preprocessing/get_yahoo_data.py: 0%
19 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-12 09:14 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-12 09:14 +0000
1import os
3import pandas as pd
4import yfinance as yf
5from loguru import logger
def download_data(start_date: str, end_date: str, tickers: list[str]) -> "pd.DataFrame | None":
    """
    Download daily adjusted close prices for a list of ETF tickers from Yahoo! Finance.

    Args:
        start_date: Start of the download window, forwarded to ``yf.download`` as ``start``.
        end_date: End of the download window, forwarded to ``yf.download`` as ``end``.
        tickers: Yahoo! Finance ticker symbols to download.

    Returns:
        The "Adj Close" slice of the downloaded data, or ``None`` when the download
        (or the column lookup) raised. Failures are logged as a warning rather than
        propagated, so callers must check for ``None`` before using the result.
    """
    try:
        # Ask for unadjusted columns explicitly: recent yfinance releases default to
        # auto_adjust=True, which removes the "Adj Close" column selected below and
        # would turn every call into a swallowed KeyError.
        raw_prices = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
        daily_prices = raw_prices["Adj Close"]
    except Exception as e:
        # Deliberately best-effort: any failure is reported and signalled with None.
        logger.warning(f"⚠️ Problem when downloading our data with an error: {e}")
        daily_prices = None

    return daily_prices
if __name__ == "__main__":
    # Input and output files are resolved relative to the parent of the current
    # working directory, inside its "financial_data" folder.
    base_dir = os.path.dirname(os.getcwd())

    # Load tickers' names
    path_to_tickers = os.path.join(base_dir, "financial_data/top_2000_etfs.xlsx")
    data_excel = pd.read_excel(path_to_tickers)
    # The first row of each column is skipped (presumably a sub-header row in the
    # spreadsheet; verify against the Excel file).
    tickers = data_excel["List of Top 100 ETFs"].to_list()[1:]
    names = data_excel["Unnamed: 1"].to_list()[1:]
    # Ticker symbol -> value from the second spreadsheet column
    mapping = dict(zip(tickers, names))

    # Download raw data
    data_yahoo = download_data(start_date="2022-12-31", end_date="2023-07-30", tickers=tickers)
    if data_yahoo is None:
        # download_data already logged the cause; stop here instead of failing
        # later with an AttributeError on None.
        raise SystemExit("Downloading daily prices failed; no parquet file was written.")

    # Two-level column index: ticker symbol on top, mapped name below
    data_yahoo.columns = [
        data_yahoo.columns,
        [mapping[col] for col in data_yahoo.columns],
    ]
    data_yahoo.to_parquet(os.path.join(base_dir, "financial_data/daily_price.parquet"))