Coverage for src/ifunnel/financial_data_preprocessing/get_yahoo_data.py: 0%

19 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-12 09:14 +0000

1import os 

2 

3import pandas as pd 

4import yfinance as yf 

5from loguru import logger 

6 

7 

def download_data(start_date: str, end_date: str, tickers: list[str]) -> "pd.DataFrame | None":
    """
    Download adjusted close prices for the ETFs of the NORD and Lysa portfolios.

    Args:
        start_date: Start of the requested period, forwarded to ``yf.download`` as ``start``.
        end_date: End of the requested period, forwarded to ``yf.download`` as ``end``.
        tickers: Yahoo! Finance ticker symbols to download.

    Returns:
        The "Adj Close" part of the downloaded price data, or ``None`` when the
        download (or the column selection) raised an exception. Callers must
        check for ``None`` before using the result.
    """
    # The return annotation is a string so that the ``X | None`` syntax is never
    # evaluated at runtime and the module keeps working on older interpreters.
    try:
        # Request unadjusted columns explicitly: "Adj Close" is only part of the
        # result when auto_adjust is off, and the library default for that flag
        # is not the same across yfinance releases.
        raw_prices = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
        daily_prices = raw_prices["Adj Close"]
    except Exception as e:
        # Deliberate best-effort: report the failure and signal it with None
        # instead of propagating the error to the caller.
        logger.warning(f"⚠️ Problem when downloading our data with an error: {e}")
        daily_prices = None

    return daily_prices

21 

22 

if __name__ == "__main__":
    # Input and output files are resolved relative to the parent of the
    # current working directory, so the script depends on where it is run from.
    base_dir = os.path.dirname(os.getcwd())

    # Load tickers' names
    path_to_tickers = os.path.join(base_dir, "financial_data/top_2000_etfs.xlsx")
    data_excel = pd.read_excel(path_to_tickers)

    # The first row of both columns is skipped
    # (presumably a sub-header row in the spreadsheet — TODO confirm).
    tickers = data_excel["List of Top 100 ETFs"].to_list()[1:]
    names = data_excel["Unnamed: 1"].to_list()[1:]
    mapping = dict(zip(tickers, names))

    # Download raw data
    data_yahoo = download_data(start_date="2022-12-31", end_date="2023-07-30", tickers=tickers)
    if data_yahoo is None:
        # download_data signals a failed download with None; stop with a clear
        # message instead of failing later with an AttributeError on `.columns`.
        raise SystemExit("Download from Yahoo! Finance failed; no price data was written.")

    # Two-level column header: the ticker symbol on top, the mapped name below.
    data_yahoo.columns = pd.MultiIndex.from_arrays(
        [
            data_yahoo.columns,
            [mapping[col] for col in data_yahoo.columns],
        ]
    )
    data_yahoo.to_parquet(os.path.join(base_dir, "financial_data/daily_price.parquet"))