Coverage for src/ifunnel/financial_data_preprocessing/get_algostrata_data.py: 0%

49 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-12 09:14 +0000

1import os 

2 

3import dateutil.parser 

4import numpy as np 

5import pandas as pd 

6import requests 

7 

8from ..settings import settings 

9 

10 

11# BATCH FUNCTION 

12# ---------------------------------------------------------------------- 

def batch(iterable, n=1):
    """Split a sliceable sequence into consecutive chunks of at most ``n`` items.

    Args:
        iterable: A sequence supporting ``len()`` and slicing (list, tuple,
            str, ...).
        n: Maximum number of items per chunk; must be at least 1.

    Returns:
        A generator yielding slices of ``iterable`` in order. Every chunk has
        ``n`` items except possibly the last one, which holds the remainder.

    Raises:
        ValueError: If ``n`` is smaller than 1. A negative step would otherwise
            make ``range`` empty and silently produce no chunks at all.
    """
    if n < 1:
        raise ValueError(f"batch size must be at least 1, got {n}")
    length = len(iterable)
    # Slicing clamps the upper bound itself, so the last chunk needs no min().
    return (iterable[start : start + n] for start in range(0, length, n))

17 

18 

19# Get IDs and ISIN codes 

20# ---------------------------------------------------------------------- 

def get_algostrata_data(batch_size: int = 3) -> pd.DataFrame:
    """Download the re-invested price series of all AlgoStrata assets.

    The asset list (id, ISIN, name) is fetched from
    ``settings.ALGOSTRATA_NAMES_URL``; prices are then requested from
    ``settings.ALGOSTRATA_PRICES_URL`` in rounds of ``batch_size`` ids.
    Rounds answered with a status code other than 200 are reported on stdout
    and skipped.

    Args:
        batch_size: Number of asset ids sent with each price request.

    Returns:
        A table indexed by parsed price dates with one column per asset that
        returned usable price data. Columns are labelled with the pair
        (ISIN, name). An empty DataFrame is returned when no asset yielded
        any data.
    """
    headers = {
        "X-Api-Key": settings.ALGOSTRATA_KEY,
        "content-type": "application/json",
    }

    # GET ASSET NAME DATA
    response = requests.get(settings.ALGOSTRATA_NAMES_URL, headers=headers)
    assets = response.json()  # downloaded data

    # SAVE IDs, ISIN CODES AND NAMES INTO PARALLEL LISTS
    id_list = [asset["id"] for asset in assets]
    isin_list = [asset["isin"] for asset in assets]
    name_list = [asset["name"] for asset in assets]

    round_count = int(np.ceil(len(id_list) / batch_size))  # number of iterations
    frames = []  # one single-column table per asset with usable prices

    # LOAD DATASET BY STEP, EACH STEP batch_size ASSETS
    # enumerate() advances the round number even when a round fails, so the
    # label offset of the following rounds stays aligned with id_list.
    for rep, sub_id_list in enumerate(batch(id_list, batch_size)):
        # GET ASSET PRICE DATA
        print("---- Starting round", rep + 1, "out of", round_count, "----")
        response = requests.post(
            settings.ALGOSTRATA_PRICES_URL,
            json={"idList": sub_id_list},
            headers=headers,
        )

        if response.status_code != 200:
            print(f"Code {response.reason}, content {response.text}")
            print("---- Error round", rep + 1, "out of", round_count, "----")
            continue

        data = response.json()  # downloaded data

        # NOTE(review): labels are matched by position, i.e. this assumes
        # data["result"] lists the assets in the order they were requested.
        # Confirm against the API contract.
        offset = rep * batch_size
        for num, asset in enumerate(data["result"]):
            position = offset + num
            frame = _price_frame(asset, isin_list[position], name_list[position])
            if frame is not None:
                frames.append(frame)

    if not frames:
        # Nothing usable was downloaded; hand back an empty table instead of
        # failing on an unassigned result variable.
        return pd.DataFrame()

    # A single concat avoids re-copying the growing table once per asset.
    return pd.concat(frames, axis=1)


def _price_frame(asset, isin, name):
    """Build the one-column price table of a single asset.

    Args:
        asset: One entry of the price response's ``result`` list.
        isin: ISIN code used as the first column label level.
        name: Asset name used as the second column label level.

    Returns:
        A DataFrame indexed by the parsed ``date`` of every
        ``reInvestedPrices`` entry and holding its ``unit_DKK`` value, or
        ``None`` when the asset has no price data or all values are missing.
    """
    price_data = asset["priceData"]
    if price_data is None:
        return None

    reinvested = price_data["reInvestedPrices"]
    dates = [dateutil.parser.parse(point["date"]) for point in reinvested]
    prices = [point["unit_DKK"] for point in reinvested]
    frame = pd.DataFrame(prices, index=dates, columns=[[isin], [name]])

    # Columns without a single real value carry no information.
    if frame.isnull().values.all():
        return None
    return frame

95 

96 

if __name__ == "__main__":
    # Fetch the complete price table from the API.
    data_algostrata = get_algostrata_data()
    # Store it as a parquet file inside the ``financial_data`` folder located
    # in the parent of the current working directory.
    parent_dir = os.path.dirname(os.getcwd())
    target_path = os.path.join(parent_dir, "financial_data/daily_price.parquet")
    data_algostrata.to_parquet(target_path)