Coverage for src/ifunnel/financial_data_preprocessing/get_algostrata

1import os

3import dateutil.parser

4import numpy as np

5import pandas as pd

6import requests

8from ..settings import settings

# BATCH FUNCTION
# ----------------------------------------------------------------------

def batch(iterable, n=1):
    """Yield consecutive slices of ``iterable`` holding at most ``n`` items each.

    Args:
        iterable: A sized, sliceable sequence (``len()`` and slicing are used
            on it), e.g. a list.
        n: Maximum number of items per slice; must be at least 1.

    Yields:
        The slices ``iterable[i : i + n]`` in order. The last slice may be
        shorter than ``n``.

    Raises:
        ValueError: If ``n`` is smaller than 1. Because this is a generator
            function, the error surfaces when iteration starts.
    """
    if n < 1:
        # A zero step makes range() fail with an unrelated message, and a
        # negative step silently produces no batches; reject both explicitly.
        raise ValueError(f"batch size n must be at least 1, got {n}")

    length = len(iterable)
    for start in range(0, length, n):
        # Slicing clamps at the end of the sequence, so no min() is required.
        yield iterable[start : start + n]

# Get IDs and ISIN codes
# ----------------------------------------------------------------------

def get_algostrata_data() -> pd.DataFrame:
    """Download the re-invested price series of all assets into one table.

    The asset list (id, ISIN, name) is fetched from
    ``settings.ALGOSTRATA_NAMES_URL``. Price data is then requested from
    ``settings.ALGOSTRATA_PRICES_URL`` in small batches of ids, and one column
    is built per asset that has price data.

    Returns:
        A DataFrame indexed by the parsed price dates. Columns carry two
        levels, (ISIN, name), and hold the ``unit_DKK`` values of the
        ``reInvestedPrices`` entries. Assets without price data, or whose
        values are all NaN, are left out. An empty DataFrame is returned when
        nothing could be downloaded.

    Raises:
        requests.HTTPError: If the asset-list request returns an error status.
    """
    headers = {
        "X-Api-Key": settings.ALGOSTRATA_KEY,
        "content-type": "application/json",
    }

    # GET ASSET NAME DATA
    response = requests.get(settings.ALGOSTRATA_NAMES_URL, headers=headers)
    # Fail with a clear HTTP error instead of an obscure parsing error below.
    response.raise_for_status()
    assets = response.json()

    # SAVE IDs, ISIN CODES AND NAMES INTO PARALLEL LISTS
    id_list = [asset["id"] for asset in assets]
    isin_list = [asset["isin"] for asset in assets]
    name_list = [asset["name"] for asset in assets]

    # Get the price data with index
    # ------------------------------------------------------------------
    batch_size = 3  # size of a batch
    round_count = int(np.ceil(len(id_list) / batch_size))  # number of iterations

    frames = []  # one single-column frame per asset with usable prices

    # LOAD DATASET BY STEP, EACH STEP batch_size ASSETS
    # enumerate() keeps the round counter in step with the batches even when a
    # round is skipped because of an HTTP error, so labels never shift.
    for round_no, sub_id_list in enumerate(batch(id_list, batch_size)):
        print("---- Starting round", round_no + 1, "out of", round_count, "----")
        response = requests.post(
            settings.ALGOSTRATA_PRICES_URL,
            json={"idList": sub_id_list},
            headers=headers,
        )

        if response.status_code != 200:
            print(f"Code {response.reason}, content {response.text}")
            print("---- Error round", round_no + 1, "out of", round_count, "----")
            continue

        # Position of this batch's first asset in the id/ISIN/name lists.
        offset = round_no * batch_size

        # NOTE(review): labels are assigned by position, which assumes the
        # service returns results in the same order as the requested ids --
        # TODO confirm against the API.
        for num, asset in enumerate(response.json()["result"]):
            price_data = asset["priceData"]
            if price_data is None:
                continue

            reinvested_prices = price_data["reInvestedPrices"]
            dates = [dateutil.parser.parse(entry["date"]) for entry in reinvested_prices]
            prices = [entry["unit_DKK"] for entry in reinvested_prices]

            # Label every column, including the first one created, with the
            # ISIN and name of the asset it actually belongs to.
            position = offset + num
            df = pd.DataFrame(
                prices,
                index=dates,
                columns=[
                    isin_list[position : position + 1],
                    name_list[position : position + 1],
                ],
            )

            # Keep the column only if the price data are not all NaN.
            if not df.isnull().values.all():
                frames.append(df)

    if not frames:
        # Nothing usable was downloaded; return an empty table rather than
        # failing on an unassigned result.
        return pd.DataFrame()

    # Align all columns on the union of their dates in a single concat.
    return pd.concat(frames, axis=1)

if __name__ == "__main__":
    # Fetch the raw price table from the remote service.
    data_algostrata = get_algostrata_data()

    # Write the table as a parquet file below the parent of the current
    # working directory.
    parent_dir = os.path.dirname(os.getcwd())
    target_path = os.path.join(parent_dir, "financial_data/daily_price.parquet")
    data_algostrata.to_parquet(target_path)

Coverage for src/ifunnel/financial_data_preprocessing/get_algostrata_data.py: 0%

49 statements