Coverage for src/ifunnel/financial_data_preprocessing/get_algostrata_data.py: 0%
49 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-12 09:14 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-12 09:14 +0000
1import os
3import dateutil.parser
4import numpy as np
5import pandas as pd
6import requests
8from ..settings import settings
11# BATCH FUNCTION
12# ----------------------------------------------------------------------
def batch(iterable, n=1):
    """Yield consecutive slices of ``iterable`` holding at most ``n`` items.

    ``iterable`` must support ``len()`` and slicing. Every chunk has the
    type that slicing ``iterable`` produces; the last chunk may be shorter
    than ``n``.
    """
    total = len(iterable)

    def _chunk(start):
        # Clamp the upper bound so the slice never reaches past the end.
        stop = start + n
        if stop > total:
            stop = total
        return iterable[start:stop]

    yield from (_chunk(start) for start in range(0, total, n))
19# Get IDs and ISIN codes
20# ----------------------------------------------------------------------
def get_algostrata_data() -> pd.DataFrame:
    """Download the re-invested daily prices of all AlgoStrata assets.

    The asset catalogue (id, ISIN, name) is fetched from
    ``settings.ALGOSTRATA_NAMES_URL``. Prices are then requested from
    ``settings.ALGOSTRATA_PRICES_URL`` in batches of asset ids. A batch
    whose response status is not 200 is reported on stdout and skipped.

    Returns:
        A DataFrame indexed by the parsed price dates, with one column per
        asset that delivered price data. Columns are a two-level index of
        (ISIN, name). Assets without price data, or whose prices are all
        NaN, are left out. An empty DataFrame is returned when no asset
        delivered usable prices.
    """
    headers = {
        "X-Api-Key": settings.ALGOSTRATA_KEY,
        "content-type": "application/json",
    }

    # GET ASSET NAME DATA
    # NOTE(review): no timeout is set on the HTTP calls, so a stalled
    # connection blocks indefinitely -- consider adding one.
    response = requests.get(settings.ALGOSTRATA_NAMES_URL, headers=headers)
    assets = response.json()  # downloaded data

    # SAVE IDs, ISIN CODES AND NAMES INTO PARALLEL LISTS
    id_list = [asset["id"] for asset in assets]
    isin_list = [asset["isin"] for asset in assets]
    name_list = [asset["name"] for asset in assets]

    # Get the price data with index
    # ------------------------------------------------------------------
    batch_size = 3  # size of a batch
    round_count = int(np.ceil(len(id_list) / batch_size))  # number of iterations
    frames = []  # one single-column frame per asset with usable prices

    # ``enumerate`` advances the round counter even when a round is skipped
    # because of an HTTP error, so later rounds keep the correct offset
    # into ``isin_list`` / ``name_list``.
    for rep, sub_id_list in enumerate(batch(id_list, batch_size)):
        # GET ASSET PRICE DATA
        print("---- Starting round", rep + 1, "out of", round_count, "----")
        response = requests.post(
            settings.ALGOSTRATA_PRICES_URL,
            json={"idList": sub_id_list},
            headers=headers,
        )

        if response.status_code != 200:
            print(f"Code {response.reason}, content {response.text}")
            print("---- Error round", rep + 1, "out of", round_count, "----")
            continue

        data = response.json()  # downloaded data
        offset = rep * batch_size

        # NOTE(review): assumes data["result"] lists the assets in the same
        # order as the requested ids -- confirm against the API contract.
        for num, asset in enumerate(data["result"]):
            price_data = asset["priceData"]
            if price_data is None:
                continue

            rows = price_data["reInvestedPrices"]
            dates = [dateutil.parser.parse(row["date"]) for row in rows]
            prices = [row["unit_DKK"] for row in rows]

            # Label the column with the asset this entry belongs to, not
            # with asset 0 (the first asset may have no price data).
            position = offset + num
            df = pd.DataFrame(
                prices,
                index=dates,
                columns=[[isin_list[position]], [name_list[position]]],
            )
            # KEEP THE COLUMN ONLY IF THE PRICE DATA ARE NOT ALL NaN
            if not df.isnull().values.all():
                frames.append(df)

    if not frames:
        return pd.DataFrame()

    # A single concat avoids re-copying the growing table for every asset.
    return pd.concat(frames, axis=1)
if __name__ == "__main__":
    # Fetch the raw daily price table.
    data_algostrata = get_algostrata_data()
    # Store it as a parquet file below the parent of the current working directory.
    parent_dir = os.path.dirname(os.getcwd())
    target_path = os.path.join(parent_dir, "financial_data/daily_price.parquet")
    data_algostrata.to_parquet(target_path)