Coverage for src / monkeys / data.py: 100%

26 statements  

« prev     ^ index     » next       coverage.py v7.13.2, created at 2026-02-03 02:14 +0000

1"""Data acquisition module for historical stock prices. 

2 

3This module provides functionality to download and process historical 

4stock price data from various sources. 

5""" 

6 

7from __future__ import annotations 

8 

9import logging 

10from pathlib import Path 

11 

12import polars as pl 

13 

14logger = logging.getLogger(__name__) 

15 

16 

def load_prices_from_csv(filepath: str | Path) -> pl.DataFrame:
    """Load price data from a CSV file.

    Args:
        filepath: Path to the CSV file containing price data.
            Expected format: Date column as first column, ticker columns with prices.

    Returns:
        DataFrame with Date column and ticker columns. Every column other
        than "Date" is cast to Float64.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        ValueError: If the CSV file is empty (no bytes, or a header with no
            rows), cannot be parsed, or holds values in a ticker column
            that cannot be converted to floats.
    """
    filepath = Path(filepath)
    if not filepath.exists():
        msg = f"Price file not found: {filepath}"
        raise FileNotFoundError(msg)

    # Polars signals a zero-byte file and parse failures with its own
    # exception types; translate them so callers only need to handle the
    # ValueError promised in the docstring.
    try:
        prices = pl.read_csv(filepath, try_parse_dates=True)
    except pl.exceptions.NoDataError as err:
        msg = f"Price file is empty: {filepath}"
        raise ValueError(msg) from err
    except pl.exceptions.PolarsError as err:
        msg = f"Price file is malformed: {filepath}"
        raise ValueError(msg) from err

    # A file with a header line but no data rows parses fine and ends up here.
    if prices.is_empty():
        msg = f"Price file is empty: {filepath}"
        raise ValueError(msg)

    # Convert all non-date columns to float64
    ticker_cols = [col for col in prices.columns if col != "Date"]
    try:
        prices = prices.with_columns([pl.col(col).cast(pl.Float64) for col in ticker_cols])
    except pl.exceptions.PolarsError as err:
        # The cast fails when a ticker column holds non-numeric text.
        msg = f"Price file is malformed: {filepath}"
        raise ValueError(msg) from err

    logger.info("Loaded prices for %d tickers from %s", len(ticker_cols), filepath)
    return prices

48 

49 

def calculate_returns(prices: pl.DataFrame, method: str = "simple") -> pl.DataFrame:
    """Turn a table of prices into a table of period-over-period returns.

    Args:
        prices: DataFrame with a "Date" column plus one column per ticker.
        method: Which return definition to apply:
            - "simple": P_t / P_{t-1} - 1
            - "log": ln(P_t) - ln(P_{t-1})

    Returns:
        DataFrame with the "Date" column followed by one return column per
        ticker, named like the input columns. Rows containing a null are
        removed via ``drop_nulls()``, which includes the first row because
        it has no preceding price.

    Raises:
        ValueError: If method is neither "simple" nor "log".
    """
    # Everything that is not the Date column is treated as a ticker.
    tickers = [name for name in prices.columns if name != "Date"]

    if method == "simple":
        return_exprs = [pl.col(name).pct_change().alias(name) for name in tickers]
    elif method == "log":
        # Difference of logs equals the log of the price ratio.
        return_exprs = [pl.col(name).log().diff().alias(name) for name in tickers]
    else:
        msg = f"Unknown return method: {method}. Use 'simple' or 'log'."
        raise ValueError(msg)

    returns = prices.select([pl.col("Date")] + return_exprs)
    return returns.drop_nulls()