Coverage for src / monkeys / data.py: 100%
26 statements
« prev ^ index » next coverage.py v7.13.2, created at 2026-02-03 02:14 +0000
« prev ^ index » next coverage.py v7.13.2, created at 2026-02-03 02:14 +0000
1"""Data acquisition module for historical stock prices.
3This module provides functionality to download and process historical
4stock price data from various sources.
5"""
7from __future__ import annotations
9import logging
10from pathlib import Path
12import polars as pl
14logger = logging.getLogger(__name__)
def load_prices_from_csv(filepath: str | Path) -> pl.DataFrame:
    """Load price data from a CSV file.

    Args:
        filepath: Path to the CSV file containing price data.
            Expected format: a ``Date`` column plus one column of prices
            per ticker.

    Returns:
        DataFrame with the ``Date`` column and every other column cast to
        Float64.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        ValueError: If the CSV file is empty (zero bytes or header only),
            cannot be parsed, or holds values that cannot be converted
            to floats.
    """
    filepath = Path(filepath)
    if not filepath.exists():
        msg = f"Price file not found: {filepath}"
        raise FileNotFoundError(msg)

    try:
        # raise_if_empty=False makes a zero-byte file come back as an empty
        # frame rather than raising polars' NoDataError, so it is reported
        # through the documented ValueError below.
        prices = pl.read_csv(filepath, try_parse_dates=True, raise_if_empty=False)
    except pl.exceptions.PolarsError as err:
        msg = f"Price file is malformed: {filepath}"
        raise ValueError(msg) from err

    if prices.is_empty():
        msg = f"Price file is empty: {filepath}"
        raise ValueError(msg)

    # Convert all non-date columns to float64
    ticker_cols = [col for col in prices.columns if col != "Date"]
    try:
        prices = prices.with_columns([pl.col(col).cast(pl.Float64) for col in ticker_cols])
    except pl.exceptions.PolarsError as err:
        # A strict cast fails on non-numeric cells; surface that as the
        # documented ValueError while keeping the polars error as the cause.
        msg = f"Price file contains non-numeric price data: {filepath}"
        raise ValueError(msg) from err

    logger.info("Loaded prices for %d tickers from %s", len(ticker_cols), filepath)
    return prices
def calculate_returns(prices: pl.DataFrame, method: str = "simple") -> pl.DataFrame:
    """Turn a table of prices into a table of period-over-period returns.

    Args:
        prices: DataFrame holding a ``Date`` column; every other column is
            treated as a ticker's price series.
        method: Which return definition to apply:
            - "simple": P_t / P_{t-1} - 1
            - "log": ln(P_t) - ln(P_{t-1})

    Returns:
        DataFrame with the ``Date`` column followed by one return column per
        ticker (same column names as the input). Rows containing nulls,
        such as the first row which has no previous price, are dropped.

    Raises:
        ValueError: If ``method`` is neither "simple" nor "log".
    """
    # Everything that is not the Date column is a price series.
    value_columns = [name for name in prices.columns if name != "Date"]

    # Reject unsupported methods before building any expressions.
    if method != "simple" and method != "log":
        msg = f"Unknown return method: {method}. Use 'simple' or 'log'."
        raise ValueError(msg)

    def simple_return(name: str) -> pl.Expr:
        return pl.col(name).pct_change().alias(name)

    def log_return(name: str) -> pl.Expr:
        return pl.col(name).log().diff().alias(name)

    to_return = simple_return if method == "simple" else log_return
    selection = [pl.col("Date"), *(to_return(name) for name in value_columns)]
    return prices.select(selection).drop_nulls()