Coverage for src/antarctic/document.py: 100%
49 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-07 13:57 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-07 13:57 +0000
1"""Extension of the MongoEngine Document class with additional functionality.
3This module provides an abstract base class that extends MongoEngine's Document
4with additional methods for working with collections of documents, extracting
5reference data, and converting documents to pandas DataFrames.
6"""
8from __future__ import annotations
10from collections.abc import Callable, Iterator
11from datetime import UTC, datetime
12from typing import Any
14import pandas as pd
15from bson.json_util import RELAXED_JSON_OPTIONS
16from mongoengine import DateTimeField, DictField, Document, QuerySet, StringField
class XDocument(Document):
    """Abstract base class for MongoDB documents with extended functionality.

    XDocument is an abstract MongoDB Document that cannot be instantiated directly.
    All concrete objects such as Symbols or Strategies should inherit from this class.
    It provides common functionality for working with collections of documents,
    extracting reference data, and converting documents to pandas DataFrames.

    Equality, ordering and hashing are all driven by the document ``name`` (equality
    and hashing additionally take the concrete class into account), so documents can
    be sorted and used consistently in sets and as dictionary keys.

    Attributes:
        name: Unique identifier for the document
        reference: Dictionary for storing reference data
        date_modified: Timestamp, defaulting to the current UTC time at creation

    """

    meta = {"abstract": True}

    name = StringField(unique=True, required=True)
    reference = DictField()

    # Defaults to the current UTC time when the document object is created.
    # NOTE(review): nothing in this class refreshes the value on save; if an
    # automatic update is expected it has to be wired up elsewhere - confirm.
    date_modified = DateTimeField(default=lambda: datetime.now(UTC))

    @classmethod
    def reference_frame(cls, objects: QuerySet | None = None) -> pd.DataFrame:
        """Create a DataFrame containing reference data for each document.

        Args:
            objects: QuerySet of documents to include. ``None`` (the default) selects
                all documents of this class; an empty selection yields an empty frame.

        Returns:
            pd.DataFrame: One row per document (indexed by document name, sorted),
                one column per reference key. The index is named after the
                lowercase class name.

        """
        # Compare against None explicitly: an empty QuerySet is falsy, so
        # ``objects or cls.objects`` would silently fall back to *all* documents.
        if objects is None:
            objects = cls.objects

        # Build one object-dtype column per document, then flip so documents become rows
        columns = {obj.name: pd.Series(dict(obj.reference.items()), dtype=object) for obj in objects}
        frame = pd.DataFrame(columns).transpose()

        # Set the index name to the lowercase class name
        frame.index.name = cls.__name__.lower()
        return frame.sort_index()

    @classmethod
    def subset(cls, names: list[str] | None = None) -> QuerySet:
        """Extract a subset of documents from the database.

        Args:
            names: List of document names to include. ``None`` (the default) selects
                all documents; an empty list selects none.

        Returns:
            QuerySet: Filtered set of documents

        """
        if names is None:
            return cls.objects

        # Filter objects by name using MongoDB's $in operator
        return cls.objects(name__in=names)

    @classmethod
    def to_dict(cls, objects: QuerySet | None = None) -> dict[str, XDocument]:
        """Create a dictionary of documents with names as keys.

        Args:
            objects: QuerySet of documents to include. ``None`` (the default) selects
                all documents of this class.

        Returns:
            dict[str, XDocument]: Dictionary mapping document names to document objects

        """
        # Explicit None check so that an empty selection stays empty
        if objects is None:
            objects = cls.objects

        return {obj.name: obj for obj in objects}

    @classmethod
    def apply(
        cls, func: Callable[[XDocument], Any], default: Any, objects: QuerySet | None = None
    ) -> Iterator[tuple[str, Any]]:
        """Apply a function to each document, yielding name and result pairs.

        If the function raises ``TypeError``, ``AttributeError`` or ``KeyError`` for a
        document, the default value is yielded for that document instead. Any other
        exception propagates to the caller.

        Args:
            func: Function to apply to each document
            default: Default value to use if the function raises one of the handled exceptions
            objects: QuerySet of documents to process. ``None`` (the default) selects
                all documents of this class.

        Yields:
            tuple[str, Any]: Pairs of (document_name, function_result)

        """
        # Explicit None check so that an empty selection stays empty
        if objects is None:
            objects = cls.objects

        for obj in objects:
            # Guard only the call itself; yielding outside the try block ensures that
            # exceptions thrown into the generator by the consumer are not swallowed.
            try:
                result = func(obj)
            except (TypeError, AttributeError, KeyError):
                result = default

            yield obj.name, result

    @classmethod
    def frame(cls, series: str, key: str, objects: QuerySet | None = None) -> pd.DataFrame:
        """Create a DataFrame from a specific field and key across multiple documents.

        Args:
            series: Name of the field to extract from each document
            key: Key within the field to extract
            objects: QuerySet of documents to include. ``None`` (the default) selects
                all documents of this class.

        Returns:
            pd.DataFrame: DataFrame with one column per document (named by document name)
                holding the value found under ``key`` in the requested field. Columns
                that contain only missing values are dropped.

        """
        # Explicit None check so that an empty selection stays empty
        if objects is None:
            objects = cls.objects

        # Extract the specified series and key from each document
        data = {obj.name: getattr(obj, series)[key] for obj in objects}

        # Drop columns that contain only NaN values
        return pd.DataFrame(data).dropna(axis=1, how="all")

    def __lt__(self, other: XDocument) -> bool:
        """Compare documents by name for sorting.

        Args:
            other: Another document to compare with

        Returns:
            bool: True if this document's name is lexicographically less than the other's

        """
        return self.name < other.name

    def __eq__(self, other: Any) -> bool:
        """Check if two documents are equal.

        Two documents are equal if they are of the same class and have the same name.

        Args:
            other: Another object to compare with

        Returns:
            bool: True if the documents are equal

        """
        # Two documents are the same if they have the same name and class
        return self.__class__ == other.__class__ and self.name == other.name

    def __hash__(self) -> int:
        """Generate a hash value for the document.

        This allows documents to be used in sets and as dictionary keys. The hash is
        derived from exactly the attributes that ``__eq__`` compares (class and name),
        so equal documents always share a hash and the hash does not change when
        other fields of the document are modified.

        Returns:
            int: Hash value based on the document's class and name

        """
        return hash((self.__class__, self.name))

    def __str__(self) -> str:
        """Generate a string representation of the document.

        Returns:
            str: String in the format "<ClassName: document_name>"

        """
        return f"<{self.__class__.__name__}: {self.name}>"

    def __repr__(self) -> str:
        """Generate a representation of the document for debugging.

        Returns:
            str: String in the format "<ClassName: document_name>"

        """
        # Same text as the informal string form; delegate to keep a single format
        return self.__str__()