Coverage for src/antarctic/document.py: 100%

1"""Extension of the MongoEngine Document class with additional functionality.

3This module provides an abstract base class that extends MongoEngine's Document

4with additional methods for working with collections of documents, extracting

5reference data, and converting documents to pandas DataFrames.

6"""

8from __future__ import annotations

10from collections.abc import Callable, Iterator

11from datetime import UTC, datetime

12from typing import Any

14import pandas as pd

15from bson.json_util import RELAXED_JSON_OPTIONS

16from mongoengine import DateTimeField, DictField, Document, QuerySet, StringField

class XDocument(Document):
    """Abstract base class for MongoDB documents with extended functionality.

    XDocument is an abstract MongoDB Document that cannot be instantiated directly.
    All concrete objects such as Symbols or Strategies should inherit from this class.
    It provides common functionality for working with collections of documents,
    extracting reference data, and converting documents to pandas DataFrames.

    Identity (equality, hashing, ordering) is defined by the document's class
    and its ``name``.

    Attributes:
        name: Unique identifier for the document
        reference: Dictionary for storing reference data
        date_modified: Timestamp that defaults to the current UTC time when no
            value is supplied

    """

    meta = {"abstract": True}

    name = StringField(unique=True, required=True)
    reference = DictField()

    # Defaults to the current UTC time when no value is supplied.
    # NOTE(review): nothing in this class refreshes the value on save; confirm
    # whether a hook elsewhere in the project keeps it up to date.
    date_modified = DateTimeField(default=lambda: datetime.now(UTC))

    @classmethod
    def reference_frame(cls, objects: QuerySet | None = None) -> pd.DataFrame:
        """Create a DataFrame containing reference data for each document.

        Args:
            objects: QuerySet of documents to include. Only ``None`` selects all
                documents of this class; an empty QuerySet yields an empty frame.

        Returns:
            pd.DataFrame: DataFrame with reference data, indexed by document name

        """
        # Test against None explicitly: an empty QuerySet is falsy, so `or`
        # would silently widen an empty selection to the whole collection.
        if objects is None:
            objects = cls.objects

        # One column per document, then transpose so that documents become rows
        frame = pd.DataFrame(
            {obj.name: pd.Series(dict(obj.reference), dtype=object) for obj in objects}
        ).transpose()

        # Set the index name to the lowercase class name
        frame.index.name = cls.__name__.lower()
        return frame.sort_index()

    @classmethod
    def subset(cls, names: list[str] | None = None) -> QuerySet:
        """Extract a subset of documents from the database.

        Args:
            names: List of document names to include (defaults to all documents)

        Returns:
            QuerySet: Filtered set of documents

        """
        if names is None:
            return cls.objects

        # Filter objects by name using MongoDB's $in operator
        return cls.objects(name__in=names)

    @classmethod
    def to_dict(cls, objects: QuerySet | None = None) -> dict[str, XDocument]:
        """Create a dictionary of documents with names as keys.

        Args:
            objects: QuerySet of documents to include. Only ``None`` selects all
                documents of this class; an empty QuerySet yields an empty dict.

        Returns:
            Dict[str, XDocument]: Dictionary mapping document names to document objects

        """
        # Explicit None check so an empty QuerySet is not replaced by cls.objects
        if objects is None:
            objects = cls.objects

        return {x.name: x for x in objects}

    @classmethod
    def apply(
        cls, func: Callable[[XDocument], Any], default: Any, objects: QuerySet | None = None
    ) -> Iterator[tuple[str, Any]]:
        """Apply a function to each document, yielding name and result pairs.

        If the function raises TypeError, AttributeError or KeyError for a
        document, the default value is yielded for that document instead.
        Any other exception propagates to the caller.

        Args:
            func: Function to apply to each document
            default: Default value to use if the function raises one of the
                handled exceptions
            objects: QuerySet of documents to process. Only ``None`` selects all
                documents of this class; an empty QuerySet yields nothing.

        Yields:
            Tuple[str, Any]: Pairs of (document_name, function_result)

        """
        # Explicit None check so an empty QuerySet is not replaced by cls.objects
        if objects is None:
            objects = cls.objects

        for obj in objects:
            try:
                yield obj.name, func(obj)
            except (TypeError, AttributeError, KeyError):
                # If the function fails, yield the default value
                yield obj.name, default

    @classmethod
    def frame(cls, series: str, key: str, objects: QuerySet | None = None) -> pd.DataFrame:
        """Create a DataFrame from a specific field and key across multiple documents.

        Args:
            series: Name of the field to extract from each document
            key: Key within the field to extract
            objects: QuerySet of documents to include. Only ``None`` selects all
                documents of this class; an empty QuerySet yields an empty frame.

        Returns:
            pd.DataFrame: DataFrame with columns named by document names and values from the specified field/key

        """
        # Explicit None check so an empty QuerySet is not replaced by cls.objects
        if objects is None:
            objects = cls.objects

        # Extract the specified series and key from each document
        # Drop columns that contain only NaN values
        return pd.DataFrame({p.name: getattr(p, series)[key] for p in objects}).dropna(axis=1, how="all")

    def __lt__(self, other: XDocument) -> bool:
        """Compare documents by name for sorting.

        Args:
            other: Another document to compare with

        Returns:
            bool: True if this document's name is lexicographically less than the other's

        """
        return self.name < other.name

    def __eq__(self, other: Any) -> bool:
        """Check if two documents are equal.

        Two documents are equal if they are of the same class and have the same name.

        Args:
            other: Another object to compare with

        Returns:
            bool: True if the documents are equal

        """
        # Two documents are the same if they have the same name and class
        return self.__class__ == other.__class__ and self.name == other.name

    def __hash__(self) -> int:
        """Generate a hash value for the document.

        This allows documents to be used in sets and as dictionary keys.
        The hash is built from the same attributes that ``__eq__`` compares
        (class and name), so equal documents always hash equally and the hash
        does not change when other fields are modified.

        Returns:
            int: Hash value based on the document's class and name

        """
        return hash((self.__class__, self.name))

    def __str__(self) -> str:
        """Generate a string representation of the document.

        Returns:
            str: String in the format "<ClassName: document_name>"

        """
        return f"<{self.__class__.__name__}: {self.name}>"

    def __repr__(self) -> str:
        """Generate a representation of the document for debugging.

        Returns:
            str: String in the format "<ClassName: document_name>"

        """
        return f"<{self.__class__.__name__}: {self.name}>"

Coverage for src/antarctic/document.py: 100%

49 statements