Coverage for src/antarctic/document.py: 100%

49 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-07 13:57 +0000

1"""Extension of the MongoEngine Document class with additional functionality. 

2 

3This module provides an abstract base class that extends MongoEngine's Document 

4with additional methods for working with collections of documents, extracting 

5reference data, and converting documents to pandas DataFrames. 

6""" 

7 

8from __future__ import annotations 

9 

10from collections.abc import Callable, Iterator 

11from datetime import UTC, datetime 

12from typing import Any 

13 

14import pandas as pd 

15from bson.json_util import RELAXED_JSON_OPTIONS 

16from mongoengine import DateTimeField, DictField, Document, QuerySet, StringField 

17 

18 

class XDocument(Document):
    """Abstract base class for MongoDB documents with extended functionality.

    XDocument is declared abstract (``meta = {"abstract": True}``), so it has no
    collection of its own. Concrete documents such as Symbols or Strategies
    should inherit from this class. It provides common functionality for working
    with collections of documents, extracting reference data, and converting
    documents to pandas DataFrames.

    Equality, ordering and hashing are all driven by ``name`` (plus the concrete
    class for equality and hashing), so documents can be sorted and used in sets
    or as dictionary keys consistently.

    Attributes:
        name: Unique identifier for the document
        reference: Dictionary for storing reference data
        date_modified: Timezone-aware UTC timestamp; defaults to the moment the
            field default is evaluated

    """

    meta = {"abstract": True}

    name = StringField(unique=True, required=True)
    reference = DictField()

    # Defaults to the current UTC time when the default is evaluated.
    # NOTE(review): nothing in this class refreshes the value on save; confirm
    # whether a hook elsewhere is expected to keep it current.
    date_modified = DateTimeField(default=lambda: datetime.now(UTC))

    @classmethod
    def reference_frame(cls, objects: QuerySet | None = None) -> pd.DataFrame:
        """Create a DataFrame containing reference data for each document.

        Args:
            objects: QuerySet of documents to include. Only ``None`` selects all
                documents of this class; an empty QuerySet yields an empty frame.

        Returns:
            pd.DataFrame: One row per document (indexed by document name, sorted),
                one column per reference key

        """
        # Test against None explicitly: an empty QuerySet is falsy, and `or`
        # would silently widen an empty selection to every document.
        if objects is None:
            objects = cls.objects

        # Build one column per document, then transpose so documents become rows.
        # dtype=object keeps heterogeneous reference values untouched.
        columns = {obj.name: pd.Series(dict(obj.reference), dtype=object) for obj in objects}
        frame = pd.DataFrame(columns).transpose()

        # Name the index after the lowercase class name
        frame.index.name = cls.__name__.lower()
        return frame.sort_index()

    @classmethod
    def subset(cls, names: list[str] | None = None) -> QuerySet:
        """Extract a subset of documents from the database.

        Args:
            names: List of document names to include (``None`` selects all documents)

        Returns:
            QuerySet: Filtered set of documents

        """
        if names is None:
            return cls.objects

        # Filter objects by name using MongoDB's $in operator
        return cls.objects(name__in=names)

    @classmethod
    def to_dict(cls, objects: QuerySet | None = None) -> dict[str, XDocument]:
        """Create a dictionary of documents with names as keys.

        Args:
            objects: QuerySet of documents to include. Only ``None`` selects all
                documents of this class; an empty QuerySet yields an empty dict.

        Returns:
            dict[str, XDocument]: Dictionary mapping document names to document objects

        """
        # Explicit None check so an empty selection is not replaced by "all"
        if objects is None:
            objects = cls.objects

        return {doc.name: doc for doc in objects}

    @classmethod
    def apply(
        cls, func: Callable[[XDocument], Any], default: Any, objects: QuerySet | None = None
    ) -> Iterator[tuple[str, Any]]:
        """Apply a function to each document, yielding name and result pairs.

        If the function raises ``TypeError``, ``AttributeError`` or ``KeyError``
        for a document, the default value is yielded for that document instead.
        Any other exception propagates to the caller.

        Args:
            func: Function to apply to each document
            default: Value to yield when the function raises one of the handled exceptions
            objects: QuerySet of documents to process. Only ``None`` selects all
                documents of this class; an empty QuerySet yields nothing.

        Yields:
            tuple[str, Any]: Pairs of (document_name, function_result)

        """
        # Explicit None check so an empty selection is not replaced by "all"
        if objects is None:
            objects = cls.objects

        for obj in objects:
            try:
                yield obj.name, func(obj)
            except (TypeError, AttributeError, KeyError):
                # The function could not handle this document: fall back to the default
                yield obj.name, default

    @classmethod
    def frame(cls, series: str, key: str, objects: QuerySet | None = None) -> pd.DataFrame:
        """Create a DataFrame from a specific field and key across multiple documents.

        Args:
            series: Name of the attribute to read from each document
            key: Key to look up inside that attribute
            objects: QuerySet of documents to include. Only ``None`` selects all
                documents of this class; an empty QuerySet yields an empty frame.

        Returns:
            pd.DataFrame: One column per document name, holding the value found
                under ``key``; columns consisting only of NaN are dropped

        Raises:
            AttributeError: If a document has no attribute named ``series``

        """
        # Explicit None check so an empty selection is not replaced by "all"
        if objects is None:
            objects = cls.objects

        # Collect the requested entry of every document, one column per document
        data = {doc.name: getattr(doc, series)[key] for doc in objects}

        # Drop columns that contain only NaN values
        return pd.DataFrame(data).dropna(axis=1, how="all")

    def __lt__(self, other: XDocument) -> bool:
        """Compare documents by name for sorting.

        Args:
            other: Another document to compare with

        Returns:
            bool: True if this document's name is lexicographically less than the other's

        """
        return self.name < other.name

    def __eq__(self, other: Any) -> bool:
        """Check if two documents are equal.

        Two documents are equal if they are of the same class and have the same name.

        Args:
            other: Another object to compare with

        Returns:
            bool: True if the documents are equal

        """
        # Class check first: it short-circuits before `other.name` is touched,
        # so arbitrary objects without a `name` attribute compare unequal safely.
        return self.__class__ == other.__class__ and self.name == other.name

    def __hash__(self) -> int:
        """Generate a hash value for the document.

        The hash is derived from exactly the data ``__eq__`` compares (concrete
        class and name), so documents that compare equal always hash equal and
        the hash does not change when other fields are modified. This allows
        documents to be used in sets and as dictionary keys.

        Returns:
            int: Hash value based on the document's class and name

        """
        return hash((self.__class__, self.name))

    def __str__(self) -> str:
        """Generate a string representation of the document.

        Returns:
            str: String in the format "<ClassName: document_name>"

        """
        return f"<{self.__class__.__name__}: {self.name}>"

    def __repr__(self) -> str:
        """Generate a representation of the document for debugging.

        Returns:
            str: String in the format "<ClassName: document_name>"

        """
        return f"<{self.__class__.__name__}: {self.name}>"