Coverage for src/flight/utils/alter.py: 100%
15 statements
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-19 01:30 +0000
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-19 01:30 +0000
1"""Utility module for converting between NumPy arrays and PyArrow Tables.
3This module provides functions to convert NumPy arrays to PyArrow Tables and vice versa,
4preserving the original array shapes and data types. It's particularly useful for
5serializing multi-dimensional arrays for transmission over Arrow Flight.
6"""
8import numpy as np
9import pyarrow as pa
def np_2_pa(data: dict[str, np.ndarray]) -> pa.Table:
    """Pack a mapping of NumPy arrays into a single-row PyArrow Table.

    Every non-None entry of ``data`` becomes one column. The column holds a
    single struct value with two fields: 'data', the array flattened to one
    dimension, and 'shape', the original dimensions as int64 values.
    Entries whose value is None are skipped and produce no column.

    Args:
        data (Dict[str, np.ndarray]): Dictionary where keys are column names and values are NumPy arrays
            of any shape.

    Returns:
        pa.Table: PyArrow Table with one column per retained key, each containing one struct
            with 'data' and 'shape' fields.

    Examples:
        >>> import numpy as np
        >>> table = np_2_pa({'matrix': np.array([[1, 2], [3, 4]]), 'vector': np.array([5, 6, 7])})
        >>> table.column_names
        ['matrix', 'vector']
        >>> table.num_rows
        1

        >>> # Rebuilding an array by hand from one column
        >>> entry = table.column('matrix')[0].as_py()
        >>> np.array(entry['data']).reshape(tuple(entry['shape'])).tolist()
        [[1, 2], [3, 4]]
    """

    def _encode(array_like: np.ndarray) -> pa.Array:
        """Wrap one array as a one-element PyArrow Array of {'data', 'shape'} structs.

        Args:
            array_like: The NumPy array (or array-like value) to wrap.

        Returns:
            A PyArrow Array of length one holding the flattened values and the shape.
        """
        source = np.asarray(array_like)
        # The shape is stored next to the flattened values so the array can be rebuilt later.
        dims = np.array(source.shape, dtype=np.int64)
        flat = source.flatten()
        return pa.array([{"data": flat, "shape": dims}])

    columns = {}
    for name, array in data.items():
        if array is None:
            # None entries are dropped rather than encoded.
            continue
        columns[name] = _encode(array)

    return pa.Table.from_pydict(columns)
def pa_2_np(table: pa.Table) -> dict[str, np.ndarray]:
    """Convert a PyArrow Table back to a dictionary of NumPy arrays.

    This is the inverse operation of np_2_pa. Only the first row of each
    column is read; it must be a struct with a 'data' field (the flattened
    values) and a 'shape' field (the original dimensions).

    The flattened values are converted from Arrow's typed array directly to
    NumPy instead of going through Python lists, so the element type stored
    in the table (e.g. float32, int8) is kept rather than being re-inferred
    by NumPy and widened to its defaults.

    Args:
        table (pa.Table): PyArrow Table whose columns hold {'data', 'shape'} structs.

    Returns:
        Dict[str, np.ndarray]: Dictionary where keys are column names and values are NumPy arrays
            with their original shapes restored.
    """

    def _restore(col_name: str) -> np.ndarray:
        """Rebuild the NumPy array stored in the first row of one column.

        Args:
            col_name: The name of the column to convert.

        Returns:
            The reconstructed NumPy array with its original shape.
        """
        entry = table.column(col_name)[0]
        shape = tuple(entry["shape"].as_py())
        values = entry["data"].values

        if values.null_count == 0:
            # writable=True makes Arrow hand back a copy, so callers receive a
            # mutable array just as they did when it was built from a list.
            flat = values.to_numpy(zero_copy_only=False, writable=True)
            if flat.dtype != object:
                return flat.reshape(shape)

        # Payloads containing nulls or non-numeric values (e.g. strings) keep
        # the previous behaviour: NumPy infers the dtype from Python objects.
        return np.asarray(values.to_pylist()).reshape(shape)

    return {name: _restore(name) for name in table.column_names}