Coverage for src/flight/utils/alter.py: 100%

15 statements  

« prev     ^ index     » next       coverage.py v7.10.4, created at 2025-08-19 01:30 +0000

1"""Utility module for converting between NumPy arrays and PyArrow Tables. 

2 

3This module provides functions to convert NumPy arrays to PyArrow Tables and vice versa, 

4preserving the original array shapes and data types. It's particularly useful for 

5serializing multi-dimensional arrays for transmission over Arrow Flight. 

6""" 

7 

8import numpy as np 

9import pyarrow as pa 

10 

11 

def np_2_pa(data: dict[str, np.ndarray]) -> pa.Table:
    """Pack a mapping of NumPy arrays into a PyArrow Table.

    Every entry becomes one column that holds a single struct value with two
    fields: ``data`` (the array flattened to one dimension) and ``shape``
    (the original dimensions, stored as int64). Entries whose value is
    ``None`` are left out of the resulting table.

    Args:
        data: Mapping from column name to a NumPy array of any shape.

    Returns:
        A PyArrow Table with one column per non-``None`` entry; each column
        contains one struct with ``data`` and ``shape`` fields.

    Examples:
        >>> import numpy as np
        >>> table = np_2_pa({
        ...     'matrix': np.array([[1, 2], [3, 4]]),
        ...     'vector': np.array([5, 6, 7]),
        ...     'scalar': np.array(42),
        ... })

        >>> # Rebuilding an array by hand from one cell
        >>> cell = table.column('matrix')[0].as_py()
        >>> restored = np.array(cell['data']).reshape(tuple(cell['shape']))
    """
    columns: dict[str, pa.Array] = {}
    for name, array in data.items():
        if array is None:
            # Missing entries are skipped rather than encoded.
            continue

        nd = np.asarray(array)
        # The shape travels next to the flattened values so the receiver can
        # rebuild the original dimensions.
        record = {
            "data": nd.flatten(),
            "shape": np.array(nd.shape, dtype=np.int64),
        }
        # One-element array: the whole NumPy array is a single struct cell.
        columns[name] = pa.array([record])

    return pa.Table.from_pydict(columns)

64 

65 

def pa_2_np(table: pa.Table) -> dict[str, np.ndarray]:
    """Convert a PyArrow Table back to a dictionary of NumPy arrays.

    This is the inverse operation of np_2_pa: each column is expected to hold
    a struct with a flat ``data`` list and a ``shape`` list in its first row.

    For integer, floating-point and boolean columns the values are read
    straight from the Arrow buffers, so the element type stored in the table
    (e.g. float32, int32) is kept. Going through Python objects instead would
    let NumPy re-infer the dtype and widen it to float64/int64. Other value
    types are converted via Python objects.

    Args:
        table: PyArrow Table whose columns hold ``data``/``shape`` structs.

    Returns:
        Dictionary where keys are column names and values are NumPy arrays
        reshaped according to the stored ``shape``.

    Raises:
        IndexError: If a column has no rows.
    """

    def _decode(col_name: str) -> np.ndarray:
        """Rebuild the NumPy array stored in the first row of one column."""
        # Only the first row is read; any further rows are ignored.
        cell = table.column(col_name)[0]
        values = cell["data"].values
        value_type = values.type

        if (
            pa.types.is_integer(value_type)
            or pa.types.is_floating(value_type)
            or pa.types.is_boolean(value_type)
        ):
            # Take the dtype from the Arrow type. writable=True forces a copy
            # so callers get a mutable array rather than a read-only view of
            # Arrow memory.
            flat = values.to_numpy(zero_copy_only=False, writable=True)
        else:
            # Non-numeric values (e.g. strings): let NumPy infer the dtype
            # from the Python objects.
            flat = np.asarray(cell["data"].as_py())

        shape = tuple(cell["shape"].as_py())
        return flat.reshape(shape)

    return {name: _decode(name) for name in table.column_names}