Coverage for src / flight / utils / alter.py: 100%

15 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-27 04:42 +0000

1"""Utility module for converting between NumPy arrays and PyArrow Tables. 

2 

3This module provides functions to convert NumPy arrays to PyArrow Tables and vice versa, 

4preserving the original array shapes and data types. It's particularly useful for 

5serializing multi-dimensional arrays for transmission over Arrow Flight. 

6""" 

7 

8import numpy as np 

9import pyarrow as pa 

10 

11 

def np_2_pa(data: dict[str, np.ndarray]) -> pa.Table:
    """Pack a mapping of NumPy arrays into a single-row PyArrow Table.

    Every entry becomes one column holding a single struct value with two
    fields: ``data`` (the array flattened to one dimension) and ``shape``
    (the original dimensions as int64). Entries whose value is ``None`` are
    left out of the resulting table.

    Args:
        data (Dict[str, np.ndarray]): Mapping from column name to a NumPy
            array of any shape. Values are passed through ``np.asarray``
            before being packed.

    Returns:
        pa.Table: Table with one column per non-``None`` entry, each column
        containing one struct with ``data`` and ``shape`` fields.

    Examples:
        >>> # Single array
        >>> import numpy as np
        >>> data = {'array1': np.array([[1, 2], [3, 4]])}
        >>> table = np_2_pa(data)
        >>> print(table.schema)
        array1: struct<data: list<item: int64>, shape: list<item: int64>>
        ...

        >>> # Multiple arrays of different shapes
        >>> data = {
        ...     'matrix': np.array([[1, 2], [3, 4]]),
        ...     'vector': np.array([5, 6, 7]),
        ...     'scalar': np.array(42)
        ... }
        >>> table = np_2_pa(data)

        >>> # Working with complex data
        >>> arr = table.column('matrix')[0].as_py()
        >>> original_shape = tuple(arr['shape'])
        >>> restored_array = np.array(arr['data']).reshape(original_shape)
    """
    columns = {}
    for name, raw in data.items():
        # Missing entries are skipped rather than encoded as null columns.
        if raw is None:
            continue

        ndarray = np.asarray(raw)
        flat_values = ndarray.flatten()
        # The shape travels with the data so the receiver can undo the flatten.
        dims = np.array(ndarray.shape, dtype=np.int64)

        # One-element array: the whole NumPy array is a single struct value.
        columns[name] = pa.array([{"data": flat_values, "shape": dims}])

    return pa.Table.from_pydict(columns)

65 

66 

def pa_2_np(table: pa.Table) -> dict[str, np.ndarray]:
    """Unpack a PyArrow Table into a dictionary of NumPy arrays.

    Counterpart of ``np_2_pa``: for every column, the first row is read as a
    struct with ``data`` and ``shape`` fields, and ``data`` is reshaped to
    ``shape``.

    Args:
        table (pa.Table): Table whose columns each hold, in their first row,
            a struct with a flat ``data`` list and a ``shape`` list.

    Returns:
        Dict[str, np.ndarray]: Mapping from column name to the array rebuilt
        with the stored shape.
    """
    arrays = {}
    for column_name in table.column_names:
        # Only row 0 is consulted; it is converted to plain Python objects.
        record = table.column(column_name)[0].as_py()

        # NumPy infers the dtype from the Python values in the "data" field.
        flat_values = np.asarray(record["data"])
        dims = tuple(record["shape"])

        arrays[column_name] = flat_values.reshape(dims)

    return arrays