20
20
21
21
from hudi import HudiTable
22
22
23
- PYARROW_LE_8_0_0 = tuple (int (s ) for s in pa .__version__ .split ("." ) if s .isnumeric ()) < (8 , 0 , 0 )
24
- pytestmark = pytest .mark .skipif (PYARROW_LE_8_0_0 , reason = "hudi only supported if pyarrow >= 8.0.0" )
23
+ PYARROW_LE_8_0_0 = tuple (int (s ) for s in pa .__version__ .split ('.' ) if s .isnumeric ()) < (
24
+ 8 ,
25
+ 0 ,
26
+ 0 ,
27
+ )
28
+ pytestmark = pytest .mark .skipif (
29
+ PYARROW_LE_8_0_0 , reason = 'hudi only supported if pyarrow >= 8.0.0'
30
+ )
25
31
26
32
27
33
def test_sample_table (get_sample_table ):
28
34
table_path = get_sample_table
29
35
table = HudiTable (table_path )
30
36
31
- assert table .get_schema ().names == ['_hoodie_commit_time' , '_hoodie_commit_seqno' , '_hoodie_record_key' ,
32
- '_hoodie_partition_path' , '_hoodie_file_name' , 'ts' , 'uuid' , 'rider' , 'driver' ,
33
- 'fare' , 'city' ]
37
+ assert table .get_schema ().names == [
38
+ '_hoodie_commit_time' ,
39
+ '_hoodie_commit_seqno' ,
40
+ '_hoodie_record_key' ,
41
+ '_hoodie_partition_path' ,
42
+ '_hoodie_file_name' ,
43
+ 'ts' ,
44
+ 'uuid' ,
45
+ 'rider' ,
46
+ 'driver' ,
47
+ 'fare' ,
48
+ 'city' ,
49
+ ]
34
50
35
51
file_slices = table .get_file_slices ()
36
52
assert len (file_slices ) == 5
37
- assert set (f .commit_time for f in file_slices ) == {'20240402123035233' , '20240402144910683' }
53
+ assert set (f .commit_time for f in file_slices ) == {
54
+ '20240402123035233' ,
55
+ '20240402144910683' ,
56
+ }
38
57
assert all (f .num_records == 1 for f in file_slices )
39
58
file_slice_paths = [f .base_file_relative_path () for f in file_slices ]
40
- assert set (file_slice_paths ) == {'chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet' ,
41
- 'san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet' ,
42
- 'san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet' ,
43
- 'san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet' ,
44
- 'sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet' }
59
+ assert set (file_slice_paths ) == {
60
+ 'chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet' ,
61
+ 'san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet' ,
62
+ 'san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet' ,
63
+ 'san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet' ,
64
+ 'sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet' ,
65
+ }
45
66
46
67
batch = table .read_file_slice (file_slice_paths [0 ])
47
68
t = pa .Table .from_batches ([batch ])
@@ -53,29 +74,72 @@ def test_sample_table(get_sample_table):
53
74
assert len (next (file_slices_gen )) == 2
54
75
55
76
batches = table .read_snapshot ()
56
- t = pa .Table .from_batches (batches ).select ([0 , 5 , 6 , 9 ]).sort_by ("ts" )
57
- assert t .to_pylist () == [{'_hoodie_commit_time' : '20240402144910683' , 'ts' : 1695046462179 ,
58
- 'uuid' : '9909a8b1-2d15-4d3d-8ec9-efc48c536a00' , 'fare' : 339.0 },
59
- {'_hoodie_commit_time' : '20240402123035233' , 'ts' : 1695091554788 ,
60
- 'uuid' : 'e96c4396-3fad-413a-a942-4cb36106d721' , 'fare' : 27.7 },
61
- {'_hoodie_commit_time' : '20240402123035233' , 'ts' : 1695115999911 ,
62
- 'uuid' : 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa' , 'fare' : 17.85 },
63
- {'_hoodie_commit_time' : '20240402123035233' , 'ts' : 1695159649087 ,
64
- 'uuid' : '334e26e9-8355-45cc-97c6-c31daf0df330' , 'fare' : 19.1 },
65
- {'_hoodie_commit_time' : '20240402123035233' , 'ts' : 1695516137016 ,
66
- 'uuid' : 'e3cf430c-889d-4015-bc98-59bdce1e530c' , 'fare' : 34.15 }]
77
+ t = pa .Table .from_batches (batches ).select ([0 , 5 , 6 , 9 ]).sort_by ('ts' )
78
+ assert t .to_pylist () == [
79
+ {
80
+ '_hoodie_commit_time' : '20240402144910683' ,
81
+ 'ts' : 1695046462179 ,
82
+ 'uuid' : '9909a8b1-2d15-4d3d-8ec9-efc48c536a00' ,
83
+ 'fare' : 339.0 ,
84
+ },
85
+ {
86
+ '_hoodie_commit_time' : '20240402123035233' ,
87
+ 'ts' : 1695091554788 ,
88
+ 'uuid' : 'e96c4396-3fad-413a-a942-4cb36106d721' ,
89
+ 'fare' : 27.7 ,
90
+ },
91
+ {
92
+ '_hoodie_commit_time' : '20240402123035233' ,
93
+ 'ts' : 1695115999911 ,
94
+ 'uuid' : 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa' ,
95
+ 'fare' : 17.85 ,
96
+ },
97
+ {
98
+ '_hoodie_commit_time' : '20240402123035233' ,
99
+ 'ts' : 1695159649087 ,
100
+ 'uuid' : '334e26e9-8355-45cc-97c6-c31daf0df330' ,
101
+ 'fare' : 19.1 ,
102
+ },
103
+ {
104
+ '_hoodie_commit_time' : '20240402123035233' ,
105
+ 'ts' : 1695516137016 ,
106
+ 'uuid' : 'e3cf430c-889d-4015-bc98-59bdce1e530c' ,
107
+ 'fare' : 34.15 ,
108
+ },
109
+ ]
67
110
68
- table = HudiTable (table_path , {
69
- "hoodie.read.as.of.timestamp" : "20240402123035233" })
111
+ table = HudiTable (table_path , {'hoodie.read.as.of.timestamp' : '20240402123035233' })
70
112
batches = table .read_snapshot ()
71
- t = pa .Table .from_batches (batches ).select ([0 , 5 , 6 , 9 ]).sort_by ("ts" )
72
- assert t .to_pylist () == [{'_hoodie_commit_time' : '20240402123035233' , 'ts' : 1695046462179 ,
73
- 'uuid' : '9909a8b1-2d15-4d3d-8ec9-efc48c536a00' , 'fare' : 33.9 },
74
- {'_hoodie_commit_time' : '20240402123035233' , 'ts' : 1695091554788 ,
75
- 'uuid' : 'e96c4396-3fad-413a-a942-4cb36106d721' , 'fare' : 27.7 },
76
- {'_hoodie_commit_time' : '20240402123035233' , 'ts' : 1695115999911 ,
77
- 'uuid' : 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa' , 'fare' : 17.85 },
78
- {'_hoodie_commit_time' : '20240402123035233' , 'ts' : 1695159649087 ,
79
- 'uuid' : '334e26e9-8355-45cc-97c6-c31daf0df330' , 'fare' : 19.1 },
80
- {'_hoodie_commit_time' : '20240402123035233' , 'ts' : 1695516137016 ,
81
- 'uuid' : 'e3cf430c-889d-4015-bc98-59bdce1e530c' , 'fare' : 34.15 }]
113
+ t = pa .Table .from_batches (batches ).select ([0 , 5 , 6 , 9 ]).sort_by ('ts' )
114
+ assert t .to_pylist () == [
115
+ {
116
+ '_hoodie_commit_time' : '20240402123035233' ,
117
+ 'ts' : 1695046462179 ,
118
+ 'uuid' : '9909a8b1-2d15-4d3d-8ec9-efc48c536a00' ,
119
+ 'fare' : 33.9 ,
120
+ },
121
+ {
122
+ '_hoodie_commit_time' : '20240402123035233' ,
123
+ 'ts' : 1695091554788 ,
124
+ 'uuid' : 'e96c4396-3fad-413a-a942-4cb36106d721' ,
125
+ 'fare' : 27.7 ,
126
+ },
127
+ {
128
+ '_hoodie_commit_time' : '20240402123035233' ,
129
+ 'ts' : 1695115999911 ,
130
+ 'uuid' : 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa' ,
131
+ 'fare' : 17.85 ,
132
+ },
133
+ {
134
+ '_hoodie_commit_time' : '20240402123035233' ,
135
+ 'ts' : 1695159649087 ,
136
+ 'uuid' : '334e26e9-8355-45cc-97c6-c31daf0df330' ,
137
+ 'fare' : 19.1 ,
138
+ },
139
+ {
140
+ '_hoodie_commit_time' : '20240402123035233' ,
141
+ 'ts' : 1695516137016 ,
142
+ 'uuid' : 'e3cf430c-889d-4015-bc98-59bdce1e530c' ,
143
+ 'fare' : 34.15 ,
144
+ },
145
+ ]
0 commit comments