Skip to content

Commit b545ce2

Browse files
yanghao14, xushiyan
yanghao14
authored and committed
style: enforce python code style (apache#5)
- Add Ruff and MyPy configurations to pyproject.toml
- Integrate Python linting in GitHub Actions
- Format code with Ruff and Ruff Format
1 parent 7566337 commit b545ce2

File tree

6 files changed

+161
-57
lines changed

6 files changed

+161
-57
lines changed

.github/workflows/compliance.yml

+18-1
Original file line number | Diff line number | Diff line change
@@ -47,5 +47,22 @@ jobs:
4747
- uses: actions/checkout@v4
4848
- name: Check license header
4949
uses: apache/skywalking-eyes/header@v0.6.0
50-
- name: Check code style
50+
- name: Check rust code style
5151
run: cd python && make check-rust
52+
- name: Set up Python ${{ matrix.python-version }}
53+
uses: actions/setup-python@v5
54+
with:
55+
python-version: ${{ matrix.python-version }}
56+
cache: pip
57+
cache-dependency-path: pyproject.toml
58+
- name: Install python linter dependencies
59+
working-directory: ./python
60+
run: |
61+
make setup-env
62+
source venv/bin/activate
63+
pip install ruff mypy
64+
- name: Run python linter
65+
working-directory: ./python
66+
run: |
67+
source venv/bin/activate
68+
make check-python

python/Makefile

+8
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,14 @@ check-rust: ## Run check on Rust
4848
$(info --- Check Rust format ---)
4949
cargo fmt --all -- --check
5050

51+
.PHONY: check-python
52+
check-python: ## Run check on Python
53+
$(info --- Check Python code quality ---)
54+
pip install ruff mypy
55+
ruff check .
56+
ruff format .
57+
mypy .
58+
5159
.PHONY: test-rust
5260
test-rust: ## Run tests on Rust
5361
$(info --- Run Rust tests ---)

python/hudi/_internal.pyi

+7-16
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@ import pyarrow
2121

2222
__version__: str
2323

24-
2524
@dataclass(init=False)
2625
class HudiFileSlice:
2726
file_group_id: str
@@ -33,24 +32,16 @@ class HudiFileSlice:
3332

3433
def base_file_relative_path(self) -> str: ...
3534

36-
3735
@dataclass(init=False)
3836
class HudiTable:
39-
4037
def __init__(
41-
self,
42-
table_uri: str,
43-
options: Optional[Dict[str, str]] = None,
38+
self,
39+
table_uri: str,
40+
options: Optional[Dict[str, str]] = None,
4441
): ...
45-
46-
def get_schema(self) -> "pyarrow.Schema": ...
47-
42+
def get_schema(self) -> 'pyarrow.Schema': ...
4843
def split_file_slices(self, n: int) -> List[List[HudiFileSlice]]: ...
49-
5044
def get_file_slices(self) -> List[HudiFileSlice]: ...
51-
52-
def read_file_slice(self, base_file_relative_path) -> pyarrow.RecordBatch: ...
53-
54-
def read_snapshot(self) -> List["pyarrow.RecordBatch"]: ...
55-
56-
def read_snapshot_as_of(self, timestamp: str) -> List["pyarrow.RecordBatch"]: ...
45+
def read_file_slice(self, base_file_relative_path: str) -> pyarrow.RecordBatch: ...
46+
def read_snapshot(self) -> List['pyarrow.RecordBatch']: ...
47+
def read_snapshot_as_of(self, timestamp: str) -> List['pyarrow.RecordBatch']: ...

python/pyproject.toml

+24
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,33 @@ dynamic = ["version"]
4949
[tool.maturin]
5050
module-name = "hudi._internal"
5151

52+
[tool.ruff]
53+
target-version = 'py38'
54+
lint.mccabe = { max-complexity = 14 }
55+
lint.flake8-quotes = {inline-quotes = 'single', multiline-quotes = 'double'}
56+
lint.pydocstyle = { convention = 'google' }
57+
format.quote-style = 'single'
58+
lint.ignore = [
59+
"Q000",
60+
"Q001",
61+
"Q002",
62+
"Q003",
63+
"COM812",
64+
"COM819",
65+
"D104",
66+
"I001",
67+
"UP006",
68+
"UP007",
69+
"UP037",
70+
"E501", # Formatted code may exceed the line length, leading to line-too-long (E501) errors.
71+
]
72+
5273
[tool.mypy]
5374
files = "hudi/*.py"
5475
exclude = "^tests"
76+
warn_unused_configs = true
77+
ignore_missing_imports = true
78+
strict = true
5579

5680
[tool.pytest.ini_options]
5781
testpaths = [

python/tests/conftest.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -23,18 +23,18 @@
2323

2424

2525
def _extract_testing_table(zip_file_path, target_path) -> str:
26-
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
26+
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
2727
zip_ref.extractall(target_path)
28-
return os.path.join(target_path, "trips_table")
28+
return os.path.join(target_path, 'trips_table')
2929

3030

3131
@pytest.fixture(
3232
params=[
33-
"0.x_cow_partitioned",
33+
'0.x_cow_partitioned',
3434
]
3535
)
3636
def get_sample_table(request, tmp_path) -> str:
37-
fixture_path = "tests/table"
37+
fixture_path = 'tests/table'
3838
table_name = request.param
39-
zip_file_path = Path(fixture_path).joinpath(f"{table_name}.zip")
39+
zip_file_path = Path(fixture_path).joinpath(f'{table_name}.zip')
4040
return _extract_testing_table(zip_file_path, tmp_path)

python/tests/test_table_read.py

+99-35
Original file line number | Diff line number | Diff line change
@@ -20,28 +20,49 @@
2020

2121
from hudi import HudiTable
2222

23-
PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
24-
pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="hudi only supported if pyarrow >= 8.0.0")
23+
PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split('.') if s.isnumeric()) < (
24+
8,
25+
0,
26+
0,
27+
)
28+
pytestmark = pytest.mark.skipif(
29+
PYARROW_LE_8_0_0, reason='hudi only supported if pyarrow >= 8.0.0'
30+
)
2531

2632

2733
def test_sample_table(get_sample_table):
2834
table_path = get_sample_table
2935
table = HudiTable(table_path)
3036

31-
assert table.get_schema().names == ['_hoodie_commit_time', '_hoodie_commit_seqno', '_hoodie_record_key',
32-
'_hoodie_partition_path', '_hoodie_file_name', 'ts', 'uuid', 'rider', 'driver',
33-
'fare', 'city']
37+
assert table.get_schema().names == [
38+
'_hoodie_commit_time',
39+
'_hoodie_commit_seqno',
40+
'_hoodie_record_key',
41+
'_hoodie_partition_path',
42+
'_hoodie_file_name',
43+
'ts',
44+
'uuid',
45+
'rider',
46+
'driver',
47+
'fare',
48+
'city',
49+
]
3450

3551
file_slices = table.get_file_slices()
3652
assert len(file_slices) == 5
37-
assert set(f.commit_time for f in file_slices) == {'20240402123035233', '20240402144910683'}
53+
assert set(f.commit_time for f in file_slices) == {
54+
'20240402123035233',
55+
'20240402144910683',
56+
}
3857
assert all(f.num_records == 1 for f in file_slices)
3958
file_slice_paths = [f.base_file_relative_path() for f in file_slices]
40-
assert set(file_slice_paths) == {'chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet',
41-
'san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet',
42-
'san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet',
43-
'san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet',
44-
'sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet'}
59+
assert set(file_slice_paths) == {
60+
'chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet',
61+
'san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet',
62+
'san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet',
63+
'san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet',
64+
'sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet',
65+
}
4566

4667
batch = table.read_file_slice(file_slice_paths[0])
4768
t = pa.Table.from_batches([batch])
@@ -53,29 +74,72 @@ def test_sample_table(get_sample_table):
5374
assert len(next(file_slices_gen)) == 2
5475

5576
batches = table.read_snapshot()
56-
t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts")
57-
assert t.to_pylist() == [{'_hoodie_commit_time': '20240402144910683', 'ts': 1695046462179,
58-
'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', 'fare': 339.0},
59-
{'_hoodie_commit_time': '20240402123035233', 'ts': 1695091554788,
60-
'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', 'fare': 27.7},
61-
{'_hoodie_commit_time': '20240402123035233', 'ts': 1695115999911,
62-
'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', 'fare': 17.85},
63-
{'_hoodie_commit_time': '20240402123035233', 'ts': 1695159649087,
64-
'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', 'fare': 19.1},
65-
{'_hoodie_commit_time': '20240402123035233', 'ts': 1695516137016,
66-
'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', 'fare': 34.15}]
77+
t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by('ts')
78+
assert t.to_pylist() == [
79+
{
80+
'_hoodie_commit_time': '20240402144910683',
81+
'ts': 1695046462179,
82+
'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00',
83+
'fare': 339.0,
84+
},
85+
{
86+
'_hoodie_commit_time': '20240402123035233',
87+
'ts': 1695091554788,
88+
'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721',
89+
'fare': 27.7,
90+
},
91+
{
92+
'_hoodie_commit_time': '20240402123035233',
93+
'ts': 1695115999911,
94+
'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa',
95+
'fare': 17.85,
96+
},
97+
{
98+
'_hoodie_commit_time': '20240402123035233',
99+
'ts': 1695159649087,
100+
'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330',
101+
'fare': 19.1,
102+
},
103+
{
104+
'_hoodie_commit_time': '20240402123035233',
105+
'ts': 1695516137016,
106+
'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c',
107+
'fare': 34.15,
108+
},
109+
]
67110

68-
table = HudiTable(table_path, {
69-
"hoodie.read.as.of.timestamp": "20240402123035233"})
111+
table = HudiTable(table_path, {'hoodie.read.as.of.timestamp': '20240402123035233'})
70112
batches = table.read_snapshot()
71-
t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts")
72-
assert t.to_pylist() == [{'_hoodie_commit_time': '20240402123035233', 'ts': 1695046462179,
73-
'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', 'fare': 33.9},
74-
{'_hoodie_commit_time': '20240402123035233', 'ts': 1695091554788,
75-
'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', 'fare': 27.7},
76-
{'_hoodie_commit_time': '20240402123035233', 'ts': 1695115999911,
77-
'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', 'fare': 17.85},
78-
{'_hoodie_commit_time': '20240402123035233', 'ts': 1695159649087,
79-
'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', 'fare': 19.1},
80-
{'_hoodie_commit_time': '20240402123035233', 'ts': 1695516137016,
81-
'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', 'fare': 34.15}]
113+
t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by('ts')
114+
assert t.to_pylist() == [
115+
{
116+
'_hoodie_commit_time': '20240402123035233',
117+
'ts': 1695046462179,
118+
'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00',
119+
'fare': 33.9,
120+
},
121+
{
122+
'_hoodie_commit_time': '20240402123035233',
123+
'ts': 1695091554788,
124+
'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721',
125+
'fare': 27.7,
126+
},
127+
{
128+
'_hoodie_commit_time': '20240402123035233',
129+
'ts': 1695115999911,
130+
'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa',
131+
'fare': 17.85,
132+
},
133+
{
134+
'_hoodie_commit_time': '20240402123035233',
135+
'ts': 1695159649087,
136+
'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330',
137+
'fare': 19.1,
138+
},
139+
{
140+
'_hoodie_commit_time': '20240402123035233',
141+
'ts': 1695516137016,
142+
'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c',
143+
'fare': 34.15,
144+
},
145+
]

0 commit comments

Comments
 (0)