Skip to content

Commit f4eba70

Browse files
JSCU-CNI and yunzheng authored
Add -p/--progress bar option to rdump (#166)
Co-authored-by: Yun Zheng Hu <[email protected]>
1 parent 6033534 commit f4eba70

File tree

4 files changed

+56
-3
lines changed

4 files changed

+56
-3
lines changed

flow/record/adapter/text.py

+2 -2
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ class DefaultMissing(dict):
3030
3131
Example:
3232
>>> d = DefaultMissing({"foo": "bar"})
33-
>>> d['foo']
33+
>>> d["foo"]
3434
'bar'
35-
>>> d['missing_key']
35+
>>> d["missing_key"]
3636
'{missing_key}'
3737
"""
3838

flow/record/tools/rdump.py

+23 -1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,14 @@
2121
except ImportError:
2222
version = "unknown"
2323

24+
try:
25+
import tqdm
26+
27+
HAS_TQDM = True
28+
29+
except ImportError:
30+
HAS_TQDM = False
31+
2432
log = logging.getLogger(__name__)
2533

2634

@@ -112,6 +120,12 @@ def main(argv: list[str] | None = None) -> int:
112120
help="Generate suffixes of length LEN for splitted output files",
113121
)
114122
output.add_argument("--multi-timestamp", action="store_true", help="Create records for datetime fields")
123+
output.add_argument(
124+
"-p",
125+
"--progress",
126+
action="store_true",
127+
help="Show progress bar (requires tqdm)",
128+
)
115129

116130
advanced = parser.add_argument_group("advanced")
117131
advanced.add_argument(
@@ -217,7 +231,14 @@ def main(argv: list[str] | None = None) -> int:
217231
seen_desc = set()
218232
islice_stop = (args.count + args.skip) if args.count else None
219233
record_iterator = islice(record_stream(args.src, selector), args.skip, islice_stop)
234+
235+
if args.progress:
236+
if not HAS_TQDM:
237+
parser.error("tqdm is required for progress bar")
238+
record_iterator = tqdm.tqdm(record_iterator, unit=" records", delay=sys.float_info.min)
239+
220240
count = 0
241+
record_writer = None
221242

222243
try:
223244
record_writer = RecordWriter(uri)
@@ -246,7 +267,8 @@ def main(argv: list[str] | None = None) -> int:
246267
record_writer.write(rec)
247268

248269
finally:
249-
record_writer.__exit__()
270+
if record_writer:
271+
record_writer.__exit__()
250272

251273
if args.list:
252274
print(f"Processed {count} records")

pyproject.toml

+5
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ test = [
6262
"flow.record[elastic]",
6363
"duckdb; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
6464
"pytz; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
65+
"tqdm",
66+
]
67+
full = [
68+
"flow.record[compression]",
69+
"tqdm",
6570
]
6671

6772
[project.scripts]

tests/test_rdump.py

+26
Original file line numberDiff line numberDiff line change
@@ -696,3 +696,29 @@ def test_rdump_line_verbose(tmp_path: Path, capsys: pytest.CaptureFixture, rdump
696696
assert "data (bytes) =" in captured.out
697697
assert "counter (uint32) =" in captured.out
698698
assert "foo (string) =" in captured.out
699+
700+
701+
def test_rdump_list_progress(tmp_path: Path, capsys: pytest.CaptureFixture) -> None:
702+
TestRecord = RecordDescriptor(
703+
"test/rdump/progress",
704+
[
705+
("uint32", "counter"),
706+
],
707+
)
708+
record_path = tmp_path / "test.records"
709+
710+
with RecordWriter(record_path) as writer:
711+
for i in range(100):
712+
writer.write(TestRecord(counter=i))
713+
714+
rdump.main(["--list", "--progress", str(record_path)])
715+
captured = capsys.readouterr()
716+
717+
# stderr should contain tqdm progress bar
718+
# 100 records [00:00, 64987.67 records/s]
719+
assert "\r100 records [" in captured.err
720+
assert " records/s]" in captured.err
721+
722+
# stdout should contain the RecordDescriptor definition and count
723+
assert "# <RecordDescriptor test/rdump/progress, hash=eeb21156>" in captured.out
724+
assert "Processed 100 records" in captured.out

0 commit comments

Comments
 (0)