Skip to content

Commit 43ccc91

Browse files
committed
多次调用添加缓存,减少硬盘读取
1 parent cdcff0f commit 43ccc91

File tree

6 files changed: +39 -31 lines changed

examples/demo_tdx.py

+7-3
Original file line number | Diff line number | Diff line change
@@ -100,11 +100,15 @@ def _code_block_2():
100100
# =====================================
101101
logger.info('计算开始')
102102
t1 = time.perf_counter()
103-
df = codegen_exec(df.lazy(), _code_block_1, _code_block_2, output_file=sys.stdout)
103+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file='1_out.py', run_file=False)
104104
t2 = time.perf_counter()
105-
print(t2 - t1)
105+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file='1_out.py', run_file=True)
106+
t3 = time.perf_counter()
107+
df = codegen_exec(df, _code_block_1, _code_block_2, output_file='1_out.py', run_file=True)
108+
t4 = time.perf_counter()
109+
print(t2 - t1, t3 - t2, t4 - t3)
106110
logger.info('计算结束')
107111
df = df.filter(
108112
~pl.col('is_st'),
109113
)
110-
print(df.collect())
114+
print(df)

expr_codegen/_version.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.10.9"
1+
__version__ = "0.10.10"

expr_codegen/pandas/template.py.j2

+3-3
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,6 @@ def main(df: pd.DataFrame) -> pd.DataFrame:
6363

6464
return df
6565

66-
if __name__ in ("__main__", "builtins"):
67-
# TODO: 数据加载或外部传入
68-
df_output = main(df_input)
66+
# if __name__ in ("__main__", "builtins"):
67+
# # TODO: 数据加载或外部传入
68+
# df_output = main(df_input)

expr_codegen/polars_group/template.py.j2

+3-3
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,6 @@ def main(df: DataFrame) -> DataFrame:
7878

7979
return df
8080

81-
if __name__ in ("__main__", "builtins"):
82-
# TODO: 数据加载或外部传入
83-
df_output = main(df_input)
81+
# if __name__ in ("__main__", "builtins"):
82+
# # TODO: 数据加载或外部传入
83+
# df_output = main(df_input)

expr_codegen/polars_over/template.py.j2

+3-3
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,6 @@ def main(df: DataFrame) -> DataFrame:
7878

7979
return df
8080

81-
if __name__ in ("__main__", "builtins"):
82-
# TODO: 数据加载或外部传入
83-
df_output = main(df_input)
81+
# if __name__ in ("__main__", "builtins"):
82+
# # TODO: 数据加载或外部传入
83+
# df_output = main(df_input)

expr_codegen/tool.py

+22-18
Original file line number | Diff line number | Diff line change
@@ -299,25 +299,29 @@ def _get_code(self,
299299
return code
300300

301301

302-
def _exec_code(code: str, df_input):
303-
globals_ = {'df_input': df_input}
302+
@lru_cache(maxsize=64, typed=True)
303+
def _get_func_from_code(code: str):
304+
globals_ = {}
304305
exec(code, globals_)
305-
return globals_['df_output']
306+
return globals_['main']
306307

307308

308-
def _exec_file(file, df_input):
309-
file = pathlib.Path(file)
310-
logger.info(f'run file "{file.absolute()}"')
311-
with open(file, 'r', encoding='utf-8') as f:
312-
code = f.read()
313-
return _exec_code(code, df_input)
314-
315-
316-
def _exec_module(module: str, df_input):
309+
@lru_cache(maxsize=64, typed=True)
310+
def _get_func_from_module(module: str):
317311
""""可下断点调试"""
318312
m = __import__(module, fromlist=['*'])
319313
logger.info(f'run module {m}')
320-
return m.main(df_input)
314+
return m.main
315+
316+
317+
@lru_cache(maxsize=64, typed=True)
318+
def _get_func_from_file(file: str):
319+
file = pathlib.Path(file)
320+
logger.info(f'run file "{file.absolute()}"')
321+
with open(file, 'r', encoding='utf-8') as f:
322+
globals_ = {}
323+
exec(f.read(), globals_)
324+
return globals_['main']
321325

322326

323327
_TOOL_ = ExprTool()
@@ -347,7 +351,7 @@ def codegen_exec(df: Optional[DataFrame],
347351
output_file: str| TextIOBase
348352
保存生成的目标代码到文件中
349353
run_file: bool or str
350-
是否不生成脚本,直接运行代码。
354+
是否不生成脚本,直接运行代码。注意:带缓存功能,多次调用不重复生成
351355
- 如果是True,会自动从output_file中读取代码
352356
- 如果是字符串,会自动从run_file中读取代码
353357
- 如果是模块名,会自动从模块中读取代码(可调试)
@@ -378,13 +382,13 @@ def codegen_exec(df: Optional[DataFrame],
378382
if df is not None:
379383
if run_file is True:
380384
assert output_file is not None, 'output_file is required'
381-
return _exec_file(output_file, df)
385+
return _get_func_from_file(output_file)(df)
382386
if run_file is not False:
383387
run_file = str(run_file)
384388
if run_file.endswith('.py'):
385-
return _exec_file(run_file, df)
389+
return _get_func_from_file(run_file)(df)
386390
else:
387-
return _exec_module(run_file, df) # 可断点调试
391+
return _get_func_from_module(run_file)(df) # 可断点调试
388392

389393
# 此代码来自于sympy.var
390394
frame = inspect.currentframe().f_back
@@ -407,4 +411,4 @@ def codegen_exec(df: Optional[DataFrame],
407411
if df is None:
408412
return None
409413
else:
410-
return _exec_code(code, df)
414+
return _get_func_from_code(code)(df)

0 commit comments

Comments
 (0)