From 157912869cf1ad78ec4f7505b982ae040dc8f1c3 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 29 Jan 2025 10:44:35 +0200 Subject: [PATCH 01/11] Move optimization of constant sequence creation from codegen to CFG --- Lib/test/test_dis.py | 4 +-- Python/codegen.c | 28 ---------------- Python/flowgraph.c | 76 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 74 insertions(+), 34 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 2e149b32e5c1ec..7894688b7f7176 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -892,7 +892,7 @@ def loop_test(): %3d RESUME_CHECK 0 %3d BUILD_LIST 0 - LOAD_CONST_MORTAL 0 ((1, 2, 3)) + LOAD_CONST_MORTAL 1 ((1, 2, 3)) LIST_EXTEND 1 LOAD_SMALL_INT 3 BINARY_OP 5 (*) @@ -908,7 +908,7 @@ def loop_test(): %3d L2: END_FOR POP_ITER - LOAD_CONST_IMMORTAL 1 (None) + LOAD_CONST_IMMORTAL 0 (None) RETURN_VALUE """ % (loop_test.__code__.co_firstlineno, loop_test.__code__.co_firstlineno + 1, diff --git a/Python/codegen.c b/Python/codegen.c index df3b5aaac1d0d9..9c119467932120 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -3210,34 +3210,6 @@ starunpack_helper_impl(compiler *c, location loc, int build, int add, int extend, int tuple) { Py_ssize_t n = asdl_seq_LEN(elts); - if (!injected_arg && n > 2 && are_all_items_const(elts, 0, n)) { - PyObject *folded = PyTuple_New(n); - if (folded == NULL) { - return ERROR; - } - for (Py_ssize_t i = 0; i < n; i++) { - PyObject *val = ((expr_ty)asdl_seq_GET(elts, i))->v.Constant.value; - PyTuple_SET_ITEM(folded, i, Py_NewRef(val)); - } - if (tuple && !pushed) { - ADDOP_LOAD_CONST_NEW(c, loc, folded); - } else { - if (add == SET_ADD) { - Py_SETREF(folded, PyFrozenSet_New(folded)); - if (folded == NULL) { - return ERROR; - } - } - ADDOP_I(c, loc, build, pushed); - ADDOP_LOAD_CONST_NEW(c, loc, folded); - ADDOP_I(c, loc, extend, 1); - if (tuple) { - ADDOP_I(c, loc, CALL_INTRINSIC_1, INTRINSIC_LIST_TO_TUPLE); - } - } - return SUCCESS; - } - int big = n + pushed + (injected_arg ? 1 : 0) > STACK_USE_GUIDELINE; int seen_star = 0; for (Py_ssize_t i = 0; i < n; i++) { diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 24561c1ee04db9..2f911cd5523c13 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1336,6 +1336,17 @@ add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache) return (int)index; } +static int +is_sequence_constant(cfg_instr *inst, int n) +{ + for (int i = 0; i < n; i++) { + if(!loads_const(inst[i].i_opcode)) { + return 0; + } + } + return 1; +} + /* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n with LOAD_CONST (c1, c2, ... cn). The consts table must still be in list form so that the @@ -1353,10 +1364,8 @@ fold_tuple_on_constants(PyObject *const_cache, assert(inst[n].i_opcode == BUILD_TUPLE); assert(inst[n].i_oparg == n); - for (int i = 0; i < n; i++) { - if (!loads_const(inst[i].i_opcode)) { - return SUCCESS; - } + if (!is_sequence_constant(inst, n)) { + return SUCCESS; } /* Buildup new tuple of constants */ @@ -1384,6 +1393,55 @@ fold_tuple_on_constants(PyObject *const_cache, return SUCCESS; } +// Replace LOAD_CONST x, LOAD_CONST y, LOAD_CONST z, BUILD_LIST 3 +// with BUILD_LIST 0, LOAD_CONST (x, y, z), LIST_EXTEND 1 +// or BUILD_SET & SET_UPDATE respectively. +static int +optimize_const_sequence(PyObject *const_cache, + cfg_instr* inst, + int n, PyObject *consts, + int list, int build, int extend) +{ + assert(PyDict_CheckExact(const_cache)); + assert(PyList_CheckExact(consts)); + assert(inst[n].i_oparg == n); + + if (n < 3 || !is_sequence_constant(inst, n)) { + return SUCCESS; + } + PyObject *newconst = PyTuple_New(n); + if (newconst == NULL) { + return ERROR; + } + for (int i = 0; i < n; i++) { + int op = inst[i].i_opcode; + int arg = inst[i].i_oparg; + PyObject *constant = get_const_value(op, arg, consts); + if (constant == NULL) { + return ERROR; + } + PyTuple_SET_ITEM(newconst, i, constant); + } + if (!list) { + PyObject *frozenset = PyFrozenSet_New(newconst); + if (frozenset == NULL) { + return ERROR; + } + Py_SETREF(newconst, frozenset); + } + int index = add_const(newconst, consts, const_cache); + if (index < 0) { + return ERROR; + } + INSTR_SET_OP1(&inst[0], build, 0); + for (int i = 1; i < n - 1; i++) { + INSTR_SET_OP0(&inst[i], NOP); + } + INSTR_SET_OP1(&inst[n-1], LOAD_CONST, index); + INSTR_SET_OP1(&inst[n], extend, 1); + return SUCCESS; +} + #define VISITED (-1) // Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the @@ -1751,6 +1809,16 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) } } break; + case BUILD_LIST: + if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, 1, BUILD_LIST, LIST_EXTEND)) { + goto error; + } + break; + case BUILD_SET: + if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, 0, BUILD_SET, SET_UPDATE)) { + goto error; + } + break; case POP_JUMP_IF_NOT_NONE: case POP_JUMP_IF_NONE: switch (target->i_opcode) { From 009165ba8f95dcea5ca270a1e84ec76f9fdce16c Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 29 Jan 2025 11:01:52 +0200 Subject: [PATCH 02/11] Remove unused function --- Python/codegen.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/Python/codegen.c b/Python/codegen.c index 9c119467932120..0bf9526cdc8435 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -201,9 +201,6 @@ static int codegen_subscript(compiler *, expr_ty); static int codegen_slice_two_parts(compiler *, expr_ty); static int codegen_slice(compiler *, expr_ty); -static bool are_all_items_const(asdl_expr_seq *, Py_ssize_t, Py_ssize_t); - - static int codegen_with(compiler *, stmt_ty, int); static int codegen_async_with(compiler *, stmt_ty, int); static int codegen_async_for(compiler *, stmt_ty); @@ -3361,18 +3358,6 @@ codegen_set(compiler *c, expr_ty e) BUILD_SET, SET_ADD, SET_UPDATE, 0); } -static bool -are_all_items_const(asdl_expr_seq *seq, Py_ssize_t begin, Py_ssize_t end) -{ - for (Py_ssize_t i = begin; i < end; i++) { - expr_ty key = (expr_ty)asdl_seq_GET(seq, i); - if (key == NULL || key->kind != Constant_kind) { - return false; - } - } - return true; -} - static int codegen_subdict(compiler *c, expr_ty e, Py_ssize_t begin, Py_ssize_t end) { From ea07a50641b8af72bb401cfec5f6906fb383f090 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 29 Jan 2025 12:11:03 +0200 Subject: [PATCH 03/11] Fix heap-buffer-overflow --- Python/flowgraph.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 2f911cd5523c13..20019762f4a302 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1810,13 +1810,17 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) } break; case BUILD_LIST: - if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, 1, BUILD_LIST, LIST_EXTEND)) { - goto error; + if (i >= oparg) { + if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, 1, BUILD_LIST, LIST_EXTEND)) { + goto error; + } } break; case BUILD_SET: - if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, 0, BUILD_SET, SET_UPDATE)) { - goto error; + if (i >= oparg) { + if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, 0, BUILD_SET, SET_UPDATE)) { + goto error; + } } break; case POP_JUMP_IF_NOT_NONE: From acd2f57d5aa534b6ed41891f5d20b8fb53a065ed Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 29 Jan 2025 12:14:36 +0200 Subject: [PATCH 04/11] Add assertions --- Python/flowgraph.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 20019762f4a302..c62ed444d671d4 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1405,6 +1405,12 @@ optimize_const_sequence(PyObject *const_cache, assert(PyDict_CheckExact(const_cache)); assert(PyList_CheckExact(consts)); assert(inst[n].i_oparg == n); + if (list) { + assert(inst[n].i_opcode == BUILD_LIST) + } + else { + assert(inst[n].i_opcode == BUILD_SET) + } if (n < 3 || !is_sequence_constant(inst, n)) { return SUCCESS; From 7befc01a7f23cb4a7c4c187bc3bb5e8b67d9c937 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 29 Jan 2025 12:20:31 +0200 Subject: [PATCH 05/11] Semicolons are my enemy --- Python/flowgraph.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index c62ed444d671d4..917575f9496d47 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1406,10 +1406,10 @@ optimize_const_sequence(PyObject *const_cache, assert(PyList_CheckExact(consts)); assert(inst[n].i_oparg == n); if (list) { - assert(inst[n].i_opcode == BUILD_LIST) + assert(inst[n].i_opcode == BUILD_LIST); } else { - assert(inst[n].i_opcode == BUILD_SET) + assert(inst[n].i_opcode == BUILD_SET); } if (n < 3 || !is_sequence_constant(inst, n)) { From 312fd30fbfbda443f50dd6d34660c519c6d6730c Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 29 Jan 2025 21:19:47 +0200 Subject: [PATCH 06/11] Remove list arg --- Python/flowgraph.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 917575f9496d47..b95d5c8b52d090 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1400,11 +1400,12 @@ static int optimize_const_sequence(PyObject *const_cache, cfg_instr* inst, int n, PyObject *consts, - int list, int build, int extend) + int build, int extend) { assert(PyDict_CheckExact(const_cache)); assert(PyList_CheckExact(consts)); assert(inst[n].i_oparg == n); + int list = build == BUILD_LIST; if (list) { assert(inst[n].i_opcode == BUILD_LIST); } @@ -1817,14 +1818,14 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) break; case BUILD_LIST: if (i >= oparg) { - if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, 1, BUILD_LIST, LIST_EXTEND)) { + if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, BUILD_LIST, LIST_EXTEND)) { goto error; } } break; case BUILD_SET: if (i >= oparg) { - if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, 0, BUILD_SET, SET_UPDATE)) { + if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, BUILD_SET, SET_UPDATE)) { goto error; } } From b509c02802939af4ff629a8e6e4d0d9acf36c4a0 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 29 Jan 2025 21:22:20 +0200 Subject: [PATCH 07/11] Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> --- Python/flowgraph.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index b95d5c8b52d090..64e317aa3d71ae 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1818,14 +1818,14 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) break; case BUILD_LIST: if (i >= oparg) { - if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, BUILD_LIST, LIST_EXTEND)) { + if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, BUILD_LIST, LIST_EXTEND) < 0) { goto error; } } break; case BUILD_SET: if (i >= oparg) { - if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, BUILD_SET, SET_UPDATE)) { + if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, BUILD_SET, SET_UPDATE) < 0) { goto error; } } From 8539ee2f7fdac2a77dec5544a48725d2d2910b0a Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 29 Jan 2025 22:40:10 +0200 Subject: [PATCH 08/11] Address Irit's review --- Python/flowgraph.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 64e317aa3d71ae..254b0a30c85ce1 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1399,18 +1399,22 @@ fold_tuple_on_constants(PyObject *const_cache, static int optimize_const_sequence(PyObject *const_cache, cfg_instr* inst, - int n, PyObject *consts, - int build, int extend) + int n, PyObject *consts) { assert(PyDict_CheckExact(const_cache)); assert(PyList_CheckExact(consts)); assert(inst[n].i_oparg == n); - int list = build == BUILD_LIST; - if (list) { - assert(inst[n].i_opcode == BUILD_LIST); + + int construct_list = inst[n].i_opcode == BUILD_LIST; + int build, extend; + if (!construct_list) { + assert(inst[n].i_opcode == BUILD_SET); + build = BUILD_SET; + extend = SET_UPDATE; } else { - assert(inst[n].i_opcode == BUILD_SET); + build = BUILD_LIST; + extend = LIST_EXTEND; } if (n < 3 || !is_sequence_constant(inst, n)) { @@ -1429,7 +1433,7 @@ optimize_const_sequence(PyObject *const_cache, } PyTuple_SET_ITEM(newconst, i, constant); } - if (!list) { + if (!construct_list) { PyObject *frozenset = PyFrozenSet_New(newconst); if (frozenset == NULL) { return ERROR; @@ -1817,15 +1821,9 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) } break; case BUILD_LIST: - if (i >= oparg) { - if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, BUILD_LIST, LIST_EXTEND) < 0) { - goto error; - } - } - break; case BUILD_SET: if (i >= oparg) { - if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts, BUILD_SET, SET_UPDATE) < 0) { + if (optimize_const_sequence(const_cache, inst-oparg, oparg, consts) < 0) { goto error; } } From 7e99bd174f4f92760bad5e09ab87a9605875f43e Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Thu, 30 Jan 2025 11:43:09 +0200 Subject: [PATCH 09/11] Apply suggestions from code review Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> --- Python/flowgraph.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 254b0a30c85ce1..44270ccf055819 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1397,25 +1397,15 @@ fold_tuple_on_constants(PyObject *const_cache, // with BUILD_LIST 0, LOAD_CONST (x, y, z), LIST_EXTEND 1 // or BUILD_SET & SET_UPDATE respectively. static int -optimize_const_sequence(PyObject *const_cache, - cfg_instr* inst, - int n, PyObject *consts) +optimize_const_sequence(PyObject *const_cache, cfg_instr* inst, int n, PyObject *consts) { assert(PyDict_CheckExact(const_cache)); assert(PyList_CheckExact(consts)); assert(inst[n].i_oparg == n); - int construct_list = inst[n].i_opcode == BUILD_LIST; - int build, extend; - if (!construct_list) { - assert(inst[n].i_opcode == BUILD_SET); - build = BUILD_SET; - extend = SET_UPDATE; - } - else { - build = BUILD_LIST; - extend = LIST_EXTEND; - } + int build = inst[n].i_opcode; + assert(build == BUILD_LIST || build == BUILD_SET); + int extend = (build == BUILD_LIST) ? SET_UPDATE : LIST_EXTEND if (n < 3 || !is_sequence_constant(inst, n)) { return SUCCESS; @@ -1433,7 +1423,7 @@ optimize_const_sequence(PyObject *const_cache, } PyTuple_SET_ITEM(newconst, i, constant); } - if (!construct_list) { + if (build == BUILD_SET) { PyObject *frozenset = PyFrozenSet_New(newconst); if (frozenset == NULL) { return ERROR; @@ -1441,9 +1431,7 @@ optimize_const_sequence(PyObject *const_cache, Py_SETREF(newconst, frozenset); } int index = add_const(newconst, consts, const_cache); - if (index < 0) { - return ERROR; - } + RETURN_IF_ERROR(index); INSTR_SET_OP1(&inst[0], build, 0); for (int i = 1; i < n - 1; i++) { INSTR_SET_OP0(&inst[i], NOP); From 43dc870ad454a68003bf3dad7af0586e2993aa20 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Thu, 30 Jan 2025 11:58:52 +0200 Subject: [PATCH 10/11] Semicolons are our common enemy, Irit --- Python/flowgraph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 44270ccf055819..509caa6471ebb9 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1405,7 +1405,7 @@ optimize_const_sequence(PyObject *const_cache, cfg_instr* inst, int n, PyObject int build = inst[n].i_opcode; assert(build == BUILD_LIST || build == BUILD_SET); - int extend = (build == BUILD_LIST) ? SET_UPDATE : LIST_EXTEND + int extend = (build == BUILD_LIST) ? SET_UPDATE : LIST_EXTEND; if (n < 3 || !is_sequence_constant(inst, n)) { return SUCCESS; From 329d158fb15589aca1025aea9600bff1447855d8 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Thu, 30 Jan 2025 11:59:22 +0200 Subject: [PATCH 11/11] Fix ternary expression --- Python/flowgraph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 509caa6471ebb9..7b00d4f0a475a2 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1405,7 +1405,7 @@ optimize_const_sequence(PyObject *const_cache, cfg_instr* inst, int n, PyObject int build = inst[n].i_opcode; assert(build == BUILD_LIST || build == BUILD_SET); - int extend = (build == BUILD_LIST) ? SET_UPDATE : LIST_EXTEND; + int extend = (build == BUILD_LIST) ? LIST_EXTEND : SET_UPDATE; if (n < 3 || !is_sequence_constant(inst, n)) { return SUCCESS;