From c1b9a836dbd5181ef57514139a5109d554ba30f1 Mon Sep 17 00:00:00 2001
From: Neko Asakura
Date: Mon, 30 Mar 2026 13:29:07 -0400
Subject: [PATCH 1/3] GH-131798: Narrow the return type of `_FORMAT_SIMPLE` and `_FORMAT_WITH_SPEC` to `str`

---
 Lib/test/test_capi/test_opt.py                | 38 +++++++++++++++++++
 ...-03-30-17-01-34.gh-issue-131798.WSefcr.rst |  2 +
 Python/optimizer_bytecodes.c                  |  8 ++++
 Python/optimizer_cases.c.h                    |  4 +-
 4 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst

diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 2005dd9b0866bd..bda26bc7464c5a 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -2263,6 +2263,44 @@ def testfunc(n):
         self.assertNotIn("_GUARD_TOS_UNICODE", uops)
         self.assertIn("_BINARY_OP_ADD_UNICODE", uops)
 
+    def test_format_simple_narrows_to_str(self):
+        def testfunc(n):
+            x = []
+            for _ in range(n):
+                v = 42
+                s = f"{v}"
+                t = "hello" + s
+                x.append(t)
+            return x
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, ["hello42"] * TIER2_THRESHOLD)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+
+        self.assertIn("_FORMAT_SIMPLE", uops)
+        self.assertNotIn("_GUARD_TOS_UNICODE", uops)
+        self.assertIn("_BINARY_OP_ADD_UNICODE", uops)
+
+    def test_format_with_spec_narrows_to_str(self):
+        def testfunc(n):
+            x = []
+            for _ in range(n):
+                v = 3.14
+                s = f"{v:.2f}"
+                t = "pi=" + s
+                x.append(t)
+            return x
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, ["pi=3.14"] * TIER2_THRESHOLD)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+
+        self.assertIn("_FORMAT_WITH_SPEC", uops)
+        self.assertNotIn("_GUARD_TOS_UNICODE", uops)
+        self.assertIn("_BINARY_OP_ADD_UNICODE", uops)
+
     def test_binary_op_subscr_str_int(self):
         def testfunc(n):
             x = 0
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst
new file mode 100644
index 00000000000000..3f7e7fa0c37a5b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst
@@ -0,0 +1,2 @@
+Allow the JIT to remove unicode guards after ``_FORMAT_SIMPLE`` and
+``_FORMAT_WITH_SPEC`` by setting the return type to string.
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 792f83cdbd2d3a..814f4a632c5fcc 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -1551,6 +1551,14 @@ dummy_func(void) {
         set = sym_new_type(ctx, &PySet_Type);
     }
 
+    op(_FORMAT_SIMPLE, (value -- res)) {
+        res = sym_new_type(ctx, &PyUnicode_Type);
+    }
+
+    op(_FORMAT_WITH_SPEC, (value, fmt_spec -- res)) {
+        res = sym_new_type(ctx, &PyUnicode_Type);
+    }
+
     op(_SET_UPDATE, (set, unused[oparg-1], iterable -- set, unused[oparg-1], i)) {
         (void)set;
         i = iterable;
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 7539133fb92096..9be588726f2760 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -4280,14 +4280,14 @@
 
         case _FORMAT_SIMPLE: {
             JitOptRef res;
-            res = sym_new_not_null(ctx);
+            res = sym_new_type(ctx, &PyUnicode_Type);
             stack_pointer[-1] = res;
             break;
         }
 
         case _FORMAT_WITH_SPEC: {
             JitOptRef res;
-            res = sym_new_not_null(ctx);
+            res = sym_new_type(ctx, &PyUnicode_Type);
             CHECK_STACK_BOUNDS(-1);
             stack_pointer[-2] = res;
             stack_pointer += -1;

From ed1204e11f4e25863cc7d12a792c39c1a96dbc3d Mon Sep 17 00:00:00 2001
From: Neko Asakura
Date: Wed, 1 Apr 2026 12:03:26 -0400
Subject: [PATCH 2/3] gh-131798: Narrow the return type of `_FORMAT_SIMPLE` and `_FORMAT_WITH_SPEC` to str for built-in types

---
 Include/internal/pycore_optimizer.h           |  1 +
 ...-03-30-17-01-34.gh-issue-131798.WSefcr.rst |  2 +-
 Python/optimizer_analysis.c                   |  1 +
 Python/optimizer_bytecodes.c                  | 12 ++++++++++--
 Python/optimizer_cases.c.h                    | 16 ++++++++++++++--
 Python/optimizer_symbols.c                    | 19 +++++++++++++++++++
 6 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index 2986afb142b5d1..101909d11baccb 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -393,6 +393,7 @@ extern JitOptRef _Py_uop_sym_new_type(
 
 extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val);
 extern JitOptRef _Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val);
+extern bool _Py_uop_sym_is_safe_type(JitOptRef sym);
 bool _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym);
 _PyStackRef _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym);
 extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx);
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst
index 3f7e7fa0c37a5b..b587598be65b7d 100644
--- a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-30-17-01-34.gh-issue-131798.WSefcr.rst
@@ -1,2 +1,2 @@
 Allow the JIT to remove unicode guards after ``_FORMAT_SIMPLE`` and
-``_FORMAT_WITH_SPEC`` by setting the return type to string.
+``_FORMAT_WITH_SPEC`` when the input type is a known built-in type.
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index 4672a272fc9203..3e4942c483b7ec 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -250,6 +250,7 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr,
 /* Shortened forms for convenience, used in optimizer_bytecodes.c */
 #define sym_is_not_null _Py_uop_sym_is_not_null
 #define sym_is_const _Py_uop_sym_is_const
+#define sym_is_safe_type _Py_uop_sym_is_safe_type
 #define sym_is_safe_const _Py_uop_sym_is_safe_const
 #define sym_get_const _Py_uop_sym_get_const
 #define sym_new_const_steal _Py_uop_sym_new_const_steal
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 814f4a632c5fcc..ec449ab0dccd46 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -1552,11 +1552,19 @@ dummy_func(void) {
     }
 
     op(_FORMAT_SIMPLE, (value -- res)) {
-        res = sym_new_type(ctx, &PyUnicode_Type);
+        if (sym_is_safe_type(value)) {
+            res = sym_new_type(ctx, &PyUnicode_Type);
+        } else {
+            res = sym_new_not_null(ctx);
+        }
     }
 
     op(_FORMAT_WITH_SPEC, (value, fmt_spec -- res)) {
-        res = sym_new_type(ctx, &PyUnicode_Type);
+        if (sym_is_safe_type(value)) {
+            res = sym_new_type(ctx, &PyUnicode_Type);
+        } else {
+            res = sym_new_not_null(ctx);
+        }
     }
 
     op(_SET_UPDATE, (set, unused[oparg-1], iterable -- set, unused[oparg-1], i)) {
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 9be588726f2760..86d6fa6a77872c 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -4279,15 +4279,27 @@
         }
 
         case _FORMAT_SIMPLE: {
+            JitOptRef value;
             JitOptRef res;
-            res = sym_new_type(ctx, &PyUnicode_Type);
+            value = stack_pointer[-1];
+            if (sym_is_safe_type(value)) {
+                res = sym_new_type(ctx, &PyUnicode_Type);
+            } else {
+                res = sym_new_not_null(ctx);
+            }
             stack_pointer[-1] = res;
             break;
         }
 
         case _FORMAT_WITH_SPEC: {
+            JitOptRef value;
             JitOptRef res;
-            res = sym_new_type(ctx, &PyUnicode_Type);
+            value = stack_pointer[-2];
+            if (sym_is_safe_type(value)) {
+                res = sym_new_type(ctx, &PyUnicode_Type);
+            } else {
+                res = sym_new_not_null(ctx);
+            }
             CHECK_STACK_BOUNDS(-1);
             stack_pointer[-2] = res;
             stack_pointer += -1;
diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c
index d6f1c09490aac9..f4072326fb4d2d 100644
--- a/Python/optimizer_symbols.c
+++ b/Python/optimizer_symbols.c
@@ -264,6 +264,25 @@ _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym)
     return PyStackRef_FromPyObjectBorrow(const_val);
 }
 
+/*
+ Indicates whether the type is a known built-in type
+ that is safe to narrow.
+ */
+bool
+_Py_uop_sym_is_safe_type(JitOptRef sym)
+{
+    PyTypeObject *typ = _Py_uop_sym_get_type(sym);
+    if (typ == NULL) {
+        return false;
+    }
+    return (typ == &PyLong_Type) ||
+           (typ == &PyUnicode_Type) ||
+           (typ == &PyFloat_Type) ||
+           (typ == &_PyNone_Type) ||
+           (typ == &PyBool_Type) ||
+           (typ == &PyFrozenDict_Type);
+}
+
 /*
  Indicates whether the constant is safe to constant evaluate
  (without side effects).

From d09a7ccc9afa331ed9ecdecdad3b32eed761a070 Mon Sep 17 00:00:00 2001
From: Neko Asakura
Date: Wed, 1 Apr 2026 12:34:26 -0400
Subject: [PATCH 3/3] gh-131798: Narrow the return type of `_FORMAT_SIMPLE` and `_FORMAT_WITH_SPEC` to str for built-in types

---
 Python/optimizer_symbols.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c
index f4072326fb4d2d..d6c014a838cc5d 100644
--- a/Python/optimizer_symbols.c
+++ b/Python/optimizer_symbols.c
@@ -264,6 +264,16 @@ _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym)
     return PyStackRef_FromPyObjectBorrow(const_val);
 }
 
+static bool
+is_safe_builtin_type(PyTypeObject *typ)
+{
+    return (typ == &PyUnicode_Type) ||
+           (typ == &PyFloat_Type) ||
+           (typ == &_PyNone_Type) ||
+           (typ == &PyBool_Type) ||
+           (typ == &PyFrozenDict_Type);
+}
+
 /*
  Indicates whether the type is a known built-in type
  that is safe to narrow.
@@ -275,12 +285,7 @@ _Py_uop_sym_is_safe_type(JitOptRef sym)
     if (typ == NULL) {
         return false;
     }
-    return (typ == &PyLong_Type) ||
-           (typ == &PyUnicode_Type) ||
-           (typ == &PyFloat_Type) ||
-           (typ == &_PyNone_Type) ||
-           (typ == &PyBool_Type) ||
-           (typ == &PyFrozenDict_Type);
+    return (typ == &PyLong_Type) || is_safe_builtin_type(typ);
 }
 
 /*
@@ -298,11 +303,7 @@ _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym)
         return true;
     }
     PyTypeObject *typ = Py_TYPE(const_val);
-    return (typ == &PyUnicode_Type) ||
-           (typ == &PyFloat_Type) ||
-           (typ == &_PyNone_Type) ||
-           (typ == &PyBool_Type) ||
-           (typ == &PyFrozenDict_Type);
+    return is_safe_builtin_type(typ);
 }
 
 void