Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions Lib/test/test_tomllib/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
from pathlib import Path
import sys
import tempfile
import textwrap
import unittest
from test import support
from test.support import os_helper
from test.support.script_helper import assert_python_ok

from . import tomllib

Expand Down Expand Up @@ -124,3 +127,55 @@ def test_types_import(self):
never imported by tests.
"""
importlib.import_module(f"{tomllib.__name__}._types")

def test_try_simple_decimal(self):
    # try_simple_decimal(src, pos) is the fast path for plain decimal
    # integers: it returns (end_pos, value) on success and None whenever
    # the text at `pos` is not unambiguously a simple decimal integer.
    try_simple_decimal = tomllib._parser.try_simple_decimal

    # Accepted: optional sign, no leading zero, no underscore, and
    # followed by end of input or a character that cannot extend the
    # number into another value type.
    self.assertEqual(try_simple_decimal("123", 0), (3, 123))
    self.assertEqual(try_simple_decimal("123\n", 0), (3, 123))
    self.assertEqual(try_simple_decimal("123 456", 0), (3, 123))
    self.assertEqual(try_simple_decimal("+123\n", 0), (4, 123))
    self.assertEqual(try_simple_decimal("-123\n", 0), (4, -123))
    self.assertEqual(try_simple_decimal("0\n", 0), (1, 0))
    self.assertEqual(try_simple_decimal("+0\n", 0), (2, 0))
    self.assertEqual(try_simple_decimal("-0\n", 0), (2, 0))
    # Parsing may start in the middle of the source (arrays, tables).
    self.assertEqual(try_simple_decimal("[23]\n", 1), (3, 23))
    self.assertEqual(try_simple_decimal("[23, 24]\n", 1), (3, 23))
    self.assertEqual(try_simple_decimal("{x = 42}\n", 5), (7, 42))

    # Rejected: a bare sign, or a sign followed by a non-digit.
    self.assertIsNone(try_simple_decimal("+", 0))
    self.assertIsNone(try_simple_decimal("-", 0))
    self.assertIsNone(try_simple_decimal("+\n", 0))
    self.assertIsNone(try_simple_decimal("-\n", 0))
    self.assertIsNone(try_simple_decimal("+inf\n", 0))
    self.assertIsNone(try_simple_decimal("-nan\n", 0))
    # Rejected: leading zero, date, time, float, underscore separator
    # and non-decimal integer prefixes.
    self.assertIsNone(try_simple_decimal("0123\n", 0))
    self.assertIsNone(try_simple_decimal("1979-05-27\n", 0))
    self.assertIsNone(try_simple_decimal("12:32:00\n", 0))
    self.assertIsNone(try_simple_decimal("1.0\n", 0))
    self.assertIsNone(try_simple_decimal("1_000\n", 0))
    self.assertIsNone(try_simple_decimal("0x123\n", 0))
    self.assertIsNone(try_simple_decimal("0o123\n", 0))
    self.assertIsNone(try_simple_decimal("0b100\n", 0))

def test_lazy_import(self):
    # Test that try_simple_decimal() can parse the TOML file without
    # importing regular expressions (tomllib._re)
    filename = os_helper.TESTFN
    self.addCleanup(os_helper.unlink, filename)
    toml = textwrap.dedent("""
        [metadata]
        int = 123
        list = [+1, -2, 3]
        table = {x=1, y=2}
    """)
    # TOML documents are UTF-8 and tomllib.load() reads the file in binary
    # mode, so write the fixture with an explicit encoding rather than
    # the locale-dependent default.
    with open(filename, "w", encoding="utf-8") as fp:
        fp.write(toml)

    # Run in a fresh interpreter so that sys.modules only reflects what
    # loading this TOML file actually imported.
    code = textwrap.dedent(f"""
        import sys, tomllib
        with open({filename!a}, "rb") as fp:
            tomllib.load(fp)
        print("lazy import?", 'tomllib._re' not in sys.modules)
    """)
    proc = assert_python_ok('-c', code)
    self.assertIn(b'lazy import? True', proc.out)
56 changes: 54 additions & 2 deletions Lib/tomllib/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@

from __future__ import annotations

from types import MappingProxyType
# Defer loading regular expressions until we actually need them in
# parse_value(). Before that, use try_simple_decimal() to parse simple
# decimal numbers.
__lazy_modules__ = ["tomllib._re"]

from ._re import (
RE_DATETIME,
Expand Down Expand Up @@ -42,7 +45,18 @@
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | frozenset("\"'")
HEXDIGIT_CHARS: Final = frozenset("abcdef" "ABCDEF" "0123456789")

BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
# Characters that must not directly follow a "simple decimal". Any of
# them may indicate that the value is really something else (a float,
# a prefixed integer, a date or time, ...), in which case the optimized
# parsing is abandoned.
ILLEGAL_AFTER_SIMPLE_DECIMAL: Final = frozenset(
    "eE"  # float exponent
    "."  # float fraction
    "xbo"  # hex, bin, oct prefix
    "-"  # date / datetime
    ":"  # local time
    "_"  # digit separator
    "0123456789"  # more digits after a leading zero
)

BASIC_STR_ESCAPE_REPLACEMENTS: Final = frozendict( # type: ignore[name-defined]
{
"\\b": "\u0008", # backspace
"\\t": "\u0009", # tab
Expand Down Expand Up @@ -665,6 +679,37 @@ def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
pos += 1


def try_simple_decimal(
    src: str, pos: Pos
) -> None | tuple[Pos, int]:
    """Fast path for a plain decimal integer starting at `pos`.

    Accepts an optional sign followed by either a single "0" or a run
    of digits that does not start with "0". Underscores are not
    handled here.

    Returns `(end_pos, value)` on success, or `None` when the text is
    not a simple decimal or when the character right after it means
    the value could be something else.
    """
    begin = pos

    # Optional sign.
    if src.startswith(("+", "-"), pos):
        pos += 1

    if src.startswith("0", pos):
        # Consume just the zero; a digit after it is rejected below.
        pos += 1
    elif src.startswith(tuple("123456789"), pos):
        pos = skip_chars(src, pos, "0123456789")
    else:
        # No digit at all (e.g. a bare sign, "inf" or "nan").
        return None

    # Look at the character following the digits; reaching the end of
    # the input counts as unambiguous.
    try:
        ambiguous = src[pos] in ILLEGAL_AFTER_SIMPLE_DECIMAL
    except IndexError:
        ambiguous = False
    if ambiguous:
        return None

    return pos, int(src[begin:pos])


def parse_value(
src: str, pos: Pos, parse_float: ParseFloat
) -> tuple[Pos, Any]:
Expand Down Expand Up @@ -703,6 +748,13 @@ def parse_value(
if char == "{":
return parse_inline_table(src, pos, parse_float)

# Try a simple parser for decimal numbers. If it's able to parse all
# numbers, it avoids importing tomllib._re which has an impact on
# the tomllib startup time.
number = try_simple_decimal(src, pos)
if number is not None:
return number

# Dates and times
datetime_match = RE_DATETIME.match(src, pos)
if datetime_match:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improve :mod:`tomllib` import time (up to 10x faster). Patch by Victor
Stinner.
Loading