-
-
Notifications
You must be signed in to change notification settings - Fork 34.4k
gh-130273: Fix traceback color output with unicode characters #142529
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
306a690
8edad11
7947033
e8d23cd
467656e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,8 +1,10 @@ | ||||||||||||||
| """Extract, format and print information about Python stack traces.""" | ||||||||||||||
|
|
||||||||||||||
| import collections.abc | ||||||||||||||
| import functools | ||||||||||||||
| import itertools | ||||||||||||||
| import linecache | ||||||||||||||
| import re | ||||||||||||||
| import sys | ||||||||||||||
| import textwrap | ||||||||||||||
| import types | ||||||||||||||
|
|
@@ -681,12 +683,12 @@ def output_line(lineno): | |||||||||||||
| colorized_line_parts = [] | ||||||||||||||
| colorized_carets_parts = [] | ||||||||||||||
|
|
||||||||||||||
| for color, group in itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda x: x[1]): | ||||||||||||||
| for color, group in itertools.groupby(_zip_display_width(line, carets), key=lambda x: x[1]): | ||||||||||||||
| caret_group = list(group) | ||||||||||||||
| if color == "^": | ||||||||||||||
| if "^" in color: | ||||||||||||||
| colorized_line_parts.append(theme.error_highlight + "".join(char for char, _ in caret_group) + theme.reset) | ||||||||||||||
| colorized_carets_parts.append(theme.error_highlight + "".join(caret for _, caret in caret_group) + theme.reset) | ||||||||||||||
| elif color == "~": | ||||||||||||||
| elif "~" in color: | ||||||||||||||
| colorized_line_parts.append(theme.error_range + "".join(char for char, _ in caret_group) + theme.reset) | ||||||||||||||
| colorized_carets_parts.append(theme.error_range + "".join(caret for _, caret in caret_group) + theme.reset) | ||||||||||||||
| else: | ||||||||||||||
|
|
@@ -968,7 +970,46 @@ def setup_positions(expr, force_valid=True): | |||||||||||||
|
|
||||||||||||||
| return None | ||||||||||||||
|
|
||||||||||||||
| _WIDE_CHAR_SPECIFIERS = "WF" | ||||||||||||||
|
|
||||||||||||||
| def _zip_display_width(line, carets): | ||||||||||||||
| import unicodedata | ||||||||||||||
| carets = iter(carets) | ||||||||||||||
| for char in unicodedata.iter_graphemes(line): | ||||||||||||||
| char = str(char) | ||||||||||||||
| char_width = _display_width(char) | ||||||||||||||
| yield char, "".join(itertools.islice(carets, char_width)) | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| @functools.cache | ||||||||||||||
| def _str_width(c: str) -> int: | ||||||||||||||
| import unicodedata | ||||||||||||||
| if ord(c) < 128: | ||||||||||||||
| return 1 | ||||||||||||||
|
Comment on lines
+985
to
+987
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is no need to import unicodedata for ASCII characters:
Suggested change
|
||||||||||||||
| # gh-139246 for zero-width joiner and combining characters | ||||||||||||||
| if unicodedata.combining(c): | ||||||||||||||
| return 0 | ||||||||||||||
| category = unicodedata.category(c) | ||||||||||||||
| if category == "Cf" and c != "\u00ad": | ||||||||||||||
| return 0 | ||||||||||||||
| w = unicodedata.east_asian_width(c) | ||||||||||||||
| if w in ("N", "Na", "H", "A"): | ||||||||||||||
| return 1 | ||||||||||||||
| return 2 | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") | ||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It should also be private. |
||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| def _wlen(s: str) -> int: | ||||||||||||||
| if len(s) == 1 and s != "\x1a": | ||||||||||||||
| return _str_width(s) | ||||||||||||||
| length = sum(_str_width(i) for i in s) | ||||||||||||||
| # remove lengths of any escape sequences | ||||||||||||||
| sequence = ANSI_ESCAPE_SEQUENCE.findall(s) | ||||||||||||||
| ctrl_z_cnt = s.count("\x1a") | ||||||||||||||
| return length - sum(len(i) for i in sequence) + ctrl_z_cnt | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| def _display_width(line, offset=None): | ||||||||||||||
| """Calculate the extra amount of width space the given source | ||||||||||||||
|
|
@@ -982,13 +1023,7 @@ def _display_width(line, offset=None): | |||||||||||||
| if line.isascii(): | ||||||||||||||
| return offset | ||||||||||||||
|
|
||||||||||||||
| import unicodedata | ||||||||||||||
|
|
||||||||||||||
| return sum( | ||||||||||||||
| 2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1 | ||||||||||||||
| for char in line[:offset] | ||||||||||||||
| ) | ||||||||||||||
|
|
||||||||||||||
| return _wlen(line[:offset]) | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| class _ExceptionPrintContext: | ||||||||||||||
|
|
||||||||||||||
StanFromIreland marked this conversation as resolved.
Show resolved
Hide resolved
|
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1 @@ | ||||||
| Fix traceback color output with unicode characters | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be possible to avoid the heavy
unicodedata import for an ASCII line? I'm not sure that my code is correct :-)