Skip to content

gh-105636: Add re.Pattern.compile_template() #135992

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion Doc/library/re.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1064,7 +1064,9 @@ Functions

Return the string obtained by replacing the leftmost non-overlapping occurrences
of *pattern* in *string* by the replacement *repl*. If the pattern isn't found,
*string* is returned unchanged. *repl* can be a string or a function; if it is
*string* is returned unchanged.
*repl* can be a string, a :ref:`template object <template-objects>`,
or a callable; if it is
a string, any backslash escapes in it are processed. That is, ``\n`` is
converted to a single newline character, ``\r`` is converted to a carriage return, and
so forth. Unknown escapes of ASCII letters are reserved for future use and
Expand Down Expand Up @@ -1093,6 +1095,13 @@ Functions

The pattern may be a string or a :class:`~re.Pattern`.

The replacement string can be compiled as well as the pattern::

>>> pat = re.compile(r'def\s+([a-zA-Z_][a-zA-Z_0-9]*)\s*\(\s*\):')
>>> repl = pat.compile_template(r'static PyObject*\npy_\1(void)\n{')
>>> re.sub(pat, repl, 'def myfunc():')
'static PyObject*\npy_myfunc(void)\n{'

The optional argument *count* is the maximum number of pattern occurrences to be
replaced; *count* must be a non-negative integer. If omitted or zero, all
occurrences will be replaced.
Expand Down Expand Up @@ -1143,6 +1152,9 @@ Functions
In future Python versions they will be
:ref:`keyword-only parameters <keyword-only_parameter>`.

.. versionchanged:: next
*repl* can be a :ref:`template object <template-objects>`.


.. function:: subn(pattern, repl, string, count=0, flags=0)

Expand Down Expand Up @@ -1337,6 +1349,16 @@ Regular Expression Objects
Identical to the :func:`subn` function, using the compiled pattern.


.. method:: Pattern.compile_template(repl)

Compile a replacement string into a :ref:`template object
<template-objects>`, which can be used for replacing patterns in strings
using the functions :func:`re.sub` or :func:`re.subn`, or the corresponding
methods of the :ref:`pattern object <re-objects>`.

.. versionadded:: next


.. attribute:: Pattern.flags

The regex matching flags. This is a combination of the flags given to
Expand Down Expand Up @@ -1586,6 +1608,25 @@ when there is no match, you can test whether there was a match with a simple
are considered atomic.


.. _template-objects:

Template Objects
----------------

A replacement string can be compiled into a template object using the :meth:`~re.Pattern.compile_template` method.

.. versionadded:: next

A template object is a callable that takes a single :ref:`match object
<match-objects>` argument and returns the replacement string with group
references resolved.

>>> pat = re.compile('(.)(.)')
>>> templ = pat.compile_template(r'\2-\1')
>>> templ(pat.match('ab'))
'b-a'


.. _re-examples:

Regular Expression Examples
Expand Down
9 changes: 9 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,15 @@ os.path
(Contributed by Petr Viktorin for :cve:`2025-4517`.)


re
--

* Add the :meth:`~re.Pattern.compile_template` method for the
:ref:`pattern object <re-objects>`, which allows pre-compiling
replacement strings.
(Contributed by Serhiy Storchaka in :gh:`105636`.)


shelve
------

Expand Down
12 changes: 9 additions & 3 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@
__all__ = [
"match", "fullmatch", "search", "sub", "subn", "split",
"findall", "finditer", "compile", "purge", "escape",
"error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U",
"error", "Pattern", "Match", "Template", "A", "I", "L", "M", "S", "X", "U",
"ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
"UNICODE", "NOFLAG", "RegexFlag", "PatternError"
]
Expand Down Expand Up @@ -312,8 +312,12 @@ def escape(pattern):
pattern = str(pattern, 'latin1')
return pattern.translate(_special_chars_map).encode('latin1')

Pattern = type(_compiler.compile('', 0))
Match = type(_compiler.compile('', 0).match(''))
# Build one throwaway compiled pattern and derive the public types from it.
p = _compiler.compile('', 0)
Pattern = type(p)
Match = type(p.match(''))
import _sre
# Type of the object returned by _sre.template(); listed in __all__ as "Template".
Template = type(_sre.template(p, ['']))
# Remove the temporary pattern so it does not remain a module-level name.
del p

# --------------------------------------------------------------------
# internals
Expand Down Expand Up @@ -374,6 +378,8 @@ def _compile(pattern, flags):
@functools.lru_cache(_MAXCACHE)
def _compile_template(pattern, repl):
    """internal: compile replacement pattern

    *pattern* is the compiled pattern the replacement belongs to and *repl*
    is either a replacement string or an already compiled Template.

    Returns a Template object.  Results are memoized by lru_cache, keyed on
    the (pattern, repl) pair.
    """
    if isinstance(repl, Template):
        # Already compiled: hand the template back unchanged.  (This used to
        # return the builtin ``repr`` by mistake.)
        return repl
    return _sre.template(pattern, _parser.parse_template(repl, pattern))

# register myself for pickling
Expand Down
76 changes: 76 additions & 0 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -2889,6 +2889,79 @@ def test_flags_repr(self):
"re.ASCII|re.LOCALE|re.UNICODE|re.MULTILINE|re.DEBUG|0xffe01")


class TemplateTests(unittest.TestCase):
    """Tests for templates created by Pattern.compile_template().

    Every scenario is exercised twice: once with str patterns and
    replacements, and once with their bytes counterparts.
    """

    def test_literal(self):
        """A template without group references acts as a literal replacement."""
        p = re.compile(r'\w')
        t = p.compile_template('a')
        self.assertIsInstance(t, re.Template)
        self.assertEqual(re.sub(p, t, 'x-yz'), 'a-aa')
        self.assertEqual(p.sub(t, 'x-yz'), 'a-aa')
        self.assertEqual(re.subn(p, t, 'x-yz', count=2), ('a-az', 2))
        self.assertEqual(p.subn(t, 'x-yz', 2), ('a-az', 2))

        p = re.compile(br'\w')
        t = p.compile_template(b'a')
        self.assertIsInstance(t, re.Template)
        self.assertEqual(re.sub(p, t, b'x-yz'), b'a-aa')
        self.assertEqual(p.sub(t, b'x-yz'), b'a-aa')
        self.assertEqual(re.subn(p, t, b'x-yz', count=2), (b'a-az', 2))
        self.assertEqual(p.subn(t, b'x-yz', 2), (b'a-az', 2))

    def test_group_refs(self):
        """Numeric group references mixed with literal text are resolved."""
        p = re.compile(r'(\w)(\w)')
        t = p.compile_template(r'[\2-\1]')
        self.assertIsInstance(t, re.Template)
        self.assertEqual(re.sub(p, t, 'xyzt'), '[y-x][t-z]')
        self.assertEqual(p.sub(t, 'xyzt'), '[y-x][t-z]')

        p = re.compile(br'(\w)(\w)')
        t = p.compile_template(br'[\2-\1]')
        self.assertIsInstance(t, re.Template)
        self.assertEqual(re.sub(p, t, b'xyzt'), b'[y-x][t-z]')
        self.assertEqual(p.sub(t, b'xyzt'), b'[y-x][t-z]')

    def test_group_refs_empty_literals(self):
        """Group references with no literal text between or around them."""
        p = re.compile(r'(\w)(\w)')
        t = p.compile_template(r'\2\1')
        self.assertIsInstance(t, re.Template)
        self.assertEqual(re.sub(p, t, 'xyzt'), 'yxtz')
        self.assertEqual(p.sub(t, 'xyzt'), 'yxtz')

        p = re.compile(br'(\w)(\w)')
        t = p.compile_template(br'\2\1')
        self.assertIsInstance(t, re.Template)
        self.assertEqual(re.sub(p, t, b'xyzt'), b'yxtz')
        self.assertEqual(p.sub(t, b'xyzt'), b'yxtz')

    def test_symbolic_group_refs(self):
        r"""Named group references (\g<name>) are resolved."""
        p = re.compile(r'(?P<a>\w)(?P<b>\w)')
        t = p.compile_template(r'[\g<b>-\g<a>]')
        self.assertIsInstance(t, re.Template)
        self.assertEqual(re.sub(p, t, 'xyzt'), '[y-x][t-z]')
        self.assertEqual(p.sub(t, 'xyzt'), '[y-x][t-z]')

        p = re.compile(br'(?P<a>\w)(?P<b>\w)')
        t = p.compile_template(br'[\g<b>-\g<a>]')
        self.assertIsInstance(t, re.Template)
        self.assertEqual(re.sub(p, t, b'xyzt'), b'[y-x][t-z]')
        self.assertEqual(p.sub(t, b'xyzt'), b'[y-x][t-z]')

    def test_call(self):
        """Calling a template with a match expands it; other types raise."""
        p = re.compile(r'(\w)(\w)')
        t = p.compile_template(r'[\2-\1]')
        m = p.search(' xy ')
        self.assertEqual(t(m), '[y-x]')
        self.assertRaises(TypeError, t, None)
        self.assertRaises(TypeError, t, {})

        p = re.compile(br'(\w)(\w)')
        t = p.compile_template(br'[\2-\1]')
        m = p.search(b' xy ')
        self.assertEqual(t(m), b'[y-x]')
        self.assertRaises(TypeError, t, None)
        self.assertRaises(TypeError, t, {})


class ImplementationTest(unittest.TestCase):
"""
Test implementation details of the re module.
Expand All @@ -2901,6 +2974,8 @@ def test_immutable(self):
re.Match.foo = 1
with self.assertRaises(TypeError):
re.Pattern.foo = 1
with self.assertRaises(TypeError):
re.Template.foo = 1
with self.assertRaises(TypeError):
pat = re.compile("")
tp = type(pat.scanner(""))
Expand All @@ -2924,6 +2999,7 @@ def test_disallow_instantiation(self):
# Ensure that the type disallows instantiation (bpo-43916)
check_disallow_instantiation(self, re.Match)
check_disallow_instantiation(self, re.Pattern)
check_disallow_instantiation(self, re.Template)
pat = re.compile("")
check_disallow_instantiation(self, type(pat.scanner("")))

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add the :meth:`~re.Pattern.compile_template` method for the :ref:`pattern
object <re-objects>`, which allows pre-compiling replacement strings.
47 changes: 46 additions & 1 deletion Modules/_sre/clinic/sre.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading