Compare commits

...

10 Commits

Author SHA1 Message Date
openeuler-ci-bot
909b0a1cd9
!327 update openssl version for test case
From: @xinsheng3 
Reviewed-by: @zhuchunyi, @chen-huihan 
Signed-off-by: @zhuchunyi
2024-06-20 12:57:48 +00:00
x30009450
bdcb9de554 update openssl version for test case 2024-06-20 15:59:15 +08:00
openeuler-ci-bot
eb336259ad
!321 fix xml tree assert error
From: @xinsheng3 
Reviewed-by: @chen-huihan, @zhuchunyi 
Signed-off-by: @zhuchunyi
2024-05-23 03:29:19 +00:00
x30009450
b4d3e67706 fix xml tree assert error 2024-05-22 17:20:33 +08:00
openeuler-ci-bot
5525c64e62
!312 backport upstream patches
From: @xinsheng3 
Reviewed-by: @chen-huihan, @zhuchunyi 
Signed-off-by: @zhuchunyi
2024-04-07 06:23:01 +00:00
xinsheng3
c1943ea60d backport upstream patches 2024-03-13 17:48:19 +08:00
openeuler-ci-bot
d0bb0111a7
!302 Fix CVE-2023-27043 Reject malformed addresses in email.parseaddr()
From: @zhaoyu_hit 
Reviewed-by: @dillon_chen 
Signed-off-by: @dillon_chen
2024-03-05 07:01:12 +00:00
CharlieZhao
bc9d151390 Fix CVE-2023-27043 2024-01-10 15:25:18 +08:00
openeuler-ci-bot
90d2230471
!292 [sync] PR-291: 【轻量级 PR】:remove lto
From: @openeuler-sync-bot 
Reviewed-by: @gaoruoshu 
Signed-off-by: @gaoruoshu
2023-10-25 06:04:52 +00:00
zhuofeng
c76cbeb6b1 remove lto
Signed-off-by: zhuofeng <zhuofeng2@huawei.com>
(cherry picked from commit 4d804668aa77789473bb119363782c3f23b81826)
2023-10-25 11:27:17 +08:00
11 changed files with 1912 additions and 2 deletions

View File

@ -0,0 +1,89 @@
From a6f73f61147048187908299ae911c5ad498d813a Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Wed, 17 Jan 2024 14:47:26 +0100
Subject: [PATCH] [3.9] bpo-37013: Fix the error handling in
socket.if_indextoname() (GH-13503) (GH-112600)
* Fix a crash when pass UINT_MAX.
* Fix an integer overflow on 64-bit non-Windows platforms.
(cherry picked from commit 0daf555c6fb3feba77989382135a58215e1d70a5)
Co-authored-by: Zackery Spytz <zspytz@gmail.com>
---
Lib/test/test_socket.py | 13 +++++++++++++
...2023-12-01-16-09-59.gh-issue-81194.FFad1c.rst | 3 +++
Modules/socketmodule.c | 16 +++++++++++-----
3 files changed, 27 insertions(+), 5 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2023-12-01-16-09-59.gh-issue-81194.FFad1c.rst
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
index 127d61cb6a..043e554388 100755
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -1070,7 +1070,20 @@ def testInterfaceNameIndex(self):
'socket.if_indextoname() not available.')
def testInvalidInterfaceIndexToName(self):
self.assertRaises(OSError, socket.if_indextoname, 0)
+ self.assertRaises(OverflowError, socket.if_indextoname, -1)
+ self.assertRaises(OverflowError, socket.if_indextoname, 2**1000)
self.assertRaises(TypeError, socket.if_indextoname, '_DEADBEEF')
+ if hasattr(socket, 'if_nameindex'):
+ indices = dict(socket.if_nameindex())
+ for index in indices:
+ index2 = index + 2**32
+ if index2 not in indices:
+ with self.assertRaises((OverflowError, OSError)):
+ socket.if_indextoname(index2)
+ for index in 2**32-1, 2**64-1:
+ if index not in indices:
+ with self.assertRaises((OverflowError, OSError)):
+ socket.if_indextoname(index)
@unittest.skipUnless(hasattr(socket, 'if_nametoindex'),
'socket.if_nametoindex() not available.')
diff --git a/Misc/NEWS.d/next/Library/2023-12-01-16-09-59.gh-issue-81194.FFad1c.rst b/Misc/NEWS.d/next/Library/2023-12-01-16-09-59.gh-issue-81194.FFad1c.rst
new file mode 100644
index 0000000000..feb7a8643b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-12-01-16-09-59.gh-issue-81194.FFad1c.rst
@@ -0,0 +1,3 @@
+Fix a crash in :func:`socket.if_indextoname` with specific value (UINT_MAX).
+Fix an integer overflow in :func:`socket.if_indextoname` on 64-bit
+non-Windows platforms.
diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c
index 133470f9b8..9e0223b127 100644
--- a/Modules/socketmodule.c
+++ b/Modules/socketmodule.c
@@ -6890,17 +6890,23 @@ Returns the interface index corresponding to the interface name if_name.");
static PyObject *
socket_if_indextoname(PyObject *self, PyObject *arg)
{
+ unsigned long index_long = PyLong_AsUnsignedLong(arg);
+ if (index_long == (unsigned long) -1 && PyErr_Occurred()) {
+ return NULL;
+ }
+
#ifdef MS_WINDOWS
- NET_IFINDEX index;
+ NET_IFINDEX index = (NET_IFINDEX)index_long;
#else
- unsigned long index;
+ unsigned int index = (unsigned int)index_long;
#endif
- char name[IF_NAMESIZE + 1];
- index = PyLong_AsUnsignedLong(arg);
- if (index == (unsigned long) -1)
+ if ((unsigned long)index != index_long) {
+ PyErr_SetString(PyExc_OverflowError, "index is too large");
return NULL;
+ }
+ char name[IF_NAMESIZE + 1];
if (if_indextoname(index, name) == NULL) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
--
2.34.1.windows.1

View File

@ -0,0 +1,146 @@
From a2c59992e9e8d35baba9695eb186ad6c6ff85c51 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Wed, 17 Jan 2024 14:48:06 +0100
Subject: [PATCH] [3.9] gh-109858: Protect zipfile from "quoted-overlap"
zipbomb (GH-110016) (GH-113915)
Raise BadZipFile when try to read an entry that overlaps with other entry or
central directory.
(cherry picked from commit 66363b9a7b9fe7c99eba3a185b74c5fdbf842eba)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Lib/test/test_zipfile.py | 60 +++++++++++++++++++
Lib/zipfile.py | 12 ++++
...-09-28-13-15-51.gh-issue-109858.43e2dg.rst | 3 +
3 files changed, 75 insertions(+)
create mode 100644 Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index bd383d3f68..17e95eb862 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -2045,6 +2045,66 @@ def test_decompress_without_3rd_party_library(self):
with zipfile.ZipFile(zip_file) as zf:
self.assertRaises(RuntimeError, zf.extract, 'a.txt')
+ @requires_zlib()
+ def test_full_overlap(self):
+ data = (
+ b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
+ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
+ b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
+ b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
+ b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
+ b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
+ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05'
+ b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
+ b'\x00\x00\x00'
+ )
+ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
+ self.assertEqual(zipf.namelist(), ['a', 'b'])
+ zi = zipf.getinfo('a')
+ self.assertEqual(zi.header_offset, 0)
+ self.assertEqual(zi.compress_size, 16)
+ self.assertEqual(zi.file_size, 1033)
+ zi = zipf.getinfo('b')
+ self.assertEqual(zi.header_offset, 0)
+ self.assertEqual(zi.compress_size, 16)
+ self.assertEqual(zi.file_size, 1033)
+ self.assertEqual(len(zipf.read('a')), 1033)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
+ zipf.read('b')
+
+ @requires_zlib()
+ def test_quoted_overlap(self):
+ data = (
+ b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc'
+ b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00'
+ b'\x1f\x00\xe0\xffPK\x03\x04\x14\x00\x00\x00\x08\x00\xa0l'
+ b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
+ b'\x00\x00b\xed\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\'
+ b'd\x0b`PK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0'
+ b'lH\x05Y\xfc8\x044\x00\x00\x00(\x04\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00aPK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0l'
+ b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00'
+ b'bPK\x05\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00'
+ b'\x00S\x00\x00\x00\x00\x00'
+ )
+ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
+ self.assertEqual(zipf.namelist(), ['a', 'b'])
+ zi = zipf.getinfo('a')
+ self.assertEqual(zi.header_offset, 0)
+ self.assertEqual(zi.compress_size, 52)
+ self.assertEqual(zi.file_size, 1064)
+ zi = zipf.getinfo('b')
+ self.assertEqual(zi.header_offset, 36)
+ self.assertEqual(zi.compress_size, 16)
+ self.assertEqual(zi.file_size, 1033)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'):
+ zipf.read('a')
+ self.assertEqual(len(zipf.read('b')), 1033)
+
def tearDown(self):
unlink(TESTFN)
unlink(TESTFN2)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 1e942a503e..95f95ee112 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -338,6 +338,7 @@ class ZipInfo (object):
'compress_size',
'file_size',
'_raw_time',
+ '_end_offset',
)
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
@@ -379,6 +380,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
self.external_attr = 0 # External file attributes
self.compress_size = 0 # Size of the compressed file
self.file_size = 0 # Size of the uncompressed file
+ self._end_offset = None # Start of the next local header or central directory
# Other attributes are set by class ZipFile:
# header_offset Byte offset to the file header
# CRC CRC-32 of the uncompressed file
@@ -1399,6 +1401,12 @@ def _RealGetContents(self):
if self.debug > 2:
print("total", total)
+ end_offset = self.start_dir
+ for zinfo in sorted(self.filelist,
+ key=lambda zinfo: zinfo.header_offset,
+ reverse=True):
+ zinfo._end_offset = end_offset
+ end_offset = zinfo.header_offset
def namelist(self):
"""Return a list of file names in the archive."""
@@ -1554,6 +1562,10 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
'File name in directory %r and header %r differ.'
% (zinfo.orig_filename, fname))
+ if (zinfo._end_offset is not None and
+ zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
+ raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
+
# check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & 0x1
if is_encrypted:
diff --git a/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst
new file mode 100644
index 0000000000..be279caffc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst
@@ -0,0 +1,3 @@
+Protect :mod:`zipfile` from "quoted-overlap" zipbomb. It now raises
+BadZipFile when try to read an entry that overlaps with other entry or
+central directory.
--
2.34.1.windows.1

View File

@ -0,0 +1,122 @@
From 8fc8c45b6717be58ad927def1bf3ea05c83cab8c Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 17 Jan 2024 16:28:17 +0200
Subject: [PATCH] [3.9] gh-113659: Skip hidden .pth files (GH-113660)
(GH-114146)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
(cherry picked from commit 74208ed0c440244fb809d8acc97cb9ef51e888e3)
Co-authored-by: Łukasz Langa <lukasz@langa.pl>
---
Lib/site.py | 11 +++++-
Lib/test/test_site.py | 39 +++++++++++++++++++
...-01-02-19-52-23.gh-issue-113659.DkmnQc.rst | 1 +
3 files changed, 50 insertions(+), 1 deletion(-)
create mode 100644 Misc/NEWS.d/next/Security/2024-01-02-19-52-23.gh-issue-113659.DkmnQc.rst
diff --git a/Lib/site.py b/Lib/site.py
index 9e617afb00..54ffc4fdc0 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -74,6 +74,7 @@
import builtins
import _sitebuiltins
import io
+import stat
# Prefixes for site-packages; add additional prefixes like /usr/local here
PREFIXES = [sys.prefix, sys.exec_prefix]
@@ -156,6 +157,13 @@ def addpackage(sitedir, name, known_paths):
else:
reset = False
fullname = os.path.join(sitedir, name)
+ try:
+ st = os.lstat(fullname)
+ except OSError:
+ return
+ if ((getattr(st, 'st_flags', 0) & stat.UF_HIDDEN) or
+ (getattr(st, 'st_file_attributes', 0) & stat.FILE_ATTRIBUTE_HIDDEN)):
+ return
try:
f = io.TextIOWrapper(io.open_code(fullname))
except OSError:
@@ -203,7 +211,8 @@ def addsitedir(sitedir, known_paths=None):
names = os.listdir(sitedir)
except OSError:
return
- names = [name for name in names if name.endswith(".pth")]
+ names = [name for name in names
+ if name.endswith(".pth") and not name.startswith(".")]
for name in sorted(names):
addpackage(sitedir, name, known_paths)
if reset:
diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py
index 3d25d7e473..e578cd7db3 100644
--- a/Lib/test/test_site.py
+++ b/Lib/test/test_site.py
@@ -16,6 +16,7 @@
import os
import re
import shutil
+import stat
import subprocess
import sys
import sysconfig
@@ -185,6 +186,44 @@ def test_addsitedir(self):
finally:
pth_file.cleanup()
+ def test_addsitedir_dotfile(self):
+ pth_file = PthFile('.dotfile')
+ pth_file.cleanup(prep=True)
+ try:
+ pth_file.create()
+ site.addsitedir(pth_file.base_dir, set())
+ self.assertNotIn(site.makepath(pth_file.good_dir_path)[0], sys.path)
+ self.assertIn(pth_file.base_dir, sys.path)
+ finally:
+ pth_file.cleanup()
+
+ @unittest.skipUnless(hasattr(os, 'chflags'), 'test needs os.chflags()')
+ def test_addsitedir_hidden_flags(self):
+ pth_file = PthFile()
+ pth_file.cleanup(prep=True)
+ try:
+ pth_file.create()
+ st = os.stat(pth_file.file_path)
+ os.chflags(pth_file.file_path, st.st_flags | stat.UF_HIDDEN)
+ site.addsitedir(pth_file.base_dir, set())
+ self.assertNotIn(site.makepath(pth_file.good_dir_path)[0], sys.path)
+ self.assertIn(pth_file.base_dir, sys.path)
+ finally:
+ pth_file.cleanup()
+
+ @unittest.skipUnless(sys.platform == 'win32', 'test needs Windows')
+ def test_addsitedir_hidden_file_attribute(self):
+ pth_file = PthFile()
+ pth_file.cleanup(prep=True)
+ try:
+ pth_file.create()
+ subprocess.check_call(['attrib', '+H', pth_file.file_path])
+ site.addsitedir(pth_file.base_dir, set())
+ self.assertNotIn(site.makepath(pth_file.good_dir_path)[0], sys.path)
+ self.assertIn(pth_file.base_dir, sys.path)
+ finally:
+ pth_file.cleanup()
+
# This tests _getuserbase, hence the double underline
# to distinguish from a test for getuserbase
def test__getuserbase(self):
diff --git a/Misc/NEWS.d/next/Security/2024-01-02-19-52-23.gh-issue-113659.DkmnQc.rst b/Misc/NEWS.d/next/Security/2024-01-02-19-52-23.gh-issue-113659.DkmnQc.rst
new file mode 100644
index 0000000000..744687e723
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-01-02-19-52-23.gh-issue-113659.DkmnQc.rst
@@ -0,0 +1 @@
+Skip ``.pth`` files with names starting with a dot or hidden file attribute.
--
2.34.1.windows.1

View File

@ -0,0 +1,214 @@
From d54e22a669ae6e987199bb5d2c69bb5a46b0083b Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 17 Jan 2024 15:47:47 +0200
Subject: [PATCH] [3.9] gh-91133: tempfile.TemporaryDirectory: fix symlink bug
in cleanup (GH-99930) (GH-112842)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
(cherry picked from commit 81c16cd94ec38d61aa478b9a452436dc3b1b524d)
Co-authored-by: Søren Løvborg <sorenl@unity3d.com>
---
Lib/tempfile.py | 27 ++--
Lib/test/test_tempfile.py | 117 +++++++++++++++++-
...2-12-01-16-57-44.gh-issue-91133.LKMVCV.rst | 2 +
3 files changed, 136 insertions(+), 10 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2022-12-01-16-57-44.gh-issue-91133.LKMVCV.rst
diff --git a/Lib/tempfile.py b/Lib/tempfile.py
index eafce6f25b..59a628a174 100644
--- a/Lib/tempfile.py
+++ b/Lib/tempfile.py
@@ -268,6 +268,22 @@ def _mkstemp_inner(dir, pre, suf, flags, output_type):
raise FileExistsError(_errno.EEXIST,
"No usable temporary file name found")
+def _dont_follow_symlinks(func, path, *args):
+ # Pass follow_symlinks=False, unless not supported on this platform.
+ if func in _os.supports_follow_symlinks:
+ func(path, *args, follow_symlinks=False)
+ elif _os.name == 'nt' or not _os.path.islink(path):
+ func(path, *args)
+
+def _resetperms(path):
+ try:
+ chflags = _os.chflags
+ except AttributeError:
+ pass
+ else:
+ _dont_follow_symlinks(chflags, path, 0)
+ _dont_follow_symlinks(_os.chmod, path, 0o700)
+
# User visible interfaces.
@@ -789,17 +805,10 @@ def __init__(self, suffix=None, prefix=None, dir=None):
def _rmtree(cls, name):
def onerror(func, path, exc_info):
if issubclass(exc_info[0], PermissionError):
- def resetperms(path):
- try:
- _os.chflags(path, 0)
- except AttributeError:
- pass
- _os.chmod(path, 0o700)
-
try:
if path != name:
- resetperms(_os.path.dirname(path))
- resetperms(path)
+ _resetperms(_os.path.dirname(path))
+ _resetperms(path)
try:
_os.unlink(path)
diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py
index 8ad1bb98e8..571263d9c9 100644
--- a/Lib/test/test_tempfile.py
+++ b/Lib/test/test_tempfile.py
@@ -1394,6 +1394,103 @@ def test_cleanup_with_symlink_to_a_directory(self):
"were deleted")
d2.cleanup()
+ @support.skip_unless_symlink
+ def test_cleanup_with_symlink_modes(self):
+ # cleanup() should not follow symlinks when fixing mode bits (#91133)
+ with self.do_create(recurse=0) as d2:
+ file1 = os.path.join(d2, 'file1')
+ open(file1, 'wb').close()
+ dir1 = os.path.join(d2, 'dir1')
+ os.mkdir(dir1)
+ for mode in range(8):
+ mode <<= 6
+ with self.subTest(mode=format(mode, '03o')):
+ def test(target, target_is_directory):
+ d1 = self.do_create(recurse=0)
+ symlink = os.path.join(d1.name, 'symlink')
+ os.symlink(target, symlink,
+ target_is_directory=target_is_directory)
+ try:
+ os.chmod(symlink, mode, follow_symlinks=False)
+ except NotImplementedError:
+ pass
+ try:
+ os.chmod(symlink, mode)
+ except FileNotFoundError:
+ pass
+ os.chmod(d1.name, mode)
+ d1.cleanup()
+ self.assertFalse(os.path.exists(d1.name))
+
+ with self.subTest('nonexisting file'):
+ test('nonexisting', target_is_directory=False)
+ with self.subTest('nonexisting dir'):
+ test('nonexisting', target_is_directory=True)
+
+ with self.subTest('existing file'):
+ os.chmod(file1, mode)
+ old_mode = os.stat(file1).st_mode
+ test(file1, target_is_directory=False)
+ new_mode = os.stat(file1).st_mode
+ self.assertEqual(new_mode, old_mode,
+ '%03o != %03o' % (new_mode, old_mode))
+
+ with self.subTest('existing dir'):
+ os.chmod(dir1, mode)
+ old_mode = os.stat(dir1).st_mode
+ test(dir1, target_is_directory=True)
+ new_mode = os.stat(dir1).st_mode
+ self.assertEqual(new_mode, old_mode,
+ '%03o != %03o' % (new_mode, old_mode))
+
+ @unittest.skipUnless(hasattr(os, 'chflags'), 'requires os.chflags')
+ @support.skip_unless_symlink
+ def test_cleanup_with_symlink_flags(self):
+ # cleanup() should not follow symlinks when fixing flags (#91133)
+ flags = stat.UF_IMMUTABLE | stat.UF_NOUNLINK
+ self.check_flags(flags)
+
+ with self.do_create(recurse=0) as d2:
+ file1 = os.path.join(d2, 'file1')
+ open(file1, 'wb').close()
+ dir1 = os.path.join(d2, 'dir1')
+ os.mkdir(dir1)
+ def test(target, target_is_directory):
+ d1 = self.do_create(recurse=0)
+ symlink = os.path.join(d1.name, 'symlink')
+ os.symlink(target, symlink,
+ target_is_directory=target_is_directory)
+ try:
+ os.chflags(symlink, flags, follow_symlinks=False)
+ except NotImplementedError:
+ pass
+ try:
+ os.chflags(symlink, flags)
+ except FileNotFoundError:
+ pass
+ os.chflags(d1.name, flags)
+ d1.cleanup()
+ self.assertFalse(os.path.exists(d1.name))
+
+ with self.subTest('nonexisting file'):
+ test('nonexisting', target_is_directory=False)
+ with self.subTest('nonexisting dir'):
+ test('nonexisting', target_is_directory=True)
+
+ with self.subTest('existing file'):
+ os.chflags(file1, flags)
+ old_flags = os.stat(file1).st_flags
+ test(file1, target_is_directory=False)
+ new_flags = os.stat(file1).st_flags
+ self.assertEqual(new_flags, old_flags)
+
+ with self.subTest('existing dir'):
+ os.chflags(dir1, flags)
+ old_flags = os.stat(dir1).st_flags
+ test(dir1, target_is_directory=True)
+ new_flags = os.stat(dir1).st_flags
+ self.assertEqual(new_flags, old_flags)
+
@support.cpython_only
def test_del_on_collection(self):
# A TemporaryDirectory is deleted when garbage collected
@@ -1506,9 +1603,27 @@ def test_modes(self):
d.cleanup()
self.assertFalse(os.path.exists(d.name))
- @unittest.skipUnless(hasattr(os, 'chflags'), 'requires os.lchflags')
+ def check_flags(self, flags):
+ # skip the test if these flags are not supported (ex: FreeBSD 13)
+ filename = support.TESTFN
+ try:
+ open(filename, "w").close()
+ try:
+ os.chflags(filename, flags)
+ except OSError as exc:
+ # "OSError: [Errno 45] Operation not supported"
+ self.skipTest(f"chflags() doesn't support flags "
+ f"{flags:#b}: {exc}")
+ else:
+ os.chflags(filename, 0)
+ finally:
+ support.unlink(filename)
+
+ @unittest.skipUnless(hasattr(os, 'chflags'), 'requires os.chflags')
def test_flags(self):
flags = stat.UF_IMMUTABLE | stat.UF_NOUNLINK
+ self.check_flags(flags)
+
d = self.do_create(recurse=3, dirs=2, files=2)
with d:
# Change files and directories flags recursively.
diff --git a/Misc/NEWS.d/next/Library/2022-12-01-16-57-44.gh-issue-91133.LKMVCV.rst b/Misc/NEWS.d/next/Library/2022-12-01-16-57-44.gh-issue-91133.LKMVCV.rst
new file mode 100644
index 0000000000..7991048fc4
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-12-01-16-57-44.gh-issue-91133.LKMVCV.rst
@@ -0,0 +1,2 @@
+Fix a bug in :class:`tempfile.TemporaryDirectory` cleanup, which now no longer
+dereferences symlinks when working around file system permission errors.
--
2.34.1.windows.1

View File

@ -0,0 +1,489 @@
From 4a153a1d3b18803a684cd1bcc2cdf3ede3dbae19 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 15 Dec 2023 16:10:40 +0100
Subject: [PATCH] [Backport] CVE-2023-27043 Reject malformed addresses in
email.parseaddr()
Reference: https://github.com/python/cpython/pull/111116
Detect email address parsing errors and return empty tuple to
indicate the parsing error (old API). Add an optional 'strict'
parameter to getaddresses() and parseaddr() functions.
Offering: CloudBu CMP
CVE: CVE-2023-27043
---
Doc/library/email.utils.rst | 19 +-
Lib/email/utils.py | 151 +++++++++++++-
Lib/test/test_email/test_email.py | 187 +++++++++++++++++-
...-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 +
4 files changed, 344 insertions(+), 21 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
index 4d0e920eb0..104229e9e5 100644
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -60,13 +60,18 @@ of the new API.
begins with angle brackets, they are stripped off.
-.. function:: parseaddr(address)
+.. function:: parseaddr(address, *, strict=True)
Parse address -- which should be the value of some address-containing field such
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
+ If *strict* is true, use a strict parser which rejects malformed inputs.
+
+ .. versionchanged:: 3.13
+ Add *strict* optional parameter and reject malformed inputs by default.
+
.. function:: formataddr(pair, charset='utf-8')
@@ -84,12 +89,15 @@ of the new API.
Added the *charset* option.
-.. function:: getaddresses(fieldvalues)
+.. function:: getaddresses(fieldvalues, *, strict=True)
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
- :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
- example that gets all the recipients of a message::
+ :meth:`Message.get_all <email.message.Message.get_all>`.
+
+ If *strict* is true, use a strict parser which rejects malformed inputs.
+
+ Here's a simple example that gets all the recipients of a message::
from email.utils import getaddresses
@@ -99,6 +107,9 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
+ .. versionchanged:: 3.13
+ Add *strict* optional parameter and reject malformed inputs by default.
+
.. function:: parsedate(date)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 48d30160aa..7ca7a7c886 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -48,6 +48,7 @@ TICK = "'"
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[\\"]')
+
def _has_surrogates(s):
"""Return True if s contains surrogate-escaped binary data."""
# This check is based on the fact that unless there are surrogates, utf8
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
return address
+def _iter_escaped_chars(addr):
+ pos = 0
+ escape = False
+ for pos, ch in enumerate(addr):
+ if escape:
+ yield (pos, '\\' + ch)
+ escape = False
+ elif ch == '\\':
+ escape = True
+ else:
+ yield (pos, ch)
+ if escape:
+ yield (pos, '\\')
+
+
+def _strip_quoted_realnames(addr):
+ """Strip real names between quotes."""
+ if '"' not in addr:
+ # Fast path
+ return addr
+
+ start = 0
+ open_pos = None
+ result = []
+ for pos, ch in _iter_escaped_chars(addr):
+ if ch == '"':
+ if open_pos is None:
+ open_pos = pos
+ else:
+ if start != open_pos:
+ result.append(addr[start:open_pos])
+ start = pos + 1
+ open_pos = None
+
+ if start < len(addr):
+ result.append(addr[start:])
+
+ return ''.join(result)
-def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
- all = COMMASPACE.join(str(v) for v in fieldvalues)
- a = _AddressList(all)
- return a.addresslist
+
+supports_strict_parsing = True
+
+def getaddresses(fieldvalues, *, strict=True):
+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+ its place.
+
+ If strict is true, use a strict parser which rejects malformed inputs.
+ """
+
+ # If strict is true, if the resulting list of parsed addresses is greater
+ # than the number of fieldvalues in the input list, a parsing error has
+ # occurred and consequently a list containing a single empty 2-tuple [('',
+ # '')] is returned in its place. This is done to avoid invalid output.
+ #
+ # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
+ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
+ # Safe output: [('', '')]
+
+ if not strict:
+ all = COMMASPACE.join(str(v) for v in fieldvalues)
+ a = _AddressList(all)
+ return a.addresslist
+
+ fieldvalues = [str(v) for v in fieldvalues]
+ fieldvalues = _pre_parse_validation(fieldvalues)
+ addr = COMMASPACE.join(fieldvalues)
+ a = _AddressList(addr)
+ result = _post_parse_validation(a.addresslist)
+
+ # Treat output as invalid if the number of addresses is not equal to the
+ # expected number of addresses.
+ n = 0
+ for v in fieldvalues:
+ # When a comma is used in the Real Name part it is not a deliminator.
+ # So strip those out before counting the commas.
+ v = _strip_quoted_realnames(v)
+ # Expected number of addresses: 1 + number of commas
+ n += 1 + v.count(',')
+ if len(result) != n:
+ return [('', '')]
+
+ return result
+
+
+def _check_parenthesis(addr):
+ # Ignore parenthesis in quoted real names.
+ addr = _strip_quoted_realnames(addr)
+
+ opens = 0
+ for pos, ch in _iter_escaped_chars(addr):
+ if ch == '(':
+ opens += 1
+ elif ch == ')':
+ opens -= 1
+ if opens < 0:
+ return False
+ return (opens == 0)
+
+
+def _pre_parse_validation(email_header_fields):
+ accepted_values = []
+ for v in email_header_fields:
+ if not _check_parenthesis(v):
+ v = "('', '')"
+ accepted_values.append(v)
+
+ return accepted_values
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+ accepted_values = []
+ # The parser would have parsed a correctly formatted domain-literal
+ # The existence of an [ after parsing indicates a parsing failure
+ for v in parsed_email_header_tuples:
+ if '[' in v[1]:
+ v = ('', '')
+ accepted_values.append(v)
+
+ return accepted_values
def _format_timetuple_and_zone(timetuple, zone):
@@ -202,16 +318,33 @@ def parsedate_to_datetime(data):
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
-def parseaddr(addr):
+def parseaddr(addr, *, strict=True):
"""
Parse addr into its constituent realname and email address parts.
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
+
+ If strict is True, use a strict parser which rejects malformed inputs.
"""
- addrs = _AddressList(addr).addresslist
- if not addrs:
- return '', ''
+ if not strict:
+ addrs = _AddressList(addr).addresslist
+ if not addrs:
+ return ('', '')
+ return addrs[0]
+
+ if isinstance(addr, list):
+ addr = addr[0]
+
+ if not isinstance(addr, str):
+ return ('', '')
+
+ addr = _pre_parse_validation([addr])[0]
+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
+
+ if not addrs or len(addrs) > 1:
+ return ('', '')
+
return addrs[0]
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 02e00cc058..7c2607fc4c 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -16,6 +16,7 @@ from unittest.mock import patch
import email
import email.policy
+import email.utils
from email.charset import Charset
from email.header import Header, decode_header, make_header
@@ -3280,15 +3281,137 @@ Foo
],
)
+ def test_parsing_errors(self):
+ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
+ alice = 'alice@example.org'
+ bob = 'bob@example.com'
+ empty = ('', '')
+
+ # Test utils.getaddresses() and utils.parseaddr() on malformed email
+ # addresses: default behavior (strict=True) rejects malformed address,
+ # and strict=False which tolerates malformed address.
+ for invalid_separator, expected_non_strict in (
+ ('(', [(f'<{bob}>', alice)]),
+ (')', [('', alice), empty, ('', bob)]),
+ ('<', [('', alice), empty, ('', bob), empty]),
+ ('>', [('', alice), empty, ('', bob)]),
+ ('[', [('', f'{alice}[<{bob}>]')]),
+ (']', [('', alice), empty, ('', bob)]),
+ ('@', [empty, empty, ('', bob)]),
+ (';', [('', alice), empty, ('', bob)]),
+ (':', [('', alice), ('', bob)]),
+ ('.', [('', alice + '.'), ('', bob)]),
+ ('"', [('', alice), ('', f'<{bob}>')]),
+ ):
+ address = f'{alice}{invalid_separator}<{bob}>'
+ with self.subTest(address=address):
+ self.assertEqual(utils.getaddresses([address]),
+ [empty])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ expected_non_strict)
+
+ self.assertEqual(utils.parseaddr([address]),
+ empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Comma (',') is treated differently depending on strict parameter.
+ # Comma without quotes.
+ address = f'{alice},<{bob}>'
+ self.assertEqual(utils.getaddresses([address]),
+ [('', alice), ('', bob)])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('', alice), ('', bob)])
+ self.assertEqual(utils.parseaddr([address]),
+ empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Real name between quotes containing comma.
+ address = '"Alice, alice@example.org" <bob@example.com>'
+ expected_strict = ('Alice, alice@example.org', 'bob@example.com')
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Valid parenthesis in comments.
+ address = 'alice@example.org (Alice)'
+ expected_strict = ('Alice', 'alice@example.org')
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Invalid parenthesis in comments.
+ address = 'alice@example.org )Alice('
+ self.assertEqual(utils.getaddresses([address]), [empty])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
+ self.assertEqual(utils.parseaddr([address]), empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Two addresses with quotes separated by comma.
+ address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
+ self.assertEqual(utils.getaddresses([address]),
+ [('Jane Doe', 'jane@example.net'),
+ ('John Doe', 'john@example.net')])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('Jane Doe', 'jane@example.net'),
+ ('John Doe', 'john@example.net')])
+ self.assertEqual(utils.parseaddr([address]), empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Test email.utils.supports_strict_parsing attribute
+ self.assertEqual(email.utils.supports_strict_parsing, True)
+
def test_getaddresses_nasty(self):
- eq = self.assertEqual
- eq(utils.getaddresses(['foo: ;']), [('', '')])
- eq(utils.getaddresses(
- ['[]*-- =~$']),
- [('', ''), ('', ''), ('', '*--')])
- eq(utils.getaddresses(
- ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
+ for addresses, expected in (
+ (['"Sürname, Firstname" <to@example.com>'],
+ [('Sürname, Firstname', 'to@example.com')]),
+
+ (['foo: ;'],
+ [('', '')]),
+
+ (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
+
+ ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
+
+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
+ [('', '')]),
+
+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
+
+ (['John Doe <jdoe@machine(comment). example>'],
+ [('John Doe (comment)', 'jdoe@machine.example')]),
+
+ (['"Mary Smith: Personal Account" <smith@home.example>'],
+ [('Mary Smith: Personal Account', 'smith@home.example')]),
+
+ (['Undisclosed recipients:;'],
+ [('', '')]),
+
+ ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
+ ):
+ with self.subTest(addresses=addresses):
+ self.assertEqual(utils.getaddresses(addresses),
+ expected)
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
+ expected)
+
+ addresses = ['[]*-- =~$']
+ self.assertEqual(utils.getaddresses(addresses),
+ [('', '')])
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
+ [('', ''), ('', ''), ('', '*--')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
@@ -3477,6 +3600,54 @@ multipart/report
m = cls(*constructor, policy=email.policy.default)
self.assertIs(m.policy, email.policy.default)
+ def test_iter_escaped_chars(self):
+ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')),
+ [(0, 'a'),
+ (2, '\\\\'),
+ (3, 'b'),
+ (5, '\\"'),
+ (6, 'c'),
+ (8, '\\\\'),
+ (9, '"'),
+ (10, 'd')])
+ self.assertEqual(list(utils._iter_escaped_chars('a\\')),
+ [(0, 'a'), (1, '\\')])
+
+ def test_strip_quoted_realnames(self):
+ def check(addr, expected):
+ self.assertEqual(utils._strip_quoted_realnames(addr), expected)
+
+ check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>',
+ ' <jane@example.net>, <john@example.net>')
+ check(r'"Jane \"Doe\"." <jane@example.net>',
+ ' <jane@example.net>')
+
+ # special cases
+ check(r'before"name"after', 'beforeafter')
+ check(r'before"name"', 'before')
+ check(r'b"name"', 'b') # single char
+ check(r'"name"after', 'after')
+ check(r'"name"a', 'a') # single char
+ check(r'"name"', '')
+
+ # no change
+ for addr in (
+ 'Jane Doe <jane@example.net>, John Doe <john@example.net>',
+ 'lone " quote',
+ ):
+ self.assertEqual(utils._strip_quoted_realnames(addr), addr)
+
+
+ def test_check_parenthesis(self):
+ addr = 'alice@example.net'
+ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)'))
+ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice('))
+ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))'))
+ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)'))
+
+ # Ignore real name between quotes
+ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}'))
+
# Test the iterator/generators
class TestIterators(TestEmailBase):
diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
new file mode 100644
index 0000000000..3d0e9e4078
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
@@ -0,0 +1,8 @@
+:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now
+return ``('', '')`` 2-tuples in more situations where invalid email
+addresses are encountered instead of potentially inaccurate values. Add
+optional *strict* parameter to these two functions: use ``strict=False`` to
+get the old behavior, accept malformed inputs.
+``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to check
+if the *strict* parameter is available. Patch by Thomas Dwyer and Victor
+Stinner to improve the CVE-2023-27043 fix.
--
2.32.0.windows.1

View File

@ -0,0 +1,266 @@
From 18dfbd035775c15533d13a98e56b1d2bf5c65f00 Mon Sep 17 00:00:00 2001
From: Thomas Dwyer <github@tomd.tel>
Date: Mon, 10 Jul 2023 18:00:55 -0500
Subject: [PATCH] [Backport] Fix parsing errors in email/_parseaddr.py
Reference: https://github.com/python/cpython/issues/102988
The e-mail module of Python 0 - 2.7.18, 3.x - 3.11 incorrectly parses e-mail addresses which contain a special character. This vulnerability allows attackers to send messages from e-mail addresses that would otherwise be rejected.
Offering: CloudBu CMP
CVE: CVE-2023-27043
---
Doc/library/email.utils.rst | 26 +++++-
Lib/email/utils.py | 63 +++++++++++++--
Lib/test/test_email/test_email.py | 81 ++++++++++++++++++-
...-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 4 +
4 files changed, 164 insertions(+), 10 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
index 4d0e920eb0..97df96230c 100644
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -67,6 +67,11 @@ of the new API.
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
+ .. versionchanged:: 3.12
+ For security reasons, addresses that were ambiguous and could parse into
+ multiple different addresses now cause ``('', '')`` to be returned
+ instead of only one of the *potential* addresses.
+
.. function:: formataddr(pair, charset='utf-8')
@@ -89,7 +94,7 @@ of the new API.
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
- example that gets all the recipients of a message::
+ example that gets all the recipients of a message:
from email.utils import getaddresses
@@ -99,6 +104,25 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
+ When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
+ is returned in its place. Other errors in parsing the list of
+ addresses such as a fieldvalue seemingly parsing into multiple
+ addresses may result in a list containing a single empty 2-tuple
+ ``[('', '')]`` being returned rather than returning potentially
+ invalid output.
+
+ Example malformed input parsing:
+
+ .. doctest::
+
+ >>> from email.utils import getaddresses
+ >>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
+ [('', '')]
+
+ .. versionchanged:: 3.12
+ The 2-tuple of ``('', '')`` in the returned values when parsing
+ fails were added as to address a security issue.
+
.. function:: parsedate(date)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 48d30160aa..385e05c429 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -106,12 +106,54 @@ def formataddr(pair, charset='utf-8'):
return address
+def _pre_parse_validation(email_header_fields):
+ accepted_values = []
+ for v in email_header_fields:
+ s = v.replace('\\(', '').replace('\\)', '')
+ if s.count('(') != s.count(')'):
+ v = "('', '')"
+ accepted_values.append(v)
+
+ return accepted_values
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+ accepted_values = []
+ # The parser would have parsed a correctly formatted domain-literal
+ # The existence of an [ after parsing indicates a parsing failure
+ for v in parsed_email_header_tuples:
+ if '[' in v[1]:
+ v = ('', '')
+ accepted_values.append(v)
+
+ return accepted_values
+
def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
- all = COMMASPACE.join(str(v) for v in fieldvalues)
+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+ its place.
+
+ If the resulting list of parsed address is not the same as the number of
+ fieldvalues in the input list a parsing error has occurred. A list
+ containing a single empty 2-tuple [('', '')] is returned in its place.
+ This is done to avoid invalid output.
+ """
+ fieldvalues = [str(v) for v in fieldvalues]
+ fieldvalues = _pre_parse_validation(fieldvalues)
+ all = COMMASPACE.join(v for v in fieldvalues)
a = _AddressList(all)
- return a.addresslist
+ result = _post_parse_validation(a.addresslist)
+
+ n = 0
+ for v in fieldvalues:
+ n += v.count(',') + 1
+
+ if len(result) != n:
+ return [('', '')]
+
+ return result
def _format_timetuple_and_zone(timetuple, zone):
@@ -209,9 +251,18 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
- addrs = _AddressList(addr).addresslist
- if not addrs:
- return '', ''
+ if isinstance(addr, list):
+ addr = addr[0]
+
+ if not isinstance(addr, str):
+ return ('', '')
+
+ addr = _pre_parse_validation([addr])[0]
+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
+
+ if not addrs or len(addrs) > 1:
+ return ('', '')
+
return addrs[0]
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 489cd05be4..22b7cb43f6 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3263,15 +3263,90 @@ Foo
[('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')])
+ def test_getaddresses_parsing_errors(self):
+ """Test for parsing errors from CVE-2023-27043"""
+ eq = self.assertEqual
+ eq(utils.getaddresses(['alice@example.org(<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org)<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org<<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org><bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org@<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org,<bob@example.com>']),
+ [('', 'alice@example.org'), ('', 'bob@example.com')])
+ eq(utils.getaddresses(['alice@example.org;<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org:<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org.<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org"<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org[<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org]<bob@example.com>']),
+ [('', '')])
+
+ def test_parseaddr_parsing_errors(self):
+ """Test for parsing errors from CVE-2023-27043"""
+ eq = self.assertEqual
+ eq(utils.parseaddr(['alice@example.org(<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org)<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org<<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org><bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org@<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org,<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org;<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org:<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org.<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org"<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org[<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org]<bob@example.com>']),
+ ('', ''))
+
def test_getaddresses_nasty(self):
eq = self.assertEqual
eq(utils.getaddresses(['foo: ;']), [('', '')])
- eq(utils.getaddresses(
- ['[]*-- =~$']),
- [('', ''), ('', ''), ('', '*--')])
+ eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
+ eq(utils.getaddresses(
+ [r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')])
+ eq(utils.getaddresses(
+ ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
+ [('', '')])
+ eq(utils.getaddresses(
+ ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']),
+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')])
+ eq(utils.getaddresses(
+ ['John Doe <jdoe@machine(comment). example>']),
+ [('John Doe (comment)', 'jdoe@machine.example')])
+ eq(utils.getaddresses(
+ ['"Mary Smith: Personal Account" <smith@home.example>']),
+ [('Mary Smith: Personal Account', 'smith@home.example')])
+ eq(utils.getaddresses(
+ ['Undisclosed recipients:;']),
+ [('', '')])
+ eq(utils.getaddresses(
+ [r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']),
+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
diff --git a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
new file mode 100644
index 0000000000..e0434ccd2c
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
@@ -0,0 +1,4 @@
+CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
+and :func:`email.utils.getaddresses` from returning the realname portion of an
+invalid RFC2822 email header in the email address portion of the 2-tuple
+returned after being parsed by :class:`email._parseaddr.AddressList`.
--
2.32.0.windows.1

View File

@ -0,0 +1,285 @@
From a31dea1feb61793e48fa9aa5014f358352205c1d Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith" <greg@krypto.org>
Date: Thu, 20 Jul 2023 20:30:52 -0700
Subject: [PATCH] [Backport] Revert fixes for CVE-2023-27043
Reference: https://github.com/python/cpython/pull/106733
Revert "gh-102988: Detect email address parsing errors and return empty tuple to indicate the parsing error (old API) (#105127)"
This reverts commit and adds the regression test suggested in the issue.
Offering: CloudBu CMP
CVE: CVE-2023-27043
---
Doc/library/email.utils.rst | 26 +----
Lib/email/utils.py | 63 ++----------
Lib/test/test_email/test_email.py | 96 ++++---------------
...-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 8 +-
4 files changed, 30 insertions(+), 163 deletions(-)
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
index 97df96230c..4d0e920eb0 100644
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -67,11 +67,6 @@ of the new API.
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
- .. versionchanged:: 3.12
- For security reasons, addresses that were ambiguous and could parse into
- multiple different addresses now cause ``('', '')`` to be returned
- instead of only one of the *potential* addresses.
-
.. function:: formataddr(pair, charset='utf-8')
@@ -94,7 +89,7 @@ of the new API.
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
- example that gets all the recipients of a message:
+ example that gets all the recipients of a message::
from email.utils import getaddresses
@@ -104,25 +99,6 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
- When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
- is returned in its place. Other errors in parsing the list of
- addresses such as a fieldvalue seemingly parsing into multiple
- addresses may result in a list containing a single empty 2-tuple
- ``[('', '')]`` being returned rather than returning potentially
- invalid output.
-
- Example malformed input parsing:
-
- .. doctest::
-
- >>> from email.utils import getaddresses
- >>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
- [('', '')]
-
- .. versionchanged:: 3.12
- The 2-tuple of ``('', '')`` in the returned values when parsing
- fails were added as to address a security issue.
-
.. function:: parsedate(date)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 385e05c429..48d30160aa 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -106,54 +106,12 @@ def formataddr(pair, charset='utf-8'):
return address
-def _pre_parse_validation(email_header_fields):
- accepted_values = []
- for v in email_header_fields:
- s = v.replace('\\(', '').replace('\\)', '')
- if s.count('(') != s.count(')'):
- v = "('', '')"
- accepted_values.append(v)
-
- return accepted_values
-
-
-def _post_parse_validation(parsed_email_header_tuples):
- accepted_values = []
- # The parser would have parsed a correctly formatted domain-literal
- # The existence of an [ after parsing indicates a parsing failure
- for v in parsed_email_header_tuples:
- if '[' in v[1]:
- v = ('', '')
- accepted_values.append(v)
-
- return accepted_values
-
def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
-
- When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
- its place.
-
- If the resulting list of parsed address is not the same as the number of
- fieldvalues in the input list a parsing error has occurred. A list
- containing a single empty 2-tuple [('', '')] is returned in its place.
- This is done to avoid invalid output.
- """
- fieldvalues = [str(v) for v in fieldvalues]
- fieldvalues = _pre_parse_validation(fieldvalues)
- all = COMMASPACE.join(v for v in fieldvalues)
+ """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
+ all = COMMASPACE.join(str(v) for v in fieldvalues)
a = _AddressList(all)
- result = _post_parse_validation(a.addresslist)
-
- n = 0
- for v in fieldvalues:
- n += v.count(',') + 1
-
- if len(result) != n:
- return [('', '')]
-
- return result
+ return a.addresslist
def _format_timetuple_and_zone(timetuple, zone):
@@ -251,18 +209,9 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
- if isinstance(addr, list):
- addr = addr[0]
-
- if not isinstance(addr, str):
- return ('', '')
-
- addr = _pre_parse_validation([addr])[0]
- addrs = _post_parse_validation(_AddressList(addr).addresslist)
-
- if not addrs or len(addrs) > 1:
- return ('', '')
-
+ addrs = _AddressList(addr).addresslist
+ if not addrs:
+ return '', ''
return addrs[0]
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 22b7cb43f6..02e00cc058 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3263,90 +3263,32 @@ Foo
[('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')])
- def test_getaddresses_parsing_errors(self):
- """Test for parsing errors from CVE-2023-27043"""
- eq = self.assertEqual
- eq(utils.getaddresses(['alice@example.org(<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org)<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org<<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org><bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org@<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org,<bob@example.com>']),
- [('', 'alice@example.org'), ('', 'bob@example.com')])
- eq(utils.getaddresses(['alice@example.org;<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org:<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org.<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org"<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org[<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org]<bob@example.com>']),
- [('', '')])
-
- def test_parseaddr_parsing_errors(self):
- """Test for parsing errors from CVE-2023-27043"""
- eq = self.assertEqual
- eq(utils.parseaddr(['alice@example.org(<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org)<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org<<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org><bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org@<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org,<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org;<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org:<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org.<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org"<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org[<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org]<bob@example.com>']),
- ('', ''))
+ def test_getaddresses_comma_in_name(self):
+ """GH-106669 regression test."""
+ self.assertEqual(
+ utils.getaddresses(
+ [
+ '"Bud, Person" <bperson@dom.ain>',
+ 'aperson@dom.ain (Al Person)',
+ '"Mariusz Felisiak" <to@example.com>',
+ ]
+ ),
+ [
+ ('Bud, Person', 'bperson@dom.ain'),
+ ('Al Person', 'aperson@dom.ain'),
+ ('Mariusz Felisiak', 'to@example.com'),
+ ],
+ )
def test_getaddresses_nasty(self):
eq = self.assertEqual
eq(utils.getaddresses(['foo: ;']), [('', '')])
- eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
+ eq(utils.getaddresses(
+ ['[]*-- =~$']),
+ [('', ''), ('', ''), ('', '*--')])
eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
- eq(utils.getaddresses(
- [r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
- [('Pete (A nice ) chap his account his host)', 'pete@silly.test')])
- eq(utils.getaddresses(
- ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
- [('', '')])
- eq(utils.getaddresses(
- ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']),
- [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')])
- eq(utils.getaddresses(
- ['John Doe <jdoe@machine(comment). example>']),
- [('John Doe (comment)', 'jdoe@machine.example')])
- eq(utils.getaddresses(
- ['"Mary Smith: Personal Account" <smith@home.example>']),
- [('Mary Smith: Personal Account', 'smith@home.example')])
- eq(utils.getaddresses(
- ['Undisclosed recipients:;']),
- [('', '')])
- eq(utils.getaddresses(
- [r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']),
- [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
diff --git a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
index e0434ccd2c..c67ec45737 100644
--- a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
+++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
@@ -1,4 +1,4 @@
-CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
-and :func:`email.utils.getaddresses` from returning the realname portion of an
-invalid RFC2822 email header in the email address portion of the 2-tuple
-returned after being parsed by :class:`email._parseaddr.AddressList`.
+Reverted the :mod:`email.utils` security improvement change released in
+3.12beta4 that unintentionally caused :mod:`email.utils.getaddresses` to fail
+to parse email addresses with a comma in the quoted name field.
+See :gh:`106669`.
--
2.32.0.windows.1

View File

@ -0,0 +1,77 @@
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 142ce2c..a8d5c2d 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -14,6 +14,8 @@ import locale
import operator
import os
import pickle
+import pyexpat
+import subprocess
import sys
import textwrap
import types
@@ -96,6 +98,11 @@ ENTITY_XML = """\
<document>&entity;</document>
"""
+macro_to_find = 'XML_SetReparseDeferralEnabled'
+header_file = '/usr/include/expat.h'
+result = subprocess.run(['grep', '-q', macro_to_find, header_file], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+fails_with_expat = (unittest.expectedFailure if result.returncode == 0 else lambda test: test)
+
EXTERNAL_ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
@@ -1410,27 +1417,38 @@ class XMLPullParserTest(unittest.TestCase):
with self.assertRaises(StopIteration):
next(it)
- def test_simple_xml_with_ns(self):
+ def test_simple_xml(self, chunk_size=None):
parser = ET.XMLPullParser()
self.assert_event_tags(parser, [])
- self._feed(parser, "<!-- comment -->\n")
- self.assert_event_tags(parser, [])
- self._feed(parser, "<root xmlns='namespace'>\n")
+ self._feed(parser, "<!-- comment -->\n", chunk_size)
self.assert_event_tags(parser, [])
- self._feed(parser, "<element key='value'>text</element")
+ self._feed(parser,
+ "<root>\n <element key='value'>text</element",
+ chunk_size)
self.assert_event_tags(parser, [])
- self._feed(parser, ">\n")
- self.assert_event_tags(parser, [('end', '{namespace}element')])
- self._feed(parser, "<element>text</element>tail\n")
- self._feed(parser, "<empty-element/>\n")
+ self._feed(parser, ">\n", chunk_size)
+ self.assert_event_tags(parser, [('end', 'element')])
+ self._feed(parser, "<element>text</element>tail\n", chunk_size)
+ self._feed(parser, "<empty-element/>\n", chunk_size)
self.assert_event_tags(parser, [
- ('end', '{namespace}element'),
- ('end', '{namespace}empty-element'),
+ ('end', 'element'),
+ ('end', 'empty-element'),
])
- self._feed(parser, "</root>\n")
- self.assert_event_tags(parser, [('end', '{namespace}root')])
+ self._feed(parser, "</root>\n", chunk_size)
+ self.assert_event_tags(parser, [('end', 'root')])
self.assertIsNone(parser.close())
+ @fails_with_expat
+ def test_simple_xml_chunk_1(self):
+ self.test_simple_xml(chunk_size=1)
+
+ @fails_with_expat
+ def test_simple_xml_chunk_5(self):
+ self.test_simple_xml(chunk_size=5)
+
+ def test_simple_xml_chunk_22(self):
+ self.test_simple_xml(chunk_size=22)
+
def test_ns_events(self):
parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
self._feed(parser, "<!-- comment -->\n")

View File

@ -0,0 +1,143 @@
From 95c9c2b9cb2d3c1d29c8ce77f154de8bd5313dae Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Tue, 24 May 2022 01:52:49 -0700
Subject: [PATCH] gh-93065: Fix HAMT to iterate correctly over 7-level
deep
trees (GH-93066) (#93147)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Also while there, clarify a few things about why we reduce the hash to
32 bits.
Co-authored-by: Eli Libman <eli@hyro.ai>
Co-authored-by: Yury Selivanov <yury@edgedb.com>
Co-authored-by: Łukasz Langa <lukasz@langa.pl>
(cherry picked from commit c1f5c903a7e4ed27190488f4e33b00d3c3d952e5)
---
Include/internal/pycore_hamt.h | 14 +++++++++++++-
Lib/test/test_context.py | 35 ++++++++++++++++++++++++++++++++++
Misc/ACKS | 1 +
Python/hamt.c | 14 +++++++++++---
4 files changed, 60 insertions(+), 4 deletions(-)
diff --git a/Include/internal/pycore_hamt.h b/Include/internal/pycore_hamt.h
index aaf6559..357d966 100644
--- a/Include/internal/pycore_hamt.h
+++ b/Include/internal/pycore_hamt.h
@@ -5,7 +5,19 @@
# error "this header requires Py_BUILD_CORE define"
#endif
-#define _Py_HAMT_MAX_TREE_DEPTH 7
+
+/*
+HAMT tree is shaped by hashes of keys. Every group of 5 bits of a hash denotes
+the exact position of the key in one level of the tree. Since we're using
+32 bit hashes, we can have at most 7 such levels. Although if there are
+two distinct keys with equal hashes, they will have to occupy the same
+cell in the 7th level of the tree -- so we'd put them in a "collision" node.
+Which brings the total possible tree depth to 8. Read more about the actual
+layout of the HAMT tree in `hamt.c`.
+
+This constant is used to define a data structure for storing iteration state.
+*/
+#define _Py_HAMT_MAX_TREE_DEPTH 8
#define PyHamt_Check(o) Py_IS_TYPE(o, &_PyHamt_Type)
diff --git a/Lib/test/test_context.py b/Lib/test/test_context.py
index 2d8b63a..689e3d4 100644
--- a/Lib/test/test_context.py
+++ b/Lib/test/test_context.py
@@ -533,6 +533,41 @@ class HamtTest(unittest.TestCase):
self.assertEqual(len(h4), 2)
self.assertEqual(len(h5), 3)
+ def test_hamt_collision_3(self):
+ # Test that iteration works with the deepest tree possible.
+ # https://github.com/python/cpython/issues/93065
+
+ C = HashKey(0b10000000_00000000_00000000_00000000, 'C')
+ D = HashKey(0b10000000_00000000_00000000_00000000, 'D')
+
+ E = HashKey(0b00000000_00000000_00000000_00000000, 'E')
+
+ h = hamt()
+ h = h.set(C, 'C')
+ h = h.set(D, 'D')
+ h = h.set(E, 'E')
+
+ # BitmapNode(size=2 count=1 bitmap=0b1):
+ # NULL:
+ # BitmapNode(size=2 count=1 bitmap=0b1):
+ # NULL:
+ # BitmapNode(size=2 count=1 bitmap=0b1):
+ # NULL:
+ # BitmapNode(size=2 count=1 bitmap=0b1):
+ # NULL:
+ # BitmapNode(size=2 count=1 bitmap=0b1):
+ # NULL:
+ # BitmapNode(size=2 count=1 bitmap=0b1):
+ # NULL:
+ # BitmapNode(size=4 count=2 bitmap=0b101):
+ # <Key name:E hash:0>: 'E'
+ # NULL:
+ # CollisionNode(size=4 id=0x107a24520):
+ # <Key name:C hash:2147483648>: 'C'
+ # <Key name:D hash:2147483648>: 'D'
+
+ self.assertEqual({k.name for k in h.keys()}, {'C', 'D', 'E'})
+
def test_hamt_stress(self):
COLLECTION_SIZE = 7000
TEST_ITERS_EVERY = 647
diff --git a/Misc/ACKS b/Misc/ACKS
index ac893ac..8699b98 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1031,6 +1031,7 @@ Robert Li
Xuanji Li
Zekun Li
Zheao Li
+Eli Libman
Dan Lidral-Porter
Robert van Liere
Ross Light
diff --git a/Python/hamt.c b/Python/hamt.c
index 8801c5e..3296109 100644
--- a/Python/hamt.c
+++ b/Python/hamt.c
@@ -407,14 +407,22 @@ hamt_hash(PyObject *o)
return -1;
}
- /* While it's suboptimal to reduce Python's 64 bit hash to
+ /* While it's somewhat suboptimal to reduce Python's 64 bit hash to
32 bits via XOR, it seems that the resulting hash function
is good enough (this is also how Long type is hashed in Java.)
Storing 10, 100, 1000 Python strings results in a relatively
shallow and uniform tree structure.
- Please don't change this hashing algorithm, as there are many
- tests that test some exact tree shape to cover all code paths.
+ Also it's worth noting that it would be possible to adapt the tree
+ structure to 64 bit hashes, but that would increase memory pressure
+ and provide little to no performance benefits for collections with
+ fewer than billions of key/value pairs.
+
+ Important: do not change this hash reducing function. There are many
+ tests that need an exact tree shape to cover all code paths and
+ we do that by specifying concrete values for test data's `__hash__`.
+ If this function is changed most of the regression tests would
+ become useless.
*/
int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32);
return xored == -1 ? -2 : xored;
--
2.33.0

View File

@ -3,7 +3,7 @@ Summary: Interpreter of the Python3 programming language
URL: https://www.python.org/
Version: 3.9.9
Release: 27
Release: 32
License: Python-2.0
%global branchversion 3.9
@ -109,11 +109,21 @@ Patch6015: backport-CVE-2007-4559.patch
Patch6016: backport-CVE-2023-40217.patch
Patch6017: backport-3.9-gh-104049-do-not-expose-on-disk-location-from-Si.patch
Patch6018: backport-3.9-gh-99889-Fix-directory-traversal-security-flaw-i.patch
Patch6019: backport-Fix-parsing-errors-in-email-_parseaddr.py.patch
Patch6020: backport-Revert-fixes-for-CVE-2023-27043.patch
Patch6021: backport-CVE-2023-27043.patch
Patch6022: backport-gh-93065-Fix-HAMT-to-iterate-correctly-over-7-level-.patch
Patch6023: backport-3.9-bpo-37013-Fix-the-error-handling-in-socket.if_in.patch
Patch6024: backport-3.9-gh-91133-tempfile.TemporaryDirectory-fix-symlink.patch
Patch6025: backport-3.9-gh-109858-Protect-zipfile-from-quoted-overlap-zi.patch
Patch6026: backport-3.9-gh-113659-Skip-hidden-.pth-files-GH-113660-GH-11.patch
Patch6027: backport-fix_xml_tree_assert_error.patch
Patch9000: add-the-sm3-method-for-obtaining-the-salt-value.patch
Patch9001: python3-Add-sw64-architecture.patch
Patch9002: Add-loongarch-support.patch
Patch9003: avoid-usage-of-md5-in-multiprocessing.patch
Patch9004: update-openssl-version-for-test-case.patch
Provides: python%{branchversion} = %{version}-%{release}
Provides: python(abi) = %{branchversion}
@ -216,11 +226,21 @@ rm -r Modules/expat
%patch6016 -p1
%patch6017 -p1
%patch6018 -p1
%patch6019 -p1
%patch6020 -p1
%patch6021 -p1
%patch6022 -p1
%patch6023 -p1
%patch6024 -p1
%patch6025 -p1
%patch6026 -p1
%patch6027 -p1
%patch9000 -p1
%patch9001 -p1
%patch9002 -p1
%patch9003 -p1
%patch9004 -p1
rm Lib/ensurepip/_bundled/*.whl
rm configure pyconfig.h.in
@ -308,7 +328,6 @@ pushd ${OptimizedBuildDir}
%endif
%endif
--without-ensurepip \
--with-lto \
%{optimizations_flag}
%make_build EXTRA_CFLAGS="$CFLAGS"
@ -844,6 +863,41 @@ export BEP_GTDLIST="$BEP_GTDLIST_TMP"
%{_mandir}/*/*
%changelog
* Thu Jun 20 2024 xinsheng <xinsheng3@huawei.com> - 3.9.9-32
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:update openssl version for test case
* Wed May 22 2024 xinsheng <xinsheng3@huawei.com> - 3.9.9-31
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:fix xml tree assert error
* Wed Mar 13 2024 xinsheng <xinsheng3@huawei.com> - 3.9.9-30
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:backport upstream patches
- Fix HAMT to iterate correctly over 7-level deep trees (gh-93065)
- Fix the error handling in socket.if_indextoname() (bpo-37013)
- tempfile.TemporaryDirectory: fix symlink bug in cleanup (gh-91133)
- Protect zipfile from "quoted-overlap" zipbomb (gh-109858)
- Skip hidden .pth files (gh-113659)
* Wed Jan 10 2024 zhaoyu <zhaoyu64@huawei.com> - 3.9.9-29
- Type:CVE
- CVE:CVE-2023-27043
- SUG:NA
- DESC:Add an optional 'strict' parameter to getaddresses() and parseaddr() functions for CVE-2023-27043.
* Wed Oct 25 2023 zhuofeng <zhuofeng2@huawei.com> - 3.9.9-28
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:remove lto
* Fri Sep 22 2023 renhongxun <renhongxun@h-partners.com> - 3.9.9-27
- Type:bugfix
- CVE:NA

View File

@ -0,0 +1,25 @@
From d7a637e43086c51d0e2b4cd2f9324f1c60eced50 Mon Sep 17 00:00:00 2001
From: xinsheng3 <xinsheng3@huawei.com>
Date: Thu, 20 Jun 2024 15:29:05 +0800
Subject: [PATCH] update openssl version for test case.patch
---
Lib/test/test_ssl.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index 2c71c27..d95f0ef 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -589,7 +589,7 @@ class BasicSocketTests(unittest.TestCase):
self.assertGreaterEqual(fix, 0)
self.assertLess(fix, 256)
self.assertGreaterEqual(patch, 0)
- self.assertLessEqual(patch, 63)
+ self.assertLessEqual(patch, 128)
self.assertGreaterEqual(status, 0)
self.assertLessEqual(status, 15)
# Version string as returned by {Open,Libre}SSL, the format might change
--
2.33.0