backport upstream patches

(cherry picked from commit be659ff1eb35337744c9b331bbbd56f82f692e67)
This commit is contained in:
renxichen 2023-09-22 07:46:00 +00:00 committed by openeuler-sync-bot
parent 75f97a2898
commit 4a2baa7531
5 changed files with 398 additions and 49 deletions

View File

@ -0,0 +1,64 @@
From b53d0ff4312cc2a67b9c5752844b140c08514648 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Mon, 22 May 2023 03:40:50 -0700
Subject: [PATCH] [3.9] gh-104049: do not expose on-disk location from
SimpleHTTPRequestHandler (GH-104067) (#104120)
Do not expose the local server's on-disk location from `SimpleHTTPRequestHandler` when generating a directory index. (unnecessary information disclosure)
(cherry picked from commit c7c3a60c88de61a79ded9fdaf6bc6a29da4efb9a)
Co-authored-by: Ethan Furman <ethan@stoneleaf.us>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com>
---
Lib/http/server.py | 2 +-
Lib/test/test_httpservers.py | 8 ++++++++
.../2023-05-01-15-03-25.gh-issue-104049.b01Y3g.rst | 2 ++
3 files changed, 11 insertions(+), 1 deletion(-)
create mode 100644 Misc/NEWS.d/next/Security/2023-05-01-15-03-25.gh-issue-104049.b01Y3g.rst
diff --git a/Lib/http/server.py b/Lib/http/server.py
index cf8933c3db..969df7335f 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -791,7 +791,7 @@ def list_directory(self, path):
displaypath = urllib.parse.unquote(self.path,
errors='surrogatepass')
except UnicodeDecodeError:
- displaypath = urllib.parse.unquote(path)
+ displaypath = urllib.parse.unquote(self.path)
displaypath = html.escape(displaypath, quote=False)
enc = sys.getfilesystemencoding()
title = 'Directory listing for %s' % displaypath
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index db9ee29e5f..153206da1a 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -415,6 +415,14 @@ def test_undecodable_filename(self):
self.check_status_and_reason(response, HTTPStatus.OK,
data=support.TESTFN_UNDECODABLE)
+ def test_undecodable_parameter(self):
+ # sanity check using a valid parameter
+ response = self.request(self.base_url + '/?x=123').read()
+ self.assertRegex(response, f'listing for {self.base_url}/\?x=123'.encode('latin1'))
+ # now the bogus encoding
+ response = self.request(self.base_url + '/?x=%bb').read()
+ self.assertRegex(response, f'listing for {self.base_url}/\?x=\xef\xbf\xbd'.encode('latin1'))
+
def test_get_dir_redirect_location_domain_injection_bug(self):
"""Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location.
diff --git a/Misc/NEWS.d/next/Security/2023-05-01-15-03-25.gh-issue-104049.b01Y3g.rst b/Misc/NEWS.d/next/Security/2023-05-01-15-03-25.gh-issue-104049.b01Y3g.rst
new file mode 100644
index 0000000000..969deb26bf
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-05-01-15-03-25.gh-issue-104049.b01Y3g.rst
@@ -0,0 +1,2 @@
+Do not expose the local on-disk location in directory indexes
+produced by :class:`http.server.SimpleHTTPRequestHandler`.
--
2.33.0

View File

@ -0,0 +1,90 @@
From 3d5dd1eee265ec43dd96d89656c2a1c207dd5815 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Mon, 22 May 2023 03:41:30 -0700
Subject: [PATCH] [3.9] gh-99889: Fix directory traversal security flaw in
uu.decode() (GH-104096) (#104331)
(cherry picked from commit 0aeda297931820436a50b78f4f7f0597274b5df4)
Co-authored-by: Sam Carroll <70000253+samcarroll42@users.noreply.github.com>
---
Lib/test/test_uu.py | 28 +++++++++++++++++++
Lib/uu.py | 9 +++++-
...3-05-02-17-56-32.gh-issue-99889.l664SU.rst | 2 ++
3 files changed, 38 insertions(+), 1 deletion(-)
mode change 100755 => 100644 Lib/uu.py
create mode 100644 Misc/NEWS.d/next/Security/2023-05-02-17-56-32.gh-issue-99889.l664SU.rst
diff --git a/Lib/test/test_uu.py b/Lib/test/test_uu.py
index 4c639b7bd5..410eb8e392 100644
--- a/Lib/test/test_uu.py
+++ b/Lib/test/test_uu.py
@@ -145,6 +145,34 @@ def test_newlines_escaped(self):
uu.encode(inp, out, filename)
self.assertIn(safefilename, out.getvalue())
+ def test_no_directory_traversal(self):
+ relative_bad = b"""\
+begin 644 ../../../../../../../../tmp/test1
+$86)C"@``
+`
+end
+"""
+ with self.assertRaisesRegex(uu.Error, 'directory'):
+ uu.decode(io.BytesIO(relative_bad))
+ if os.altsep:
+ relative_bad_bs = relative_bad.replace(b'/', b'\\')
+ with self.assertRaisesRegex(uu.Error, 'directory'):
+ uu.decode(io.BytesIO(relative_bad_bs))
+
+ absolute_bad = b"""\
+begin 644 /tmp/test2
+$86)C"@``
+`
+end
+"""
+ with self.assertRaisesRegex(uu.Error, 'directory'):
+ uu.decode(io.BytesIO(absolute_bad))
+ if os.altsep:
+ absolute_bad_bs = absolute_bad.replace(b'/', b'\\')
+ with self.assertRaisesRegex(uu.Error, 'directory'):
+ uu.decode(io.BytesIO(absolute_bad_bs))
+
+
class UUStdIOTest(unittest.TestCase):
def setUp(self):
diff --git a/Lib/uu.py b/Lib/uu.py
old mode 100755
new mode 100644
index 9f1f37f1a6..9fe252a639
--- a/Lib/uu.py
+++ b/Lib/uu.py
@@ -130,7 +130,14 @@ def decode(in_file, out_file=None, mode=None, quiet=False):
# If the filename isn't ASCII, what's up with that?!?
out_file = hdrfields[2].rstrip(b' \t\r\n\f').decode("ascii")
if os.path.exists(out_file):
- raise Error('Cannot overwrite existing file: %s' % out_file)
+ raise Error(f'Cannot overwrite existing file: {out_file}')
+ if (out_file.startswith(os.sep) or
+ f'..{os.sep}' in out_file or (
+ os.altsep and
+ (out_file.startswith(os.altsep) or
+ f'..{os.altsep}' in out_file))
+ ):
+ raise Error(f'Refusing to write to {out_file} due to directory traversal')
if mode is None:
mode = int(hdrfields[1], 8)
#
diff --git a/Misc/NEWS.d/next/Security/2023-05-02-17-56-32.gh-issue-99889.l664SU.rst b/Misc/NEWS.d/next/Security/2023-05-02-17-56-32.gh-issue-99889.l664SU.rst
new file mode 100644
index 0000000000..b7002e81b6
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-05-02-17-56-32.gh-issue-99889.l664SU.rst
@@ -0,0 +1,2 @@
+Fixed a security flaw in :func:`uu.decode` that could allow for
+directory traversal based on the input if no ``out_file`` was specified.
--
2.33.0

View File

@ -0,0 +1,229 @@
From d7f8a5fe07b0ff3a419ccec434cc405b21a5a304 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Mon, 22 May 2023 03:42:37 -0700
Subject: [PATCH] [3.9] gh-102153: Start stripping C0 control and space chars
in `urlsplit` (GH-102508) (GH-104575) (GH-104592) (#104593)
gh-102153: Start stripping C0 control and space chars in `urlsplit` (GH-102508)
`urllib.parse.urlsplit` has already been respecting the WHATWG spec a bit GH-25595.
This adds more sanitizing to respect the "Remove any leading C0 control or space from input" [rule](https://url.spec.whatwg.org/#url-parsing:~:text=Remove%20any%20leading%20and%20trailing%20C0%20control%20or%20space%20from%20input.) in response to [CVE-2023-24329](https://nvd.nist.gov/vuln/detail/CVE-2023-24329).
I simplified the docs by eliding the state of the world explanatory
paragraph in this security release only backport. (people will see
that in the mainline /3/ docs)
(cherry picked from commit 2f630e1ce18ad2e07428296532a68b11dc66ad10)
(cherry picked from commit 610cc0ab1b760b2abaac92bd256b96191c46b941)
(cherry picked from commit f48a96a28012d28ae37a2f4587a780a5eb779946)
Co-authored-by: Illia Volochii <illia.volochii@gmail.com>
Co-authored-by: Gregory P. Smith [Google] <greg@krypto.org>
---
Doc/library/urllib.parse.rst | 38 +++++++++++-
Lib/test/test_urlparse.py | 61 ++++++++++++++++++-
Lib/urllib/parse.py | 12 ++++
...-03-07-20-59-17.gh-issue-102153.14CLSZ.rst | 3 +
4 files changed, 111 insertions(+), 3 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index f0f8605128..9de30a182f 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -159,6 +159,10 @@ or on combining URL components into a URL string.
ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html',
params='', query='', fragment='')
+ .. warning::
+
+ :func:`urlparse` does not perform validation. See :ref:`URL parsing
+ security <url-parsing-security>` for details.
.. versionchanged:: 3.2
Added IPv6 URL parsing capabilities.
@@ -323,8 +327,14 @@ or on combining URL components into a URL string.
``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
decomposed before parsing, no error will be raised.
- Following the `WHATWG spec`_ that updates RFC 3986, ASCII newline
- ``\n``, ``\r`` and tab ``\t`` characters are stripped from the URL.
+ Following some of the `WHATWG spec`_ that updates RFC 3986, leading C0
+ control and space characters are stripped from the URL. ``\n``,
+ ``\r`` and tab ``\t`` characters are removed from the URL at any position.
+
+ .. warning::
+
+ :func:`urlsplit` does not perform validation. See :ref:`URL parsing
+ security <url-parsing-security>` for details.
.. versionchanged:: 3.6
Out-of-range port numbers now raise :exc:`ValueError`, instead of
@@ -337,6 +347,9 @@ or on combining URL components into a URL string.
.. versionchanged:: 3.9.5
ASCII newline and tab characters are stripped from the URL.
+ .. versionchanged:: 3.9.17
+ Leading WHATWG C0 control and space characters are stripped from the URL.
+
.. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
.. function:: urlunsplit(parts)
@@ -413,6 +426,27 @@ or on combining URL components into a URL string.
or ``scheme://host/path``). If *url* is not a wrapped URL, it is returned
without changes.
+.. _url-parsing-security:
+
+URL parsing security
+--------------------
+
+The :func:`urlsplit` and :func:`urlparse` APIs do not perform **validation** of
+inputs. They may not raise errors on inputs that other applications consider
+invalid. They may also succeed on some inputs that might not be considered
+URLs elsewhere. Their purpose is for practical functionality rather than
+purity.
+
+Instead of raising an exception on unusual input, they may instead return some
+component parts as empty strings. Or components may contain more than perhaps
+they should.
+
+We recommend that users of these APIs where the values may be used anywhere
+with security implications code defensively. Do some verification within your
+code before trusting a returned component part. Does that ``scheme`` make
+sense? Is that a sensible ``path``? Is there anything strange about that
+``hostname``? etc.
+
.. _parsing-ascii-encoded-bytes:
Parsing ASCII Encoded Bytes
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 31943f357f..574da5bd69 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -649,6 +649,65 @@ def test_urlsplit_remove_unsafe_bytes(self):
self.assertEqual(p.scheme, "http")
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
+ def test_urlsplit_strip_url(self):
+ noise = bytes(range(0, 0x20 + 1))
+ base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
+
+ url = noise.decode("utf-8") + base_url
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, "http")
+ self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
+ self.assertEqual(p.path, "/doc/")
+ self.assertEqual(p.query, "query=yes")
+ self.assertEqual(p.fragment, "frag")
+ self.assertEqual(p.username, "User")
+ self.assertEqual(p.password, "Pass")
+ self.assertEqual(p.hostname, "www.python.org")
+ self.assertEqual(p.port, 80)
+ self.assertEqual(p.geturl(), base_url)
+
+ url = noise + base_url.encode("utf-8")
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, b"http")
+ self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
+ self.assertEqual(p.path, b"/doc/")
+ self.assertEqual(p.query, b"query=yes")
+ self.assertEqual(p.fragment, b"frag")
+ self.assertEqual(p.username, b"User")
+ self.assertEqual(p.password, b"Pass")
+ self.assertEqual(p.hostname, b"www.python.org")
+ self.assertEqual(p.port, 80)
+ self.assertEqual(p.geturl(), base_url.encode("utf-8"))
+
+ # Test that trailing space is preserved as some applications rely on
+ # this within query strings.
+ query_spaces_url = "https://www.python.org:88/doc/?query= "
+ p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url)
+ self.assertEqual(p.scheme, "https")
+ self.assertEqual(p.netloc, "www.python.org:88")
+ self.assertEqual(p.path, "/doc/")
+ self.assertEqual(p.query, "query= ")
+ self.assertEqual(p.port, 88)
+ self.assertEqual(p.geturl(), query_spaces_url)
+
+ p = urllib.parse.urlsplit("www.pypi.org ")
+ # That "hostname" gets considered a "path" due to the
+ # trailing space and our existing logic... YUCK...
+ # and re-assembles via geturl aka unurlsplit into the original.
+ # django.core.validators.URLValidator (at least through v3.2) relies on
+ # this, for better or worse, to catch it in a ValidationError via its
+ # regular expressions.
+ # Here we test the basic round trip concept of such a trailing space.
+ self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ")
+
+ # with scheme as cache-key
+ url = "//www.python.org/"
+ scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
+ for _ in range(2):
+ p = urllib.parse.urlsplit(url, scheme=scheme)
+ self.assertEqual(p.scheme, "https")
+ self.assertEqual(p.geturl(), "https://www.python.org/")
+
def test_attributes_bad_port(self):
"""Check handling of invalid ports."""
for bytes in (False, True):
@@ -656,7 +715,7 @@ def test_attributes_bad_port(self):
for port in ("foo", "1.5", "-1", "0x10"):
with self.subTest(bytes=bytes, parse=parse, port=port):
netloc = "www.example.net:" + port
- url = "http://" + netloc
+ url = "http://" + netloc + "/"
if bytes:
netloc = netloc.encode("ascii")
url = url.encode("ascii")
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index b7965fe3d2..5b7193f67c 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -25,6 +25,10 @@
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained. The testcases in
test_urlparse.py provides a good indicator of parsing behavior.
+
+The WHATWG URL Parser spec should also be considered. We are not compliant with
+it either due to existing user code API behavior expectations (Hyrum's Law).
+It serves as a useful guide when making changes.
"""
import re
@@ -78,6 +82,10 @@
'0123456789'
'+-.')
+# Leading and trailing C0 control and space to be stripped per WHATWG spec.
+# == "".join([chr(i) for i in range(0, 0x20 + 1)])
+_WHATWG_C0_CONTROL_OR_SPACE = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
+
# Unsafe bytes to be removed per WHATWG spec
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
@@ -456,6 +464,10 @@ def urlsplit(url, scheme='', allow_fragments=True):
"""
url, scheme, _coerce_result = _coerce_args(url, scheme)
+ # Only lstrip url as some applications rely on preserving trailing space.
+ # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
+ url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE)
+ scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
url = url.replace(b, "")
diff --git a/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
new file mode 100644
index 0000000000..e57ac4ed3a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
@@ -0,0 +1,3 @@
+:func:`urllib.parse.urlsplit` now strips leading C0 control and space
+characters following the specification for URLs defined by WHATWG in
+response to CVE-2023-24329. Patch by Illia Volochii.
--
2.33.0

View File

@ -1,44 +0,0 @@
From 1bad5b2ebc2f3cb663ce425b9979b4ec4dce27b2 Mon Sep 17 00:00:00 2001
From: shixuantong <shixuantong1@huawei.com>
Date: Thu, 6 Apr 2023 03:30:44 +0000
Subject: [PATCH] fix CVE-2023-24329
---
Lib/test/test_urlparse.py | 7 +++++++
Lib/urllib/parse.py | 2 +-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index f42ed9b..b310017 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -683,6 +683,13 @@ class UrlParseTestCase(unittest.TestCase):
else:
self.assertEqual(p.scheme, "")
+ def test_attributes_bad_scheme_CVE_2023_24329(self):
+ """Check handling of invalid schemes that starts with blank characters."""
+ for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
+ url = " https://www.example.net"
+ p = parse(url)
+ self.assertEqual(p.scheme, "https")
+
def test_attributes_without_netloc(self):
# This example is straight from RFC 3261. It looks like it
# should allow the username, hostname, and port to be filled
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index bd59852..7eb3ad8 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -454,7 +454,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
Note that % escapes are not expanded.
"""
-
+ url = url.lstrip()
url, scheme, _coerce_result = _coerce_args(url, scheme)
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
--
2.33.0

View File

@ -3,7 +3,7 @@ Summary: Interpreter of the Python3 programming language
URL: https://www.python.org/
Version: 3.9.9
Release: 26
Release: 27
License: Python-2.0
%global branchversion 3.9
@ -104,14 +104,16 @@ Patch6010: backport-CVE-2022-42919.patch
Patch6011: backport-CVE-2022-45061.patch
Patch6012: backport-CVE-2022-37454.patch
Patch6013: backport-Make-urllib.parse.urlparse-enforce-that-a-scheme-mus.patch
Patch6014: backport-CVE-2007-4559.patch
Patch6015: backport-CVE-2023-40217.patch
Patch6014: backport-CVE-2023-24329.patch
Patch6015: backport-CVE-2007-4559.patch
Patch6016: backport-CVE-2023-40217.patch
Patch6017: backport-3.9-gh-104049-do-not-expose-on-disk-location-from-Si.patch
Patch6018: backport-3.9-gh-99889-Fix-directory-traversal-security-flaw-i.patch
Patch9000: add-the-sm3-method-for-obtaining-the-salt-value.patch
Patch9001: python3-Add-sw64-architecture.patch
Patch9002: Add-loongarch-support.patch
Patch9003: avoid-usage-of-md5-in-multiprocessing.patch
Patch9004: fix-CVE-2023-24329.patch
Provides: python%{branchversion} = %{version}-%{release}
Provides: python(abi) = %{branchversion}
@ -211,12 +213,14 @@ rm -r Modules/expat
%patch6013 -p1
%patch6014 -p1
%patch6015 -p1
%patch6016 -p1
%patch6017 -p1
%patch6018 -p1
%patch9000 -p1
%patch9001 -p1
%patch9002 -p1
%patch9003 -p1
%patch9004 -p1
rm Lib/ensurepip/_bundled/*.whl
rm configure pyconfig.h.in
@ -840,6 +844,12 @@ export BEP_GTDLIST="$BEP_GTDLIST_TMP"
%{_mandir}/*/*
%changelog
* Fri Sep 22 2023 renhongxun <renhongxun@h-partners.com> - 3.9.9-27
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:backport upstream patches
* Tue Sep 19 2023 zhuofeng <zhuofeng2@huawei.com> - 3.9.9-26
- Type:CVE
- CVE:CVE-2023-40217