fix CVE-2024-37388
This commit is contained in:
parent
7176472f2e
commit
d00ca0c0b9
372
backport-CVE-2024-37388.patch
Normal file
372
backport-CVE-2024-37388.patch
Normal file
@@ -0,0 +1,372 @@
|
||||
From b38cebf2f846e92bd63de4488fd3d1c8b568f397 Mon Sep 17 00:00:00 2001
|
||||
From: scoder <stefan_ml@behnel.de>
|
||||
Date: Fri, 29 Dec 2023 14:21:23 +0100
|
||||
Subject: [PATCH] Disable external entity resolution (XXE) by default (GH-391)
|
||||
|
||||
This prevents security risks that would allow loading arbitrary external files.
|
||||
|
||||
Closes https://bugs.launchpad.net/lxml/+bug/1742885
|
||||
Supersedes https://github.com/lxml/lxml/pull/130
|
||||
---
|
||||
doc/FAQ.txt | 12 +++--
|
||||
src/lxml/includes/xmlparser.pxd | 18 +++++++-
|
||||
src/lxml/parser.pxi | 70 ++++++++++++++++++++++++++--
|
||||
src/lxml/tests/test_etree.py | 81 +++++++++++++++++++++++++++++++++
|
||||
4 files changed, 170 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/doc/FAQ.txt b/doc/FAQ.txt
|
||||
index 48f69a6..7f3a524 100644
|
||||
--- a/doc/FAQ.txt
|
||||
+++ b/doc/FAQ.txt
|
||||
@@ -1107,9 +1107,9 @@ useless for the data commonly sent through web services and
|
||||
can simply be disabled, which rules out several types of
|
||||
denial of service attacks at once. This also involves an attack
|
||||
that reads local files from the server, as XML entities can be
|
||||
-defined to expand into their content. Consequently, version
|
||||
-1.2 of the SOAP standard explicitly disallows entity references
|
||||
-in the XML stream.
|
||||
+defined to expand into the content of external resources.
|
||||
+Consequently, version 1.2 of the SOAP standard explicitly
|
||||
+disallows entity references in the XML stream.
|
||||
|
||||
To disable entity expansion, use an XML parser that is configured
|
||||
with the option ``resolve_entities=False``. Then, after (or
|
||||
@@ -1117,7 +1117,11 @@ while) parsing the document, use ``root.iter(etree.Entity)`` to
|
||||
recursively search for entity references. If it contains any,
|
||||
reject the entire input document with a suitable error response.
|
||||
In lxml 3.x, you can also use the new DTD introspection API to
|
||||
-apply your own restrictions on input documents.
|
||||
+apply your own restrictions on input documents. Since version 5.x,
|
||||
+lxml disables the expansion of external entities (XXE) by default.
|
||||
+If you really want to allow loading external files into XML documents
|
||||
+using this functionality, you have to explicitly set
|
||||
+``resolve_entities=True``.
|
||||
|
||||
Another attack to consider is compression bombs. If you allow
|
||||
compressed input into your web service, attackers can try to send
|
||||
diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd
|
||||
index 45acfc8..3945495 100644
|
||||
--- a/src/lxml/includes/xmlparser.pxd
|
||||
+++ b/src/lxml/includes/xmlparser.pxd
|
||||
@@ -1,9 +1,9 @@
|
||||
from libc.string cimport const_char
|
||||
|
||||
from lxml.includes.tree cimport (
|
||||
- xmlDoc, xmlNode, xmlDict, xmlDtd, xmlChar, const_xmlChar)
|
||||
+ xmlDoc, xmlNode, xmlEntity, xmlDict, xmlDtd, xmlChar, const_xmlChar)
|
||||
from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
|
||||
-from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc
|
||||
+from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc, xmlErrorLevel
|
||||
|
||||
|
||||
cdef extern from "libxml/parser.h":
|
||||
@@ -47,11 +47,14 @@ cdef extern from "libxml/parser.h":
|
||||
|
||||
ctypedef void (*referenceSAXFunc)(void * ctx, const_xmlChar* name)
|
||||
|
||||
+ ctypedef xmlEntity* (*getEntitySAXFunc)(void* ctx, const_xmlChar* name)
|
||||
+
|
||||
cdef int XML_SAX2_MAGIC
|
||||
|
||||
cdef extern from "libxml/tree.h":
|
||||
ctypedef struct xmlParserInput:
|
||||
int line
|
||||
+ int col
|
||||
int length
|
||||
const_xmlChar* base
|
||||
const_xmlChar* cur
|
||||
@@ -76,6 +79,7 @@ cdef extern from "libxml/tree.h":
|
||||
charactersSAXFunc characters
|
||||
cdataBlockSAXFunc cdataBlock
|
||||
referenceSAXFunc reference
|
||||
+ getEntitySAXFunc getEntity
|
||||
commentSAXFunc comment
|
||||
processingInstructionSAXFunc processingInstruction
|
||||
startDocumentSAXFunc startDocument
|
||||
@@ -150,6 +154,8 @@ cdef extern from "libxml/parser.h":
|
||||
int inSubset
|
||||
int charset
|
||||
xmlParserInput* input
|
||||
+ int inputNr
|
||||
+ xmlParserInput** inputTab
|
||||
|
||||
ctypedef enum xmlParserOption:
|
||||
XML_PARSE_RECOVER = 1 # recover on errors
|
||||
@@ -212,6 +218,12 @@ cdef extern from "libxml/parser.h":
|
||||
char* filename, const_char* encoding,
|
||||
int options) nogil
|
||||
|
||||
+ cdef void xmlErrParser(xmlParserCtxt* ctxt, xmlNode* node,
|
||||
+ int domain, int code, xmlErrorLevel level,
|
||||
+ const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
|
||||
+ int int1, const char *msg, ...)
|
||||
+
|
||||
+
|
||||
# iterparse:
|
||||
|
||||
cdef xmlParserCtxt* xmlCreatePushParserCtxt(xmlSAXHandler* sax,
|
||||
@@ -233,6 +245,8 @@ cdef extern from "libxml/parser.h":
|
||||
cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() nogil
|
||||
cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) nogil
|
||||
|
||||
+ cdef xmlEntity* xmlSAX2GetEntity(void* ctxt, const_xmlChar* name) nogil
|
||||
+
|
||||
# DTDs:
|
||||
|
||||
cdef xmlDtd* xmlParseDTD(const_xmlChar* ExternalID, const_xmlChar* SystemID) nogil
|
||||
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
|
||||
index 3187a38..2f0ce80 100644
|
||||
--- a/src/lxml/parser.pxi
|
||||
+++ b/src/lxml/parser.pxi
|
||||
@@ -794,6 +794,7 @@ cdef inline int _fixHtmlDictNodeNames(tree.xmlDict* c_dict,
|
||||
c_attr = c_attr.next
|
||||
return 0
|
||||
|
||||
+
|
||||
@cython.internal
|
||||
cdef class _BaseParser:
|
||||
cdef ElementClassLookup _class_lookup
|
||||
@@ -806,6 +807,7 @@ cdef class _BaseParser:
|
||||
cdef bint _remove_pis
|
||||
cdef bint _strip_cdata
|
||||
cdef bint _collect_ids
|
||||
+ cdef bint _resolve_external_entities
|
||||
cdef XMLSchema _schema
|
||||
cdef bytes _filename
|
||||
cdef readonly object target
|
||||
@@ -814,7 +816,7 @@ cdef class _BaseParser:
|
||||
|
||||
def __init__(self, int parse_options, bint for_html, XMLSchema schema,
|
||||
remove_comments, remove_pis, strip_cdata, collect_ids,
|
||||
- target, encoding):
|
||||
+ target, encoding, bint resolve_external_entities=True):
|
||||
cdef tree.xmlCharEncodingHandler* enchandler
|
||||
cdef int c_encoding
|
||||
if not isinstance(self, (XMLParser, HTMLParser)):
|
||||
@@ -827,6 +829,7 @@ cdef class _BaseParser:
|
||||
self._remove_pis = remove_pis
|
||||
self._strip_cdata = strip_cdata
|
||||
self._collect_ids = collect_ids
|
||||
+ self._resolve_external_entities = resolve_external_entities
|
||||
self._schema = schema
|
||||
|
||||
self._resolvers = _ResolverRegistry()
|
||||
@@ -906,6 +909,8 @@ cdef class _BaseParser:
|
||||
if self._strip_cdata:
|
||||
# hard switch-off for CDATA nodes => makes them plain text
|
||||
pctxt.sax.cdataBlock = NULL
|
||||
+ if not self._resolve_external_entities:
|
||||
+ pctxt.sax.getEntity = _getInternalEntityOnly
|
||||
|
||||
cdef int _registerHtmlErrorHandler(self, xmlparser.xmlParserCtxt* c_ctxt) except -1:
|
||||
cdef xmlparser.xmlSAXHandler* sax = c_ctxt.sax
|
||||
@@ -1206,6 +1211,56 @@ cdef class _BaseParser:
|
||||
finally:
|
||||
context.cleanup()
|
||||
|
||||
+cdef tree.xmlEntity* _getInternalEntityOnly(void* ctxt, const_xmlChar* name):
|
||||
+ """
|
||||
+ Callback function to intercept the entity resolution when external entity loading is disabled.
|
||||
+ """
|
||||
+ cdef tree.xmlEntity* entity = xmlparser.xmlSAX2GetEntity(ctxt, name)
|
||||
+ if not entity:
|
||||
+ return NULL
|
||||
+ if entity.etype not in (
|
||||
+ tree.xmlEntityType.XML_EXTERNAL_GENERAL_PARSED_ENTITY,
|
||||
+ tree.xmlEntityType.XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
|
||||
+ tree.xmlEntityType.XML_EXTERNAL_PARAMETER_ENTITY):
|
||||
+ return entity
|
||||
+
|
||||
+ # Reject all external entities and fail the parsing instead. There is currently
|
||||
+ # no way in libxml2 to just prevent the entity resolution in this case.
|
||||
+ cdef xmlerror.xmlError c_error
|
||||
+ cdef xmlerror.xmlStructuredErrorFunc err_func
|
||||
+ cdef xmlparser.xmlParserInput* parser_input
|
||||
+ cdef void* err_context
|
||||
+
|
||||
+ c_ctxt = <xmlparser.xmlParserCtxt *> ctxt
|
||||
+ err_func = xmlerror.xmlStructuredError
|
||||
+ if err_func:
|
||||
+ parser_input = c_ctxt.input
|
||||
+ # Copied from xmlVErrParser() in libxml2: get current input from stack.
|
||||
+ if parser_input and parser_input.filename is NULL and c_ctxt.inputNr > 1:
|
||||
+ parser_input = c_ctxt.inputTab[c_ctxt.inputNr - 2]
|
||||
+
|
||||
+ c_error = xmlerror.xmlError(
|
||||
+ domain=xmlerror.xmlErrorDomain.XML_FROM_PARSER,
|
||||
+ code=xmlerror.xmlParserErrors.XML_ERR_EXT_ENTITY_STANDALONE,
|
||||
+ level=xmlerror.xmlErrorLevel.XML_ERR_FATAL,
|
||||
+ message=b"External entity resolution is disabled for security reasons "
|
||||
+ b"when resolving '&%s;'. Use 'XMLParser(resolve_entities=True)' "
|
||||
+ b"if you consider it safe to enable it.",
|
||||
+ file=parser_input.filename,
|
||||
+ node=entity,
|
||||
+ str1=<char*> name,
|
||||
+ str2=NULL,
|
||||
+ str3=NULL,
|
||||
+ line=parser_input.line if parser_input else 0,
|
||||
+ int1=0,
|
||||
+ int2=parser_input.col if parser_input else 0,
|
||||
+ )
|
||||
+ err_context = xmlerror.xmlStructuredErrorContext
|
||||
+ err_func(err_context, &c_error)
|
||||
+
|
||||
+ c_ctxt.wellFormed = 0
|
||||
+ # The entity was looked up and does not need to be freed.
|
||||
+ return NULL
|
||||
|
||||
cdef void _initSaxDocument(void* ctxt) with gil:
|
||||
xmlparser.xmlSAX2StartDocument(ctxt)
|
||||
@@ -1508,12 +1563,14 @@ cdef class XMLParser(_FeedParser):
|
||||
- strip_cdata - replace CDATA sections by normal text content (default: True)
|
||||
- compact - save memory for short text content (default: True)
|
||||
- collect_ids - use a hash table of XML IDs for fast access (default: True, always True with DTD validation)
|
||||
- - resolve_entities - replace entities by their text value (default: True)
|
||||
- huge_tree - disable security restrictions and support very deep trees
|
||||
and very long text content (only affects libxml2 2.7+)
|
||||
|
||||
Other keyword arguments:
|
||||
-
|
||||
+ - resolve_entities - replace entities by their text value: False for keeping the
|
||||
+ entity references, True for resolving them, and 'internal' for resolving
|
||||
+ internal definitions only (no external file/URL access).
|
||||
+ The default used to be True and was changed to 'internal' in lxml 5.0.
|
||||
- encoding - override the document encoding
|
||||
- target - a parser target object that will receive the parse events
|
||||
- schema - an XMLSchema to validate against
|
||||
@@ -1525,10 +1582,11 @@ cdef class XMLParser(_FeedParser):
|
||||
def __init__(self, *, encoding=None, attribute_defaults=False,
|
||||
dtd_validation=False, load_dtd=False, no_network=True,
|
||||
ns_clean=False, recover=False, XMLSchema schema=None,
|
||||
- huge_tree=False, remove_blank_text=False, resolve_entities=True,
|
||||
+ huge_tree=False, remove_blank_text=False, resolve_entities='internal',
|
||||
remove_comments=False, remove_pis=False, strip_cdata=True,
|
||||
collect_ids=True, target=None, compact=True):
|
||||
cdef int parse_options
|
||||
+ cdef bint resolve_external = True
|
||||
parse_options = _XML_DEFAULT_PARSE_OPTIONS
|
||||
if load_dtd:
|
||||
parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD
|
||||
@@ -1553,12 +1611,14 @@ cdef class XMLParser(_FeedParser):
|
||||
parse_options = parse_options ^ xmlparser.XML_PARSE_COMPACT
|
||||
if not resolve_entities:
|
||||
parse_options = parse_options ^ xmlparser.XML_PARSE_NOENT
|
||||
+ elif resolve_entities == 'internal':
|
||||
+ resolve_external = False
|
||||
if not strip_cdata:
|
||||
parse_options = parse_options ^ xmlparser.XML_PARSE_NOCDATA
|
||||
|
||||
_BaseParser.__init__(self, parse_options, 0, schema,
|
||||
remove_comments, remove_pis, strip_cdata,
|
||||
- collect_ids, target, encoding)
|
||||
+ collect_ids, target, encoding, resolve_external)
|
||||
|
||||
|
||||
cdef class XMLPullParser(XMLParser):
|
||||
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
|
||||
index 14b21f7..bc7548f 100644
|
||||
--- a/src/lxml/tests/test_etree.py
|
||||
+++ b/src/lxml/tests/test_etree.py
|
||||
@@ -12,11 +12,14 @@ from __future__ import absolute_import
|
||||
from collections import OrderedDict
|
||||
import os.path
|
||||
import unittest
|
||||
+import contextlib
|
||||
import copy
|
||||
import sys
|
||||
import re
|
||||
import gc
|
||||
import operator
|
||||
+import shutil
|
||||
+import tempfile
|
||||
import textwrap
|
||||
import zlib
|
||||
import gzip
|
||||
@@ -1675,6 +1678,84 @@ class ETreeOnlyTestCase(HelperTestCase):
|
||||
self.assertEqual(_bytes('<doc>&myentity;</doc>'),
|
||||
tostring(root))
|
||||
|
||||
+ @contextlib.contextmanager
|
||||
+ def _xml_test_file(self, name, content=b'<evil>XML</evil>'):
|
||||
+ temp_dir = tempfile.mkdtemp()
|
||||
+ try:
|
||||
+ xml_file = os.path.join(temp_dir, name)
|
||||
+ with open(xml_file, 'wb') as tmpfile:
|
||||
+ tmpfile.write(content)
|
||||
+ yield xml_file
|
||||
+ finally:
|
||||
+ shutil.rmtree(temp_dir)
|
||||
+
|
||||
+ def test_entity_parse_external(self):
|
||||
+ fromstring = self.etree.fromstring
|
||||
+ tostring = self.etree.tostring
|
||||
+ parser = self.etree.XMLParser(resolve_entities=True)
|
||||
+
|
||||
+ with self._xml_test_file("entity.xml") as entity_file:
|
||||
+ xml = '''
|
||||
+ <!DOCTYPE doc [
|
||||
+ <!ENTITY my_external_entity SYSTEM "%s">
|
||||
+ ]>
|
||||
+ <doc>&my_external_entity;</doc>
|
||||
+ ''' % path2url(entity_file)
|
||||
+ root = fromstring(xml, parser)
|
||||
+
|
||||
+ self.assertEqual(_bytes('<doc><evil>XML</evil></doc>'),
|
||||
+ tostring(root))
|
||||
+ self.assertEqual(root.tag, 'doc')
|
||||
+ self.assertEqual(root[0].tag, 'evil')
|
||||
+ self.assertEqual(root[0].text, 'XML')
|
||||
+ self.assertEqual(root[0].tail, None)
|
||||
+
|
||||
+ def test_entity_parse_external_no_resolve(self):
|
||||
+ fromstring = self.etree.fromstring
|
||||
+ parser = self.etree.XMLParser(resolve_entities=False)
|
||||
+ Entity = self.etree.Entity
|
||||
+
|
||||
+ with self._xml_test_file("entity.xml") as entity_file:
|
||||
+ xml = '''
|
||||
+ <!DOCTYPE doc [
|
||||
+ <!ENTITY my_external_entity SYSTEM "%s">
|
||||
+ ]>
|
||||
+ <doc>&my_external_entity;</doc>
|
||||
+ ''' % path2url(entity_file)
|
||||
+ root = fromstring(xml, parser)
|
||||
+
|
||||
+ self.assertEqual(root[0].tag, Entity)
|
||||
+ self.assertEqual(root[0].text, "&my_external_entity;")
|
||||
+
|
||||
+ def test_entity_parse_no_external_default(self):
|
||||
+ fromstring = self.etree.fromstring
|
||||
+
|
||||
+ with self._xml_test_file("entity.xml") as entity_file:
|
||||
+ xml = '''
|
||||
+ <!DOCTYPE doc [
|
||||
+ <!ENTITY my_failing_external_entity SYSTEM "%s">
|
||||
+ ]>
|
||||
+ <doc>&my_failing_external_entity;</doc>
|
||||
+ ''' % path2url(entity_file)
|
||||
+
|
||||
+ try:
|
||||
+ fromstring(xml)
|
||||
+ except self.etree.XMLSyntaxError as exc:
|
||||
+ exception = exc
|
||||
+ else:
|
||||
+ self.assertTrue(False, "XMLSyntaxError was not raised")
|
||||
+
|
||||
+ self.assertIn("my_failing_external_entity", str(exception))
|
||||
+ self.assertTrue(exception.error_log)
|
||||
+ # Depending on the libxml2 version, we get different errors here,
|
||||
+ # not necessarily the one that lxml produced. But it should fail either way.
|
||||
+ for error in exception.error_log:
|
||||
+ if "my_failing_external_entity" in error.message:
|
||||
+ self.assertEqual(5, error.line)
|
||||
+ break
|
||||
+ else:
|
||||
+ self.assertFalse("entity error not found in parser error log")
|
||||
+
|
||||
def test_entity_restructure(self):
|
||||
xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
|
||||
<root>
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@@ -7,7 +7,7 @@ The latest release works with all CPython versions from 2.7 to 3.7.
|
||||
|
||||
Name: python-%{modname}
|
||||
Version: 4.7.1
|
||||
Release: 7
|
||||
Release: 8
|
||||
Summary: XML processing library combining libxml2/libxslt with the ElementTree API
|
||||
License: BSD
|
||||
URL: https://files.pythonhosted.org
|
||||
@@ -16,6 +16,7 @@ Source0: https://files.pythonhosted.org/packages/source/l/lxml/lxml-%{ver
|
||||
Patch6000: backport-CVE-2022-2309.patch
|
||||
Patch6001: backport-Work-around-libxml2-bug-in-affected-versions.patch
|
||||
Patch6002: Fix-test_elementtree-with-Expat-2.6.0.patch
|
||||
Patch6003: backport-CVE-2024-37388.patch
|
||||
|
||||
BuildRequires: gcc libxml2-devel libxslt-devel
|
||||
|
||||
@@ -56,6 +57,12 @@ make test3
|
||||
%doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt
|
||||
|
||||
%changelog
|
||||
* Wed Jun 12 2024 zhuofeng <zhuofeng2@huawei.com> - 4.7.1-8
|
||||
- Type:CVE
|
||||
- CVE:CVE-2024-37388
|
||||
- SUG:NA
|
||||
- DESC:fix CVE-2024-37388
|
||||
|
||||
* Wed May 29 2024 zhuofeng <zhuofeng2@huawei.com> - 4.7.1-7
|
||||
- skip the failed test
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user