diff --git a/backport-CVE-2022-25235-lib-Add-missing-validation-of-encoding.patch b/backport-CVE-2022-25235-lib-Add-missing-validation-of-encoding.patch new file mode 100644 index 0000000..4300e86 --- /dev/null +++ b/backport-CVE-2022-25235-lib-Add-missing-validation-of-encoding.patch @@ -0,0 +1,42 @@ +From 3f0a0cb644438d4d8e3294cd0b1245d0edb0c6c6 Mon Sep 17 00:00:00 2001 +From: Sebastian Pipping +Date: Tue, 8 Feb 2022 04:32:20 +0100 +Subject: [PATCH] lib: Add missing validation of encoding (CVE-2022-25235) + +--- + lib/xmltok_impl.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c +index 0430591b4..64a3b2c15 100644 +--- a/lib/xmltok_impl.c ++++ b/lib/xmltok_impl.c +@@ -69,7 +69,7 @@ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ +- if (! IS_NAME_CHAR(enc, ptr, n)) { \ ++ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ +@@ -98,7 +98,7 @@ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ +- if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \ ++ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ +@@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ ++ if (IS_INVALID_CHAR(enc, ptr, n)) { \ ++ *nextTokPtr = ptr; \ ++ return XML_TOK_INVALID; \ ++ } \ + if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ + ptr += n; \ + tok = XML_TOK_NAME; \ diff --git a/backport-CVE-2022-25236-lib-Protect-against-malicious-namespace-declarations.patch b/backport-CVE-2022-25236-lib-Protect-against-malicious-namespace-declarations.patch new file mode 100644 index 0000000..edfbd44 --- /dev/null +++ b/backport-CVE-2022-25236-lib-Protect-against-malicious-namespace-declarations.patch @@ -0,0 +1,32 @@ +From a2fe525e660badd64b6c557c2b1ec26ddc07f6e4 Mon Sep 17 00:00:00 2001 +From: Sebastian Pipping +Date: Sat, 12 Feb 2022 01:09:29 +0100 +Subject: [PATCH] lib: Protect against malicious namespace declarations + (CVE-2022-25236) + +--- + lib/xmlparse.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index c768f856..a3aef88c 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -3754,6 +3754,17 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, + if (! mustBeXML && isXMLNS + && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) + isXMLNS = XML_FALSE; ++ ++ // NOTE: While Expat does not validate namespace URIs against RFC 3986, ++ // we have to at least make sure that the XML processor on top of ++ // Expat (that is splitting tag names by namespace separator into ++ // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused ++ // by an attacker putting additional namespace separator characters ++ // into namespace declarations. That would be ambiguous and not to ++ // be expected. ++ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) { ++ return XML_ERROR_SYNTAX; ++ } + } + isXML = isXML && len == xmlLen; + isXMLNS = isXMLNS && len == xmlnsLen; diff --git a/backport-CVE-2022-25313-Prevent-stack-exhaustion-in-build_model.patch b/backport-CVE-2022-25313-Prevent-stack-exhaustion-in-build_model.patch new file mode 100644 index 0000000..b4069b6 --- /dev/null +++ b/backport-CVE-2022-25313-Prevent-stack-exhaustion-in-build_model.patch @@ -0,0 +1,222 @@ +From 9b4ce651b26557f16103c3a366c91934ecd439ab Mon Sep 17 00:00:00 2001 +From: Samanta Navarro +Date: Tue, 15 Feb 2022 11:54:29 +0000 +Subject: [PATCH] Prevent stack exhaustion in build_model + +It is possible to trigger stack exhaustion in build_model function if +depth of nested children in DTD element is large enough. This happens +because build_node is a recursively called function within build_model. + +The code has been adjusted to run iteratively. It uses the already +allocated heap space as temporary stack (growing from top to bottom). + +Output is identical to recursive version. No new fields in data +structures were added, i.e. it keeps full API and ABI compatibility. +Instead the numchildren variable is used to temporarily keep the +index of items (uint vs int). + +Documentation and readability improvements kindly added by Sebastian. + +Proof of Concept: + +1. Compile poc binary which parses XML file line by line + +``` +cat > poc.c << EOF + #include + #include + #include + + XML_Parser parser; + + static void XMLCALL + dummy_element_decl_handler(void *userData, const XML_Char *name, + XML_Content *model) { + XML_FreeContentModel(parser, model); + } + + int main(int argc, char *argv[]) { + FILE *fp; + char *p = NULL; + size_t s = 0; + ssize_t l; + if (argc != 2) + errx(1, "usage: poc poc.xml"); + if ((parser = XML_ParserCreate(NULL)) == NULL) + errx(1, "XML_ParserCreate"); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + if ((fp = fopen(argv[1], "r")) == NULL) + err(1, "fopen"); + while ((l = getline(&p, &s, fp)) > 0) + if (XML_Parse(parser, p, (int)l, XML_FALSE) != XML_STATUS_OK) + errx(1, "XML_Parse"); + XML_ParserFree(parser); + free(p); + fclose(fp); + return 0; + } +EOF +cc -std=c11 -D_POSIX_C_SOURCE=200809L -lexpat -o poc poc.c +``` + +2. Create XML file with a lot of nested groups in DTD element + +``` +cat > poc.xml.zst.b64 << EOF +KLUv/aQkACAAPAEA+DwhRE9DVFlQRSB1d3UgWwo8IUVMRU1FTlQgdXd1CigBAHv/58AJAgAQKAIA +ECgCABAoAgAQKAIAECgCABAoAgAQKHwAAChvd28KKQIA2/8gV24XBAIAECkCABApAgAQKQIAECkC +ABApAgAQKQIAEClVAAAgPl0+CgEA4A4I2VwwnQ== +EOF +base64 -d poc.xml.zst.b64 | zstd -d > poc.xml +``` + +3. Run Proof of Concept + +``` +./poc poc.xml +``` + +Co-authored-by: Sebastian Pipping +--- + lib/xmlparse.c | 116 +++++++++++++++++++++++++++++-------------- + 1 file changed, 79 insertions(+), 37 deletions(-) + +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index 4b43e613..594cf12c 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -7317,44 +7317,15 @@ nextScaffoldPart(XML_Parser parser) { + return next; + } + +-static void +-build_node(XML_Parser parser, int src_node, XML_Content *dest, +- XML_Content **contpos, XML_Char **strpos) { +- DTD *const dtd = parser->m_dtd; /* save one level of indirection */ +- dest->type = dtd->scaffold[src_node].type; +- dest->quant = dtd->scaffold[src_node].quant; +- if (dest->type == XML_CTYPE_NAME) { +- const XML_Char *src; +- dest->name = *strpos; +- src = dtd->scaffold[src_node].name; +- for (;;) { +- *(*strpos)++ = *src; +- if (! *src) +- break; +- src++; +- } +- dest->numchildren = 0; +- dest->children = NULL; +- } else { +- unsigned int i; +- int cn; +- dest->numchildren = dtd->scaffold[src_node].childcnt; +- dest->children = *contpos; +- *contpos += dest->numchildren; +- for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren; +- i++, cn = dtd->scaffold[cn].nextsib) { +- build_node(parser, cn, &(dest->children[i]), contpos, strpos); +- } +- dest->name = NULL; +- } +-} +- + static XML_Content * + build_model(XML_Parser parser) { ++ /* Function build_model transforms the existing parser->m_dtd->scaffold ++ * array of CONTENT_SCAFFOLD tree nodes into a new array of ++ * XML_Content tree nodes followed by a gapless list of zero-terminated ++ * strings. */ + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + XML_Content *ret; +- XML_Content *cpos; +- XML_Char *str; ++ XML_Char *str; /* the current string writing location */ + + /* Detect and prevent integer overflow. + * The preprocessor guard addresses the "always false" warning +@@ -7380,10 +7351,81 @@ build_model(XML_Parser parser) { + if (! ret) + return NULL; + +- str = (XML_Char *)(&ret[dtd->scaffCount]); +- cpos = &ret[1]; ++ /* What follows is an iterative implementation (of what was previously done ++ * recursively in a dedicated function called "build_node". The old recursive ++ * build_node could be forced into stack exhaustion from input as small as a ++ * few megabyte, and so that was a security issue. Hence, a function call ++ * stack is avoided now by resolving recursion.) ++ * ++ * The iterative approach works as follows: ++ * ++ * - We use space in the target array for building a temporary stack structure ++ * while that space is still unused. ++ * The stack grows from the array's end downwards and the "actual data" ++ * grows from the start upwards, sequentially. ++ * (Because stack grows downwards, pushing onto the stack is a decrement ++ * while popping off the stack is an increment.) ++ * ++ * - A stack element appears as a regular XML_Content node on the outside, ++ * but only uses a single field -- numchildren -- to store the source ++ * tree node array index. These are the breadcrumbs leading the way back ++ * during pre-order (node first) depth-first traversal. ++ * ++ * - The reason we know the stack will never grow into (or overlap with) ++ * the area with data of value at the start of the array is because ++ * the overall number of elements to process matches the size of the array, ++ * and the sum of fully processed nodes and yet-to-be processed nodes ++ * on the stack, cannot be more than the total number of nodes. ++ * It is possible for the top of the stack and the about-to-write node ++ * to meet, but that is safe because we get the source index out ++ * before doing any writes on that node. ++ */ ++ XML_Content *dest = ret; /* tree node writing location, moves upwards */ ++ XML_Content *const destLimit = &ret[dtd->scaffCount]; ++ XML_Content *const stackBottom = &ret[dtd->scaffCount]; ++ XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */ ++ str = (XML_Char *)&ret[dtd->scaffCount]; ++ ++ /* Push source tree root node index onto the stack */ ++ (--stackTop)->numchildren = 0; ++ ++ for (; dest < destLimit; dest++) { ++ /* Pop source tree node index off the stack */ ++ const int src_node = (int)(stackTop++)->numchildren; ++ ++ /* Convert item */ ++ dest->type = dtd->scaffold[src_node].type; ++ dest->quant = dtd->scaffold[src_node].quant; ++ if (dest->type == XML_CTYPE_NAME) { ++ const XML_Char *src; ++ dest->name = str; ++ src = dtd->scaffold[src_node].name; ++ for (;;) { ++ *str++ = *src; ++ if (! *src) ++ break; ++ src++; ++ } ++ dest->numchildren = 0; ++ dest->children = NULL; ++ } else { ++ unsigned int i; ++ int cn; ++ dest->name = NULL; ++ dest->numchildren = dtd->scaffold[src_node].childcnt; ++ dest->children = &dest[1]; ++ ++ /* Push children to the stack ++ * in a way where the first child ends up at the top of the ++ * (downwards growing) stack, in order to be processed first. */ ++ stackTop -= dest->numchildren; ++ for (i = 0, cn = dtd->scaffold[src_node].firstchild; ++ i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) { ++ (stackTop + i)->numchildren = (unsigned int)cn; ++ } ++ } ++ } + +- build_node(parser, 0, ret, &cpos, &str); + return ret; + } + diff --git a/backport-CVE-2022-25314-Prevent-integer-overflow-in-copyString.patch b/backport-CVE-2022-25314-Prevent-integer-overflow-in-copyString.patch new file mode 100644 index 0000000..e2dc326 --- /dev/null +++ b/backport-CVE-2022-25314-Prevent-integer-overflow-in-copyString.patch @@ -0,0 +1,24 @@ +From efcb347440ade24b9f1054671e6bd05e60b4cafd Mon Sep 17 00:00:00 2001 +From: Samanta Navarro +Date: Tue, 15 Feb 2022 11:56:57 +0000 +Subject: [PATCH] Prevent integer overflow in copyString + +The copyString function is only used for encoding string supplied by +the library user. +--- + lib/xmlparse.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index 4b43e613..a39377c2 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -7412,7 +7412,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, + + static XML_Char * + copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { +- int charsRequired = 0; ++ size_t charsRequired = 0; + XML_Char *result; + + /* First determine how long the string is */ diff --git a/backport-CVE-2022-25315-Prevent-integer-overflow-in-storeRawNames.patch b/backport-CVE-2022-25315-Prevent-integer-overflow-in-storeRawNames.patch new file mode 100644 index 0000000..6fdaeac --- /dev/null +++ b/backport-CVE-2022-25315-Prevent-integer-overflow-in-storeRawNames.patch @@ -0,0 +1,138 @@ +From eb0362808b4f9f1e2345a0cf203b8cc196d776d9 Mon Sep 17 00:00:00 2001 +From: Samanta Navarro +Date: Tue, 15 Feb 2022 11:55:46 +0000 +Subject: [PATCH] Prevent integer overflow in storeRawNames + +It is possible to use an integer overflow in storeRawNames for out of +boundary heap writes. Default configuration is affected. If compiled +with XML_UNICODE then the attack does not work. Compiling with +-fsanitize=address confirms the following proof of concept. + +The problem can be exploited by abusing the m_buffer expansion logic. +Even though the initial size of m_buffer is a power of two, eventually +it can end up a little bit lower, thus allowing allocations very close +to INT_MAX (since INT_MAX/2 can be surpassed). This means that tag +names can be parsed which are almost INT_MAX in size. + +Unfortunately (from an attacker point of view) INT_MAX/2 is also a +limitation in string pools. Having a tag name of INT_MAX/2 characters +or more is not possible. + +Expat can convert between different encodings. UTF-16 documents which +contain only ASCII representable characters are twice as large as their +ASCII encoded counter-parts. + +The proof of concept works by taking these three considerations into +account: + +1. Move the m_buffer size slightly below a power of two by having a + short root node . This allows the m_buffer to grow very close + to INT_MAX. +2. The string pooling forbids tag names longer than or equal to + INT_MAX/2, so keep the attack tag name smaller than that. +3. To be able to still overflow INT_MAX even though the name is + limited at INT_MAX/2-1 (nul byte) we use UTF-16 encoding and a tag + which only contains ASCII characters. UTF-16 always stores two + bytes per character while the tag name is converted to using only + one. Our attack node byte count must be a bit higher than + 2/3 INT_MAX so the converted tag name is around INT_MAX/3 which + in sum can overflow INT_MAX. + +Thanks to our small root node, m_buffer can handle 2/3 INT_MAX bytes +without running into INT_MAX boundary check. The string pooling is +able to store INT_MAX/3 as tag name because the amount is below +INT_MAX/2 limitation. And creating the sum of both eventually overflows +in storeRawNames. + +Proof of Concept: + +1. Compile expat with -fsanitize=address. + +2. Create Proof of Concept binary which iterates through input + file 16 MB at once for better performance and easier integer + calculations: + +``` +cat > poc.c << EOF + #include + #include + #include + #include + + #define CHUNK (16 * 1024 * 1024) + int main(int argc, char *argv[]) { + XML_Parser parser; + FILE *fp; + char *buf; + int i; + + if (argc != 2) + errx(1, "usage: poc file.xml"); + if ((parser = XML_ParserCreate(NULL)) == NULL) + errx(1, "failed to create expat parser"); + if ((fp = fopen(argv[1], "r")) == NULL) { + XML_ParserFree(parser); + err(1, "failed to open file"); + } + if ((buf = malloc(CHUNK)) == NULL) { + fclose(fp); + XML_ParserFree(parser); + err(1, "failed to allocate buffer"); + } + i = 0; + while (fread(buf, CHUNK, 1, fp) == 1) { + printf("iteration %d: XML_Parse returns %d\n", ++i, + XML_Parse(parser, buf, CHUNK, XML_FALSE)); + } + free(buf); + fclose(fp); + XML_ParserFree(parser); + return 0; + } +EOF +gcc -fsanitize=address -lexpat -o poc poc.c +``` + +3. Construct specially prepared UTF-16 XML file: + +``` +dd if=/dev/zero bs=1024 count=794624 | tr '\0' 'a' > poc-utf8.xml +echo -n '<' | dd conv=notrunc of=poc-utf8.xml +echo -n '><' | dd conv=notrunc of=poc-utf8.xml bs=1 seek=805306368 +iconv -f UTF-8 -t UTF-16LE poc-utf8.xml > poc-utf16.xml +``` + +4. Run proof of concept: + +``` +./poc poc-utf16.xml +``` +--- + lib/xmlparse.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index 4b43e613..f34d6ab5 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -2563,6 +2563,7 @@ storeRawNames(XML_Parser parser) { + while (tag) { + int bufSize; + int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); ++ size_t rawNameLen; + char *rawNameBuf = tag->buf + nameLen; + /* Stop if already stored. Since m_tagStack is a stack, we can stop + at the first entry that has already been copied; everything +@@ -2574,7 +2575,11 @@ storeRawNames(XML_Parser parser) { + /* For re-use purposes we need to ensure that the + size of tag->buf is a multiple of sizeof(XML_Char). + */ +- bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); ++ rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); ++ /* Detect and prevent integer overflow. */ ++ if (rawNameLen > (size_t)INT_MAX - nameLen) ++ return XML_FALSE; ++ bufSize = nameLen + (int)rawNameLen; + if (bufSize > tag->bufEnd - tag->buf) { + char *temp = (char *)REALLOC(parser, tag->buf, bufSize); + if (temp == NULL) diff --git a/backport-Fix-build_model-regression.patch b/backport-Fix-build_model-regression.patch new file mode 100644 index 0000000..eab7777 --- /dev/null +++ b/backport-Fix-build_model-regression.patch @@ -0,0 +1,134 @@ +From b12f34fe32821a69dc12ff9a021daca0856de238 Mon Sep 17 00:00:00 2001 +From: Samanta Navarro +Date: Sat, 19 Feb 2022 23:59:25 +0000 +Subject: [PATCH] Fix build_model regression. + +The iterative approach in build_model failed to fill children arrays +correctly. A preorder traversal is not required and turned out to be the +culprit. Use an easier algorithm: + +Add nodes from scaffold tree starting at index 0 (root) to the target +array whenever children are encountered. This ensures that children +are adjacent to each other. This complies with the recursive version. + +Store only the scaffold index in numchildren field to prevent a direct +processing of these children, which would require a recursive solution. +This allows the algorithm to iterate through the target array from start +to end without jumping back and forth, converting on the fly. + +Co-authored-by: Sebastian Pipping +--- + lib/xmlparse.c | 79 +++++++++++++++++++++++++++++++--------------------- + 1 file changed, 47 insertions(+), 32 deletions(-) + +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index c479a25..84885b5 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -7373,39 +7373,58 @@ build_model(XML_Parser parser) { + * + * The iterative approach works as follows: + * +- * - We use space in the target array for building a temporary stack structure +- * while that space is still unused. +- * The stack grows from the array's end downwards and the "actual data" +- * grows from the start upwards, sequentially. +- * (Because stack grows downwards, pushing onto the stack is a decrement +- * while popping off the stack is an increment.) ++ * - We have two writing pointers, both walking up the result array; one does ++ * the work, the other creates "jobs" for its colleague to do, and leads ++ * the way: + * +- * - A stack element appears as a regular XML_Content node on the outside, +- * but only uses a single field -- numchildren -- to store the source +- * tree node array index. These are the breadcrumbs leading the way back +- * during pre-order (node first) depth-first traversal. ++ * - The faster one, pointer jobDest, always leads and writes "what job ++ * to do" by the other, once they reach that place in the ++ * array: leader "jobDest" stores the source node array index (relative ++ * to array dtd->scaffold) in field "numchildren". + * +- * - The reason we know the stack will never grow into (or overlap with) +- * the area with data of value at the start of the array is because +- * the overall number of elements to process matches the size of the array, +- * and the sum of fully processed nodes and yet-to-be processed nodes +- * on the stack, cannot be more than the total number of nodes. +- * It is possible for the top of the stack and the about-to-write node +- * to meet, but that is safe because we get the source index out +- * before doing any writes on that node. ++ * - The slower one, pointer dest, looks at the value stored in the ++ * "numchildren" field (which actually holds a source node array index ++ * at that time) and puts the real data from dtd->scaffold in. ++ * ++ * - Before the loop starts, jobDest writes source array index 0 ++ * (where the root node is located) so that dest will have something to do ++ * when it starts operation. ++ * ++ * - Whenever nodes with children are encountered, jobDest appends ++ * them as new jobs, in order. As a result, tree node siblings are ++ * adjacent in the resulting array, for example: ++ * ++ * [0] root, has two children ++ * [1] first child of 0, has three children ++ * [3] first child of 1, does not have children ++ * [4] second child of 1, does not have children ++ * [5] third child of 1, does not have children ++ * [2] second child of 0, does not have children ++ * ++ * Or (the same data) presented in flat array view: ++ * ++ * [0] root, has two children ++ * ++ * [1] first child of 0, has three children ++ * [2] second child of 0, does not have children ++ * ++ * [3] first child of 1, does not have children ++ * [4] second child of 1, does not have children ++ * [5] third child of 1, does not have children ++ * ++ * - The algorithm repeats until all target array indices have been processed. + */ + XML_Content *dest = ret; /* tree node writing location, moves upwards */ + XML_Content *const destLimit = &ret[dtd->scaffCount]; +- XML_Content *const stackBottom = &ret[dtd->scaffCount]; +- XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */ ++ XML_Content *jobDest = ret; /* next free writing location in target array */ + str = (XML_Char *)&ret[dtd->scaffCount]; + +- /* Push source tree root node index onto the stack */ +- (--stackTop)->numchildren = 0; ++ /* Add the starting job, the root node (index 0) of the source tree */ ++ (jobDest++)->numchildren = 0; + + for (; dest < destLimit; dest++) { +- /* Pop source tree node index off the stack */ +- const int src_node = (int)(stackTop++)->numchildren; ++ /* Retrieve source tree array index from job storage */ ++ const int src_node = (int)dest->numchildren; + + /* Convert item */ + dest->type = dtd->scaffold[src_node].type; +@@ -7427,16 +7446,12 @@ build_model(XML_Parser parser) { + int cn; + dest->name = NULL; + dest->numchildren = dtd->scaffold[src_node].childcnt; +- dest->children = &dest[1]; ++ dest->children = jobDest; + +- /* Push children to the stack +- * in a way where the first child ends up at the top of the +- * (downwards growing) stack, in order to be processed first. */ +- stackTop -= dest->numchildren; ++ /* Append scaffold indices of children to array */ + for (i = 0, cn = dtd->scaffold[src_node].firstchild; +- i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) { +- (stackTop + i)->numchildren = (unsigned int)cn; +- } ++ i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) ++ (jobDest++)->numchildren = (unsigned int)cn; + } + } + +-- +1.8.3.1 + diff --git a/backport-tests-Cover-CVE-2022-25236.patch b/backport-tests-Cover-CVE-2022-25236.patch new file mode 100644 index 0000000..825107c --- /dev/null +++ b/backport-tests-Cover-CVE-2022-25236.patch @@ -0,0 +1,60 @@ +From 2de077423fb22750ebea599677d523b53cb93b1d Mon Sep 17 00:00:00 2001 +From: Sebastian Pipping +Date: Sat, 12 Feb 2022 00:51:43 +0100 +Subject: [PATCH] tests: Cover CVE-2022-25236 + +--- + tests/runtests.c | 30 ++++++++++++++++++++++++++++++ + 1 file changed, 30 insertions(+) + +diff --git a/tests/runtests.c b/tests/runtests.c +index d07203f..bc5344b 100644 +--- a/tests/runtests.c ++++ b/tests/runtests.c +@@ -7220,6 +7220,35 @@ START_TEST(test_ns_double_colon_doctype) { + } + END_TEST + ++START_TEST(test_ns_separator_in_uri) { ++ struct test_case { ++ enum XML_Status expectedStatus; ++ const char *doc; ++ }; ++ struct test_case cases[] = { ++ {XML_STATUS_OK, ""}, ++ {XML_STATUS_ERROR, ""}, ++ }; ++ ++ size_t i = 0; ++ size_t failCount = 0; ++ for (; i < sizeof(cases) / sizeof(cases[0]); i++) { ++ XML_Parser parser = XML_ParserCreateNS(NULL, '\n'); ++ XML_SetElementHandler(parser, dummy_start_element, dummy_end_element); ++ if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc), ++ /*isFinal*/ XML_TRUE) ++ != cases[i].expectedStatus) { ++ failCount++; ++ } ++ XML_ParserFree(parser); ++ } ++ ++ if (failCount) { ++ fail("Namespace separator handling is broken"); ++ } ++} ++END_TEST ++ + /* Control variable; the number of times duff_allocator() will successfully + * allocate */ + #define ALLOC_ALWAYS_SUCCEED (-1) +@@ -11905,6 +11934,7 @@ make_suite(void) { + tcase_add_test(tc_namespace, test_ns_utf16_doctype); + tcase_add_test(tc_namespace, test_ns_invalid_doctype); + tcase_add_test(tc_namespace, test_ns_double_colon_doctype); ++ tcase_add_test(tc_namespace, test_ns_separator_in_uri); + + suite_add_tcase(s, tc_misc); + tcase_add_checked_fixture(tc_misc, NULL, basic_teardown); +-- +1.8.3.1 + diff --git a/backport-tests-Cover-missing-validation-of-encoding.patch b/backport-tests-Cover-missing-validation-of-encoding.patch new file mode 100644 index 0000000..c27dccd --- /dev/null +++ b/backport-tests-Cover-missing-validation-of-encoding.patch @@ -0,0 +1,154 @@ +From 6a5510bc6b7efe743356296724e0b38300f05379 Mon Sep 17 00:00:00 2001 +From: Sebastian Pipping +Date: Tue, 8 Feb 2022 04:06:21 +0100 +Subject: [PATCH] tests: Cover missing validation of encoding + (CVE-2022-25235) + +--- + tests/runtests.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 109 insertions(+) + +diff --git a/tests/runtests.c b/tests/runtests.c +index bc5344b..9b155b8 100644 +--- a/tests/runtests.c ++++ b/tests/runtests.c +@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) { + } + END_TEST + ++START_TEST(test_utf8_in_start_tags) { ++ struct test_case { ++ bool goodName; ++ bool goodNameStart; ++ const char *tagName; ++ }; ++ ++ // The idea with the tests below is this: ++ // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences ++ // go to isNever and are hence not a concern. ++ // ++ // We start with a character that is a valid name character ++ // (or even name-start character, see XML 1.0r4 spec) and then we flip ++ // single bits at places where (1) the result leaves the UTF-8 encoding space ++ // and (2) we stay in the same n-byte sequence family. ++ // ++ // The flipped bits are highlighted in angle brackets in comments, ++ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped ++ // the most significant bit to 1 to leave UTF-8 encoding space. ++ struct test_case cases[] = { ++ // 1-byte UTF-8: [0xxx xxxx] ++ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' ++ {false, false, "\xBA"}, // [<1>011 1010] ++ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' ++ {false, false, "\xB9"}, // [<1>011 1001] ++ ++ // 2-byte UTF-8: [110x xxxx] [10xx xxxx] ++ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = ++ // Arabic small waw U+06E5 ++ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] ++ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] ++ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] ++ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = ++ // combining char U+0301 ++ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] ++ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] ++ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] ++ ++ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] ++ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = ++ // Devanagari Letter A U+0905 ++ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] ++ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] ++ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] ++ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] ++ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] ++ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = ++ // combining char U+0901 ++ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] ++ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] ++ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] ++ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] ++ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] ++ }; ++ const bool atNameStart[] = {true, false}; ++ ++ size_t i = 0; ++ char doc[1024]; ++ size_t failCount = 0; ++ ++ for (; i < sizeof(cases) / sizeof(cases[0]); i++) { ++ size_t j = 0; ++ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { ++ const bool expectedSuccess ++ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; ++ sprintf(doc, "<%s%s>