!34 Fix CVE-2022-25235 CVE-2022-25236 CVE-2022-25313 CVE-2022-25314 CVE-2022-25315

From: @yang_zhuang_zhuang Reviewed-by: @xiezhipeng1 Signed-off-by: @xiezhipeng1
2022-02-28 01:11:00 +00:00 · 2022-02-28 01:11:00 +00:00 · 570b409ec8
commit 570b409ec8
parent 0922fc74a6 351f6d23fd
10 changed files with 930 additions and 1 deletions
--- a/backport-CVE-2022-25235-lib-Add-missing-validation-of-encoding.patch
+++ b/backport-CVE-2022-25235-lib-Add-missing-validation-of-encoding.patch
@@ -0,0 +1,42 @@
+From 3f0a0cb644438d4d8e3294cd0b1245d0edb0c6c6 Mon Sep 17 00:00:00 2001
+From: Sebastian Pipping <sebastian@pipping.org>
+Date: Tue, 8 Feb 2022 04:32:20 +0100
+Subject: [PATCH] lib: Add missing validation of encoding (CVE-2022-25235)
+
+---
+ lib/xmltok_impl.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
+index 0430591b4..64a3b2c15 100644
+--- a/lib/xmltok_impl.c
+++ b/lib/xmltok_impl.c
+@@ -69,7 +69,7 @@
+   case BT_LEAD##n:                                                             \
+     if (end - ptr < n)                                                         \
+       return XML_TOK_PARTIAL_CHAR;                                             \
+-    if (! IS_NAME_CHAR(enc, ptr, n)) {                                         \
++    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) {         \
+       *nextTokPtr = ptr;                                                       \
+       return XML_TOK_INVALID;                                                  \
+     }                                                                          \
+@@ -98,7 +98,7 @@
+   case BT_LEAD##n:                                                             \
+     if (end - ptr < n)                                                         \
+       return XML_TOK_PARTIAL_CHAR;                                             \
+-    if (! IS_NMSTRT_CHAR(enc, ptr, n)) {                                       \
++    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) {       \
+       *nextTokPtr = ptr;                                                       \
+       return XML_TOK_INVALID;                                                  \
+     }                                                                          \
+@@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
+   case BT_LEAD##n:                                                             \
+     if (end - ptr < n)                                                         \
+       return XML_TOK_PARTIAL_CHAR;                                             \
++    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
++      *nextTokPtr = ptr;                                                       \
++      return XML_TOK_INVALID;                                                  \
++    }                                                                          \
+     if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
+       ptr += n;                                                                \
+       tok = XML_TOK_NAME;                                                      \
--- a/backport-CVE-2022-25236-lib-Protect-against-malicious-namespace-declarations.patch
+++ b/backport-CVE-2022-25236-lib-Protect-against-malicious-namespace-declarations.patch
@@ -0,0 +1,32 @@
+From a2fe525e660badd64b6c557c2b1ec26ddc07f6e4 Mon Sep 17 00:00:00 2001
+From: Sebastian Pipping <sebastian@pipping.org>
+Date: Sat, 12 Feb 2022 01:09:29 +0100
+Subject: [PATCH] lib: Protect against malicious namespace declarations
+ (CVE-2022-25236)
+
+---
+ lib/xmlparse.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/lib/xmlparse.c b/lib/xmlparse.c
+index c768f856..a3aef88c 100644
+--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
+@@ -3754,6 +3754,17 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
+     if (! mustBeXML && isXMLNS
+         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
+       isXMLNS = XML_FALSE;
++
++    // NOTE: While Expat does not validate namespace URIs against RFC 3986,
++    //       we have to at least make sure that the XML processor on top of
++    //       Expat (that is splitting tag names by namespace separator into
++    //       2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
++    //       by an attacker putting additional namespace separator characters
++    //       into namespace declarations.  That would be ambiguous and not to
++    //       be expected.
++    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
++      return XML_ERROR_SYNTAX;
++    }
+   }
+   isXML = isXML && len == xmlLen;
+   isXMLNS = isXMLNS && len == xmlnsLen;
--- a/backport-CVE-2022-25313-Prevent-stack-exhaustion-in-build_model.patch
+++ b/backport-CVE-2022-25313-Prevent-stack-exhaustion-in-build_model.patch
@@ -0,0 +1,222 @@
+From 9b4ce651b26557f16103c3a366c91934ecd439ab Mon Sep 17 00:00:00 2001
+From: Samanta Navarro <ferivoz@riseup.net>
+Date: Tue, 15 Feb 2022 11:54:29 +0000
+Subject: [PATCH] Prevent stack exhaustion in build_model
+
+It is possible to trigger stack exhaustion in build_model function if
+depth of nested children in DTD element is large enough. This happens
+because build_node is a recursively called function within build_model.
+
+The code has been adjusted to run iteratively. It uses the already
+allocated heap space as temporary stack (growing from top to bottom).
+
+Output is identical to recursive version. No new fields in data
+structures were added, i.e. it keeps full API and ABI compatibility.
+Instead the numchildren variable is used to temporarily keep the
+index of items (uint vs int).
+
+Documentation and readability improvements kindly added by Sebastian.
+
+Proof of Concept:
+
+1. Compile poc binary which parses XML file line by line
+
+```
+cat > poc.c << EOF
+ #include <err.h>
+ #include <expat.h>
+ #include <stdio.h>
+
+ XML_Parser parser;
+
+ static void XMLCALL
+ dummy_element_decl_handler(void *userData, const XML_Char *name,
+                            XML_Content *model) {
+   XML_FreeContentModel(parser, model);
+ }
+
+ int main(int argc, char *argv[]) {
+   FILE *fp;
+   char *p = NULL;
+   size_t s = 0;
+   ssize_t l;
+   if (argc != 2)
+     errx(1, "usage: poc poc.xml");
+   if ((parser = XML_ParserCreate(NULL)) == NULL)
+     errx(1, "XML_ParserCreate");
+   XML_SetElementDeclHandler(parser, dummy_element_decl_handler);
+   if ((fp = fopen(argv[1], "r")) == NULL)
+     err(1, "fopen");
+   while ((l = getline(&p, &s, fp)) > 0)
+     if (XML_Parse(parser, p, (int)l, XML_FALSE) != XML_STATUS_OK)
+       errx(1, "XML_Parse");
+   XML_ParserFree(parser);
+   free(p);
+   fclose(fp);
+   return 0;
+ }
+EOF
+cc -std=c11 -D_POSIX_C_SOURCE=200809L -lexpat -o poc poc.c
+```
+
+2. Create XML file with a lot of nested groups in DTD element
+
+```
+cat > poc.xml.zst.b64 << EOF
+KLUv/aQkACAAPAEA+DwhRE9DVFlQRSB1d3UgWwo8IUVMRU1FTlQgdXd1CigBAHv/58AJAgAQKAIA
+ECgCABAoAgAQKAIAECgCABAoAgAQKHwAAChvd28KKQIA2/8gV24XBAIAECkCABApAgAQKQIAECkC
+ABApAgAQKQIAEClVAAAgPl0+CgEA4A4I2VwwnQ==
+EOF
+base64 -d poc.xml.zst.b64 | zstd -d > poc.xml
+```
+
+3. Run Proof of Concept
+
+```
+./poc poc.xml
+```
+
+Co-authored-by: Sebastian Pipping <sebastian@pipping.org>
+---
+ lib/xmlparse.c | 116 +++++++++++++++++++++++++++++--------------
+ 1 file changed, 79 insertions(+), 37 deletions(-)
+
+diff --git a/lib/xmlparse.c b/lib/xmlparse.c
+index 4b43e613..594cf12c 100644
+--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
+@@ -7317,44 +7317,15 @@ nextScaffoldPart(XML_Parser parser) {
+   return next;
+ }
+ 
+-static void
+-build_node(XML_Parser parser, int src_node, XML_Content *dest,
+-           XML_Content **contpos, XML_Char **strpos) {
+-  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
+-  dest->type = dtd->scaffold[src_node].type;
+-  dest->quant = dtd->scaffold[src_node].quant;
+-  if (dest->type == XML_CTYPE_NAME) {
+-    const XML_Char *src;
+-    dest->name = *strpos;
+-    src = dtd->scaffold[src_node].name;
+-    for (;;) {
+-      *(*strpos)++ = *src;
+-      if (! *src)
+-        break;
+-      src++;
+-    }
+-    dest->numchildren = 0;
+-    dest->children = NULL;
+-  } else {
+-    unsigned int i;
+-    int cn;
+-    dest->numchildren = dtd->scaffold[src_node].childcnt;
+-    dest->children = *contpos;
+-    *contpos += dest->numchildren;
+-    for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren;
+-         i++, cn = dtd->scaffold[cn].nextsib) {
+-      build_node(parser, cn, &(dest->children[i]), contpos, strpos);
+-    }
+-    dest->name = NULL;
+-  }
+-}
+-
+ static XML_Content *
+ build_model(XML_Parser parser) {
+  /* Function build_model transforms the existing parser->m_dtd->scaffold
+   * array of CONTENT_SCAFFOLD tree nodes into a new array of
+   * XML_Content tree nodes followed by a gapless list of zero-terminated
+   * strings. */
+   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
+   XML_Content *ret;
+-  XML_Content *cpos;
+-  XML_Char *str;
+  XML_Char *str; /* the current string writing location */
+ 
+   /* Detect and prevent integer overflow.
+    * The preprocessor guard addresses the "always false" warning
+@@ -7380,10 +7351,81 @@ build_model(XML_Parser parser) {
+   if (! ret)
+     return NULL;
+ 
+-  str = (XML_Char *)(&ret[dtd->scaffCount]);
+-  cpos = &ret[1];
+  /* What follows is an iterative implementation (of what was previously done
+   * recursively in a dedicated function called "build_node".  The old recursive
+   * build_node could be forced into stack exhaustion from input as small as a
+   * few megabyte, and so that was a security issue.  Hence, a function call
+   * stack is avoided now by resolving recursion.)
+   *
+   * The iterative approach works as follows:
+   *
+   * - We use space in the target array for building a temporary stack structure
+   *   while that space is still unused.
+   *   The stack grows from the array's end downwards and the "actual data"
+   *   grows from the start upwards, sequentially.
+   *   (Because stack grows downwards, pushing onto the stack is a decrement
+   *   while popping off the stack is an increment.)
+   *
+   * - A stack element appears as a regular XML_Content node on the outside,
+   *   but only uses a single field -- numchildren -- to store the source
+   *   tree node array index.  These are the breadcrumbs leading the way back
+   *   during pre-order (node first) depth-first traversal.
+   *
+   * - The reason we know the stack will never grow into (or overlap with)
+   *   the area with data of value at the start of the array is because
+   *   the overall number of elements to process matches the size of the array,
+   *   and the sum of fully processed nodes and yet-to-be processed nodes
+   *   on the stack, cannot be more than the total number of nodes.
+   *   It is possible for the top of the stack and the about-to-write node
+   *   to meet, but that is safe because we get the source index out
+   *   before doing any writes on that node.
+   */
+  XML_Content *dest = ret; /* tree node writing location, moves upwards */
+  XML_Content *const destLimit = &ret[dtd->scaffCount];
+  XML_Content *const stackBottom = &ret[dtd->scaffCount];
+  XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */
+  str = (XML_Char *)&ret[dtd->scaffCount];
+
+  /* Push source tree root node index onto the stack */
+  (--stackTop)->numchildren = 0;
+
+  for (; dest < destLimit; dest++) {
+    /* Pop source tree node index off the stack */
+    const int src_node = (int)(stackTop++)->numchildren;
+
+    /* Convert item */
+    dest->type = dtd->scaffold[src_node].type;
+    dest->quant = dtd->scaffold[src_node].quant;
+    if (dest->type == XML_CTYPE_NAME) {
+      const XML_Char *src;
+      dest->name = str;
+      src = dtd->scaffold[src_node].name;
+      for (;;) {
+        *str++ = *src;
+        if (! *src)
+          break;
+        src++;
+      }
+      dest->numchildren = 0;
+      dest->children = NULL;
+    } else {
+      unsigned int i;
+      int cn;
+      dest->name = NULL;
+      dest->numchildren = dtd->scaffold[src_node].childcnt;
+      dest->children = &dest[1];
+
+      /* Push children to the stack
+       * in a way where the first child ends up at the top of the
+       * (downwards growing) stack, in order to be processed first. */
+      stackTop -= dest->numchildren;
+      for (i = 0, cn = dtd->scaffold[src_node].firstchild;
+           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) {
+        (stackTop + i)->numchildren = (unsigned int)cn;
+      }
+    }
+  }
+ 
+-  build_node(parser, 0, ret, &cpos, &str);
+   return ret;
+ }
+ 
--- a/backport-CVE-2022-25314-Prevent-integer-overflow-in-copyString.patch
+++ b/backport-CVE-2022-25314-Prevent-integer-overflow-in-copyString.patch
@@ -0,0 +1,24 @@
+From efcb347440ade24b9f1054671e6bd05e60b4cafd Mon Sep 17 00:00:00 2001
+From: Samanta Navarro <ferivoz@riseup.net>
+Date: Tue, 15 Feb 2022 11:56:57 +0000
+Subject: [PATCH] Prevent integer overflow in copyString
+
+The copyString function is only used for the encoding string supplied by
+the library user.
+---
+ lib/xmlparse.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/xmlparse.c b/lib/xmlparse.c
+index 4b43e613..a39377c2 100644
+--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
+@@ -7412,7 +7412,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
+ 
+ static XML_Char *
+ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
+-  int charsRequired = 0;
++  size_t charsRequired = 0;
+   XML_Char *result;
+ 
+   /* First determine how long the string is */
--- a/backport-CVE-2022-25315-Prevent-integer-overflow-in-storeRawNames.patch
+++ b/backport-CVE-2022-25315-Prevent-integer-overflow-in-storeRawNames.patch
@@ -0,0 +1,138 @@
+From eb0362808b4f9f1e2345a0cf203b8cc196d776d9 Mon Sep 17 00:00:00 2001
+From: Samanta Navarro <ferivoz@riseup.net>
+Date: Tue, 15 Feb 2022 11:55:46 +0000
+Subject: [PATCH] Prevent integer overflow in storeRawNames
+
+It is possible to use an integer overflow in storeRawNames for
+out-of-bounds heap writes. Default configuration is affected. If compiled
+with XML_UNICODE then the attack does not work. Compiling with
+-fsanitize=address confirms the following proof of concept.
+
+The problem can be exploited by abusing the m_buffer expansion logic.
+Even though the initial size of m_buffer is a power of two, eventually
+it can end up a little bit lower, thus allowing allocations very close
+to INT_MAX (since INT_MAX/2 can be surpassed). This means that tag
+names can be parsed which are almost INT_MAX in size.
+
+Unfortunately (from an attacker point of view) INT_MAX/2 is also a
+limitation in string pools. Having a tag name of INT_MAX/2 characters
+or more is not possible.
+
+Expat can convert between different encodings. UTF-16 documents which
+contain only ASCII representable characters are twice as large as their
+ASCII encoded counterparts.
+
+The proof of concept works by taking these three considerations into
+account:
+
+1. Move the m_buffer size slightly below a power of two by having a
+   short root node <a>. This allows the m_buffer to grow very close
+   to INT_MAX.
+2. The string pooling forbids tag names longer than or equal to
+   INT_MAX/2, so keep the attack tag name smaller than that.
+3. To be able to still overflow INT_MAX even though the name is
+   limited at INT_MAX/2-1 (nul byte) we use UTF-16 encoding and a tag
+   which only contains ASCII characters. UTF-16 always stores two
+   bytes per character while the tag name is converted to using only
+   one. Our attack node byte count must be a bit higher than
+   2/3 INT_MAX so the converted tag name is around INT_MAX/3 which
+   in sum can overflow INT_MAX.
+
+Thanks to our small root node, m_buffer can handle 2/3 INT_MAX bytes
+without running into INT_MAX boundary check. The string pooling is
+able to store INT_MAX/3 as tag name because the amount is below
+INT_MAX/2 limitation. And creating the sum of both eventually overflows
+in storeRawNames.
+
+Proof of Concept:
+
+1. Compile expat with -fsanitize=address.
+
+2. Create Proof of Concept binary which iterates through input
+   file 16 MB at once for better performance and easier integer
+   calculations:
+
+```
+cat > poc.c << EOF
+ #include <err.h>
+ #include <expat.h>
+ #include <stdlib.h>
+ #include <stdio.h>
+
+ #define CHUNK (16 * 1024 * 1024)
+ int main(int argc, char *argv[]) {
+   XML_Parser parser;
+   FILE *fp;
+   char *buf;
+   int i;
+
+   if (argc != 2)
+     errx(1, "usage: poc file.xml");
+   if ((parser = XML_ParserCreate(NULL)) == NULL)
+     errx(1, "failed to create expat parser");
+   if ((fp = fopen(argv[1], "r")) == NULL) {
+     XML_ParserFree(parser);
+     err(1, "failed to open file");
+   }
+   if ((buf = malloc(CHUNK)) == NULL) {
+     fclose(fp);
+     XML_ParserFree(parser);
+     err(1, "failed to allocate buffer");
+   }
+   i = 0;
+   while (fread(buf, CHUNK, 1, fp) == 1) {
+     printf("iteration %d: XML_Parse returns %d\n", ++i,
+       XML_Parse(parser, buf, CHUNK, XML_FALSE));
+   }
+   free(buf);
+   fclose(fp);
+   XML_ParserFree(parser);
+   return 0;
+ }
+EOF
+gcc -fsanitize=address -lexpat -o poc poc.c
+```
+
+3. Construct specially prepared UTF-16 XML file:
+
+```
+dd if=/dev/zero bs=1024 count=794624 | tr '\0' 'a' > poc-utf8.xml
+echo -n '<a><' | dd conv=notrunc of=poc-utf8.xml
+echo -n '><' | dd conv=notrunc of=poc-utf8.xml bs=1 seek=805306368
+iconv -f UTF-8 -t UTF-16LE poc-utf8.xml > poc-utf16.xml
+```
+
+4. Run proof of concept:
+
+```
+./poc poc-utf16.xml
+```
+---
+ lib/xmlparse.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/lib/xmlparse.c b/lib/xmlparse.c
+index 4b43e613..f34d6ab5 100644
+--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
+@@ -2563,6 +2563,7 @@ storeRawNames(XML_Parser parser) {
+   while (tag) {
+     int bufSize;
+     int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
++    size_t rawNameLen;
+     char *rawNameBuf = tag->buf + nameLen;
+     /* Stop if already stored.  Since m_tagStack is a stack, we can stop
+        at the first entry that has already been copied; everything
+@@ -2574,7 +2575,11 @@ storeRawNames(XML_Parser parser) {
+     /* For re-use purposes we need to ensure that the
+        size of tag->buf is a multiple of sizeof(XML_Char).
+     */
+-    bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
++    rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
++    /* Detect and prevent integer overflow. */
++    if (rawNameLen > (size_t)INT_MAX - nameLen)
++      return XML_FALSE;
++    bufSize = nameLen + (int)rawNameLen;
+     if (bufSize > tag->bufEnd - tag->buf) {
+       char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
+       if (temp == NULL)
--- a/backport-Fix-build_model-regression.patch
+++ b/backport-Fix-build_model-regression.patch
@@ -0,0 +1,134 @@
+From b12f34fe32821a69dc12ff9a021daca0856de238 Mon Sep 17 00:00:00 2001
+From: Samanta Navarro <ferivoz@riseup.net>
+Date: Sat, 19 Feb 2022 23:59:25 +0000
+Subject: [PATCH] Fix build_model regression.
+
+The iterative approach in build_model failed to fill children arrays
+correctly. A preorder traversal is not required and turned out to be the
+culprit. Use an easier algorithm:
+
+Add nodes from scaffold tree starting at index 0 (root) to the target
+array whenever children are encountered. This ensures that children
+are adjacent to each other. This complies with the recursive version.
+
+Store only the scaffold index in numchildren field to prevent a direct
+processing of these children, which would require a recursive solution.
+This allows the algorithm to iterate through the target array from start
+to end without jumping back and forth, converting on the fly.
+
+Co-authored-by: Sebastian Pipping <sebastian@pipping.org>
+---
+ lib/xmlparse.c | 79 +++++++++++++++++++++++++++++++---------------------
+ 1 file changed, 47 insertions(+), 32 deletions(-)
+
+diff --git a/lib/xmlparse.c b/lib/xmlparse.c
+index c479a25..84885b5 100644
+--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
+@@ -7373,39 +7373,58 @@ build_model(XML_Parser parser) {
+    *
+    * The iterative approach works as follows:
+    *
+-   * - We use space in the target array for building a temporary stack structure
+-   *   while that space is still unused.
+-   *   The stack grows from the array's end downwards and the "actual data"
+-   *   grows from the start upwards, sequentially.
+-   *   (Because stack grows downwards, pushing onto the stack is a decrement
+-   *   while popping off the stack is an increment.)
+   * - We have two writing pointers, both walking up the result array; one does
+   *   the work, the other creates "jobs" for its colleague to do, and leads
+   *   the way:
+    *
+-   * - A stack element appears as a regular XML_Content node on the outside,
+-   *   but only uses a single field -- numchildren -- to store the source
+-   *   tree node array index.  These are the breadcrumbs leading the way back
+-   *   during pre-order (node first) depth-first traversal.
+   *   - The faster one, pointer jobDest, always leads and writes "what job
+   *     to do" by the other, once they reach that place in the
+   *     array: leader "jobDest" stores the source node array index (relative
+   *     to array dtd->scaffold) in field "numchildren".
+    *
+-   * - The reason we know the stack will never grow into (or overlap with)
+-   *   the area with data of value at the start of the array is because
+-   *   the overall number of elements to process matches the size of the array,
+-   *   and the sum of fully processed nodes and yet-to-be processed nodes
+-   *   on the stack, cannot be more than the total number of nodes.
+-   *   It is possible for the top of the stack and the about-to-write node
+-   *   to meet, but that is safe because we get the source index out
+-   *   before doing any writes on that node.
+   *   - The slower one, pointer dest, looks at the value stored in the
+   *     "numchildren" field (which actually holds a source node array index
+   *     at that time) and puts the real data from dtd->scaffold in.
+   *
+   * - Before the loop starts, jobDest writes source array index 0
+   *   (where the root node is located) so that dest will have something to do
+   *   when it starts operation.
+   *
+   * - Whenever nodes with children are encountered, jobDest appends
+   *   them as new jobs, in order.  As a result, tree node siblings are
+   *   adjacent in the resulting array, for example:
+   *
+   *     [0] root, has two children
+   *       [1] first child of 0, has three children
+   *         [3] first child of 1, does not have children
+   *         [4] second child of 1, does not have children
+   *         [5] third child of 1, does not have children
+   *       [2] second child of 0, does not have children
+   *
+   *   Or (the same data) presented in flat array view:
+   *
+   *     [0] root, has two children
+   *
+   *     [1] first child of 0, has three children
+   *     [2] second child of 0, does not have children
+   *
+   *     [3] first child of 1, does not have children
+   *     [4] second child of 1, does not have children
+   *     [5] third child of 1, does not have children
+   *
+   * - The algorithm repeats until all target array indices have been processed.
+    */
+   XML_Content *dest = ret; /* tree node writing location, moves upwards */
+   XML_Content *const destLimit = &ret[dtd->scaffCount];
+-  XML_Content *const stackBottom = &ret[dtd->scaffCount];
+-  XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */
+  XML_Content *jobDest = ret; /* next free writing location in target array */
+   str = (XML_Char *)&ret[dtd->scaffCount];
+ 
+-  /* Push source tree root node index onto the stack */
+-  (--stackTop)->numchildren = 0;
+  /* Add the starting job, the root node (index 0) of the source tree  */
+  (jobDest++)->numchildren = 0;
+ 
+   for (; dest < destLimit; dest++) {
+-    /* Pop source tree node index off the stack */
+-    const int src_node = (int)(stackTop++)->numchildren;
+    /* Retrieve source tree array index from job storage */
+    const int src_node = (int)dest->numchildren;
+ 
+     /* Convert item */
+     dest->type = dtd->scaffold[src_node].type;
+@@ -7427,16 +7446,12 @@ build_model(XML_Parser parser) {
+       int cn;
+       dest->name = NULL;
+       dest->numchildren = dtd->scaffold[src_node].childcnt;
+-      dest->children = &dest[1];
+      dest->children = jobDest;
+ 
+-      /* Push children to the stack
+-       * in a way where the first child ends up at the top of the
+-       * (downwards growing) stack, in order to be processed first. */
+-      stackTop -= dest->numchildren;
+      /* Append scaffold indices of children to array */
+       for (i = 0, cn = dtd->scaffold[src_node].firstchild;
+-           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) {
+-        (stackTop + i)->numchildren = (unsigned int)cn;
+-      }
+           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
+        (jobDest++)->numchildren = (unsigned int)cn;
+     }
+   }
+ 
+-- 
+1.8.3.1
+
--- a/backport-tests-Cover-CVE-2022-25236.patch
+++ b/backport-tests-Cover-CVE-2022-25236.patch
@@ -0,0 +1,60 @@
+From 2de077423fb22750ebea599677d523b53cb93b1d Mon Sep 17 00:00:00 2001
+From: Sebastian Pipping <sebastian@pipping.org>
+Date: Sat, 12 Feb 2022 00:51:43 +0100
+Subject: [PATCH] tests: Cover CVE-2022-25236
+
+---
+ tests/runtests.c | 30 ++++++++++++++++++++++++++++++
+ 1 file changed, 30 insertions(+)
+
+diff --git a/tests/runtests.c b/tests/runtests.c
+index d07203f..bc5344b 100644
+--- a/tests/runtests.c
+++ b/tests/runtests.c
+@@ -7220,6 +7220,35 @@ START_TEST(test_ns_double_colon_doctype) {
+ }
+ END_TEST
+ 
+START_TEST(test_ns_separator_in_uri) {
+  struct test_case {
+    enum XML_Status expectedStatus;
+    const char *doc;
+  };
+  struct test_case cases[] = {
+      {XML_STATUS_OK, "<doc xmlns='one_two' />"},
+      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />"},
+  };
+
+  size_t i = 0;
+  size_t failCount = 0;
+  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
+    XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
+    XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
+    if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
+                  /*isFinal*/ XML_TRUE)
+        != cases[i].expectedStatus) {
+      failCount++;
+    }
+    XML_ParserFree(parser);
+  }
+
+  if (failCount) {
+    fail("Namespace separator handling is broken");
+  }
+}
+END_TEST
+
+ /* Control variable; the number of times duff_allocator() will successfully
+  * allocate */
+ #define ALLOC_ALWAYS_SUCCEED (-1)
+@@ -11905,6 +11934,7 @@ make_suite(void) {
+   tcase_add_test(tc_namespace, test_ns_utf16_doctype);
+   tcase_add_test(tc_namespace, test_ns_invalid_doctype);
+   tcase_add_test(tc_namespace, test_ns_double_colon_doctype);
+  tcase_add_test(tc_namespace, test_ns_separator_in_uri);
+ 
+   suite_add_tcase(s, tc_misc);
+   tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
+-- 
+1.8.3.1
+
--- a/backport-tests-Cover-missing-validation-of-encoding.patch
+++ b/backport-tests-Cover-missing-validation-of-encoding.patch
@@ -0,0 +1,154 @@
+From 6a5510bc6b7efe743356296724e0b38300f05379 Mon Sep 17 00:00:00 2001
+From: Sebastian Pipping <sebastian@pipping.org>
+Date: Tue, 8 Feb 2022 04:06:21 +0100
+Subject: [PATCH] tests: Cover missing validation of encoding
+ (CVE-2022-25235)
+
+---
+ tests/runtests.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 109 insertions(+)
+
+diff --git a/tests/runtests.c b/tests/runtests.c
+index bc5344b..9b155b8 100644
+--- a/tests/runtests.c
+++ b/tests/runtests.c
+@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
+ }
+ END_TEST
+ 
+START_TEST(test_utf8_in_start_tags) {
+  struct test_case {
+    bool goodName;
+    bool goodNameStart;
+    const char *tagName;
+  };
+
+  // The idea with the tests below is this:
+  // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
+  // go to isNever and are hence not a concern.
+  //
+  // We start with a character that is a valid name character
+  // (or even name-start character, see XML 1.0r4 spec) and then we flip
+  // single bits at places where (1) the result leaves the UTF-8 encoding space
+  // and (2) we stay in the same n-byte sequence family.
+  //
+  // The flipped bits are highlighted in angle brackets in comments,
+  // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
+  // the most significant bit to 1 to leave UTF-8 encoding space.
+  struct test_case cases[] = {
+      // 1-byte UTF-8: [0xxx xxxx]
+      {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
+      {false, false, "\xBA"}, // [<1>011 1010]
+      {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
+      {false, false, "\xB9"}, // [<1>011 1001]
+
+      // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
+      {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
+                                  // Arabic small waw U+06E5
+      {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
+      {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
+      {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
+      {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
+                                  // combining char U+0301
+      {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
+      {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
+      {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
+
+      // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
+      {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
+                                      // Devanagari Letter A U+0905
+      {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
+      {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
+      {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
+      {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
+      {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
+      {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
+                                      // combining char U+0901
+      {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
+      {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
+      {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
+      {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
+      {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
+  };
+  const bool atNameStart[] = {true, false};
+
+  size_t i = 0;
+  char doc[1024];
+  size_t failCount = 0;
+
+  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
+    size_t j = 0;
+    for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
+      const bool expectedSuccess
+          = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
+      sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
+      XML_Parser parser = XML_ParserCreate(NULL);
+
+      const enum XML_Status status
+          = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
+
+      bool success = true;
+      if ((status == XML_STATUS_OK) != expectedSuccess) {
+        success = false;
+      }
+      if ((status == XML_STATUS_ERROR)
+          && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
+        success = false;
+      }
+
+      if (! success) {
+        fprintf(
+            stderr,
+            "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
+            (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
+            (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
+        failCount++;
+      }
+
+      XML_ParserFree(parser);
+    }
+  }
+
+  if (failCount > 0) {
+    fail("UTF-8 regression detected");
+  }
+}
+END_TEST
+
+ /* Test trailing spaces in elements are accepted */
+ static void XMLCALL
+ record_element_end_handler(void *userData, const XML_Char *name) {
+@@ -6175,6 +6274,14 @@ START_TEST(test_bad_doctype) {
+ }
+ END_TEST
+ 
+START_TEST(test_bad_doctype_utf8) {
+  const char *text = "<!DOCTYPE \xDB\x25"
+                     "doc><doc/>"; // [1101 1011] [<0>010 0101]
+  expect_failure(text, XML_ERROR_INVALID_TOKEN,
+                 "Invalid UTF-8 in DOCTYPE not faulted");
+}
+END_TEST
+
+ START_TEST(test_bad_doctype_utf16) {
+   const char text[] =
+       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
+@@ -11870,6 +11977,7 @@ make_suite(void) {
+   tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
+   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
+   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
+  tcase_add_test(tc_basic, test_utf8_in_start_tags);
+   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
+   tcase_add_test(tc_basic, test_utf16_attribute);
+   tcase_add_test(tc_basic, test_utf16_second_attr);
+@@ -11878,6 +11986,7 @@ make_suite(void) {
+   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
+   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
+   tcase_add_test(tc_basic, test_bad_doctype);
+  tcase_add_test(tc_basic, test_bad_doctype_utf8);
+   tcase_add_test(tc_basic, test_bad_doctype_utf16);
+   tcase_add_test(tc_basic, test_bad_doctype_plus);
+   tcase_add_test(tc_basic, test_bad_doctype_star);
+-- 
+1.8.3.1
+
--- a/backport-tests-Protect-against-nested-element-declaration-mod.patch
+++ b/backport-tests-Protect-against-nested-element-declaration-mod.patch
@ -0,0 +1,108 @@
+From 154e565f6ef329c9ec97e6534c411ddde0b320c8 Mon Sep 17 00:00:00 2001
+From: Sebastian Pipping <sebastian@pipping.org>
+Date: Sun, 20 Feb 2022 03:26:57 +0100
+Subject: [PATCH] tests: Protect against nested element declaration
+ model regressions
+
+---
+ tests/runtests.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 77 insertions(+)
+
+diff --git a/tests/runtests.c b/tests/runtests.c
+index 2cd4acb..e28670d 100644
+--- a/tests/runtests.c
+++ b/tests/runtests.c
+@@ -2664,6 +2664,82 @@ START_TEST(test_dtd_elements) {
+ }
+ END_TEST
+ 
+static void XMLCALL
+element_decl_check_model(void *userData, const XML_Char *name,
+                         XML_Content *model) {
+  UNUSED_P(userData);
+  uint32_t errorFlags = 0; /* one bit per deviation from the expected model */
+  /* Expected model array structure is this:
+   * [0] (type 6, quant 0)
+   *   [1] (type 5, quant 0)
+   *     [3] (type 4, quant 0, name "bar")
+   *     [4] (type 4, quant 0, name "foo")
+   *     [5] (type 4, quant 3, name "xyz")
+   *   [2] (type 4, quant 2, name "zebra") */
+  errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
+  if (model == NULL) { /* report bit 1 and stop: model[i] is unusable */
+    XML_SetUserData(g_parser, (void *)(uintptr_t)(errorFlags | (1u << 1)));
+    return;
+  }
+
+  errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
+  errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
+  errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
+  errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
+  errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
+
+  errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
+  errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
+  errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
+  errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
+  errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
+
+  errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
+  errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
+  errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
+  errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
+  errorFlags |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
+
+  errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
+  errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
+  errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
+  errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
+  errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
+
+  errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
+  errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
+  errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
+  errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
+  errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
+
+  errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
+  errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
+  errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
+  errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
+  errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
+  XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags); /* 0 == OK */
+  XML_FreeContentModel(g_parser, model); /* the handler must free the model */
+}
+
+START_TEST(test_dtd_elements_nesting) { /* nested content model regression */
+  // Payload inspired by a test in Perl's XML::Parser
+  const char *text = "<!DOCTYPE foo [\n"
+                     "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
+                     "]>\n"
+                     "<foo/>";
+  /* Non-zero sentinel: the check below fails if the handler never runs */
+  XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
+
+  XML_SetElementDeclHandler(g_parser, element_decl_check_model);
+  if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
+      == XML_STATUS_ERROR)
+    xml_failure(g_parser);
+  /* The handler stores its error bitmask as user data; 0 means no mismatch */
+  if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
+    fail("Element declaration model regression detected");
+}
+END_TEST
+
+ /* Test foreign DTD handling */
+ START_TEST(test_set_foreign_dtd) {
+   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
+@@ -11863,6 +11939,7 @@ make_suite(void) {
+   tcase_add_test(tc_basic, test_memory_allocation);
+   tcase_add_test(tc_basic, test_default_current);
+   tcase_add_test(tc_basic, test_dtd_elements);
+  tcase_add_test(tc_basic, test_dtd_elements_nesting);
+   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
+   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
+   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
+-- 
+1.8.3.1
+
--- a/expat.spec
+++ b/expat.spec
@ -1,7 +1,7 @@
 %define Rversion %(echo %{version} | sed -e 's/\\./_/g' -e 's/^/R_/')
 Name:           expat
 Version:        2.4.1
-Release:        3
+Release:        4
 Summary:        An XML parser library
 License:        MIT
 URL:            https://libexpat.github.io/
@ -13,6 +13,15 @@ Patch2:         backport-CVE-2022-22822-CVE-2022-22823-CVE-2022-22824-CVE-2022-2
 Patch3:         backport-CVE-2022-23852-lib-Detect-and-prevent-integer-overflow-in-XML_GetBu.patch
 Patch4:         backport-CVE-2022-23852-tests-Cover-integer-overflow-in-XML_GetBuffer-CVE-20.patch
 Patch5:         backport-CVE-2022-23990-lib-Prevent-integer-overflow-in-doProlog-CVE-2022-23.patch
+Patch6:         backport-CVE-2022-25235-lib-Add-missing-validation-of-encoding.patch
+Patch7:         backport-tests-Cover-missing-validation-of-encoding.patch
+Patch8:         backport-CVE-2022-25236-lib-Protect-against-malicious-namespace-declarations.patch
+Patch9:         backport-tests-Cover-CVE-2022-25236.patch
+Patch10:        backport-CVE-2022-25313-Prevent-stack-exhaustion-in-build_model.patch
+Patch11:        backport-CVE-2022-25314-Prevent-integer-overflow-in-copyString.patch
+Patch12:        backport-CVE-2022-25315-Prevent-integer-overflow-in-storeRawNames.patch
+Patch13:        backport-Fix-build_model-regression.patch
+Patch14:        backport-tests-Protect-against-nested-element-declaration-mod.patch

 BuildRequires:  sed,autoconf,automake,gcc-c++,libtool,xmlto

@ -66,6 +75,12 @@ make check
 %{_mandir}/man1/*

 %changelog
+* Sat Feb 26 2022 yangzhuangzhuang <yangzhuangzhuang1@h-partners.com> - 2.4.1-4
+- Type:CVE
+- ID:CVE-2022-25235 CVE-2022-25236 CVE-2022-25313 CVE-2022-25314 CVE-2022-25315
+- SUG:NA
+- DESC:Fix CVE-2022-25235 CVE-2022-25236 CVE-2022-25313 CVE-2022-25314 CVE-2022-25315
+
 * Mon Feb 7 2022 yangzhuangzhuang <yangzhuangzhuang1@h-partners.com> - 2.4.1-3
 - Type:CVE
 - ID:CVE-2022-23852 CVE-2022-23990