libxml2/backport-regexp-Fix-determinism-checks.patch
zhuofeng feb7e8218d backport upstream patches
(cherry picked from commit ec64ed27a9add0f7a9bf6ee351ad67302a60c383)
2023-06-20 11:16:46 +08:00

122 lines
3.9 KiB
Diff

From a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Thu, 9 Mar 2023 06:58:24 +0100
Subject: [PATCH] regexp: Fix determinism checks

Swap arguments in initial call to xmlFARecurseDeterminism.

Fix the check whether we revisit the initial state in
xmlFARecurseDeterminism.

If there are transitions with equal atoms and targets but different
counters, treat the regex as deterministic but mark the transitions as
non-deterministic internally.

Don't overwrite zero return value of xmlFAComputesDeterminism
with non-zero value from xmlFARecurseDeterminism.

Most of these errors lead to non-deterministic regexes not being
detected which typically isn't an issue. The improved code may break
users who relied on buggy behavior or cause other bugs to become
visible.

Fixes #469.

Reference:https://github.com/GNOME/libxml2/commit/a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338
Conflict:NA
---
xmlregexp.c | 34 +++++++++++++++++++++++-----------
1 file changed, 23 insertions(+), 11 deletions(-)
diff --git a/xmlregexp.c b/xmlregexp.c
index df0626c..c89f0c7 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -2665,7 +2665,7 @@ not_determinist:
*/
static int
xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
- int to, xmlRegAtomPtr atom) {
+ int fromnr, int tonr, xmlRegAtomPtr atom) {
int ret = 1;
int res;
int transnr, nbTrans;
@@ -2690,21 +2690,23 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
/*
* check transitions conflicting with the one looked at
*/
+ if ((t1->to < 0) || (t1->to == fromnr))
+ continue;
if (t1->atom == NULL) {
- if (t1->to < 0)
- continue;
state->markd = XML_REGEXP_MARK_VISITED;
res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
- to, atom);
+ fromnr, tonr, atom);
if (res == 0) {
ret = 0;
/* t1->nd = 1; */
}
continue;
}
- if (t1->to != to)
- continue;
if (xmlFACompareAtoms(t1->atom, atom, deep)) {
+ /* Treat equal transitions as deterministic. */
+ if ((t1->to != tonr) ||
+ (!xmlFAEqualAtoms(t1->atom, atom, deep)))
+ ret = 0;
ret = 0;
/* mark the transition as non-deterministic */
t1->nd = 1;
@@ -2837,29 +2839,39 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
* find transitions which indicate a conflict
*/
if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
- ret = 0;
+ /*
+ * Treat equal counter transitions that couldn't be
+ * eliminated as deterministic.
+ */
+ if ((t1->to != t2->to) ||
+ (t1->counter == t2->counter) ||
+ (!xmlFAEqualAtoms(t1->atom, t2->atom, deep)))
+ ret = 0;
/* mark the transitions as non-deterministic ones */
t1->nd = 1;
t2->nd = 1;
last = t1;
}
} else {
+ int res;
+
/*
* do the closure in case of remaining specific
* epsilon transitions like choices or all
*/
- ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
- t2->to, t2->atom);
- xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
+ res = xmlFARecurseDeterminism(ctxt, ctxt->states[t2->to],
+ statenr, t1->to, t1->atom);
+ xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t2->to]);
/* don't shortcut the computation so all non deterministic
transition get marked down
if (ret == 0)
return(0);
*/
- if (ret == 0) {
+ if (res == 0) {
t1->nd = 1;
/* t2->nd = 1; */
last = t1;
+ ret = 0;
}
}
}
--
2.27.0