122 lines
3.9 KiB
Diff
122 lines
3.9 KiB
Diff
From a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338 Mon Sep 17 00:00:00 2001
|
|
From: Nick Wellnhofer <wellnhofer@aevum.de>
|
|
Date: Thu, 9 Mar 2023 06:58:24 +0100
|
|
Subject: [PATCH] regexp: Fix determinism checks
|
|
|
|
Swap arguments in initial call to xmlFARecurseDeterminism.
|
|
|
|
Fix the check whether we revisit the initial state in
|
|
xmlFARecurseDeterminism.
|
|
|
|
If there are transitions with equal atoms and targets but different
|
|
counters, treat the regex as deterministic but mark the transitions as
|
|
non-deterministic internally.
|
|
|
|
Don't overwrite zero return value of xmlFAComputesDeterminism
|
|
with non-zero value from xmlFARecurseDeterminism.
|
|
|
|
Most of these errors lead to non-deterministic regexes not being
|
|
detected which typically isn't an issue. The improved code may break
|
|
users who relied on buggy behavior or cause other bugs to become
|
|
visible.
|
|
|
|
Fixes #469.
|
|
|
|
Reference:https://github.com/GNOME/libxml2/commit/a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338
|
|
Conflict:NA
|
|
|
|
---
|
|
xmlregexp.c | 34 +++++++++++++++++++++++-----------
|
|
1 file changed, 23 insertions(+), 11 deletions(-)
|
|
|
|
diff --git a/xmlregexp.c b/xmlregexp.c
|
|
index df0626c..c89f0c7 100644
|
|
--- a/xmlregexp.c
|
|
+++ b/xmlregexp.c
|
|
@@ -2665,7 +2665,7 @@ not_determinist:
|
|
*/
|
|
static int
|
|
xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
|
|
- int to, xmlRegAtomPtr atom) {
|
|
+ int fromnr, int tonr, xmlRegAtomPtr atom) {
|
|
int ret = 1;
|
|
int res;
|
|
int transnr, nbTrans;
|
|
@@ -2690,21 +2690,23 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
|
|
/*
|
|
* check transitions conflicting with the one looked at
|
|
*/
|
|
+ if ((t1->to < 0) || (t1->to == fromnr))
|
|
+ continue;
|
|
if (t1->atom == NULL) {
|
|
- if (t1->to < 0)
|
|
- continue;
|
|
state->markd = XML_REGEXP_MARK_VISITED;
|
|
res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
|
|
- to, atom);
|
|
+ fromnr, tonr, atom);
|
|
if (res == 0) {
|
|
ret = 0;
|
|
/* t1->nd = 1; */
|
|
}
|
|
continue;
|
|
}
|
|
- if (t1->to != to)
|
|
- continue;
|
|
if (xmlFACompareAtoms(t1->atom, atom, deep)) {
|
|
+ /* Treat equal transitions as deterministic. */
|
|
+ if ((t1->to != tonr) ||
|
|
+ (!xmlFAEqualAtoms(t1->atom, atom, deep)))
|
|
+ ret = 0;
|
|
ret = 0;
|
|
/* mark the transition as non-deterministic */
|
|
t1->nd = 1;
|
|
@@ -2837,29 +2839,39 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
|
|
* find transitions which indicate a conflict
|
|
*/
|
|
if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
|
|
- ret = 0;
|
|
+ /*
|
|
+ * Treat equal counter transitions that couldn't be
|
|
+ * eliminated as deterministic.
|
|
+ */
|
|
+ if ((t1->to != t2->to) ||
|
|
+ (t1->counter == t2->counter) ||
|
|
+ (!xmlFAEqualAtoms(t1->atom, t2->atom, deep)))
|
|
+ ret = 0;
|
|
/* mark the transitions as non-deterministic ones */
|
|
t1->nd = 1;
|
|
t2->nd = 1;
|
|
last = t1;
|
|
}
|
|
} else {
|
|
+ int res;
|
|
+
|
|
/*
|
|
* do the closure in case of remaining specific
|
|
* epsilon transitions like choices or all
|
|
*/
|
|
- ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
|
|
- t2->to, t2->atom);
|
|
- xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
|
|
+ res = xmlFARecurseDeterminism(ctxt, ctxt->states[t2->to],
|
|
+ statenr, t1->to, t1->atom);
|
|
+ xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t2->to]);
|
|
/* don't shortcut the computation so all non deterministic
|
|
transition get marked down
|
|
if (ret == 0)
|
|
return(0);
|
|
*/
|
|
- if (ret == 0) {
|
|
+ if (res == 0) {
|
|
t1->nd = 1;
|
|
/* t2->nd = 1; */
|
|
last = t1;
|
|
+ ret = 0;
|
|
}
|
|
}
|
|
}
|
|
--
|
|
2.27.0
|
|
|