xerces-j2/CVE-2012-0881.patch
2020-09-19 14:49:48 +08:00

1116 lines
44 KiB
Diff

From 992b5d9c24102ad20330d36c0a71162753a37449 Mon Sep 17 00:00:00 2001
From: Michael Glavassevich <mrglavas@apache.org>
Date: Wed, 4 Jul 2012 19:56:10 +0000
Subject: [PATCH] Enhancements to the hashing algorithms used in internal data
structures within Xerces to make them more resistant to collisions. To
improve the distribution a new hash function is randomly selected each time
the number of items in a bucket exceeds a threshold.
git-svn-id: https://svn.apache.org/repos/asf/xerces/java/trunk@1357381 13f79535-47bb-0310-9956-ffa450edef68
---
.../apache/xerces/impl/dtd/DTDGrammar.java | 147 ++++++++++-
.../util/PrimeNumberSequenceGenerator.java | 43 +++
.../xerces/util/SoftReferenceSymbolTable.java | 37 ++-
src/org/apache/xerces/util/SymbolHash.java | 104 ++++++--
src/org/apache/xerces/util/SymbolTable.java | 95 ++++++-
.../apache/xerces/util/XMLAttributesImpl.java | 246 ++++++++++++++----
6 files changed, 584 insertions(+), 88 deletions(-)
create mode 100644 src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
diff --git a/src/org/apache/xerces/impl/dtd/DTDGrammar.java b/src/org/apache/xerces/impl/dtd/DTDGrammar.java
index 6eb5ee580..220a4174b 100644
--- a/src/org/apache/xerces/impl/dtd/DTDGrammar.java
+++ b/src/org/apache/xerces/impl/dtd/DTDGrammar.java
@@ -19,6 +19,7 @@
import java.util.ArrayList;
import java.util.Hashtable;
+import java.util.Random;
import org.apache.xerces.impl.dtd.models.CMAny;
import org.apache.xerces.impl.dtd.models.CMBinOp;
@@ -2639,6 +2640,26 @@ public ChildrenList () {}
* @author Andy Clark, IBM
*/
protected static final class QNameHashtable {
+
+ private static final class PrimeNumberSequenceGenerator {
+
+ private static int [] PRIMES = {
+ 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
+ 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137,
+ 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
+ 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313,
+ 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419,
+ 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
+ 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617,
+ 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727};
+
+ static void generateSequence(int[] arrayToFill) {
+ Random r = new Random();
+ for (int i = 0; i < arrayToFill.length; ++i) {
+ arrayToFill[i] = PRIMES[r.nextInt(PRIMES.length)];
+ }
+ }
+ }
//
// Constants
@@ -2651,11 +2672,29 @@ public ChildrenList () {}
// that we get a better distribution for hashing. -Ac
/** Hashtable size (101). */
private static final int HASHTABLE_SIZE = 101;
+
+ /** Maximum hash collisions per bucket for a table with load factor == 1. */
+ private static final int MAX_HASH_COLLISIONS = 40;
+
+ private static final int MULTIPLIERS_SIZE = 1 << 5;
+ private static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
//
// Data
//
private Object[][] fHashTable = new Object[HASHTABLE_SIZE][];
+
+ /** actual table size **/
+ private int fTableSize = HASHTABLE_SIZE;
+
+ /** The total number of entries in the hash table. */
+ private int fCount = 0;
+
+ /**
+ * Array of randomly selected hash function multipliers or <code>null</code>
+ * if the default String.hashCode() function should be used.
+ */
+ private int[] fHashMultipliers;
//
// Public methods
@@ -2663,7 +2702,7 @@ public ChildrenList () {}
/** Associates the given value with the specified key tuple. */
public void put(String key, int value) {
- int hash = (key.hashCode() & 0x7FFFFFFF) % HASHTABLE_SIZE;
+ int hash = (hash(key) & 0x7FFFFFFF) % fTableSize;
Object[] bucket = fHashTable[hash];
if (bucket == null) {
@@ -2672,6 +2711,11 @@ public void put(String key, int value) {
bucket[1] = key;
bucket[2] = new int[]{value};
fHashTable[hash] = bucket;
+ if (++fCount > fTableSize) {
+ // Rehash the table if the number of entries
+ // would exceed the number of buckets.
+ rehash();
+ }
} else {
int count = ((int[])bucket[0])[0];
int offset = 1 + 2*count;
@@ -2692,10 +2736,20 @@ public void put(String key, int value) {
}
j += 2;
}
- if (! found) {
+ if (!found) {
bucket[offset++] = key;
bucket[offset]= new int[]{value};
((int[])bucket[0])[0] = ++count;
+ if (++fCount > fTableSize) {
+ // Rehash the table if the number of entries
+ // would exceed the number of buckets.
+ rehash();
+ }
+ else if (count > MAX_HASH_COLLISIONS) {
+ // Select a new hash function and rehash the table if
+ // MAX_HASH_COLLISIONS is exceeded.
+ rebalance();
+ }
}
}
@@ -2706,7 +2760,7 @@ public void put(String key, int value) {
/** Returns the value associated with the specified key tuple. */
public int get(String key) {
- int hash = (key.hashCode() & 0x7FFFFFFF) % HASHTABLE_SIZE;
+ int hash = (hash(key) & 0x7FFFFFFF) % fTableSize;
Object[] bucket = fHashTable[hash];
if (bucket == null) {
@@ -2724,6 +2778,93 @@ public int get(String key) {
return -1;
} // get(int,String,String)
+
+ public int hash(String symbol) {
+ if (fHashMultipliers == null) {
+ return symbol.hashCode();
+ }
+ return hash0(symbol);
+ } // hash(String):int
+
+ private int hash0(String symbol) {
+ int code = 0;
+ final int length = symbol.length();
+ final int[] multipliers = fHashMultipliers;
+ for (int i = 0; i < length; ++i) {
+ code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
+ }
+ return code;
+ } // hash0(String):int
+
+ private void rehash() {
+ rehashCommon(fHashTable.length * 2 + 1);
+ } // rehash()
+
+ private void rebalance() {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ rehashCommon(fHashTable.length);
+ } // rebalance()
+
+ private void rehashCommon(final int newCapacity) {
+
+ final int oldCapacity = fHashTable.length;
+ final Object[][] oldTable = fHashTable;
+
+ final Object[][] newTable = new Object[newCapacity][];
+
+ fHashTable = newTable;
+ fTableSize = fHashTable.length;
+
+ for (int i = 0; i < oldCapacity; ++i) {
+ final Object[] oldBucket = oldTable[i];
+ if (oldBucket != null) {
+ final int oldCount = ((int[]) oldBucket[0])[0];
+ boolean oldBucketReused = false;
+ int k = 1;
+ for (int j = 0; j < oldCount; ++j) {
+ final String key = (String) oldBucket[k];
+ final Object value = oldBucket[k+1];
+
+ final int hash = (hash(key) & 0x7FFFFFFF) % fTableSize;
+ Object[] bucket = fHashTable[hash];
+
+ if (bucket == null) {
+ if (oldBucketReused) {
+ bucket = new Object[1 + 2*INITIAL_BUCKET_SIZE];
+ bucket[0] = new int[]{1};
+ }
+ else {
+ bucket = oldBucket;
+ ((int[])bucket[0])[0] = 1;
+ oldBucketReused = true;
+ }
+ bucket[1] = key;
+ bucket[2] = value;
+ fHashTable[hash] = bucket;
+ }
+ else {
+ int count = ((int[])bucket[0])[0];
+ int offset = 1 + 2*count;
+ if (offset == bucket.length) {
+ int newSize = count + INITIAL_BUCKET_SIZE;
+ Object[] newBucket = new Object[1 + 2*newSize];
+ System.arraycopy(bucket, 0, newBucket, 0, offset);
+ bucket = newBucket;
+ fHashTable[hash] = bucket;
+ }
+ bucket[offset++] = key;
+ bucket[offset]= value;
+ ((int[])bucket[0])[0] = ++count;
+ }
+ k += 2;
+ }
+ }
+ }
+
+ } // rehashCommon(int)
} // class QNameHashtable
diff --git a/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java b/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
new file mode 100644
index 000000000..407eab1dc
--- /dev/null
+++ b/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.xerces.util;
+
+import java.util.Random;
+
+/**
+ * @version $Id$
+ */
+final class PrimeNumberSequenceGenerator {
+
+ private static int [] PRIMES = {
+ 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
+ 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137,
+ 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
+ 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313,
+ 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419,
+ 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
+ 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617,
+ 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727};
+
+ static void generateSequence(int[] arrayToFill) {
+ Random r = new Random();
+ for (int i = 0; i < arrayToFill.length; ++i) {
+ arrayToFill[i] = PRIMES[r.nextInt(PRIMES.length)];
+ }
+ }
+}
diff --git a/src/org/apache/xerces/util/SoftReferenceSymbolTable.java b/src/org/apache/xerces/util/SoftReferenceSymbolTable.java
index 542bfb7da..24bb8ceaf 100644
--- a/src/org/apache/xerces/util/SoftReferenceSymbolTable.java
+++ b/src/org/apache/xerces/util/SoftReferenceSymbolTable.java
@@ -124,6 +124,7 @@ public SoftReferenceSymbolTable() {
public String addSymbol(String symbol) {
clean();
// search for identical symbol
+ int collisionCount = 0;
int bucket = hash(symbol) % fTableSize;
for (SREntry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
SREntryData data = (SREntryData)entry.get();
@@ -133,13 +134,20 @@ public String addSymbol(String symbol) {
if (data.symbol.equals(symbol)) {
return data.symbol;
}
+ ++collisionCount;
}
if (fCount >= fThreshold) {
// Rehash the table if the threshold is exceeded
rehash();
bucket = hash(symbol) % fTableSize;
- }
+ }
+ else if (collisionCount >= fCollisionThreshold) {
+ // Select a new hash function and rehash the table if
+ // the collision threshold is exceeded.
+ rebalance();
+ bucket = hash(symbol) % fTableSize;
+ }
// add new entry
symbol = symbol.intern();
@@ -162,6 +170,7 @@ public String addSymbol(String symbol) {
public String addSymbol(char[] buffer, int offset, int length) {
clean();
// search for identical symbol
+ int collisionCount = 0;
int bucket = hash(buffer, offset, length) % fTableSize;
OUTER: for (SREntry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
SREntryData data = (SREntryData)entry.get();
@@ -171,18 +180,26 @@ public String addSymbol(char[] buffer, int offset, int length) {
if (length == data.characters.length) {
for (int i = 0; i < length; i++) {
if (buffer[offset + i] != data.characters[i]) {
+ ++collisionCount;
continue OUTER;
}
}
return data.symbol;
}
+ ++collisionCount;
}
if (fCount >= fThreshold) {
// Rehash the table if the threshold is exceeded
rehash();
bucket = hash(buffer, offset, length) % fTableSize;
- }
+ }
+ else if (collisionCount >= fCollisionThreshold) {
+ // Select a new hash function and rehash the table if
+ // the collision threshold is exceeded.
+ rebalance();
+ bucket = hash(buffer, offset, length) % fTableSize;
+ }
// add new entry
String symbol = new String(buffer, offset, length).intern();
@@ -215,6 +232,20 @@ protected void compact() {
rehashCommon(((int) (fCount / fLoadFactor)) * 2 + 1);
}
+ /**
+ * Randomly selects a new hash function and reorganizes this SymbolTable
+ * in order to more evenly distribute its entries across the table. This
+ * method is called automatically when the number keys in one of the
+ * SymbolTable's buckets exceeds the given collision threshold.
+ */
+ protected void rebalance() {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ rehashCommon(fBuckets.length);
+ }
+
private void rehashCommon(final int newCapacity) {
final int oldCapacity = fBuckets.length;
@@ -232,7 +263,7 @@ private void rehashCommon(final int newCapacity) {
SREntryData data = (SREntryData)e.get();
if (data != null) {
- int index = hash(data.characters, 0, data.characters.length) % newCapacity;
+ int index = hash(data.symbol) % newCapacity;
if (newTable[index] != null) {
newTable[index].prev = e;
}
diff --git a/src/org/apache/xerces/util/SymbolHash.java b/src/org/apache/xerces/util/SymbolHash.java
index 08caa7b03..f2c001a93 100644
--- a/src/org/apache/xerces/util/SymbolHash.java
+++ b/src/org/apache/xerces/util/SymbolHash.java
@@ -31,19 +31,34 @@
//
// Constants
//
-
+
/** Default table size. */
- protected int fTableSize = 101;
+ protected static final int TABLE_SIZE = 101;
+
+ /** Maximum hash collisions per bucket. */
+ protected static final int MAX_HASH_COLLISIONS = 40;
+
+ protected static final int MULTIPLIERS_SIZE = 1 << 5;
+ protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
//
// Data
//
+
+ /** Actual table size **/
+ protected int fTableSize;
/** Buckets. */
protected Entry[] fBuckets;
/** Number of elements. */
protected int fNum = 0;
+
+ /**
+ * Array of randomly selected hash function multipliers or <code>null</code>
+ * if the default String.hashCode() function should be used.
+ */
+ protected int[] fHashMultipliers;
//
// Constructors
@@ -51,7 +66,7 @@
/** Constructs a key table with the default size. */
public SymbolHash() {
- fBuckets = new Entry[fTableSize];
+ this(TABLE_SIZE);
}
/**
@@ -77,26 +92,37 @@ public SymbolHash(int size) {
* @param value
*/
public void put(Object key, Object value) {
+
+ // search for identical key
+ int collisionCount = 0;
final int hash = hash(key);
int bucket = hash % fTableSize;
- Entry entry = search(key, bucket);
-
- // replace old value
- if (entry != null) {
- entry.value = value;
- }
- // create new entry
- else {
- if (fNum >= fTableSize) {
- // Rehash the table if the number of entries
- // would exceed the number of buckets.
- rehash();
- bucket = hash % fTableSize;
+ for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
+ if (key.equals(entry.key)) {
+ // replace old value
+ entry.value = value;
+ return;
}
- entry = new Entry(key, value, fBuckets[bucket]);
- fBuckets[bucket] = entry;
- fNum++;
+ ++collisionCount;
}
+
+ if (fNum >= fTableSize) {
+ // Rehash the table if the number of entries
+ // would exceed the number of buckets.
+ rehash();
+ bucket = hash % fTableSize;
+ }
+ else if (collisionCount >= MAX_HASH_COLLISIONS && key instanceof String) {
+ // Select a new hash function and rehash the table if
+ // MAX_HASH_COLLISIONS is exceeded.
+ rebalance();
+ bucket = hash(key) % fTableSize;
+ }
+
+ // create new entry
+ Entry entry = new Entry(key, value, fBuckets[bucket]);
+ fBuckets[bucket] = entry;
+ ++fNum;
}
/**
@@ -161,6 +187,7 @@ public int getValues(Object[] elements, int from) {
public SymbolHash makeClone() {
SymbolHash newTable = new SymbolHash(fTableSize);
newTable.fNum = fNum;
+ newTable.fHashMultipliers = fHashMultipliers != null ? (int[]) fHashMultipliers.clone() : null;
for (int i = 0; i < fTableSize; i++) {
if (fBuckets[i] != null) {
newTable.fBuckets[i] = fBuckets[i].makeClone();
@@ -178,6 +205,7 @@ public void clear() {
fBuckets[i] = null;
}
fNum = 0;
+ fHashMultipliers = null;
} // clear(): void
protected Entry search(Object key, int bucket) {
@@ -195,8 +223,21 @@ protected Entry search(Object key, int bucket) {
* @param key The key to hash.
*/
protected int hash(Object key) {
- return key.hashCode() & 0x7FFFFFFF;
- }
+ if (fHashMultipliers == null || !(key instanceof String)) {
+ return key.hashCode() & 0x7FFFFFFF;
+ }
+ return hash0((String) key);
+ } // hash(Object):int
+
+ private int hash0(String symbol) {
+ int code = 0;
+ final int length = symbol.length();
+ final int[] multipliers = fHashMultipliers;
+ for (int i = 0; i < length; ++i) {
+ code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
+ }
+ return code & 0x7FFFFFFF;
+ } // hash0(String):int
/**
* Increases the capacity of and internally reorganizes this
@@ -205,11 +246,28 @@ protected int hash(Object key) {
* number of keys in the SymbolHash exceeds its number of buckets.
*/
protected void rehash() {
-
+ rehashCommon((fBuckets.length << 1) + 1);
+ }
+
+ /**
+ * Randomly selects a new hash function and reorganizes this SymbolHash
+ * in order to more evenly distribute its entries across the table. This
+ * method is called automatically when the number keys in one of the
+ * SymbolHash's buckets exceeds MAX_HASH_COLLISIONS.
+ */
+ protected void rebalance() {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ rehashCommon(fBuckets.length);
+ }
+
+ private void rehashCommon(final int newCapacity) {
+
final int oldCapacity = fBuckets.length;
final Entry[] oldTable = fBuckets;
- final int newCapacity = (oldCapacity << 1) + 1;
final Entry[] newTable = new Entry[newCapacity];
fBuckets = newTable;
diff --git a/src/org/apache/xerces/util/SymbolTable.java b/src/org/apache/xerces/util/SymbolTable.java
index 02ef70b2e..4c7cc1bcb 100644
--- a/src/org/apache/xerces/util/SymbolTable.java
+++ b/src/org/apache/xerces/util/SymbolTable.java
@@ -84,6 +84,12 @@
/** Default table size. */
protected static final int TABLE_SIZE = 101;
+
+ /** Maximum hash collisions per bucket for a table with load factor == 1. */
+ protected static final int MAX_HASH_COLLISIONS = 40;
+
+ protected static final int MULTIPLIERS_SIZE = 1 << 5;
+ protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
//
// Data
@@ -104,6 +110,18 @@
/** The load factor for the SymbolTable. */
protected float fLoadFactor;
+
+ /**
+ * A new hash function is selected and the table is rehashed when
+ * the number of keys in the bucket exceeds this threshold.
+ */
+ protected final int fCollisionThreshold;
+
+ /**
+ * Array of randomly selected hash function multipliers or <code>null</code>
+ * if the default String.hashCode() function should be used.
+ */
+ protected int[] fHashMultipliers;
//
// Constructors
@@ -136,6 +154,7 @@ public SymbolTable(int initialCapacity, float loadFactor) {
fTableSize = initialCapacity;
fBuckets = new Entry[fTableSize];
fThreshold = (int)(fTableSize * loadFactor);
+ fCollisionThreshold = (int)(MAX_HASH_COLLISIONS * loadFactor);
fCount = 0;
}
@@ -174,18 +193,26 @@ public SymbolTable() {
public String addSymbol(String symbol) {
// search for identical symbol
+ int collisionCount = 0;
int bucket = hash(symbol) % fTableSize;
for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
if (entry.symbol.equals(symbol)) {
return entry.symbol;
}
+ ++collisionCount;
}
if (fCount >= fThreshold) {
// Rehash the table if the threshold is exceeded
rehash();
bucket = hash(symbol) % fTableSize;
- }
+ }
+ else if (collisionCount >= fCollisionThreshold) {
+ // Select a new hash function and rehash the table if
+ // the collision threshold is exceeded.
+ rebalance();
+ bucket = hash(symbol) % fTableSize;
+ }
// create new entry
Entry entry = new Entry(symbol, fBuckets[bucket]);
@@ -208,23 +235,32 @@ public String addSymbol(String symbol) {
public String addSymbol(char[] buffer, int offset, int length) {
// search for identical symbol
+ int collisionCount = 0;
int bucket = hash(buffer, offset, length) % fTableSize;
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
if (length == entry.characters.length) {
for (int i = 0; i < length; i++) {
if (buffer[offset + i] != entry.characters[i]) {
+ ++collisionCount;
continue OUTER;
}
}
return entry.symbol;
}
+ ++collisionCount;
}
if (fCount >= fThreshold) {
// Rehash the table if the threshold is exceeded
rehash();
bucket = hash(buffer, offset, length) % fTableSize;
- }
+ }
+ else if (collisionCount >= fCollisionThreshold) {
+ // Select a new hash function and rehash the table if
+ // the collision threshold is exceeded.
+ rebalance();
+ bucket = hash(buffer, offset, length) % fTableSize;
+ }
// add new entry
Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
@@ -243,8 +279,21 @@ public String addSymbol(char[] buffer, int offset, int length) {
* @param symbol The symbol to hash.
*/
public int hash(String symbol) {
- return symbol.hashCode() & 0x7FFFFFFF;
+ if (fHashMultipliers == null) {
+ return symbol.hashCode() & 0x7FFFFFFF;
+ }
+ return hash0(symbol);
} // hash(String):int
+
+ private int hash0(String symbol) {
+ int code = 0;
+ final int length = symbol.length();
+ final int[] multipliers = fHashMultipliers;
+ for (int i = 0; i < length; ++i) {
+ code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
+ }
+ return code & 0x7FFFFFFF;
+ } // hash0(String):int
/**
* Returns a hashcode value for the specified symbol information.
@@ -258,14 +307,25 @@ public int hash(String symbol) {
* @param length The length of the symbol.
*/
public int hash(char[] buffer, int offset, int length) {
+ if (fHashMultipliers == null) {
+ int code = 0;
+ for (int i = 0; i < length; ++i) {
+ code = code * 31 + buffer[offset + i];
+ }
+ return code & 0x7FFFFFFF;
+ }
+ return hash0(buffer, offset, length);
+ } // hash(char[],int,int):int
+
+ private int hash0(char[] buffer, int offset, int length) {
int code = 0;
+ final int[] multipliers = fHashMultipliers;
for (int i = 0; i < length; ++i) {
- code = code * 31 + buffer[offset + i];
+ code = code * multipliers[i & MULTIPLIERS_MASK] + buffer[offset + i];
}
return code & 0x7FFFFFFF;
-
- } // hash(char[],int,int):int
+ } // hash0(char[],int,int):int
/**
* Increases the capacity of and internally reorganizes this
@@ -275,11 +335,28 @@ public int hash(char[] buffer, int offset, int length) {
* and load factor.
*/
protected void rehash() {
-
+ rehashCommon(fBuckets.length * 2 + 1);
+ }
+
+ /**
+ * Randomly selects a new hash function and reorganizes this SymbolTable
+ * in order to more evenly distribute its entries across the table. This
+ * method is called automatically when the number keys in one of the
+ * SymbolTable's buckets exceeds the given collision threshold.
+ */
+ protected void rebalance() {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ rehashCommon(fBuckets.length);
+ }
+
+ private void rehashCommon(final int newCapacity) {
+
int oldCapacity = fBuckets.length;
Entry[] oldTable = fBuckets;
- int newCapacity = oldCapacity * 2 + 1;
Entry[] newTable = new Entry[newCapacity];
fThreshold = (int)(newCapacity * fLoadFactor);
@@ -291,7 +368,7 @@ protected void rehash() {
Entry e = old;
old = old.next;
- int index = hash(e.characters, 0, e.characters.length) % newCapacity;
+ int index = hash(e.symbol) % newCapacity;
e.next = newTable[index];
newTable[index] = e;
}
diff --git a/src/org/apache/xerces/util/XMLAttributesImpl.java b/src/org/apache/xerces/util/XMLAttributesImpl.java
index 63ef720d1..695731e37 100644
--- a/src/org/apache/xerces/util/XMLAttributesImpl.java
+++ b/src/org/apache/xerces/util/XMLAttributesImpl.java
@@ -50,6 +50,12 @@
/** Default table size. */
protected static final int TABLE_SIZE = 101;
+ /** Maximum hash collisions per bucket. */
+ protected static final int MAX_HASH_COLLISIONS = 40;
+
+ protected static final int MULTIPLIERS_SIZE = 1 << 5;
+ protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
+
/**
* Threshold at which an instance is treated
* as a large attribute list.
@@ -103,6 +109,12 @@
* Indicates whether the table view contains consistent data.
*/
protected boolean fIsTableViewConsistent;
+
+ /**
+ * Array of randomly selected hash function multipliers or <code>null</code>
+ * if the default String.hashCode() function should be used.
+ */
+ protected int[] fHashMultipliers;
//
// Constructors
@@ -203,7 +215,8 @@ else if (name.uri == null ||
* the user of this class adds attributes, removes them, and
* then adds more.
*/
- if (!fIsTableViewConsistent || fLength == SIZE_LIMIT) {
+ if (!fIsTableViewConsistent || fLength == SIZE_LIMIT ||
+ (fLength > SIZE_LIMIT && fLength > fTableViewBuckets)) {
prepareAndPopulateTableView();
fIsTableViewConsistent = true;
}
@@ -232,12 +245,14 @@ else if (name.uri == null ||
// We need to check if any of the attributes has the same rawname.
else {
// Search the table.
+ int collisionCount = 0;
Attribute found = fAttributeTableView[bucket];
while (found != null) {
if (found.name.rawname == name.rawname) {
break;
}
found = found.next;
+ ++collisionCount;
}
// This attribute is unique.
if (found == null) {
@@ -250,10 +265,19 @@ else if (name.uri == null ||
}
fAttributes = attributes;
}
-
- // Update table view
- fAttributes[index].next = fAttributeTableView[bucket];
- fAttributeTableView[bucket] = fAttributes[index];
+ // Select a new hash function and rehash the table view
+ // if the collision threshold is exceeded.
+ if (collisionCount >= MAX_HASH_COLLISIONS) {
+ // The current attribute will be processed in the rehash.
+ // Need to set its name first.
+ fAttributes[index].name.setValues(name);
+ rebalanceTableView(fLength);
+ }
+ else {
+ // Update table view
+ fAttributes[index].next = fAttributeTableView[bucket];
+ fAttributeTableView[bucket] = fAttributes[index];
+ }
}
// Duplicate. We still need to find the index.
else {
@@ -829,60 +853,84 @@ public void addAttributeNS(QName name, String type, String value) {
*/
public QName checkDuplicatesNS() {
// If the list is small check for duplicates using pairwise comparison.
- if (fLength <= SIZE_LIMIT) {
- for (int i = 0; i < fLength - 1; ++i) {
- Attribute att1 = fAttributes[i];
- for (int j = i + 1; j < fLength; ++j) {
- Attribute att2 = fAttributes[j];
+ final int length = fLength;
+ if (length <= SIZE_LIMIT) {
+ final Attribute[] attributes = fAttributes;
+ for (int i = 0; i < length - 1; ++i) {
+ Attribute att1 = attributes[i];
+ for (int j = i + 1; j < length; ++j) {
+ Attribute att2 = attributes[j];
if (att1.name.localpart == att2.name.localpart &&
att1.name.uri == att2.name.uri) {
- return att2.name;
+ return att2.name;
}
}
}
+ return null;
}
// If the list is large check duplicates using a hash table.
else {
- // We don't want this table view to be read if someone calls
- // addAttribute so we invalidate it up front.
- fIsTableViewConsistent = false;
-
- prepareTableView();
+ return checkManyDuplicatesNS();
+ }
+ }
+
+ private QName checkManyDuplicatesNS() {
+ // We don't want this table view to be read if someone calls
+ // addAttribute so we invalidate it up front.
+ fIsTableViewConsistent = false;
- Attribute attr;
- int bucket;
+ prepareTableView();
- for (int i = fLength - 1; i >= 0; --i) {
- attr = fAttributes[i];
- bucket = getTableViewBucket(attr.name.localpart, attr.name.uri);
+ Attribute attr;
+ int bucket;
+
+ final int length = fLength;
+ final Attribute[] attributes = fAttributes;
+ final Attribute[] attributeTableView = fAttributeTableView;
+ final int[] attributeTableViewChainState = fAttributeTableViewChainState;
+ int largeCount = fLargeCount;
+
+ for (int i = 0; i < length; ++i) {
+ attr = attributes[i];
+ bucket = getTableViewBucket(attr.name.localpart, attr.name.uri);
+
+ // The chain is stale.
+ // This must be a unique attribute.
+ if (attributeTableViewChainState[bucket] != largeCount) {
+ attributeTableViewChainState[bucket] = largeCount;
+ attr.next = null;
+ attributeTableView[bucket] = attr;
+ }
+ // This chain is active.
+ // We need to check if any of the attributes has the same name.
+ else {
+ // Search the table.
+ int collisionCount = 0;
+ Attribute found = attributeTableView[bucket];
+ while (found != null) {
+ if (found.name.localpart == attr.name.localpart &&
+ found.name.uri == attr.name.uri) {
+ return attr.name;
+ }
+ found = found.next;
+ ++collisionCount;
+ }
- // The chain is stale.
- // This must be a unique attribute.
- if (fAttributeTableViewChainState[bucket] != fLargeCount) {
- fAttributeTableViewChainState[bucket] = fLargeCount;
- attr.next = null;
- fAttributeTableView[bucket] = attr;
- }
- // This chain is active.
- // We need to check if any of the attributes has the same name.
+ // Select a new hash function and rehash the table view
+ // if the collision threshold is exceeded.
+ if (collisionCount >= MAX_HASH_COLLISIONS) {
+ // The current attribute will be processed in the rehash.
+ rebalanceTableViewNS(i+1);
+ largeCount = fLargeCount;
+ }
else {
- // Search the table.
- Attribute found = fAttributeTableView[bucket];
- while (found != null) {
- if (found.name.localpart == attr.name.localpart &&
- found.name.uri == attr.name.uri) {
- return attr.name;
- }
- found = found.next;
- }
-
// Update table view
- attr.next = fAttributeTableView[bucket];
- fAttributeTableView[bucket] = attr;
+ attr.next = attributeTableView[bucket];
+ attributeTableView[bucket] = attr;
}
}
- }
- return null;
+ }
+ return null;
}
/**
@@ -933,7 +981,7 @@ private String getReportableType(String type) {
* would be hashed
*/
protected int getTableViewBucket(String qname) {
- return (qname.hashCode() & 0x7FFFFFFF) % fTableViewBuckets;
+ return (hash(qname) & 0x7FFFFFFF) % fTableViewBuckets;
}
/**
@@ -947,13 +995,36 @@ protected int getTableViewBucket(String qname) {
*/
protected int getTableViewBucket(String localpart, String uri) {
if (uri == null) {
- return (localpart.hashCode() & 0x7FFFFFFF) % fTableViewBuckets;
+ return (hash(localpart) & 0x7FFFFFFF) % fTableViewBuckets;
}
else {
- return ((localpart.hashCode() + uri.hashCode())
- & 0x7FFFFFFF) % fTableViewBuckets;
+ return (hash(localpart, uri) & 0x7FFFFFFF) % fTableViewBuckets;
}
}
+
+ private int hash(String localpart) {
+ if (fHashMultipliers == null) {
+ return localpart.hashCode();
+ }
+ return hash0(localpart);
+ } // hash(String):int
+
+ private int hash(String localpart, String uri) {
+ if (fHashMultipliers == null) {
+ return localpart.hashCode() + uri.hashCode() * 31;
+ }
+ return hash0(localpart) + hash0(uri) * fHashMultipliers[MULTIPLIERS_SIZE];
+ } // hash(String,String):int
+
+ private int hash0(String symbol) {
+ int code = 0;
+ final int length = symbol.length();
+ final int[] multipliers = fHashMultipliers;
+ for (int i = 0; i < length; ++i) {
+ code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
+ }
+ return code;
+ } // hash0(String):int
/**
* Purges all elements from the table view.
@@ -970,10 +1041,32 @@ protected void cleanTableView() {
}
}
+ /**
+ * Increases the capacity of the table view.
+ */
+ private void growTableView() {
+ final int length = fLength;
+ int tableViewBuckets = fTableViewBuckets;
+ do {
+ tableViewBuckets = (tableViewBuckets << 1) + 1;
+ if (tableViewBuckets < 0) {
+ tableViewBuckets = Integer.MAX_VALUE;
+ break;
+ }
+ }
+ while (length > tableViewBuckets);
+ fTableViewBuckets = tableViewBuckets;
+ fAttributeTableView = null;
+ fLargeCount = 1;
+ }
+
/**
* Prepares the table view of the attributes list for use.
*/
protected void prepareTableView() {
+ if (fLength > fTableViewBuckets) {
+ growTableView();
+ }
if (fAttributeTableView == null) {
fAttributeTableView = new Attribute[fTableViewBuckets];
fAttributeTableViewChainState = new int[fTableViewBuckets];
@@ -989,11 +1082,15 @@ protected void prepareTableView() {
* previously read.
*/
protected void prepareAndPopulateTableView() {
+ prepareAndPopulateTableView(fLength);
+ }
+
+ private void prepareAndPopulateTableView(final int count) {
prepareTableView();
- // Need to populate the hash table with the attributes we've scanned so far.
+ // Need to populate the hash table with the attributes we've processed so far.
Attribute attr;
int bucket;
- for (int i = 0; i < fLength; ++i) {
+ for (int i = 0; i < count; ++i) {
attr = fAttributes[i];
bucket = getTableViewBucket(attr.name.rawname);
if (fAttributeTableViewChainState[bucket] != fLargeCount) {
@@ -1009,6 +1009,55 @@ public class XMLAttributesImpl
}
}
+ private void prepareAndPopulateTableViewNS(final int count) {
+ prepareTableView();
+ // Need to populate the hash table with the attributes we've processed so far.
+ Attribute attr;
+ int bucket;
+ for (int i = 0; i < count; ++i) {
+ attr = fAttributes[i];
+ bucket = getTableViewBucket(attr.name.localpart, attr.name.uri);
+ if (fAttributeTableViewChainState[bucket] != fLargeCount) {
+ fAttributeTableViewChainState[bucket] = fLargeCount;
+ attr.next = null;
+ fAttributeTableView[bucket] = attr;
+ }
+ else {
+ // Update table view
+ attr.next = fAttributeTableView[bucket];
+ fAttributeTableView[bucket] = attr;
+ }
+ }
+ }
+
+ /**
+ * Randomly selects a new hash function and reorganizes the table view
+ * in order to more evenly distribute its entries. This method is called
+ * automatically when the number of attributes in one bucket exceeds
+ * MAX_HASH_COLLISIONS.
+ */
+ private void rebalanceTableView(final int count) {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE + 1];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ prepareAndPopulateTableView(count);
+ }
+
+ /**
+ * Randomly selects a new hash function and reorganizes the table view
+ * in order to more evenly distribute its entries. This method is called
+ * automatically when the number of attributes in one bucket exceeds
+ * MAX_HASH_COLLISIONS.
+ */
+ private void rebalanceTableViewNS(final int count) {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE + 1];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ prepareAndPopulateTableViewNS(count);
+ }
+
//
// Classes
//