From f890ce20b623f2bea736d096229677c8b99556ad Mon Sep 17 00:00:00 2001 From: Michael Glavassevich Date: Wed, 16 Mar 2011 15:57:02 +0000 Subject: [PATCH] JIRA Issue #1499: http://issues.apache.org/jira/browse/XERCESJ-1499. Reducing the initial footprint of SymbolHash buckets within a SchemaGrammar from 1,515 to 177 (about 12% of the default size). Implemented a rehash() method on SymbolHash to grow the maps if they actually become filled. git-svn-id: https://svn.apache.org/repos/asf/xerces/java/trunk@1082175 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/xerces/impl/xs/SchemaGrammar.java | 44 ++++++++------- .../xerces/impl/xs/traversers/XSDHandler.java | 14 ++--- src/org/apache/xerces/util/SymbolHash.java | 55 +++++++++++++++++-- 3 files changed, 81 insertions(+), 32 deletions(-) diff --git a/src/org/apache/xerces/impl/xs/SchemaGrammar.java b/src/org/apache/xerces/impl/xs/SchemaGrammar.java index e522e9770..a4d5ca675 100644 --- a/src/org/apache/xerces/impl/xs/SchemaGrammar.java +++ b/src/org/apache/xerces/impl/xs/SchemaGrammar.java @@ -144,35 +144,39 @@ public SchemaGrammar(String targetNamespace, XSDDescription grammarDesc, fGrammarDescription = grammarDesc; fSymbolTable = symbolTable; - // REVISIT: do we know the numbers of the following global decls - // when creating this grammar? If so, we can pass the numbers in, - // and use that number to initialize the following hashtables. - fGlobalAttrDecls = new SymbolHash(); - fGlobalAttrGrpDecls = new SymbolHash(); - fGlobalElemDecls = new SymbolHash(); - fGlobalGroupDecls = new SymbolHash(); - fGlobalNotationDecls = new SymbolHash(); - fGlobalIDConstraintDecls = new SymbolHash(); + // REVISIT: the initial sizes being chosen for each SymbolHash + // may not be ideal and could still be tuned. They were chosen + // somewhat arbitrarily to reduce the initial footprint of + // SymbolHash buckets from 1,515 to 177 (about 12% of the + // default size). + fGlobalAttrDecls = new SymbolHash(12); + fGlobalAttrGrpDecls = new SymbolHash(5); + fGlobalElemDecls = new SymbolHash(25); + fGlobalGroupDecls = new SymbolHash(5); + fGlobalNotationDecls = new SymbolHash(1); + fGlobalIDConstraintDecls = new SymbolHash(3); // Extended tables - fGlobalAttrDeclsExt = new SymbolHash(); - fGlobalAttrGrpDeclsExt = new SymbolHash(); - fGlobalElemDeclsExt = new SymbolHash(); - fGlobalGroupDeclsExt = new SymbolHash(); - fGlobalNotationDeclsExt = new SymbolHash(); - fGlobalIDConstraintDeclsExt = new SymbolHash(); - fGlobalTypeDeclsExt = new SymbolHash(); + fGlobalAttrDeclsExt = new SymbolHash(12); + fGlobalAttrGrpDeclsExt = new SymbolHash(5); + fGlobalElemDeclsExt = new SymbolHash(25); + fGlobalGroupDeclsExt = new SymbolHash(5); + fGlobalNotationDeclsExt = new SymbolHash(1); + fGlobalIDConstraintDeclsExt = new SymbolHash(3); + fGlobalTypeDeclsExt = new SymbolHash(25); // All global elements table - fAllGlobalElemDecls = new SymbolHash(); + fAllGlobalElemDecls = new SymbolHash(25); // if we are parsing S4S, put built-in types in first // they might get overwritten by the types from S4S, but that's // considered what the application wants to do. - if (fTargetNamespace == SchemaSymbols.URI_SCHEMAFORSCHEMA) + if (fTargetNamespace == SchemaSymbols.URI_SCHEMAFORSCHEMA) { fGlobalTypeDecls = SG_SchemaNS.fGlobalTypeDecls.makeClone(); - else - fGlobalTypeDecls = new SymbolHash(); + } + else { + fGlobalTypeDecls = new SymbolHash(25); + } } // (String, XSDDescription) // Clone an existing schema grammar diff --git a/src/org/apache/xerces/impl/xs/traversers/XSDHandler.java b/src/org/apache/xerces/impl/xs/traversers/XSDHandler.java index e05409d6e..0937ac3a7 100644 --- a/src/org/apache/xerces/impl/xs/traversers/XSDHandler.java +++ b/src/org/apache/xerces/impl/xs/traversers/XSDHandler.java @@ -445,13 +445,13 @@ private String doc2SystemId(Element ele) { private String [][] fKeyrefNamespaceContext = new String[INIT_KEYREF_STACK][1]; // global decls: map from decl name to decl object - SymbolHash fGlobalAttrDecls = new SymbolHash(); - SymbolHash fGlobalAttrGrpDecls = new SymbolHash(); - SymbolHash fGlobalElemDecls = new SymbolHash(); - SymbolHash fGlobalGroupDecls = new SymbolHash(); - SymbolHash fGlobalNotationDecls = new SymbolHash(); - SymbolHash fGlobalIDConstraintDecls = new SymbolHash(); - SymbolHash fGlobalTypeDecls = new SymbolHash(); + SymbolHash fGlobalAttrDecls = new SymbolHash(12); + SymbolHash fGlobalAttrGrpDecls = new SymbolHash(5); + SymbolHash fGlobalElemDecls = new SymbolHash(25); + SymbolHash fGlobalGroupDecls = new SymbolHash(5); + SymbolHash fGlobalNotationDecls = new SymbolHash(1); + SymbolHash fGlobalIDConstraintDecls = new SymbolHash(3); + SymbolHash fGlobalTypeDecls = new SymbolHash(25); // Constructors public XSDHandler(){ diff --git a/src/org/apache/xerces/util/SymbolHash.java b/src/org/apache/xerces/util/SymbolHash.java index 63974da68..08caa7b03 100644 --- a/src/org/apache/xerces/util/SymbolHash.java +++ b/src/org/apache/xerces/util/SymbolHash.java @@ -17,7 +17,6 @@ package org.apache.xerces.util; - /** * This class is an unsynchronized hash table primary used for String * to Object mapping. @@ -78,7 +77,8 @@ public SymbolHash(int size) { * @param value */ public void put(Object key, Object value) { - int bucket = (key.hashCode() & 0x7FFFFFFF) % fTableSize; + final int hash = hash(key); + int bucket = hash % fTableSize; Entry entry = search(key, bucket); // replace old value @@ -87,6 +87,12 @@ public void put(Object key, Object value) { } // create new entry else { + if (fNum >= fTableSize) { + // Rehash the table if the number of entries + // would exceed the number of buckets. + rehash(); + bucket = hash % fTableSize; + } entry = new Entry(key, value, fBuckets[bucket]); fBuckets[bucket] = entry; fNum++; @@ -100,7 +106,7 @@ public void put(Object key, Object value) { * @return the value associated with the given key. */ public Object get(Object key) { - int bucket = (key.hashCode() & 0x7FFFFFFF) % fTableSize; + int bucket = hash(key) % fTableSize; Entry entry = search(key, bucket); if (entry != null) { return entry.value; @@ -156,14 +162,15 @@ public SymbolHash makeClone() { SymbolHash newTable = new SymbolHash(fTableSize); newTable.fNum = fNum; for (int i = 0; i < fTableSize; i++) { - if (fBuckets[i] != null) + if (fBuckets[i] != null) { newTable.fBuckets[i] = fBuckets[i].makeClone(); + } } return newTable; } /** - * Remove all key/value assocaition. This tries to save a bit of GC'ing + * Remove all key/value association. This tries to save a bit of GC'ing * by at least keeping the fBuckets array around. */ public void clear() { @@ -182,6 +182,44 @@ public class SymbolHash { return null; } + /** + * Returns a hashcode value for the specified key. + * + * @param key The key to hash. + */ + protected int hash(Object key) { + return key.hashCode() & 0x7FFFFFFF; + } + + /** + * Increases the capacity of and internally reorganizes this + * SymbolHash, in order to accommodate and access its entries more + * efficiently. This method is called automatically when the + * number of keys in the SymbolHash exceeds its number of buckets. + */ + protected void rehash() { + + final int oldCapacity = fBuckets.length; + final Entry[] oldTable = fBuckets; + + final int newCapacity = (oldCapacity << 1) + 1; + final Entry[] newTable = new Entry[newCapacity]; + + fBuckets = newTable; + fTableSize = fBuckets.length; + + for (int i = oldCapacity; i-- > 0;) { + for (Entry old = oldTable[i]; old != null; ) { + Entry e = old; + old = old.next; + + int index = hash(e.key) % newCapacity; + e.next = newTable[index]; + newTable[index] = e; + } + } + } + // // Classes //