lucene4/lucene-4.10.4-morfologik-stemming.patch
2020-08-19 14:23:46 +08:00

180 lines
7.0 KiB
Diff

diff -Nru lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
--- lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java 2014-08-21 05:12:52.000000000 +0200
+++ lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java 2016-04-30 01:39:55.894913112 +0200
@@ -20,6 +20,9 @@
import java.io.Reader;
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.polish.PolishStemmer;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
@@ -31,7 +34,7 @@
* @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
*/
public class MorfologikAnalyzer extends Analyzer {
- private final String dictionary;
+ private final Dictionary dictionary;
/**
* Builds an analyzer with an explicit dictionary resource.
@@ -43,32 +46,15 @@
*
* @see "http://morfologik.blogspot.com/"
*/
- public MorfologikAnalyzer(final String dictionaryResource) {
- this.dictionary = dictionaryResource;
- }
-
- /**
- * @deprecated Use {@link #MorfologikAnalyzer(String)}
- */
- @Deprecated
- public MorfologikAnalyzer(final Version version, final String dictionaryResource) {
- setVersion(version);
- this.dictionary = dictionaryResource;
+ public MorfologikAnalyzer(final Dictionary dictionary) {
+ this.dictionary = dictionary;
}
/**
* Builds an analyzer with the default Morfologik's Polish dictionary.
*/
public MorfologikAnalyzer() {
- this(MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
- }
-
- /**
- * @deprecated Use {@link #MorfologikAnalyzer()}
- */
- @Deprecated
- public MorfologikAnalyzer(final Version version) {
- this(version, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
+ this(new PolishStemmer().getDictionary());
}
/**
@@ -88,6 +74,6 @@
return new TokenStreamComponents(
src,
- new MorfologikFilter(new StandardFilter(getVersion(), src), dictionary, getVersion()));
+ new MorfologikFilter(new StandardFilter(src), dictionary));
}
}
diff -Nru lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
--- lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java 2014-08-21 05:12:52.000000000 +0200
+++ lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java 2016-04-30 01:08:24.560899492 +0200
@@ -18,8 +18,13 @@
*/
import java.util.Map;
+import java.util.Objects;
import java.util.logging.Logger;
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryMetadata;
+import morfologik.stemming.polish.PolishStemmer;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -48,6 +53,9 @@
*/
private final String dictionaryResource;
+ /** Stemming {@link Dictionary} for filters built by {@link #create(TokenStream)}; {@code null} until one is loaded. */
+ private Dictionary dictionary;
+
/** Schema attribute. */
@Deprecated
public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
@@ -79,6 +87,11 @@
@Override
public TokenStream create(TokenStream ts) {
- return new MorfologikFilter(ts, dictionaryResource);
+ // Nothing in this patch assigns 'dictionary', so a hard non-null check here would make
+ // every call fail. Fall back to the bundled Polish dictionary, the same default that
+ // MorfologikAnalyzer() and MorfologikFilter(TokenStream) use.
+ // NOTE(review): a non-default 'dictionaryResource' is still not loaded here - confirm whether that is needed.
+ final Dictionary dict = (dictionary != null) ? dictionary : new PolishStemmer().getDictionary();
+ return new MorfologikFilter(ts, dict);
}
}
diff -Nru lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
--- lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java 2014-08-21 05:12:52.000000000 +0200
+++ lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java 2016-04-30 01:25:55.949415627 +0200
@@ -22,7 +22,11 @@
import java.util.*;
import java.util.regex.Pattern;
-import morfologik.stemming.*;
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+import morfologik.stemming.polish.PolishStemmer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -49,7 +53,7 @@
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
private final CharsRefBuilder scratch = new CharsRefBuilder();
- private final CharacterUtils charUtils;
+ private final CharacterUtils charUtils = CharacterUtils.getInstance();
private State current;
private final TokenStream input;
@@ -64,46 +68,20 @@
* Creates a filter with the default (Polish) dictionary.
*/
public MorfologikFilter(final TokenStream in) {
- this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
+ this(in, new PolishStemmer().getDictionary());
}
/**
- * @deprecated Use {@link #MorfologikFilter(TokenStream)}
- */
- @Deprecated
- public MorfologikFilter(final TokenStream in, final Version version) {
- this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE, version);
- }
-
- /**
- * Creates a filter with a given dictionary resource.
+ * Creates a filter with a given dictionary.
*
* @param in input token stream.
- * @param dict Dictionary resource from classpath.
+ * @param dict Dictionary to use for stemming.
*/
- public MorfologikFilter(final TokenStream in, final String dict) {
- this(in, dict, Version.LATEST);
- }
-
- /**
- * @deprecated Use {@link #MorfologikFilter(TokenStream,String)}
- */
- @Deprecated
- public MorfologikFilter(final TokenStream in, final String dict, final Version version) {
+ public MorfologikFilter(final TokenStream in, final Dictionary dict) {
super(in);
this.input = in;
-
- // SOLR-4007: temporarily substitute context class loader to allow finding dictionary resources.
- Thread me = Thread.currentThread();
- ClassLoader cl = me.getContextClassLoader();
- try {
- me.setContextClassLoader(morfologik.stemming.Dictionary.class.getClassLoader());
- this.stemmer = new DictionaryLookup(morfologik.stemming.Dictionary.getForLanguage(dict));
- this.charUtils = CharacterUtils.getInstance(version);
- this.lemmaList = Collections.emptyList();
- } finally {
- me.setContextClassLoader(cl);
- }
+ this.stemmer = new DictionaryLookup(dict);
+ this.lemmaList = Collections.emptyList();
}
/**