180 lines
7.0 KiB
Diff
180 lines
7.0 KiB
Diff
diff -Nru lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
|
|
--- lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java 2014-08-21 05:12:52.000000000 +0200
|
|
+++ lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java 2016-04-30 01:39:55.894913112 +0200
|
|
@@ -20,6 +20,9 @@
|
|
|
|
import java.io.Reader;
|
|
|
|
+import morfologik.stemming.Dictionary;
|
|
+import morfologik.stemming.polish.PolishStemmer;
|
|
+
|
|
import org.apache.lucene.analysis.Analyzer;
|
|
import org.apache.lucene.analysis.Tokenizer;
|
|
import org.apache.lucene.analysis.standard.StandardFilter;
|
|
@@ -31,7 +34,7 @@
|
|
* @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
|
|
*/
|
|
public class MorfologikAnalyzer extends Analyzer {
|
|
- private final String dictionary;
|
|
+ private final Dictionary dictionary;
|
|
|
|
/**
|
|
* Builds an analyzer with an explicit dictionary resource.
|
|
@@ -43,32 +46,15 @@
|
|
*
|
|
* @see "http://morfologik.blogspot.com/"
|
|
*/
|
|
- public MorfologikAnalyzer(final String dictionaryResource) {
|
|
- this.dictionary = dictionaryResource;
|
|
- }
|
|
-
|
|
- /**
|
|
- * @deprecated Use {@link #MorfologikAnalyzer(String)}
|
|
- */
|
|
- @Deprecated
|
|
- public MorfologikAnalyzer(final Version version, final String dictionaryResource) {
|
|
- setVersion(version);
|
|
- this.dictionary = dictionaryResource;
|
|
+ public MorfologikAnalyzer(final Dictionary dictionary) {
|
|
+ this.dictionary = dictionary;
|
|
}
|
|
|
|
/**
|
|
* Builds an analyzer with the default Morfologik's Polish dictionary.
|
|
*/
|
|
public MorfologikAnalyzer() {
|
|
- this(MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
|
|
- }
|
|
-
|
|
- /**
|
|
- * @deprecated Use {@link #MorfologikAnalyzer()}
|
|
- */
|
|
- @Deprecated
|
|
- public MorfologikAnalyzer(final Version version) {
|
|
- this(version, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
|
|
+ this(new PolishStemmer().getDictionary());
|
|
}
|
|
|
|
/**
|
|
@@ -88,6 +74,6 @@
|
|
|
|
return new TokenStreamComponents(
|
|
src,
|
|
- new MorfologikFilter(new StandardFilter(getVersion(), src), dictionary, getVersion()));
|
|
+ new MorfologikFilter(new StandardFilter(src), dictionary));
|
|
}
|
|
}
|
|
diff -Nru lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
|
|
--- lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java 2014-08-21 05:12:52.000000000 +0200
|
|
+++ lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java 2016-04-30 01:08:24.560899492 +0200
|
|
@@ -18,8 +18,13 @@
|
|
*/
|
|
|
|
import java.util.Map;
|
|
+import java.util.Objects;
|
|
import java.util.logging.Logger;
|
|
|
|
+import morfologik.stemming.Dictionary;
|
|
+import morfologik.stemming.DictionaryMetadata;
|
|
+import morfologik.stemming.polish.PolishStemmer;
|
|
+
|
|
import org.apache.lucene.analysis.TokenStream;
|
|
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|
|
|
@@ -48,6 +53,9 @@
|
|
*/
|
|
private final String dictionaryResource;
|
|
|
|
+ /** Loaded {@link Dictionary}, initialized on {@link #inform(ResourceLoader)}. */
|
|
+ private Dictionary dictionary;
|
|
+
|
|
/** Schema attribute. */
|
|
@Deprecated
|
|
public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
|
|
@@ -79,6 +87,6 @@
|
|
|
|
@Override
|
|
public TokenStream create(TokenStream ts) {
|
|
- return new MorfologikFilter(ts, dictionaryResource);
|
|
+ return new MorfologikFilter(ts, Objects.requireNonNull(dictionary, "MorfologikFilterFactory was not fully initialized."));
|
|
}
|
|
}
|
|
diff -Nru lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
|
|
--- lucene-4.10.4/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java 2014-08-21 05:12:52.000000000 +0200
|
|
+++ lucene-4.10.4.morfologik-stemming/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java 2016-04-30 01:25:55.949415627 +0200
|
|
@@ -22,7 +22,11 @@
|
|
import java.util.*;
|
|
import java.util.regex.Pattern;
|
|
|
|
-import morfologik.stemming.*;
|
|
+import morfologik.stemming.Dictionary;
|
|
+import morfologik.stemming.DictionaryLookup;
|
|
+import morfologik.stemming.IStemmer;
|
|
+import morfologik.stemming.WordData;
|
|
+import morfologik.stemming.polish.PolishStemmer;
|
|
|
|
import org.apache.lucene.analysis.TokenFilter;
|
|
import org.apache.lucene.analysis.TokenStream;
|
|
@@ -49,7 +53,7 @@
|
|
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
|
|
|
|
private final CharsRefBuilder scratch = new CharsRefBuilder();
|
|
- private final CharacterUtils charUtils;
|
|
+ private final CharacterUtils charUtils = CharacterUtils.getInstance();
|
|
|
|
private State current;
|
|
private final TokenStream input;
|
|
@@ -64,46 +68,20 @@
|
|
* Creates a filter with the default (Polish) dictionary.
|
|
*/
|
|
public MorfologikFilter(final TokenStream in) {
|
|
- this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
|
|
+ this(in, new PolishStemmer().getDictionary());
|
|
}
|
|
|
|
/**
|
|
- * @deprecated Use {@link #MorfologikFilter(TokenStream)}
|
|
- */
|
|
- @Deprecated
|
|
- public MorfologikFilter(final TokenStream in, final Version version) {
|
|
- this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE, version);
|
|
- }
|
|
-
|
|
- /**
|
|
- * Creates a filter with a given dictionary resource.
|
|
+ * Creates a filter with a given dictionary.
|
|
*
|
|
* @param in input token stream.
|
|
- * @param dict Dictionary resource from classpath.
|
|
+ * @param dict Dictionary to use for stemming.
|
|
*/
|
|
- public MorfologikFilter(final TokenStream in, final String dict) {
|
|
- this(in, dict, Version.LATEST);
|
|
- }
|
|
-
|
|
- /**
|
|
- * @deprecated Use {@link #MorfologikFilter(TokenStream,String)}
|
|
- */
|
|
- @Deprecated
|
|
- public MorfologikFilter(final TokenStream in, final String dict, final Version version) {
|
|
+ public MorfologikFilter(final TokenStream in, final Dictionary dict) {
|
|
super(in);
|
|
this.input = in;
|
|
-
|
|
- // SOLR-4007: temporarily substitute context class loader to allow finding dictionary resources.
|
|
- Thread me = Thread.currentThread();
|
|
- ClassLoader cl = me.getContextClassLoader();
|
|
- try {
|
|
- me.setContextClassLoader(morfologik.stemming.Dictionary.class.getClassLoader());
|
|
- this.stemmer = new DictionaryLookup(morfologik.stemming.Dictionary.getForLanguage(dict));
|
|
- this.charUtils = CharacterUtils.getInstance(version);
|
|
- this.lemmaList = Collections.emptyList();
|
|
- } finally {
|
|
- me.setContextClassLoader(cl);
|
|
- }
|
|
+ this.stemmer = new DictionaryLookup(dict);
|
|
+ this.lemmaList = Collections.emptyList();
|
|
}
|
|
|
|
/**
|