!1 package init

Merge pull request !1 from small_leek/master
This commit is contained in:
openeuler-ci-bot 2020-08-12 17:19:25 +08:00 committed by Gitee
commit e80b16d979
6 changed files with 2409 additions and 0 deletions

View File

@ -0,0 +1,21 @@
--- build.xml 2011-05-28 16:56:41.000000000 +0200
+++ build.xml-gil 2011-08-15 17:57:57.279492364 +0200
@@ -53,7 +53,7 @@
<property name="build.main" value="${build.dir}/main" />
<property name="build.demo" value="${build.dir}/demo" />
- <property name="lib.dir" value="${app.dir}/lib" />
+ <property name="lib.dir" value="/usr/share/java" />
<property name="src.main" value="${app.dir}/src/main" />
<property name="src.demo" value="${app.dir}/src/demo" />
<property name="dist.dir" value="${app.dir}/dist" />
@@ -67,7 +67,8 @@
<path id="classpath.libs">
<fileset dir="${lib.dir}">
- <include name="**/*.jar" />
+ <include name="nekohtml.jar" />
+ <include name="xerces-j2.jar" />
</fileset>
</path>

File diff suppressed because it is too large Load Diff

BIN
boilerpipe-1.2.0-src.tar.gz Normal file

Binary file not shown.

35
boilerpipe-1.2.0.pom Normal file
View File

@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven POM for the boilerpipe 1.2.0 jar. The packaging spec copies this
     file to pom.xml during prep and registers the built jar against it. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Artifact coordinates -->
  <groupId>de.l3s.boilerpipe</groupId>
  <artifactId>boilerpipe</artifactId>
  <packaging>jar</packaging>
  <version>1.2.0</version>

  <url>http://code.google.com/p/boilerpipe/</url>
  <licenses>
    <license>
      <name>Apache License 2.0</name>
    </license>
  </licenses>
  <name>Boilerpipe -- Boilerplate Removal and Fulltext Extraction from HTML pages</name>
  <!-- The description is text content: its inner lines are deliberately left unindented. -->
  <description>The boilerpipe library provides algorithms to detect and remove the surplus "clutter" (boilerplate, templates) around the main textual content of a web page.
The library already provides specific strategies for common tasks (for example: news article extraction) and may also be easily extended for individual problem settings.
Extracting content is very fast (milliseconds), just needs the input document (no global or site-level information required) and is usually quite accurate.
Boilerpipe is a Java library written by Christian Kohlschütter. It is released under the Apache License 2.0.
The algorithms used by the library are based on (and extending) some concepts of the paper "Boilerplate Detection using Shallow Text Features" by Christian Kohlschütter et al., presented at WSDM 2010 -- The Third ACM International Conference on Web Search and Data Mining New York City, NY USA.
  </description>
  <scm>
    <connection>scm:svn:http://boilerpipe.googlecode.com/svn/trunk/</connection>
    <url>http://code.google.com/p/boilerpipe/source/browse/</url>
  </scm>
  <developers>
    <developer>
      <name>Christian Kohlschütter</name>
    </developer>
  </developers>
</project>

121
boilerpipe.spec Normal file
View File

@ -0,0 +1,121 @@
# Package metadata for the boilerpipe Java library.
Name: boilerpipe
Version: 1.2.0
Release: 1
Summary: Boilerplate Removal and Fulltext Extraction from HTML pages
License: ASL 2.0
Url: https://github.com/kohlschutter/boilerpipe
# Source0 is the source tarball; Source1 is the POM that the prep section copies to pom.xml.
# NOTE(review): both sources point at googlecode.com hosts while Url points at GitHub;
# confirm these download locations still resolve.
Source0: http://boilerpipe.googlecode.com/files/%{name}-%{version}-src.tar.gz
Source1: http://boilerpipe.googlecode.com/svn/repo/de/l3s/%{name}/%{name}/%{version}/%{name}-%{version}.pom
# Patch0 is applied with -p0 and Patch1 with -p1 in the prep section.
Patch0: %{name}-1.2.0-libdir-patch
Patch1: %{name}-1.2.0-nekohtml-patch
# ant drives the build; javapackages-local presumably supplies the mvn_* macros used at install time (confirm).
BuildRequires: ant java-devel javapackages-local nekohtml xerces-j2
# noarch: the resulting binary packages are not tied to a CPU architecture.
BuildArch: noarch
%description
The boilerpipe library provides algorithms to detect and
remove the surplus "clutter" (boilerplate, templates)
around the main textual content of a web page.
The library already provides specific strategies
for common tasks (for example: news article extraction) and
may also be easily extended for individual problem settings.
Extracting content is very fast (milliseconds), just needs the
input document (no global or site-level information required) and
is usually quite accurate.
%package javadoc
Summary: Javadoc for %{name}
%description javadoc
This package contains javadoc for %{name}.
%prep
%setup -q
# Delete every .jar and .class file found in the unpacked source tree.
find . -iname '*.jar' -delete
find . -iname '*.class' -delete
# Patch0 is applied with strip level 0.
%patch0 -p0
# Use the Source1 POM as the project's pom.xml.
cp %{SOURCE1} pom.xml
# Patch1 is applied with strip level 1, after pom.xml is in place.
# NOTE(review): presumably ordered this way because Patch1 touches pom.xml; confirm.
%patch1 -p1
# Rewrite the listed sources in place with native2ascii, reading them as UTF-8.
# Each file is listed exactly once: converting the same file twice is redundant work.
for s in src/main/de/l3s/boilerpipe/BoilerpipeInput.java \
    src/main/de/l3s/boilerpipe/BoilerpipeFilter.java \
    src/main/de/l3s/boilerpipe/BoilerpipeExtractor.java \
    src/main/de/l3s/boilerpipe/BoilerpipeProcessingException.java \
    src/main/de/l3s/boilerpipe/conditions/TextBlockCondition.java \
    src/main/de/l3s/boilerpipe/document/TextBlock.java \
    src/main/de/l3s/boilerpipe/document/TextDocumentStatistics.java \
    src/main/de/l3s/boilerpipe/document/TextDocument.java \
    src/main/de/l3s/boilerpipe/estimators/SimpleEstimator.java \
    src/main/de/l3s/boilerpipe/extractors/LargestContentExtractor.java \
    src/main/de/l3s/boilerpipe/extractors/DefaultExtractor.java \
    src/main/de/l3s/boilerpipe/extractors/NumWordsRulesExtractor.java \
    src/main/de/l3s/boilerpipe/extractors/KeepEverythingWithMinKWordsExtractor.java \
    src/main/de/l3s/boilerpipe/extractors/ExtractorBase.java \
    src/main/de/l3s/boilerpipe/extractors/ArticleSentencesExtractor.java \
    src/main/de/l3s/boilerpipe/extractors/CommonExtractors.java \
    src/main/de/l3s/boilerpipe/extractors/CanolaExtractor.java \
    src/main/de/l3s/boilerpipe/extractors/ArticleExtractor.java \
    src/main/de/l3s/boilerpipe/extractors/KeepEverythingExtractor.java \
    src/main/de/l3s/boilerpipe/filters/english/HeuristicFilterBase.java \
    src/main/de/l3s/boilerpipe/filters/english/KeepLargestFulltextBlockFilter.java \
    src/main/de/l3s/boilerpipe/filters/english/TerminatingBlocksFinder.java \
    src/main/de/l3s/boilerpipe/filters/english/IgnoreBlocksAfterContentFilter.java \
    src/main/de/l3s/boilerpipe/filters/english/IgnoreBlocksAfterContentFromEndFilter.java \
    src/main/de/l3s/boilerpipe/filters/english/DensityRulesClassifier.java \
    src/main/de/l3s/boilerpipe/filters/english/MinFulltextWordsFilter.java \
    src/main/de/l3s/boilerpipe/filters/english/NumWordsRulesClassifier.java \
    src/main/de/l3s/boilerpipe/filters/heuristics/SimpleBlockFusionProcessor.java \
    src/main/de/l3s/boilerpipe/filters/heuristics/BlockProximityFusion.java \
    src/main/de/l3s/boilerpipe/filters/heuristics/KeepLargestBlockFilter.java \
    src/main/de/l3s/boilerpipe/filters/heuristics/DocumentTitleMatchClassifier.java \
    src/main/de/l3s/boilerpipe/filters/heuristics/LabelFusion.java \
    src/main/de/l3s/boilerpipe/filters/heuristics/AddPrecedingLabelsFilter.java \
    src/main/de/l3s/boilerpipe/filters/heuristics/ExpandTitleToContentFilter.java \
    src/main/de/l3s/boilerpipe/filters/heuristics/ContentFusion.java \
    src/main/de/l3s/boilerpipe/filters/simple/MinWordsFilter.java \
    src/main/de/l3s/boilerpipe/filters/simple/LabelToBoilerplateFilter.java \
    src/main/de/l3s/boilerpipe/filters/simple/LabelToContentFilter.java \
    src/main/de/l3s/boilerpipe/filters/simple/InvertedFilter.java \
    src/main/de/l3s/boilerpipe/filters/simple/MinClauseWordsFilter.java \
    src/main/de/l3s/boilerpipe/filters/simple/BoilerplateBlockFilter.java \
    src/main/de/l3s/boilerpipe/filters/simple/SplitParagraphBlocksFilter.java \
    src/main/de/l3s/boilerpipe/filters/simple/MarkEverythingContentFilter.java \
    src/main/de/l3s/boilerpipe/labels/DefaultLabels.java \
    src/main/de/l3s/boilerpipe/labels/ConditionalLabelAction.java \
    src/main/de/l3s/boilerpipe/labels/LabelAction.java \
    src/main/de/l3s/boilerpipe/sax/BoilerpipeSAXInput.java \
    src/main/de/l3s/boilerpipe/sax/HTMLHighlighter.java \
    src/main/de/l3s/boilerpipe/sax/BoilerpipeHTMLContentHandler.java \
    src/main/de/l3s/boilerpipe/sax/BoilerpipeHTMLParser.java \
    src/main/de/l3s/boilerpipe/sax/TagActionMap.java \
    src/main/de/l3s/boilerpipe/sax/InputSourceable.java \
    src/main/de/l3s/boilerpipe/sax/HTMLDocument.java \
    src/main/de/l3s/boilerpipe/sax/CommonTagActions.java \
    src/main/de/l3s/boilerpipe/sax/DefaultTagActionMap.java \
    src/main/de/l3s/boilerpipe/sax/HTMLFetcher.java \
    src/main/de/l3s/boilerpipe/sax/TagAction.java \
    src/main/de/l3s/boilerpipe/sax/MarkupTagAction.java \
    src/main/de/l3s/boilerpipe/util/UnicodeTokenizer.java; do
  native2ascii -encoding UTF8 "${s}" "${s}"
done
%build
# Run the Ant build with the app.javaversion property overridden to 1.6.
ant -Dapp.javaversion=1.6
%install
# Associate the main jar under dist/ with pom.xml, give the artifact an
# unversioned installed file name, then install; -J names javadoc/1.2 as
# the javadoc directory.
%mvn_artifact pom.xml dist/%{name}-%{version}.jar
%mvn_file de.l3s.%{name}:%{name} %{name}
%mvn_install -J javadoc/1.2
# The demo jar is not registered above, so it is copied by hand (mode 644,
# timestamps preserved) under an unversioned name.
install -pm 644 dist/%{name}-demo-%{version}.jar \
%{buildroot}%{_javadir}/%{name}-demo.jar
# Main package: entries read from .mfiles plus the hand-installed demo jar.
# NOTE(review): .mfiles and .mfiles-javadoc are presumably written by the mvn_install step; confirm.
%files -f .mfiles
%{_javadir}/%{name}-demo.jar
%license LICENSE.txt NOTICE.txt
# Javadoc subpackage: entries read from .mfiles-javadoc.
%files javadoc -f .mfiles-javadoc
%license LICENSE.txt NOTICE.txt
%changelog
* Sat Jul 25 2020 chengzihan <chengzihan2@huawei.com> - 1.2.0-1
- Package init

4
boilerpipe.yaml Normal file
View File

@ -0,0 +1,4 @@
# Upstream tracking metadata for the boilerpipe package; every field is currently NA.
version_control: NA
src_repo: NA
tag_prefix: NA
# NOTE(review): the key below is spelled "seperator" in the file itself; presumably the
# consuming tooling expects this exact spelling, so confirm before renaming it.
seperator: NA