diff --git a/4.0.0.tar.gz b/4.0.0.tar.gz
new file mode 100644
index 0000000..45e688a
Binary files /dev/null and b/4.0.0.tar.gz differ
diff --git a/tesseract.spec b/tesseract.spec
new file mode 100644
index 0000000..65337c2
--- /dev/null
+++ b/tesseract.spec
@@ -0,0 +1,91 @@
+Name: tesseract
+Version: 4.0.0
+Release: 1
+Summary: Raw OCR Engine
+License: Apache-2.0 and BSD-2-Clause
+URL: https://github.com/tesseract-ocr/%{name}
+Source0: https://github.com/tesseract-ocr/tesseract/archive/%{version}.tar.gz
+# Moves TESSDATA_PREFIX and the tessdata install directories under <datadir>/tesseract/
+Patch0: tesseract_datadir.patch
+BuildRequires: automake autoconf-archive gcc-c++ libtool libtiff-devel leptonica-devel
+BuildRequires: cairo-devel libicu-devel pango-devel
+BuildRequires: tesseract-langpack-eng
+Requires: tesseract-langpack-eng
+
+%description
+A commercial quality OCR engine originally developed at HP between 1985 and
+1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was
+open-sourced by HP and UNLV in 2005.
+
+%package devel
+Summary: Development files for %{name}
+Requires: %{name}%{?_isa} = %{version}-%{release}
+%description devel
+The %{name}-devel package contains header files for
+developing applications that use %{name}.
+
+%package tools
+Summary: Training tools for %{name}
+Requires: %{name}%{?_isa} = %{version}-%{release}
+%description tools
+The %{name}-tools package contains tools for training %{name}.
+
+%prep
+%autosetup -p1 -n %{name}-%{version}
+
+%build
+# Regenerate the autotools output: Patch0 edits Makefile.am inputs
+autoreconf -ifv
+%configure --disable-static
+%make_build
+%make_build training
+
+%install
+%make_install
+%make_install training-install
+# Delete libtool archives (*.la) from the installed library directory
+find %{buildroot}%{_libdir} -type f -name '*.la' -delete
+# Make sure the tessdata directory named in the files list exists
+mkdir -p %{buildroot}%{_datadir}/%{name}/tessdata/
+
+%ldconfig_scriptlets
+
+%files
+%license LICENSE
+%doc AUTHORS ChangeLog README.md
+%{_bindir}/%{name}
+%dir %{_datadir}/%{name}
+%dir %{_datadir}/%{name}/tessdata
+%{_datadir}/%{name}/tessdata/configs/
+%{_datadir}/%{name}/tessdata/tessconfigs/
+%{_datadir}/%{name}/tessdata/pdf.ttf
+%{_libdir}/lib%{name}*.so.4*
+
+%files devel
+%{_includedir}/%{name}
+%{_libdir}/lib%{name}*.so
+%{_libdir}/pkgconfig/%{name}.pc
+
+%files tools
+%{_bindir}/ambiguous_words
+%{_bindir}/classifier_tester
+%{_bindir}/cntraining
+%{_bindir}/combine_lang_model
+%{_bindir}/combine_tessdata
+%{_bindir}/dawg2wordlist
+%{_bindir}/language-specific.sh
+%{_bindir}/lstmeval
+%{_bindir}/lstmtraining
+%{_bindir}/merge_unicharsets
+%{_bindir}/mftraining
+%{_bindir}/set_unicharset_properties
+%{_bindir}/shapeclustering
+%{_bindir}/tesstrain.sh
+%{_bindir}/tesstrain_utils.sh
+%{_bindir}/text2image
+%{_bindir}/unicharset_extractor
+%{_bindir}/wordlist2dawg
+
+%changelog
+* Fri Jan 8 2021 chengzihan - 4.0.0-1
+- Package init
diff --git a/tesseract_datadir.patch b/tesseract_datadir.patch
new file mode 100644
index 0000000..c98e3a4
--- /dev/null
+++ b/tesseract_datadir.patch
@@ -0,0 +1,49 @@
+diff -rupN tesseract-4.0.0/src/ccutil/Makefile.am tesseract-4.0.0-new/src/ccutil/Makefile.am
+--- tesseract-4.0.0/src/ccutil/Makefile.am	2018-10-29 09:53:12.000000000 +0100
++++ tesseract-4.0.0-new/src/ccutil/Makefile.am	2018-11-13 15:23:08.331318335 +0100
+@@ -3,7 +3,7 @@ AM_CXXFLAGS =
+ AM_CPPFLAGS =
+ 
+ if !NO_TESSDATA_PREFIX
+-AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@
++AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@/tesseract/
+ endif
+ 
+ if VISIBILITY
+diff -rupN tesseract-4.0.0/src/lstm/Makefile.am tesseract-4.0.0-new/src/lstm/Makefile.am
+--- tesseract-4.0.0/src/lstm/Makefile.am	2018-10-29 09:53:12.000000000 +0100
++++ tesseract-4.0.0-new/src/lstm/Makefile.am	2018-11-13 15:23:08.332317564 +0100
+@@ -12,7 +12,7 @@ SUBDIRS =
+ AM_CXXFLAGS = $(OPENMP_CXXFLAGS)
+ 
+ if !NO_TESSDATA_PREFIX
+-AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@
++AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@/tesseract/
+ endif
+ 
+ if VISIBILITY
+diff -rupN tesseract-4.0.0/tessdata/configs/Makefile.am tesseract-4.0.0-new/tessdata/configs/Makefile.am
+--- tesseract-4.0.0/tessdata/configs/Makefile.am	2018-10-29 09:53:12.000000000 +0100
++++ tesseract-4.0.0-new/tessdata/configs/Makefile.am	2018-11-13 15:23:08.332317564 +0100
+@@ -1,3 +1,3 @@
+-datadir = @datadir@/tessdata/configs
++datadir = @datadir@/tesseract/tessdata/configs
+ data_DATA = inter makebox box.train unlv ambigs.train lstm.train lstmdebug api_config kannada box.train.stderr quiet logfile digits hocr tsv linebox pdf rebox strokewidth bigram txt
+ EXTRA_DIST = inter makebox box.train unlv ambigs.train lstm.train lstmdebug api_config kannada box.train.stderr quiet logfile digits hocr tsv linebox pdf rebox strokewidth bigram txt
+diff -rupN tesseract-4.0.0/tessdata/Makefile.am tesseract-4.0.0-new/tessdata/Makefile.am
+--- tesseract-4.0.0/tessdata/Makefile.am	2018-10-29 09:53:12.000000000 +0100
++++ tesseract-4.0.0-new/tessdata/Makefile.am	2018-11-13 15:23:08.332317564 +0100
+@@ -1,4 +1,4 @@
+-datadir = @datadir@/tessdata
++datadir = @datadir@/tesseract/tessdata
+ 
+ data_DATA = pdf.ttf
+ EXTRA_DIST = $(data_DATA)
+diff -rupN tesseract-4.0.0/tessdata/tessconfigs/Makefile.am tesseract-4.0.0-new/tessdata/tessconfigs/Makefile.am
+--- tesseract-4.0.0/tessdata/tessconfigs/Makefile.am	2018-10-29 09:53:12.000000000 +0100
++++ tesseract-4.0.0-new/tessdata/tessconfigs/Makefile.am	2018-11-13 15:23:08.332317564 +0100
+@@ -1,3 +1,3 @@
+-datadir = @datadir@/tessdata/tessconfigs
++datadir = @datadir@/tesseract/tessdata/tessconfigs
+ data_DATA = batch batch.nochop nobatch matdemo segdemo msdemo
+ EXTRA_DIST = batch batch.nochop nobatch matdemo segdemo msdemo