aboutsummaryrefslogtreecommitdiff
path: root/tesseract.spec
blob: ff80fe78054a1c1ae613fe8d8c937bd0f6f51b5d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# This is a rough draft that may only work on Fedora Core 6.
# Andrew Ziem, 25 May 2007
# Hacked to add the new languages as separate language packs.
# Ray Smith, 16 July 2007


# Main package preamble: package identity, upstream location, source
# tarballs and build-time dependencies.
Name:           tesseract
Version:        2.00
Release:        1%{?dist}
Summary:        Open source OCR Engine developed by HP Labs - now improved by Google

Group:          Applications/Multimedia
# NOTE(review): distributions usually expect a short license identifier in
# this tag rather than free text -- confirm the exact identifier and update.
License:        Apache License
URL:            http://code.google.com/p/tesseract-ocr/
# Source0 is the engine itself; Source1 through Source6 are the per-language
# data tarballs (eng, fra, ita, deu, spa, nld) that are unpacked in the prep
# section and shipped as separate language subpackages.
Source0:        http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.tar.gz
Source1:        http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.eng.tar.gz
Source2:        http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.fra.tar.gz
Source3:        http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.ita.tar.gz
Source4:        http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.deu.tar.gz
Source5:        http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.spa.tar.gz
Source6:        http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.nld.tar.gz
# Staging directory that the install section populates and the clean section
# removes.
BuildRoot:      %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)

# Dependency on the older gcc 3.4 compatibility compiler, currently disabled
# (the build section notes that gcc 4.1 should work).
#BuildRequires:  compat-gcc-34-c++
BuildRequires:  libtiff-devel

# Development subpackage: headers and libraries for building against tesseract.
%package devel
Summary: Development files for tesseract
Group: Development/Libraries
# Pin the dependency to the exact version AND release of the main package so
# that a rebuilt main package can never be paired with a stale devel package.
Requires: %{name} = %{version}-%{release}


%description
The Tesseract OCR engine was one of the top 3 engines in the 1995 UNLV
Accuracy test. Since then it has had little work done on it, but it is
probably one of the most accurate open source OCR engines available. The
source code will read a binary, grey or color image and output text. A tiff
reader is built in that will read uncompressed TIFF images, or libtiff can
be added to read compressed images.

%description devel
tesseract libraries and includes

%prep
# Unpack the main source tarball (Source0) and change into the unpacked tree.
%setup -q

# Unpack each language data tarball from inside the main source tree.
# The archives are referenced through their numbered source macros instead of
# re-spelling the file names under the sources directory, so this section
# always follows whatever the Source tags in the preamble declare.
tar xzvf %{SOURCE1}
tar xzvf %{SOURCE2}
tar xzvf %{SOURCE3}
tar xzvf %{SOURCE4}
tar xzvf %{SOURCE5}
tar xzvf %{SOURCE6}


%build
# Export empty CFLAGS and CXXFLAGS before configuring.
# NOTE(review): presumably meant to keep inherited or distribution compiler
# flags out of the build -- confirm this is still wanted.
export CFLAGS=
export CXXFLAGS=
# Should build with gcc4.1 now...
# (the gcc 3.4 compiler overrides below are therefore left disabled)
#export CC=gcc34
#export CXX=g++34
# The stock configure macro is left disabled on the next line; configure is
# invoked by hand instead, passing only the binary, data, library and include
# directories.
# % configure
./configure --bindir=%{_bindir} --datadir=%{_datadir} --libdir=%{_libdir}  --includedir=%{_includedir}
# Build, using the parallel-make flags when that macro is defined.
make %{?_smp_mflags}


%install
# Start from an empty staging tree, then let the upstream Makefile install
# into it via DESTDIR.
rm -rf %{buildroot}
make DESTDIR=%{buildroot} install


%clean
# Remove the staging tree.
rm -rf %{buildroot}


# Main package payload: documentation, the OCR and training executables, and
# the language-independent configuration data.
%files
%defattr(-,root,root,-)
%doc AUTHORS COPYING ChangeLog README
%{_bindir}/cntraining
%{_bindir}/mftraining
%{_bindir}/tesseract
%{_bindir}/unicharset_extractor
%{_bindir}/wordlist2dawg
# Own the tessdata directory itself (directory entry only, not its contents)
# so it is not left unowned; the language subpackages place their files in it.
%dir %{_datadir}/tessdata
%{_datadir}/tessdata/configs
%{_datadir}/tessdata/confsets
%{_datadir}/tessdata/tessconfigs

# Development payload: public headers and the tesseract libraries.
%files devel
# Force root ownership and default modes, matching the main files list, so
# the packaged files never inherit the build user's ownership.
%defattr(-,root,root,-)
%{_includedir}/tesseract/
%{_libdir}/libtesseract*

# English language data subpackage.
%package eng
Group:          Applications/Multimedia
Summary:        English language pack for tesseract
%description eng
The %{name}-%{version}.eng package contains the data files required to recognize English

%files eng
# Force root ownership and default modes, matching the main files list, so
# the packaged files never inherit the build user's ownership.
%defattr(-,root,root,-)
%{_datadir}/tessdata/eng.DangAmbigs
%{_datadir}/tessdata/eng.freq-dawg
%{_datadir}/tessdata/eng.inttemp
%{_datadir}/tessdata/eng.normproto
%{_datadir}/tessdata/eng.pffmtable
%{_datadir}/tessdata/eng.unicharset
%{_datadir}/tessdata/eng.user-words
%{_datadir}/tessdata/eng.word-dawg

# French language data subpackage.
%package fra
Group:          Applications/Multimedia
Summary:        French language pack for tesseract
%description fra
The %{name}-%{version}.fra package contains the data files required to recognize French

%files fra
# Force root ownership and default modes, matching the main files list, so
# the packaged files never inherit the build user's ownership.
%defattr(-,root,root,-)
%{_datadir}/tessdata/fra.DangAmbigs
%{_datadir}/tessdata/fra.freq-dawg
%{_datadir}/tessdata/fra.inttemp
%{_datadir}/tessdata/fra.normproto
%{_datadir}/tessdata/fra.pffmtable
%{_datadir}/tessdata/fra.unicharset
%{_datadir}/tessdata/fra.user-words
%{_datadir}/tessdata/fra.word-dawg

# Italian language data subpackage.
%package ita
Group:          Applications/Multimedia
Summary:        Italian language pack for tesseract
%description ita
The %{name}-%{version}.ita package contains the data files required to recognize Italian

%files ita
# Force root ownership and default modes, matching the main files list, so
# the packaged files never inherit the build user's ownership.
%defattr(-,root,root,-)
%{_datadir}/tessdata/ita.DangAmbigs
%{_datadir}/tessdata/ita.freq-dawg
%{_datadir}/tessdata/ita.inttemp
%{_datadir}/tessdata/ita.normproto
%{_datadir}/tessdata/ita.pffmtable
%{_datadir}/tessdata/ita.unicharset
%{_datadir}/tessdata/ita.user-words
%{_datadir}/tessdata/ita.word-dawg

# German language data subpackage.
%package deu
Group:          Applications/Multimedia
Summary:        German language pack for tesseract
%description deu
The %{name}-%{version}.deu package contains the data files required to recognize German

%files deu
# Force root ownership and default modes, matching the main files list, so
# the packaged files never inherit the build user's ownership.
%defattr(-,root,root,-)
%{_datadir}/tessdata/deu.DangAmbigs
%{_datadir}/tessdata/deu.freq-dawg
%{_datadir}/tessdata/deu.inttemp
%{_datadir}/tessdata/deu.normproto
%{_datadir}/tessdata/deu.pffmtable
%{_datadir}/tessdata/deu.unicharset
%{_datadir}/tessdata/deu.user-words
%{_datadir}/tessdata/deu.word-dawg

# Spanish language data subpackage.
%package spa
Group:          Applications/Multimedia
Summary:        Spanish language pack for tesseract
%description spa
The %{name}-%{version}.spa package contains the data files required to recognize Spanish

%files spa
# Force root ownership and default modes, matching the main files list, so
# the packaged files never inherit the build user's ownership.
%defattr(-,root,root,-)
%{_datadir}/tessdata/spa.DangAmbigs
%{_datadir}/tessdata/spa.freq-dawg
%{_datadir}/tessdata/spa.inttemp
%{_datadir}/tessdata/spa.normproto
%{_datadir}/tessdata/spa.pffmtable
%{_datadir}/tessdata/spa.unicharset
%{_datadir}/tessdata/spa.user-words
%{_datadir}/tessdata/spa.word-dawg

# Dutch language data subpackage.
%package nld
Group:          Applications/Multimedia
Summary:        Dutch language pack for tesseract
%description nld
The %{name}-%{version}.nld package contains the data files required to recognize Dutch

%files nld
# Force root ownership and default modes, matching the main files list, so
# the packaged files never inherit the build user's ownership.
%defattr(-,root,root,-)
%{_datadir}/tessdata/nld.DangAmbigs
%{_datadir}/tessdata/nld.freq-dawg
%{_datadir}/tessdata/nld.inttemp
%{_datadir}/tessdata/nld.normproto
%{_datadir}/tessdata/nld.pffmtable
%{_datadir}/tessdata/nld.unicharset
%{_datadir}/tessdata/nld.user-words
%{_datadir}/tessdata/nld.word-dawg


%changelog