aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIliyan Malchev <malchev@google.com>2008-09-24 09:36:58 -0700
committerIliyan Malchev <malchev@google.com>2008-09-24 09:36:58 -0700
commit49767402ccc6f1f40d42197e0976fe8269b8926b (patch)
tree906a76beae994d03f970f4349661db3447596fef
parent1a1dcc9716537c4e417946ec2ca12887615ce720 (diff)
downloadtesseract-49767402ccc6f1f40d42197e0976fe8269b8926b.tar.gz
add support for the ratings file to helium and tesseract's simple test app
Signed-off-by: Iliyan Malchev <malchev@google.com>
-rw-r--r--ccmain/test.cpp24
-rw-r--r--helium/tesseract.cpp21
-rw-r--r--helium/tesseract.h4
-rw-r--r--helium/test.cpp5
-rw-r--r--helium/textrecognition.cpp6
-rw-r--r--helium/textrecognition.h4
6 files changed, 49 insertions, 15 deletions
diff --git a/ccmain/test.cpp b/ccmain/test.cpp
index 15531c1..55d0a7b 100644
--- a/ccmain/test.cpp
+++ b/ccmain/test.cpp
@@ -9,6 +9,8 @@
#include <sys/stat.h>
#include "baseapi.h"
+#include "varable.h"
+#include "tessvars.h"
#define FAILIF(cond, msg...) do { \
if (cond) { \
@@ -18,21 +20,25 @@
} \
} while(0)
+// This is to make the ratings file parser happy.
+BOOL_VAR (tessedit_write_images, FALSE,
+ "Capture the image from the IPE");
int main(int argc, char **argv) {
- const char *infile, *outfile, *lang;
+ const char *infile, *outfile, *lang, *ratings;
void *buffer;
struct stat s;
int x, y, ifd;
- FAILIF(argc != 6,
- "tesstest infile xres yres outfile lang\n");
+ FAILIF(argc < 6 || argc > 7,
+ "tesstest infile xres yres outfile lang [ratings]\n");
infile = argv[1];
FAILIF(sscanf(argv[2], "%d", &x) != 1, "could not parse x!\n");
FAILIF(sscanf(argv[3], "%d", &y) != 1, "could not parse y!\n");
outfile = argv[4];
lang = argv[5];
+ ratings = argv[6];
printf("input file %s\n", infile);
ifd = open(infile, O_RDONLY);
@@ -47,15 +53,19 @@ int main(int argc, char **argv) {
printf("lang %s\n", lang);
FAILIF(api.Init("/sdcard/", lang), "could not initialize tesseract\n");
-#if 0
- FAILIF(false == api.ReadConfigFile("/sdcard/tessdata/ratings"),
- "could not read config file\n");
-#endif
+ if (ratings) {
+ printf("ratings %s\n", ratings);
+ FAILIF(false == api.ReadConfigFile("/sdcard/tessdata/ratings"),
+ "could not read config file\n");
+ }
printf("set image x=%d, y=%d\n", x, y);
api.SetImage((const unsigned char *)buffer, x, y, 1, x);
printf("recognize\n");
char * text = api.GetUTF8Text();
+ if (tessedit_write_images) {
+ page_image.write("tessinput.tif");
+ }
FAILIF(text == NULL, "didn't recognize\n");
printf("write to output %s\n", outfile);
diff --git a/helium/tesseract.cpp b/helium/tesseract.cpp
index 8b00760..861f7ce 100644
--- a/helium/tesseract.cpp
+++ b/helium/tesseract.cpp
@@ -16,6 +16,10 @@
#include "ccmain/control.h"
#endif
+// This is to make the ratings file parser happy.
+BOOL_VAR (tessedit_write_images, FALSE,
+ "Capture the image from the IPE");
+
#undef LOG
// Local includes
@@ -29,8 +33,13 @@ using namespace helium;
const char* kArguments[3] = { "tesseract", "out", "batch" };
-int Tesseract::Init(const char* datapath, const char *lang) {
- return api_.Init(datapath, lang);
+int Tesseract::Init(const char* datapath,
+ const char *lang,
+ const char *configfile) {
+ int res = api_.Init(datapath, lang);
+ if (!res && configfile)
+ api_.ReadConfigFile(configfile);
+ return res;
}
void Tesseract::ReadMask(const Mask& mask, bool flipped) {
@@ -70,8 +79,14 @@ char* Tesseract::RecognizeText(const Mask& mask) {
MaskToBuffer(mask, buf);
api_.SetImage(buf, mask.width(), mask.height(), 1, mask.width());
api_.Recognize(NULL);
+
delete[] buf;
- return api_.GetUTF8Text();
+ char *text = api_.GetUTF8Text();
+
+ if (tessedit_write_images)
+ page_image.write("tessinput.tif");
+
+ return text;
}
void Tesseract::End() {
diff --git a/helium/tesseract.h b/helium/tesseract.h
index f92d21d..dc88ebe 100644
--- a/helium/tesseract.h
+++ b/helium/tesseract.h
@@ -38,7 +38,9 @@ class Tesseract {
// Call this method exactly once to initialize the Tesseract engine with
// the data files at the specified path (This should be the path, that
// contains the 'tessdata' folder).
- static int Init(const char* datapath, const char *lang = NULL);
+ static int Init(const char* datapath,
+ const char *lang = NULL,
+ const char *configfile = NULL);
// Find the baseline, specified by the offset and slope, for the given
// Mask. If flipped is true, this method will flip the image vertically
diff --git a/helium/test.cpp b/helium/test.cpp
index 4c4891d..1d384aa 100644
--- a/helium/test.cpp
+++ b/helium/test.cpp
@@ -82,7 +82,10 @@ int main(int argc, char** argv) {
// Run OCR
printf("OCRing (language %s)...\n", lang);
TextAreas text;
- TextRecognition::Init("/sdcard/", lang);
+ TextRecognition::Init("/sdcard/",
+ lang,
+ "/sdcard/tessdata/ratings");
+
TextRecognition::RecognizeUsingBinarizer(&binarizer, text);
// Output Text
diff --git a/helium/textrecognition.cpp b/helium/textrecognition.cpp
index 5768449..f2fdf90 100644
--- a/helium/textrecognition.cpp
+++ b/helium/textrecognition.cpp
@@ -16,9 +16,11 @@ using namespace helium;
bool TextRecognition::recognizer_initialized_ = false;
-void TextRecognition::Init(const char* data_path, const char *lang) {
+void TextRecognition::Init(const char* data_path,
+ const char *lang,
+ const char *configfile) {
// Allow reinitialization of Tesseract to get around its adaptation.
- Tesseract::Init(data_path, lang);
+ Tesseract::Init(data_path, lang, configfile);
recognizer_initialized_ = true;
}
diff --git a/helium/textrecognition.h b/helium/textrecognition.h
index 53f2233..fa4ecf5 100644
--- a/helium/textrecognition.h
+++ b/helium/textrecognition.h
@@ -23,7 +23,9 @@ class TextRecognition {
// tessdata directory.
// This must be called before using RecognizeUsingBinarizer(...), but it
// can be callled multiple times to clear OCR's internal adaptation.
- static void Init(const char* data_path, const char *lang = NULL);
+ static void Init(const char* data_path,
+ const char *lang = NULL,
+ const char *configfile = NULL);
// This method passes all the binarized masks, that were extracted by the
// specified Binarizer, through perspective correction, and