diff options
author | Iliyan Malchev <malchev@google.com> | 2008-09-24 09:36:58 -0700 |
---|---|---|
committer | Iliyan Malchev <malchev@google.com> | 2008-09-24 09:36:58 -0700 |
commit | 49767402ccc6f1f40d42197e0976fe8269b8926b (patch) | |
tree | 906a76beae994d03f970f4349661db3447596fef | |
parent | 1a1dcc9716537c4e417946ec2ca12887615ce720 (diff) | |
download | tesseract-49767402ccc6f1f40d42197e0976fe8269b8926b.tar.gz |
add support for the ratings file to helium and tesseract's simple test app
Signed-off-by: Iliyan Malchev <malchev@google.com>
-rw-r--r-- | ccmain/test.cpp | 24 | ||||
-rw-r--r-- | helium/tesseract.cpp | 21 | ||||
-rw-r--r-- | helium/tesseract.h | 4 | ||||
-rw-r--r-- | helium/test.cpp | 5 | ||||
-rw-r--r-- | helium/textrecognition.cpp | 6 | ||||
-rw-r--r-- | helium/textrecognition.h | 4 |
6 files changed, 49 insertions, 15 deletions
diff --git a/ccmain/test.cpp b/ccmain/test.cpp index 15531c1..55d0a7b 100644 --- a/ccmain/test.cpp +++ b/ccmain/test.cpp @@ -9,6 +9,8 @@ #include <sys/stat.h> #include "baseapi.h" +#include "varable.h" +#include "tessvars.h" #define FAILIF(cond, msg...) do { \ if (cond) { \ @@ -18,21 +20,25 @@ } \ } while(0) +// This is to make the ratings file parser happy. +BOOL_VAR (tessedit_write_images, FALSE, + "Capture the image from the IPE"); int main(int argc, char **argv) { - const char *infile, *outfile, *lang; + const char *infile, *outfile, *lang, *ratings; void *buffer; struct stat s; int x, y, ifd; - FAILIF(argc != 6, - "tesstest infile xres yres outfile lang\n"); + FAILIF(argc < 6 || argc > 7, + "tesstest infile xres yres outfile lang [ratings]\n"); infile = argv[1]; FAILIF(sscanf(argv[2], "%d", &x) != 1, "could not parse x!\n"); FAILIF(sscanf(argv[3], "%d", &y) != 1, "could not parse y!\n"); outfile = argv[4]; lang = argv[5]; + ratings = argv[6]; printf("input file %s\n", infile); ifd = open(infile, O_RDONLY); @@ -47,15 +53,19 @@ int main(int argc, char **argv) { printf("lang %s\n", lang); FAILIF(api.Init("/sdcard/", lang), "could not initialize tesseract\n"); -#if 0 - FAILIF(false == api.ReadConfigFile("/sdcard/tessdata/ratings"), - "could not read config file\n"); -#endif + if (ratings) { + printf("ratings %s\n", ratings); + FAILIF(false == api.ReadConfigFile("/sdcard/tessdata/ratings"), + "could not read config file\n"); + } printf("set image x=%d, y=%d\n", x, y); api.SetImage((const unsigned char *)buffer, x, y, 1, x); printf("recognize\n"); char * text = api.GetUTF8Text(); + if (tessedit_write_images) { + page_image.write("tessinput.tif"); + } FAILIF(text == NULL, "didn't recognize\n"); printf("write to output %s\n", outfile); diff --git a/helium/tesseract.cpp b/helium/tesseract.cpp index 8b00760..861f7ce 100644 --- a/helium/tesseract.cpp +++ b/helium/tesseract.cpp @@ -16,6 +16,10 @@ #include "ccmain/control.h" #endif +// This is to make the ratings file 
parser happy. +BOOL_VAR (tessedit_write_images, FALSE, + "Capture the image from the IPE"); + #undef LOG // Local includes @@ -29,8 +33,13 @@ using namespace helium; const char* kArguments[3] = { "tesseract", "out", "batch" }; -int Tesseract::Init(const char* datapath, const char *lang) { - return api_.Init(datapath, lang); +int Tesseract::Init(const char* datapath, + const char *lang, + const char *configfile) { + int res = api_.Init(datapath, lang); + if (!res && configfile) + api_.ReadConfigFile(configfile); + return res; } void Tesseract::ReadMask(const Mask& mask, bool flipped) { @@ -70,8 +79,14 @@ char* Tesseract::RecognizeText(const Mask& mask) { MaskToBuffer(mask, buf); api_.SetImage(buf, mask.width(), mask.height(), 1, mask.width()); api_.Recognize(NULL); + delete[] buf; - return api_.GetUTF8Text(); + char *text = api_.GetUTF8Text(); + + if (tessedit_write_images) + page_image.write("tessinput.tif"); + + return text; } void Tesseract::End() { diff --git a/helium/tesseract.h b/helium/tesseract.h index f92d21d..dc88ebe 100644 --- a/helium/tesseract.h +++ b/helium/tesseract.h @@ -38,7 +38,9 @@ class Tesseract { // Call this method exactly once to initialize the Tesseract engine with // the data files at the specified path (This should be the path, that // contains the 'tessdata' folder). - static int Init(const char* datapath, const char *lang = NULL); + static int Init(const char* datapath, + const char *lang = NULL, + const char *configfile = NULL); // Find the baseline, specified by the offset and slope, for the given // Mask. 
If flipped is true, this method will flip the image vertically diff --git a/helium/test.cpp b/helium/test.cpp index 4c4891d..1d384aa 100644 --- a/helium/test.cpp +++ b/helium/test.cpp @@ -82,7 +82,10 @@ int main(int argc, char** argv) { // Run OCR printf("OCRing (language %s)...\n", lang); TextAreas text; - TextRecognition::Init("/sdcard/", lang); + TextRecognition::Init("/sdcard/", + lang, + "/sdcard/tessdata/ratings"); + TextRecognition::RecognizeUsingBinarizer(&binarizer, text); // Output Text diff --git a/helium/textrecognition.cpp b/helium/textrecognition.cpp index 5768449..f2fdf90 100644 --- a/helium/textrecognition.cpp +++ b/helium/textrecognition.cpp @@ -16,9 +16,11 @@ using namespace helium; bool TextRecognition::recognizer_initialized_ = false; -void TextRecognition::Init(const char* data_path, const char *lang) { +void TextRecognition::Init(const char* data_path, + const char *lang, + const char *configfile) { // Allow reinitialization of Tesseract to get around its adaptation. - Tesseract::Init(data_path, lang); + Tesseract::Init(data_path, lang, configfile); recognizer_initialized_ = true; } diff --git a/helium/textrecognition.h b/helium/textrecognition.h index 53f2233..fa4ecf5 100644 --- a/helium/textrecognition.h +++ b/helium/textrecognition.h @@ -23,7 +23,9 @@ class TextRecognition { // tessdata directory. // This must be called before using RecognizeUsingBinarizer(...), but it // can be callled multiple times to clear OCR's internal adaptation. - static void Init(const char* data_path, const char *lang = NULL); + static void Init(const char* data_path, + const char *lang = NULL, + const char *configfile = NULL); // This method passes all the binarized masks, that were extracted by the // specified Binarizer, through perspective correction, and |