diff options
author | Iliyan Malchev <malchev@google.com> | 2008-09-22 13:01:35 -0700 |
---|---|---|
committer | Iliyan Malchev <malchev@google.com> | 2008-09-22 13:05:47 -0700 |
commit | 45f016b006ef3ad5fd8200ca89212616caa7e79f (patch) | |
tree | 3720394c1d1ee7c1dcefe1a02c647b8defb3d67c | |
parent | 1868ad96d31218f9527a51a01364d4537ca9975a (diff) | |
download | tesseract-45f016b006ef3ad5fd8200ca89212616caa7e79f.tar.gz |
integrate CL 8304910 from perforce, separating helium from leptonica
Signed-off-by: Iliyan Malchev <malchev@google.com>
-rw-r--r-- | helium/box.cpp | 38 |
-rw-r--r-- | helium/helium.h | 3 |
-rw-r--r-- | helium/heliumtextdetector.cpp | 9 |
-rw-r--r-- | helium/imageenhancer.cpp | 64 |
-rw-r--r-- | helium/imageenhancer.h | 18 |
-rw-r--r-- | helium/tesseract.cpp | 17 |
-rw-r--r-- | helium/textrecognition.cpp | 1 |
7 files changed, 41 insertions, 109 deletions
diff --git a/helium/box.cpp b/helium/box.cpp index c6a533a..648c1bf 100644 --- a/helium/box.cpp +++ b/helium/box.cpp @@ -3,10 +3,6 @@ // Author: <renn@google.com> (Marius Renn) #include "box.h" -#include "third_party/leptonica/include/allheaders.h" - -// Enclose all routines in helium namespace to avoid conflicts with -// leptonica Box. namespace helium { @@ -112,22 +108,28 @@ void BoxesInvert(const Array<Box>& boxes, unsigned Area(const Array<Box>& boxes) { if (boxes.size() == 0) return 0; - Box min_enclosing = MinEnclosingBox(boxes); - // Create bitmap for min_enclosing box and set all pixels in intersection - Pix *pix = pixCreate(min_enclosing.width(), min_enclosing.height(), 1); + + Array<Box> frame_boxes(4); + frame_boxes.Add(MinEnclosingBox(boxes)); + unsigned frame_area = frame_boxes.ValueAt(0).Area(); + unsigned cur_index = 0; + unsigned max_index = 0; + + // Find inverse of area of boxes for (unsigned i = 0; i < boxes.size(); i++) { - // translate to align with min_enclosing box - pixRasterop(pix, - boxes.ValueAt(i).left() - min_enclosing.left(), - boxes.ValueAt(i).top() - min_enclosing.top(), - boxes.ValueAt(i).width(), boxes.ValueAt(i).height(), - PIX_SET, NULL, 0, 0); + for (unsigned j = cur_index; j <= max_index; j++) { + Box cur_area = frame_boxes.ValueAt(j); + CutBox(cur_area, boxes.ValueAt(i), frame_boxes); + } + cur_index = max_index + 1; + max_index = frame_boxes.size() - 1; } - // Count set pixels - l_int32 count = 0; - pixCountPixels(pix, &count, NULL); - pixDestroy(&pix); - return static_cast<unsigned int>(count); + + // Subtract inverse from Max area + for (unsigned i = cur_index; i <= max_index; i++) + frame_area -= frame_boxes.ValueAt(i).Area(); + + return frame_area; } void CutBox(const Box& area, const Box& cut, Array<Box>& result) { diff --git a/helium/helium.h b/helium/helium.h index da25983..84593db 100644 --- a/helium/helium.h +++ b/helium/helium.h @@ -2,8 +2,9 @@ #define THIRD_PARY_TESSERACT_HELIUM_HELIUM_H__ #include 
"third_party/tesseract/helium/cstringutils.h" +#include "third_party/tesseract/helium/color.h" +#include "third_party/tesseract/helium/debugging.h" #include "third_party/tesseract/helium/image.h" -#include "third_party/tesseract/helium/leptonica.h" #include "third_party/tesseract/helium/heliumbinarizer.h" #include "third_party/tesseract/helium/heliumtextdetector.h" #include "third_party/tesseract/helium/textareas.h" diff --git a/helium/heliumtextdetector.cpp b/helium/heliumtextdetector.cpp index 0f202df..424d546 100644 --- a/helium/heliumtextdetector.cpp +++ b/helium/heliumtextdetector.cpp @@ -19,7 +19,6 @@ #include "laplaceedgedetector.h" #include "textclassifier.h" #include "textvalidator.h" -#include "leptonica.h" using namespace helium; @@ -29,7 +28,7 @@ static void ShowTrace(ContourDetector& outliner, int trace_type, Mask mask(width, height); outliner.PlotTracesOnto(mask, trace_type); Image trace_image = Image::FromMask(mask); - Leptonica::DisplayImage(trace_image); + // Leptonica::DisplayImage(trace_image); } static void ShowShapeTree(Clusterer& clusterer, ShapeTree& shapes, @@ -38,7 +37,7 @@ static void ShowShapeTree(Clusterer& clusterer, ShapeTree& shapes, image.Clear(); shapes.PaintShapes(image); clusterer.DrawClusterBounds(image); - Leptonica::DisplayImage(image); + // Leptonica::DisplayImage(image); } HeliumTextDetector::HeliumTextDetector() @@ -122,8 +121,8 @@ void HeliumTextDetector::DetectText(const Image& image) { clusterer_.ClusterShapes(shapemaker_.shapes(), text_validator); if (show_debug_) { - Leptonica::DisplayImage(smooth_image); - Leptonica::DisplayGrayMap(edges); + // Leptonica::DisplayImage(smooth_image); + // Leptonica::DisplayGrayMap(edges); ShowTrace(outliner, TRACECLASS_TEXT, image.width(), image.height()); ShowShapeTree(clusterer_, shapemaker_, image.width(), image.height()); } diff --git a/helium/imageenhancer.cpp b/helium/imageenhancer.cpp index 43b8833..6a45204 100644 --- a/helium/imageenhancer.cpp +++ b/helium/imageenhancer.cpp @@ 
-11,7 +11,6 @@ #include "image.h" #include "graymap.h" #include "imageenhancer.h" -#include "leptonica.h" using namespace helium; @@ -209,7 +208,6 @@ void ImageEnhancer::LocalContrast(GrayMap& src, int ws, int hs, GrayMap mask; mask.Copy(src); Binarize(mask, fg_thresh, -1, 0); // keep BG values, zero out FG - // Leptonica::DisplayGrayMap(mask); im.Init(mask); // compute mean/var over background } else { im.Init(src); @@ -228,68 +226,6 @@ void ImageEnhancer::LocalContrast(GrayMap& src, int ws, int hs, } } -GrayMap ImageEnhancer::RankFilterGray(GrayMap& map, int fwidth, int fheight, - float rank_ratio) { - Pix* pix = Leptonica::GrayMapToPix(map); - int width = 2 * fwidth + 1; // full window width around a pixel - int height = 2 * fheight + 1; - Pix* filtered_pix = pixRankFilterGray(pix, width, height, rank_ratio); - GrayMap filtered_map = Leptonica::PixToGrayMap(filtered_pix); - pixDestroy(&pix); - pixDestroy(&filtered_pix); - return filtered_map; -} - -// Implements the Non-linear Niblack decomposition algorithm described in -// Kaihua Zhu's (khz@google.com) CBDAR05 paper with slight tweaking. 
-void ImageEnhancer::NLNiblack(GrayMap& src, int bg_width, int bg_height, - int bgfg_ratio, int min_fg_sdev_value, - float fg_sdev_range, float fg_sdev_rank) { - int Wbg = bg_width; // window half-width for computing background - int Wfg = bg_width / bgfg_ratio; - int Hbg = bg_height; - int Hfg = bg_height / bgfg_ratio; - - IntegralMatrix im; - im.Init(src); - GrayMap bg_mean(src.width(), src.height()); - GrayMap fg_sdev(src.width(), src.height()); - uint8 *bg_u = bg_mean.data(); - uint8 *fg_v = fg_sdev.data(); - for (int y = 0; y < src.height(); ++y) { - for (int x = 0; x < src.width(); ++x) { - double bg_mean, bg_var; - double fg_mean, fg_var; - im.GetWindowMeanVar(x-Wbg, y-Hbg, x+Wbg, y+Hbg, &bg_mean, &bg_var); - im.GetWindowMeanVar(x-Wfg, y-Hfg, x+Wfg, y+Hfg, &fg_mean, &fg_var); - *bg_u++ = static_cast<uint8>(bg_mean); - *fg_v++ = static_cast<uint8>(sqrt(fg_var)); - } - } - // Get 50-percentile (median filter) of the average background map, - // and use the top 20% largest variance in the foreground window - // to determine a threshold. - GrayMap bg_filtered_mean = RankFilterGray(bg_mean, Wbg, Hbg, 0.5); - GrayMap fg_filtered_sdev = RankFilterGray(fg_sdev, Wfg, Hfg, fg_sdev_rank); - - uint8 *data = src.data(); - bg_u = bg_filtered_mean.data(); - fg_v = fg_filtered_sdev.data(); - for (int y = 0; y < src.height(); ++y) { - for (int x = 0; x < src.width(); ++x) { - uint8 value = 128; // don't care state - if (*fg_v > min_fg_sdev_value) { - uint8 range = fg_sdev_range * *fg_v; - if (*data > *bg_u + range) value = 255; - if (*data < *bg_u - range) value = 0; - } - *data++ = value; - bg_u++; - fg_v++; - } - } -} - // Performs independent pixel operation in-place using given threshold. // For pixels whose value is below the threshold, the minvalue is used. // If minvalue==-1, the original value is used. Similarly for maxvalue. 
diff --git a/helium/imageenhancer.h b/helium/imageenhancer.h index b7cf1bf..7e95345 100644 --- a/helium/imageenhancer.h +++ b/helium/imageenhancer.h @@ -65,24 +65,6 @@ class ImageEnhancer { kDefaultEdgeThresh, &Func_SuppressBG); } - // Apply rank filter with specified window size on given graymap and - // return the new image. The size is specified by half-width and half - // height that is added around each pixel. The rank is a value between - // 0..1 representing the fraction of pixels in the neighborhood with - // smaller values. (See leptonica for details.) - static GrayMap RankFilterGray(GrayMap& map, - int half_width, int half_height, - float rank_ratio); - - // Implements a non-linear Niblack algorithm to produce a ternary valued - // component label map based on specified background window size and - // bg-to-fg window size ratio. Parameter std_range controls thresholding. - // Values within std_range of normalized distance will be labeled as - // don't care. - static void NLNiblack(GrayMap& src, int bg_half_width, int bg_half_height, - int bgfg_ratio, int min_fg_sdev_value, - float fg_sdev_range, float fg_sdev_rank); - static void Binarize(GrayMap& src, int threshold, int minval, int maxval); static void ApplyMask(const GrayMap& mask, GrayMap& src); diff --git a/helium/tesseract.cpp b/helium/tesseract.cpp index c2ef380..06bb8c8 100644 --- a/helium/tesseract.cpp +++ b/helium/tesseract.cpp @@ -55,9 +55,22 @@ bool Tesseract::DetectBaseline(const Mask& mask, return api_.GetTextDirection(&out_offset, &out_slope); } +void MaskToBuffer(const Mask& mask, unsigned char* buf) { + bool* mask_ptr = mask.data(); + for (int y = 0; y < mask.height(); ++y) + for (int x = 0; x < mask.width(); ++x) + *buf++ = *(mask_ptr++) ? 
0 : 255; +} + char* Tesseract::RecognizeText(const Mask& mask) { - MaskThresholder* mt = new MaskThresholder(mask, true); - api_.SetThresholder(mt); + // MaskThresholder* mt = new MaskThresholder(mask, true); + // api_.SetThresholder(mt); + // Check with Ray on directly passing in image after fixing Otsu thresholding + unsigned char* buf = new unsigned char[mask.width() * mask.height()]; + MaskToBuffer(mask, buf); + api_.SetImage(buf, mask.width(), mask.height(), 1, mask.width()); + api_.Recognize(NULL); + delete[] buf; return api_.GetUTF8Text(); } diff --git a/helium/textrecognition.cpp b/helium/textrecognition.cpp index c18262b..cdb7f7e 100644 --- a/helium/textrecognition.cpp +++ b/helium/textrecognition.cpp @@ -11,7 +11,6 @@ #include "tesseract.h" #include "textareas.h" #include "textrecognition.h" -#include "leptonica.h" using namespace helium; |