about summary refs log tree commit diff
diff options
context:
space:
mode:
author Iliyan Malchev <malchev@google.com> 2008-09-22 13:01:35 -0700
committer Iliyan Malchev <malchev@google.com> 2008-09-22 13:05:47 -0700
commit 45f016b006ef3ad5fd8200ca89212616caa7e79f (patch)
tree 3720394c1d1ee7c1dcefe1a02c647b8defb3d67c
parent 1868ad96d31218f9527a51a01364d4537ca9975a (diff)
download tesseract-45f016b006ef3ad5fd8200ca89212616caa7e79f.tar.gz
integrate CL 8304910 from perforce, separating helium from leptonica
Signed-off-by: Iliyan Malchev <malchev@google.com>
-rw-r--r-- helium/box.cpp 38
-rw-r--r-- helium/helium.h 3
-rw-r--r-- helium/heliumtextdetector.cpp 9
-rw-r--r-- helium/imageenhancer.cpp 64
-rw-r--r-- helium/imageenhancer.h 18
-rw-r--r-- helium/tesseract.cpp 17
-rw-r--r-- helium/textrecognition.cpp 1
7 files changed, 41 insertions, 109 deletions
diff --git a/helium/box.cpp b/helium/box.cpp
index c6a533a..648c1bf 100644
--- a/helium/box.cpp
+++ b/helium/box.cpp
@@ -3,10 +3,6 @@
// Author: <renn@google.com> (Marius Renn)
#include "box.h"
-#include "third_party/leptonica/include/allheaders.h"
-
-// Enclose all routines in helium namespace to avoid conflicts with
-// leptonica Box.
namespace helium {
@@ -112,22 +108,28 @@ void BoxesInvert(const Array<Box>& boxes,
unsigned Area(const Array<Box>& boxes) {
if (boxes.size() == 0) return 0;
- Box min_enclosing = MinEnclosingBox(boxes);
- // Create bitmap for min_enclosing box and set all pixels in intersection
- Pix *pix = pixCreate(min_enclosing.width(), min_enclosing.height(), 1);
+
+ Array<Box> frame_boxes(4);
+ frame_boxes.Add(MinEnclosingBox(boxes));
+ unsigned frame_area = frame_boxes.ValueAt(0).Area();
+ unsigned cur_index = 0;
+ unsigned max_index = 0;
+
+ // Find inverse of area of boxes
for (unsigned i = 0; i < boxes.size(); i++) {
- // translate to align with min_enclosing box
- pixRasterop(pix,
- boxes.ValueAt(i).left() - min_enclosing.left(),
- boxes.ValueAt(i).top() - min_enclosing.top(),
- boxes.ValueAt(i).width(), boxes.ValueAt(i).height(),
- PIX_SET, NULL, 0, 0);
+ for (unsigned j = cur_index; j <= max_index; j++) {
+ Box cur_area = frame_boxes.ValueAt(j);
+ CutBox(cur_area, boxes.ValueAt(i), frame_boxes);
+ }
+ cur_index = max_index + 1;
+ max_index = frame_boxes.size() - 1;
}
- // Count set pixels
- l_int32 count = 0;
- pixCountPixels(pix, &count, NULL);
- pixDestroy(&pix);
- return static_cast<unsigned int>(count);
+
+ // Subtract inverse from Max area
+ for (unsigned i = cur_index; i <= max_index; i++)
+ frame_area -= frame_boxes.ValueAt(i).Area();
+
+ return frame_area;
}
void CutBox(const Box& area, const Box& cut, Array<Box>& result) {
diff --git a/helium/helium.h b/helium/helium.h
index da25983..84593db 100644
--- a/helium/helium.h
+++ b/helium/helium.h
@@ -2,8 +2,9 @@
#define THIRD_PARY_TESSERACT_HELIUM_HELIUM_H__
#include "third_party/tesseract/helium/cstringutils.h"
+#include "third_party/tesseract/helium/color.h"
+#include "third_party/tesseract/helium/debugging.h"
#include "third_party/tesseract/helium/image.h"
-#include "third_party/tesseract/helium/leptonica.h"
#include "third_party/tesseract/helium/heliumbinarizer.h"
#include "third_party/tesseract/helium/heliumtextdetector.h"
#include "third_party/tesseract/helium/textareas.h"
diff --git a/helium/heliumtextdetector.cpp b/helium/heliumtextdetector.cpp
index 0f202df..424d546 100644
--- a/helium/heliumtextdetector.cpp
+++ b/helium/heliumtextdetector.cpp
@@ -19,7 +19,6 @@
#include "laplaceedgedetector.h"
#include "textclassifier.h"
#include "textvalidator.h"
-#include "leptonica.h"
using namespace helium;
@@ -29,7 +28,7 @@ static void ShowTrace(ContourDetector& outliner, int trace_type,
Mask mask(width, height);
outliner.PlotTracesOnto(mask, trace_type);
Image trace_image = Image::FromMask(mask);
- Leptonica::DisplayImage(trace_image);
+ // Leptonica::DisplayImage(trace_image);
}
static void ShowShapeTree(Clusterer& clusterer, ShapeTree& shapes,
@@ -38,7 +37,7 @@ static void ShowShapeTree(Clusterer& clusterer, ShapeTree& shapes,
image.Clear();
shapes.PaintShapes(image);
clusterer.DrawClusterBounds(image);
- Leptonica::DisplayImage(image);
+ // Leptonica::DisplayImage(image);
}
HeliumTextDetector::HeliumTextDetector()
@@ -122,8 +121,8 @@ void HeliumTextDetector::DetectText(const Image& image) {
clusterer_.ClusterShapes(shapemaker_.shapes(), text_validator);
if (show_debug_) {
- Leptonica::DisplayImage(smooth_image);
- Leptonica::DisplayGrayMap(edges);
+ // Leptonica::DisplayImage(smooth_image);
+ // Leptonica::DisplayGrayMap(edges);
ShowTrace(outliner, TRACECLASS_TEXT, image.width(), image.height());
ShowShapeTree(clusterer_, shapemaker_, image.width(), image.height());
}
diff --git a/helium/imageenhancer.cpp b/helium/imageenhancer.cpp
index 43b8833..6a45204 100644
--- a/helium/imageenhancer.cpp
+++ b/helium/imageenhancer.cpp
@@ -11,7 +11,6 @@
#include "image.h"
#include "graymap.h"
#include "imageenhancer.h"
-#include "leptonica.h"
using namespace helium;
@@ -209,7 +208,6 @@ void ImageEnhancer::LocalContrast(GrayMap& src, int ws, int hs,
GrayMap mask;
mask.Copy(src);
Binarize(mask, fg_thresh, -1, 0); // keep BG values, zero out FG
- // Leptonica::DisplayGrayMap(mask);
im.Init(mask); // compute mean/var over background
} else {
im.Init(src);
@@ -228,68 +226,6 @@ void ImageEnhancer::LocalContrast(GrayMap& src, int ws, int hs,
}
}
-GrayMap ImageEnhancer::RankFilterGray(GrayMap& map, int fwidth, int fheight,
- float rank_ratio) {
- Pix* pix = Leptonica::GrayMapToPix(map);
- int width = 2 * fwidth + 1; // full window width around a pixel
- int height = 2 * fheight + 1;
- Pix* filtered_pix = pixRankFilterGray(pix, width, height, rank_ratio);
- GrayMap filtered_map = Leptonica::PixToGrayMap(filtered_pix);
- pixDestroy(&pix);
- pixDestroy(&filtered_pix);
- return filtered_map;
-}
-
-// Implements the Non-linear Niblack decomposition algorithm described in
-// Kaihua Zhu's (khz@google.com) CBDAR05 paper with slight tweaking.
-void ImageEnhancer::NLNiblack(GrayMap& src, int bg_width, int bg_height,
- int bgfg_ratio, int min_fg_sdev_value,
- float fg_sdev_range, float fg_sdev_rank) {
- int Wbg = bg_width; // window half-width for computing background
- int Wfg = bg_width / bgfg_ratio;
- int Hbg = bg_height;
- int Hfg = bg_height / bgfg_ratio;
-
- IntegralMatrix im;
- im.Init(src);
- GrayMap bg_mean(src.width(), src.height());
- GrayMap fg_sdev(src.width(), src.height());
- uint8 *bg_u = bg_mean.data();
- uint8 *fg_v = fg_sdev.data();
- for (int y = 0; y < src.height(); ++y) {
- for (int x = 0; x < src.width(); ++x) {
- double bg_mean, bg_var;
- double fg_mean, fg_var;
- im.GetWindowMeanVar(x-Wbg, y-Hbg, x+Wbg, y+Hbg, &bg_mean, &bg_var);
- im.GetWindowMeanVar(x-Wfg, y-Hfg, x+Wfg, y+Hfg, &fg_mean, &fg_var);
- *bg_u++ = static_cast<uint8>(bg_mean);
- *fg_v++ = static_cast<uint8>(sqrt(fg_var));
- }
- }
- // Get 50-percentile (median filter) of the average background map,
- // and use the top 20% largest variance in the foreground window
- // to determine a threshold.
- GrayMap bg_filtered_mean = RankFilterGray(bg_mean, Wbg, Hbg, 0.5);
- GrayMap fg_filtered_sdev = RankFilterGray(fg_sdev, Wfg, Hfg, fg_sdev_rank);
-
- uint8 *data = src.data();
- bg_u = bg_filtered_mean.data();
- fg_v = fg_filtered_sdev.data();
- for (int y = 0; y < src.height(); ++y) {
- for (int x = 0; x < src.width(); ++x) {
- uint8 value = 128; // don't care state
- if (*fg_v > min_fg_sdev_value) {
- uint8 range = fg_sdev_range * *fg_v;
- if (*data > *bg_u + range) value = 255;
- if (*data < *bg_u - range) value = 0;
- }
- *data++ = value;
- bg_u++;
- fg_v++;
- }
- }
-}
-
// Performs independent pixel operation in-place using given threshold.
// For pixels whose value is below the threshold, the minvalue is used.
// If minvalue==-1, the original value is used. Similarly for maxvalue.
diff --git a/helium/imageenhancer.h b/helium/imageenhancer.h
index b7cf1bf..7e95345 100644
--- a/helium/imageenhancer.h
+++ b/helium/imageenhancer.h
@@ -65,24 +65,6 @@ class ImageEnhancer {
kDefaultEdgeThresh, &Func_SuppressBG);
}
- // Apply rank filter with specified window size on given graymap and
- // return the new image. The size is specified by half-width and half
- // height that is added around each pixel. The rank is a value between
- // 0..1 representing the fraction of pixels in the neighborhood with
- // smaller values. (See leptonica for details.)
- static GrayMap RankFilterGray(GrayMap& map,
- int half_width, int half_height,
- float rank_ratio);
-
- // Implements a non-linear Niblack algorithm to produce a ternary valued
- // component label map based on specified background window size and
- // bg-to-fg window size ratio. Parameter std_range controls thresholding.
- // Values within std_range of normalized distance will be labeled as
- // don't care.
- static void NLNiblack(GrayMap& src, int bg_half_width, int bg_half_height,
- int bgfg_ratio, int min_fg_sdev_value,
- float fg_sdev_range, float fg_sdev_rank);
-
static void Binarize(GrayMap& src, int threshold, int minval, int maxval);
static void ApplyMask(const GrayMap& mask, GrayMap& src);
diff --git a/helium/tesseract.cpp b/helium/tesseract.cpp
index c2ef380..06bb8c8 100644
--- a/helium/tesseract.cpp
+++ b/helium/tesseract.cpp
@@ -55,9 +55,22 @@ bool Tesseract::DetectBaseline(const Mask& mask,
return api_.GetTextDirection(&out_offset, &out_slope);
}
+void MaskToBuffer(const Mask& mask, unsigned char* buf) {
+ bool* mask_ptr = mask.data();
+ for (int y = 0; y < mask.height(); ++y)
+ for (int x = 0; x < mask.width(); ++x)
+ *buf++ = *(mask_ptr++) ? 0 : 255;
+}
+
char* Tesseract::RecognizeText(const Mask& mask) {
- MaskThresholder* mt = new MaskThresholder(mask, true);
- api_.SetThresholder(mt);
+ // MaskThresholder* mt = new MaskThresholder(mask, true);
+ // api_.SetThresholder(mt);
+ // Check with Ray on directly passing in image after fixing Otsu thresholding
+ unsigned char* buf = new unsigned char[mask.width() * mask.height()];
+ MaskToBuffer(mask, buf);
+ api_.SetImage(buf, mask.width(), mask.height(), 1, mask.width());
+ api_.Recognize(NULL);
+ delete[] buf;
return api_.GetUTF8Text();
}
diff --git a/helium/textrecognition.cpp b/helium/textrecognition.cpp
index c18262b..cdb7f7e 100644
--- a/helium/textrecognition.cpp
+++ b/helium/textrecognition.cpp
@@ -11,7 +11,6 @@
#include "tesseract.h"
#include "textareas.h"
#include "textrecognition.h"
-#include "leptonica.h"
using namespace helium;