aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Gutkin <agutkin@google.com>2012-09-12 18:11:43 +0100
committerAlexander Gutkin <agutkin@google.com>2012-09-12 18:11:43 +0100
commitdfd8b8327b93660601d016cdc6f29f433b45a8d8 (patch)
tree968ec84b8e32ad73ec18d74334930f36b7471906
parentf4c12fce1ee58e670f9c3fce46c40296ba9ee8a2 (diff)
downloadopenfst-dfd8b8327b93660601d016cdc6f29f433b45a8d8.tar.gz
Updated OpenFST version to openfst-1.3.2-CL32004048 from Greco3.
Change-Id: I19b0db718256b35c0e3e5a7315f1ed6335e6dcac
-rw-r--r--AUTHORS1
-rw-r--r--Android.mk2
-rw-r--r--LICENSE (renamed from LICENCE)0
-rw-r--r--NEWS21
-rw-r--r--README6
-rw-r--r--README.android5
-rwxr-xr-xconfigure87
-rw-r--r--configure.ac12
-rwxr-xr-xfix_headers.sh21
-rw-r--r--src/bin/fstarcsort.cc2
-rw-r--r--src/bin/fstclosure.cc2
-rw-r--r--src/bin/fstcompile.cc13
-rw-r--r--src/bin/fstcompose.cc2
-rw-r--r--src/bin/fstconcat.cc2
-rw-r--r--src/bin/fstconnect.cc2
-rw-r--r--src/bin/fstconvert.cc2
-rw-r--r--src/bin/fstdeterminize.cc2
-rw-r--r--src/bin/fstdifference.cc2
-rw-r--r--src/bin/fstdraw.cc23
-rw-r--r--src/bin/fstencode.cc2
-rw-r--r--src/bin/fstepsnormalize.cc2
-rw-r--r--src/bin/fstequal.cc2
-rw-r--r--src/bin/fstequivalent.cc2
-rw-r--r--src/bin/fstinfo.cc2
-rw-r--r--src/bin/fstintersect.cc2
-rw-r--r--src/bin/fstinvert.cc2
-rw-r--r--src/bin/fstmap.cc2
-rw-r--r--src/bin/fstminimize.cc2
-rw-r--r--src/bin/fstprint.cc13
-rw-r--r--src/bin/fstproject.cc2
-rw-r--r--src/bin/fstprune.cc2
-rw-r--r--src/bin/fstpush.cc2
-rw-r--r--src/bin/fstrandgen.cc2
-rw-r--r--src/bin/fstrelabel.cc15
-rw-r--r--src/bin/fstreplace.cc2
-rw-r--r--src/bin/fstreverse.cc3
-rw-r--r--src/bin/fstreweight.cc2
-rw-r--r--src/bin/fstrmepsilon.cc2
-rw-r--r--src/bin/fstshortestdistance.cc2
-rw-r--r--src/bin/fstshortestpath.cc2
-rw-r--r--src/bin/fstsymbols.cc11
-rw-r--r--src/bin/fstsynchronize.cc2
-rw-r--r--src/bin/fsttopsort.cc2
-rw-r--r--src/bin/fstunion.cc3
-rw-r--r--src/extensions/Makefile.am6
-rw-r--r--src/extensions/Makefile.in5
-rw-r--r--src/extensions/far/Makefile.am7
-rw-r--r--src/extensions/far/Makefile.in32
-rw-r--r--src/extensions/far/farcompilestrings.cc34
-rw-r--r--src/extensions/far/farcreate.cc22
-rw-r--r--src/extensions/far/farequal.cc57
-rw-r--r--src/extensions/far/farextract.cc12
-rw-r--r--src/extensions/far/farinfo.cc13
-rw-r--r--src/extensions/far/farprintstrings.cc21
-rw-r--r--src/extensions/far/farscript.cc28
-rw-r--r--src/extensions/far/main.cc18
-rw-r--r--src/extensions/far/strings.cc (renamed from src/extensions/far/compile-strings.cc)4
-rw-r--r--src/extensions/ngram/Makefile.am10
-rw-r--r--src/extensions/ngram/Makefile.in543
-rw-r--r--src/extensions/ngram/bitmap-index.cc187
-rw-r--r--src/extensions/ngram/ngram-fst.cc28
-rw-r--r--src/extensions/ngram/nthbit.cc107
-rw-r--r--src/extensions/pdt/pdtcompose.cc2
-rw-r--r--src/extensions/pdt/pdtexpand.cc6
-rw-r--r--src/extensions/pdt/pdtinfo.cc2
-rw-r--r--src/extensions/pdt/pdtreplace.cc6
-rw-r--r--src/extensions/pdt/pdtreverse.cc2
-rw-r--r--src/extensions/pdt/pdtshortestpath.cc6
-rw-r--r--src/include/Makefile.am19
-rw-r--r--src/include/Makefile.in29
-rw-r--r--src/include/fst/accumulator.h22
-rw-r--r--src/include/fst/arc.h1
-rw-r--r--src/include/fst/arcsort.h16
-rw-r--r--src/include/fst/bi-table.h81
-rw-r--r--src/include/fst/cache.h9
-rw-r--r--src/include/fst/compact-fst.h152
-rw-r--r--src/include/fst/compat.h23
-rw-r--r--src/include/fst/compose.h68
-rw-r--r--src/include/fst/config.h2
-rw-r--r--src/include/fst/const-fst.h30
-rw-r--r--src/include/fst/dfs-visit.h3
-rw-r--r--src/include/fst/expanded-fst.h4
-rw-r--r--src/include/fst/extensions/far/compile-strings.h61
-rw-r--r--src/include/fst/extensions/far/equal.h99
-rw-r--r--src/include/fst/extensions/far/extract.h2
-rw-r--r--src/include/fst/extensions/far/far.h184
-rw-r--r--src/include/fst/extensions/far/farscript.h51
-rw-r--r--src/include/fst/extensions/far/info.h2
-rw-r--r--src/include/fst/extensions/far/print-strings.h28
-rw-r--r--src/include/fst/extensions/far/stlist.h9
-rw-r--r--src/include/fst/extensions/far/sttable.h1
-rw-r--r--src/include/fst/extensions/ngram/bitmap-index.h183
-rw-r--r--src/include/fst/extensions/ngram/ngram-fst.h912
-rw-r--r--src/include/fst/extensions/ngram/nthbit.h46
-rw-r--r--src/include/fst/extensions/pdt/collection.h33
-rw-r--r--src/include/fst/extensions/pdt/info.h2
-rw-r--r--src/include/fst/extensions/pdt/paren.h30
-rw-r--r--src/include/fst/extensions/pdt/shortest-path.h14
-rw-r--r--src/include/fst/flags.h70
-rw-r--r--src/include/fst/float-weight.h91
-rw-r--r--src/include/fst/fst.h17
-rw-r--r--src/include/fst/fstlib.h2
-rw-r--r--src/include/fst/icu.h135
-rw-r--r--src/include/fst/lock.h11
-rw-r--r--src/include/fst/lookahead-matcher.h25
-rw-r--r--src/include/fst/matcher.h72
-rw-r--r--src/include/fst/mutable-fst.h2
-rw-r--r--src/include/fst/queue.h4
-rw-r--r--src/include/fst/register.h1
-rw-r--r--src/include/fst/rmepsilon.h1
-rw-r--r--src/include/fst/script/compile-impl.h1
-rw-r--r--src/include/fst/script/draw-impl.h6
-rw-r--r--src/include/fst/script/draw.h1
-rw-r--r--src/include/fst/script/fst-class.h43
-rw-r--r--src/include/fst/script/text-io.h1
-rw-r--r--src/include/fst/script/weight-class.h6
-rw-r--r--src/include/fst/shortest-distance.h2
-rw-r--r--src/include/fst/signed-log-weight.h12
-rw-r--r--src/include/fst/slist.h2
-rw-r--r--src/include/fst/state-map.h8
-rw-r--r--src/include/fst/string.h3
-rw-r--r--src/include/fst/symbol-table.h66
-rw-r--r--src/include/fst/test-properties.h20
-rw-r--r--src/include/fst/util.h7
-rw-r--r--src/include/fst/vector-fst.h10
-rw-r--r--src/include/fst/visit.h3
-rw-r--r--src/include/fst/weight.h26
-rw-r--r--src/lib/flags.cc75
-rw-r--r--src/lib/properties.cc5
-rw-r--r--src/lib/symbol-table.cc53
-rw-r--r--src/lib/temp_Android.temp_mk20
-rw-r--r--src/lib/util.cc4
-rw-r--r--src/script/fst-class.cc10
-rw-r--r--src/script/text-io.cc4
-rw-r--r--src/test/algo_test.cc2
-rw-r--r--src/test/fst_test.cc2
-rw-r--r--src/test/fst_test.h6
-rw-r--r--src/test/weight_test.cc2
138 files changed, 3705 insertions, 692 deletions
diff --git a/AUTHORS b/AUTHORS
index ac92ec6..2a8b3e4 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -8,6 +8,7 @@ Contributors:
Boulos Harb <harb@google.com>
Dan Bikel <dbikel@google.com
Jacob Ratkiewicz <jpr@cs.indiana.edu>
+Jeff Sorensen <sorenj@google.com>
Johan Schalkwyk <johans@google.com>
Kasturi Raghavan <kasturir@ucla.edu>
Masha Shugrina <shumash@google.com>
diff --git a/Android.mk b/Android.mk
index 0748c91..51989dc 100644
--- a/Android.mk
+++ b/Android.mk
@@ -1,9 +1,7 @@
-#
# Copyright 2012 Google Inc. All Rights Reserved.
# Author: npereira@google.com (Nicole Pereira)
#
# Android makefile for openfst library.
-#
OPENFST_DIR := $(call my-dir)
diff --git a/LICENCE b/LICENSE
index d216912..d216912 100644
--- a/LICENCE
+++ b/LICENSE
diff --git a/NEWS b/NEWS
index 0152c55..530d7b4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,24 @@
-OpenFst - Release 1.2
+OpenFst - Release 1.3
+ * Support for non-fatal exits on errors: (1.3.1)
+ * Added FLAGS_fst_error_fatal: FST errors are
+ fatal if true (default); o.w. return objects flagged as bad:
+ e.g., FSTs - kError
+ prop. true, FST weights - not a Member().
+ * Added kError property bit signifying bad FST
+ * Added NoWeight() method to FST weight requirements that returns
+ weight that is not a Member().
+ * Various improvements to the FAR extensions (1.3.1)
+ * a single FST is now a FAR type
+ * FLAGS_initial_symbols: Uses the symbol table from the
+ first FST in the archive for all entries"
+ * Input/output to standard input/output for some FAR and arc types
+ * --with-icu configuration option no longer needed (1.3.1)
+ * Improved flags usage esp. if use SET_FLAGS not SetFlags/InitFst (1.3.2)
+ * Added 'fst' as possible far writer type (1.3.2)
+ * phi matcher can now accept 0 as the phi label (1.3.2)
+ * Added ngram-fst extension (1.3.2)
+OpenFst - Release 1.2
* Added lookahead matching and filtering for faster composition
* Added EditFst for mutation of o.w. immutable FSTs
* Added script sub-namespace defining type FstClass - a non-templated
diff --git a/README b/README
index a4643d7..aff9e2f 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-OpenFst - Release 1.2
+OpenFst - Release 1.3
OpenFst is a library for constructing, combining, optimizing, and
searching weighted finite-state transducers (FSTs).
@@ -11,7 +11,7 @@ REQUIREMENTS:
INSTALLATION:
Follow the generic GNU build system instructions in ./INSTALL. We
- recommend configuring with =--enable-static=no= for faster
+ recommend configuring with --enable-static=no for faster
compiles.
Optional features:
@@ -20,8 +20,8 @@ INSTALLATION:
--enable-const-fsts Enable all ConstFst classes (def: no)
--enable-far Enable FAR (FST Archive) extension (def: no)
--enable-lookahead-fsts Enable LookAheadFst classes (def: no)
+ --enable-ngram-fsts Enable NGramFst classes (def: no)
--enable-pdt Experimental push-down transducer extensions (def: no)
- --with-icu Use ICU to provide unicode support (def: no)
Configuring with --enable-bin=no gives very fast compiles, but
excludes the command line utilities.
diff --git a/README.android b/README.android
index 6d822ec..1e90058 100644
--- a/README.android
+++ b/README.android
@@ -1,4 +1,3 @@
-Openfst openfst-1.2.10
-Updated to Google CL27738347
+Copy of openfst-1.3.2-CL32004048 from Greco3.
-Modified to use stlport, Android.mk file added. \ No newline at end of file
+Modified to use stlport, Android.mk file added.
diff --git a/configure b/configure
index 38c7241..0bd3a30 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.65 for OpenFst 1.2.10.
+# Generated by GNU Autoconf 2.65 for OpenFst 1.3.2.
#
# Report bugs to <help@www.openfst.org>.
#
@@ -701,8 +701,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='OpenFst'
PACKAGE_TARNAME='openfst'
-PACKAGE_VERSION='1.2.10'
-PACKAGE_STRING='OpenFst 1.2.10'
+PACKAGE_VERSION='1.3.2'
+PACKAGE_STRING='OpenFst 1.3.2'
PACKAGE_BUGREPORT='help@www.openfst.org'
PACKAGE_URL=''
@@ -757,6 +757,8 @@ HAVE_PDT_FALSE
HAVE_PDT_TRUE
HAVE_LOOKAHEAD_FALSE
HAVE_LOOKAHEAD_TRUE
+HAVE_NGRAM_FALSE
+HAVE_NGRAM_TRUE
HAVE_FAR_FALSE
HAVE_FAR_TRUE
HAVE_CONST_FALSE
@@ -893,6 +895,7 @@ enable_bin
enable_compact_fsts
enable_const_fsts
enable_far
+enable_ngram_fsts
enable_lookahead_fsts
enable_pdt
with_icu
@@ -1451,7 +1454,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures OpenFst 1.2.10 to adapt to many kinds of systems.
+\`configure' configures OpenFst 1.3.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1521,7 +1524,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of OpenFst 1.2.10:";;
+ short | recursive ) echo "Configuration of OpenFst 1.3.2:";;
esac
cat <<\_ACEOF
@@ -1540,6 +1543,7 @@ Optional Features:
--enable-compact-fsts enable all CompactFst extensions
--enable-const-fsts enable all ConstFst extensions
--enable-far enable FAR extensions
+ --enable-ngram-fsts enable NGramFst extension
--enable-lookahead-fsts enable LookAheadFst extensions
--enable-pdt enable PDT extensions
@@ -1630,7 +1634,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-OpenFst configure 1.2.10
+OpenFst configure 1.3.2
generated by GNU Autoconf 2.65
Copyright (C) 2009 Free Software Foundation, Inc.
@@ -2083,7 +2087,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by OpenFst $as_me 1.2.10, which was
+It was created by OpenFst $as_me 1.3.2, which was
generated by GNU Autoconf 2.65. Invocation command line was
$ $0 $@
@@ -2891,7 +2895,7 @@ fi
# Define the identity of the package.
PACKAGE='openfst'
- VERSION='1.2.10'
+ VERSION='1.3.2'
cat >>confdefs.h <<_ACEOF
@@ -5007,13 +5011,13 @@ if test "${lt_cv_nm_interface+set}" = set; then :
else
lt_cv_nm_interface="BSD nm"
echo "int some_variable = 0;" > conftest.$ac_ext
- (eval echo "\"\$as_me:5010: $ac_compile\"" >&5)
+ (eval echo "\"\$as_me:5014: $ac_compile\"" >&5)
(eval "$ac_compile" 2>conftest.err)
cat conftest.err >&5
- (eval echo "\"\$as_me:5013: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
+ (eval echo "\"\$as_me:5017: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
(eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
cat conftest.err >&5
- (eval echo "\"\$as_me:5016: output\"" >&5)
+ (eval echo "\"\$as_me:5020: output\"" >&5)
cat conftest.out >&5
if $GREP 'External.*some_variable' conftest.out > /dev/null; then
lt_cv_nm_interface="MS dumpbin"
@@ -6219,7 +6223,7 @@ ia64-*-hpux*)
;;
*-*-irix6*)
# Find out which ABI we are using.
- echo '#line 6222 "configure"' > conftest.$ac_ext
+ echo '#line 6226 "configure"' > conftest.$ac_ext
if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -8246,11 +8250,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:8249: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:8253: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:8253: \$? = $ac_status" >&5
+ echo "$as_me:8257: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -8585,11 +8589,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:8588: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:8592: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:8592: \$? = $ac_status" >&5
+ echo "$as_me:8596: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -8690,11 +8694,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:8693: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:8697: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:8697: \$? = $ac_status" >&5
+ echo "$as_me:8701: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -8745,11 +8749,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:8748: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:8752: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:8752: \$? = $ac_status" >&5
+ echo "$as_me:8756: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -11129,7 +11133,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11132 "configure"
+#line 11136 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -11225,7 +11229,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11228 "configure"
+#line 11232 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -13181,11 +13185,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:13184: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:13188: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:13188: \$? = $ac_status" >&5
+ echo "$as_me:13192: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -13280,11 +13284,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:13283: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:13287: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:13287: \$? = $ac_status" >&5
+ echo "$as_me:13291: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -13332,11 +13336,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:13335: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:13339: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:13339: \$? = $ac_status" >&5
+ echo "$as_me:13343: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -14303,7 +14307,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
ac_config_headers="$ac_config_headers config.h src/include/fst/config.h"
-ac_config_files="$ac_config_files Makefile src/Makefile src/include/Makefile src/lib/Makefile src/bin/Makefile src/test/Makefile src/extensions/Makefile src/extensions/compact/Makefile src/extensions/const/Makefile src/extensions/far/Makefile src/extensions/lookahead/Makefile src/extensions/pdt/Makefile src/script/Makefile"
+ac_config_files="$ac_config_files Makefile src/Makefile src/include/Makefile src/lib/Makefile src/bin/Makefile src/test/Makefile src/extensions/Makefile src/extensions/compact/Makefile src/extensions/const/Makefile src/extensions/far/Makefile src/extensions/lookahead/Makefile src/extensions/ngram/Makefile src/extensions/pdt/Makefile src/script/Makefile"
ac_ext=cpp
@@ -14408,6 +14412,22 @@ else
fi
+# Check whether --enable-ngram-fsts was given.
+if test "${enable_ngram_fsts+set}" = set; then :
+ enableval=$enable_ngram_fsts;
+else
+ enable_ngram_fsts=no
+fi
+
+ if test "x$enable_ngram_fsts" != xno; then
+ HAVE_NGRAM_TRUE=
+ HAVE_NGRAM_FALSE='#'
+else
+ HAVE_NGRAM_TRUE='#'
+ HAVE_NGRAM_FALSE=
+fi
+
+
# Check whether --enable-lookahead-fsts was given.
if test "${enable_lookahead_fsts+set}" = set; then :
enableval=$enable_lookahead_fsts;
@@ -14706,6 +14726,10 @@ if test -z "${HAVE_FAR_TRUE}" && test -z "${HAVE_FAR_FALSE}"; then
as_fn_error "conditional \"HAVE_FAR\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${HAVE_NGRAM_TRUE}" && test -z "${HAVE_NGRAM_FALSE}"; then
+ as_fn_error "conditional \"HAVE_NGRAM\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${HAVE_LOOKAHEAD_TRUE}" && test -z "${HAVE_LOOKAHEAD_FALSE}"; then
as_fn_error "conditional \"HAVE_LOOKAHEAD\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -15122,7 +15146,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by OpenFst $as_me 1.2.10, which was
+This file was extended by OpenFst $as_me 1.3.2, which was
generated by GNU Autoconf 2.65. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -15188,7 +15212,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-OpenFst config.status 1.2.10
+OpenFst config.status 1.3.2
configured by $0, generated by GNU Autoconf 2.65,
with options \\"\$ac_cs_config\\"
@@ -15677,6 +15701,7 @@ do
"src/extensions/const/Makefile") CONFIG_FILES="$CONFIG_FILES src/extensions/const/Makefile" ;;
"src/extensions/far/Makefile") CONFIG_FILES="$CONFIG_FILES src/extensions/far/Makefile" ;;
"src/extensions/lookahead/Makefile") CONFIG_FILES="$CONFIG_FILES src/extensions/lookahead/Makefile" ;;
+ "src/extensions/ngram/Makefile") CONFIG_FILES="$CONFIG_FILES src/extensions/ngram/Makefile" ;;
"src/extensions/pdt/Makefile") CONFIG_FILES="$CONFIG_FILES src/extensions/pdt/Makefile" ;;
"src/script/Makefile") CONFIG_FILES="$CONFIG_FILES src/script/Makefile" ;;
diff --git a/configure.ac b/configure.ac
index 211dcd7..908a6d5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([OpenFst], [1.2.10], [help@www.openfst.org])
+AC_INIT([OpenFst], [1.3.2], [help@www.openfst.org])
AM_INIT_AUTOMAKE([foreign nostdinc -Wall -Werror])
AC_PROG_CXX
@@ -19,6 +19,7 @@ AC_CONFIG_FILES([
src/extensions/const/Makefile
src/extensions/far/Makefile
src/extensions/lookahead/Makefile
+ src/extensions/ngram/Makefile
src/extensions/pdt/Makefile
src/script/Makefile
])
@@ -63,6 +64,15 @@ AC_ARG_ENABLE([far], [AS_HELP_STRING([--enable-far], [enable FAR extensions])],
AM_CONDITIONAL([HAVE_FAR], [test "x$enable_far" != xno])
AC_ARG_ENABLE(
+ [ngram-fsts],
+ [AS_HELP_STRING(
+ [--enable-ngram-fsts],
+ [enable NGramFst extension])],
+ [],
+ [enable_ngram_fsts=no])
+AM_CONDITIONAL([HAVE_NGRAM], [test "x$enable_ngram_fsts" != xno])
+
+AC_ARG_ENABLE(
[lookahead-fsts],
[AS_HELP_STRING(
[--enable-lookahead-fsts],
diff --git a/fix_headers.sh b/fix_headers.sh
new file mode 100755
index 0000000..b1b0844
--- /dev/null
+++ b/fix_headers.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+INPUT_FILE=$1
+
+function fix_tr1() {
+ grep $1 $INPUT_FILE > /dev/null 2>&1
+ if [ $? -eq 0 ] ; then
+ echo "Modifying $INPUT_FILE ..."
+ sed -e "s/$1/$2/g" $INPUT_FILE > .tmp
+ if [ $? -ne 0 ] ; then
+ echo "sed failed!"
+ return 1
+ fi
+ mv -f .tmp $INPUT_FILE
+ fi
+ return 0
+}
+
+fix_tr1 "tr1\/unordered_map" "unordered_map"
+fix_tr1 "tr1/unordered_set" "unordered_set"
+fix_tr1 "tr1::hash" "hash"
diff --git a/src/bin/fstarcsort.cc b/src/bin/fstarcsort.cc
index 33e534d..9f1b420 100644
--- a/src/bin/fstarcsort.cc
+++ b/src/bin/fstarcsort.cc
@@ -38,7 +38,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
diff --git a/src/bin/fstclosure.cc b/src/bin/fstclosure.cc
index 569c708..f6b3324 100644
--- a/src/bin/fstclosure.cc
+++ b/src/bin/fstclosure.cc
@@ -34,7 +34,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstcompile.cc b/src/bin/fstcompile.cc
index db62f95..2e0bdd4 100644
--- a/src/bin/fstcompile.cc
+++ b/src/bin/fstcompile.cc
@@ -45,14 +45,14 @@ int main(int argc, char **argv) {
usage += " [text.fst [binary.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
}
const char *source = "standard input";
- istream *istrm = &std::cin;
+ istream *istrm = &cin;
if (argc > 1 && strcmp(argv[1], "-") != 0) {
source = argv[1];
istrm = new fst::ifstream(argv[1]);
@@ -63,13 +63,16 @@ int main(int argc, char **argv) {
}
const SymbolTable *isyms = 0, *osyms = 0, *ssyms = 0;
+ fst::SymbolTableTextOptions opts;
+ opts.allow_negative = FLAGS_allow_negative_labels;
+
if (!FLAGS_isymbols.empty()) {
- isyms = SymbolTable::ReadText(FLAGS_isymbols, FLAGS_allow_negative_labels);
+ isyms = SymbolTable::ReadText(FLAGS_isymbols, opts);
if (!isyms) exit(1);
}
if (!FLAGS_osymbols.empty()) {
- osyms = SymbolTable::ReadText(FLAGS_osymbols, FLAGS_allow_negative_labels);
+ osyms = SymbolTable::ReadText(FLAGS_osymbols, opts);
if (!osyms) exit(1);
}
@@ -85,7 +88,7 @@ int main(int argc, char **argv) {
FLAGS_acceptor, FLAGS_keep_isymbols, FLAGS_keep_osymbols,
FLAGS_keep_state_numbering, FLAGS_allow_negative_labels);
- if (istrm != &std::cin)
+ if (istrm != &cin)
delete istrm;
return 0;
diff --git a/src/bin/fstcompose.cc b/src/bin/fstcompose.cc
index 0ba8c47..996aca7 100644
--- a/src/bin/fstcompose.cc
+++ b/src/bin/fstcompose.cc
@@ -41,7 +41,7 @@ int main(int argc, char **argv) {
usage += " in1.fst in2.fst [out.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc < 3 || argc > 4) {
ShowUsage();
return 1;
diff --git a/src/bin/fstconcat.cc b/src/bin/fstconcat.cc
index fbe9b68..4a02205 100644
--- a/src/bin/fstconcat.cc
+++ b/src/bin/fstconcat.cc
@@ -34,7 +34,7 @@ int main(int argc, char **argv) {
usage += " in1.fst in2.fst [out.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc < 3 || argc > 4) {
ShowUsage();
return 1;
diff --git a/src/bin/fstconnect.cc b/src/bin/fstconnect.cc
index f774767..2900d1b 100644
--- a/src/bin/fstconnect.cc
+++ b/src/bin/fstconnect.cc
@@ -33,7 +33,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstconvert.cc b/src/bin/fstconvert.cc
index f7c4ad9..9fbb43e 100644
--- a/src/bin/fstconvert.cc
+++ b/src/bin/fstconvert.cc
@@ -33,7 +33,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstdeterminize.cc b/src/bin/fstdeterminize.cc
index aa5064f..ca7840e 100644
--- a/src/bin/fstdeterminize.cc
+++ b/src/bin/fstdeterminize.cc
@@ -41,7 +41,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstdifference.cc b/src/bin/fstdifference.cc
index 5459b03..8bdf89e 100644
--- a/src/bin/fstdifference.cc
+++ b/src/bin/fstdifference.cc
@@ -39,7 +39,7 @@ int main(int argc, char **argv) {
usage += " in1.fst in2.fst [out.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc < 3 || argc > 4) {
ShowUsage();
return 1;
diff --git a/src/bin/fstdraw.cc b/src/bin/fstdraw.cc
index 51ebb2d..98f8ff4 100644
--- a/src/bin/fstdraw.cc
+++ b/src/bin/fstdraw.cc
@@ -26,8 +26,6 @@ DEFINE_string(isymbols, "", "Input label symbol table");
DEFINE_string(osymbols, "", "Output label symbol table");
DEFINE_string(ssymbols, "", "State label symbol table");
DEFINE_bool(numeric, false, "Print numeric labels");
-DEFINE_string(save_isymbols, "", "Save input symbol table to file");
-DEFINE_string(save_osymbols, "", "Save output symbol table to file");
DEFINE_int32(precision, 5, "Set precision (number of char/float)");
DEFINE_bool(show_weight_one, false,
"Print/draw arc weights and final weights equal to Weight::One()");
@@ -51,10 +49,10 @@ int main(int argc, char **argv) {
string usage = "Prints out binary FSTs in dot text format.\n\n Usage: ";
usage += argv[0];
- usage += " [binary.fst [text.fst]]\n";
+ usage += " [binary.fst [text.dot]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
@@ -65,7 +63,7 @@ int main(int argc, char **argv) {
s::FstClass *fst = s::FstClass::Read(in_name);
if (!fst) return 1;
- ostream *ostrm = &std::cout;
+ ostream *ostrm = &cout;
string dest = "stdout";
if (argc == 3) {
dest = argv[2];
@@ -79,13 +77,16 @@ int main(int argc, char **argv) {
const SymbolTable *isyms = 0, *osyms = 0, *ssyms = 0;
+ fst::SymbolTableTextOptions opts;
+ opts.allow_negative = FLAGS_allow_negative_labels;
+
if (!FLAGS_isymbols.empty() && !FLAGS_numeric) {
- isyms = SymbolTable::ReadText(FLAGS_isymbols, FLAGS_allow_negative_labels);
+ isyms = SymbolTable::ReadText(FLAGS_isymbols, opts);
if (!isyms) exit(1);
}
if (!FLAGS_osymbols.empty() && !FLAGS_numeric) {
- osyms = SymbolTable::ReadText(FLAGS_osymbols, FLAGS_allow_negative_labels);
+ osyms = SymbolTable::ReadText(FLAGS_osymbols, opts);
if (!osyms) exit(1);
}
@@ -106,13 +107,7 @@ int main(int argc, char **argv) {
FLAGS_fontsize, FLAGS_precision,
FLAGS_show_weight_one, ostrm, dest);
- if (isyms && !FLAGS_save_isymbols.empty())
- isyms->WriteText(FLAGS_save_isymbols);
-
- if (osyms && !FLAGS_save_osymbols.empty())
- osyms->WriteText(FLAGS_save_osymbols);
-
- if (ostrm != &std::cout)
+ if (ostrm != &cout)
delete ostrm;
return 0;
}
diff --git a/src/bin/fstencode.cc b/src/bin/fstencode.cc
index 95dd49c..003ca24 100644
--- a/src/bin/fstencode.cc
+++ b/src/bin/fstencode.cc
@@ -39,7 +39,7 @@ int main(int argc, char **argv) {
usage += " in.fst codex [out.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc < 3 || argc > 4) {
ShowUsage();
return 1;
diff --git a/src/bin/fstepsnormalize.cc b/src/bin/fstepsnormalize.cc
index c8813c6..d5c1a8d 100644
--- a/src/bin/fstepsnormalize.cc
+++ b/src/bin/fstepsnormalize.cc
@@ -35,7 +35,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstequal.cc b/src/bin/fstequal.cc
index 885b330..c64f6e3 100644
--- a/src/bin/fstequal.cc
+++ b/src/bin/fstequal.cc
@@ -33,7 +33,7 @@ int main(int argc, char **argv) {
usage += " in1.fst in2.fst\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc != 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstequivalent.cc b/src/bin/fstequivalent.cc
index e7e09f2..70010d7 100644
--- a/src/bin/fstequivalent.cc
+++ b/src/bin/fstequivalent.cc
@@ -43,7 +43,7 @@ int main(int argc, char **argv) {
usage += " in1.fst in2.fst\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc != 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstinfo.cc b/src/bin/fstinfo.cc
index 23816be..f02e0b3 100644
--- a/src/bin/fstinfo.cc
+++ b/src/bin/fstinfo.cc
@@ -41,7 +41,7 @@ int main(int argc, char **argv) {
usage += " [in.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 2) {
ShowUsage();
return 1;
diff --git a/src/bin/fstintersect.cc b/src/bin/fstintersect.cc
index b3558fc..3bdc26b 100644
--- a/src/bin/fstintersect.cc
+++ b/src/bin/fstintersect.cc
@@ -40,7 +40,7 @@ int main(int argc, char **argv) {
usage += " Flags: connect\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc < 3 || argc > 4) {
ShowUsage();
return 1;
diff --git a/src/bin/fstinvert.cc b/src/bin/fstinvert.cc
index f9b348b..00dadc5 100644
--- a/src/bin/fstinvert.cc
+++ b/src/bin/fstinvert.cc
@@ -32,7 +32,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstmap.cc b/src/bin/fstmap.cc
index 231c725..81336b1 100644
--- a/src/bin/fstmap.cc
+++ b/src/bin/fstmap.cc
@@ -43,7 +43,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstminimize.cc b/src/bin/fstminimize.cc
index 8d9d635..2d4e7a9 100644
--- a/src/bin/fstminimize.cc
+++ b/src/bin/fstminimize.cc
@@ -36,7 +36,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out1.fst [out2.fst]]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 4) {
ShowUsage();
return 1;
diff --git a/src/bin/fstprint.cc b/src/bin/fstprint.cc
index b42bb7e..b5b23e5 100644
--- a/src/bin/fstprint.cc
+++ b/src/bin/fstprint.cc
@@ -44,7 +44,7 @@ int main(int argc, char **argv) {
usage += " [binary.fst [text.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
@@ -56,7 +56,7 @@ int main(int argc, char **argv) {
s::FstClass *fst = s::FstClass::Read(in_name);
if (!fst) return 1;
- ostream *ostrm = &std::cout;
+ ostream *ostrm = &cout;
string dest = "standard output";
if (argc == 3) {
dest = argv[2];
@@ -70,13 +70,16 @@ int main(int argc, char **argv) {
const SymbolTable *isyms = 0, *osyms = 0, *ssyms = 0;
+ fst::SymbolTableTextOptions opts;
+ opts.allow_negative = FLAGS_allow_negative_labels;
+
if (!FLAGS_isymbols.empty() && !FLAGS_numeric) {
- isyms = SymbolTable::ReadText(FLAGS_isymbols, FLAGS_allow_negative_labels);
+ isyms = SymbolTable::ReadText(FLAGS_isymbols, opts);
if (!isyms) exit(1);
}
if (!FLAGS_osymbols.empty() && !FLAGS_numeric) {
- osyms = SymbolTable::ReadText(FLAGS_osymbols, FLAGS_allow_negative_labels);
+ osyms = SymbolTable::ReadText(FLAGS_osymbols, opts);
if (!osyms) exit(1);
}
@@ -99,7 +102,7 @@ int main(int argc, char **argv) {
if (osyms && !FLAGS_save_osymbols.empty())
osyms->WriteText(FLAGS_save_osymbols);
- if (ostrm != &std::cout)
+ if (ostrm != &cout)
delete ostrm;
return 0;
}
diff --git a/src/bin/fstproject.cc b/src/bin/fstproject.cc
index a67f431..efd8dbf 100644
--- a/src/bin/fstproject.cc
+++ b/src/bin/fstproject.cc
@@ -35,7 +35,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstprune.cc b/src/bin/fstprune.cc
index 2041b7c..dedf237 100644
--- a/src/bin/fstprune.cc
+++ b/src/bin/fstprune.cc
@@ -38,7 +38,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstpush.cc b/src/bin/fstpush.cc
index 3b849b5..d40ff9f 100644
--- a/src/bin/fstpush.cc
+++ b/src/bin/fstpush.cc
@@ -41,7 +41,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstrandgen.cc b/src/bin/fstrandgen.cc
index f48d0cf..b3716e5 100644
--- a/src/bin/fstrandgen.cc
+++ b/src/bin/fstrandgen.cc
@@ -42,7 +42,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstrelabel.cc b/src/bin/fstrelabel.cc
index cc86450..e75db99 100644
--- a/src/bin/fstrelabel.cc
+++ b/src/bin/fstrelabel.cc
@@ -43,6 +43,7 @@ DEFINE_bool(allow_negative_labels, false,
int main(int argc, char **argv) {
namespace s = fst::script;
using fst::SymbolTable;
+ using fst::SymbolTableTextOptions;
using fst::script::FstClass;
using fst::script::MutableFstClass;
@@ -58,7 +59,7 @@ int main(int argc, char **argv) {
usage += " -relabel_opairs opairs.txts\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
@@ -71,24 +72,24 @@ int main(int argc, char **argv) {
if (!fst) return 1;
// Relabel with symbol tables
+ SymbolTableTextOptions opts;
+ opts.allow_negative = FLAGS_allow_negative_labels;
if (!FLAGS_relabel_isymbols.empty() || !FLAGS_relabel_osymbols.empty()) {
bool attach_new_isymbols = (fst->InputSymbols() != 0);
const SymbolTable* old_isymbols = FLAGS_isymbols.empty()
? fst->InputSymbols()
- : SymbolTable::ReadText(FLAGS_isymbols, FLAGS_allow_negative_labels);
+ : SymbolTable::ReadText(FLAGS_isymbols, opts);
const SymbolTable* relabel_isymbols = FLAGS_relabel_isymbols.empty()
? NULL
- : SymbolTable::ReadText(FLAGS_relabel_isymbols,
- FLAGS_allow_negative_labels);
+ : SymbolTable::ReadText(FLAGS_relabel_isymbols, opts);
bool attach_new_osymbols = (fst->OutputSymbols() != 0);
const SymbolTable* old_osymbols = FLAGS_osymbols.empty()
? fst->OutputSymbols()
- : SymbolTable::ReadText(FLAGS_osymbols, FLAGS_allow_negative_labels);
+ : SymbolTable::ReadText(FLAGS_osymbols, opts);
const SymbolTable* relabel_osymbols = FLAGS_relabel_osymbols.empty()
? NULL
- : SymbolTable::ReadText(FLAGS_relabel_osymbols,
- FLAGS_allow_negative_labels);
+ : SymbolTable::ReadText(FLAGS_relabel_osymbols, opts);
s::Relabel(fst,
old_isymbols, relabel_isymbols, attach_new_isymbols,
diff --git a/src/bin/fstreplace.cc b/src/bin/fstreplace.cc
index 5d1d770..d6522fc 100644
--- a/src/bin/fstreplace.cc
+++ b/src/bin/fstreplace.cc
@@ -31,7 +31,7 @@ int main(int argc, char **argv) {
usage += " root.fst rootlabel [rule1.fst label1 ...] [out.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc < 4) {
ShowUsage();
return 1;
diff --git a/src/bin/fstreverse.cc b/src/bin/fstreverse.cc
index 7507ffa..a919f58 100644
--- a/src/bin/fstreverse.cc
+++ b/src/bin/fstreverse.cc
@@ -26,6 +26,7 @@
#include <fst/script/fst-class.h>
#include <iostream>
#include <fstream>
+#include <sstream>
int main(int argc, char **argv) {
using fst::script::FstClass;
@@ -37,7 +38,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstreweight.cc b/src/bin/fstreweight.cc
index 3913a6d..4e8333d 100644
--- a/src/bin/fstreweight.cc
+++ b/src/bin/fstreweight.cc
@@ -35,7 +35,7 @@ int main(int argc, char **argv) {
usage += " in.fst potential.txt [out.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc < 3 || argc > 4) {
ShowUsage();
return 1;
diff --git a/src/bin/fstrmepsilon.cc b/src/bin/fstrmepsilon.cc
index 63a80b8..da4e6d9 100644
--- a/src/bin/fstrmepsilon.cc
+++ b/src/bin/fstrmepsilon.cc
@@ -42,7 +42,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstshortestdistance.cc b/src/bin/fstshortestdistance.cc
index 01f5617..9103b72 100644
--- a/src/bin/fstshortestdistance.cc
+++ b/src/bin/fstshortestdistance.cc
@@ -41,7 +41,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [distance.txt]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstshortestpath.cc b/src/bin/fstshortestpath.cc
index 4a9e720..6e33db8 100644
--- a/src/bin/fstshortestpath.cc
+++ b/src/bin/fstshortestpath.cc
@@ -42,7 +42,7 @@ int main(int argc, char **argv) {
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstsymbols.cc b/src/bin/fstsymbols.cc
index c07f1be..23a6447 100644
--- a/src/bin/fstsymbols.cc
+++ b/src/bin/fstsymbols.cc
@@ -48,7 +48,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
@@ -78,17 +78,18 @@ int main(int argc, char **argv) {
}
}
+ fst::SymbolTableTextOptions opts;
+ opts.allow_negative = FLAGS_allow_negative_labels;
+
if (FLAGS_clear_isymbols)
fst->SetInputSymbols(0);
else if (!FLAGS_isymbols.empty())
- fst->SetInputSymbols(
- SymbolTable::ReadText(FLAGS_isymbols, FLAGS_allow_negative_labels));
+ fst->SetInputSymbols(SymbolTable::ReadText(FLAGS_isymbols, opts));
if (FLAGS_clear_osymbols)
fst->SetOutputSymbols(0);
else if (!FLAGS_osymbols.empty())
- fst->SetOutputSymbols(
- SymbolTable::ReadText(FLAGS_osymbols, FLAGS_allow_negative_labels));
+ fst->SetOutputSymbols(SymbolTable::ReadText(FLAGS_osymbols, opts));
if (!FLAGS_relabel_ipairs.empty()) {
typedef int64 Label;
diff --git a/src/bin/fstsynchronize.cc b/src/bin/fstsynchronize.cc
index 7137504..8c226ad 100644
--- a/src/bin/fstsynchronize.cc
+++ b/src/bin/fstsynchronize.cc
@@ -32,7 +32,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fsttopsort.cc b/src/bin/fsttopsort.cc
index 6f04215..69896c7 100644
--- a/src/bin/fsttopsort.cc
+++ b/src/bin/fsttopsort.cc
@@ -32,7 +32,7 @@ int main(int argc, char **argv) {
usage += " [in.fst [out.fst]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/bin/fstunion.cc b/src/bin/fstunion.cc
index 41c4f83..ae7eb66 100644
--- a/src/bin/fstunion.cc
+++ b/src/bin/fstunion.cc
@@ -25,6 +25,7 @@
#include <fst/script/union.h>
#include <iostream>
#include <fstream>
+#include <sstream>
int main(int argc, char **argv) {
using fst::script::FstClass;
@@ -36,7 +37,7 @@ int main(int argc, char **argv) {
usage += " in1.fst in2.fst [out.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc < 3 || argc > 4) {
ShowUsage();
return 1;
diff --git a/src/extensions/Makefile.am b/src/extensions/Makefile.am
index 64d5483..d982c15 100644
--- a/src/extensions/Makefile.am
+++ b/src/extensions/Makefile.am
@@ -14,8 +14,12 @@ if HAVE_LOOKAHEAD
lookaheaddir = lookahead
endif
+if HAVE_NGRAM
+ngramdir = ngram
+endif
+
if HAVE_PDT
pdtdir = pdt
endif
-SUBDIRS = $(compactdir) $(constdir) $(fardir) $(lookaheaddir) $(pdtdir)
+SUBDIRS = $(compactdir) $(constdir) $(fardir) $(lookaheaddir) $(ngramdir) $(pdtdir)
diff --git a/src/extensions/Makefile.in b/src/extensions/Makefile.in
index 43a74c8..6bcf056 100644
--- a/src/extensions/Makefile.in
+++ b/src/extensions/Makefile.in
@@ -63,7 +63,7 @@ AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
distdir
ETAGS = etags
CTAGS = ctags
-DIST_SUBDIRS = compact const far lookahead pdt
+DIST_SUBDIRS = compact const far lookahead ngram pdt
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
@@ -216,8 +216,9 @@ top_srcdir = @top_srcdir@
@HAVE_CONST_TRUE@constdir = const
@HAVE_FAR_TRUE@fardir = far
@HAVE_LOOKAHEAD_TRUE@lookaheaddir = lookahead
+@HAVE_NGRAM_TRUE@ngramdir = ngram
@HAVE_PDT_TRUE@pdtdir = pdt
-SUBDIRS = $(compactdir) $(constdir) $(fardir) $(lookaheaddir) $(pdtdir)
+SUBDIRS = $(compactdir) $(constdir) $(fardir) $(lookaheaddir) $(ngramdir) $(pdtdir)
all: all-recursive
.SUFFIXES:
diff --git a/src/extensions/far/Makefile.am b/src/extensions/far/Makefile.am
index 61f7cc8..a7c2fff 100644
--- a/src/extensions/far/Makefile.am
+++ b/src/extensions/far/Makefile.am
@@ -12,12 +12,13 @@ libfstfar_la_SOURCES = sttable.cc stlist.cc
libfstfar_la_LDFLAGS = -version-info 0:0:0
if HAVE_SCRIPT
-libfstfarscript_la_SOURCES = farscript.cc compile-strings.cc main.cc
+libfstfarscript_la_SOURCES = farscript.cc strings.cc main.cc
libfstfarscript_la_LDFLAGS = -version-info 0:0:0
endif
if HAVE_BIN
-bin_PROGRAMS = farcompilestrings farcreate farextract farinfo farprintstrings
+bin_PROGRAMS = farcompilestrings farcreate farequal farextract farinfo \
+ farprintstrings
LDADD = libfstfarscript.la libfstfar.la ../../script/libfstscript.la \
../../lib/libfst.la -lm -ldl
@@ -26,6 +27,8 @@ farcompilestrings_SOURCES = farcompilestrings.cc
farcreate_SOURCES = farcreate.cc
+farequal_SOURCES = farequal.cc
+
farextract_SOURCES = farextract.cc
farinfo_SOURCES = farinfo.cc
diff --git a/src/extensions/far/Makefile.in b/src/extensions/far/Makefile.in
index 8e05cab..0635556 100644
--- a/src/extensions/far/Makefile.in
+++ b/src/extensions/far/Makefile.in
@@ -36,8 +36,9 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
@HAVE_BIN_TRUE@bin_PROGRAMS = farcompilestrings$(EXEEXT) \
-@HAVE_BIN_TRUE@ farcreate$(EXEEXT) farextract$(EXEEXT) \
-@HAVE_BIN_TRUE@ farinfo$(EXEEXT) farprintstrings$(EXEEXT)
+@HAVE_BIN_TRUE@ farcreate$(EXEEXT) farequal$(EXEEXT) \
+@HAVE_BIN_TRUE@ farextract$(EXEEXT) farinfo$(EXEEXT) \
+@HAVE_BIN_TRUE@ farprintstrings$(EXEEXT)
subdir = src/extensions/far
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@@ -84,10 +85,9 @@ libfstfar_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
@HAVE_SCRIPT_FALSE@am_libfstfar_la_rpath = -rpath $(libfstdir)
@HAVE_SCRIPT_TRUE@am_libfstfar_la_rpath = -rpath $(libfstdir)
libfstfarscript_la_LIBADD =
-am__libfstfarscript_la_SOURCES_DIST = farscript.cc compile-strings.cc \
- main.cc
+am__libfstfarscript_la_SOURCES_DIST = farscript.cc strings.cc main.cc
@HAVE_SCRIPT_TRUE@am_libfstfarscript_la_OBJECTS = farscript.lo \
-@HAVE_SCRIPT_TRUE@ compile-strings.lo main.lo
+@HAVE_SCRIPT_TRUE@ strings.lo main.lo
libfstfarscript_la_OBJECTS = $(am_libfstfarscript_la_OBJECTS)
libfstfarscript_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
@@ -109,6 +109,13 @@ farcreate_LDADD = $(LDADD)
@HAVE_BIN_TRUE@farcreate_DEPENDENCIES = libfstfarscript.la \
@HAVE_BIN_TRUE@ libfstfar.la ../../script/libfstscript.la \
@HAVE_BIN_TRUE@ ../../lib/libfst.la
+am__farequal_SOURCES_DIST = farequal.cc
+@HAVE_BIN_TRUE@am_farequal_OBJECTS = farequal.$(OBJEXT)
+farequal_OBJECTS = $(am_farequal_OBJECTS)
+farequal_LDADD = $(LDADD)
+@HAVE_BIN_TRUE@farequal_DEPENDENCIES = libfstfarscript.la libfstfar.la \
+@HAVE_BIN_TRUE@ ../../script/libfstscript.la \
+@HAVE_BIN_TRUE@ ../../lib/libfst.la
am__farextract_SOURCES_DIST = farextract.cc
@HAVE_BIN_TRUE@am_farextract_OBJECTS = farextract.$(OBJEXT)
farextract_OBJECTS = $(am_farextract_OBJECTS)
@@ -145,13 +152,13 @@ CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
$(LDFLAGS) -o $@
SOURCES = $(libfstfar_la_SOURCES) $(libfstfarscript_la_SOURCES) \
$(farcompilestrings_SOURCES) $(farcreate_SOURCES) \
- $(farextract_SOURCES) $(farinfo_SOURCES) \
+ $(farequal_SOURCES) $(farextract_SOURCES) $(farinfo_SOURCES) \
$(farprintstrings_SOURCES)
DIST_SOURCES = $(libfstfar_la_SOURCES) \
$(am__libfstfarscript_la_SOURCES_DIST) \
$(am__farcompilestrings_SOURCES_DIST) \
- $(am__farcreate_SOURCES_DIST) $(am__farextract_SOURCES_DIST) \
- $(am__farinfo_SOURCES_DIST) \
+ $(am__farcreate_SOURCES_DIST) $(am__farequal_SOURCES_DIST) \
+ $(am__farextract_SOURCES_DIST) $(am__farinfo_SOURCES_DIST) \
$(am__farprintstrings_SOURCES_DIST)
ETAGS = etags
CTAGS = ctags
@@ -283,13 +290,14 @@ AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
@HAVE_SCRIPT_TRUE@libfst_LTLIBRARIES = libfstfarscript.la libfstfar.la
libfstfar_la_SOURCES = sttable.cc stlist.cc
libfstfar_la_LDFLAGS = -version-info 0:0:0
-@HAVE_SCRIPT_TRUE@libfstfarscript_la_SOURCES = farscript.cc compile-strings.cc main.cc
+@HAVE_SCRIPT_TRUE@libfstfarscript_la_SOURCES = farscript.cc strings.cc main.cc
@HAVE_SCRIPT_TRUE@libfstfarscript_la_LDFLAGS = -version-info 0:0:0
@HAVE_BIN_TRUE@LDADD = libfstfarscript.la libfstfar.la ../../script/libfstscript.la \
@HAVE_BIN_TRUE@ ../../lib/libfst.la -lm -ldl
@HAVE_BIN_TRUE@farcompilestrings_SOURCES = farcompilestrings.cc
@HAVE_BIN_TRUE@farcreate_SOURCES = farcreate.cc
+@HAVE_BIN_TRUE@farequal_SOURCES = farequal.cc
@HAVE_BIN_TRUE@farextract_SOURCES = farextract.cc
@HAVE_BIN_TRUE@farinfo_SOURCES = farinfo.cc
@HAVE_BIN_TRUE@farprintstrings_SOURCES = farprintstrings.cc
@@ -411,6 +419,9 @@ farcompilestrings$(EXEEXT): $(farcompilestrings_OBJECTS) $(farcompilestrings_DEP
farcreate$(EXEEXT): $(farcreate_OBJECTS) $(farcreate_DEPENDENCIES)
@rm -f farcreate$(EXEEXT)
$(CXXLINK) $(farcreate_OBJECTS) $(farcreate_LDADD) $(LIBS)
+farequal$(EXEEXT): $(farequal_OBJECTS) $(farequal_DEPENDENCIES)
+ @rm -f farequal$(EXEEXT)
+ $(CXXLINK) $(farequal_OBJECTS) $(farequal_LDADD) $(LIBS)
farextract$(EXEEXT): $(farextract_OBJECTS) $(farextract_DEPENDENCIES)
@rm -f farextract$(EXEEXT)
$(CXXLINK) $(farextract_OBJECTS) $(farextract_LDADD) $(LIBS)
@@ -427,15 +438,16 @@ mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compile-strings.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farcompilestrings.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farcreate.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farequal.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farextract.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farinfo.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farprintstrings.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/farscript.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stlist.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strings.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sttable.Plo@am__quote@
.cc.o:
diff --git a/src/extensions/far/farcompilestrings.cc b/src/extensions/far/farcompilestrings.cc
index 0c9b352..765b908 100644
--- a/src/extensions/far/farcompilestrings.cc
+++ b/src/extensions/far/farcompilestrings.cc
@@ -25,12 +25,15 @@
#include <fst/extensions/far/main.h>
#include <iostream>
#include <fstream>
+#include <sstream>
DEFINE_string(key_prefix, "", "Prefix to append to keys");
DEFINE_string(key_suffix, "", "Suffix to append to keys");
DEFINE_int32(generate_keys, 0,
"Generate N digit numeric keys (def: use file basenames)");
-DEFINE_string(far_type, "default", "FAR file format type: one of: ");
+DEFINE_string(far_type, "default",
+ "FAR file format type: one of: \"default\", \"fst\""
+ "\"stlist\", \"sttable\"");
DEFINE_bool(allow_negative_labels, false,
"Allow negative labels (not recommended; may cause conflicts)");
DEFINE_string(arc_type, "standard", "Output arc type");
@@ -43,7 +46,11 @@ DEFINE_string(symbols, "", "Label symbol table");
DEFINE_string(unknown_symbol, "", "");
DEFINE_bool(file_list_input, false,
"Each input files contains a list of files to be processed");
-
+DEFINE_bool(keep_symbols, false,
+ "Store symbol table in Far file");
+DEFINE_bool(initial_symbols, true,
+ "When keep_symbols==true, stores symbol table only for the first"
+ " Fst in archive.");
int main(int argc, char **argv) {
namespace s = fst::script;
@@ -51,23 +58,19 @@ int main(int argc, char **argv) {
string usage = "Compiles a set of strings as FSTs and stores them in";
usage += " a finite-state archive.\n\n Usage:";
usage += argv[0];
- usage += " in1.txt [in2.txt ...] out.far\n";
+ usage += " [in1.txt [[in2.txt ...] out.far]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
-
- if (argc < 3) {
- ShowUsage();
- return 1;
- }
-
- vector<string> in_fnames(argc - 2);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
- for (unsigned i = 1; i < argc - 1; ++i) {
- in_fnames[i - 1] = argv[i];
- }
+ vector<string> in_fnames;
+ for (unsigned i = 1; i < argc - 1; ++i)
+ in_fnames.push_back(strcmp(argv[i], "") != 0 ? argv[i] : "");
+ if (in_fnames.empty())
+ in_fnames.push_back(argc == 2 && strcmp(argv[1], "-") != 0 ? argv[1] : "");
- string out_fname = argv[argc - 1];
+ string out_fname =
+ argc > 2 && strcmp(argv[argc - 1], "-") != 0 ? argv[argc - 1] : "";
fst::FarEntryType fet = fst::StringToFarEntryType(FLAGS_entry_type);
fst::FarTokenType ftt = fst::StringToFarTokenType(FLAGS_token_type);
@@ -76,6 +79,7 @@ int main(int argc, char **argv) {
s::FarCompileStrings(in_fnames, out_fname, FLAGS_arc_type, FLAGS_fst_type,
far_type, FLAGS_generate_keys, fet, ftt,
FLAGS_symbols, FLAGS_unknown_symbol,
+ FLAGS_keep_symbols, FLAGS_initial_symbols,
FLAGS_allow_negative_labels,
FLAGS_file_list_input, FLAGS_key_prefix,
FLAGS_key_suffix);
diff --git a/src/extensions/far/farcreate.cc b/src/extensions/far/farcreate.cc
index 009fc34..9ddbec3 100644
--- a/src/extensions/far/farcreate.cc
+++ b/src/extensions/far/farcreate.cc
@@ -29,7 +29,8 @@ DEFINE_string(key_suffix, "", "Suffix to append to keys");
DEFINE_int32(generate_keys, 0,
"Generate N digit numeric keys (def: use file basenames)");
DEFINE_string(far_type, "default",
- "FAR file format type: one of: \"default\", \"stlist\", \"sstable\", \"sttable\"");
+ "FAR file format type: one of: \"default\", "
+ "\"stlist\", \"sttable\"");
DEFINE_bool(file_list_input, false,
"Each input files contains a list of files to be processed");
@@ -38,21 +39,20 @@ int main(int argc, char **argv) {
string usage = "Creates a finite-state archive from input FSTs.\n\n Usage:";
usage += argv[0];
- usage += " in1.fst [in2.fst ...] out.far\n";
+ usage += " [in1.fst [[in2.fst ...] out.far]]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
-
- if (argc < 3) {
- ShowUsage();
- return 1;
- }
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
vector<string> in_fnames;
- for (int i = 1; i < argc - 1; ++i)
- in_fnames.push_back(argv[i]);
+ for (unsigned i = 1; i < argc - 1; ++i)
+ in_fnames.push_back(strcmp(argv[i], "") != 0 ? argv[i] : "");
+ if (in_fnames.empty())
+ in_fnames.push_back(argc == 2 && strcmp(argv[1], "-") != 0 ? argv[1] : "");
+
+ string out_fname =
+ argc > 2 && strcmp(argv[argc - 1], "-") != 0 ? argv[argc - 1] : "";
- string out_fname = argv[argc - 1];
string arc_type = fst::LoadArcTypeFromFst(in_fnames[0]);
fst::FarType far_type = fst::FarTypeFromString(FLAGS_far_type);
diff --git a/src/extensions/far/farequal.cc b/src/extensions/far/farequal.cc
new file mode 100644
index 0000000..94e607c
--- /dev/null
+++ b/src/extensions/far/farequal.cc
@@ -0,0 +1,57 @@
+// farequal.cc
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// Tests if two Far files contains the same (key,fst) pairs.
+
+#include <fst/extensions/far/main.h>
+#include <fst/extensions/far/farscript.h>
+
+DEFINE_string(begin_key, "",
+ "First key to extract (def: first key in archive)");
+DEFINE_string(end_key, "",
+ "Last key to extract (def: last key in archive)");
+DEFINE_double(delta, fst::kDelta, "Comparison/quantization delta");
+
+int main(int argc, char **argv) {
+ namespace s = fst::script;
+
+ string usage = "Prints some basic information about the FSTs in an FST ";
+ usage += "archive.\n\n Usage:";
+ usage += argv[0];
+ usage += " in1.far in2.far\n";
+ usage += " Flags: begin_key end_key";
+
+ std::set_new_handler(FailedNewHandler);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
+
+ if (argc != 3) {
+ ShowUsage();
+ return 1;
+ }
+
+ string filename1(argv[1]), filename2(argv[2]);
+
+ bool result = s::FarEqual(
+ filename1, filename2, fst::LoadArcTypeFromFar(filename1),
+ FLAGS_delta, FLAGS_begin_key, FLAGS_end_key);
+
+ if (!result)
+ VLOG(1) << "FARs are not equal.";
+
+ return result ? 0 : 2;
+}
diff --git a/src/extensions/far/farextract.cc b/src/extensions/far/farextract.cc
index 72b2d15..66b7840 100644
--- a/src/extensions/far/farextract.cc
+++ b/src/extensions/far/farextract.cc
@@ -38,19 +38,15 @@ int main(int argc, char **argv) {
string usage = "Extracts FSTs from a finite-state archive.\n\n Usage:";
usage += argv[0];
- usage += " in1.far [in2.far...]\n";
+ usage += " [in1.far in2.far...]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
-
- if (argc < 2) {
- ShowUsage();
- return 1;
- }
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
vector<string> ifilenames;
for (int i = 1; i < argc; ++i)
- ifilenames.push_back(argv[i]);
+ ifilenames.push_back(strcmp(argv[i], "") != 0 ? argv[i] : "");
+ if (ifilenames.empty()) ifilenames.push_back("");
const string &arc_type = fst::LoadArcTypeFromFar(ifilenames[0]);
diff --git a/src/extensions/far/farinfo.cc b/src/extensions/far/farinfo.cc
index e8036a9..2665141 100644
--- a/src/extensions/far/farinfo.cc
+++ b/src/extensions/far/farinfo.cc
@@ -36,20 +36,17 @@ int main(int argc, char **argv) {
string usage = "Prints some basic information about the FSTs in an FST ";
usage += "archive.\n\n Usage:";
usage += argv[0];
- usage += " in1.far [in2.far...]\n";
+ usage += " [in1.far in2.far...]\n";
usage += " Flags: begin_key end_key list_fsts";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
-
- if (argc < 2) {
- ShowUsage();
- return 1;
- }
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
vector<string> filenames;
for (int i = 1; i < argc; ++i)
- filenames.push_back(argv[i]);
+ filenames.push_back(strcmp(argv[i], "") != 0 ? argv[i] : "");
+ if (filenames.empty())
+ filenames.push_back("");
s::FarInfo(filenames, fst::LoadArcTypeFromFar(filenames[0]),
FLAGS_begin_key, FLAGS_end_key, FLAGS_list_fsts);
diff --git a/src/extensions/far/farprintstrings.cc b/src/extensions/far/farprintstrings.cc
index ab7d52b..0204ab7 100644
--- a/src/extensions/far/farprintstrings.cc
+++ b/src/extensions/far/farprintstrings.cc
@@ -31,11 +31,14 @@ DEFINE_string(end_key, "",
"Last key to extract (def: last key in archive)");
// PrintStringsMain specific flag definitions.
DEFINE_bool(print_key, false, "Prefix each string by its key");
+DEFINE_bool(print_weight, false, "Suffix each string by its weight");
DEFINE_string(entry_type, "line", "Entry type: one of : "
"\"file\" (one FST per file), \"line\" (one FST per line)");
DEFINE_string(token_type, "symbol", "Token type: one of : "
"\"symbol\", \"byte\", \"utf8\"");
DEFINE_string(symbols, "", "Label symbol table");
+DEFINE_bool(initial_symbols, true,
+ "Uses symbol table from the first Fst in archive for all entries.");
int main(int argc, char **argv) {
@@ -43,27 +46,25 @@ int main(int argc, char **argv) {
string usage = "Print as string the string FSTs in an archive.\n\n Usage:";
usage += argv[0];
- usage += " in1.far [in2.far ...]\n";
+ usage += " [in1.far in2.far ...]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
-
- if (argc < 2) {
- ShowUsage();
- return 1;
- }
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
vector<string> ifilenames;
for (int i = 1; i < argc; ++i)
- ifilenames.push_back(argv[i]);
+ ifilenames.push_back(strcmp(argv[i], "") != 0 ? argv[i] : "");
+ if (ifilenames.empty()) ifilenames.push_back("");
string arc_type = fst::LoadArcTypeFromFar(ifilenames[0]);
s::FarPrintStrings(ifilenames, arc_type,
fst::StringToFarEntryType(FLAGS_entry_type),
fst::StringToFarTokenType(FLAGS_token_type),
- FLAGS_begin_key, FLAGS_end_key, FLAGS_print_key,
- FLAGS_symbols, FLAGS_generate_filenames,
+ FLAGS_begin_key, FLAGS_end_key,
+ FLAGS_print_key, FLAGS_print_weight,
+ FLAGS_symbols, FLAGS_initial_symbols,
+ FLAGS_generate_filenames,
FLAGS_filename_prefix, FLAGS_filename_suffix);
return 0;
diff --git a/src/extensions/far/farscript.cc b/src/extensions/far/farscript.cc
index a04645d..bc4ad0e 100644
--- a/src/extensions/far/farscript.cc
+++ b/src/extensions/far/farscript.cc
@@ -34,14 +34,17 @@ void FarCompileStrings(const vector<string> &in_fnames,
FarTokenType tt,
const string &symbols_fname,
const string &unknown_symbol,
+ bool keep_symbols,
+ bool initial_symbols,
bool allow_negative_labels,
bool file_list_input,
const string &key_prefix,
const string &key_suffix) {
FarCompileStringsArgs args(in_fnames, out_fname, fst_type, far_type,
generate_keys, fet, tt, symbols_fname,
- unknown_symbol, allow_negative_labels,
- file_list_input, key_prefix, key_suffix);
+ unknown_symbol, keep_symbols, initial_symbols,
+ allow_negative_labels, file_list_input,
+ key_prefix, key_suffix);
Apply<Operation<FarCompileStringsArgs> >("FarCompileStrings", arc_type,
&args);
@@ -61,6 +64,16 @@ void FarCreate(const vector<string> &in_fnames,
Apply<Operation<FarCreateArgs> >("FarCreate", arc_type, &args);
}
+bool FarEqual(const string &filename1, const string &filename2,
+ const string &arc_type, float delta,
+ const string &begin_key, const string &end_key) {
+ FarEqualInnerArgs args(filename1, filename2, delta, begin_key, end_key);
+ FarEqualArgs args_with_retval(args);
+
+ Apply<Operation<FarEqualArgs> >("FarEqual", arc_type, &args_with_retval);
+ return args_with_retval.retval;
+}
+
void FarExtract(const vector<string> &ifilenames,
const string &arc_type,
int32 generate_filenames, const string &begin_key,
@@ -89,15 +102,16 @@ void FarPrintStrings(const vector<string> &ifilenames,
const string &begin_key,
const string &end_key,
const bool print_key,
+ const bool print_weight,
const string &symbols_fname,
+ const bool initial_symbols,
const int32 generate_filenames,
const string &filename_prefix,
const string &filename_suffix) {
- FarPrintStringsArgs args(ifilenames, entry_type, token_type, begin_key,
- end_key, print_key, symbols_fname,
- generate_filenames,
- filename_prefix,
- filename_suffix);
+ FarPrintStringsArgs args(ifilenames, entry_type, token_type,
+ begin_key, end_key, print_key, print_weight,
+ symbols_fname, initial_symbols, generate_filenames,
+ filename_prefix, filename_suffix);
Apply<Operation<FarPrintStringsArgs> >("FarPrintStrings", arc_type,
&args);
diff --git a/src/extensions/far/main.cc b/src/extensions/far/main.cc
index b01d639..855def6 100644
--- a/src/extensions/far/main.cc
+++ b/src/extensions/far/main.cc
@@ -26,6 +26,7 @@ using std::vector;
#include <iostream>
#include <fstream>
+#include <sstream>
#include <fst/extensions/far/main.h>
namespace fst {
@@ -33,7 +34,9 @@ namespace fst {
// Return the 'FarType' value corresponding to a far type name.
FarType FarTypeFromString(const string &str) {
FarType type = FAR_DEFAULT;
- if (str == "stlist")
+ if (str == "fst")
+ type = FAR_FST;
+ else if (str == "stlist")
type = FAR_STLIST;
else if (str == "sttable")
type = FAR_STTABLE;
@@ -46,6 +49,8 @@ FarType FarTypeFromString(const string &str) {
// Return the textual name corresponding to a 'FarType;.
string FarTypeToString(FarType type) {
switch (type) {
+ case FAR_FST:
+ return "fst";
case FAR_STLIST:
return "stlist";
case FAR_STTABLE:
@@ -85,19 +90,14 @@ FarTokenType StringToFarTokenType(const string &s) {
string LoadArcTypeFromFar(const string &far_fname) {
FarHeader hdr;
- if (far_fname.empty()) {
- LOG(ERROR) << "Reading FAR from standard in not supported";
- return "";
- }
-
if (!hdr.Read(far_fname)) {
- LOG(ERROR) << "Error reading FAR: " << far_fname;
+ FSTERROR() << "Error reading FAR: " << far_fname;
return "";
}
string atype = hdr.ArcType();
if (atype == "unknown") {
- LOG(ERROR) << "Empty FST archive: " << far_fname;
+ FSTERROR() << "Empty FST archive: " << far_fname;
return "";
}
@@ -108,7 +108,7 @@ string LoadArcTypeFromFst(const string &fst_fname) {
FstHeader hdr;
ifstream in(fst_fname.c_str(), ifstream::in | ifstream::binary);
if (!hdr.Read(in, fst_fname)) {
- LOG(ERROR) << "Error reading FST: " << fst_fname;
+ FSTERROR() << "Error reading FST: " << fst_fname;
return "";
}
diff --git a/src/extensions/far/compile-strings.cc b/src/extensions/far/strings.cc
index e8a99cc..c62a45b 100644
--- a/src/extensions/far/compile-strings.cc
+++ b/src/extensions/far/strings.cc
@@ -17,6 +17,10 @@
#include <fst/extensions/far/compile-strings.h>
#include <iostream>
#include <fstream>
+#include <sstream>
+
+DEFINE_string(far_field_separator, "\t",
+ "Set of characters used as a separator between printed fields");
namespace fst {
diff --git a/src/extensions/ngram/Makefile.am b/src/extensions/ngram/Makefile.am
new file mode 100644
index 0000000..e909eaf
--- /dev/null
+++ b/src/extensions/ngram/Makefile.am
@@ -0,0 +1,10 @@
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+
+libfstdir = @libfstdir@
+libfst_LTLIBRARIES = libfstngram.la ngram-fst.la
+
+ngram_fst_la_SOURCES = bitmap-index.cc ngram-fst.cc nthbit.cc
+ngram_fst_la_LDFLAGS = -module
+
+libfstngram_la_SOURCES = bitmap-index.cc ngram-fst.cc nthbit.cc
+libfstngram_la_LDFLAGS = -version-info 0:0:0
diff --git a/src/extensions/ngram/Makefile.in b/src/extensions/ngram/Makefile.in
new file mode 100644
index 0000000..cce1255
--- /dev/null
+++ b/src/extensions/ngram/Makefile.in
@@ -0,0 +1,543 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/extensions/ngram
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_icu.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/src/include/fst/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(libfstdir)"
+LTLIBRARIES = $(libfst_LTLIBRARIES)
+libfstngram_la_LIBADD =
+am_libfstngram_la_OBJECTS = bitmap-index.lo ngram-fst.lo nthbit.lo
+libfstngram_la_OBJECTS = $(am_libfstngram_la_OBJECTS)
+libfstngram_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(libfstngram_la_LDFLAGS) $(LDFLAGS) -o $@
+ngram_fst_la_LIBADD =
+am_ngram_fst_la_OBJECTS = bitmap-index.lo ngram-fst.lo nthbit.lo
+ngram_fst_la_OBJECTS = $(am_ngram_fst_la_OBJECTS)
+ngram_fst_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(ngram_fst_la_LDFLAGS) $(LDFLAGS) -o $@
+DEFAULT_INCLUDES =
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(libfstngram_la_SOURCES) $(ngram_fst_la_SOURCES)
+DIST_SOURCES = $(libfstngram_la_SOURCES) $(ngram_fst_la_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CFLAGS = @ICU_CFLAGS@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_CPPFLAGS = @ICU_CPPFLAGS@
+ICU_CXXFLAGS = @ICU_CXXFLAGS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+libfstdir = @libfstdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(srcdir)/../../include $(ICU_CPPFLAGS)
+libfst_LTLIBRARIES = libfstngram.la ngram-fst.la
+ngram_fst_la_SOURCES = bitmap-index.cc ngram-fst.cc nthbit.cc
+ngram_fst_la_LDFLAGS = -module
+libfstngram_la_SOURCES = bitmap-index.cc ngram-fst.cc nthbit.cc
+libfstngram_la_LDFLAGS = -version-info 0:0:0
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/extensions/ngram/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/extensions/ngram/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-libfstLTLIBRARIES: $(libfst_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libfstdir)" || $(MKDIR_P) "$(DESTDIR)$(libfstdir)"
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libfstdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libfstdir)"; \
+ }
+
+uninstall-libfstLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(libfst_LTLIBRARIES)'; test -n "$(libfstdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libfstdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libfstdir)/$$f"; \
+ done
+
+clean-libfstLTLIBRARIES:
+ -test -z "$(libfst_LTLIBRARIES)" || rm -f $(libfst_LTLIBRARIES)
+ @list='$(libfst_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libfstngram.la: $(libfstngram_la_OBJECTS) $(libfstngram_la_DEPENDENCIES)
+ $(libfstngram_la_LINK) -rpath $(libfstdir) $(libfstngram_la_OBJECTS) $(libfstngram_la_LIBADD) $(LIBS)
+ngram-fst.la: $(ngram_fst_la_OBJECTS) $(ngram_fst_la_DEPENDENCIES)
+ $(ngram_fst_la_LINK) -rpath $(libfstdir) $(ngram_fst_la_OBJECTS) $(ngram_fst_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bitmap-index.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ngram-fst.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nthbit.Plo@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+ for dir in "$(DESTDIR)$(libfstdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libfstLTLIBRARIES clean-libtool \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-libfstLTLIBRARIES
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libfstLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libfstLTLIBRARIES clean-libtool ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am \
+ install-libfstLTLIBRARIES install-man install-pdf \
+ install-pdf-am install-ps install-ps-am install-strip \
+ installcheck installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-libfstLTLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/extensions/ngram/bitmap-index.cc b/src/extensions/ngram/bitmap-index.cc
new file mode 100644
index 0000000..c6aaa14
--- /dev/null
+++ b/src/extensions/ngram/bitmap-index.cc
@@ -0,0 +1,187 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: Jeffrey Soresnen (sorenj@google.com)
+
+#include <fst/extensions/ngram/bitmap-index.h>
+
+#include <algorithm>
+#include <iterator>
+
+#include <fst/extensions/ngram/nthbit.h>
+
+namespace fst {
+
+// These two internal classes implemented inverted views of the
+// primary and secondary indexes. That is, they provide iterators
+// that have operator*'s that return the number zeros rather than
+// the number of ones.
+
+class primary_index_inverted : public vector<uint32>::const_iterator {
+ public:
+ primary_index_inverted() {}
+ primary_index_inverted(vector<uint32>::const_iterator loc,
+ vector<uint32>::const_iterator begin) :
+ vector<uint32>::const_iterator(loc), begin_(begin) {}
+ uint32 operator*() {
+ return BitmapIndex::kStorageBitSize * BitmapIndex::kSecondaryBlockSize *
+ (1 + std::distance<vector<uint32>::const_iterator>(begin_, *this)) -
+ vector<uint32>::const_iterator::operator*();
+ }
+ private:
+ vector<uint32>::const_iterator begin_;
+};
+
+class secondary_index_inverted : public vector<uint16>::const_iterator {
+ public:
+ secondary_index_inverted() : vector<uint16>::const_iterator() {}
+ secondary_index_inverted(vector<uint16>::const_iterator loc,
+ vector<uint16>::const_iterator block_begin) :
+ vector<uint16>::const_iterator(loc), block_begin_(block_begin) {}
+ uint16 operator*() {
+ return ((1 + std::distance<vector<uint16>::const_iterator>(
+ block_begin_, *this)) << BitmapIndex::kStorageLogBitSize) -
+ vector<uint16>::const_iterator::operator*();
+ }
+ private:
+ vector<uint16>::const_iterator block_begin_;
+};
+
+size_t BitmapIndex::Rank1(size_t end) const {
+ if (end == 0) return 0;
+ CHECK_LE(end, Bits());
+ const uint32 end_word = (end - 1) >> BitmapIndex::kStorageLogBitSize;
+ const uint32 sum = get_index_ones_count(end_word);
+ const uint64 zero = 0;
+ const uint64 ones = ~zero;
+ return sum + __builtin_popcountll(bits_[end_word] &
+ (ones >> (kStorageBitSize - (end & kStorageBlockMask))));
+}
+
+size_t BitmapIndex::Select1(size_t bit_index) const {
+ if (bit_index >= GetOnesCount()) return Bits();
+ // search primary index for the relevant block
+ uint32 rembits = bit_index + 1;
+ const uint32 block = find_primary_block(bit_index + 1);
+ uint32 offset = 0;
+ if (block > 0) {
+ rembits -= primary_index_[block - 1];
+ offset += block * kSecondaryBlockSize;
+ }
+ // search the secondary index
+ uint32 word = find_secondary_block(offset, rembits);
+ if (word > 0) {
+ rembits -= secondary_index_[offset + word - 1];
+ offset += word;
+ }
+ int nth = nth_bit(bits_[offset], rembits);
+ return (offset << BitmapIndex::kStorageLogBitSize) + nth;
+}
+
+size_t BitmapIndex::Select0(size_t bit_index) const {
+ if (bit_index >= Bits() - GetOnesCount()) return Bits();
+ // search inverted primary index for relevant block
+ uint32 remzeros = bit_index + 1;
+ uint32 offset = 0;
+ const uint32 block = find_inverted_primary_block(bit_index + 1);
+ if (block > 0) {
+ remzeros -= *primary_index_inverted(primary_index_.begin() + block - 1,
+ primary_index_.begin());
+ offset += block * kSecondaryBlockSize;
+ }
+ // search the inverted secondary index
+ uint32 word = find_inverted_secondary_block(offset, remzeros);
+ if (word > 0) {
+ vector<uint16>::const_iterator block_begin =
+ secondary_index_.begin() + offset;
+ remzeros -= *secondary_index_inverted(block_begin + word - 1, block_begin);
+ offset += word;
+ }
+ int nth = nth_bit(~bits_[offset], remzeros);
+ return (offset << BitmapIndex::kStorageLogBitSize) + nth;
+}
+
+size_t BitmapIndex::get_index_ones_count(size_t array_index) const {
+ uint32 sum = 0;
+ if (array_index > 0) {
+ sum += secondary_index_[array_index-1];
+ uint32 end_block = (array_index - 1) / kSecondaryBlockSize;
+ if (end_block > 0) sum += primary_index_[end_block-1];
+ }
+ return sum;
+}
+
+void BitmapIndex::BuildIndex(const uint64 *bits, size_t size) {
+ bits_ = bits;
+ size_ = size;
+ secondary_index_.clear();
+ secondary_index_.reserve(ArraySize());
+ primary_index_.clear();
+ primary_index_.reserve(primary_index_size());
+ const uint64 zero = 0;
+ const uint64 ones = ~zero;
+ uint32 popcount = 0;
+ for (uint32 block_begin = 0; block_begin < ArraySize();
+ block_begin += kSecondaryBlockSize) {
+ uint32 block_popcount = 0;
+ uint32 block_end = block_begin + kSecondaryBlockSize;
+ if (block_end > ArraySize()) block_end = ArraySize();
+ for (uint32 j = block_begin; j < block_end; ++j) {
+ uint64 mask = ones;
+ if (j == ArraySize() - 1) {
+ mask = ones >> (-size_ & BitmapIndex::kStorageBlockMask);
+ }
+ block_popcount += __builtin_popcountll(bits_[j] & mask);
+ secondary_index_.push_back(block_popcount);
+ }
+ popcount += block_popcount;
+ primary_index_.push_back(popcount);
+ }
+}
+
+size_t BitmapIndex::find_secondary_block(
+ size_t block_begin, size_t rem_bit_index) const {
+ size_t block_end = block_begin + kSecondaryBlockSize;
+ if (block_end > secondary_index_.size()) block_end = secondary_index_.size();
+ return std::distance(secondary_index_.begin() + block_begin,
+ std::lower_bound(secondary_index_.begin() + block_begin,
+ secondary_index_.begin() + block_end,
+ rem_bit_index));
+}
+
+size_t BitmapIndex::find_inverted_secondary_block(
+ size_t block_begin, size_t rem_bit_index) const {
+ size_t block_end = block_begin + kSecondaryBlockSize;
+ if (block_end > secondary_index_.size()) block_end = secondary_index_.size();
+ secondary_index_inverted start(secondary_index_.begin() + block_begin,
+ secondary_index_.begin() + block_begin);
+ secondary_index_inverted end(secondary_index_.begin() + block_end,
+ secondary_index_.begin() + block_begin);
+ return std::distance(start,
+ std::lower_bound(start, end, rem_bit_index));
+}
+
+inline size_t BitmapIndex::find_primary_block(size_t bit_index) const {
+ return std::distance(primary_index_.begin(),
+ std::lower_bound(primary_index_.begin(),
+ primary_index_.end(), bit_index));
+}
+
+size_t BitmapIndex::find_inverted_primary_block(size_t bit_index) const {
+ primary_index_inverted start(primary_index_.begin(), primary_index_.begin());
+ primary_index_inverted end(primary_index_.end(), primary_index_.begin());
+ return std::distance(start, std::lower_bound(start, end, bit_index));
+}
+
+} // end namespace fst
diff --git a/src/extensions/ngram/ngram-fst.cc b/src/extensions/ngram/ngram-fst.cc
new file mode 100644
index 0000000..18f88db
--- /dev/null
+++ b/src/extensions/ngram/ngram-fst.cc
@@ -0,0 +1,28 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: sorenj@google.com (Jeffrey Sorensen)
+
+#include <fst/extensions/ngram/ngram-fst.h>
+
+#include <sys/types.h>
+
+#include <fst/fstlib.h>
+
+using fst::NGramFst;
+using fst::StdArc;
+using fst::LogArc;
+
+REGISTER_FST(NGramFst, StdArc);
+REGISTER_FST(NGramFst, LogArc);
diff --git a/src/extensions/ngram/nthbit.cc b/src/extensions/ngram/nthbit.cc
new file mode 100644
index 0000000..7b006e7
--- /dev/null
+++ b/src/extensions/ngram/nthbit.cc
@@ -0,0 +1,107 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: sorenj@google.com (Jeffrey Sorensen
+
+#include <fst/extensions/ngram/nthbit.h>
+
+//
+// This table is generate using:
+//
+// unsigned int nth_bit_scan(uint64 v, unsigned int r) {
+// int i=0;
+// for (; i<64; i++) {
+// if ((r -= v & 1) == 0) return i;
+// v >>= 1;
+// }
+// return i;
+// }
+//
+// for (size_t i = 0; i < 256; ++i) {
+// uint32 offsets = 0;
+// for (size_t b = 1; b <= 8; ++b) {
+// uint32 offset = min<uint32>(nth_bit_scan(i, b), 8);
+// offsets |= (offset << ((b - 1) << 2));
+// }
+// bit_offset = offsets;
+// printf("0x%x, ", bit_offset);
+// if (i % 4 == 3) printf("\n");
+// }
+//
+uint32 nth_bit_bit_offset[] = {
+0x88888888, 0x88888880, 0x88888881, 0x88888810,
+0x88888882, 0x88888820, 0x88888821, 0x88888210,
+0x88888883, 0x88888830, 0x88888831, 0x88888310,
+0x88888832, 0x88888320, 0x88888321, 0x88883210,
+0x88888884, 0x88888840, 0x88888841, 0x88888410,
+0x88888842, 0x88888420, 0x88888421, 0x88884210,
+0x88888843, 0x88888430, 0x88888431, 0x88884310,
+0x88888432, 0x88884320, 0x88884321, 0x88843210,
+0x88888885, 0x88888850, 0x88888851, 0x88888510,
+0x88888852, 0x88888520, 0x88888521, 0x88885210,
+0x88888853, 0x88888530, 0x88888531, 0x88885310,
+0x88888532, 0x88885320, 0x88885321, 0x88853210,
+0x88888854, 0x88888540, 0x88888541, 0x88885410,
+0x88888542, 0x88885420, 0x88885421, 0x88854210,
+0x88888543, 0x88885430, 0x88885431, 0x88854310,
+0x88885432, 0x88854320, 0x88854321, 0x88543210,
+0x88888886, 0x88888860, 0x88888861, 0x88888610,
+0x88888862, 0x88888620, 0x88888621, 0x88886210,
+0x88888863, 0x88888630, 0x88888631, 0x88886310,
+0x88888632, 0x88886320, 0x88886321, 0x88863210,
+0x88888864, 0x88888640, 0x88888641, 0x88886410,
+0x88888642, 0x88886420, 0x88886421, 0x88864210,
+0x88888643, 0x88886430, 0x88886431, 0x88864310,
+0x88886432, 0x88864320, 0x88864321, 0x88643210,
+0x88888865, 0x88888650, 0x88888651, 0x88886510,
+0x88888652, 0x88886520, 0x88886521, 0x88865210,
+0x88888653, 0x88886530, 0x88886531, 0x88865310,
+0x88886532, 0x88865320, 0x88865321, 0x88653210,
+0x88888654, 0x88886540, 0x88886541, 0x88865410,
+0x88886542, 0x88865420, 0x88865421, 0x88654210,
+0x88886543, 0x88865430, 0x88865431, 0x88654310,
+0x88865432, 0x88654320, 0x88654321, 0x86543210,
+0x88888887, 0x88888870, 0x88888871, 0x88888710,
+0x88888872, 0x88888720, 0x88888721, 0x88887210,
+0x88888873, 0x88888730, 0x88888731, 0x88887310,
+0x88888732, 0x88887320, 0x88887321, 0x88873210,
+0x88888874, 0x88888740, 0x88888741, 0x88887410,
+0x88888742, 0x88887420, 0x88887421, 0x88874210,
+0x88888743, 0x88887430, 0x88887431, 0x88874310,
+0x88887432, 0x88874320, 0x88874321, 0x88743210,
+0x88888875, 0x88888750, 0x88888751, 0x88887510,
+0x88888752, 0x88887520, 0x88887521, 0x88875210,
+0x88888753, 0x88887530, 0x88887531, 0x88875310,
+0x88887532, 0x88875320, 0x88875321, 0x88753210,
+0x88888754, 0x88887540, 0x88887541, 0x88875410,
+0x88887542, 0x88875420, 0x88875421, 0x88754210,
+0x88887543, 0x88875430, 0x88875431, 0x88754310,
+0x88875432, 0x88754320, 0x88754321, 0x87543210,
+0x88888876, 0x88888760, 0x88888761, 0x88887610,
+0x88888762, 0x88887620, 0x88887621, 0x88876210,
+0x88888763, 0x88887630, 0x88887631, 0x88876310,
+0x88887632, 0x88876320, 0x88876321, 0x88763210,
+0x88888764, 0x88887640, 0x88887641, 0x88876410,
+0x88887642, 0x88876420, 0x88876421, 0x88764210,
+0x88887643, 0x88876430, 0x88876431, 0x88764310,
+0x88876432, 0x88764320, 0x88764321, 0x87643210,
+0x88888765, 0x88887650, 0x88887651, 0x88876510,
+0x88887652, 0x88876520, 0x88876521, 0x88765210,
+0x88887653, 0x88876530, 0x88876531, 0x88765310,
+0x88876532, 0x88765320, 0x88765321, 0x87653210,
+0x88887654, 0x88876540, 0x88876541, 0x88765410,
+0x88876542, 0x88765420, 0x88765421, 0x87654210,
+0x88876543, 0x88765430, 0x88765431, 0x87654310,
+0x88765432, 0x87654320, 0x87654321, 0x76543210,
+};
diff --git a/src/extensions/pdt/pdtcompose.cc b/src/extensions/pdt/pdtcompose.cc
index 0bab405..0076e1d 100644
--- a/src/extensions/pdt/pdtcompose.cc
+++ b/src/extensions/pdt/pdtcompose.cc
@@ -41,7 +41,7 @@ int main(int argc, char **argv) {
usage += " in.fst in.pdt [out.pdt]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc < 3 || argc > 4) {
ShowUsage();
return 1;
diff --git a/src/extensions/pdt/pdtexpand.cc b/src/extensions/pdt/pdtexpand.cc
index 2134932..18d849d 100644
--- a/src/extensions/pdt/pdtexpand.cc
+++ b/src/extensions/pdt/pdtexpand.cc
@@ -17,7 +17,7 @@
// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
//
// \file
-// Expands a PDT and an FST.
+// Expands a (bounded-stack) PDT as an FST.
//
#include <fst/extensions/pdt/pdtscript.h>
@@ -32,12 +32,12 @@ DEFINE_string(weight, "", "Weight threshold");
int main(int argc, char **argv) {
namespace s = fst::script;
- string usage = "Expand a PDT and an FST.\n\n Usage: ";
+ string usage = "Expand a (bounded-stack) PDT as an FST.\n\n Usage: ";
usage += argv[0];
usage += " in.pdt [out.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/extensions/pdt/pdtinfo.cc b/src/extensions/pdt/pdtinfo.cc
index 89227d3..71c0ed0 100644
--- a/src/extensions/pdt/pdtinfo.cc
+++ b/src/extensions/pdt/pdtinfo.cc
@@ -34,7 +34,7 @@ int main(int argc, char **argv) {
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 2) {
ShowUsage();
return 1;
diff --git a/src/extensions/pdt/pdtreplace.cc b/src/extensions/pdt/pdtreplace.cc
index 543b6ea..9f8aa86 100644
--- a/src/extensions/pdt/pdtreplace.cc
+++ b/src/extensions/pdt/pdtreplace.cc
@@ -15,6 +15,7 @@
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
+// Converts an RTN represented by FSTs and non-terminal labels into a PDT .
#include <utility>
using std::pair; using std::make_pair;
@@ -29,13 +30,14 @@ DEFINE_string(pdt_parentheses, "", "PDT parenthesis label pairs.");
int main(int argc, char **argv) {
namespace s = fst::script;
- string usage = "Recursively replace Fst arcs with other Fst(s).\n";
+ string usage = "Converts an RTN represented by FSTs";
+ usage += " and non-terminal labels into PDT";
usage += " Usage: ";
usage += argv[0];
usage += " root.fst rootlabel [rule1.fst label1 ...] [out.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc < 4) {
ShowUsage();
return 1;
diff --git a/src/extensions/pdt/pdtreverse.cc b/src/extensions/pdt/pdtreverse.cc
index e79cd7f..48f412d 100644
--- a/src/extensions/pdt/pdtreverse.cc
+++ b/src/extensions/pdt/pdtreverse.cc
@@ -32,7 +32,7 @@ int main(int argc, char **argv) {
usage += " in.pdt [out.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/extensions/pdt/pdtshortestpath.cc b/src/extensions/pdt/pdtshortestpath.cc
index ad67969..5973db9 100644
--- a/src/extensions/pdt/pdtshortestpath.cc
+++ b/src/extensions/pdt/pdtshortestpath.cc
@@ -17,7 +17,7 @@
// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
//
// \file
-// Return the shortest path in a PDT.
+// Return the shortest path in a (bounded-stack) PDT.
//
#include <fst/extensions/pdt/pdtscript.h>
@@ -33,12 +33,12 @@ DEFINE_string(pdt_parentheses, "", "PDT parenthesis label pairs.");
int main(int argc, char **argv) {
namespace s = fst::script;
- string usage = "Shortest path in a PDT.\n\n Usage: ";
+ string usage = "Shortest path in a (bounded-stack) PDT.\n\n Usage: ";
usage += argv[0];
usage += " in.pdt [out.fst]\n";
std::set_new_handler(FailedNewHandler);
- SetFlags(usage.c_str(), &argc, &argv, true);
+ SET_FLAGS(usage.c_str(), &argc, &argv, true);
if (argc > 3) {
ShowUsage();
return 1;
diff --git a/src/include/Makefile.am b/src/include/Makefile.am
index b4e6213..e9eb92f 100644
--- a/src/include/Makefile.am
+++ b/src/include/Makefile.am
@@ -1,10 +1,17 @@
if HAVE_FAR
far_include_headers = fst/extensions/far/compile-strings.h \
-fst/extensions/far/create.h fst/extensions/far/extract.h \
-fst/extensions/far/far.h fst/extensions/far/farlib.h \
-fst/extensions/far/farscript.h fst/extensions/far/info.h \
-fst/extensions/far/main.h fst/extensions/far/print-strings.h \
-fst/extensions/far/stlist.h fst/extensions/far/sttable.h
+fst/extensions/far/create.h fst/extensions/far/equal.h \
+fst/extensions/far/extract.h fst/extensions/far/far.h \
+fst/extensions/far/farlib.h fst/extensions/far/farscript.h \
+fst/extensions/far/info.h fst/extensions/far/main.h \
+fst/extensions/far/print-strings.h fst/extensions/far/stlist.h \
+fst/extensions/far/sttable.h
+endif
+
+if HAVE_NGRAM
+ngram_include_headers = fst/extensions/ngram/bitmap-index.h \
+fst/extensions/ngram/ngram-fst.h \
+fst/extensions/ngram/nthbit.h
endif
if HAVE_PDT
@@ -62,6 +69,6 @@ fst/replace-util.h fst/icu.h fst/string.h fst/signed-log-weight.h \
fst/sparse-tuple-weight.h fst/sparse-power-weight.h fst/expectation-weight.h \
fst/symbol-table-ops.h fst/bi-table.h \
$(far_include_headers) \
+$(ngram_include_headers) \
$(pdt_include_headers) \
$(script_include_headers)
-
diff --git a/src/include/Makefile.in b/src/include/Makefile.in
index ab6c28d..b4e3f86 100644
--- a/src/include/Makefile.in
+++ b/src/include/Makefile.in
@@ -82,11 +82,14 @@ am__nobase_include_HEADERS_DIST = fst/arc.h fst/determinize.h \
fst/sparse-power-weight.h fst/expectation-weight.h \
fst/symbol-table-ops.h fst/bi-table.h \
fst/extensions/far/compile-strings.h \
- fst/extensions/far/create.h fst/extensions/far/extract.h \
- fst/extensions/far/far.h fst/extensions/far/farlib.h \
- fst/extensions/far/farscript.h fst/extensions/far/info.h \
- fst/extensions/far/main.h fst/extensions/far/print-strings.h \
- fst/extensions/far/stlist.h fst/extensions/far/sttable.h \
+ fst/extensions/far/create.h fst/extensions/far/equal.h \
+ fst/extensions/far/extract.h fst/extensions/far/far.h \
+ fst/extensions/far/farlib.h fst/extensions/far/farscript.h \
+ fst/extensions/far/info.h fst/extensions/far/main.h \
+ fst/extensions/far/print-strings.h fst/extensions/far/stlist.h \
+ fst/extensions/far/sttable.h \
+ fst/extensions/ngram/bitmap-index.h \
+ fst/extensions/ngram/ngram-fst.h fst/extensions/ngram/nthbit.h \
fst/extensions/pdt/collection.h fst/extensions/pdt/compose.h \
fst/extensions/pdt/expand.h fst/extensions/pdt/info.h \
fst/extensions/pdt/paren.h fst/extensions/pdt/pdt.h \
@@ -264,11 +267,16 @@ top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
@HAVE_FAR_TRUE@far_include_headers = fst/extensions/far/compile-strings.h \
-@HAVE_FAR_TRUE@fst/extensions/far/create.h fst/extensions/far/extract.h \
-@HAVE_FAR_TRUE@fst/extensions/far/far.h fst/extensions/far/farlib.h \
-@HAVE_FAR_TRUE@fst/extensions/far/farscript.h fst/extensions/far/info.h \
-@HAVE_FAR_TRUE@fst/extensions/far/main.h fst/extensions/far/print-strings.h \
-@HAVE_FAR_TRUE@fst/extensions/far/stlist.h fst/extensions/far/sttable.h
+@HAVE_FAR_TRUE@fst/extensions/far/create.h fst/extensions/far/equal.h \
+@HAVE_FAR_TRUE@fst/extensions/far/extract.h fst/extensions/far/far.h \
+@HAVE_FAR_TRUE@fst/extensions/far/farlib.h fst/extensions/far/farscript.h \
+@HAVE_FAR_TRUE@fst/extensions/far/info.h fst/extensions/far/main.h \
+@HAVE_FAR_TRUE@fst/extensions/far/print-strings.h fst/extensions/far/stlist.h \
+@HAVE_FAR_TRUE@fst/extensions/far/sttable.h
+
+@HAVE_NGRAM_TRUE@ngram_include_headers = fst/extensions/ngram/bitmap-index.h \
+@HAVE_NGRAM_TRUE@fst/extensions/ngram/ngram-fst.h \
+@HAVE_NGRAM_TRUE@fst/extensions/ngram/nthbit.h
@HAVE_PDT_TRUE@pdt_include_headers = fst/extensions/pdt/collection.h \
@HAVE_PDT_TRUE@fst/extensions/pdt/compose.h fst/extensions/pdt/expand.h \
@@ -323,6 +331,7 @@ fst/replace-util.h fst/icu.h fst/string.h fst/signed-log-weight.h \
fst/sparse-tuple-weight.h fst/sparse-power-weight.h fst/expectation-weight.h \
fst/symbol-table-ops.h fst/bi-table.h \
$(far_include_headers) \
+$(ngram_include_headers) \
$(pdt_include_headers) \
$(script_include_headers)
diff --git a/src/include/fst/accumulator.h b/src/include/fst/accumulator.h
index fcb960c..9801b93 100644
--- a/src/include/fst/accumulator.h
+++ b/src/include/fst/accumulator.h
@@ -258,7 +258,7 @@ class FastLogAccumulator {
for(StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
StateId s = siter.Value();
if (fst.NumArcs(s) >= arc_limit_) {
- double sum = FloatLimits<double>::kPosInfinity;
+ double sum = FloatLimits<double>::PosInfinity();
weight_positions.push_back(weight_position);
weights.push_back(sum);
++weight_position;
@@ -282,12 +282,12 @@ class FastLogAccumulator {
private:
double LogPosExp(double x) {
- return x == FloatLimits<double>::kPosInfinity ?
+ return x == FloatLimits<double>::PosInfinity() ?
0.0 : log(1.0F + exp(-x));
}
double LogMinusExp(double x) {
- return x == FloatLimits<double>::kPosInfinity ?
+ return x == FloatLimits<double>::PosInfinity() ?
0.0 : log(1.0F - exp(-x));
}
@@ -302,7 +302,7 @@ class FastLogAccumulator {
double LogPlus(double f1, Weight v) {
double f2 = to_log_weight_(v).Value();
- if (f1 == FloatLimits<double>::kPosInfinity)
+ if (f1 == FloatLimits<double>::PosInfinity())
return f2;
else if (f1 > f2)
return f2 - LogPosExp(f1 - f2);
@@ -317,7 +317,7 @@ class FastLogAccumulator {
error_ = true;
return Weight::NoWeight();
}
- if (f2 == FloatLimits<double>::kPosInfinity)
+ if (f2 == FloatLimits<double>::PosInfinity())
return to_weight_(f1);
else
return to_weight_(f1 - LogMinusExp(f2 - f1));
@@ -485,7 +485,7 @@ class CacheLogAccumulator {
if ((weights_ == 0) && (fst_->NumArcs(s) >= arc_limit_)) {
weights_ = new vector<double>;
weights_->reserve(fst_->NumArcs(s) + 1);
- weights_->push_back(FloatLimits<double>::kPosInfinity);
+ weights_->push_back(FloatLimits<double>::PosInfinity());
data_->AddWeights(s, weights_);
}
}
@@ -524,7 +524,7 @@ class CacheLogAccumulator {
- weights_->begin() - 1;
} else {
size_t n = 0;
- double x = FloatLimits<double>::kPosInfinity;
+ double x = FloatLimits<double>::PosInfinity();
for(aiter->Reset(); !aiter->Done(); aiter->Next(), ++n) {
x = LogPlus(x, aiter->Value().weight);
if (x < w) break;
@@ -537,12 +537,12 @@ class CacheLogAccumulator {
private:
double LogPosExp(double x) {
- return x == FloatLimits<double>::kPosInfinity ?
+ return x == FloatLimits<double>::PosInfinity() ?
0.0 : log(1.0F + exp(-x));
}
double LogMinusExp(double x) {
- return x == FloatLimits<double>::kPosInfinity ?
+ return x == FloatLimits<double>::PosInfinity() ?
0.0 : log(1.0F - exp(-x));
}
@@ -557,7 +557,7 @@ class CacheLogAccumulator {
double LogPlus(double f1, Weight v) {
double f2 = to_log_weight_(v).Value();
- if (f1 == FloatLimits<double>::kPosInfinity)
+ if (f1 == FloatLimits<double>::PosInfinity())
return f2;
else if (f1 > f2)
return f2 - LogPosExp(f1 - f2);
@@ -572,7 +572,7 @@ class CacheLogAccumulator {
error_ = true;
return Weight::NoWeight();
}
- if (f2 == FloatLimits<double>::kPosInfinity)
+ if (f2 == FloatLimits<double>::PosInfinity())
return to_weight_(f1);
else
return to_weight_(f1 - LogMinusExp(f2 - f1));
diff --git a/src/include/fst/arc.h b/src/include/fst/arc.h
index 56086c9..5f4014b 100644
--- a/src/include/fst/arc.h
+++ b/src/include/fst/arc.h
@@ -34,6 +34,7 @@
#include <fst/sparse-power-weight.h>
#include <iostream>
#include <fstream>
+#include <sstream>
#include <fst/string-weight.h>
diff --git a/src/include/fst/arcsort.h b/src/include/fst/arcsort.h
index 38f4f95..37a51dc 100644
--- a/src/include/fst/arcsort.h
+++ b/src/include/fst/arcsort.h
@@ -118,9 +118,11 @@ typedef CacheOptions ArcSortFstOptions;
// and exclusive of caching.
template <class A, class C>
class ArcSortFst : public StateMapFst<A, A, ArcSortMapper<A, C> > {
+ using StateMapFst<A, A, ArcSortMapper<A, C> >::GetImpl;
public:
typedef A Arc;
- typedef ArcSortMapper<A, C> M;
+ typedef typename Arc::StateId StateId;
+ typedef ArcSortMapper<A, C> M;
ArcSortFst(const Fst<A> &fst, const C &comp)
: StateMapFst<A, A, M>(fst, ArcSortMapper<A, C>(fst, comp)) {}
@@ -136,6 +138,18 @@ class ArcSortFst : public StateMapFst<A, A, ArcSortMapper<A, C> > {
virtual ArcSortFst<A, C> *Copy(bool safe = false) const {
return new ArcSortFst(*this, safe);
}
+
+ virtual size_t NumArcs(StateId s) const {
+ return GetImpl()->GetFst().NumArcs(s);
+ }
+
+ virtual size_t NumInputEpsilons(StateId s) const {
+ return GetImpl()->GetFst().NumInputEpsilons(s);
+ }
+
+ virtual size_t NumOutputEpsilons(StateId s) const {
+ return GetImpl()->GetFst().NumOutputEpsilons(s);
+ }
};
diff --git a/src/include/fst/bi-table.h b/src/include/fst/bi-table.h
index dbb436c..bd37781 100644
--- a/src/include/fst/bi-table.h
+++ b/src/include/fst/bi-table.h
@@ -39,8 +39,9 @@ namespace fst {
// // Required constructors.
// BiTable();
//
-// // Lookup integer ID from entry. If it doesn't exist, then add it.
-// I FindId(const T &entry);
+// // Lookup integer ID from entry. If it doesn't exist and 'insert'
+// / is true, then add it. Otherwise return -1.
+// I FindId(const T &entry, bool insert = true);
// // Lookup entry from integer ID.
// const T &FindEntry(I) const;
// // # of stored entries.
@@ -58,11 +59,15 @@ class HashBiTable {
T empty_entry;
}
- I FindId(const T &entry) {
+ I FindId(const T &entry, bool insert = true) {
I &id_ref = entry2id_[entry];
- if (id_ref == 0) { // T not found; store and assign it a new ID.
- id2entry_.push_back(entry);
- id_ref = id2entry_.size();
+ if (id_ref == 0) { // T not found
+ if (insert) { // store and assign it a new ID
+ id2entry_.push_back(entry);
+ id_ref = id2entry_.size();
+ } else {
+ return -1;
+ }
}
return id_ref - 1; // NB: id_ref = ID + 1
}
@@ -109,14 +114,18 @@ class CompactHashBiTable {
id2entry_.reserve(table_size);
}
- I FindId(const T &entry) {
+ I FindId(const T &entry, bool insert = true) {
current_entry_ = &entry;
typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
- if (it == keys_.end()) {
- I key = id2entry_.size();
- id2entry_.push_back(entry);
- keys_.insert(key);
- return key;
+ if (it == keys_.end()) { // T not found
+ if (insert) { // store and assign it a new ID
+ I key = id2entry_.size();
+ id2entry_.push_back(entry);
+ keys_.insert(key);
+ return key;
+ } else {
+ return -1;
+ }
} else {
return *it;
}
@@ -191,14 +200,18 @@ class VectorBiTable {
~VectorBiTable() { delete fp_; }
- I FindId(const T &entry) {
+ I FindId(const T &entry, bool insert = true) {
ssize_t fp = (*fp_)(entry);
if (fp >= fp2id_.size())
fp2id_.resize(fp + 1);
I &id_ref = fp2id_[fp];
- if (id_ref == 0) { // T not found; store and assign it a new ID.
- id2entry_.push_back(entry);
- id_ref = id2entry_.size();
+ if (id_ref == 0) { // T not found
+ if (insert) { // store and assign it a new ID
+ id2entry_.push_back(entry);
+ id_ref = id2entry_.size();
+ } else {
+ return -1;
+ }
}
return id_ref - 1; // NB: id_ref = ID + 1
}
@@ -251,24 +264,32 @@ class VectorHashBiTable {
delete h_;
}
- I FindId(const T &entry) {
+ I FindId(const T &entry, bool insert = true) {
if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true'
uint64 fp = (*fp_)(entry);
if (fp2id_.size() <= fp)
fp2id_.resize(fp + 1, 0);
- if (fp2id_[fp] == 0) {
- id2entry_.push_back(entry);
- fp2id_[fp] = id2entry_.size();
+ if (fp2id_[fp] == 0) { // T not found
+ if (insert) { // store and assign it a new ID
+ id2entry_.push_back(entry);
+ fp2id_[fp] = id2entry_.size();
+ } else {
+ return -1;
+ }
}
return fp2id_[fp] - 1; // NB: assoc_value = ID + 1
} else { // Use the hash table otherwise.
current_entry_ = &entry;
typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
if (it == keys_.end()) {
- I key = id2entry_.size();
- id2entry_.push_back(entry);
- keys_.insert(key);
- return key;
+ if (insert) {
+ I key = id2entry_.size();
+ id2entry_.push_back(entry);
+ keys_.insert(key);
+ return key;
+ } else {
+ return -1;
+ }
} else {
return *it;
}
@@ -357,11 +378,15 @@ class ErasableBiTable {
public:
ErasableBiTable() : first_(0) {}
- I FindId(const T &entry) {
+ I FindId(const T &entry, bool insert = true) {
I &id_ref = entry2id_[entry];
- if (id_ref == 0) { // T not found; store and assign it a new ID.
- id2entry_.push_back(entry);
- id_ref = id2entry_.size() + first_;
+ if (id_ref == 0) { // T not found
+ if (insert) { // store and assign it a new ID
+ id2entry_.push_back(entry);
+ id_ref = id2entry_.size() + first_;
+ } else {
+ return -1;
+ }
}
return id_ref - 1; // NB: id_ref = ID + 1
}
diff --git a/src/include/fst/cache.h b/src/include/fst/cache.h
index a6a92d4..0177396 100644
--- a/src/include/fst/cache.h
+++ b/src/include/fst/cache.h
@@ -292,13 +292,13 @@ class CacheBaseImpl : public VectorFstBaseImpl<S> {
void DeleteArcs(StateId s, size_t n) {
S *state = ExtendState(s);
- const vector<Arc> &arcs = GetState(s)->arcs;
+ const vector<Arc> &arcs = state->arcs;
for (size_t i = 0; i < n; ++i) {
size_t j = arcs.size() - i - 1;
if (arcs[j].ilabel == 0)
- --GetState(s)->niepsilons;
+ --state->niepsilons;
if (arcs[j].olabel == 0)
- --GetState(s)->noepsilons;
+ --state->noepsilons;
}
state->arcs.resize(arcs.size() - n);
SetProperties(DeleteArcsProperties(Properties()));
@@ -503,9 +503,6 @@ struct CacheState {
size_t noepsilons; // # of output epsilons
mutable uint32 flags;
mutable int ref_count;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(CacheState);
};
// A CacheBaseImpl with a commonly used CacheState.
diff --git a/src/include/fst/compact-fst.h b/src/include/fst/compact-fst.h
index efa567a..57c927e 100644
--- a/src/include/fst/compact-fst.h
+++ b/src/include/fst/compact-fst.h
@@ -175,10 +175,10 @@ class CompactFstData {
bool Error() const { return error_; }
- private:
// Byte alignment for states and arcs in file format (version 1 only)
static const int kFileAlign = 16;
+ private:
Unsigned *states_;
CompactElement *compacts_;
size_t nstates_;
@@ -539,17 +539,16 @@ class CompactFstImpl : public CacheImpl<A> {
}
Weight Final(StateId s) {
- if (!HasFinal(s)) {
- Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
- if ((compactor_->Size() != -1) ||
- (data_->States(s) != data_->States(s + 1)))
- arc = ComputeArc(s,
- compactor_->Size() == -1
- ? data_->States(s)
- : s * compactor_->Size());
- SetFinal(s, arc.ilabel == kNoLabel ? arc.weight : Weight::Zero());
- }
- return CacheImpl<A>::Final(s);
+ if (HasFinal(s))
+ return CacheImpl<A>::Final(s);
+ Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
+ if ((compactor_->Size() != -1) ||
+ (data_->States(s) != data_->States(s + 1)))
+ arc = ComputeArc(s,
+ compactor_->Size() == -1
+ ? data_->States(s)
+ : s * compactor_->Size());
+ return arc.ilabel == kNoLabel ? arc.weight : Weight::Zero();
}
StateId NumStates() const {
@@ -650,7 +649,6 @@ class CompactFstImpl : public CacheImpl<A> {
// Ensures compatibility
int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
WriteHeader(strm, opts, file_version, &hdr);
-
compactor_->Write(strm);
return data_->Write(strm, opts);
}
@@ -678,9 +676,13 @@ class CompactFstImpl : public CacheImpl<A> {
data_->States(s + 1) : (s + 1) * compactor_->Size();
for (size_t i = begin; i < end; ++i) {
const Arc &arc = ComputeArc(s, i);
- if (arc.ilabel == kNoLabel) continue;
- PushArc(s, arc);
+ if (arc.ilabel == kNoLabel)
+ SetFinal(s, arc.weight);
+ else
+ PushArc(s, arc);
}
+ if (!HasFinal(s))
+ SetFinal(s, Weight::Zero());
SetArcs(s);
}
@@ -694,6 +696,9 @@ class CompactFstImpl : public CacheImpl<A> {
C *GetCompactor() const { return compactor_; }
CompactFstData<CompactElement, U> *Data() const { return data_; }
+ // Properties always true of this Fst class
+ static const uint64 kStaticProperties = kExpanded;
+
protected:
template <class B, class D>
explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl)
@@ -710,6 +715,8 @@ class CompactFstImpl : public CacheImpl<A> {
}
private:
+ friend class CompactFst<A, C, U>; // allow access during write.
+
void Init(const Fst<Arc> &fst) {
string type = "compact";
if (sizeof(U) != sizeof(uint32)) {
@@ -751,8 +758,6 @@ class CompactFstImpl : public CacheImpl<A> {
SetProperties(kError, kError);
}
- // Properties always true of this Fst class
- static const uint64 kStaticProperties = kExpanded;
// Current unaligned file format version
static const int kFileVersion = 2;
// Current aligned file format version
@@ -863,6 +868,10 @@ class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
return Fst<A>::WriteFile(filename);
}
+ template <class F>
+ static bool WriteFst(const F &fst, const C &compactor, ostream &strm,
+ const FstWriteOptions &opts);
+
virtual void InitStateIterator(StateIteratorData<A> *data) const {
GetImpl()->InitStateIterator(data);
}
@@ -893,6 +902,115 @@ class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
void operator=(const CompactFst<A, C, U> &fst); // disallow
};
+// Writes Fst in Compact format, potentially with a pass over the machine
+// before writing to compute the number of states and arcs.
+//
+template <class A, class C, class U>
+template <class F>
+bool CompactFst<A, C, U>::WriteFst(const F &fst,
+ const C &compactor,
+ ostream &strm,
+ const FstWriteOptions &opts) {
+ typedef U Unsigned;
+ typedef typename C::Element CompactElement;
+ typedef typename A::Weight Weight;
+ static const int kFileAlign =
+ CompactFstData<CompactElement, U>::kFileAlign;
+ int file_version = opts.align ?
+ CompactFstImpl<A, C, U>::kAlignedFileVersion :
+ CompactFstImpl<A, C, U>::kFileVersion;
+ size_t num_arcs = -1, num_states = -1, num_compacts = -1;
+ C first_pass_compactor = compactor;
+ if (fst.Type() == CompactFst<A, C, U>().Type()) {
+ const CompactFst<A, C, U> *compact_fst =
+ reinterpret_cast<const CompactFst<A, C, U> *>(&fst);
+ num_arcs = compact_fst->GetImpl()->Data()->NumArcs();
+ num_states = compact_fst->GetImpl()->Data()->NumStates();
+ num_compacts = compact_fst->GetImpl()->Data()->NumCompacts();
+ first_pass_compactor = *compact_fst->GetImpl()->GetCompactor();
+ } else {
+ // A first pass is needed to compute the state of the compactor, which
+ // is saved ahead of the rest of the data structures. This unfortunately
+ // means forcing a complete double compaction when writing in this format.
+ // TODO(allauzen): eliminate mutable state from compactors.
+ num_arcs = 0;
+ num_states = 0;
+ for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+ const StateId s = siter.Value();
+ ++num_states;
+ if (fst.Final(s) != Weight::Zero()) {
+ first_pass_compactor.Compact(
+ s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
+ }
+ for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
+ ++num_arcs;
+ first_pass_compactor.Compact(s, aiter.Value());
+ }
+ }
+ }
+ FstHeader hdr;
+ hdr.SetStart(fst.Start());
+ hdr.SetNumStates(num_states);
+ hdr.SetNumArcs(num_arcs);
+ string type = "compact";
+ if (sizeof(U) != sizeof(uint32)) {
+ string size;
+ Int64ToStr(8 * sizeof(U), &size);
+ type += size;
+ }
+ type += "_";
+ type += C::Type();
+ uint64 copy_properties = fst.Properties(kCopyProperties, true);
+ if ((copy_properties & kError) || !compactor.Compatible(fst)) {
+ LOG(ERROR) << "fst incompatible with compactor";
+ return false;
+ }
+ uint64 properties = copy_properties |
+ CompactFstImpl<A, C, U>::kStaticProperties;
+ FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties,
+ &hdr);
+ first_pass_compactor.Write(strm);
+ if (first_pass_compactor.Size() == -1) {
+ if (opts.align && !AlignOutput(strm, kFileAlign)) {
+ LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
+ return false;
+ }
+ Unsigned compacts = 0;
+ for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+ const StateId s = siter.Value();
+ strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
+ if (fst.Final(s) != Weight::Zero()) {
+ ++compacts;
+ }
+ compacts += fst.NumArcs(s);
+ }
+ strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
+ }
+ if (opts.align && !AlignOutput(strm, kFileAlign)) {
+ LOG(ERROR) << "Could not align file during write after writing states";
+ }
+ C second_pass_compactor = compactor;
+ CompactElement element;
+ for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+ const StateId s = siter.Value();
+ if (fst.Final(s) != Weight::Zero()) {
+ element = second_pass_compactor.Compact(
+ s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
+ strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
+ }
+ for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
+ element = second_pass_compactor.Compact(s, aiter.Value());
+ strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
+ }
+ }
+ strm.flush();
+ if (!strm) {
+ LOG(ERROR) << "CompactFst write failed: " << opts.source;
+ return false;
+ }
+ return true;
+}
+
// Specialization for CompactFst; see generic version in fst.h
// for sample usage (but use the CompactFst type!). This version
diff --git a/src/include/fst/compat.h b/src/include/fst/compat.h
index 034b57e..3b5275d 100644
--- a/src/include/fst/compat.h
+++ b/src/include/fst/compat.h
@@ -39,10 +39,7 @@
#include <fst/lock.h>
#include <fst/flags.h>
#include <fst/log.h>
-
-#ifdef HAVE_ICU
#include <fst/icu.h>
-#endif
using std::cin;
using std::cout;
@@ -87,7 +84,7 @@ class CheckSummer {
void Reset() {
count_ = 0;
for (int i = 0; i < kCheckSumLength; ++i)
- check_sum_[0] = '\0';
+ check_sum_[i] = '\0';
}
void Update(void const *data, int size) {
@@ -113,24 +110,6 @@ class CheckSummer {
DISALLOW_COPY_AND_ASSIGN(CheckSummer);
};
-// Define the UTF8 string conversion function to throw an error
-// when the ICU Library is missing or disabled.
-#ifndef HAVE_ICU
-
-template <class Label>
-bool UTF8StringToLabels(const string&, std::vector<Label>*) {
- LOG(ERROR) << "UTF8StringToLabels: ICU Library required for UTF8 handling";
- return false;
-}
-
-template <class Label>
-bool LabelsToUTF8String(const std::vector<Label>&, string*) {
- LOG(ERROR) << "LabelsToUTF8String: ICU Library required for UTF8 handling";
- return false;
-}
-
-#endif // HAVE_ICU
-
} // namespace fst
diff --git a/src/include/fst/compose.h b/src/include/fst/compose.h
index c0bf4b1..dfdff0a 100644
--- a/src/include/fst/compose.h
+++ b/src/include/fst/compose.h
@@ -360,6 +360,10 @@ class ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> {
return Times(final1, final2);
}
+ // Identifies and verifies the capabilities of the matcher to be used for
+ // composition.
+ void SetMatchType();
+
F *filter_;
Matcher1 *matcher1_;
Matcher2 *matcher2_;
@@ -385,14 +389,63 @@ ComposeFstImpl<M1, M2, F, T>::ComposeFstImpl(
fst2_(matcher2_->GetFst()),
state_table_(opts.state_table ? opts.state_table :
new T(fst1_, fst2_)) {
+ SetMatchType();
+ if (match_type_ == MATCH_NONE)
+ SetProperties(kError, kError);
+ VLOG(2) << "ComposeFst(" << this << "): Match type: "
+ << (match_type_ == MATCH_OUTPUT ? "output" :
+ (match_type_ == MATCH_INPUT ? "input" :
+ (match_type_ == MATCH_BOTH ? "both" :
+ (match_type_ == MATCH_NONE ? "none" : "unknown"))));
+
+ uint64 fprops1 = fst1.Properties(kFstProperties, false);
+ uint64 fprops2 = fst2.Properties(kFstProperties, false);
+ uint64 mprops1 = matcher1_->Properties(fprops1);
+ uint64 mprops2 = matcher2_->Properties(fprops2);
+ uint64 cprops = ComposeProperties(mprops1, mprops2);
+ SetProperties(filter_->Properties(cprops), kCopyProperties);
+ if (state_table_->Error()) SetProperties(kError, kError);
+ VLOG(2) << "ComposeFst(" << this << "): Initialized";
+}
+
+template <class M1, class M2, class F, class T>
+void ComposeFstImpl<M1, M2, F, T>::SetMatchType() {
MatchType type1 = matcher1_->Type(false);
MatchType type2 = matcher2_->Type(false);
- if (type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) {
+ uint32 flags1 = matcher1_->Flags();
+ uint32 flags2 = matcher2_->Flags();
+ if (flags1 & flags2 & kRequireMatch) {
+ FSTERROR() << "ComposeFst: only one argument can require matching.";
+ match_type_ = MATCH_NONE;
+ } else if (flags1 & kRequireMatch) {
+ if (matcher1_->Type(true) != MATCH_OUTPUT) {
+ FSTERROR() << "ComposeFst: 1st argument requires matching but cannot.";
+ match_type_ = MATCH_NONE;
+ }
+ match_type_ = MATCH_OUTPUT;
+ } else if (flags2 & kRequireMatch) {
+ if (matcher2_->Type(true) != MATCH_INPUT) {
+ FSTERROR() << "ComposeFst: 2nd argument requires matching but cannot.";
+ match_type_ = MATCH_NONE;
+ }
+ match_type_ = MATCH_INPUT;
+ } else if (flags1 & flags2 & kPreferMatch &&
+ type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) {
+ match_type_ = MATCH_BOTH;
+ } else if (flags1 & kPreferMatch && type1 == MATCH_OUTPUT) {
+ match_type_ = MATCH_OUTPUT;
+ } else if (flags2 & kPreferMatch && type2 == MATCH_INPUT) {
+ match_type_ = MATCH_INPUT;
+ } else if (type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) {
match_type_ = MATCH_BOTH;
} else if (type1 == MATCH_OUTPUT) {
match_type_ = MATCH_OUTPUT;
} else if (type2 == MATCH_INPUT) {
match_type_ = MATCH_INPUT;
+ } else if (flags1 & kPreferMatch && matcher1_->Type(true) == MATCH_OUTPUT) {
+ match_type_ = MATCH_OUTPUT;
+ } else if (flags2 & kPreferMatch && matcher2_->Type(true) == MATCH_INPUT) {
+ match_type_ = MATCH_INPUT;
} else if (matcher1_->Type(true) == MATCH_OUTPUT) {
match_type_ = MATCH_OUTPUT;
} else if (matcher2_->Type(true) == MATCH_INPUT) {
@@ -400,16 +453,8 @@ ComposeFstImpl<M1, M2, F, T>::ComposeFstImpl(
} else {
FSTERROR() << "ComposeFst: 1st argument cannot match on output labels "
<< "and 2nd argument cannot match on input labels (sort?).";
- SetProperties(kError, kError);
+ match_type_ = MATCH_NONE;
}
- uint64 fprops1 = fst1.Properties(kFstProperties, false);
- uint64 fprops2 = fst2.Properties(kFstProperties, false);
- uint64 mprops1 = matcher1_->Properties(fprops1);
- uint64 mprops2 = matcher2_->Properties(fprops2);
- uint64 cprops = ComposeProperties(mprops1, mprops2);
- SetProperties(filter_->Properties(cprops), kCopyProperties);
- if (state_table_->Error()) SetProperties(kError, kError);
- VLOG(2) << "ComposeFst(" << this << "): Initialized";
}
@@ -539,16 +584,19 @@ class ComposeFst : public ImplToFst< ComposeFstImplBase<A> > {
switch (LookAheadMatchType(fst1, fst2)) { // Check for lookahead matchers
default:
case MATCH_NONE: { // Default composition (no look-ahead)
+ VLOG(2) << "ComposeFst: Default composition (no look-ahead)";
ComposeFstOptions<Arc> nopts(opts);
return CreateBase1(fst1, fst2, nopts);
}
case MATCH_OUTPUT: { // Lookahead on fst1
+ VLOG(2) << "ComposeFst: Lookahead on fst1";
typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M;
typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F;
ComposeFstOptions<Arc, M, F> nopts(opts);
return CreateBase1(fst1, fst2, nopts);
}
case MATCH_INPUT: { // Lookahead on fst2
+ VLOG(2) << "ComposeFst: Lookahead on fst2";
typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M;
typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F;
ComposeFstOptions<Arc, M, F> nopts(opts);
diff --git a/src/include/fst/config.h b/src/include/fst/config.h
index 046b49c..47e472e 100644
--- a/src/include/fst/config.h
+++ b/src/include/fst/config.h
@@ -4,7 +4,7 @@
/* Define to 1 if you have the ICU library. */
/* #undef HAVE_ICU */
-/* Define to 1 if the system has the type `std::tr1::hash<long long
+/* Define to 1 if the system has the type `std::hash<long long
unsigned>'. */
#define HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_ 1
diff --git a/src/include/fst/const-fst.h b/src/include/fst/const-fst.h
index f68e8ed..80efc8d 100644
--- a/src/include/fst/const-fst.h
+++ b/src/include/fst/const-fst.h
@@ -87,8 +87,6 @@ class ConstFstImpl : public FstImpl<A> {
static ConstFstImpl<A, U> *Read(istream &strm, const FstReadOptions &opts);
- bool Write(ostream &strm, const FstWriteOptions &opts) const;
-
A *Arcs(StateId s) { return arcs_ + states_[s].pos; }
// Provide information needed for generic state iterator
@@ -330,23 +328,24 @@ template <class A, class U>
template <class F>
bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm,
const FstWriteOptions &opts) {
- static const int kFileVersion = 2;
- static const int kAlignedFileVersion = 1;
- static const int kFileAlign = 16;
- int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
+ int file_version = opts.align ? ConstFstImpl<A, U>::kAlignedFileVersion :
+ ConstFstImpl<A, U>::kFileVersion;
size_t num_arcs = -1, num_states = -1;
size_t start_offset = 0;
bool update_header = true;
if (fst.Type() == ConstFst<A, U>().Type()) {
- const ConstFst<A, U> *const_fst = static_cast<const ConstFst<A, U> *>(&fst);
+ const ConstFst<A, U> *const_fst =
+ reinterpret_cast<const ConstFst<A, U> *>(&fst);
num_arcs = const_fst->GetImpl()->narcs_;
num_states = const_fst->GetImpl()->nstates_;
update_header = false;
} else if ((start_offset = strm.tellp()) == -1) {
// precompute values needed for header when we cannot seek to rewrite it.
+ num_arcs = 0;
+ num_states = 0;
for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
num_arcs += fst.NumArcs(siter.Value());
- num_states++;
+ ++num_states;
}
update_header = false;
}
@@ -360,8 +359,11 @@ bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm,
Int64ToStr(8 * sizeof(U), &size);
type += size;
}
- FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, &hdr);
- if (opts.align && !AlignOutput(strm, kFileAlign)) {
+ uint64 properties = fst.Properties(kCopyProperties, true) |
+ ConstFstImpl<A, U>::kStaticProperties;
+ FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties,
+ &hdr);
+ if (opts.align && !AlignOutput(strm, ConstFstImpl<A, U>::kFileAlign)) {
LOG(ERROR) << "Could not align file during write after header";
return false;
}
@@ -375,11 +377,11 @@ bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm,
state.noepsilons = fst.NumOutputEpsilons(siter.Value());
strm.write(reinterpret_cast<const char *>(&state), sizeof(state));
pos += state.narcs;
- states++;
+ ++states;
}
hdr.SetNumStates(states);
hdr.SetNumArcs(pos);
- if (opts.align && !AlignOutput(strm, kFileAlign)) {
+ if (opts.align && !AlignOutput(strm, ConstFstImpl<A, U>::kFileAlign)) {
LOG(ERROR) << "Could not align file during write after writing states";
}
for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
@@ -391,12 +393,12 @@ bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm,
}
strm.flush();
if (!strm) {
- LOG(ERROR) << "WriteAsVectorFst write failed: " << opts.source;
+ LOG(ERROR) << "ConstFst Write write failed: " << opts.source;
return false;
}
if (update_header) {
return FstImpl<A>::UpdateFstHeader(fst, strm, opts, file_version, type,
- &hdr, start_offset);
+ properties, &hdr, start_offset);
} else {
if (hdr.NumStates() != num_states) {
LOG(ERROR) << "Inconsistent number of states observed during write";
diff --git a/src/include/fst/dfs-visit.h b/src/include/fst/dfs-visit.h
index b47c78d..4d93a39 100644
--- a/src/include/fst/dfs-visit.h
+++ b/src/include/fst/dfs-visit.h
@@ -177,7 +177,8 @@ void DfsVisit(const Fst<Arc> &fst, V *visitor, ArcFilter filter) {
// Find next tree root
for (root = root == start ? 0 : root + 1;
root < nstates && state_color[root] != kDfsWhite;
- ++root);
+ ++root) {
+ }
// Check for a state beyond the largest known state
if (!expanded && root == nstates) {
diff --git a/src/include/fst/expanded-fst.h b/src/include/fst/expanded-fst.h
index b44b81c..676ceb3 100644
--- a/src/include/fst/expanded-fst.h
+++ b/src/include/fst/expanded-fst.h
@@ -82,7 +82,7 @@ class ExpandedFst : public Fst<A> {
}
return Read(strm, FstReadOptions(filename));
} else {
- return Read(std::cin, FstReadOptions("standard input"));
+ return Read(cin, FstReadOptions("standard input"));
}
}
};
@@ -154,7 +154,7 @@ class ImplToExpandedFst : public ImplToFst<I, F> {
}
return I::Read(strm, FstReadOptions(filename));
} else {
- return I::Read(std::cin, FstReadOptions("standard input"));
+ return I::Read(cin, FstReadOptions("standard input"));
}
}
diff --git a/src/include/fst/extensions/far/compile-strings.h b/src/include/fst/extensions/far/compile-strings.h
index d7f4d6b..ca247db 100644
--- a/src/include/fst/extensions/far/compile-strings.h
+++ b/src/include/fst/extensions/far/compile-strings.h
@@ -56,7 +56,7 @@ class StringReader {
const SymbolTable *syms = 0,
Label unknown_label = kNoStateId)
: nline_(0), strm_(istrm), source_(source), entry_type_(entry_type),
- token_type_(token_type), done_(false),
+ token_type_(token_type), symbols_(syms), done_(false),
compiler_(token_type, syms, unknown_label, allow_negative_labels) {
Next(); // Initialize the reader to the first input.
}
@@ -87,8 +87,12 @@ class StringReader {
done_ = true; // whitespace at the end of a file.
}
- VectorFst<A> *GetVectorFst() {
+ VectorFst<A> *GetVectorFst(bool keep_symbols = false) {
VectorFst<A> *fst = new VectorFst<A>;
+ if (keep_symbols) {
+ fst->SetInputSymbols(symbols_);
+ fst->SetOutputSymbols(symbols_);
+ }
if (compiler_(content_, fst)) {
return fst;
} else {
@@ -97,9 +101,16 @@ class StringReader {
}
}
- CompactFst<A, StringCompactor<A> > *GetCompactFst() {
- CompactFst<A, StringCompactor<A> > *fst =
- new CompactFst<A, StringCompactor<A> >;
+ CompactFst<A, StringCompactor<A> > *GetCompactFst(bool keep_symbols = false) {
+ CompactFst<A, StringCompactor<A> > *fst;
+ if (keep_symbols) {
+ VectorFst<A> tmp;
+ tmp.SetInputSymbols(symbols_);
+ tmp.SetOutputSymbols(symbols_);
+ fst = new CompactFst<A, StringCompactor<A> >(tmp);
+ } else {
+ fst = new CompactFst<A, StringCompactor<A> >;
+ }
if (compiler_(content_, fst)) {
return fst;
} else {
@@ -114,6 +125,7 @@ class StringReader {
string source_;
EntryType entry_type_;
TokenType token_type_;
+ const SymbolTable *symbols_;
bool done_;
StringCompiler<A> compiler_;
string content_; // The actual content of the input stream's next FST.
@@ -135,6 +147,8 @@ void FarCompileStrings(const vector<string> &in_fnames,
FarTokenType tt,
const string &symbols_fname,
const string &unknown_symbol,
+ bool keep_symbols,
+ bool initial_symbols,
bool allow_negative_labels,
bool file_list_input,
const string &key_prefix,
@@ -175,8 +189,9 @@ void FarCompileStrings(const vector<string> &in_fnames,
const SymbolTable *syms = 0;
typename Arc::Label unknown_label = kNoLabel;
if (!symbols_fname.empty()) {
- syms = SymbolTable::ReadText(symbols_fname,
- allow_negative_labels);
+ SymbolTableTextOptions opts;
+ opts.allow_negative = allow_negative_labels;
+ syms = SymbolTable::ReadText(symbols_fname, opts);
if (!syms) {
FSTERROR() << "FarCompileStrings: error reading symbol table: "
<< symbols_fname;
@@ -199,32 +214,47 @@ void FarCompileStrings(const vector<string> &in_fnames,
vector<string> inputs;
if (file_list_input) {
for (int i = 1; i < in_fnames.size(); ++i) {
- ifstream istrm(in_fnames[i].c_str());
+ istream *istrm = in_fnames.empty() ? &cin :
+ new ifstream(in_fnames[i].c_str());
string str;
- while (getline(istrm, str))
+ while (getline(*istrm, str))
inputs.push_back(str);
+ if (!in_fnames.empty())
+ delete istrm;
}
} else {
inputs = in_fnames;
}
for (int i = 0, n = 0; i < inputs.size(); ++i) {
+ if (generate_keys == 0 && inputs[i].empty()) {
+ FSTERROR() << "FarCompileStrings: read from a file instead of stdin or"
+ << " set the --generate_keys flags.";
+ delete far_writer;
+ delete syms;
+ return;
+ }
int key_size = generate_keys ? generate_keys :
(entry_type == StringReader<Arc>::FILE ? 1 :
KeySize(inputs[i].c_str()));
- ifstream istrm(inputs[i].c_str());
+ istream *istrm = inputs[i].empty() ? &cin :
+ new ifstream(inputs[i].c_str());
+ bool keep_syms = keep_symbols;
for (StringReader<Arc> reader(
- istrm, inputs[i], entry_type, token_type,
- allow_negative_labels, syms, unknown_label);
+ *istrm, inputs[i].empty() ? "stdin" : inputs[i],
+ entry_type, token_type, allow_negative_labels,
+ syms, unknown_label);
!reader.Done();
reader.Next()) {
++n;
const Fst<Arc> *fst;
if (compact)
- fst = reader.GetCompactFst();
+ fst = reader.GetCompactFst(keep_syms);
else
- fst = reader.GetVectorFst();
+ fst = reader.GetVectorFst(keep_syms);
+ if (initial_symbols)
+ keep_syms = false;
if (!fst) {
FSTERROR() << "FarCompileStrings: compiling string number " << n
<< " in file " << inputs[i] << " failed with token_type = "
@@ -236,6 +266,7 @@ void FarCompileStrings(const vector<string> &in_fnames,
(fet == FET_FILE ? "file" : "unknown"));
delete far_writer;
delete syms;
+ if (!inputs[i].empty()) delete istrm;
return;
}
ostringstream keybuf;
@@ -260,6 +291,8 @@ void FarCompileStrings(const vector<string> &in_fnames,
}
if (generate_keys == 0)
n = 0;
+ if (!inputs[i].empty())
+ delete istrm;
}
delete far_writer;
diff --git a/src/include/fst/extensions/far/equal.h b/src/include/fst/extensions/far/equal.h
new file mode 100644
index 0000000..be82e2d
--- /dev/null
+++ b/src/include/fst/extensions/far/equal.h
@@ -0,0 +1,99 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+
+#ifndef FST_EXTENSIONS_FAR_EQUAL_H_
+#define FST_EXTENSIONS_FAR_EQUAL_H_
+
+#include <string>
+
+#include <fst/extensions/far/far.h>
+#include <fst/equal.h>
+
+namespace fst {
+
+template <class Arc>
+bool FarEqual(const string &filename1,
+ const string &filename2,
+ float delta = kDelta,
+ const string &begin_key = string(),
+ const string &end_key = string()) {
+
+ FarReader<Arc> *reader1 = FarReader<Arc>::Open(filename1);
+ FarReader<Arc> *reader2 = FarReader<Arc>::Open(filename2);
+ if (!reader1 || !reader2) {
+ delete reader1;
+ delete reader2;
+ VLOG(1) << "FarEqual: cannot open input Far file(s)";
+ return false;
+ }
+
+ if (!begin_key.empty()) {
+ bool find_begin1 = reader1->Find(begin_key);
+ bool find_begin2 = reader2->Find(begin_key);
+ if (!find_begin1 || !find_begin2) {
+ bool ret = !find_begin1 && !find_begin2;
+ if (!ret) {
+ VLOG(1) << "FarEqual: key \"" << begin_key << "\" missing from "
+ << (find_begin1 ? "second" : "first") << " archive.";
+ }
+ delete reader1;
+ delete reader2;
+ return ret;
+ }
+ }
+
+ for(; !reader1->Done() && !reader2->Done();
+ reader1->Next(), reader2->Next()) {
+ const string key1 = reader1->GetKey();
+ const string key2 = reader2->GetKey();
+ if (!end_key.empty() && end_key < key1 && end_key < key2) {
+ delete reader1;
+ delete reader2;
+ return true;
+ }
+ if (key1 != key2) {
+ VLOG(1) << "FarEqual: mismatched keys \""
+ << key1 << "\" <> \"" << key2 << "\".";
+ delete reader1;
+ delete reader2;
+ return false;
+ }
+ if (!Equal(reader1->GetFst(), reader2->GetFst(), delta)) {
+ VLOG(1) << "FarEqual: Fsts for key \"" << key1 << "\" are not equal.";
+ delete reader1;
+ delete reader2;
+ return false;
+ }
+ }
+
+ if (!reader1->Done() || !reader2->Done()) {
+ VLOG(1) << "FarEqual: key \""
+ << (reader1->Done() ? reader2->GetKey() : reader1->GetKey())
+ << "\" missing form " << (reader2->Done() ? "first" : "second")
+ << " archive.";
+ delete reader1;
+ delete reader2;
+ return false;
+ }
+
+ delete reader1;
+ delete reader2;
+ return true;
+}
+
+} // namespace fst
+
+#endif // FST_EXTENSIONS_FAR_EQUAL_H_
diff --git a/src/include/fst/extensions/far/extract.h b/src/include/fst/extensions/far/extract.h
index 022ca60..d6f92ff 100644
--- a/src/include/fst/extensions/far/extract.h
+++ b/src/include/fst/extensions/far/extract.h
@@ -70,7 +70,7 @@ void FarExtract(const vector<string> &ifilenames,
if (nrep > 0) {
ostringstream tmp;
tmp << '.' << nrep;
- key += tmp.str();
+ key.append(tmp.str().data(), tmp.str().size());
}
ofilename = key;
}
diff --git a/src/include/fst/extensions/far/far.h b/src/include/fst/extensions/far/far.h
index 82b9e5c..acce76e 100644
--- a/src/include/fst/extensions/far/far.h
+++ b/src/include/fst/extensions/far/far.h
@@ -32,6 +32,13 @@ namespace fst {
enum FarEntryType { FET_LINE, FET_FILE };
enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 };
+inline bool IsFst(const string &filename) {
+ ifstream strm(filename.c_str());
+ if (!strm)
+ return false;
+ return IsFstHeader(strm, filename);
+}
+
// FST archive header class
class FarHeader {
public:
@@ -40,8 +47,11 @@ class FarHeader {
bool Read(const string &filename) {
FstHeader fsthdr;
- if (filename.empty()) { // Header reading unsupported on stdin.
- return false;
+ if (filename.empty()) {
+ // Header reading unsupported on stdin. Assumes STList and StdArc.
+ fartype_ = "stlist";
+ arctype_ = "standard";
+ return true;
} else if (IsSTTable(filename)) { // Check if STTable
ReadSTTableHeader(filename, &fsthdr);
fartype_ = "sttable";
@@ -52,6 +62,12 @@ class FarHeader {
fartype_ = "sttable";
arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
return true;
+ } else if (IsFst(filename)) { // Check if Fst
+ ifstream istrm(filename.c_str());
+ fsthdr.Read(istrm, filename);
+ fartype_ = "fst";
+ arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
+ return true;
}
return false;
}
@@ -61,8 +77,12 @@ class FarHeader {
string arctype_;
};
-enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2,
- FAR_SSTABLE = 3 };
+enum FarType {
+ FAR_DEFAULT = 0,
+ FAR_STTABLE = 1,
+ FAR_STLIST = 2,
+ FAR_FST = 3,
+};
// This class creates an archive of FSTs.
template <class A>
@@ -153,7 +173,7 @@ class STTableFarWriter : public FarWriter<A> {
public:
typedef A Arc;
- static STTableFarWriter *Create(const string filename) {
+ static STTableFarWriter *Create(const string &filename) {
STTableWriter<Fst<A>, FstWriter<A> > *writer =
STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
return new STTableFarWriter(writer);
@@ -183,7 +203,7 @@ class STListFarWriter : public FarWriter<A> {
public:
typedef A Arc;
- static STListFarWriter *Create(const string filename) {
+ static STListFarWriter *Create(const string &filename) {
STListWriter<Fst<A>, FstWriter<A> > *writer =
STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
return new STListFarWriter(writer);
@@ -209,6 +229,43 @@ class STListFarWriter : public FarWriter<A> {
template <class A>
+class FstFarWriter : public FarWriter<A> {
+ public:
+ typedef A Arc;
+
+ explicit FstFarWriter(const string &filename)
+ : filename_(filename), error_(false), written_(false) {}
+
+ static FstFarWriter *Create(const string &filename) {
+ return new FstFarWriter(filename);
+ }
+
+ void Add(const string &key, const Fst<A> &fst) {
+ if (written_) {
+ LOG(WARNING) << "FstFarWriter::Add: only one Fst supported,"
+ << " subsequent entries discarded.";
+ } else {
+ error_ = !fst.Write(filename_);
+ written_ = true;
+ }
+ }
+
+ FarType Type() const { return FAR_FST; }
+
+ bool Error() const { return error_; }
+
+ ~FstFarWriter() {}
+
+ private:
+ string filename_;
+ bool error_;
+ bool written_;
+
+ DISALLOW_COPY_AND_ASSIGN(FstFarWriter);
+};
+
+
+template <class A>
FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
switch(type) {
case FAR_DEFAULT:
@@ -220,6 +277,9 @@ FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
case FAR_STLIST:
return STListFarWriter<A>::Create(filename);
break;
+ case FAR_FST:
+ return FstFarWriter<A>::Create(filename);
+ break;
default:
LOG(ERROR) << "FarWriter::Create: unknown far type";
return 0;
@@ -331,6 +391,114 @@ class STListFarReader : public FarReader<A> {
DISALLOW_COPY_AND_ASSIGN(STListFarReader);
};
+template <class A>
+class FstFarReader : public FarReader<A> {
+ public:
+ typedef A Arc;
+
+ static FstFarReader *Open(const string &filename) {
+ vector<string> filenames;
+ filenames.push_back(filename);
+ return new FstFarReader<A>(filenames);
+ }
+
+ static FstFarReader *Open(const vector<string> &filenames) {
+ return new FstFarReader<A>(filenames);
+ }
+
+ FstFarReader(const vector<string> &filenames)
+ : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) {
+ sort(keys_.begin(), keys_.end());
+ streams_.resize(keys_.size(), 0);
+ for (size_t i = 0; i < keys_.size(); ++i) {
+ if (keys_[i].empty()) {
+ if (!has_stdin_) {
+ streams_[i] = &cin;
+ //sources_[i] = "stdin";
+ has_stdin_ = true;
+ } else {
+ FSTERROR() << "FstFarReader::FstFarReader: stdin should only "
+ << "appear once in the input file list.";
+ error_ = true;
+ return;
+ }
+ } else {
+ streams_[i] = new ifstream(
+ keys_[i].c_str(), ifstream::in | ifstream::binary);
+ }
+ }
+ if (pos_ >= keys_.size()) return;
+ ReadFst();
+ }
+
+ void Reset() {
+ if (has_stdin_) {
+ FSTERROR() << "FstFarReader::Reset: operation not supported on stdin";
+ error_ = true;
+ return;
+ }
+ pos_ = 0;
+ ReadFst();
+ }
+
+ bool Find(const string &key) {
+ if (has_stdin_) {
+ FSTERROR() << "FstFarReader::Find: operation not supported on stdin";
+ error_ = true;
+ return false;
+ }
+ pos_ = 0;//TODO
+ ReadFst();
+ return true;
+ }
+
+ bool Done() const { return error_ || pos_ >= keys_.size(); }
+
+ void Next() {
+ ++pos_;
+ ReadFst();
+ }
+
+ const string &GetKey() const {
+ return keys_[pos_];
+ }
+
+ const Fst<A> &GetFst() const {
+ return *fst_;
+ }
+
+ FarType Type() const { return FAR_FST; }
+
+ bool Error() const { return error_; }
+
+ ~FstFarReader() {
+ if (fst_) delete fst_;
+ for (size_t i = 0; i < keys_.size(); ++i)
+ delete streams_[i];
+ }
+
+ private:
+ void ReadFst() {
+ if (fst_) delete fst_;
+ if (pos_ >= keys_.size()) return;
+ streams_[pos_]->seekg(0);
+ fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions());
+ if (!fst_) {
+ FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_];
+ error_ = true;
+ }
+ }
+
+ private:
+ vector<string> keys_;
+ vector<istream*> streams_;
+ bool has_stdin_;
+ size_t pos_;
+ mutable Fst<A> *fst_;
+ mutable bool error_;
+
+ DISALLOW_COPY_AND_ASSIGN(FstFarReader);
+};
template <class A>
FarReader<A> *FarReader<A>::Open(const string &filename) {
@@ -340,6 +508,8 @@ FarReader<A> *FarReader<A>::Open(const string &filename) {
return STTableFarReader<A>::Open(filename);
else if (IsSTList(filename))
return STListFarReader<A>::Open(filename);
+ else if (IsFst(filename))
+ return FstFarReader<A>::Open(filename);
return 0;
}
@@ -352,6 +522,8 @@ FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
return STTableFarReader<A>::Open(filenames);
else if (!filenames.empty() && IsSTList(filenames[0]))
return STListFarReader<A>::Open(filenames);
+ else if (!filenames.empty() && IsFst(filenames[0]))
+ return FstFarReader<A>::Open(filenames);
return 0;
}
diff --git a/src/include/fst/extensions/far/farscript.h b/src/include/fst/extensions/far/farscript.h
index 9c3b1ca..3a9c145 100644
--- a/src/include/fst/extensions/far/farscript.h
+++ b/src/include/fst/extensions/far/farscript.h
@@ -27,6 +27,7 @@ using std::vector;
#include <fst/script/arg-packs.h>
#include <fst/extensions/far/compile-strings.h>
#include <fst/extensions/far/create.h>
+#include <fst/extensions/far/equal.h>
#include <fst/extensions/far/extract.h>
#include <fst/extensions/far/info.h>
#include <fst/extensions/far/print-strings.h>
@@ -51,6 +52,8 @@ struct FarCompileStringsArgs {
const FarTokenType tt;
const string &symbols_fname;
const string &unknown_symbol;
+ const bool keep_symbols;
+ const bool initial_symbols;
const bool allow_negative_labels;
const bool file_list_input;
const string &key_prefix;
@@ -65,6 +68,8 @@ struct FarCompileStringsArgs {
FarTokenType tt,
const string &symbols_fname,
const string &unknown_symbol,
+ bool keep_symbols,
+ bool initial_symbols,
bool allow_negative_labels,
bool file_list_input,
const string &key_prefix,
@@ -72,6 +77,7 @@ struct FarCompileStringsArgs {
in_fnames(in_fnames), out_fname(out_fname), fst_type(fst_type),
far_type(far_type), generate_keys(generate_keys), fet(fet),
tt(tt), symbols_fname(symbols_fname), unknown_symbol(unknown_symbol),
+ keep_symbols(keep_symbols), initial_symbols(initial_symbols),
allow_negative_labels(allow_negative_labels),
file_list_input(file_list_input), key_prefix(key_prefix),
key_suffix(key_suffix) { }
@@ -82,7 +88,8 @@ void FarCompileStrings(FarCompileStringsArgs *args) {
fst::FarCompileStrings<Arc>(
args->in_fnames, args->out_fname, args->fst_type, args->far_type,
args->generate_keys, args->fet, args->tt, args->symbols_fname,
- args->unknown_symbol, args->allow_negative_labels, args->file_list_input,
+ args->unknown_symbol, args->keep_symbols, args->initial_symbols,
+ args->allow_negative_labels, args->file_list_input,
args->key_prefix, args->key_suffix);
}
@@ -97,6 +104,8 @@ void FarCompileStrings(
FarTokenType tt,
const string &symbols_fname,
const string &unknown_symbol,
+ bool keep_symbols,
+ bool initial_symbols,
bool allow_negative_labels,
bool file_list_input,
const string &key_prefix,
@@ -143,6 +152,25 @@ void FarCreate(const vector<string> &in_fnames,
const string &key_suffix);
+typedef args::Package<const string &, const string &, float,
+ const string &, const string &> FarEqualInnerArgs;
+typedef args::WithReturnValue<bool, FarEqualInnerArgs> FarEqualArgs;
+
+template <class Arc>
+void FarEqual(FarEqualArgs *args) {
+ args->retval = fst::FarEqual<Arc>(
+ args->args.arg1, args->args.arg2, args->args.arg3,
+ args->args.arg4, args->args.arg5);
+}
+
+bool FarEqual(const string &filename1,
+ const string &filename2,
+ const string &arc_type,
+ float delta = kDelta,
+ const string &begin_key = string(),
+ const string &end_key = string());
+
+
typedef args::Package<const vector<string> &, int32,
const string&, const string&, const string&,
const string&> FarExtractArgs;
@@ -180,7 +208,9 @@ struct FarPrintStringsArgs {
const string &begin_key;
const string &end_key;
const bool print_key;
+ const bool print_weight;
const string &symbols_fname;
+ const bool initial_symbols;
const int32 generate_filenames;
const string &filename_prefix;
const string &filename_suffix;
@@ -188,12 +218,14 @@ struct FarPrintStringsArgs {
FarPrintStringsArgs(
const vector<string> &ifilenames, const FarEntryType entry_type,
const FarTokenType token_type, const string &begin_key,
- const string &end_key, const bool print_key,
- const string &symbols_fname, const int32 generate_filenames,
+ const string &end_key, const bool print_key, const bool print_weight,
+ const string &symbols_fname, const bool initial_symbols,
+ const int32 generate_filenames,
const string &filename_prefix, const string &filename_suffix) :
ifilenames(ifilenames), entry_type(entry_type), token_type(token_type),
- begin_key(begin_key), end_key(end_key), print_key(print_key),
- symbols_fname(symbols_fname),
+ begin_key(begin_key), end_key(end_key),
+ print_key(print_key), print_weight(print_weight),
+ symbols_fname(symbols_fname), initial_symbols(initial_symbols),
generate_filenames(generate_filenames), filename_prefix(filename_prefix),
filename_suffix(filename_suffix) { }
};
@@ -202,9 +234,9 @@ template <class Arc>
void FarPrintStrings(FarPrintStringsArgs *args) {
fst::FarPrintStrings<Arc>(
args->ifilenames, args->entry_type, args->token_type,
- args->begin_key, args->end_key, args->print_key,
- args->symbols_fname, args->generate_filenames, args->filename_prefix,
- args->filename_suffix);
+ args->begin_key, args->end_key, args->print_key, args->print_weight,
+ args->symbols_fname, args->initial_symbols, args->generate_filenames,
+ args->filename_prefix, args->filename_suffix);
}
@@ -215,7 +247,9 @@ void FarPrintStrings(const vector<string> &ifilenames,
const string &begin_key,
const string &end_key,
const bool print_key,
+ const bool print_weight,
const string &symbols_fname,
+ const bool initial_symbols,
const int32 generate_filenames,
const string &filename_prefix,
const string &filename_suffix);
@@ -227,6 +261,7 @@ void FarPrintStrings(const vector<string> &ifilenames,
#define REGISTER_FST_FAR_OPERATIONS(ArcType) \
REGISTER_FST_OPERATION(FarCompileStrings, ArcType, FarCompileStringsArgs); \
REGISTER_FST_OPERATION(FarCreate, ArcType, FarCreateArgs); \
+ REGISTER_FST_OPERATION(FarEqual, ArcType, FarEqualArgs); \
REGISTER_FST_OPERATION(FarExtract, ArcType, FarExtractArgs); \
REGISTER_FST_OPERATION(FarInfo, ArcType, FarInfoArgs); \
REGISTER_FST_OPERATION(FarPrintStrings, ArcType, FarPrintStringsArgs)
diff --git a/src/include/fst/extensions/far/info.h b/src/include/fst/extensions/far/info.h
index f010546..100fe68 100644
--- a/src/include/fst/extensions/far/info.h
+++ b/src/include/fst/extensions/far/info.h
@@ -34,7 +34,7 @@ void CountStatesAndArcs(const Fst<Arc> &fst, size_t *nstate, size_t *narc) {
StateIterator<Fst<Arc> > siter(fst);
for (; !siter.Done(); siter.Next(), ++(*nstate)) {
ArcIterator<Fst<Arc> > aiter(fst, siter.Value());
- for (; !aiter.Done(); aiter.Next(), ++(*narc));
+ for (; !aiter.Done(); aiter.Next(), ++(*narc)) {}
}
}
diff --git a/src/include/fst/extensions/far/print-strings.h b/src/include/fst/extensions/far/print-strings.h
index aff1e51..dcc7351 100644
--- a/src/include/fst/extensions/far/print-strings.h
+++ b/src/include/fst/extensions/far/print-strings.h
@@ -27,17 +27,21 @@
using std::vector;
#include <fst/extensions/far/far.h>
+#include <fst/shortest-distance.h>
#include <fst/string.h>
+DECLARE_string(far_field_separator);
+
namespace fst {
template <class Arc>
void FarPrintStrings(
const vector<string> &ifilenames, const FarEntryType entry_type,
const FarTokenType far_token_type, const string &begin_key,
- const string &end_key, const bool print_key, const string &symbols_fname,
- const int32 generate_filenames, const string &filename_prefix,
- const string &filename_suffix) {
+ const string &end_key, const bool print_key, const bool print_weight,
+ const string &symbols_fname, const bool initial_symbols,
+ const int32 generate_filenames,
+ const string &filename_prefix, const string &filename_suffix) {
typename StringPrinter<Arc>::TokenType token_type;
if (far_token_type == FTT_SYMBOL) {
@@ -54,7 +58,9 @@ void FarPrintStrings(
const SymbolTable *syms = 0;
if (!symbols_fname.empty()) {
// allow negative flag?
- syms = SymbolTable::ReadText(symbols_fname, true);
+ SymbolTableTextOptions opts;
+ opts.allow_negative = true;
+ syms = SymbolTable::ReadText(symbols_fname, opts);
if (!syms) {
FSTERROR() << "FarPrintStrings: error reading symbol table: "
<< symbols_fname;
@@ -62,8 +68,6 @@ void FarPrintStrings(
}
}
- StringPrinter<Arc> string_printer(token_type, syms);
-
FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
if (!far_reader) return;
@@ -83,14 +87,21 @@ void FarPrintStrings(
okey = key;
const Fst<Arc> &fst = far_reader->GetFst();
+ if (i == 1 && initial_symbols && syms == 0 && fst.InputSymbols() != 0)
+ syms = fst.InputSymbols()->Copy();
string str;
VLOG(2) << "Handling key: " << key;
+ StringPrinter<Arc> string_printer(
+ token_type, syms ? syms : fst.InputSymbols());
string_printer(fst, &str);
if (entry_type == FET_LINE) {
if (print_key)
- cout << key << "\t";
- cout << str << endl;
+ cout << key << FLAGS_far_field_separator[0];
+ cout << str;
+ if (print_weight)
+ cout << FLAGS_far_field_separator[0] << ShortestDistance(fst);
+ cout << endl;
} else if (entry_type == FET_FILE) {
stringstream sstrm;
if (generate_filenames) {
@@ -117,6 +128,7 @@ void FarPrintStrings(
ostrm << "\n";
}
}
+ delete syms;
}
diff --git a/src/include/fst/extensions/far/stlist.h b/src/include/fst/extensions/far/stlist.h
index 4738181..1cdc80c 100644
--- a/src/include/fst/extensions/far/stlist.h
+++ b/src/include/fst/extensions/far/stlist.h
@@ -26,6 +26,7 @@
#include <iostream>
#include <fstream>
+#include <sstream>
#include <fst/util.h>
#include <algorithm>
@@ -58,7 +59,7 @@ class STListWriter {
explicit STListWriter(const string filename)
: stream_(
- filename.empty() ? &std::cout :
+ filename.empty() ? &cout :
new ofstream(filename.c_str(), ofstream::out | ofstream::binary)),
error_(false) {
WriteType(*stream_, kSTListMagicNumber);
@@ -92,7 +93,7 @@ class STListWriter {
~STListWriter() {
WriteType(*stream_, string());
- if (stream_ != &std::cout)
+ if (stream_ != &cout)
delete stream_;
}
@@ -127,7 +128,7 @@ class STListReader {
for (size_t i = 0; i < filenames.size(); ++i) {
if (filenames[i].empty()) {
if (!has_stdin) {
- streams_[i] = &std::cin;
+ streams_[i] = &cin;
sources_[i] = "stdin";
has_stdin = true;
} else {
@@ -177,7 +178,7 @@ class STListReader {
~STListReader() {
for (size_t i = 0; i < streams_.size(); ++i) {
- if (streams_[i] != &std::cin)
+ if (streams_[i] != &cin)
delete streams_[i];
}
if (entry_)
diff --git a/src/include/fst/extensions/far/sttable.h b/src/include/fst/extensions/far/sttable.h
index 3a03133..3ce0a4b 100644
--- a/src/include/fst/extensions/far/sttable.h
+++ b/src/include/fst/extensions/far/sttable.h
@@ -29,6 +29,7 @@
#include <algorithm>
#include <iostream>
#include <fstream>
+#include <sstream>
#include <fst/util.h>
namespace fst {
diff --git a/src/include/fst/extensions/ngram/bitmap-index.h b/src/include/fst/extensions/ngram/bitmap-index.h
new file mode 100644
index 0000000..f5a5ba7
--- /dev/null
+++ b/src/include/fst/extensions/ngram/bitmap-index.h
@@ -0,0 +1,183 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: sorenj@google.com (Jeffrey Sorensen)
+
+#ifndef FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_
+#define FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/compat.h>
+
+// This class is a bitstring storage class with an index that allows
+// seeking to the Nth set or clear bit in time O(Log(N)) where N is
+// the length of the bit vector. In addition, it allows counting set or
+// clear bits over ranges in constant time.
+//
+// This is accomplished by maintaining an "secondary" index of limited
+// size in bits that maintains a running count of the number of bits set
+// in each block of bitmap data. A block is defined as the number of
+// uint64 values that can fit in the secondary index before an overflow
+// occurs.
+//
+// To handle overflows, a "primary" index containing a running count of
+// bits set in each block is created using the type uint64.
+
+namespace fst {
+
+class BitmapIndex {
+ public:
+ static size_t StorageSize(size_t size) {
+ return ((size + kStorageBlockMask) >> kStorageLogBitSize);
+ }
+
+ BitmapIndex() : bits_(NULL), size_(0) { }
+
+ bool Get(size_t index) const {
+ return (bits_[index >> kStorageLogBitSize] &
+ (kOne << (index & kStorageBlockMask))) != 0;
+ }
+
+ static void Set(uint64* bits, size_t index) {
+ bits[index >> kStorageLogBitSize] |= (kOne << (index & kStorageBlockMask));
+ }
+
+ static void Clear(uint64* bits, size_t index) {
+ bits[index >> kStorageLogBitSize] &= ~(kOne << (index & kStorageBlockMask));
+ }
+
+ size_t Bits() const {
+ return size_;
+ }
+
+ size_t ArraySize() const {
+ return StorageSize(size_);
+ }
+
+ // Returns the number of one bits in the bitmap
+ size_t GetOnesCount() const {
+ return primary_index_[primary_index_size() - 1];
+ }
+
+ // Returns the number of one bits in positions 0 to limit - 1.
+ // REQUIRES: limit <= Bits()
+ size_t Rank1(size_t end) const;
+
+ // Returns the number of one bits in the range start to end - 1.
+ // REQUIRES: limit <= Bits()
+ size_t GetOnesCountInRange(size_t start, size_t end) const {
+ return Rank1(end) - Rank1(start);
+ }
+
+ // Returns the number of zero bits in positions 0 to limit - 1.
+ // REQUIRES: limit <= Bits()
+ size_t Rank0(size_t end) const {
+ return end - Rank1(end);
+ }
+
+ // Returns the number of zero bits in the range start to end - 1.
+ // REQUIRES: limit <= Bits()
+ size_t GetZeroesCountInRange(size_t start, size_t end) const {
+ return end - start - GetOnesCountInRange(start, end);
+ }
+
+ // Return true if any bit between begin inclusive and end exclusive
+ // is set. 0 <= begin <= end <= Bits() is required.
+ //
+ bool TestRange(size_t start, size_t end) const {
+ return Rank1(end) > Rank1(start);
+ }
+
+ // Returns the offset to the nth set bit (zero based)
+ // or Bits() if index >= number of ones
+ size_t Select1(size_t bit_index) const;
+
+ // Returns the offset to the nth clear bit (zero based)
+ // or Bits() if index > number of
+ size_t Select0(size_t bit_index) const;
+
+ // Rebuilds from index for the associated Bitmap, should be called
+ // whenever changes have been made to the Bitmap or else behavior
+ // of the indexed bitmap methods will be undefined.
+ void BuildIndex(const uint64 *bits, size_t size);
+
+ // the secondary index accumulates counts until it can possibly overflow
+ // this constant computes the number of uint64 units that can fit into
+ // units the size of uint16.
+ static const uint64 kOne = 1;
+ static const uint32 kStorageBitSize = 64;
+ static const uint32 kStorageLogBitSize = 6;
+ static const uint32 kSecondaryBlockSize = ((1 << 16) - 1)
+ >> kStorageLogBitSize;
+
+ private:
+ static const uint32 kStorageBlockMask = kStorageBitSize - 1;
+
+ // returns, from the index, the count of ones up to array_index
+ size_t get_index_ones_count(size_t array_index) const;
+
+ // because the indexes, both primary and secondary, contain a running
+ // count of the population of one bits contained in [0,i), there is
+ // no reason to have an element in the zeroth position as this value would
+ // necessarily be zero. (The bits are indexed in a zero based way.) Thus
+ // we don't store the 0th element in either index. Both of the following
+ // functions, if greater than 0, must be decremented by one before retreiving
+ // the value from the corresponding array.
+ // returns the 1 + the block that contains the bitindex in question
+ // the inverted version works the same but looks for zeros using an inverted
+ // view of the index
+ size_t find_primary_block(size_t bit_index) const;
+
+ size_t find_inverted_primary_block(size_t bit_index) const;
+
+ // similarly, the secondary index (which resets its count to zero at
+ // the end of every kSecondaryBlockSize entries) does not store the element
+ // at 0. Note that the rem_bit_index parameter is the number of bits
+ // within the secondary block, after the bits accounted for by the primary
+ // block have been removed (i.e. the remaining bits) And, because we
+ // reset to zero with each new block, there is no need to store those
+ // actual zeros.
+ // returns 1 + the secondary block that contains the bitindex in question
+ size_t find_secondary_block(size_t block, size_t rem_bit_index) const;
+
+ size_t find_inverted_secondary_block(size_t block, size_t rem_bit_index)
+ const;
+
+ // We create a primary index based upon the number of secondary index
+ // blocks. The primary index uses fields wide enough to accomodate any
+ // index of the bitarray so cannot overflow
+ // The primary index is the actual running
+ // count of one bits set for all blocks (and, thus, all uint64s).
+ size_t primary_index_size() const {
+ return (ArraySize() + kSecondaryBlockSize - 1) / kSecondaryBlockSize;
+ }
+
+ const uint64* bits_;
+ size_t size_;
+
+ // The primary index contains the running popcount of all blocks
+ // which means the nth value contains the popcounts of
+ // [0,n*kSecondaryBlockSize], however, the 0th element is omitted.
+ vector<uint32> primary_index_;
+ // The secondary index contains the running popcount of the associated
+ // bitmap. It is the same length (in units of uint16) as the
+ // bitmap's map is in units of uint64s.
+ vector<uint16> secondary_index_;
+};
+
+} // end namespace fst
+
+#endif // FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_
diff --git a/src/include/fst/extensions/ngram/ngram-fst.h b/src/include/fst/extensions/ngram/ngram-fst.h
new file mode 100644
index 0000000..eee664a
--- /dev/null
+++ b/src/include/fst/extensions/ngram/ngram-fst.h
@@ -0,0 +1,912 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: sorenj@google.com (Jeffrey Sorensen)
+//
+#ifndef FST_EXTENSIONS_NGRAM_NGRAM_FST_H_
+#define FST_EXTENSIONS_NGRAM_NGRAM_FST_H_
+
+#include <stddef.h>
+#include <string.h>
+#include <algorithm>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/compat.h>
+#include <fst/fstlib.h>
+#include <fst/extensions/ngram/bitmap-index.h>
+
+// NgramFst implements a n-gram language model based upon the LOUDS data
+// structure. Please refer to "Unary Data Strucutres for Language Models"
+// http://research.google.com/pubs/archive/37218.pdf
+
+namespace fst {
+template <class A> class NGramFst;
+template <class A> class NGramFstMatcher;
+
+// Instance data containing mutable state for bookkeeping repeated access to
+// the same state.
+template <class A>
+struct NGramFstInst {
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+ StateId state_;
+ size_t num_futures_;
+ size_t offset_;
+ size_t node_;
+ StateId node_state_;
+ vector<Label> context_;
+ StateId context_state_;
+ NGramFstInst()
+ : state_(kNoStateId), node_state_(kNoStateId),
+ context_state_(kNoStateId) { }
+};
+
+// Implementation class for LOUDS based NgramFst interface
+template <class A>
+class NGramFstImpl : public FstImpl<A> {
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::WriteHeader;
+
+ friend class ArcIterator<NGramFst<A> >;
+ friend class NGramFstMatcher<A>;
+
+ public:
+ using FstImpl<A>::InputSymbols;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::Properties;
+
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+
+ NGramFstImpl() : data_(0), owned_(false) {
+ SetType("ngram");
+ SetInputSymbols(NULL);
+ SetOutputSymbols(NULL);
+ SetProperties(kStaticProperties);
+ }
+
+ NGramFstImpl(const Fst<A> &fst, vector<StateId>* order_out);
+
+ ~NGramFstImpl() {
+ if (owned_) {
+ delete [] data_;
+ }
+ }
+
+ static NGramFstImpl<A>* Read(istream &strm, // NOLINT
+ const FstReadOptions &opts) {
+ NGramFstImpl<A>* impl = new NGramFstImpl();
+ FstHeader hdr;
+ if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return 0;
+ uint64 num_states, num_futures, num_final;
+ const size_t offset = sizeof(num_states) + sizeof(num_futures) +
+ sizeof(num_final);
+ // Peek at num_states and num_futures to see how much more needs to be read.
+ strm.read(reinterpret_cast<char *>(&num_states), sizeof(num_states));
+ strm.read(reinterpret_cast<char *>(&num_futures), sizeof(num_futures));
+ strm.read(reinterpret_cast<char *>(&num_final), sizeof(num_final));
+ size_t size = Storage(num_states, num_futures, num_final);
+ char* data = new char[size];
+ // Copy num_states, num_futures and num_final back into data.
+ memcpy(data, reinterpret_cast<char *>(&num_states), sizeof(num_states));
+ memcpy(data + sizeof(num_states), reinterpret_cast<char *>(&num_futures),
+ sizeof(num_futures));
+ memcpy(data + sizeof(num_states) + sizeof(num_futures),
+ reinterpret_cast<char *>(&num_final), sizeof(num_final));
+ strm.read(data + offset, size - offset);
+ if (!strm) {
+ delete impl;
+ return NULL;
+ }
+ impl->Init(data, true /* owned */);
+ return impl;
+ }
+
+ bool Write(ostream &strm, // NOLINT
+ const FstWriteOptions &opts) const {
+ FstHeader hdr;
+ hdr.SetStart(Start());
+ hdr.SetNumStates(num_states_);
+ WriteHeader(strm, opts, kFileVersion, &hdr);
+ strm.write(data_, Storage(num_states_, num_futures_, num_final_));
+ return strm;
+ }
+
+ StateId Start() const {
+ return 1;
+ }
+
+ Weight Final(StateId state) const {
+ if (final_index_.Get(state)) {
+ return final_probs_[final_index_.Rank1(state)];
+ } else {
+ return Weight::Zero();
+ }
+ }
+
+ size_t NumArcs(StateId state, NGramFstInst<A> *inst = NULL) const {
+ if (inst == NULL) {
+ const size_t next_zero = future_index_.Select0(state + 1);
+ const size_t this_zero = future_index_.Select0(state);
+ return next_zero - this_zero - 1;
+ }
+ SetInstFuture(state, inst);
+ return inst->num_futures_ + ((state == 0) ? 0 : 1);
+ }
+
+ size_t NumInputEpsilons(StateId state) const {
+ // State 0 has no parent, thus no backoff.
+ if (state == 0) return 0;
+ return 1;
+ }
+
+ size_t NumOutputEpsilons(StateId state) const {
+ return NumInputEpsilons(state);
+ }
+
+ StateId NumStates() const {
+ return num_states_;
+ }
+
+ void InitStateIterator(StateIteratorData<A>* data) const {
+ data->base = 0;
+ data->nstates = num_states_;
+ }
+
+ static size_t Storage(uint64 num_states, uint64 num_futures,
+ uint64 num_final) {
+ uint64 b64;
+ Weight weight;
+ Label label;
+ size_t offset = sizeof(num_states) + sizeof(num_futures) +
+ sizeof(num_final);
+ offset += sizeof(b64) * (
+ BitmapIndex::StorageSize(num_states * 2 + 1) +
+ BitmapIndex::StorageSize(num_futures + num_states + 1) +
+ BitmapIndex::StorageSize(num_states));
+ offset += (num_states + 1) * sizeof(label) + num_futures * sizeof(label);
+ // Pad for alignemnt, see
+ // http://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding
+ offset = (offset + sizeof(weight) - 1) & ~(sizeof(weight) - 1);
+ offset += (num_states + 1) * sizeof(weight) + num_final * sizeof(weight) +
+ (num_futures + 1) * sizeof(weight);
+ return offset;
+ }
+
+ void SetInstFuture(StateId state, NGramFstInst<A> *inst) const {
+ if (inst->state_ != state) {
+ inst->state_ = state;
+ const size_t next_zero = future_index_.Select0(state + 1);
+ const size_t this_zero = future_index_.Select0(state);
+ inst->num_futures_ = next_zero - this_zero - 1;
+ inst->offset_ = future_index_.Rank1(future_index_.Select0(state) + 1);
+ }
+ }
+
+ void SetInstNode(NGramFstInst<A> *inst) const {
+ if (inst->node_state_ != inst->state_) {
+ inst->node_state_ = inst->state_;
+ inst->node_ = context_index_.Select1(inst->state_);
+ }
+ }
+
+ void SetInstContext(NGramFstInst<A> *inst) const {
+ SetInstNode(inst);
+ if (inst->context_state_ != inst->state_) {
+ inst->context_state_ = inst->state_;
+ inst->context_.clear();
+ size_t node = inst->node_;
+ while (node != 0) {
+ inst->context_.push_back(context_words_[context_index_.Rank1(node)]);
+ node = context_index_.Select1(context_index_.Rank0(node) - 1);
+ }
+ }
+ }
+
+ // Access to the underlying representation
+ const char* GetData(size_t* data_size) const {
+ *data_size = Storage(num_states_, num_futures_, num_final_);
+ return data_;
+ }
+
+ void Init(const char* data, bool owned);
+
+ private:
+ StateId Transition(const vector<Label> &context, Label future) const;
+
+ // Properties always true for this Fst class.
+ static const uint64 kStaticProperties = kAcceptor | kIDeterministic |
+ kODeterministic | kEpsilons | kIEpsilons | kOEpsilons | kILabelSorted |
+ kOLabelSorted | kWeighted | kCyclic | kInitialAcyclic | kNotTopSorted |
+ kAccessible | kCoAccessible | kNotString | kExpanded;
+ // Current file format version.
+ static const int kFileVersion = 4;
+ // Minimum file format version supported.
+ static const int kMinFileVersion = 4;
+
+ const char* data_;
+ bool owned_; // True if we own data_
+ uint64 num_states_, num_futures_, num_final_;
+ size_t root_num_children_;
+ const Label *root_children_;
+ size_t root_first_child_;
+ // borrowed references
+ const uint64 *context_, *future_, *final_;
+ const Label *context_words_, *future_words_;
+ const Weight *backoff_, *final_probs_, *future_probs_;
+ BitmapIndex context_index_;
+ BitmapIndex future_index_;
+ BitmapIndex final_index_;
+
+ void operator=(const NGramFstImpl<A> &); // Disallow
+};
+
+template<typename A>
+NGramFstImpl<A>::NGramFstImpl(const Fst<A> &fst, vector<StateId>* order_out)
+ : data_(0), owned_(false) {
+ typedef A Arc;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::StateId StateId;
+ SetType("ngram");
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ SetProperties(kStaticProperties);
+
+ // Check basic requirements for an OpenGRM language model Fst.
+ int64 props = kAcceptor | kIDeterministic | kIEpsilons | kILabelSorted;
+ if (fst.Properties(props, true) != props) {
+ FSTERROR() << "NGramFst only accepts OpenGRM langauge models as input";
+ SetProperties(kError, kError);
+ return;
+ }
+
+ int64 num_states = CountStates(fst);
+ Label* context = new Label[num_states];
+
+ // Find the unigram state by starting from the start state, following
+ // epsilons.
+ StateId unigram = fst.Start();
+ while (1) {
+ ArcIterator<Fst<A> > aiter(fst, unigram);
+ if (aiter.Done()) {
+ FSTERROR() << "Start state has no arcs";
+ SetProperties(kError, kError);
+ return;
+ }
+ if (aiter.Value().ilabel != 0) break;
+ unigram = aiter.Value().nextstate;
+ }
+
+ // Each state's context is determined by the subtree it is under from the
+ // unigram state.
+ queue<pair<StateId, Label> > label_queue;
+ vector<bool> visited(num_states);
+ // Force an epsilon link to the start state.
+ label_queue.push(make_pair(fst.Start(), 0));
+ for (ArcIterator<Fst<A> > aiter(fst, unigram);
+ !aiter.Done(); aiter.Next()) {
+ label_queue.push(make_pair(aiter.Value().nextstate, aiter.Value().ilabel));
+ }
+ // investigate states in breadth first fashion to assign context words.
+ while (!label_queue.empty()) {
+ pair<StateId, Label> &now = label_queue.front();
+ if (!visited[now.first]) {
+ context[now.first] = now.second;
+ visited[now.first] = true;
+ for (ArcIterator<Fst<A> > aiter(fst, now.first);
+ !aiter.Done(); aiter.Next()) {
+ const Arc &arc = aiter.Value();
+ if (arc.ilabel != 0) {
+ label_queue.push(make_pair(arc.nextstate, now.second));
+ }
+ }
+ }
+ label_queue.pop();
+ }
+ visited.clear();
+
+ // The arc from the start state should be assigned an epsilon to put it
+ // in front of the all other labels (which makes Start state 1 after
+ // unigram which is state 0).
+ context[fst.Start()] = 0;
+
+ // Build the tree of contexts fst by reversing the epsilon arcs from fst.
+ VectorFst<Arc> context_fst;
+ uint64 num_final = 0;
+ for (int i = 0; i < num_states; ++i) {
+ if (fst.Final(i) != Weight::Zero()) {
+ ++num_final;
+ }
+ context_fst.SetFinal(context_fst.AddState(), fst.Final(i));
+ }
+ context_fst.SetStart(unigram);
+ context_fst.SetInputSymbols(fst.InputSymbols());
+ context_fst.SetOutputSymbols(fst.OutputSymbols());
+ int64 num_context_arcs = 0;
+ int64 num_futures = 0;
+ for (StateIterator<Fst<A> > siter(fst); !siter.Done(); siter.Next()) {
+ const StateId &state = siter.Value();
+ num_futures += fst.NumArcs(state) - fst.NumInputEpsilons(state);
+ ArcIterator<Fst<A> > aiter(fst, state);
+ if (!aiter.Done()) {
+ const Arc &arc = aiter.Value();
+ // this arc goes from state to arc.nextstate, so create an arc from
+ // arc.nextstate to state to reverse it.
+ if (arc.ilabel == 0) {
+ context_fst.AddArc(arc.nextstate, Arc(context[state], context[state],
+ arc.weight, state));
+ num_context_arcs++;
+ }
+ }
+ }
+ if (num_context_arcs != context_fst.NumStates() - 1) {
+ FSTERROR() << "Number of contexts arcs != number of states - 1";
+ SetProperties(kError, kError);
+ return;
+ }
+ if (context_fst.NumStates() != num_states) {
+ FSTERROR() << "Number of contexts != number of states";
+ SetProperties(kError, kError);
+ return;
+ }
+ int64 context_props = context_fst.Properties(kIDeterministic |
+ kILabelSorted, true);
+ if (!(context_props & kIDeterministic)) {
+ FSTERROR() << "Input fst is not structured properly";
+ SetProperties(kError, kError);
+ return;
+ }
+ if (!(context_props & kILabelSorted)) {
+ ArcSort(&context_fst, ILabelCompare<Arc>());
+ }
+
+ delete [] context;
+
+ uint64 b64;
+ Weight weight;
+ Label label = kNoLabel;
+ const size_t storage = Storage(num_states, num_futures, num_final);
+ char* data = new char[storage];
+ memset(data, 0, storage);
+ size_t offset = 0;
+ memcpy(data + offset, reinterpret_cast<char *>(&num_states),
+ sizeof(num_states));
+ offset += sizeof(num_states);
+ memcpy(data + offset, reinterpret_cast<char *>(&num_futures),
+ sizeof(num_futures));
+ offset += sizeof(num_futures);
+ memcpy(data + offset, reinterpret_cast<char *>(&num_final),
+ sizeof(num_final));
+ offset += sizeof(num_final);
+ uint64* context_bits = reinterpret_cast<uint64*>(data + offset);
+ offset += BitmapIndex::StorageSize(num_states * 2 + 1) * sizeof(b64);
+ uint64* future_bits = reinterpret_cast<uint64*>(data + offset);
+ offset +=
+ BitmapIndex::StorageSize(num_futures + num_states + 1) * sizeof(b64);
+ uint64* final_bits = reinterpret_cast<uint64*>(data + offset);
+ offset += BitmapIndex::StorageSize(num_states) * sizeof(b64);
+ Label* context_words = reinterpret_cast<Label*>(data + offset);
+ offset += (num_states + 1) * sizeof(label);
+ Label* future_words = reinterpret_cast<Label*>(data + offset);
+ offset += num_futures * sizeof(label);
+ offset = (offset + sizeof(weight) - 1) & ~(sizeof(weight) - 1);
+ Weight* backoff = reinterpret_cast<Weight*>(data + offset);
+ offset += (num_states + 1) * sizeof(weight);
+ Weight* final_probs = reinterpret_cast<Weight*>(data + offset);
+ offset += num_final * sizeof(weight);
+ Weight* future_probs = reinterpret_cast<Weight*>(data + offset);
+ int64 context_arc = 0, future_arc = 0, context_bit = 0, future_bit = 0,
+ final_bit = 0;
+
+ // pseudo-root bits
+ BitmapIndex::Set(context_bits, context_bit++);
+ ++context_bit;
+ context_words[context_arc] = label;
+ backoff[context_arc] = Weight::Zero();
+ context_arc++;
+
+ ++future_bit;
+ if (order_out) {
+ order_out->clear();
+ order_out->resize(num_states);
+ }
+
+ queue<StateId> context_q;
+ context_q.push(context_fst.Start());
+ StateId state_number = 0;
+ while (!context_q.empty()) {
+ const StateId &state = context_q.front();
+ if (order_out) {
+ (*order_out)[state] = state_number;
+ }
+
+ const Weight &final = context_fst.Final(state);
+ if (final != Weight::Zero()) {
+ BitmapIndex::Set(final_bits, state_number);
+ final_probs[final_bit] = final;
+ ++final_bit;
+ }
+
+ for (ArcIterator<VectorFst<A> > aiter(context_fst, state);
+ !aiter.Done(); aiter.Next()) {
+ const Arc &arc = aiter.Value();
+ context_words[context_arc] = arc.ilabel;
+ backoff[context_arc] = arc.weight;
+ ++context_arc;
+ BitmapIndex::Set(context_bits, context_bit++);
+ context_q.push(arc.nextstate);
+ }
+ ++context_bit;
+
+ for (ArcIterator<Fst<A> > aiter(fst, state); !aiter.Done(); aiter.Next()) {
+ const Arc &arc = aiter.Value();
+ if (arc.ilabel != 0) {
+ future_words[future_arc] = arc.ilabel;
+ future_probs[future_arc] = arc.weight;
+ ++future_arc;
+ BitmapIndex::Set(future_bits, future_bit++);
+ }
+ }
+ ++future_bit;
+ ++state_number;
+ context_q.pop();
+ }
+
+ if ((state_number != num_states) ||
+ (context_bit != num_states * 2 + 1) ||
+ (context_arc != num_states) ||
+ (future_arc != num_futures) ||
+ (future_bit != num_futures + num_states + 1) ||
+ (final_bit != num_final)) {
+ FSTERROR() << "Structure problems detected during construction";
+ SetProperties(kError, kError);
+ return;
+ }
+
+ Init(data, true /* owned */);
+}
+
+template<typename A>
+inline void NGramFstImpl<A>::Init(const char* data, bool owned) {
+ if (owned_) {
+ delete [] data_;
+ }
+ owned_ = owned;
+ data_ = data;
+ size_t offset = 0;
+ num_states_ = *(reinterpret_cast<const uint64*>(data_ + offset));
+ offset += sizeof(num_states_);
+ num_futures_ = *(reinterpret_cast<const uint64*>(data_ + offset));
+ offset += sizeof(num_futures_);
+ num_final_ = *(reinterpret_cast<const uint64*>(data_ + offset));
+ offset += sizeof(num_final_);
+ uint64 bits;
+ size_t context_bits = num_states_ * 2 + 1;
+ size_t future_bits = num_futures_ + num_states_ + 1;
+ context_ = reinterpret_cast<const uint64*>(data_ + offset);
+ offset += BitmapIndex::StorageSize(context_bits) * sizeof(bits);
+ future_ = reinterpret_cast<const uint64*>(data_ + offset);
+ offset += BitmapIndex::StorageSize(future_bits) * sizeof(bits);
+ final_ = reinterpret_cast<const uint64*>(data_ + offset);
+ offset += BitmapIndex::StorageSize(num_states_ + 1) * sizeof(bits);
+ context_words_ = reinterpret_cast<const Label*>(data_ + offset);
+ offset += (num_states_ + 1) * sizeof(*context_words_);
+ future_words_ = reinterpret_cast<const Label*>(data_ + offset);
+ offset += num_futures_ * sizeof(*future_words_);
+ offset = (offset + sizeof(*backoff_) - 1) & ~(sizeof(*backoff_) - 1);
+ backoff_ = reinterpret_cast<const Weight*>(data_ + offset);
+ offset += (num_states_ + 1) * sizeof(*backoff_);
+ final_probs_ = reinterpret_cast<const Weight*>(data_ + offset);
+ offset += num_final_ * sizeof(*final_probs_);
+ future_probs_ = reinterpret_cast<const Weight*>(data_ + offset);
+
+ context_index_.BuildIndex(context_, context_bits);
+ future_index_.BuildIndex(future_, future_bits);
+ final_index_.BuildIndex(final_, num_states_);
+
+ const size_t node_rank = context_index_.Rank1(0);
+ root_first_child_ = context_index_.Select0(node_rank) + 1;
+ if (context_index_.Get(root_first_child_) == false) {
+ FSTERROR() << "Missing unigrams";
+ SetProperties(kError, kError);
+ return;
+ }
+ const size_t last_child = context_index_.Select0(node_rank + 1) - 1;
+ root_num_children_ = last_child - root_first_child_ + 1;
+ root_children_ = context_words_ + context_index_.Rank1(root_first_child_);
+}
+
+template<typename A>
+inline typename A::StateId NGramFstImpl<A>::Transition(
+ const vector<Label> &context, Label future) const {
+ size_t num_children = root_num_children_;
+ const Label *children = root_children_;
+ const Label *loc = lower_bound(children, children + num_children, future);
+ if (loc == children + num_children || *loc != future) {
+ return context_index_.Rank1(0);
+ }
+ size_t node = root_first_child_ + loc - children;
+ size_t node_rank = context_index_.Rank1(node);
+ size_t first_child = context_index_.Select0(node_rank) + 1;
+ if (context_index_.Get(first_child) == false) {
+ return context_index_.Rank1(node);
+ }
+ size_t last_child = context_index_.Select0(node_rank + 1) - 1;
+ num_children = last_child - first_child + 1;
+ for (int word = context.size() - 1; word >= 0; --word) {
+ children = context_words_ + context_index_.Rank1(first_child);
+ loc = lower_bound(children, children + last_child - first_child + 1,
+ context[word]);
+ if (loc == children + last_child - first_child + 1 ||
+ *loc != context[word]) {
+ break;
+ }
+ node = first_child + loc - children;
+ node_rank = context_index_.Rank1(node);
+ first_child = context_index_.Select0(node_rank) + 1;
+ if (context_index_.Get(first_child) == false) break;
+ last_child = context_index_.Select0(node_rank + 1) - 1;
+ }
+ return context_index_.Rank1(node);
+}
+
+/*****************************************************************************/
+template<class A>
+class NGramFst : public ImplToExpandedFst<NGramFstImpl<A> > {
+ friend class ArcIterator<NGramFst<A> >;
+ friend class NGramFstMatcher<A>;
+
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef NGramFstImpl<A> Impl;
+
+ explicit NGramFst(const Fst<A> &dst)
+ : ImplToExpandedFst<Impl>(new Impl(dst, NULL)) {}
+
+ NGramFst(const Fst<A> &fst, vector<StateId>* order_out)
+ : ImplToExpandedFst<Impl>(new Impl(fst, order_out)) {}
+
+ // Because the NGramFstImpl is a const stateless data structure, there
+ // is never a need to do anything beside copy the reference.
+ NGramFst(const NGramFst<A> &fst, bool safe = false)
+ : ImplToExpandedFst<Impl>(fst, false) {}
+
+ NGramFst() : ImplToExpandedFst<Impl>(new Impl()) {}
+
+ // Non-standard constructor to initialize NGramFst directly from data.
+ NGramFst(const char* data, bool owned) : ImplToExpandedFst<Impl>(new Impl()) {
+ GetImpl()->Init(data, owned);
+ }
+
+ // Get method that gets the data associated with Init().
+ const char* GetData(size_t* data_size) const {
+ return GetImpl()->GetData(data_size);
+ }
+
+ virtual size_t NumArcs(StateId s) const {
+ return GetImpl()->NumArcs(s, &inst_);
+ }
+
+ virtual NGramFst<A>* Copy(bool safe = false) const {
+ return new NGramFst(*this, safe);
+ }
+
+ static NGramFst<A>* Read(istream &strm, const FstReadOptions &opts) {
+ Impl* impl = Impl::Read(strm, opts);
+ return impl ? new NGramFst<A>(impl) : 0;
+ }
+
+ static NGramFst<A>* Read(const string &filename) {
+ if (!filename.empty()) {
+ ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
+ if (!strm) {
+ LOG(ERROR) << "NGramFst::Read: Can't open file: " << filename;
+ return 0;
+ }
+ return Read(strm, FstReadOptions(filename));
+ } else {
+ return Read(cin, FstReadOptions("standard input"));
+ }
+ }
+
+ virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ return GetImpl()->Write(strm, opts);
+ }
+
+ virtual bool Write(const string &filename) const {
+ return Fst<A>::WriteFile(filename);
+ }
+
+ virtual inline void InitStateIterator(StateIteratorData<A>* data) const {
+ GetImpl()->InitStateIterator(data);
+ }
+
+ virtual inline void InitArcIterator(
+ StateId s, ArcIteratorData<A>* data) const;
+
+ virtual MatcherBase<A>* InitMatcher(MatchType match_type) const {
+ return new NGramFstMatcher<A>(*this, match_type);
+ }
+
+ private:
+ explicit NGramFst(Impl* impl) : ImplToExpandedFst<Impl>(impl) {}
+
+ Impl* GetImpl() const {
+ return
+ ImplToExpandedFst<Impl, ExpandedFst<A> >::GetImpl();
+ }
+
+ void SetImpl(Impl* impl, bool own_impl = true) {
+ ImplToExpandedFst<Impl, Fst<A> >::SetImpl(impl, own_impl);
+ }
+
+ mutable NGramFstInst<A> inst_;
+};
+
+template <class A> inline void
+NGramFst<A>::InitArcIterator(StateId s, ArcIteratorData<A>* data) const {
+ GetImpl()->SetInstFuture(s, &inst_);
+ GetImpl()->SetInstNode(&inst_);
+ data->base = new ArcIterator<NGramFst<A> >(*this, s);
+}
+
+/*****************************************************************************/
+template <class A>
+class NGramFstMatcher : public MatcherBase<A> {
+ public:
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+
+ NGramFstMatcher(const NGramFst<A> &fst, MatchType match_type)
+ : fst_(fst), inst_(fst.inst_), match_type_(match_type),
+ current_loop_(false),
+ loop_(kNoLabel, 0, A::Weight::One(), kNoStateId) {
+ if (match_type_ == MATCH_OUTPUT) {
+ swap(loop_.ilabel, loop_.olabel);
+ }
+ }
+
+ NGramFstMatcher(const NGramFstMatcher<A> &matcher, bool safe = false)
+ : fst_(matcher.fst_), inst_(matcher.inst_),
+ match_type_(matcher.match_type_), current_loop_(false),
+ loop_(kNoLabel, 0, A::Weight::One(), kNoStateId) {
+ if (match_type_ == MATCH_OUTPUT) {
+ swap(loop_.ilabel, loop_.olabel);
+ }
+ }
+
+ virtual NGramFstMatcher<A>* Copy(bool safe = false) const {
+ return new NGramFstMatcher<A>(*this, safe);
+ }
+
+ virtual MatchType Type(bool test) const {
+ return match_type_;
+ }
+
+ virtual const Fst<A> &GetFst() const {
+ return fst_;
+ }
+
+ virtual uint64 Properties(uint64 props) const {
+ return props;
+ }
+
+ private:
+ virtual void SetState_(StateId s) {
+ fst_.GetImpl()->SetInstFuture(s, &inst_);
+ current_loop_ = false;
+ }
+
+ virtual bool Find_(Label label) {
+ const Label nolabel = kNoLabel;
+ done_ = true;
+ if (label == 0 || label == nolabel) {
+ if (label == 0) {
+ current_loop_ = true;
+ loop_.nextstate = inst_.state_;
+ }
+ // The unigram state has no epsilon arc.
+ if (inst_.state_ != 0) {
+ arc_.ilabel = arc_.olabel = 0;
+ fst_.GetImpl()->SetInstNode(&inst_);
+ arc_.nextstate = fst_.GetImpl()->context_index_.Rank1(
+ fst_.GetImpl()->context_index_.Select1(
+ fst_.GetImpl()->context_index_.Rank0(inst_.node_) - 1));
+ arc_.weight = fst_.GetImpl()->backoff_[inst_.state_];
+ done_ = false;
+ }
+ } else {
+ const Label *start = fst_.GetImpl()->future_words_ + inst_.offset_;
+ const Label *end = start + inst_.num_futures_;
+ const Label* search = lower_bound(start, end, label);
+ if (search != end && *search == label) {
+ size_t state = search - start;
+ arc_.ilabel = arc_.olabel = label;
+ arc_.weight = fst_.GetImpl()->future_probs_[inst_.offset_ + state];
+ fst_.GetImpl()->SetInstContext(&inst_);
+ arc_.nextstate = fst_.GetImpl()->Transition(inst_.context_, label);
+ done_ = false;
+ }
+ }
+ return !Done_();
+ }
+
+ virtual bool Done_() const {
+ return !current_loop_ && done_;
+ }
+
+ virtual const Arc& Value_() const {
+ return (current_loop_) ? loop_ : arc_;
+ }
+
+ virtual void Next_() {
+ if (current_loop_) {
+ current_loop_ = false;
+ } else {
+ done_ = true;
+ }
+ }
+
+ const NGramFst<A>& fst_;
+ NGramFstInst<A> inst_;
+ MatchType match_type_; // Supplied by caller
+ bool done_;
+ Arc arc_;
+ bool current_loop_; // Current arc is the implicit loop
+ Arc loop_;
+};
+
+/*****************************************************************************/
+template<class A>
+class ArcIterator<NGramFst<A> > : public ArcIteratorBase<A> {
+ public:
+ typedef A Arc;
+ typedef typename A::Label Label;
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+
+ ArcIterator(const NGramFst<A> &fst, StateId state)
+ : lazy_(~0), impl_(fst.GetImpl()), i_(0), flags_(kArcValueFlags) {
+ inst_ = fst.inst_;
+ impl_->SetInstFuture(state, &inst_);
+ impl_->SetInstNode(&inst_);
+ }
+
+ bool Done() const {
+ return i_ >= ((inst_.node_ == 0) ? inst_.num_futures_ :
+ inst_.num_futures_ + 1);
+ }
+
+ const Arc &Value() const {
+ bool eps = (inst_.node_ != 0 && i_ == 0);
+ StateId state = (inst_.node_ == 0) ? i_ : i_ - 1;
+ if (flags_ & lazy_ & (kArcILabelValue | kArcOLabelValue)) {
+ arc_.ilabel =
+ arc_.olabel = eps ? 0 : impl_->future_words_[inst_.offset_ + state];
+ lazy_ &= ~(kArcILabelValue | kArcOLabelValue);
+ }
+ if (flags_ & lazy_ & kArcNextStateValue) {
+ if (eps) {
+ arc_.nextstate = impl_->context_index_.Rank1(
+ impl_->context_index_.Select1(
+ impl_->context_index_.Rank0(inst_.node_) - 1));
+ } else {
+ if (lazy_ & kArcNextStateValue) {
+ impl_->SetInstContext(&inst_); // first time only.
+ }
+ arc_.nextstate =
+ impl_->Transition(inst_.context_,
+ impl_->future_words_[inst_.offset_ + state]);
+ }
+ lazy_ &= ~kArcNextStateValue;
+ }
+ if (flags_ & lazy_ & kArcWeightValue) {
+ arc_.weight = eps ? impl_->backoff_[inst_.state_] :
+ impl_->future_probs_[inst_.offset_ + state];
+ lazy_ &= ~kArcWeightValue;
+ }
+ return arc_;
+ }
+
+ void Next() {
+ ++i_;
+ lazy_ = ~0;
+ }
+
+ size_t Position() const { return i_; }
+
+ void Reset() {
+ i_ = 0;
+ lazy_ = ~0;
+ }
+
+ void Seek(size_t a) {
+ if (i_ != a) {
+ i_ = a;
+ lazy_ = ~0;
+ }
+ }
+
+ uint32 Flags() const {
+ return flags_;
+ }
+
+ void SetFlags(uint32 f, uint32 m) {
+ flags_ &= ~m;
+ flags_ |= (f & kArcValueFlags);
+ }
+
+ private:
+ virtual bool Done_() const { return Done(); }
+ virtual const Arc& Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+ virtual size_t Position_() const { return Position(); }
+ virtual void Reset_() { Reset(); }
+ virtual void Seek_(size_t a) { Seek(a); }
+ uint32 Flags_() const { return Flags(); }
+ void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); }
+
+ mutable Arc arc_;
+ mutable uint32 lazy_;
+ const NGramFstImpl<A> *impl_;
+ mutable NGramFstInst<A> inst_;
+
+ size_t i_;
+ uint32 flags_;
+
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+/*****************************************************************************/
+// Specialization for NGramFst; see generic version in fst.h
+// for sample usage (but use the ProdLmFst type!). This version
+// should inline.
+template <class A>
+class StateIterator<NGramFst<A> > : public StateIteratorBase<A> {
+ public:
+ typedef typename A::StateId StateId;
+
+ explicit StateIterator(const NGramFst<A> &fst)
+ : s_(0), num_states_(fst.NumStates()) { }
+
+ bool Done() const { return s_ >= num_states_; }
+ StateId Value() const { return s_; }
+ void Next() { ++s_; }
+ void Reset() { s_ = 0; }
+
+ private:
+ virtual bool Done_() const { return Done(); }
+ virtual StateId Value_() const { return Value(); }
+ virtual void Next_() { Next(); }
+ virtual void Reset_() { Reset(); }
+
+ StateId s_, num_states_;
+
+ DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+} // namespace fst
+#endif // FST_EXTENSIONS_NGRAM_NGRAM_FST_H_
diff --git a/src/include/fst/extensions/ngram/nthbit.h b/src/include/fst/extensions/ngram/nthbit.h
new file mode 100644
index 0000000..d4a9a5a
--- /dev/null
+++ b/src/include/fst/extensions/ngram/nthbit.h
@@ -0,0 +1,46 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: sorenj@google.com (Jeffrey Sorensen)
+// dr@google.com (Doug Rohde)
+
+#ifndef FST_EXTENSIONS_NGRAM_NTHBIT_H_
+#define FST_EXTENSIONS_NGRAM_NTHBIT_H_
+
+#include <fst/types.h>
+
+extern uint32 nth_bit_bit_offset[];
+
+inline uint32 nth_bit(uint64 v, uint32 r) {
+ uint32 shift = 0;
+ uint32 c = __builtin_popcount(v & 0xffffffff);
+ uint32 mask = -(r > c);
+ r -= c & mask;
+ shift += (32 & mask);
+
+ c = __builtin_popcount((v >> shift) & 0xffff);
+ mask = -(r > c);
+ r -= c & mask;
+ shift += (16 & mask);
+
+ c = __builtin_popcount((v >> shift) & 0xff);
+ mask = -(r > c);
+ r -= c & mask;
+ shift += (8 & mask);
+
+ return shift + ((nth_bit_bit_offset[(v >> shift) & 0xff] >>
+ ((r - 1) << 2)) & 0xf);
+}
+
+#endif // FST_EXTENSIONS_NGRAM_NTHBIT_H_
diff --git a/src/include/fst/extensions/pdt/collection.h b/src/include/fst/extensions/pdt/collection.h
index 26be504..24a443f 100644
--- a/src/include/fst/extensions/pdt/collection.h
+++ b/src/include/fst/extensions/pdt/collection.h
@@ -16,7 +16,7 @@
// Author: riley@google.com (Michael Riley)
//
// \file
-// Class to store a collection of sets with elements of type T.
+// Class to store a collection of ordered (multi-)sets with elements of type T.
#ifndef FST_EXTENSIONS_PDT_COLLECTION_H__
#define FST_EXTENSIONS_PDT_COLLECTION_H__
@@ -29,11 +29,11 @@ using std::vector;
namespace fst {
-// Stores a collection of non-empty sets with elements of type T. A
-// default constructor, equality ==, a total order <, and an STL-style
-// hash class must be defined on the elements. Provides signed
-// integer ID (of type I) of each unique set. The IDs are allocated
-// starting from 0 in order.
+// Stores a collection of non-empty, ordered (multi-)sets with elements
+// of type T. A default constructor, equality ==, and an STL-style
+// hash class must be defined on the elements. Provides signed integer
+// ID (of type I) of each unique set. The IDs are allocated starting
+// from 0 in order.
template <class I, class T>
class Collection {
public:
@@ -80,31 +80,34 @@ class Collection {
Collection() {}
- // Lookups integer ID from set. If it doesn't exist, then adds it.
- // Set elements should be in strict order (and therefore unique).
- I FindId(const vector<T> &set) {
+ // Lookups integer ID from ordered multi-set. If it doesn't exist
+ // and 'insert' is true, then adds it. Otherwise returns -1.
+ I FindId(const vector<T> &set, bool insert = true) {
I node_id = kNoNodeId;
for (ssize_t i = set.size() - 1; i >= 0; --i) {
Node node(node_id, set[i]);
- node_id = node_table_.FindId(node);
+ node_id = node_table_.FindId(node, insert);
+ if (node_id == -1) break;
}
return node_id;
}
- // Finds set given integer ID. Returns true if ID corresponds
- // to set. Use iterators below to traverse result.
+ // Finds ordered (multi-)set given integer ID. Returns set iterator
+ // to traverse result.
SetIterator FindSet(I id) {
- if (id < 0 && id >= node_table_.Size()) {
+ if (id < 0 || id >= node_table_.Size()) {
return SetIterator(kNoNodeId, Node(kNoNodeId, T()), &node_table_);
} else {
return SetIterator(id, node_table_.FindEntry(id), &node_table_);
}
}
+ I Size() const { return node_table_.Size(); }
+
private:
static const I kNoNodeId;
static const size_t kPrime;
- static std::tr1::hash<T> hash_;
+ static std::hash<T> hash_;
NodeTable node_table_;
@@ -115,7 +118,7 @@ template<class I, class T> const I Collection<I, T>::kNoNodeId = -1;
template <class I, class T> const size_t Collection<I, T>::kPrime = 7853;
-template <class I, class T> std::tr1::hash<T> Collection<I, T>::hash_;
+template <class I, class T> std::hash<T> Collection<I, T>::hash_;
} // namespace fst
diff --git a/src/include/fst/extensions/pdt/info.h b/src/include/fst/extensions/pdt/info.h
index ef9a860..55e76c4 100644
--- a/src/include/fst/extensions/pdt/info.h
+++ b/src/include/fst/extensions/pdt/info.h
@@ -24,7 +24,7 @@
#include <unordered_map>
using std::tr1::unordered_map;
using std::tr1::unordered_multimap;
-#include <tr1/unordered_set>
+#include <unordered_set>
using std::tr1::unordered_set;
using std::tr1::unordered_multiset;
#include <vector>
diff --git a/src/include/fst/extensions/pdt/paren.h b/src/include/fst/extensions/pdt/paren.h
index 7b9887f..a9d30c5 100644
--- a/src/include/fst/extensions/pdt/paren.h
+++ b/src/include/fst/extensions/pdt/paren.h
@@ -26,7 +26,7 @@
#include <unordered_map>
using std::tr1::unordered_map;
using std::tr1::unordered_multimap;
-#include <tr1/unordered_set>
+#include <unordered_set>
using std::tr1::unordered_set;
using std::tr1::unordered_multiset;
#include <set>
@@ -144,7 +144,8 @@ class PdtParenReachable {
const vector<pair<Label, Label> > &parens, bool close)
: fst_(fst),
parens_(parens),
- close_(close) {
+ close_(close),
+ error_(false) {
for (Label i = 0; i < parens.size(); ++i) {
const pair<Label, Label> &p = parens[i];
paren_id_map_[p.first] = i;
@@ -155,12 +156,18 @@ class PdtParenReachable {
StateId start = fst.Start();
if (start == kNoStateId)
return;
- DFSearch(start, start);
+ if (!DFSearch(start)) {
+ FSTERROR() << "PdtReachable: Underlying cyclicity not supported";
+ error_ = true;
+ }
} else {
FSTERROR() << "PdtParenReachable: open paren info not implemented";
+ error_ = true;
}
}
+ bool const Error() { return error_; }
+
// Given a state ID, returns an iterator over paren IDs
// for close (open) parens reachable from that state along balanced
// paths.
@@ -194,7 +201,7 @@ class PdtParenReachable {
private:
// DFS that gathers paren and state set information.
// Bool returns false when cycle detected.
- bool DFSearch(StateId s, StateId start);
+ bool DFSearch(StateId s);
// Unions state sets together gathered by the DFS.
void ComputeStateSet(StateId s);
@@ -212,12 +219,13 @@ class PdtParenReachable {
vector<char> state_color_; // DFS state
mutable Collection<ssize_t, StateId> state_sets_; // Reachable states -> ID
StateSetMap set_map_; // ID -> Reachable states
+ bool error_;
DISALLOW_COPY_AND_ASSIGN(PdtParenReachable);
};
// DFS that gathers paren and state set information.
template <class A>
-bool PdtParenReachable<A>::DFSearch(StateId s, StateId start) {
+bool PdtParenReachable<A>::DFSearch(StateId s) {
if (s >= state_color_.size())
state_color_.resize(s + 1, kDfsWhite);
@@ -239,7 +247,8 @@ bool PdtParenReachable<A>::DFSearch(StateId s, StateId start) {
if (pit != paren_id_map_.end()) { // paren?
Label paren_id = pit->second;
if (arc.ilabel == parens_[paren_id].first) { // open paren
- DFSearch(arc.nextstate, arc.nextstate);
+ if (!DFSearch(arc.nextstate))
+ return false;
for (SetIterator set_iter = FindStates(paren_id, arc.nextstate);
!set_iter.Done(); set_iter.Next()) {
for (ParenArcIterator paren_arc_iter =
@@ -247,15 +256,14 @@ bool PdtParenReachable<A>::DFSearch(StateId s, StateId start) {
!paren_arc_iter.Done();
paren_arc_iter.Next()) {
const A &cparc = paren_arc_iter.Value();
- DFSearch(cparc.nextstate, start);
+ if (!DFSearch(cparc.nextstate))
+ return false;
}
}
}
} else { // non-paren
- if(!DFSearch(arc.nextstate, start)) {
- FSTERROR() << "PdtReachable: Underlying cyclicity not supported";
- return true;
- }
+ if(!DFSearch(arc.nextstate))
+ return false;
}
}
ComputeStateSet(s);
diff --git a/src/include/fst/extensions/pdt/shortest-path.h b/src/include/fst/extensions/pdt/shortest-path.h
index e90471b..85f94b8 100644
--- a/src/include/fst/extensions/pdt/shortest-path.h
+++ b/src/include/fst/extensions/pdt/shortest-path.h
@@ -28,7 +28,7 @@
#include <unordered_map>
using std::tr1::unordered_map;
using std::tr1::unordered_multimap;
-#include <tr1/unordered_set>
+#include <unordered_set>
using std::tr1::unordered_set;
using std::tr1::unordered_multiset;
#include <stack>
@@ -387,7 +387,6 @@ class PdtShortestPath {
typedef typename SpData::SearchState SearchState;
typedef typename SpData::ParenSpec ParenSpec;
- typedef typename PdtParenReachable<Arc>::SetIterator StateSetIterator;
typedef typename PdtBalanceData<Arc>::SetIterator CloseSourceIterator;
PdtShortestPath(const Fst<Arc> &ifst,
@@ -403,7 +402,7 @@ class PdtShortestPath {
if ((Weight::Properties() & (kPath | kRightSemiring))
!= (kPath | kRightSemiring)) {
- FSTERROR() << "SingleShortestPath: Weight needs to have the path"
+ FSTERROR() << "PdtShortestPath: Weight needs to have the path"
<< " property and be right distributive: " << Weight::Type();
error_ = true;
}
@@ -440,6 +439,7 @@ class PdtShortestPath {
static const Arc kNoArc;
static const uint8 kEnqueued;
static const uint8 kExpanded;
+ static const uint8 kFinished;
const uint8 kFinal;
public:
@@ -543,6 +543,7 @@ void PdtShortestPath<Arc, Queue>::GetDistance(StateId start) {
ProcArcs(s);
sp_data_.SetFlags(s, kExpanded, kExpanded);
}
+ sp_data_.SetFlags(q, kFinished, kFinished);
balance_data_.FinishInsert(start);
sp_data_.GC(start);
}
@@ -607,7 +608,11 @@ void PdtShortestPath<Arc, Queue>::ProcOpenParen(
Queue *state_queue = state_queue_;
GetDistance(d.start);
state_queue_ = state_queue;
+ } else if (!(sp_data_.Flags(d) & kFinished)) {
+ FSTERROR() << "PdtShortestPath: open parenthesis recursion: not bounded stack";
+ error_ = true;
}
+
for (CloseSourceIterator set_iter =
balance_data_.Find(paren_id, arc.nextstate);
!set_iter.Done(); set_iter.Next()) {
@@ -765,6 +770,9 @@ template<class Arc, class Queue>
const uint8 PdtShortestPath<Arc, Queue>::kExpanded = 0x20;
template<class Arc, class Queue>
+const uint8 PdtShortestPath<Arc, Queue>::kFinished = 0x40;
+
+template<class Arc, class Queue>
void ShortestPath(const Fst<Arc> &ifst,
const vector<pair<typename Arc::Label,
typename Arc::Label> > &parens,
diff --git a/src/include/fst/flags.h b/src/include/fst/flags.h
index ec3d301..b3bb66c 100644
--- a/src/include/fst/flags.h
+++ b/src/include/fst/flags.h
@@ -22,6 +22,8 @@
#include <iostream>
#include <map>
+#include <set>
+#include <sstream>
#include <string>
#include <fst/types.h>
@@ -42,7 +44,7 @@ using std::string;
//
// DECLARE_int32(length);
//
-// SetFlags() can be used to set flags from the command line
+// SET_FLAGS() can be used to set flags from the command line
// using, for example, '--length=2'.
//
// ShowUsage() can be used to print out command and flag usage.
@@ -56,12 +58,18 @@ using std::string;
template <typename T>
struct FlagDescription {
- FlagDescription(T *addr, const char *doc, const char *type, const T val)
- : address(addr), doc_string(doc), type_name(type), default_value(val) {}
+ FlagDescription(T *addr, const char *doc, const char *type,
+ const char *file, const T val)
+ : address(addr),
+ doc_string(doc),
+ type_name(type),
+ file_name(file),
+ default_value(val) {}
T *address;
const char *doc_string;
const char *type_name;
+ const char *file_name;
const T default_value;
};
@@ -118,8 +126,7 @@ class FlagRegister {
}
bool SetFlag(const string &arg, const string &val) const {
- for (typename std::map< string,
- FlagDescription<T> >::const_iterator it =
+ for (typename std::map< string, FlagDescription<T> >::const_iterator it =
flag_table_.begin();
it != flag_table_.end();
++it) {
@@ -131,19 +138,7 @@ class FlagRegister {
return false;
}
- void ShowDefault(bool default_value) const {
- std::cout << ", default = ";
- std::cout << (default_value ? "true" : "false");
- }
- void ShowDefault(const string &default_value) const {
- std::cout << ", default = ";
- std::cout << "\"" << default_value << "\"";
- }
- template<typename V> void ShowDefault(const V& default_value) const {
- std::cout << ", default = ";
- std::cout << default_value;
- }
- void ShowUsage() const {
+ void GetUsage(std::set< std::pair<string, string> > *usage_set) const {
for (typename std::map< string,
FlagDescription<T> >::const_iterator it =
flag_table_.begin();
@@ -151,10 +146,13 @@ class FlagRegister {
++it) {
const string &name = it->first;
const FlagDescription<T> &desc = it->second;
- std::cout << " --" << name
- << ": type = " << desc.type_name;
- ShowDefault(desc.default_value);
- std::cout << "\n " << desc.doc_string << "\n";
+ string usage = " --" + name;
+ usage += ": type = ";
+ usage += desc.type_name;
+ usage += ", default = ";
+ usage += GetDefault(desc.default_value) + "\n ";
+ usage += desc.doc_string;
+ usage_set->insert(make_pair(desc.file_name, usage));
}
}
@@ -163,11 +161,26 @@ class FlagRegister {
register_lock_ = new fst::Mutex;
register_ = new FlagRegister<T>;
}
+
+ std::map< string, FlagDescription<T> > flag_table_;
+
+ string GetDefault(bool default_value) const {
+ return default_value ? "true" : "false";
+ }
+
+ string GetDefault(const string &default_value) const {
+ return "\"" + default_value + "\"";
+ }
+
+ template<typename V> string GetDefault(const V& default_value) const {
+ std::ostringstream strm;
+ strm << default_value;
+ return strm.str();
+ }
+
static fst::FstOnceType register_init_; // ensures only called once
static fst::Mutex* register_lock_; // multithreading lock
static FlagRegister<T> *register_;
-
- std::map< string, FlagDescription<T> > flag_table_;
};
template <class T>
@@ -199,6 +212,7 @@ class FlagRegisterer {
name ## _flags_registerer(#name, FlagDescription<type>(&FLAGS_ ## name, \
doc, \
#type, \
+ __FILE__, \
value))
#define DEFINE_bool(name, value, doc) DEFINE_VAR(bool, name, value, doc)
@@ -212,13 +226,17 @@ class FlagRegisterer {
// Temporary directory
DECLARE_string(tmpdir);
-void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags);
+void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags,
+ const char *src = "");
+
+#define SET_FLAGS(usage, argc, argv, rmflags) \
+SetFlags(usage, argc, argv, rmflags, __FILE__)
// Deprecated - for backward compatibility
inline void InitFst(const char *usage, int *argc, char ***argv, bool rmflags) {
return SetFlags(usage, argc, argv, rmflags);
}
-void ShowUsage();
+void ShowUsage(bool long_usage = true);
#endif // FST_LIB_FLAGS_H__
diff --git a/src/include/fst/float-weight.h b/src/include/fst/float-weight.h
index 530cbdd..eb22638 100644
--- a/src/include/fst/float-weight.h
+++ b/src/include/fst/float-weight.h
@@ -37,19 +37,22 @@ namespace fst {
template <class T>
class FloatLimits {
public:
- static const T kPosInfinity;
- static const T kNegInfinity;
- static const T kNumberBad;
-};
+ static const T PosInfinity() {
+ static const T pos_infinity = numeric_limits<T>::infinity();
+ return pos_infinity;
+ }
-template <class T>
-const T FloatLimits<T>::kPosInfinity = numeric_limits<T>::infinity();
+ static const T NegInfinity() {
+ static const T neg_infinity = -PosInfinity();
+ return neg_infinity;
+ }
-template <class T>
-const T FloatLimits<T>::kNegInfinity = -FloatLimits<T>::kPosInfinity;
+ static const T NumberBad() {
+ static const T number_bad = numeric_limits<T>::quiet_NaN();
+ return number_bad;
+ }
-template <class T>
-const T FloatLimits<T>::kNumberBad = numeric_limits<T>::quiet_NaN();
+};
// weight class to be templated on floating-points types
template <class T = float>
@@ -151,9 +154,9 @@ inline bool ApproxEqual(const FloatWeightTpl<T> &w1,
template <class T>
inline ostream &operator<<(ostream &strm, const FloatWeightTpl<T> &w) {
- if (w.Value() == FloatLimits<T>::kPosInfinity)
+ if (w.Value() == FloatLimits<T>::PosInfinity())
return strm << "Infinity";
- else if (w.Value() == FloatLimits<T>::kNegInfinity)
+ else if (w.Value() == FloatLimits<T>::NegInfinity())
return strm << "-Infinity";
else if (w.Value() != w.Value()) // Fails for NaN
return strm << "BadNumber";
@@ -166,9 +169,9 @@ inline istream &operator>>(istream &strm, FloatWeightTpl<T> &w) {
string s;
strm >> s;
if (s == "Infinity") {
- w = FloatWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+ w = FloatWeightTpl<T>(FloatLimits<T>::PosInfinity());
} else if (s == "-Infinity") {
- w = FloatWeightTpl<T>(FloatLimits<T>::kNegInfinity);
+ w = FloatWeightTpl<T>(FloatLimits<T>::NegInfinity());
} else {
char *p;
T f = strtod(s.c_str(), &p);
@@ -196,13 +199,13 @@ class TropicalWeightTpl : public FloatWeightTpl<T> {
TropicalWeightTpl(const TropicalWeightTpl<T> &w) : FloatWeightTpl<T>(w) {}
static const TropicalWeightTpl<T> Zero() {
- return TropicalWeightTpl<T>(FloatLimits<T>::kPosInfinity); }
+ return TropicalWeightTpl<T>(FloatLimits<T>::PosInfinity()); }
static const TropicalWeightTpl<T> One() {
return TropicalWeightTpl<T>(0.0F); }
static const TropicalWeightTpl<T> NoWeight() {
- return TropicalWeightTpl<T>(FloatLimits<T>::kNumberBad); }
+ return TropicalWeightTpl<T>(FloatLimits<T>::NumberBad()); }
static const string &Type() {
static const string type = "tropical" +
@@ -212,12 +215,12 @@ class TropicalWeightTpl : public FloatWeightTpl<T> {
bool Member() const {
// First part fails for IEEE NaN
- return Value() == Value() && Value() != FloatLimits<T>::kNegInfinity;
+ return Value() == Value() && Value() != FloatLimits<T>::NegInfinity();
}
TropicalWeightTpl<T> Quantize(float delta = kDelta) const {
- if (Value() == FloatLimits<T>::kNegInfinity ||
- Value() == FloatLimits<T>::kPosInfinity ||
+ if (Value() == FloatLimits<T>::NegInfinity() ||
+ Value() == FloatLimits<T>::PosInfinity() ||
Value() != Value())
return *this;
else
@@ -259,9 +262,9 @@ inline TropicalWeightTpl<T> Times(const TropicalWeightTpl<T> &w1,
if (!w1.Member() || !w2.Member())
return TropicalWeightTpl<T>::NoWeight();
T f1 = w1.Value(), f2 = w2.Value();
- if (f1 == FloatLimits<T>::kPosInfinity)
+ if (f1 == FloatLimits<T>::PosInfinity())
return w1;
- else if (f2 == FloatLimits<T>::kPosInfinity)
+ else if (f2 == FloatLimits<T>::PosInfinity())
return w2;
else
return TropicalWeightTpl<T>(f1 + f2);
@@ -284,10 +287,10 @@ inline TropicalWeightTpl<T> Divide(const TropicalWeightTpl<T> &w1,
if (!w1.Member() || !w2.Member())
return TropicalWeightTpl<T>::NoWeight();
T f1 = w1.Value(), f2 = w2.Value();
- if (f2 == FloatLimits<T>::kPosInfinity)
- return FloatLimits<T>::kNumberBad;
- else if (f1 == FloatLimits<T>::kPosInfinity)
- return FloatLimits<T>::kPosInfinity;
+ if (f2 == FloatLimits<T>::PosInfinity())
+ return FloatLimits<T>::NumberBad();
+ else if (f1 == FloatLimits<T>::PosInfinity())
+ return FloatLimits<T>::PosInfinity();
else
return TropicalWeightTpl<T>(f1 - f2);
}
@@ -320,7 +323,7 @@ class LogWeightTpl : public FloatWeightTpl<T> {
LogWeightTpl(const LogWeightTpl<T> &w) : FloatWeightTpl<T>(w) {}
static const LogWeightTpl<T> Zero() {
- return LogWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+ return LogWeightTpl<T>(FloatLimits<T>::PosInfinity());
}
static const LogWeightTpl<T> One() {
@@ -328,7 +331,7 @@ class LogWeightTpl : public FloatWeightTpl<T> {
}
static const LogWeightTpl<T> NoWeight() {
- return LogWeightTpl<T>(FloatLimits<T>::kNumberBad); }
+ return LogWeightTpl<T>(FloatLimits<T>::NumberBad()); }
static const string &Type() {
static const string type = "log" + FloatWeightTpl<T>::GetPrecisionString();
@@ -337,12 +340,12 @@ class LogWeightTpl : public FloatWeightTpl<T> {
bool Member() const {
// First part fails for IEEE NaN
- return Value() == Value() && Value() != FloatLimits<T>::kNegInfinity;
+ return Value() == Value() && Value() != FloatLimits<T>::NegInfinity();
}
LogWeightTpl<T> Quantize(float delta = kDelta) const {
- if (Value() == FloatLimits<T>::kNegInfinity ||
- Value() == FloatLimits<T>::kPosInfinity ||
+ if (Value() == FloatLimits<T>::NegInfinity() ||
+ Value() == FloatLimits<T>::PosInfinity() ||
Value() != Value())
return *this;
else
@@ -368,9 +371,9 @@ template <class T>
inline LogWeightTpl<T> Plus(const LogWeightTpl<T> &w1,
const LogWeightTpl<T> &w2) {
T f1 = w1.Value(), f2 = w2.Value();
- if (f1 == FloatLimits<T>::kPosInfinity)
+ if (f1 == FloatLimits<T>::PosInfinity())
return w2;
- else if (f2 == FloatLimits<T>::kPosInfinity)
+ else if (f2 == FloatLimits<T>::PosInfinity())
return w1;
else if (f1 > f2)
return LogWeightTpl<T>(f2 - LogExp(f1 - f2));
@@ -394,9 +397,9 @@ inline LogWeightTpl<T> Times(const LogWeightTpl<T> &w1,
if (!w1.Member() || !w2.Member())
return LogWeightTpl<T>::NoWeight();
T f1 = w1.Value(), f2 = w2.Value();
- if (f1 == FloatLimits<T>::kPosInfinity)
+ if (f1 == FloatLimits<T>::PosInfinity())
return w1;
- else if (f2 == FloatLimits<T>::kPosInfinity)
+ else if (f2 == FloatLimits<T>::PosInfinity())
return w2;
else
return LogWeightTpl<T>(f1 + f2);
@@ -419,10 +422,10 @@ inline LogWeightTpl<T> Divide(const LogWeightTpl<T> &w1,
if (!w1.Member() || !w2.Member())
return LogWeightTpl<T>::NoWeight();
T f1 = w1.Value(), f2 = w2.Value();
- if (f2 == FloatLimits<T>::kPosInfinity)
- return FloatLimits<T>::kNumberBad;
- else if (f1 == FloatLimits<T>::kPosInfinity)
- return FloatLimits<T>::kPosInfinity;
+ if (f2 == FloatLimits<T>::PosInfinity())
+ return FloatLimits<T>::NumberBad();
+ else if (f1 == FloatLimits<T>::PosInfinity())
+ return FloatLimits<T>::PosInfinity();
else
return LogWeightTpl<T>(f1 - f2);
}
@@ -454,15 +457,15 @@ class MinMaxWeightTpl : public FloatWeightTpl<T> {
MinMaxWeightTpl(const MinMaxWeightTpl<T> &w) : FloatWeightTpl<T>(w) {}
static const MinMaxWeightTpl<T> Zero() {
- return MinMaxWeightTpl<T>(FloatLimits<T>::kPosInfinity);
+ return MinMaxWeightTpl<T>(FloatLimits<T>::PosInfinity());
}
static const MinMaxWeightTpl<T> One() {
- return MinMaxWeightTpl<T>(FloatLimits<T>::kNegInfinity);
+ return MinMaxWeightTpl<T>(FloatLimits<T>::NegInfinity());
}
static const MinMaxWeightTpl<T> NoWeight() {
- return MinMaxWeightTpl<T>(FloatLimits<T>::kNumberBad); }
+ return MinMaxWeightTpl<T>(FloatLimits<T>::NumberBad()); }
static const string &Type() {
static const string type = "minmax" +
@@ -477,8 +480,8 @@ class MinMaxWeightTpl : public FloatWeightTpl<T> {
MinMaxWeightTpl<T> Quantize(float delta = kDelta) const {
// If one of infinities, or a NaN
- if (Value() == FloatLimits<T>::kNegInfinity ||
- Value() == FloatLimits<T>::kPosInfinity ||
+ if (Value() == FloatLimits<T>::NegInfinity() ||
+ Value() == FloatLimits<T>::PosInfinity() ||
Value() != Value())
return *this;
else
@@ -541,7 +544,7 @@ inline MinMaxWeightTpl<T> Divide(const MinMaxWeightTpl<T> &w1,
if (!w1.Member() || !w2.Member())
return MinMaxWeightTpl<T>::NoWeight();
// min(w1, x) = w2, w1 >= w2 => min(w1, x) = w2, x = w2
- return w1.Value() >= w2.Value() ? w1 : FloatLimits<T>::kNumberBad;
+ return w1.Value() >= w2.Value() ? w1 : FloatLimits<T>::NumberBad();
}
inline MinMaxWeightTpl<float> Divide(const MinMaxWeightTpl<float> &w1,
diff --git a/src/include/fst/fst.h b/src/include/fst/fst.h
index 9c4d0db..dd11e4f 100644
--- a/src/include/fst/fst.h
+++ b/src/include/fst/fst.h
@@ -36,6 +36,7 @@
#include <fst/register.h>
#include <iostream>
#include <fstream>
+#include <sstream>
#include <fst/symbol-table.h>
#include <fst/util.h>
@@ -229,7 +230,7 @@ class Fst {
}
return Read(strm, FstReadOptions(filename));
} else {
- return Read(std::cin, FstReadOptions("standard input"));
+ return Read(cin, FstReadOptions("standard input"));
}
}
@@ -267,7 +268,6 @@ class Fst {
virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;
protected:
-
bool WriteFile(const string &filename) const {
if (!filename.empty()) {
ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
@@ -277,7 +277,7 @@ class Fst {
}
return Write(strm, FstWriteOptions(filename));
} else {
- return Write(std::cout, FstWriteOptions("standard output"));
+ return Write(cout, FstWriteOptions("standard output"));
}
}
};
@@ -706,12 +706,13 @@ template <class A> class FstImpl {
// This method is used in the cross-type serialization methods Fst::WriteFst.
static void WriteFstHeader(const Fst<A> &fst, ostream &strm,
const FstWriteOptions& opts, int version,
- const string &type, FstHeader *hdr) {
+ const string &type, uint64 properties,
+ FstHeader *hdr) {
if (opts.write_header) {
hdr->SetFstType(type);
hdr->SetArcType(A::Type());
hdr->SetVersion(version);
- hdr->SetProperties(fst.Properties(kFstProperties, false));
+ hdr->SetProperties(properties);
int32 file_flags = 0;
if (fst.InputSymbols() && opts.write_isymbols)
file_flags |= FstHeader::HAS_ISYMBOLS;
@@ -737,14 +738,14 @@ template <class A> class FstImpl {
// returns true on success, false on failure.
static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm,
const FstWriteOptions& opts, int version,
- const string &type, FstHeader *hdr,
- size_t header_offset) {
+ const string &type, uint64 properties,
+ FstHeader *hdr, size_t header_offset) {
strm.seekp(header_offset);
if (!strm) {
LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
return false;
}
- WriteFstHeader(fst, strm, opts, version, type, hdr);
+ WriteFstHeader(fst, strm, opts, version, type, properties, hdr);
if (!strm) {
LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
return false;
diff --git a/src/include/fst/fstlib.h b/src/include/fst/fstlib.h
index c05c775..de5976d 100644
--- a/src/include/fst/fstlib.h
+++ b/src/include/fst/fstlib.h
@@ -142,6 +142,8 @@
#include <fst/state-reachable.h>
#include <iostream>
#include <fstream>
+#include <sstream>
+#include <fst/string.h>
#include <fst/symbol-table.h>
#include <fst/symbol-table-ops.h>
#include <fst/test-properties.h>
diff --git a/src/include/fst/icu.h b/src/include/fst/icu.h
index 6b74c2e..3947716 100644
--- a/src/include/fst/icu.h
+++ b/src/include/fst/icu.h
@@ -13,88 +13,101 @@
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
-// Author: roubert@google.com (Fredrik Roubert)
-
-// Wrapper class for UErrorCode, with conversion operators for direct use in
-// ICU C and C++ APIs.
-//
-// Features:
-// - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
-// removing one common source of errors.
-// - Same use in C APIs taking a UErrorCode* (pointer) and C++ taking
-// UErrorCode& (reference), via conversion operators.
-// - Automatic checking for success when it goes out of scope. On failure,
-// the destructor will FSTERROR() an error message.
-//
-// Most of ICU will handle errors gracefully and provide sensible fallbacks.
-// Using IcuErrorCode, it is therefore possible to write very compact code
-// that does sensible things on failure and provides logging for debugging.
+// Author: sorenj@google.com (Jeffrey Sorensen)
+// roubert@google.com (Fredrik Roubert)
//
-// Example:
-//
-// IcuErrorCode icuerrorcode;
-// return collator.compareUTF8(a, b, icuerrorcode) == UCOL_EQUAL;
+// This library implements an unrestricted Thompson/Pike UTF-8 parser and
+// serializer. UTF-8 is a restricted subset of this byte stream encoding. See
+// http://en.wikipedia.org/wiki/UTF-8 for a good description of the encoding
+// details.
#ifndef FST_LIB_ICU_H_
#define FST_LIB_ICU_H_
-#include <unicode/errorcode.h>
-#include <unicode/unistr.h>
-#include <unicode/ustring.h>
-#include <unicode/utf8.h>
-
-class IcuErrorCode : public icu::ErrorCode {
- public:
- IcuErrorCode() {}
- virtual ~IcuErrorCode() { if (isFailure()) handleFailure(); }
-
- // Redefine 'errorName()' in order to be compatible with ICU version 4.2
- const char* errorName() const {
- return u_errorName(errorCode);
- }
-
- protected:
- virtual void handleFailure() const {
- FSTERROR() << errorName();
-}
-
- private:
- DISALLOW_COPY_AND_ASSIGN(IcuErrorCode);
-};
+#include <iostream>
+#include <fstream>
+#include <sstream>
namespace fst {
template <class Label>
bool UTF8StringToLabels(const string &str, vector<Label> *labels) {
- const char *c_str = str.c_str();
- int32_t length = str.size();
- UChar32 c;
- for (int32_t i = 0; i < length; /* no update */) {
- U8_NEXT(c_str, i, length, c);
- if (c < 0) {
- LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c;
- return false;
+ const char *data = str.data();
+ size_t length = str.size();
+ for (int i = 0; i < length; /* no update */) {
+ int c = data[i++] & 0xff;
+ if ((c & 0x80) == 0) {
+ labels->push_back(c);
+ } else {
+ if ((c & 0xc0) == 0x80) {
+ LOG(ERROR) << "UTF8StringToLabels: continuation byte as lead byte";
+ return false;
+ }
+ int count = (c >= 0xc0) + (c >= 0xe0) + (c >= 0xf0) + (c >= 0xf8) +
+ (c >= 0xfc);
+ int code = c & ((1 << (6 - count)) - 1);
+ while (count != 0) {
+ if (i == length) {
+ LOG(ERROR) << "UTF8StringToLabels: truncated utf-8 byte sequence";
+ return false;
+ }
+ char cb = data[i++];
+ if ((cb & 0xc0) != 0x80) {
+ LOG(ERROR) << "UTF8StringToLabels: missing/invalid continuation byte";
+ return false;
+ }
+ code = (code << 6) | (cb & 0x3f);
+ count--;
+ }
+ if (code < 0) {
+ // This should not be able to happen.
+ LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c;
+ return false;
+ }
+ labels->push_back(code);
}
- labels->push_back(c);
}
return true;
}
template <class Label>
bool LabelsToUTF8String(const vector<Label> &labels, string *str) {
- icu::UnicodeString u_str;
- char c_str[5];
+ ostringstream ostr;
for (size_t i = 0; i < labels.size(); ++i) {
- u_str.setTo(labels[i]);
- IcuErrorCode error;
- u_strToUTF8(c_str, 5, NULL, u_str.getTerminatedBuffer(), -1, error);
- if (error.isFailure()) {
- LOG(ERROR) << "LabelsToUTF8String: Bad encoding: "
- << error.errorName();
+ int32_t code = labels[i];
+ if (code < 0) {
+ LOG(ERROR) << "LabelsToUTF8String: Invalid character found: " << code;
return false;
+ } else if (code < 0x80) {
+ ostr << static_cast<char>(code);
+ } else if (code < 0x800) {
+ ostr << static_cast<char>((code >> 6) | 0xc0);
+ ostr << static_cast<char>((code & 0x3f) | 0x80);
+ } else if (code < 0x10000) {
+ ostr << static_cast<char>((code >> 12) | 0xe0);
+ ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80);
+ ostr << static_cast<char>((code & 0x3f) | 0x80);
+ } else if (code < 0x200000) {
+ ostr << static_cast<char>((code >> 18) | 0xf0);
+ ostr << static_cast<char>(((code >> 12) & 0x3f) | 0x80);
+ ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80);
+ ostr << static_cast<char>((code & 0x3f) | 0x80);
+ } else if (code < 0x4000000) {
+ ostr << static_cast<char>((code >> 24) | 0xf8);
+ ostr << static_cast<char>(((code >> 18) & 0x3f) | 0x80);
+ ostr << static_cast<char>(((code >> 12) & 0x3f) | 0x80);
+ ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80);
+ ostr << static_cast<char>((code & 0x3f) | 0x80);
+ } else {
+ ostr << static_cast<char>((code >> 30) | 0xfc);
+ ostr << static_cast<char>(((code >> 24) & 0x3f) | 0x80);
+ ostr << static_cast<char>(((code >> 18) & 0x3f) | 0x80);
+ ostr << static_cast<char>(((code >> 12) & 0x3f) | 0x80);
+ ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80);
+ ostr << static_cast<char>((code & 0x3f) | 0x80);
}
- *str += c_str;
}
+ *str = ostr.str();
return true;
}
diff --git a/src/include/fst/lock.h b/src/include/fst/lock.h
index 3adf7df..329015d 100644
--- a/src/include/fst/lock.h
+++ b/src/include/fst/lock.h
@@ -16,6 +16,9 @@
//
// \file
// Google-compatibility locking declarations and inline definitions
+//
+// Classes and functions here are no-ops (by design); proper locking requires
+// actual implementation.
#ifndef FST_LIB_LOCK_H__
#define FST_LIB_LOCK_H__
@@ -61,6 +64,14 @@ class MutexLock {
DISALLOW_COPY_AND_ASSIGN(MutexLock);
};
+class ReaderMutexLock {
+ public:
+ ReaderMutexLock(Mutex *) {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReaderMutexLock);
+};
+
// Reference counting - single-thread implementation
class RefCounter {
public:
diff --git a/src/include/fst/lookahead-matcher.h b/src/include/fst/lookahead-matcher.h
index 10d9c01..f927d65 100644
--- a/src/include/fst/lookahead-matcher.h
+++ b/src/include/fst/lookahead-matcher.h
@@ -96,35 +96,35 @@ namespace fst {
// LOOK-AHEAD FLAGS (see also kMatcherFlags in matcher.h):
//
// Matcher is a lookahead matcher when 'match_type' is MATCH_INPUT.
-const uint32 kInputLookAheadMatcher = 0x00000001;
+const uint32 kInputLookAheadMatcher = 0x00000010;
// Matcher is a lookahead matcher when 'match_type' is MATCH_OUTPUT.
-const uint32 kOutputLookAheadMatcher = 0x00000002;
+const uint32 kOutputLookAheadMatcher = 0x00000020;
// A non-trivial implementation of LookAheadWeight() method defined and
// should be used?
-const uint32 kLookAheadWeight = 0x00000004;
+const uint32 kLookAheadWeight = 0x00000040;
// A non-trivial implementation of LookAheadPrefix() method defined and
// should be used?
-const uint32 kLookAheadPrefix = 0x00000008;
+const uint32 kLookAheadPrefix = 0x00000080;
// Look-ahead of matcher FST non-epsilon arcs?
-const uint32 kLookAheadNonEpsilons = 0x00000010;
+const uint32 kLookAheadNonEpsilons = 0x00000100;
// Look-ahead of matcher FST epsilon arcs?
-const uint32 kLookAheadEpsilons = 0x00000020;
+const uint32 kLookAheadEpsilons = 0x00000200;
// Ignore epsilon paths for the lookahead prefix? Note this gives
// correct results in composition only with an appropriate composition
// filter since it depends on the filter blocking the ignored paths.
-const uint32 kLookAheadNonEpsilonPrefix = 0x00000040;
+const uint32 kLookAheadNonEpsilonPrefix = 0x00000400;
// For LabelLookAheadMatcher, save relabeling data to file
-const uint32 kLookAheadKeepRelabelData = 0x00000080;
+const uint32 kLookAheadKeepRelabelData = 0x00000800;
// Flags used for lookahead matchers.
-const uint32 kLookAheadFlags = 0x000000ff;
+const uint32 kLookAheadFlags = 0x00000ff0;
// LookAhead Matcher interface, templated on the Arc definition; used
// for lookahead matcher specializations that are returned by the
@@ -601,10 +601,12 @@ bool LabelLookAheadMatcher<M, F, S>::LookAheadFst(const L &fst, StateId s) {
bool reach_arc = label_reachable_->Reach(&aiter, 0,
internal::NumArcs(*lfst_, s),
reach_input, compute_weight);
+ Weight lfinal = internal::Final(*lfst_, s);
+ bool reach_final = lfinal != Weight::Zero() && label_reachable_->ReachFinal();
if (reach_arc) {
ssize_t begin = label_reachable_->ReachBegin();
ssize_t end = label_reachable_->ReachEnd();
- if (compute_prefix && end - begin == 1) {
+ if (compute_prefix && end - begin == 1 && !reach_final) {
aiter.Seek(begin);
SetLookAheadPrefix(aiter.Value());
compute_weight = false;
@@ -612,9 +614,6 @@ bool LabelLookAheadMatcher<M, F, S>::LookAheadFst(const L &fst, StateId s) {
SetLookAheadWeight(label_reachable_->ReachWeight());
}
}
- Weight lfinal = internal::Final(*lfst_, s);
- bool reach_final = lfinal != Weight::Zero() &&
- label_reachable_->ReachFinal();
if (reach_final && compute_weight)
SetLookAheadWeight(reach_arc ?
Plus(LookAheadWeight(), lfinal) : lfinal);
diff --git a/src/include/fst/matcher.h b/src/include/fst/matcher.h
index a89325b..5ab3d26 100644
--- a/src/include/fst/matcher.h
+++ b/src/include/fst/matcher.h
@@ -83,8 +83,17 @@ namespace fst {
// uint64 Properties(uint64 props) const;
// };
+//
+// MATCHER FLAGS (see also kLookAheadFlags in lookahead-matcher.h)
+//
+// Matcher prefers being used as the matching side in composition.
+const uint32 kPreferMatch = 0x00000001;
+
+// Matcher needs to be used as the matching side in composition.
+const uint32 kRequireMatch = 0x00000002;
+
// Flags used for basic matchers (see also lookahead.h).
-const uint32 kMatcherFlags = 0x00000000;
+const uint32 kMatcherFlags = kPreferMatch | kRequireMatch;
// Matcher interface, templated on the Arc definition; used
// for matcher specializations that are returned by the
@@ -452,6 +461,12 @@ class RhoMatcher : public MatcherBase<typename M::Arc> {
virtual uint64 Properties(uint64 props) const;
+ virtual uint32 Flags() const {
+ if (rho_label_ == kNoLabel || match_type_ == MATCH_NONE)
+ return matcher_->Flags();
+ return matcher_->Flags() | kRequireMatch;
+ }
+
private:
virtual void SetState_(StateId s) { SetState(s); }
virtual bool Find_(Label label) { return Find(label); }
@@ -631,6 +646,15 @@ class SigmaMatcher : public MatcherBase<typename M::Arc> {
virtual uint64 Properties(uint64 props) const;
+ virtual uint32 Flags() const {
+ if (sigma_label_ == kNoLabel || match_type_ == MATCH_NONE)
+ return matcher_->Flags();
+ // kRequireMatch temporarily disabled until issues
+ // in //speech/gaudi/annotation/util/denorm are resolved.
+ // return matcher_->Flags() | kRequireMatch;
+ return matcher_->Flags();
+ }
+
private:
virtual void SetState_(StateId s) { SetState(s); }
virtual bool Find_(Label label) { return Find(label); }
@@ -722,11 +746,6 @@ class PhiMatcher : public MatcherBase<typename M::Arc> {
match_type_ = MATCH_NONE;
error_ = true;
}
- if (phi_label == 0) {
- FSTERROR() << "PhiMatcher: 0 cannot be used as phi_label";
- phi_label_ = kNoLabel;
- error_ = true;
- }
if (rewrite_mode == MATCHER_REWRITE_AUTO)
rewrite_both_ = fst.Properties(kAcceptor, true);
@@ -768,10 +787,15 @@ class PhiMatcher : public MatcherBase<typename M::Arc> {
const Arc& Value() const {
if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) {
return matcher_->Value();
+ } else if (phi_match_ == 0) { // Virtual epsilon loop
+ phi_arc_ = Arc(kNoLabel, 0, Weight::One(), state_);
+ if (match_type_ == MATCH_OUTPUT)
+ swap(phi_arc_.ilabel, phi_arc_.olabel);
+ return phi_arc_;
} else {
phi_arc_ = matcher_->Value();
phi_arc_.weight = Times(phi_weight_, phi_arc_.weight);
- if (phi_match_ != kNoLabel) {
+ if (phi_match_ != kNoLabel) { // Phi loop match
if (rewrite_both_) {
if (phi_arc_.ilabel == phi_label_)
phi_arc_.ilabel = phi_match_;
@@ -793,6 +817,12 @@ class PhiMatcher : public MatcherBase<typename M::Arc> {
virtual uint64 Properties(uint64 props) const;
+ virtual uint32 Flags() const {
+ if (phi_label_ == kNoLabel || match_type_ == MATCH_NONE)
+ return matcher_->Flags();
+ return matcher_->Flags() | kRequireMatch;
+ }
+
private:
virtual void SetState_(StateId s) { SetState(s); }
virtual bool Find_(Label label) { return Find(label); }
@@ -818,19 +848,33 @@ private:
template <class M> inline
bool PhiMatcher<M>::Find(Label match_label) {
- if (match_label == phi_label_ && phi_label_ != kNoLabel) {
- FSTERROR() << "PhiMatcher::Find: bad label (phi)";
+ if (match_label == phi_label_ && phi_label_ != kNoLabel && phi_label_ != 0) {
+ FSTERROR() << "PhiMatcher::Find: bad label (phi): " << phi_label_;
error_ = true;
return false;
}
matcher_->SetState(state_);
phi_match_ = kNoLabel;
phi_weight_ = Weight::One();
+ if (phi_label_ == 0) { // When 'phi_label_ == 0',
+ if (match_label == kNoLabel) // there are no more true epsilon arcs,
+ return false;
+ if (match_label == 0) { // but virtual eps loop need to be returned
+ if (!matcher_->Find(kNoLabel)) {
+ return matcher_->Find(0);
+ } else {
+ phi_match_ = 0;
+ return true;
+ }
+ }
+ }
if (!has_phi_ || match_label == 0 || match_label == kNoLabel)
return matcher_->Find(match_label);
StateId state = state_;
while (!matcher_->Find(match_label)) {
- if (!matcher_->Find(phi_label_))
+ // Look for phi transition (if phi_label_ == 0, we need to look
+ // for -1 to avoid getting the virtual self-loop)
+ if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_))
return false;
if (phi_loop_ && matcher_->Value().nextstate == state) {
phi_match_ = match_label;
@@ -856,6 +900,10 @@ uint64 PhiMatcher<M>::Properties(uint64 inprops) const {
if (match_type_ == MATCH_NONE) {
return outprops;
} else if (match_type_ == MATCH_INPUT) {
+ if (phi_label_ == 0) {
+ outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons;
+ outprops |= kNoEpsilons | kNoIEpsilons;
+ }
if (rewrite_both_) {
return outprops & ~(kODeterministic | kNonODeterministic | kString |
kILabelSorted | kNotILabelSorted |
@@ -866,6 +914,10 @@ uint64 PhiMatcher<M>::Properties(uint64 inprops) const {
kOLabelSorted | kNotOLabelSorted);
}
} else if (match_type_ == MATCH_OUTPUT) {
+ if (phi_label_ == 0) {
+ outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons;
+ outprops |= kNoEpsilons | kNoOEpsilons;
+ }
if (rewrite_both_) {
return outprops & ~(kIDeterministic | kNonIDeterministic | kString |
kILabelSorted | kNotILabelSorted |
diff --git a/src/include/fst/mutable-fst.h b/src/include/fst/mutable-fst.h
index 9afcab3..09eb237 100644
--- a/src/include/fst/mutable-fst.h
+++ b/src/include/fst/mutable-fst.h
@@ -128,7 +128,7 @@ class MutableFst : public ExpandedFst<A> {
}
return Read(strm, FstReadOptions(filename));
} else {
- return Read(std::cin, FstReadOptions("standard input"));
+ return Read(cin, FstReadOptions("standard input"));
}
} else { // Converts to 'convert_type' if not mutable.
Fst<A> *ifst = Fst<A>::Read(filename);
diff --git a/src/include/fst/queue.h b/src/include/fst/queue.h
index 707dffc..e31f087 100644
--- a/src/include/fst/queue.h
+++ b/src/include/fst/queue.h
@@ -451,7 +451,7 @@ class SccQueue : public QueueBase<S> {
while ((front_ <= back_) &&
(((*queue_)[front_] && (*queue_)[front_]->Empty())
|| (((*queue_)[front_] == 0) &&
- ((front_ > trivial_queue_.size())
+ ((front_ >= trivial_queue_.size())
|| (trivial_queue_[front_] == kNoStateId)))))
++front_;
if ((*queue_)[front_])
@@ -493,7 +493,7 @@ class SccQueue : public QueueBase<S> {
else if ((*queue_)[front_])
return (*queue_)[front_]->Empty();
else
- return (front_ > trivial_queue_.size())
+ return (front_ >= trivial_queue_.size())
|| (trivial_queue_[front_] == kNoStateId);
}
diff --git a/src/include/fst/register.h b/src/include/fst/register.h
index 55651cd..ea3f4d8 100644
--- a/src/include/fst/register.h
+++ b/src/include/fst/register.h
@@ -28,6 +28,7 @@
#include <fst/compat.h>
#include <iostream>
#include <fstream>
+#include <sstream>
#include <fst/util.h>
#include <fst/generic-register.h>
diff --git a/src/include/fst/rmepsilon.h b/src/include/fst/rmepsilon.h
index ee9753e..32e64de 100644
--- a/src/include/fst/rmepsilon.h
+++ b/src/include/fst/rmepsilon.h
@@ -110,7 +110,6 @@ class RmEpsilonState {
class ElementKey {
public:
size_t operator()(const Element& e) const {
- return static_cast<size_t>(e.nextstate);
return static_cast<size_t>(e.nextstate +
e.ilabel * kPrime0 +
e.olabel * kPrime1);
diff --git a/src/include/fst/script/compile-impl.h b/src/include/fst/script/compile-impl.h
index 4aab15b..1743452 100644
--- a/src/include/fst/script/compile-impl.h
+++ b/src/include/fst/script/compile-impl.h
@@ -31,6 +31,7 @@ using std::vector;
#include <iostream>
#include <fstream>
+#include <sstream>
#include <fst/fst.h>
#include <fst/util.h>
#include <fst/vector-fst.h>
diff --git a/src/include/fst/script/draw-impl.h b/src/include/fst/script/draw-impl.h
index e346649..893e258 100644
--- a/src/include/fst/script/draw-impl.h
+++ b/src/include/fst/script/draw-impl.h
@@ -139,9 +139,9 @@ template <class A> class FstDrawer {
EscapeChars(symbol, &nsymbol);
PrintString(nsymbol);
} else {
- ostringstream sid;
- sid << id;
- PrintString(sid.str());
+ string idstr;
+ Int64ToStr(id, &idstr);
+ PrintString(idstr);
}
}
diff --git a/src/include/fst/script/draw.h b/src/include/fst/script/draw.h
index 1611ad1..2b66373 100644
--- a/src/include/fst/script/draw.h
+++ b/src/include/fst/script/draw.h
@@ -22,6 +22,7 @@
#include <fst/script/draw-impl.h>
#include <iostream>
#include <fstream>
+#include <sstream>
namespace fst {
namespace script {
diff --git a/src/include/fst/script/fst-class.h b/src/include/fst/script/fst-class.h
index 3eacab4..a820c1c 100644
--- a/src/include/fst/script/fst-class.h
+++ b/src/include/fst/script/fst-class.h
@@ -24,6 +24,7 @@
#include <fst/vector-fst.h>
#include <iostream>
#include <fstream>
+#include <sstream>
// Classes to support "boxing" all existing types of FST arcs in a single
// FstClass which hides the arc types. This allows clients to load
@@ -52,6 +53,7 @@ class FstClassBase {
virtual const SymbolTable *InputSymbols() const = 0;
virtual const SymbolTable *OutputSymbols() const = 0;
virtual void Write(const string& fname) const = 0;
+ virtual void Write(ostream &ostr, const FstWriteOptions &opts) const = 0;
virtual uint64 Properties(uint64 mask, bool test) const = 0;
virtual ~FstClassBase() { }
};
@@ -114,12 +116,18 @@ class FstClassImpl : public FstClassImplBase {
impl_->Write(fname);
}
+ virtual void Write(ostream &ostr, const FstWriteOptions &opts) const {
+ impl_->Write(ostr, opts);
+ }
+
virtual uint64 Properties(uint64 mask, bool test) const {
return impl_->Properties(mask, test);
}
virtual ~FstClassImpl() { delete impl_; }
+ Fst<Arc> *GetImpl() const { return impl_; }
+
Fst<Arc> *GetImpl() { return impl_; }
virtual FstClassImpl *Copy() {
@@ -154,13 +162,25 @@ class FstClass : public FstClassBase {
}
}
+ FstClass() : impl_(NULL) {
+ }
+
template<class Arc>
- explicit FstClass(Fst<Arc> *fst) : impl_(new FstClassImpl<Arc>(fst)) { }
+ explicit FstClass(Fst<Arc> *fst) : impl_(new FstClassImpl<Arc>(fst)) {
+ }
explicit FstClass(const FstClass &other) : impl_(other.impl_->Copy()) { }
+ FstClass &operator=(const FstClass &other) {
+ delete impl_;
+ impl_ = other.impl_->Copy();
+ return *this;
+ }
+
static FstClass *Read(const string &fname);
+ static FstClass *Read(istream &istr, const string &source);
+
virtual const string &ArcType() const {
return impl_->ArcType();
}
@@ -185,6 +205,10 @@ class FstClass : public FstClassBase {
impl_->Write(fname);
}
+ virtual void Write(ostream &ostr, const FstWriteOptions &opts) const {
+ impl_->Write(ostr, opts);
+ }
+
virtual uint64 Properties(uint64 mask, bool test) const {
return impl_->Properties(mask, test);
}
@@ -214,6 +238,8 @@ class FstClass : public FstClassBase {
<< "particular arc type.";
return 0;
}
+
+
protected:
explicit FstClass(FstClassImplBase *impl) : impl_(impl) { }
@@ -233,7 +259,12 @@ class FstClass : public FstClassBase {
}
}
+ FstClassImplBase *GetImpl() const { return impl_; }
+
FstClassImplBase *GetImpl() { return impl_; }
+
+// friend ostream &operator<<(ostream&, const FstClass&);
+
private:
FstClassImplBase *impl_;
};
@@ -269,6 +300,14 @@ class MutableFstClass : public FstClass {
}
}
+ virtual void Write(const string &fname) const {
+ GetImpl()->Write(fname);
+ }
+
+ virtual void Write(ostream &ostr, const FstWriteOptions &opts) const {
+ GetImpl()->Write(ostr, opts);
+ }
+
static MutableFstClass *Read(const string &fname, bool convert = false);
virtual void SetInputSymbols(SymbolTable *is) {
@@ -338,6 +377,4 @@ class VectorFstClass : public MutableFstClass {
} // namespace script
} // namespace fst
-
-
#endif // FST_SCRIPT_FST_CLASS_H_
diff --git a/src/include/fst/script/text-io.h b/src/include/fst/script/text-io.h
index 95cc182..d97a007 100644
--- a/src/include/fst/script/text-io.h
+++ b/src/include/fst/script/text-io.h
@@ -32,6 +32,7 @@ using std::vector;
#include <iostream>
#include <fstream>
+#include <sstream>
#include <fst/script/weight-class.h>
namespace fst {
diff --git a/src/include/fst/script/weight-class.h b/src/include/fst/script/weight-class.h
index 5a4890f..228216d 100644
--- a/src/include/fst/script/weight-class.h
+++ b/src/include/fst/script/weight-class.h
@@ -56,9 +56,9 @@ struct WeightClassImpl : public WeightImplBase {
}
virtual string to_string() const {
- ostringstream s;
- s << weight;
- return s.str();
+ string str;
+ WeightToStr(weight, &str);
+ return str;
}
virtual bool operator == (const WeightImplBase &other) const {
diff --git a/src/include/fst/shortest-distance.h b/src/include/fst/shortest-distance.h
index 5d38409..9320c4c 100644
--- a/src/include/fst/shortest-distance.h
+++ b/src/include/fst/shortest-distance.h
@@ -178,7 +178,7 @@ void ShortestDistanceState<Arc, Queue, ArcFilter>::ShortestDistance(
!aiter.Done();
aiter.Next()) {
const Arc &arc = aiter.Value();
- if (!arc_filter_(arc) || arc.weight == Weight::Zero())
+ if (!arc_filter_(arc))
continue;
while (distance_->size() <= arc.nextstate) {
distance_->push_back(Weight::Zero());
diff --git a/src/include/fst/signed-log-weight.h b/src/include/fst/signed-log-weight.h
index da96479..61adefb 100644
--- a/src/include/fst/signed-log-weight.h
+++ b/src/include/fst/signed-log-weight.h
@@ -113,9 +113,9 @@ inline SignedLogWeightTpl<T> Plus(const SignedLogWeightTpl<T> &w1,
bool s2 = w2.Value1().Value() > 0.0;
T f1 = w1.Value2().Value();
T f2 = w2.Value2().Value();
- if (f1 == FloatLimits<T>::kPosInfinity)
+ if (f1 == FloatLimits<T>::PosInfinity())
return w2;
- else if (f2 == FloatLimits<T>::kPosInfinity)
+ else if (f2 == FloatLimits<T>::PosInfinity())
return w1;
else if (f1 == f2) {
if (s1 == s2)
@@ -173,12 +173,12 @@ inline SignedLogWeightTpl<T> Divide(const SignedLogWeightTpl<T> &w1,
bool s2 = w2.Value1().Value() > 0.0;
T f1 = w1.Value2().Value();
T f2 = w2.Value2().Value();
- if (f2 == FloatLimits<T>::kPosInfinity)
+ if (f2 == FloatLimits<T>::PosInfinity())
return SignedLogWeightTpl<T>(TropicalWeight(1.0),
- FloatLimits<T>::kNumberBad);
- else if (f1 == FloatLimits<T>::kPosInfinity)
+ FloatLimits<T>::NumberBad());
+ else if (f1 == FloatLimits<T>::PosInfinity())
return SignedLogWeightTpl<T>(TropicalWeight(1.0),
- FloatLimits<T>::kPosInfinity);
+ FloatLimits<T>::PosInfinity());
else if (s1 == s2)
return SignedLogWeightTpl<T>(TropicalWeight(1.0), (f1 - f2));
else
diff --git a/src/include/fst/slist.h b/src/include/fst/slist.h
index 9f94027..d061ebe 100644
--- a/src/include/fst/slist.h
+++ b/src/include/fst/slist.h
@@ -22,7 +22,7 @@
#include <fst/config.h>
-#if !defined(__ANDROID__) && defined(HAVE___GNU_CXX__SLIST_INT_)
+#if !defined(__ANDROID__) && HAVE___GNU_CXX__SLIST_INT_
#include <slist>
diff --git a/src/include/fst/state-map.h b/src/include/fst/state-map.h
index ace4a3c..454db24 100644
--- a/src/include/fst/state-map.h
+++ b/src/include/fst/state-map.h
@@ -295,6 +295,10 @@ class StateMapFstImpl : public CacheImpl<B> {
SetArcs(s);
}
+ const Fst<A> &GetFst() const {
+ return *fst_;
+ }
+
private:
void Init() {
SetType("statemap");
@@ -364,10 +368,10 @@ class StateMapFst : public ImplToFst< StateMapFstImpl<A, B, C> > {
GetImpl()->InitArcIterator(s, data);
}
- private:
- // Makes visible to friends.
+ protected:
Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+ private:
void operator=(const StateMapFst<A, B, C> &fst); // disallow
};
diff --git a/src/include/fst/string.h b/src/include/fst/string.h
index 3099b87..d51182e 100644
--- a/src/include/fst/string.h
+++ b/src/include/fst/string.h
@@ -24,6 +24,7 @@
#define FST_LIB_STRING_H_
#include <fst/compact-fst.h>
+#include <fst/icu.h>
#include <fst/mutable-fst.h>
DECLARE_string(fst_field_separator);
@@ -48,7 +49,7 @@ class StringCompiler {
// Compile string 's' into FST 'fst'.
template <class F>
- bool operator()(const string &s, F *fst) {
+ bool operator()(const string &s, F *fst) const {
vector<Label> labels;
if (!ConvertStringToLabels(s, &labels))
return false;
diff --git a/src/include/fst/symbol-table.h b/src/include/fst/symbol-table.h
index 93ebe76..6eb6c2d 100644
--- a/src/include/fst/symbol-table.h
+++ b/src/include/fst/symbol-table.h
@@ -33,6 +33,7 @@ using std::vector;
#include <fst/compat.h>
#include <iostream>
#include <fstream>
+#include <sstream>
#include <map>
@@ -56,6 +57,13 @@ struct SymbolTableReadOptions {
string source;
};
+struct SymbolTableTextOptions {
+ SymbolTableTextOptions();
+
+ bool allow_negative;
+ string fst_field_separator;
+};
+
class SymbolTableImpl {
public:
SymbolTableImpl(const string &name)
@@ -88,9 +96,9 @@ class SymbolTableImpl {
return (key == -1) ? AddSymbol(symbol, available_key_++) : key;
}
- static SymbolTableImpl* ReadText(istream &strm,
- const string &name,
- bool allow_negative = false);
+ static SymbolTableImpl* ReadText(
+ istream &strm, const string &name,
+ const SymbolTableTextOptions &opts = SymbolTableTextOptions());
static SymbolTableImpl* Read(istream &strm,
const SymbolTableReadOptions& opts);
@@ -149,13 +157,11 @@ class SymbolTableImpl {
}
string CheckSum() const {
- MutexLock check_sum_lock(&check_sum_mutex_);
MaybeRecomputeCheckSum();
return check_sum_string_;
}
string LabeledCheckSum() const {
- MutexLock check_sum_lock(&check_sum_mutex_);
MaybeRecomputeCheckSum();
return labeled_check_sum_string_;
}
@@ -171,6 +177,8 @@ class SymbolTableImpl {
private:
// Recomputes the checksums (both of them) if we've had changes since the last
// computation (i.e., if check_sum_finalized_ is false).
+ // Takes ~2.5 microseconds (dbg) or ~230 nanoseconds (opt) on a 2.67GHz Xeon
+ // if the checksum is up-to-date (requiring no recomputation).
void MaybeRecomputeCheckSum() const;
struct StrCmp {
@@ -188,8 +196,6 @@ class SymbolTableImpl {
mutable RefCounter ref_count_;
mutable bool check_sum_finalized_;
- mutable CheckSummer check_sum_;
- mutable CheckSummer labeled_check_sum_;
mutable string check_sum_string_;
mutable string labeled_check_sum_string_;
mutable Mutex check_sum_mutex_;
@@ -212,6 +218,9 @@ class SymbolTable {
public:
static const int64 kNoSymbol = -1;
+ // Construct symbol table with an unspecified name.
+ SymbolTable() : impl_(new SymbolTableImpl("<unspecified>")) {}
+
// Construct symbol table with a unique name.
SymbolTable(const string& name) : impl_(new SymbolTableImpl(name)) {}
@@ -226,14 +235,21 @@ class SymbolTable {
if (!impl_->DecrRefCount()) delete impl_;
}
+ // Copys the implemenation from one symbol table to another.
+ void operator=(const SymbolTable &st) {
+ if (impl_ != st.impl_) {
+ st.impl_->IncrRefCount();
+ if (!impl_->DecrRefCount()) delete impl_;
+ impl_ = st.impl_;
+ }
+ }
+
// Read an ascii representation of the symbol table from an istream. Pass a
// name to give the resulting SymbolTable.
- static SymbolTable* ReadText(istream &strm,
- const string& name,
- bool allow_negative = false) {
- SymbolTableImpl* impl = SymbolTableImpl::ReadText(strm,
- name,
- allow_negative);
+ static SymbolTable* ReadText(
+ istream &strm, const string& name,
+ const SymbolTableTextOptions &opts = SymbolTableTextOptions()) {
+ SymbolTableImpl* impl = SymbolTableImpl::ReadText(strm, name, opts);
if (!impl)
return 0;
else
@@ -242,13 +258,13 @@ class SymbolTable {
// read an ascii representation of the symbol table
static SymbolTable* ReadText(const string& filename,
- bool allow_negative = false) {
+ const SymbolTableTextOptions &opts = SymbolTableTextOptions()) {
ifstream strm(filename.c_str(), ifstream::in);
if (!strm) {
LOG(ERROR) << "SymbolTable::ReadText: Can't open file " << filename;
return 0;
}
- return ReadText(strm, filename, allow_negative);
+ return ReadText(strm, filename, opts);
}
@@ -341,7 +357,9 @@ class SymbolTable {
}
// Dump an ascii text representation of the symbol table via a stream
- virtual bool WriteText(ostream &strm) const;
+ virtual bool WriteText(
+ ostream &strm,
+ const SymbolTableTextOptions &opts = SymbolTableTextOptions()) const;
// Dump an ascii text representation of the symbol table
bool WriteText(const string& filename) const {
@@ -404,8 +422,6 @@ class SymbolTable {
private:
SymbolTableImpl* impl_;
-
- void operator=(const SymbolTable &table); // disallow
};
@@ -502,6 +518,20 @@ SymbolTable *RelabelSymbolTable(const SymbolTable *table,
return new_table;
}
+// Symbol Table Serialization
+inline void SymbolTableToString(const SymbolTable *table, string *result) {
+ ostringstream ostrm;
+ table->Write(ostrm);
+ *result = ostrm.str();
+}
+
+inline SymbolTable *StringToSymbolTable(const string &s) {
+ istringstream istrm(s);
+ return SymbolTable::Read(istrm, SymbolTableReadOptions());
+}
+
+
+
} // namespace fst
#endif // FST_LIB_SYMBOL_TABLE_H__
diff --git a/src/include/fst/test-properties.h b/src/include/fst/test-properties.h
index db1ddcc..12bcba7 100644
--- a/src/include/fst/test-properties.h
+++ b/src/include/fst/test-properties.h
@@ -125,13 +125,14 @@ uint64 ComputeProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known,
siter.Next()) {
StateId s = siter.Value();
- Arc prev_arc(kNoLabel, kNoLabel, Weight::One(), 0);
+ Arc prev_arc;
// Create these only if we need to
if (mask & (kIDeterministic | kNonIDeterministic))
ilabels = new unordered_set<Label>;
if (mask & (kODeterministic | kNonODeterministic))
olabels = new unordered_set<Label>;
+ bool first_arc = true;
for (ArcIterator< Fst<Arc> > aiter(fst, s);
!aiter.Done();
aiter.Next()) {
@@ -161,13 +162,15 @@ uint64 ComputeProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known,
comp_props |= kOEpsilons;
comp_props &= ~kNoOEpsilons;
}
- if (prev_arc.ilabel != kNoLabel && arc.ilabel < prev_arc.ilabel) {
- comp_props |= kNotILabelSorted;
- comp_props &= ~kILabelSorted;
- }
- if (prev_arc.olabel != kNoLabel && arc.olabel < prev_arc.olabel) {
- comp_props |= kNotOLabelSorted;
- comp_props &= ~kOLabelSorted;
+ if (!first_arc) {
+ if (arc.ilabel < prev_arc.ilabel) {
+ comp_props |= kNotILabelSorted;
+ comp_props &= ~kILabelSorted;
+ }
+ if (arc.olabel < prev_arc.olabel) {
+ comp_props |= kNotOLabelSorted;
+ comp_props &= ~kOLabelSorted;
+ }
}
if (arc.weight != Weight::One() && arc.weight != Weight::Zero()) {
comp_props |= kWeighted;
@@ -182,6 +185,7 @@ uint64 ComputeProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known,
comp_props &= ~kString;
}
prev_arc = arc;
+ first_arc = false;
if (ilabels)
ilabels->insert(arc.ilabel);
if (olabels)
diff --git a/src/include/fst/util.h b/src/include/fst/util.h
index 87231e1..a325beb 100644
--- a/src/include/fst/util.h
+++ b/src/include/fst/util.h
@@ -41,6 +41,7 @@ using std::vector;
#include <iostream>
#include <fstream>
+#include <sstream>
//
// UTILITY FOR ERROR HANDLING
@@ -264,7 +265,7 @@ void WeightToStr(Weight w, string *s) {
ostringstream strm;
strm.precision(9);
strm << w;
- *s += strm.str();
+ s->append(strm.str().data(), strm.str().size());
}
// Utilities for reading/writing label pairs
@@ -312,7 +313,7 @@ bool ReadLabelPairs(const string& filename,
template <typename Label>
bool WriteLabelPairs(const string& filename,
const vector<pair<Label, Label> >& pairs) {
- ostream *strm = &std::cout;
+ ostream *strm = &cout;
if (!filename.empty()) {
strm = new ofstream(filename.c_str());
if (!*strm) {
@@ -329,7 +330,7 @@ bool WriteLabelPairs(const string& filename,
<< (filename.empty() ? "standard output" : filename);
return false;
}
- if (strm != &std::cout)
+ if (strm != &cout)
delete strm;
return true;
}
diff --git a/src/include/fst/vector-fst.h b/src/include/fst/vector-fst.h
index f6d8a6d..8b80876 100644
--- a/src/include/fst/vector-fst.h
+++ b/src/include/fst/vector-fst.h
@@ -273,9 +273,10 @@ class VectorFstImpl : public VectorFstBaseImpl< VectorState<A> > {
SetProperties(DeleteArcsProperties(Properties()));
}
- private:
// Properties always true of this Fst class
static const uint64 kStaticProperties = kExpanded | kMutable;
+
+ private:
// Current file format version
static const int kFileVersion = 2;
// Minimum file format version supported
@@ -542,7 +543,10 @@ bool VectorFst<A>::WriteFst(const F &fst, ostream &strm,
hdr.SetNumStates(CountStates(fst));
update_header = false;
}
- FstImpl<A>::WriteFstHeader(fst, strm, opts, kFileVersion, "vector", &hdr);
+ uint64 properties = fst.Properties(kCopyProperties, false) |
+ VectorFstImpl<A>::kStaticProperties;
+ FstImpl<A>::WriteFstHeader(fst, strm, opts, kFileVersion, "vector",
+ properties, &hdr);
StateId num_states = 0;
for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
typename A::StateId s = siter.Value();
@@ -566,7 +570,7 @@ bool VectorFst<A>::WriteFst(const F &fst, ostream &strm,
if (update_header) {
hdr.SetNumStates(num_states);
return FstImpl<A>::UpdateFstHeader(fst, strm, opts, kFileVersion, "vector",
- &hdr, start_offset);
+ properties, &hdr, start_offset);
} else {
if (num_states != hdr.NumStates()) {
LOG(ERROR) << "Inconsistent number of states observed during write";
diff --git a/src/include/fst/visit.h b/src/include/fst/visit.h
index 31a00a8..a02d86a 100644
--- a/src/include/fst/visit.h
+++ b/src/include/fst/visit.h
@@ -166,7 +166,8 @@ void Visit(const Fst<Arc> &fst, V *visitor, Q *queue, ArcFilter filter) {
// Finds next tree root
for (root = root == start ? 0 : root + 1;
root < nstates && state_status[root] != kWhiteState;
- ++root);
+ ++root) {
+ }
// Check for a state beyond the largest known state
if (!expanded && root == nstates) {
diff --git a/src/include/fst/weight.h b/src/include/fst/weight.h
index 72f5a22..7eb4bb1 100644
--- a/src/include/fst/weight.h
+++ b/src/include/fst/weight.h
@@ -28,16 +28,18 @@
// A left semiring distributes on the left; a right semiring is
// similarly defined.
//
-// A Weight class is required to be (at least) a left or right semiring.
+// A Weight class must have binary functions =Plus= and =Times= and
+// static member functions =Zero()= and =One()= and these must form
+// (at least) a left or right semiring.
//
// In addition, the following should be defined for a Weight:
// Member: predicate on set membership.
-// NoWeight: returns an element that is not a member, should only be
-// used to signal an error.
-// >>: reads weight.
-// <<: prints weight.
-// Read(istream &strm): reads from an input stream.
-// Write(ostream &strm): writes to an output stream.
+// NoWeight: static member function that returns an element that is
+// not a set member; used to signal an error.
+// >>: reads textual representation of a weight.
+// <<: prints textual representation of a weight.
+// Read(istream &strm): reads binary representation of a weight.
+// Write(ostream &strm): writes binary representation of a weight.
// Hash: maps weight to size_t.
// ApproxEqual: approximate equality (for inexact weights)
// Quantize: quantizes wrt delta (for inexact weights)
@@ -46,11 +48,9 @@
// and Times(a, b') == c
// --> a' = Divide(c, b, DIVIDE_RIGHT) if a right semiring, a'.Member()
// and Times(a', b) == c
-// --> b' = Divide(c, a)
-// = Divide(c, a, DIVIDE_ANY)
-// = Divide(c, a, DIVIDE_LEFT)
-// = Divide(c, a, DIVIDE_RIGHT) if a commutative semiring,
-// b'.Member() and Times(a, b') == Times(b', a) == c
+// --> b' = Divide(c, a) = Divide(c, a, DIVIDE_ANY) =
+// Divide(c, a, DIVIDE_LEFT) = Divide(c, a, DIVIDE_RIGHT) if a
+// commutative semiring, b'.Member() and Times(a, b') = Times(b', a) = c
// ReverseWeight: the type of the corresponding reverse weight.
// Typically the same type as Weight for a (both left and right) semiring.
// For the left string semiring, it is the right string semiring.
@@ -66,7 +66,7 @@
// RightSemiring: indicates weights form a right semiring.
// Commutative: for all a,b: Times(a,b) == Times(b,a)
// Idempotent: for all a: Plus(a, a) == a.
-// Path Property: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
+// Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
#ifndef FST_LIB_WEIGHT_H__
diff --git a/src/lib/flags.cc b/src/lib/flags.cc
index d05fd73..222d66b 100644
--- a/src/lib/flags.cc
+++ b/src/lib/flags.cc
@@ -24,14 +24,18 @@
DEFINE_int32(v, 0, "verbose level");
DEFINE_bool(help, false, "show usage information");
+DEFINE_bool(helpshort, false, "show brief usage information");
DEFINE_string(tmpdir, "/tmp/", "temporary directory");
using namespace std;
static string flag_usage;
+static string prog_src;
-void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags) {
+void SetFlags(const char *usage, int *argc, char ***argv,
+ bool remove_flags, const char *src) {
flag_usage = usage;
+ prog_src = src;
int index = 1;
for (; index < *argc; ++index) {
string argval = (*argv)[index];
@@ -82,22 +86,75 @@ void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags) {
}
if (FLAGS_help) {
- ShowUsage();
+ ShowUsage(true);
+ exit(1);
+ }
+
+ if (FLAGS_helpshort) {
+ ShowUsage(false);
exit(1);
}
}
-void ShowUsage() {
+// If flag is defined in file 'src' and 'in_src' true or is not
+// defined in file 'src' and 'in_src' is false, then print usage.
+static void
+ShowUsageRestrict(const std::set< pair<string, string> > &usage_set,
+ const string &src, bool in_src, bool show_file) {
+ string old_file;
+ bool file_out = false;
+ bool usage_out = false;
+ for (std::set< pair<string, string> >::const_iterator it =
+ usage_set.begin();
+ it != usage_set.end();
+ ++it) {
+ const string &file = it->first;
+ const string &usage = it->second;
+
+ bool match = file == src;
+ if ((match && !in_src) || (!match && in_src))
+ continue;
+
+ if (file != old_file) {
+ if (show_file) {
+ if (file_out) cout << "\n";
+ cout << "Flags from: " << file << "\n";
+ file_out = true;
+ }
+ old_file = file;
+ }
+ cout << usage << "\n";
+ usage_out = true;
+ }
+ if (usage_out) cout << "\n";
+}
+
+void ShowUsage(bool long_usage) {
+ std::set< pair<string, string> > usage_set;
+
cout << flag_usage << "\n";
- cout << " Flags Description:\n";
+
FlagRegister<bool> *bool_register = FlagRegister<bool>::GetRegister();
- bool_register->ShowUsage();
+ bool_register->GetUsage(&usage_set);
FlagRegister<string> *string_register = FlagRegister<string>::GetRegister();
- string_register->ShowUsage();
+ string_register->GetUsage(&usage_set);
FlagRegister<int32> *int32_register = FlagRegister<int32>::GetRegister();
- int32_register->ShowUsage();
+ int32_register->GetUsage(&usage_set);
FlagRegister<int64> *int64_register = FlagRegister<int64>::GetRegister();
- int64_register->ShowUsage();
+ int64_register->GetUsage(&usage_set);
FlagRegister<double> *double_register = FlagRegister<double>::GetRegister();
- double_register->ShowUsage();
+ double_register->GetUsage(&usage_set);
+
+ if (!prog_src.empty()) {
+ cout << "PROGRAM FLAGS:\n\n";
+ ShowUsageRestrict(usage_set, prog_src, true, false);
+ }
+
+ if (!long_usage)
+ return;
+
+ if (!prog_src.empty())
+ cout << "LIBRARY FLAGS:\n\n";
+
+ ShowUsageRestrict(usage_set, prog_src, false, true);
}
diff --git a/src/lib/properties.cc b/src/lib/properties.cc
index db0e2c8..9e03d59 100644
--- a/src/lib/properties.cc
+++ b/src/lib/properties.cc
@@ -117,8 +117,10 @@ uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label) {
uint64 outprops = kAccessible;
if (((kAcceptor | kNoIEpsilons) & inprops) || has_subsequential_label)
outprops |= kIDeterministic;
- outprops |= (kError | kAcceptor | kNoEpsilons | kAcyclic |
+ outprops |= (kError | kAcceptor | kAcyclic |
kInitialAcyclic | kCoAccessible | kString) & inprops;
+ if (inprops & kNoIEpsilons)
+ outprops |= kNoEpsilons & inprops;
if (inprops & kAccessible)
outprops |= (kNotAcceptor | kEpsilons | kIEpsilons | kOEpsilons |
kCyclic) & inprops;
@@ -375,6 +377,7 @@ uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
uint64 outprops = (kAcceptor | kUnweighted | kAcyclic | kAccessible)
& inprops1 & inprops2;
outprops |= kError & (inprops1 | inprops2);
+ outprops |= kInitialAcyclic;
bool empty1 = delayed; // Can fst1 be the empty machine?
bool empty2 = delayed; // Can fst2 be the empty machine?
diff --git a/src/lib/symbol-table.cc b/src/lib/symbol-table.cc
index 8b35cdf..a195a7c 100644
--- a/src/lib/symbol-table.cc
+++ b/src/lib/symbol-table.cc
@@ -20,6 +20,7 @@
// Classes to provide symbol-to-integer and integer-to-symbol mappings.
#include <fst/symbol-table.h>
+
#include <fst/util.h>
DEFINE_bool(fst_compat_symbols, true,
@@ -35,9 +36,12 @@ const int kLineLen = 8096;
// Identifies stream data as a symbol table (and its endianity)
static const int32 kSymbolTableMagicNumber = 2125658996;
+SymbolTableTextOptions::SymbolTableTextOptions()
+ : allow_negative(false), fst_field_separator(FLAGS_fst_field_separator) { }
+
SymbolTableImpl* SymbolTableImpl::ReadText(istream &strm,
const string &filename,
- bool allow_negative) {
+ const SymbolTableTextOptions &opts) {
SymbolTableImpl* impl = new SymbolTableImpl(filename);
int64 nline = 0;
@@ -45,7 +49,7 @@ SymbolTableImpl* SymbolTableImpl::ReadText(istream &strm,
while (strm.getline(line, kLineLen)) {
++nline;
vector<char *> col;
- string separator = FLAGS_fst_field_separator + "\n";
+ string separator = opts.fst_field_separator + "\n";
SplitToVector(line, separator.c_str(), &col, true);
if (col.size() == 0) // empty line
continue;
@@ -61,7 +65,7 @@ SymbolTableImpl* SymbolTableImpl::ReadText(istream &strm,
char *p;
int64 key = strtoll(value, &p, 10);
if (p < value + strlen(value) ||
- (!allow_negative && key < 0) || key == -1) {
+ (!opts.allow_negative && key < 0) || key == -1) {
LOG(ERROR) << "SymbolTable::ReadText: Bad non-negative integer \""
<< value << "\" (skipping), "
<< "file = " << filename << ", line = " << nline;
@@ -74,21 +78,30 @@ SymbolTableImpl* SymbolTableImpl::ReadText(istream &strm,
}
void SymbolTableImpl::MaybeRecomputeCheckSum() const {
- if (check_sum_finalized_)
- return;
+ {
+ ReaderMutexLock check_sum_lock(&check_sum_mutex_);
+ if (check_sum_finalized_)
+ return;
+ }
+
+ // We'll aquire an exclusive lock to recompute the checksums.
+ MutexLock check_sum_lock(&check_sum_mutex_);
+ if (check_sum_finalized_) // Another thread (coming in around the same time
+ return; // might have done it already). So we recheck.
// Calculate the original label-agnostic check sum.
- check_sum_.Reset();
+ CheckSummer check_sum;
for (int64 i = 0; i < symbols_.size(); ++i)
- check_sum_.Update(symbols_[i], strlen(symbols_[i]) + 1);
- check_sum_string_ = check_sum_.Digest();
+ check_sum.Update(symbols_[i], strlen(symbols_[i]) + 1);
+ check_sum_string_ = check_sum.Digest();
// Calculate the safer, label-dependent check sum.
- labeled_check_sum_.Reset();
+ CheckSummer labeled_check_sum;
for (int64 key = 0; key < dense_key_limit_; ++key) {
ostringstream line;
line << symbols_[key] << '\t' << key;
- labeled_check_sum_.Update(line.str()); }
+ labeled_check_sum.Update(line.str().data(), line.str().size());
+ }
for (map<int64, const char*>::const_iterator it =
key_map_.begin();
it != key_map_.end();
@@ -96,10 +109,10 @@ void SymbolTableImpl::MaybeRecomputeCheckSum() const {
if (it->first >= dense_key_limit_) {
ostringstream line;
line << it->second << '\t' << it->first;
- labeled_check_sum_.Update(line.str());
+ labeled_check_sum.Update(line.str().data(), line.str().size());
}
}
- labeled_check_sum_string_ = labeled_check_sum_.Digest();
+ labeled_check_sum_string_ = labeled_check_sum.Digest();
check_sum_finalized_ = true;
}
@@ -231,12 +244,22 @@ void SymbolTable::AddTable(const SymbolTable& table) {
impl_->AddSymbol(iter.Symbol());
}
-bool SymbolTable::WriteText(ostream &strm) const {
+bool SymbolTable::WriteText(ostream &strm,
+ const SymbolTableTextOptions &opts) const {
+ if (opts.fst_field_separator.empty()) {
+ LOG(ERROR) << "Missing required field separator";
+ return false;
+ }
+ bool once_only = false;
for (SymbolTableIterator iter(*this); !iter.Done(); iter.Next()) {
ostringstream line;
- line << iter.Symbol() << FLAGS_fst_field_separator[0] << iter.Value()
+ if (iter.Value() < 0 && !opts.allow_negative && !once_only) {
+ LOG(WARNING) << "Negative symbol table entry when not allowed";
+ once_only = true;
+ }
+ line << iter.Symbol() << opts.fst_field_separator[0] << iter.Value()
<< '\n';
- strm.write(line.str().c_str(), line.str().length());
+ strm.write(line.str().data(), line.str().length());
}
return true;
}
diff --git a/src/lib/temp_Android.temp_mk b/src/lib/temp_Android.temp_mk
deleted file mode 100644
index 7a6936e..0000000
--- a/src/lib/temp_Android.temp_mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#
-# Copyright 2012 Google Inc. All Rights Reserved.
-# Author: npereira@google.com (Nicole Pereira)
-#
-# Android makefile for openfst library.
-#
-
-OPENFST_DIR := $(call my-dir)
-
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-
-LOCAL_MODULE := libopenfst_patts
-
-LOCAL_CPP_EXTENSION := .cc
-LOCAL_C_INCLUDES += $(OPENFST_DIR)/src/include/
-
-#LOCAL_SRC_FILES := $(call private-function-all-cpp-files-under, src)
-
-include $(BUILD_STATIC_LIBRARY)
diff --git a/src/lib/util.cc b/src/lib/util.cc
index eeba92b..f754da5 100644
--- a/src/lib/util.cc
+++ b/src/lib/util.cc
@@ -26,7 +26,7 @@
DEFINE_bool(fst_error_fatal, true,
"FST errors are fatal; o.w. return objects flagged as bad: "
- " e.g., FSTs - kError prop. true, FST weights - not a member()");
+ " e.g., FSTs - kError prop. true, FST weights - not a Member()");
namespace fst {
@@ -49,7 +49,7 @@ int64 StrToInt64(const string &s, const string &src, size_t nline,
void Int64ToStr(int64 n, string *s) {
ostringstream nstr;
nstr << n;
- *s = nstr.str();
+ s->append(nstr.str().data(), nstr.str().size());
}
void ConvertToLegalCSymbol(string *s) {
diff --git a/src/script/fst-class.cc b/src/script/fst-class.cc
index a784b08..bf5d963 100644
--- a/src/script/fst-class.cc
+++ b/src/script/fst-class.cc
@@ -74,10 +74,14 @@ FstClass *FstClass::Read(const string &fname) {
ifstream in(fname.c_str(), ifstream::in | ifstream::binary);
return ReadFst<FstClass>(in, fname);
} else {
- return ReadFst<FstClass>(std::cin, "standard input");
+ return ReadFst<FstClass>(cin, "standard input");
}
}
+FstClass *FstClass::Read(istream &istr, const string &source) {
+ return ReadFst<FstClass>(istr, source);
+}
+
//
// MUTABLE FST CLASS METHODS
//
@@ -88,7 +92,7 @@ MutableFstClass *MutableFstClass::Read(const string &fname, bool convert) {
ifstream in(fname.c_str(), ifstream::in | ifstream::binary);
return ReadFst<MutableFstClass>(in, fname);
} else {
- return ReadFst<MutableFstClass>(std::cin, "standard input");
+ return ReadFst<MutableFstClass>(cin, "standard input");
}
} else { // Converts to VectorFstClass if not mutable.
FstClass *ifst = FstClass::Read(fname);
@@ -133,7 +137,7 @@ VectorFstClass *VectorFstClass::Read(const string &fname) {
ifstream in(fname.c_str(), ifstream::in | ifstream::binary);
return ReadFst<VectorFstClass>(in, fname);
} else {
- return ReadFst<VectorFstClass>(std::cin, "standard input");
+ return ReadFst<VectorFstClass>(cin, "standard input");
}
}
diff --git a/src/script/text-io.cc b/src/script/text-io.cc
index 2f5efab..65b4eed 100644
--- a/src/script/text-io.cc
+++ b/src/script/text-io.cc
@@ -67,7 +67,7 @@ bool ReadPotentials(const string &weight_type,
// Writes vector of weights; returns true on success.
bool WritePotentials(const string& filename,
const vector<WeightClass>& potential) {
- ostream *strm = &std::cout;
+ ostream *strm = &cout;
if (!filename.empty()) {
strm = new ofstream(filename.c_str());
if (!*strm) {
@@ -85,7 +85,7 @@ bool WritePotentials(const string& filename,
LOG(ERROR) << "WritePotentials: Write failed: "
<< (filename.empty() ? "standard output" : filename);
bool ret = *strm;
- if (strm != &std::cout)
+ if (strm != &cout)
delete strm;
return ret;
}
diff --git a/src/test/algo_test.cc b/src/test/algo_test.cc
index 9538d54..27e3e95 100644
--- a/src/test/algo_test.cc
+++ b/src/test/algo_test.cc
@@ -66,7 +66,7 @@ using fst::AlgoTester;
int main(int argc, char **argv) {
FLAGS_fst_verify_properties = true;
std::set_new_handler(FailedNewHandler);
- SetFlags(argv[0], &argc, &argv, true);
+ SET_FLAGS(argv[0], &argc, &argv, true);
static const int kCacheGcLimit = 20;
diff --git a/src/test/fst_test.cc b/src/test/fst_test.cc
index 7d8d600..47899ba 100644
--- a/src/test/fst_test.cc
+++ b/src/test/fst_test.cc
@@ -123,7 +123,7 @@ using fst::EditFst;
int main(int argc, char **argv) {
FLAGS_fst_verify_properties = true;
std::set_new_handler(FailedNewHandler);
- SetFlags(argv[0], &argc, &argv, true);
+ SET_FLAGS(argv[0], &argc, &argv, true);
// VectorFst<StdArc> tests
{
diff --git a/src/test/fst_test.h b/src/test/fst_test.h
index da16476..8f9dca0 100644
--- a/src/test/fst_test.h
+++ b/src/test/fst_test.h
@@ -65,14 +65,14 @@ class FstTester {
StateIterator<G> siter(fst);
Matcher<G> matcher(fst, MATCH_INPUT);
MatchType match_type = matcher.Type(true);
- for (; !siter.Done(); siter.Next());
+ for (; !siter.Done(); siter.Next()) {}
for (siter.Reset(); !siter.Done(); siter.Next()) {
StateId s = siter.Value();
matcher.SetState(s);
CHECK_EQ(fst.Final(s), NthWeight(s));
size_t na = 0;
ArcIterator<G> aiter(fst, s);
- for (; !aiter.Done(); aiter.Next());
+ for (; !aiter.Done(); aiter.Next()) {}
for (aiter.Reset(); !aiter.Done(); aiter.Next()) {
++na;
const Arc &arc = aiter.Value();
@@ -129,7 +129,7 @@ class FstTester {
size_t na = 0;
size_t ni = fst->NumInputEpsilons(s);
MutableArcIterator<G> aiter(fst, s);
- for (; !aiter.Done(); aiter.Next());
+ for (; !aiter.Done(); aiter.Next()) {}
for (aiter.Reset(); !aiter.Done(); aiter.Next()) {
++na;
Arc arc = aiter.Value();
diff --git a/src/test/weight_test.cc b/src/test/weight_test.cc
index 54ba85d..3554456 100644
--- a/src/test/weight_test.cc
+++ b/src/test/weight_test.cc
@@ -97,7 +97,7 @@ void TestTemplatedWeights(int repeat, int seed) {
int main(int argc, char **argv) {
std::set_new_handler(FailedNewHandler);
- SetFlags(argv[0], &argc, &argv, true);
+ SET_FLAGS(argv[0], &argc, &argv, true);
int seed = FLAGS_seed >= 0 ? FLAGS_seed : time(0);
LOG(INFO) << "Seed = " << seed;