aboutsummaryrefslogtreecommitdiff
path: root/src/include/fst/state-table.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/fst/state-table.h')
-rw-r--r--src/include/fst/state-table.h469
1 files changed, 469 insertions, 0 deletions
diff --git a/src/include/fst/state-table.h b/src/include/fst/state-table.h
new file mode 100644
index 0000000..7d863a0
--- /dev/null
+++ b/src/include/fst/state-table.h
@@ -0,0 +1,469 @@
+// state-table.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes for representing the mapping between state tuples and state Ids.
+
+#ifndef FST_LIB_STATE_TABLE_H__
+#define FST_LIB_STATE_TABLE_H__
+
+#include <deque>
+#include <vector>
+using std::vector;
+
+#include <fst/bi-table.h>
+#include <fst/expanded-fst.h>
+
+
+namespace fst {
+
+// STATE TABLES - these determine the bijective mapping between state
+// tuples (e.g. in composition triples of two FST states and a
+// composition filter state) and their corresponding state IDs.
+// They are classes, templated on state tuples, of the form:
+//
+// template <class T>
+// class StateTable {
+// public:
+// typedef typename T StateTuple;
+//
+// // Required constructors.
+// StateTable();
+//
+// // Lookup state ID by tuple. If it doesn't exist, then add it.
+// StateId FindState(const StateTuple &);
+// // Lookup state tuple by state ID.
+// const StateTuple<StateId> &Tuple(StateId) const;
+// // # of stored tuples.
+// StateId Size() const;
+// };
+//
+// A state tuple has the form:
+//
+// template <class S>
+// struct StateTuple {
+// typedef typename S StateId;
+//
+// // Required constructor.
+// StateTuple();
+// };
+
+
+// An implementation using a hash map for the tuple to state ID mapping.
+// The state tuple T must have == defined and the default constructor
+// must produce a tuple that will never be seen. H is the hash function.
+template <class T, class H>
+class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
+ public:
+ typedef T StateTuple;
+ typedef typename StateTuple::StateId StateId;
+ using HashBiTable<StateId, T, H>::FindId;
+ using HashBiTable<StateId, T, H>::FindEntry;
+ using HashBiTable<StateId, T, H>::Size;
+
+ HashStateTable() : HashBiTable<StateId, T, H>() {}
+ StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
+ const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
+};
+
+
+// An implementation using a hash set for the tuple to state ID
+// mapping. The state tuple T must have == defined and the default
+// constructor must produce a tuple that will never be seen. H is the
+// hash function.
+template <class T, class H>
+class CompactHashStateTable
+ : public CompactHashBiTable<typename T::StateId, T, H> {
+ public:
+ typedef T StateTuple;
+ typedef typename StateTuple::StateId StateId;
+ using CompactHashBiTable<StateId, T, H>::FindId;
+ using CompactHashBiTable<StateId, T, H>::FindEntry;
+ using CompactHashBiTable<StateId, T, H>::Size;
+
+ CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {}
+
+ // Reserves space for table_size elements.
+ explicit CompactHashStateTable(size_t table_size)
+ : CompactHashBiTable<StateId, T, H>(table_size) {}
+
+ StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
+ const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
+};
+
+// An implementation using a vector for the tuple to state mapping.
+// It is passed a function object FP that should fingerprint tuples
+// uniquely to an integer that can used as a vector index. Normally,
+// VectorStateTable constructs the FP object. The user can instead
+// pass in this object; in that case, VectorStateTable takes its
+// ownership.
+template <class T, class FP>
+class VectorStateTable
+ : public VectorBiTable<typename T::StateId, T, FP> {
+ public:
+ typedef T StateTuple;
+ typedef typename StateTuple::StateId StateId;
+ using VectorBiTable<StateId, T, FP>::FindId;
+ using VectorBiTable<StateId, T, FP>::FindEntry;
+ using VectorBiTable<StateId, T, FP>::Size;
+ using VectorBiTable<StateId, T, FP>::Fingerprint;
+
+ explicit VectorStateTable(FP *fp = 0) : VectorBiTable<StateId, T, FP>(fp) {}
+ StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
+ const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
+};
+
+
+// An implementation using a vector and a compact hash table. The
+// selecting functor S returns true for tuples to be hashed in the
+// vector. The fingerprinting functor FP returns a unique fingerprint
+// for each tuple to be hashed in the vector (these need to be
+// suitable for indexing in a vector). The hash functor H is used when
+// hashing tuple into the compact hash table.
+template <class T, class S, class FP, class H>
+class VectorHashStateTable
+ : public VectorHashBiTable<typename T::StateId, T, S, FP, H> {
+ public:
+ typedef T StateTuple;
+ typedef typename StateTuple::StateId StateId;
+ using VectorHashBiTable<StateId, T, S, FP, H>::FindId;
+ using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry;
+ using VectorHashBiTable<StateId, T, S, FP, H>::Size;
+ using VectorHashBiTable<StateId, T, S, FP, H>::Selector;
+ using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint;
+ using VectorHashBiTable<StateId, T, S, FP, H>::Hash;
+
+ VectorHashStateTable(S *s, FP *fp, H *h,
+ size_t vector_size = 0,
+ size_t tuple_size = 0)
+ : VectorHashBiTable<StateId, T, S, FP, H>(
+ s, fp, h, vector_size, tuple_size) {}
+
+ StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
+ const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
+};
+
+
+// An implementation using a hash map for the tuple to state ID
+// mapping. This version permits erasing of states. The state tuple T
+// must have == defined and its default constructor must produce a
+// tuple that will never be seen. F is the hash function.
+template <class T, class F>
+class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> {
+ public:
+ typedef T StateTuple;
+ typedef typename StateTuple::StateId StateId;
+ using ErasableBiTable<StateId, T, F>::FindId;
+ using ErasableBiTable<StateId, T, F>::FindEntry;
+ using ErasableBiTable<StateId, T, F>::Size;
+ using ErasableBiTable<StateId, T, F>::Erase;
+
+ ErasableStateTable() : ErasableBiTable<StateId, T, F>() {}
+ StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
+ const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
+};
+
+//
+// COMPOSITION STATE TUPLES AND TABLES
+//
+// The composition state table has the form:
+//
+// template <class A, class F>
+// class ComposeStateTable {
+// public:
+// typedef A Arc;
+// typedef F FilterState;
+// typedef typename A::StateId StateId;
+// typedef ComposeStateTuple<StateId> StateTuple;
+//
+// // Required constructors. Copy constructor does not copy state.
+// ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
+// ComposeStateTable(const ComposeStateTable<A, F> &table);
+// // Lookup state ID by tuple. If it doesn't exist, then add it.
+// StateId FindState(const StateTuple &);
+// // Lookup state tuple by state ID.
+// const StateTuple<StateId> &Tuple(StateId) const;
+// // # of stored tuples.
+// StateId Size() const;
+// // Return true if error encountered
+// bool Error() const;
+// };
+
+// Represents the composition state.
+template <typename S, typename F>
+struct ComposeStateTuple {
+ typedef S StateId;
+ typedef F FilterState;
+
+ ComposeStateTuple()
+ : state_id1(kNoStateId), state_id2(kNoStateId),
+ filter_state(FilterState::NoState()) {}
+
+ ComposeStateTuple(StateId s1, StateId s2, const FilterState &f)
+ : state_id1(s1), state_id2(s2), filter_state(f) {}
+
+ StateId state_id1; // State Id on fst1
+ StateId state_id2; // State Id on fst2
+ FilterState filter_state; // State of composition filter
+};
+
+// Equality of composition state tuples.
+template <typename S, typename F>
+inline bool operator==(const ComposeStateTuple<S, F>& x,
+ const ComposeStateTuple<S, F>& y) {
+ if (&x == &y)
+ return true;
+ return x.state_id1 == y.state_id1 &&
+ x.state_id2 == y.state_id2 &&
+ x.filter_state == y.filter_state;
+}
+
+
+// Hashing of composition state tuples.
+template <typename S, typename F>
+class ComposeHash {
+ public:
+ size_t operator()(const ComposeStateTuple<S, F>& t) const {
+ return t.state_id1 + t.state_id2 * kPrime0 +
+ t.filter_state.Hash() * kPrime1;
+ }
+ private:
+ static const size_t kPrime0;
+ static const size_t kPrime1;
+};
+
+template <typename S, typename F>
+const size_t ComposeHash<S, F>::kPrime0 = 7853;
+
+template <typename S, typename F>
+const size_t ComposeHash<S, F>::kPrime1 = 7867;
+
+
+// A HashStateTable over composition tuples.
+template <typename A,
+ typename F,
+ typename H =
+ CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>,
+ ComposeHash<typename A::StateId, F> > >
+class GenericComposeStateTable : public H {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<StateId, F> StateTuple;
+
+ GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
+
+ GenericComposeStateTable(const GenericComposeStateTable<A, F> &table) {}
+
+ bool Error() const { return false; }
+
+ private:
+ void operator=(const GenericComposeStateTable<A, F> &table); // disallow
+};
+
+
+// Fingerprint for general composition tuples.
+template <typename S, typename F>
+class ComposeFingerprint {
+ public:
+ typedef S StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<S, F> StateTuple;
+
+ // Required but suboptimal constructor.
+ ComposeFingerprint() : mult1_(8192), mult2_(8192) {
+ LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
+ }
+
+ // Constructor is provided the sizes of the input FSTs
+ ComposeFingerprint(StateId nstates1, StateId nstates2)
+ : mult1_(nstates1), mult2_(nstates1 * nstates2) { }
+
+ size_t operator()(const StateTuple &tuple) {
+ return tuple.state_id1 + tuple.state_id2 * mult1_ +
+ tuple.filter_state.Hash() * mult2_;
+ }
+
+ private:
+ ssize_t mult1_;
+ ssize_t mult2_;
+};
+
+
+// Useful when the first composition state determines the tuple.
+template <typename S, typename F>
+class ComposeState1Fingerprint {
+ public:
+ typedef S StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<S, F> StateTuple;
+
+ size_t operator()(const StateTuple &tuple) { return tuple.state_id1; }
+};
+
+
+// Useful when the second composition state determines the tuple.
+template <typename S, typename F>
+class ComposeState2Fingerprint {
+ public:
+ typedef S StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<S, F> StateTuple;
+
+ size_t operator()(const StateTuple &tuple) { return tuple.state_id2; }
+};
+
+
+// A VectorStateTable over composition tuples. This can be used when
+// the product of number of states in FST1 and FST2 (and the
+// composition filter state hash) is manageable. If the FSTs are not
+// expanded Fsts, they will first have their states counted.
+template <typename A, typename F>
+class ProductComposeStateTable : public
+VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
+ ComposeFingerprint<typename A::StateId, F> > {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<StateId, F> StateTuple;
+
+ ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
+ : VectorStateTable<ComposeStateTuple<StateId, F>,
+ ComposeFingerprint<StateId, F> >
+ (new ComposeFingerprint<StateId, F>(CountStates(fst1),
+ CountStates(fst2))) { }
+
+ ProductComposeStateTable(const ProductComposeStateTable<A, F> &table)
+ : VectorStateTable<ComposeStateTuple<StateId, F>,
+ ComposeFingerprint<StateId, F> >
+ (new ComposeFingerprint<StateId, F>(table.Fingerprint())) {}
+
+ bool Error() const { return false; }
+
+ private:
+ void operator=(const ProductComposeStateTable<A, F> &table); // disallow
+};
+
+// A VectorStateTable over composition tuples. This can be used when
+// FST1 is a string (satisfies kStringProperties) and FST2 is
+// epsilon-free and deterministic. It should be used with a
+// composition filter that creates at most one filter state per tuple
+// under these conditions (e.g. SequenceComposeFilter or
+// MatchComposeFilter).
+template <typename A, typename F>
+class StringDetComposeStateTable : public
+VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
+ ComposeState1Fingerprint<typename A::StateId, F> > {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<StateId, F> StateTuple;
+
+ StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
+ : error_(false) {
+ uint64 props1 = kString;
+ uint64 props2 = kIDeterministic | kNoIEpsilons;
+ if (fst1.Properties(props1, true) != props1 ||
+ fst2.Properties(props2, true) != props2) {
+ FSTERROR() << "StringDetComposeStateTable: fst1 not a string or"
+ << " fst2 not input deterministic and epsilon-free";
+ error_ = true;
+ }
+ }
+
+ StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table)
+ : error_(table.error_) {}
+
+ bool Error() const { return error_; }
+
+ private:
+ bool error_;
+
+ void operator=(const StringDetComposeStateTable<A, F> &table); // disallow
+};
+
+
+// A VectorStateTable over composition tuples. This can be used when
+// FST2 is a string (satisfies kStringProperties) and FST1 is
+// epsilon-free and deterministic. It should be used with a
+// composition filter that creates at most one filter state per tuple
+// under these conditions (e.g. SequenceComposeFilter or
+// MatchComposeFilter).
+template <typename A, typename F>
+class DetStringComposeStateTable : public
+VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
+ ComposeState1Fingerprint<typename A::StateId, F> > {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<StateId, F> StateTuple;
+
+ DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
+ :error_(false) {
+ uint64 props1 = kODeterministic | kNoOEpsilons;
+ uint64 props2 = kString;
+ if (fst1.Properties(props1, true) != props1 ||
+ fst2.Properties(props2, true) != props2) {
+ FSTERROR() << "StringDetComposeStateTable: fst2 not a string or"
+ << " fst1 not output deterministic and epsilon-free";
+ error_ = true;
+ }
+ }
+
+ DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table)
+ : error_(table.error_) {}
+
+ bool Error() const { return error_; }
+
+ private:
+ bool error_;
+
+ void operator=(const DetStringComposeStateTable<A, F> &table); // disallow
+};
+
+
+// An ErasableStateTable over composition tuples. The Erase(StateId) method
+// can be called if the user either is sure that composition will never return
+// to that tuple or doesn't care that if it does, it is assigned a new
+// state ID.
+template <typename A, typename F>
+class ErasableComposeStateTable : public
+ErasableStateTable<ComposeStateTuple<typename A::StateId, F>,
+ ComposeHash<typename A::StateId, F> > {
+ public:
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef F FilterState;
+ typedef ComposeStateTuple<StateId, F> StateTuple;
+
+ ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
+
+ ErasableComposeStateTable(const ErasableComposeStateTable<A, F> &table) {}
+
+ bool Error() const { return false; }
+
+ private:
+ void operator=(const ErasableComposeStateTable<A, F> &table); // disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_STATE_TABLE_H__