aboutsummaryrefslogtreecommitdiff
path: root/src/include/fst/const-fst.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/fst/const-fst.h')
-rw-r--r--src/include/fst/const-fst.h483
1 files changed, 483 insertions, 0 deletions
diff --git a/src/include/fst/const-fst.h b/src/include/fst/const-fst.h
new file mode 100644
index 0000000..f68e8ed
--- /dev/null
+++ b/src/include/fst/const-fst.h
@@ -0,0 +1,483 @@
+// const-fst.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Simple concrete immutable FST whose states and arcs are each stored
+// in single arrays.
+
+#ifndef FST_LIB_CONST_FST_H__
+#define FST_LIB_CONST_FST_H__
+
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/expanded-fst.h>
+#include <fst/fst-decl.h> // For optional argument declarations
+#include <fst/test-properties.h>
+#include <fst/util.h>
+
+
+namespace fst {
+
+template <class A, class U> class ConstFst;
+template <class F, class G> void Cast(const F &, G *);
+
+// States and arcs each implemented by single arrays, templated on the
+// Arc definition. The unsigned type U is used to represent indices into
+// the arc array.
+template <class A, class U>
+class ConstFstImpl : public FstImpl<A> {
+ public:
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::Properties;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef U Unsigned;
+
+ ConstFstImpl()
+ : states_(0), arcs_(0), nstates_(0), narcs_(0), start_(kNoStateId) {
+ string type = "const";
+ if (sizeof(U) != sizeof(uint32)) {
+ string size;
+ Int64ToStr(8 * sizeof(U), &size);
+ type += size;
+ }
+ SetType(type);
+ SetProperties(kNullProperties | kStaticProperties);
+ }
+
+ explicit ConstFstImpl(const Fst<A> &fst);
+
+ ~ConstFstImpl() {
+ delete[] states_;
+ delete[] arcs_;
+ }
+
+ StateId Start() const { return start_; }
+
+ Weight Final(StateId s) const { return states_[s].final; }
+
+ StateId NumStates() const { return nstates_; }
+
+ size_t NumArcs(StateId s) const { return states_[s].narcs; }
+
+ size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; }
+
+ size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; }
+
+ static ConstFstImpl<A, U> *Read(istream &strm, const FstReadOptions &opts);
+
+ bool Write(ostream &strm, const FstWriteOptions &opts) const;
+
+ A *Arcs(StateId s) { return arcs_ + states_[s].pos; }
+
+ // Provide information needed for generic state iterator
+ void InitStateIterator(StateIteratorData<A> *data) const {
+ data->base = 0;
+ data->nstates = nstates_;
+ }
+
+ // Provide information needed for the generic arc iterator
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ data->base = 0;
+ data->arcs = arcs_ + states_[s].pos;
+ data->narcs = states_[s].narcs;
+ data->ref_count = 0;
+ }
+
+ private:
+ friend class ConstFst<A, U>; // Allow finding narcs_, nstates_ during Write
+
+ // States implemented by array *states_ below, arcs by (single) *arcs_.
+ struct State {
+ Weight final; // Final weight
+ Unsigned pos; // Start of state's arcs in *arcs_
+ Unsigned narcs; // Number of arcs (per state)
+ Unsigned niepsilons; // # of input epsilons
+ Unsigned noepsilons; // # of output epsilons
+ State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {}
+ };
+
+ // Properties always true of this Fst class
+ static const uint64 kStaticProperties = kExpanded;
+ // Current unaligned file format version. The unaligned version was added and
+ // made the default since the aligned version does not work on pipes.
+ static const int kFileVersion = 2;
+ // Current aligned file format version
+ static const int kAlignedFileVersion = 1;
+ // Minimum file format version supported
+ static const int kMinFileVersion = 1;
+ // Byte alignment for states and arcs in file format (version 1 only)
+ static const int kFileAlign = 16;
+
+ State *states_; // States represenation
+ A *arcs_; // Arcs representation
+ StateId nstates_; // Number of states
+ size_t narcs_; // Number of arcs (per FST)
+ StateId start_; // Initial state
+
+ DISALLOW_COPY_AND_ASSIGN(ConstFstImpl);
+};
+
+template <class A, class U>
+const uint64 ConstFstImpl<A, U>::kStaticProperties;
+template <class A, class U>
+const int ConstFstImpl<A, U>::kFileVersion;
+template <class A, class U>
+const int ConstFstImpl<A, U>::kAlignedFileVersion;
+template <class A, class U>
+const int ConstFstImpl<A, U>::kMinFileVersion;
+template <class A, class U>
+const int ConstFstImpl<A, U>::kFileAlign;
+
+
+template<class A, class U>
+ConstFstImpl<A, U>::ConstFstImpl(const Fst<A> &fst) : nstates_(0), narcs_(0) {
+ string type = "const";
+ if (sizeof(U) != sizeof(uint32)) {
+ string size;
+ Int64ToStr(sizeof(U) * 8, &size);
+ type += size;
+ }
+ SetType(type);
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ start_ = fst.Start();
+
+ // Count # of states and arcs.
+ for (StateIterator< Fst<A> > siter(fst);
+ !siter.Done();
+ siter.Next()) {
+ ++nstates_;
+ StateId s = siter.Value();
+ for (ArcIterator< Fst<A> > aiter(fst, s);
+ !aiter.Done();
+ aiter.Next())
+ ++narcs_;
+ }
+ states_ = new State[nstates_];
+ arcs_ = new A[narcs_];
+ size_t pos = 0;
+ for (StateId s = 0; s < nstates_; ++s) {
+ states_[s].final = fst.Final(s);
+ states_[s].pos = pos;
+ states_[s].narcs = 0;
+ states_[s].niepsilons = 0;
+ states_[s].noepsilons = 0;
+ for (ArcIterator< Fst<A> > aiter(fst, s);
+ !aiter.Done();
+ aiter.Next()) {
+ const A &arc = aiter.Value();
+ ++states_[s].narcs;
+ if (arc.ilabel == 0)
+ ++states_[s].niepsilons;
+ if (arc.olabel == 0)
+ ++states_[s].noepsilons;
+ arcs_[pos++] = arc;
+ }
+ }
+ SetProperties(fst.Properties(kCopyProperties, true) | kStaticProperties);
+}
+
+
+template<class A, class U>
+ConstFstImpl<A, U> *ConstFstImpl<A, U>::Read(istream &strm,
+ const FstReadOptions &opts) {
+ ConstFstImpl<A, U> *impl = new ConstFstImpl<A, U>;
+ FstHeader hdr;
+ if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
+ delete impl;
+ return 0;
+ }
+ impl->start_ = hdr.Start();
+ impl->nstates_ = hdr.NumStates();
+ impl->narcs_ = hdr.NumArcs();
+ impl->states_ = new State[impl->nstates_];
+ impl->arcs_ = new A[impl->narcs_];
+
+ // Ensures compatibility
+ if (hdr.Version() == kAlignedFileVersion)
+ hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
+
+ if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) &&
+ !AlignInput(strm, kFileAlign)) {
+ LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
+ delete impl;
+ return 0;
+ }
+ size_t b = impl->nstates_ * sizeof(typename ConstFstImpl<A, U>::State);
+ strm.read(reinterpret_cast<char *>(impl->states_), b);
+ if (!strm) {
+ LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
+ delete impl;
+ return 0;
+ }
+ if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) &&
+ !AlignInput(strm, kFileAlign)) {
+ LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
+ delete impl;
+ return 0;
+ }
+ b = impl->narcs_ * sizeof(A);
+ strm.read(reinterpret_cast<char *>(impl->arcs_), b);
+ if (!strm) {
+ LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
+ delete impl;
+ return 0;
+ }
+ return impl;
+}
+
+// Simple concrete immutable FST. This class attaches interface to
+// implementation and handles reference counting, delegating most
+// methods to ImplToExpandedFst. The unsigned type U is used to
+// represent indices into the arc array (uint32 by default, declared
+// in fst-decl.h).
+template <class A, class U>
+class ConstFst : public ImplToExpandedFst< ConstFstImpl<A, U> > {
+ public:
+ friend class StateIterator< ConstFst<A, U> >;
+ friend class ArcIterator< ConstFst<A, U> >;
+ template <class F, class G> void friend Cast(const F &, G *);
+
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef ConstFstImpl<A, U> Impl;
+ typedef U Unsigned;
+
+ ConstFst() : ImplToExpandedFst<Impl>(new Impl()) {}
+
+ explicit ConstFst(const Fst<A> &fst)
+ : ImplToExpandedFst<Impl>(new Impl(fst)) {}
+
+ ConstFst(const ConstFst<A, U> &fst) : ImplToExpandedFst<Impl>(fst) {}
+
+ // Get a copy of this ConstFst. See Fst<>::Copy() for further doc.
+ virtual ConstFst<A, U> *Copy(bool safe = false) const {
+ return new ConstFst<A, U>(*this);
+ }
+
+ // Read a ConstFst from an input stream; return NULL on error
+ static ConstFst<A, U> *Read(istream &strm, const FstReadOptions &opts) {
+ Impl* impl = Impl::Read(strm, opts);
+ return impl ? new ConstFst<A, U>(impl) : 0;
+ }
+
+ // Read a ConstFst from a file; return NULL on error
+ // Empty filename reads from standard input
+ static ConstFst<A, U> *Read(const string &filename) {
+ Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
+ return impl ? new ConstFst<A, U>(impl) : 0;
+ }
+
+ virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ return WriteFst(*this, strm, opts);
+ }
+
+ virtual bool Write(const string &filename) const {
+ return Fst<A>::WriteFile(filename);
+ }
+
+ template <class F>
+ static bool WriteFst(const F &fst, ostream &strm,
+ const FstWriteOptions &opts);
+
+ virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
+ GetImpl()->InitStateIterator(data);
+ }
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ private:
+ explicit ConstFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
+
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
+
+ void SetImpl(Impl *impl, bool own_impl = true) {
+ ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
+ }
+
+ void operator=(const ConstFst<A, U> &fst); // disallow
+};
+
+// Writes Fst in Const format, potentially with a pass over the machine
+// before writing to compute number of states and arcs.
+//
+template <class A, class U>
+template <class F>
+bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm,
+ const FstWriteOptions &opts) {
+ static const int kFileVersion = 2;
+ static const int kAlignedFileVersion = 1;
+ static const int kFileAlign = 16;
+ int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
+ size_t num_arcs = -1, num_states = -1;
+ size_t start_offset = 0;
+ bool update_header = true;
+ if (fst.Type() == ConstFst<A, U>().Type()) {
+ const ConstFst<A, U> *const_fst = static_cast<const ConstFst<A, U> *>(&fst);
+ num_arcs = const_fst->GetImpl()->narcs_;
+ num_states = const_fst->GetImpl()->nstates_;
+ update_header = false;
+ } else if ((start_offset = strm.tellp()) == -1) {
+ // precompute values needed for header when we cannot seek to rewrite it.
+ for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+ num_arcs += fst.NumArcs(siter.Value());
+ num_states++;
+ }
+ update_header = false;
+ }
+ FstHeader hdr;
+ hdr.SetStart(fst.Start());
+ hdr.SetNumStates(num_states);
+ hdr.SetNumArcs(num_arcs);
+ string type = "const";
+ if (sizeof(U) != sizeof(uint32)) {
+ string size;
+ Int64ToStr(8 * sizeof(U), &size);
+ type += size;
+ }
+ FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, &hdr);
+ if (opts.align && !AlignOutput(strm, kFileAlign)) {
+ LOG(ERROR) << "Could not align file during write after header";
+ return false;
+ }
+ size_t pos = 0, states = 0;
+ typename ConstFstImpl<A, U>::State state;
+ for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+ state.final = fst.Final(siter.Value());
+ state.pos = pos;
+ state.narcs = fst.NumArcs(siter.Value());
+ state.niepsilons = fst.NumInputEpsilons(siter.Value());
+ state.noepsilons = fst.NumOutputEpsilons(siter.Value());
+ strm.write(reinterpret_cast<const char *>(&state), sizeof(state));
+ pos += state.narcs;
+ states++;
+ }
+ hdr.SetNumStates(states);
+ hdr.SetNumArcs(pos);
+ if (opts.align && !AlignOutput(strm, kFileAlign)) {
+ LOG(ERROR) << "Could not align file during write after writing states";
+ }
+ for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+ StateId s = siter.Value();
+ for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
+ const A &arc = aiter.Value();
+ strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc));
+ }
+ }
+ strm.flush();
+ if (!strm) {
+ LOG(ERROR) << "WriteAsVectorFst write failed: " << opts.source;
+ return false;
+ }
+ if (update_header) {
+ return FstImpl<A>::UpdateFstHeader(fst, strm, opts, file_version, type,
+ &hdr, start_offset);
+ } else {
+ if (hdr.NumStates() != num_states) {
+ LOG(ERROR) << "Inconsistent number of states observed during write";
+ return false;
+ }
+ if (hdr.NumArcs() != num_arcs) {
+ LOG(ERROR) << "Inconsistent number of arcs observed during write";
+ return false;
+ }
+ }
+ return true;
+}
+
+// Specialization for ConstFst; see generic version in fst.h
+// for sample usage (but use the ConstFst type!). This version
+// should inline.
+template <class A, class U>
+class StateIterator< ConstFst<A, U> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ explicit StateIterator(const ConstFst<A, U> &fst)
+ : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
+
+ bool Done() const { return s_ >= nstates_; }
+
+ StateId Value() const { return s_; }
+
+ void Next() { ++s_; }
+
+ void Reset() { s_ = 0; }
+
+ private:
+ StateId nstates_;
+ StateId s_;
+
+ DISALLOW_COPY_AND_ASSIGN(StateIterator);
+};
+
+
+// Specialization for ConstFst; see generic version in fst.h
+// for sample usage (but use the ConstFst type!). This version
+// should inline.
+template <class A, class U>
+class ArcIterator< ConstFst<A, U> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const ConstFst<A, U> &fst, StateId s)
+ : arcs_(fst.GetImpl()->Arcs(s)),
+ narcs_(fst.GetImpl()->NumArcs(s)), i_(0) {}
+
+ bool Done() const { return i_ >= narcs_; }
+
+ const A& Value() const { return arcs_[i_]; }
+
+ void Next() { ++i_; }
+
+ size_t Position() const { return i_; }
+
+ void Reset() { i_ = 0; }
+
+ void Seek(size_t a) { i_ = a; }
+
+ uint32 Flags() const {
+ return kArcValueFlags;
+ }
+
+ void SetFlags(uint32 f, uint32 m) {}
+
+ private:
+ const A *arcs_;
+ size_t narcs_;
+ size_t i_;
+
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+// A useful alias when using StdArc.
+typedef ConstFst<StdArc> StdConstFst;
+
+} // namespace fst
+
+#endif // FST_LIB_CONST_FST_H__