aboutsummaryrefslogtreecommitdiff
path: root/src/include/fst/compose-filter.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/fst/compose-filter.h')
-rw-r--r--src/include/fst/compose-filter.h542
1 files changed, 542 insertions, 0 deletions
diff --git a/src/include/fst/compose-filter.h b/src/include/fst/compose-filter.h
new file mode 100644
index 0000000..6bf7736
--- /dev/null
+++ b/src/include/fst/compose-filter.h
@@ -0,0 +1,542 @@
+// compose-filter.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Classes for filtering the composition matches, e.g. for correct epsilon
+// handling.
+
+#ifndef FST_LIB_COMPOSE_FILTER_H__
+#define FST_LIB_COMPOSE_FILTER_H__
+
+#include <fst/fst.h>
+#include <fst/fst-decl.h> // For optional argument declarations
+#include <fst/matcher.h>
+
+
+namespace fst {
+
+
+// COMPOSITION FILTER STATE - this represents the state of
+// the composition filter. It has the form:
+//
+// class FilterState {
+// public:
+// // Required constructors
+// FilterState();
+// FilterState(const FilterState &f);
+// // An invalid filter state.
+// static const FilterState NoState();
+// // Maps state to integer for hashing.
+// size_t Hash() const;
+// // Equality of filter states.
+// bool operator==(const FilterState &f) const;
+// // Inequality of filter states.
+// bool operator!=(const FilterState &f) const;
+// // Assignment to filter states.
+// FilterState& operator=(const FilterState& f);
+// };
+
+
+// Filter state that is a signed integral type.
+template <typename T>
+class IntegerFilterState {
+ public:
+ IntegerFilterState() : state_(kNoStateId) {}
+ explicit IntegerFilterState(T s) : state_(s) {}
+
+ static const IntegerFilterState NoState() { return IntegerFilterState(); }
+
+ size_t Hash() const { return static_cast<size_t>(state_); }
+
+ bool operator==(const IntegerFilterState &f) const {
+ return state_ == f.state_;
+ }
+
+ bool operator!=(const IntegerFilterState &f) const {
+ return state_ != f.state_;
+ }
+
+ T GetState() const { return state_; }
+
+ void SetState(T state) { state_ = state; }
+
+private:
+ T state_;
+};
+
+typedef IntegerFilterState<signed char> CharFilterState;
+typedef IntegerFilterState<short> ShortFilterState;
+typedef IntegerFilterState<int> IntFilterState;
+
+
+// Filter state that is a weight (class).
+template <class W>
+class WeightFilterState {
+ public:
+ WeightFilterState() : weight_(W::Zero()) {}
+ explicit WeightFilterState(W w) : weight_(w) {}
+
+ static const WeightFilterState NoState() { return WeightFilterState(); }
+
+ size_t Hash() const { return weight_.Hash(); }
+
+ bool operator==(const WeightFilterState &f) const {
+ return weight_ == f.weight_;
+ }
+
+ bool operator!=(const WeightFilterState &f) const {
+ return weight_ != f.weight_;
+ }
+
+ W GetWeight() const { return weight_; }
+
+ void SetWeight(W w) { weight_ = w; }
+
+private:
+ W weight_;
+};
+
+
+// Filter state that is the combination of two filter states.
+template <class F1, class F2>
+class PairFilterState {
+ public:
+ PairFilterState() : f1_(F1::NoState()), f2_(F2::NoState()) {}
+
+ PairFilterState(const F1 &f1, const F2 &f2) : f1_(f1), f2_(f2) {}
+
+ static const PairFilterState NoState() { return PairFilterState(); }
+
+ size_t Hash() const {
+ size_t h1 = f1_.Hash();
+ size_t h2 = f2_.Hash();
+ const int lshift = 5;
+ const int rshift = CHAR_BIT * sizeof(size_t) - 5;
+ return h1 << lshift ^ h1 >> rshift ^ h2;
+ }
+
+ bool operator==(const PairFilterState &f) const {
+ return f1_ == f.f1_ && f2_ == f.f2_;
+ }
+
+ bool operator!=(const PairFilterState &f) const {
+ return f1_ != f.f1_ || f2_ != f.f2_;
+ }
+
+ const F1 &GetState1() const { return f1_; }
+ const F2 &GetState2() const { return f2_; }
+
+ void SetState(const F1 &f1, const F2 &f2) {
+ f1_ = f1;
+ f2_ = f2;
+ }
+
+private:
+ F1 f1_;
+ F2 f2_;
+};
+
+
+// COMPOSITION FILTERS - these determine which matches are allowed to
+// proceed. The filter's state is represented by the type
+// ComposeFilter::FilterState. The basic filters handle correct
+// epsilon matching. Their interface is:
+//
+// template <class M1, class M2>
+// class ComposeFilter {
+// public:
+// typedef typename M1::FST1 FST1;
+// typedef typename M1::FST2 FST2;
+// typedef typename FST1::Arc Arc;
+// typedef ... FilterState;
+// typedef ... Matcher1;
+// typedef ... Matcher2;
+//
+// // Required constructors.
+// ComposeFilter(const FST1 &fst1, const FST2 &fst2,
+// // M1 *matcher1 = 0, M2 *matcher2 = 0);
+// // If safe=true, the copy is thread-safe. See Fst<>::Copy()
+// // for further doc.
+// ComposeFilter(const ComposeFilter<M1, M2> &filter,
+// // bool safe = false);
+// // Return start state of filter.
+// FilterState Start() const;
+// // Specifies current composition state.
+// void SetState(StateId s1, StateId s2, const FilterState &f);
+//
+// // Apply filter at current composition state to these transitions.
+// // If an arc label to be matched is kNolabel, then that side
+// // does not consume a symbol. Returns the new filter state or,
+// // if disallowed, FilterState::NoState(). The filter is permitted to
+// // modify its inputs, e.g. for optimizations.
+// FilterState FilterArc(Arc *arc1, Arc *arc2) const;
+
+// // Apply filter at current composition state to these final weights
+// // (cf. superfinal transitions). The filter may modify its inputs,
+// // e.g. for optimizations.
+// void FilterFinal(Weight *final1, Weight *final2) const;
+//
+// // Return resp matchers. Ownership stays with filter. These
+// // methods allow the filter to access and possibly modify
+// // the composition matchers (useful e.g. with lookahead).
+// Matcher1 *GetMatcher1();
+// Matcher2 *GetMatcher2();
+//
+// // This specifies how the filter affects the composition result
+// // properties. It takes as argument the properties that would
+// // apply with a trivial composition fitler.
+// uint64 Properties(uint64 props) const;
+// };
+
+// This filter requires epsilons on FST1 to be read before epsilons on FST2.
+template <class M1, class M2>
+class SequenceComposeFilter {
+ public:
+ typedef typename M1::FST FST1;
+ typedef typename M2::FST FST2;
+ typedef typename FST1::Arc Arc;
+ typedef CharFilterState FilterState;
+ typedef M1 Matcher1;
+ typedef M2 Matcher2;
+
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ SequenceComposeFilter(const FST1 &fst1, const FST2 &fst2,
+ M1 *matcher1 = 0, M2 *matcher2 = 0)
+ : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
+ matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
+ fst1_(matcher1_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ SequenceComposeFilter(const SequenceComposeFilter<M1, M2> &filter,
+ bool safe = false)
+ : matcher1_(filter.matcher1_->Copy(safe)),
+ matcher2_(filter.matcher2_->Copy(safe)),
+ fst1_(matcher1_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ ~SequenceComposeFilter() {
+ delete matcher1_;
+ delete matcher2_;
+ }
+
+ FilterState Start() const { return FilterState(0); }
+
+ void SetState(StateId s1, StateId s2, const FilterState &f) {
+ if (s1_ == s1 && s2_ == s2 && f == f_)
+ return;
+ s1_ = s1;
+ s2_ = s2;
+ f_ = f;
+ size_t na1 = internal::NumArcs(fst1_, s1);
+ size_t ne1 = internal::NumOutputEpsilons(fst1_, s1);
+ bool fin1 = internal::Final(fst1_, s1) != Weight::Zero();
+ alleps1_ = na1 == ne1 && !fin1;
+ noeps1_ = ne1 == 0;
+ }
+
+ FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+ if (arc1->olabel == kNoLabel)
+ return alleps1_ ? FilterState::NoState() :
+ noeps1_ ? FilterState(0) : FilterState(1);
+ else if (arc2->ilabel == kNoLabel)
+ return f_ != FilterState(0) ? FilterState::NoState() : FilterState(0);
+ else
+ return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0);
+ }
+
+ void FilterFinal(Weight *, Weight *) const {}
+
+ // Return resp matchers. Ownership stays with filter.
+ Matcher1 *GetMatcher1() { return matcher1_; }
+ Matcher2 *GetMatcher2() { return matcher2_; }
+
+ uint64 Properties(uint64 props) const { return props; }
+
+ private:
+ Matcher1 *matcher1_;
+ Matcher2 *matcher2_;
+ const FST1 &fst1_;
+ StateId s1_; // Current fst1_ state;
+ StateId s2_; // Current fst2_ state;
+ FilterState f_; // Current filter state
+ bool alleps1_; // Only epsilons (and non-final) leaving s1_?
+ bool noeps1_; // No epsilons leaving s1_?
+
+ void operator=(const SequenceComposeFilter<M1, M2> &); // disallow
+};
+
+
+// This filter requires epsilons on FST2 to be read before epsilons on FST1.
+template <class M1, class M2>
+class AltSequenceComposeFilter {
+ public:
+ typedef typename M1::FST FST1;
+ typedef typename M2::FST FST2;
+ typedef typename FST1::Arc Arc;
+ typedef CharFilterState FilterState;
+ typedef M1 Matcher1;
+ typedef M2 Matcher2;
+
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ AltSequenceComposeFilter(const FST1 &fst1, const FST2 &fst2,
+ M1 *matcher1 = 0, M2 *matcher2 = 0)
+ : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
+ matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
+ fst2_(matcher2_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ AltSequenceComposeFilter(const AltSequenceComposeFilter<M1, M2> &filter,
+ bool safe = false)
+ : matcher1_(filter.matcher1_->Copy(safe)),
+ matcher2_(filter.matcher2_->Copy(safe)),
+ fst2_(matcher2_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ ~AltSequenceComposeFilter() {
+ delete matcher1_;
+ delete matcher2_;
+ }
+
+ FilterState Start() const { return FilterState(0); }
+
+ void SetState(StateId s1, StateId s2, const FilterState &f) {
+ if (s1_ == s1 && s2_ == s2 && f == f_)
+ return;
+ s1_ = s1;
+ s2_ = s2;
+ f_ = f;
+ size_t na2 = internal::NumArcs(fst2_, s2);
+ size_t ne2 = internal::NumInputEpsilons(fst2_, s2);
+ bool fin2 = internal::Final(fst2_, s2) != Weight::Zero();
+ alleps2_ = na2 == ne2 && !fin2;
+ noeps2_ = ne2 == 0;
+ }
+
+ FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+ if (arc2->ilabel == kNoLabel)
+ return alleps2_ ? FilterState::NoState() :
+ noeps2_ ? FilterState(0) : FilterState(1);
+ else if (arc1->olabel == kNoLabel)
+ return f_ == FilterState(1) ? FilterState::NoState() : FilterState(0);
+ else
+ return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0);
+ }
+
+ void FilterFinal(Weight *, Weight *) const {}
+
+ // Return resp matchers. Ownership stays with filter.
+ Matcher1 *GetMatcher1() { return matcher1_; }
+ Matcher2 *GetMatcher2() { return matcher2_; }
+
+ uint64 Properties(uint64 props) const { return props; }
+
+ private:
+ Matcher1 *matcher1_;
+ Matcher2 *matcher2_;
+ const FST2 &fst2_;
+ StateId s1_; // Current fst1_ state;
+ StateId s2_; // Current fst2_ state;
+ FilterState f_; // Current filter state
+ bool alleps2_; // Only epsilons (and non-final) leaving s2_?
+ bool noeps2_; // No epsilons leaving s2_?
+
+void operator=(const AltSequenceComposeFilter<M1, M2> &); // disallow
+};
+
+
+// This filter requires epsilons on FST1 to be matched with epsilons on FST2
+// whenever possible.
+template <class M1, class M2>
+class MatchComposeFilter {
+ public:
+ typedef typename M1::FST FST1;
+ typedef typename M2::FST FST2;
+ typedef typename FST1::Arc Arc;
+ typedef CharFilterState FilterState;
+ typedef M1 Matcher1;
+ typedef M2 Matcher2;
+
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ MatchComposeFilter(const FST1 &fst1, const FST2 &fst2,
+ M1 *matcher1 = 0, M2 *matcher2 = 0)
+ : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)),
+ matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)),
+ fst1_(matcher1_->GetFst()),
+ fst2_(matcher2_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ MatchComposeFilter(const MatchComposeFilter<M1, M2> &filter,
+ bool safe = false)
+ : matcher1_(filter.matcher1_->Copy(safe)),
+ matcher2_(filter.matcher2_->Copy(safe)),
+ fst1_(matcher1_->GetFst()),
+ fst2_(matcher2_->GetFst()),
+ s1_(kNoStateId),
+ s2_(kNoStateId),
+ f_(kNoStateId) {}
+
+ ~MatchComposeFilter() {
+ delete matcher1_;
+ delete matcher2_;
+ }
+
+ FilterState Start() const { return FilterState(0); }
+
+ void SetState(StateId s1, StateId s2, const FilterState &f) {
+ if (s1_ == s1 && s2_ == s2 && f == f_)
+ return;
+ s1_ = s1;
+ s2_ = s2;
+ f_ = f;
+ size_t na1 = internal::NumArcs(fst1_, s1);
+ size_t ne1 = internal::NumOutputEpsilons(fst1_, s1);
+ bool f1 = internal::Final(fst1_, s1) != Weight::Zero();
+ alleps1_ = na1 == ne1 && !f1;
+ noeps1_ = ne1 == 0;
+ size_t na2 = internal::NumArcs(fst2_, s2);
+ size_t ne2 = internal::NumInputEpsilons(fst2_, s2);
+ bool f2 = internal::Final(fst2_, s2) != Weight::Zero();
+ alleps2_ = na2 == ne2 && !f2;
+ noeps2_ = ne2 == 0;
+ }
+
+ FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+ if (arc2->ilabel == kNoLabel) // Epsilon on Fst1
+ return f_ == FilterState(0) ?
+ (noeps2_ ? FilterState(0) :
+ (alleps2_ ? FilterState::NoState(): FilterState(1))) :
+ (f_ == FilterState(1) ? FilterState(1) : FilterState::NoState());
+ else if (arc1->olabel == kNoLabel) // Epsilon on Fst2
+ return f_ == FilterState(0) ?
+ (noeps1_ ? FilterState(0) :
+ (alleps1_ ? FilterState::NoState() : FilterState(2))) :
+ (f_ == FilterState(2) ? FilterState(2) : FilterState::NoState());
+ else if (arc1->olabel == 0) // Epsilon on both
+ return f_ == FilterState(0) ? FilterState(0) : FilterState::NoState();
+ else // Both are non-epsilons
+ return FilterState(0);
+ }
+
+ void FilterFinal(Weight *, Weight *) const {}
+
+ // Return resp matchers. Ownership stays with filter.
+ Matcher1 *GetMatcher1() { return matcher1_; }
+ Matcher2 *GetMatcher2() { return matcher2_; }
+
+ uint64 Properties(uint64 props) const { return props; }
+
+ private:
+ Matcher1 *matcher1_;
+ Matcher2 *matcher2_;
+ const FST1 &fst1_;
+ const FST2 &fst2_;
+ StateId s1_; // Current fst1_ state;
+ StateId s2_; // Current fst2_ state;
+ FilterState f_; // Current filter state ID
+ bool alleps1_, alleps2_; // Only epsilons (and non-final) leaving s1, s2?
+ bool noeps1_, noeps2_; // No epsilons leaving s1, s2?
+
+ void operator=(const MatchComposeFilter<M1, M2> &); // disallow
+};
+
+
+// This filter works with the MultiEpsMatcher to determine if
+// 'multi-epsilons' are preserved in the composition output
+// (rather than rewritten as 0) and ensures correct properties.
+template <class F>
+class MultiEpsFilter {
+ public:
+ typedef typename F::FST1 FST1;
+ typedef typename F::FST2 FST2;
+ typedef typename F::Arc Arc;
+ typedef typename F::Matcher1 Matcher1;
+ typedef typename F::Matcher2 Matcher2;
+ typedef typename F::FilterState FilterState;
+ typedef MultiEpsFilter<F> Filter;
+
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+
+ MultiEpsFilter(const FST1 &fst1, const FST2 &fst2,
+ Matcher1 *matcher1 = 0, Matcher2 *matcher2 = 0,
+ bool keep_multi_eps = false)
+ : filter_(fst1, fst2, matcher1, matcher2),
+ keep_multi_eps_(keep_multi_eps) {}
+
+ MultiEpsFilter(const Filter &filter, bool safe = false)
+ : filter_(filter.filter_, safe),
+ keep_multi_eps_(filter.keep_multi_eps_) {}
+
+ FilterState Start() const { return filter_.Start(); }
+
+ void SetState(StateId s1, StateId s2, const FilterState &f) {
+ return filter_.SetState(s1, s2, f);
+ }
+
+ FilterState FilterArc(Arc *arc1, Arc *arc2) const {
+ FilterState f = filter_.FilterArc(arc1, arc2);
+ if (keep_multi_eps_) {
+ if (arc1->olabel == kNoLabel)
+ arc1->ilabel = arc2->ilabel;
+ if (arc2->ilabel == kNoLabel)
+ arc2->olabel = arc1->olabel;
+ }
+ return f;
+ }
+
+ void FilterFinal(Weight *w1, Weight *w2) const {
+ return filter_.FilterFinal(w1, w2);
+ }
+
+ // Return resp matchers. Ownership stays with filter.
+ Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); }
+ Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); }
+
+ uint64 Properties(uint64 iprops) const {
+ uint64 oprops = filter_.Properties(iprops);
+ return oprops & kILabelInvariantProperties & kOLabelInvariantProperties;
+ }
+
+ private:
+ F filter_;
+ bool keep_multi_eps_;
+};
+
+} // namespace fst
+
+
+#endif // FST_LIB_COMPOSE_FILTER_H__