aboutsummaryrefslogtreecommitdiff
path: root/src/lib/fst.cc
blob: 69bee35c64990e0b7a0d225cf40e11f9869bc9b5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
// fst.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// FST definitions.

#include <fst/fst.h>

// Include these so they are registered
#include <fst/compact-fst.h>
#include <fst/const-fst.h>
#include <fst/matcher-fst.h>
#include <fst/vector-fst.h>
#include <fst/edit-fst.h>

// FST flag definitions

DEFINE_bool(fst_verify_properties, false,
            "Verify fst properties queried by TestProperties");

DEFINE_string(fst_weight_separator, ",",
              "Character separator between printed composite weights; "
              "must be a single character");

DEFINE_string(fst_weight_parentheses, "",
              "Characters enclosing the first weight of a printed composite "
              "weight (e.g. pair weight, tuple weight and derived classes) to "
              "ensure proper I/O of nested composite weights; "
              "must have size 0 (none) or 2 (open and close parenthesis)");

DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache");

DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL,
             "Cache byte size that triggers garbage collection");

DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate");

DEFINE_string(save_relabel_ipairs, "",  "Save input relabel pairs to file");
DEFINE_string(save_relabel_opairs, "",  "Save output relabel pairs to file");

namespace fst {

// Register VectorFst, ConstFst and EditFst for common arcs types
REGISTER_FST(VectorFst, StdArc);
REGISTER_FST(VectorFst, LogArc);
REGISTER_FST(VectorFst, Log64Arc);
REGISTER_FST(ConstFst, StdArc);
REGISTER_FST(ConstFst, LogArc);
REGISTER_FST(ConstFst, Log64Arc);
REGISTER_FST(EditFst, StdArc);
REGISTER_FST(EditFst, LogArc);
REGISTER_FST(EditFst, Log64Arc);

// Register CompactFst for common arcs with the default (uint32) size type
static FstRegisterer<
  CompactFst<StdArc, StringCompactor<StdArc> > >
CompactFst_StdArc_StringCompactor_registerer;
static FstRegisterer<
  CompactFst<LogArc, StringCompactor<LogArc> > >
CompactFst_LogArc_StringCompactor_registerer;
static FstRegisterer<
  CompactFst<StdArc, WeightedStringCompactor<StdArc> > >
CompactFst_StdArc_WeightedStringCompactor_registerer;
static FstRegisterer<
  CompactFst<LogArc, WeightedStringCompactor<LogArc> > >
CompactFst_LogArc_WeightedStringCompactor_registerer;
static FstRegisterer<
  CompactFst<StdArc, AcceptorCompactor<StdArc> > >
CompactFst_StdArc_AcceptorCompactor_registerer;
static FstRegisterer<
  CompactFst<LogArc, AcceptorCompactor<LogArc> > >
CompactFst_LogArc_AcceptorCompactor_registerer;
static FstRegisterer<
  CompactFst<StdArc, UnweightedCompactor<StdArc> > >
CompactFst_StdArc_UnweightedCompactor_registerer;
static FstRegisterer<
  CompactFst<LogArc, UnweightedCompactor<LogArc> > >
CompactFst_LogArc_UnweightedCompactor_registerer;
static FstRegisterer<
  CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > >
CompactFst_StdArc_UnweightedAcceptorCompactor_registerer;
static FstRegisterer<
  CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > >
CompactFst_LogArc_UnweightedAcceptorCompactor_registerer;

// Fst type definitions for lookahead Fsts.
extern const char arc_lookahead_fst_type[] = "arc_lookahead";
extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead";
extern const char olabel_lookahead_fst_type[] = "olabel_lookahead";

// Identifies stream data as an FST (and its endianity)
static const int32 kFstMagicNumber = 2125659606;

// Check for Fst magic number in stream, to indicate
// caller function that the stream content is an Fst header;
bool IsFstHeader(istream &strm, const string &source) {
  int64 pos = strm.tellg();
  bool match = true;
  int32 magic_number = 0;
  ReadType(strm, &magic_number);
  if (magic_number != kFstMagicNumber
      ) {
    match = false;
  }
  strm.seekg(pos);
  return match;
}

// Check Fst magic number and read in Fst header.
// If rewind = true, reposition stream to before call (if possible).
bool FstHeader::Read(istream &strm, const string &source, bool rewind) {
  int64 pos = 0;
  if (rewind) pos = strm.tellg();
  int32 magic_number = 0;
  ReadType(strm, &magic_number);
  if (magic_number != kFstMagicNumber
      ) {
    LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source;
    if (rewind) strm.seekg(pos);
    return false;
  }

  ReadType(strm, &fsttype_);
  ReadType(strm, &arctype_);
  ReadType(strm, &version_);
  ReadType(strm, &flags_);
  ReadType(strm, &properties_);
  ReadType(strm, &start_);
  ReadType(strm, &numstates_);
  ReadType(strm, &numarcs_);
  if (!strm) {
    LOG(ERROR) << "FstHeader::Read: read failed: " << source;
    return false;
  }
  if (rewind) strm.seekg(pos);
  return true;
}

// Write Fst magic number and Fst header.
bool FstHeader::Write(ostream &strm, const string &source) const {
  WriteType(strm, kFstMagicNumber);
  WriteType(strm, fsttype_);
  WriteType(strm, arctype_);
  WriteType(strm, version_);
  WriteType(strm, flags_);
  WriteType(strm, properties_);
  WriteType(strm, start_);
  WriteType(strm, numstates_);
  WriteType(strm, numarcs_);
  return true;
}

}  // namespace fst