aboutsummaryrefslogtreecommitdiff
path: root/src/common/windows/pdb_source_line_writer.h
blob: 8c74e2ca3b8ffec825864987e3dfc911ebe24cd0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
// Copyright 2006 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// PDBSourceLineWriter uses a pdb file produced by Visual C++ to output
// a line/address map for use with BasicSourceLineResolver.

#ifndef COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
#define COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_

#include <atlcomcli.h>

#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "common/windows/module_info.h"
#include "common/windows/omap.h"

// Forward declarations of the DIA SDK interfaces that this header only
// mentions through pointers or CComPtr, so that including it does not require
// the DIA headers.  Every DIA interface named below in this header must be
// listed here; IDiaLineNumber is needed by PDBSourceLineWriter::GetLine().
struct IDiaEnumLineNumbers;
struct IDiaLineNumber;
struct IDiaSession;
struct IDiaSymbol;

namespace google_breakpad {

// These using-declarations let the class below spell map, vector, wstring and
// unordered_map without the std:: prefix.  Because they sit at namespace scope
// in a header, the same unqualified names also become visible inside
// google_breakpad in every file that includes this header.
using std::map;
using std::vector;
using std::wstring;
using std::unordered_map;

// PDBSourceLineWriter reads a pdb file (optionally located through its
// executable) and writes a Breakpad symbol file describing it.  Typical use
// is Open(), optionally SetCodeFile(), then WriteSymbols().
class PDBSourceLineWriter {
 public:
  enum FileFormat {
    PDB_FILE,  // a .pdb file containing debug symbols
    EXE_FILE,  // a .exe or .dll file
    ANY_FILE   // try PDB_FILE and then EXE_FILE
  };

  // If handle_inline is true, INLINE/INLINE_ORIGIN records are output as
  // well.
  explicit PDBSourceLineWriter(bool handle_inline = false);
  ~PDBSourceLineWriter();

  // Opens the given file.  For executable files, the corresponding pdb
  // file must be available; Open will fail if it is not.
  // If there is already a pdb file open, it is automatically closed.
  // Returns true on success.
  bool Open(const wstring& file, FileFormat format);

  // Closes the current pdb file and its associated resources.
  void Close();

  // Sets the code file full path.  This is optional for 32-bit modules.  It is
  // also optional for 64-bit modules when there is an executable file stored
  // in the same directory as the PDB file.  It is only required for 64-bit
  // modules when the executable file is not in the same location as the PDB
  // file, and it must be called after Open() and before WriteSymbols().
  // If Open() was called for an executable file, then it is an error to call
  // SetCodeFile() with a different file path and it will return false.
  bool SetCodeFile(const wstring& exe_file);

  // Writes a Breakpad symbol file from the current pdb file to |symbol_file|.
  // Returns true on success.
  bool WriteSymbols(FILE *symbol_file);

  // Retrieves information about the module's debugging file.  Returns
  // true on success and false on failure.
  bool GetModuleInfo(PDBModuleInfo *info);

  // Retrieves information about the module's PE file.  Returns
  // true on success and false on failure.
  bool GetPEInfo(PEModuleInfo *info);

  // Sets uses_guid to true if the opened file uses a new-style CodeView
  // record with a 128-bit GUID, or false if the opened file uses an old-style
  // CodeView record.  When no GUID is available, a 32-bit signature should be
  // used to identify the module instead.  If the information cannot be
  // determined, this method returns false.
  bool UsesGUID(bool *uses_guid);

 private:
  // InlineOrigin represents an INLINE_ORIGIN record in a symbol file. It's an
  // inlined function.
  struct InlineOrigin {
    // The unique id for an InlineOrigin.
    int id;
    // The name of the inlined function.
    wstring name;
  };

  // Line represents a LINE record in a symbol file. It represents a source
  // code line.
  struct Line {
    // The relative address of a line.
    DWORD rva;
    // The number of bytes this line covers.
    DWORD length;
    // The source line number.
    DWORD line_num;
    // The id of the source file in which the source line is located.
    DWORD file_id;
  };

  // Inline represents an INLINE record in a symbol file.
  class Inline {
   public:
    explicit Inline(int inline_nest_level);

    void SetOriginId(int origin_id);

    // Adds the inlinee line's range into ranges. If line is adjacent to any
    // existing lines, extends the range. Otherwise, adds line as a new range.
    void ExtendRanges(const Line& line);

    void SetCallSiteLine(DWORD call_site_line);

    void SetCallSiteFileId(DWORD call_site_file_id);

    void SetChildInlines(std::vector<std::unique_ptr<Inline>> child_inlines);

    void Print(FILE* output) const;

   private:
    // The nest level of this inline record.
    int inline_nest_level_;
    // The source line number where this inlined function is called.
    DWORD call_site_line_ = 0;
    // The id of the file where this inlined function is called.
    DWORD call_site_file_id_ = 0;
    // The id used for referring to an InlineOrigin.
    int origin_id_ = 0;
    // A map from rva to length. These are the address ranges covered by this
    // Inline.
    map<DWORD, DWORD> ranges_;
    // The list of direct Inlines inlined inside this Inline.
    vector<std::unique_ptr<Inline>> child_inlines_;
  };

  // Lines represents a map of lines inside a function with rva as the key.
  // AddLine function adds a line into the map and ensures that there is no
  // overlap between any two lines in the map.
  class Lines {
   public:
    const map<DWORD, Line>& GetLineMap() const { return line_map_; }

    // Finds the line from line_map_ that contains the given rva and returns
    // its line_num. If not found, returns 0.
    DWORD GetLineNum(DWORD rva) const;

    // Finds the line from line_map_ that contains the given rva and returns
    // its file_id. If not found, returns 0.
    DWORD GetFileId(DWORD rva) const;

    // Adds `line` into line_map_. If `line` overlaps with existing lines,
    // truncates the existing lines and adds the given line. It ensures
    // that all lines in line_map_ do not overlap with each other. For example,
    // suppose there is a line A in the map and we call AddLine with Line B.
    // Line A: rva: 100, length: 20, line_num: 10, file_id: 1
    // Line B: rva: 105, length: 10, line_num: 4, file_id: 2
    // After calling AddLine with Line B, we will have the following lines:
    // Line 1: rva: 100, length: 5, line_num: 10, file_id: 1
    // Line 2: rva: 105, length: 10, line_num: 4, file_id: 2
    // Line 3: rva: 115, length: 5, line_num: 10, file_id: 1
    void AddLine(const Line& line);

   private:
    // Finds the line from line_map_ that contains the given rva. If not found,
    // returns nullptr.
    const Line* GetLine(DWORD rva) const;
    // The key is rva. AddLine function ensures that any two lines in the map do
    // not overlap.
    map<DWORD, Line> line_map_;
  };

  // Constructs a Line from an IDiaLineNumber. The output Line is stored at
  // line. Returns true on success.
  bool GetLine(IDiaLineNumber* dia_line, Line* line) const;

  // Constructs Lines from an IDiaEnumLineNumbers. The resulting lines are
  // stored at line_list.
  // Returns true on success.
  bool GetLines(IDiaEnumLineNumbers* lines, Lines* line_list) const;

  // Outputs the line/address pairs for each line in `lines`.
  void PrintLines(const Lines& lines) const;

  // Outputs a function address and name, followed by its source line list.
  // block can be the same object as function, or it can be a reference to a
  // code block that is lexically part of this function, but resides at a
  // separate address. If has_multiple_symbols is true, this function's
  // instructions correspond to multiple symbols. Returns true on success.
  bool PrintFunction(IDiaSymbol *function, IDiaSymbol *block,
                     bool has_multiple_symbols);

  // Outputs all functions as described above.  Returns true on success.
  bool PrintFunctions();

  // Outputs all of the source files in the session's pdb file.
  // Returns true on success.
  bool PrintSourceFiles();

  // Outputs all inline origins.
  void PrintInlineOrigins() const;

  // Retrieves inlines inside the given block. It also adds inlinee lines to
  // `line_list` since inner lines are more precise source locations. If the
  // block has children with the SymTagInlineSite tag, it will recursively
  // (DFS) call itself with each child as first argument. Returns true on
  // success.
  // `block`: the IDiaSymbol that may have inline sites.
  // `line_list`: the list of lines inside current function.
  // `inline_nest_level`: the nest level of block's Inlines.
  // `inlines`: the vector to store the list of inlines for the block.
  bool GetInlines(IDiaSymbol* block,
                  Lines* line_list,
                  int inline_nest_level,
                  vector<std::unique_ptr<Inline>>* inlines);

  // Outputs all inlines.
  void PrintInlines(const vector<std::unique_ptr<Inline>>& inlines) const;

  // Outputs all of the frame information necessary to construct stack
  // backtraces in the absence of frame pointers. For x86 data stored in
  // .pdb files. Returns true on success.
  bool PrintFrameDataUsingPDB();

  // Outputs all of the frame information necessary to construct stack
  // backtraces in the absence of frame pointers. For x64 data stored in
  // .exe, .dll files. Returns true on success.
  bool PrintFrameDataUsingEXE();

  // Outputs all of the frame information necessary to construct stack
  // backtraces in the absence of frame pointers.  Returns true on success.
  bool PrintFrameData();

  // Outputs a single public symbol address and name, if the symbol corresponds
  // to a code address.  Returns true on success.  If symbol does not
  // correspond to code, returns true without outputting anything. If
  // has_multiple_symbols is true, the symbol corresponds to a code address and
  // the instructions correspond to multiple symbols.
  bool PrintCodePublicSymbol(IDiaSymbol *symbol, bool has_multiple_symbols);

  // Outputs a line identifying the PDB file that is being dumped, along with
  // its uuid and age.
  bool PrintPDBInfo();

  // Outputs a line identifying the PE file corresponding to the PDB
  // file that is being dumped, along with its code identifier,
  // which consists of its timestamp and file size.
  bool PrintPEInfo();

  // Returns true if this filename has already been seen,
  // and an ID is stored for it, or false if it has not.
  // Pure lookup in unique_files_; does not modify the cache.
  bool FileIDIsCached(const wstring& file) const {
    return unique_files_.find(file) != unique_files_.end();
  }

  // Caches this filename and ID for later reuse.  An ID previously cached for
  // the same filename is overwritten.
  void CacheFileID(const wstring& file, DWORD id) {
    unique_files_[file] = id;
  }

  // Stores this ID in the cache as a duplicate for this filename.  Does
  // nothing if the filename has not been cached with CacheFileID().
  void StoreDuplicateFileID(const wstring& file, DWORD id) {
    const auto cached = unique_files_.find(file);
    if (cached != unique_files_.end()) {
      // Map this id to the previously seen one.
      file_ids_[id] = cached->second;
    }
  }

  // Given a file's unique ID, returns the ID that should be used to
  // reference it. There may be multiple files with identical filenames
  // but different unique IDs. The cache attempts to coalesce these into
  // one ID per unique filename.  An ID with no recorded duplicate mapping is
  // returned unchanged.
  DWORD GetRealFileID(DWORD id) const {
    const auto mapped = file_ids_.find(id);
    if (mapped == file_ids_.end())
      return id;
    return mapped->second;
  }

  // Finds the PE file corresponding to the loaded PDB file, and
  // sets the code_file_ member. Returns false on failure.
  bool FindPEFile();

  // Returns the function name for a symbol.  If possible, the name is
  // undecorated.  If the symbol's decorated form indicates the size of
  // parameters on the stack, this information is returned in stack_param_size.
  // Returns true on success.  If the symbol doesn't encode parameter size
  // information, stack_param_size is set to -1.
  static bool GetSymbolFunctionName(IDiaSymbol *function, BSTR *name,
                                    int *stack_param_size);

  // Returns the number of bytes of stack space used for a function's
  // parameters.  function must have the tag SymTagFunction.  In the event of
  // a failure, returns 0, which is also a valid number of bytes.
  static int GetFunctionStackParamSize(IDiaSymbol *function);

  // The filename of the PE file corresponding to the currently-open
  // pdb file.
  wstring code_file_;

  // The session for the currently-open pdb file.
  CComPtr<IDiaSession> session_;

  // The current output file for this WriteSymbols invocation.  Defaults to
  // null so the pointer is never indeterminate, whatever the constructor
  // does.
  FILE *output_ = nullptr;

  // There may be many duplicate filenames with different IDs.
  // This maps from the DIA "unique ID" to a single ID per unique
  // filename.
  unordered_map<DWORD, DWORD> file_ids_;
  // This maps unique filenames to file IDs.
  unordered_map<wstring, DWORD> unique_files_;

  // The INLINE_ORIGIN records. The key is the function name.
  std::map<wstring, InlineOrigin> inline_origins_;

  // This is used for calculating post-transform symbol addresses and lengths.
  ImageMap image_map_;

  // If we should output INLINE/INLINE_ORIGIN records
  bool handle_inline_;

  // Copying is disallowed.  The operations are explicitly deleted so that an
  // accidental copy (including one made from inside the class) is rejected at
  // compile time rather than surfacing as an unresolved symbol at link time.
  PDBSourceLineWriter(const PDBSourceLineWriter&) = delete;
  void operator=(const PDBSourceLineWriter&) = delete;
};

}  // namespace google_breakpad

#endif  // COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_