aboutsummaryrefslogtreecommitdiff
path: root/internal/pkgbits/decoder.go
diff options
context:
space:
mode:
Diffstat (limited to 'internal/pkgbits/decoder.go')
-rw-r--r--internal/pkgbits/decoder.go517
1 files changed, 517 insertions, 0 deletions
diff --git a/internal/pkgbits/decoder.go b/internal/pkgbits/decoder.go
new file mode 100644
index 000000000..b92e8e6eb
--- /dev/null
+++ b/internal/pkgbits/decoder.go
@@ -0,0 +1,517 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pkgbits
+
+import (
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "go/constant"
+ "go/token"
+ "io"
+ "math/big"
+ "os"
+ "runtime"
+ "strings"
+)
+
+// A PkgDecoder provides methods for decoding a package's Unified IR
+// export data.
+type PkgDecoder struct {
+ // version is the file format version.
+ version uint32
+
+ // sync indicates whether the file uses sync markers.
+ sync bool
+
+ // pkgPath is the package path for the package to be decoded.
+ //
+ // TODO(mdempsky): Remove; unneeded since CL 391014.
+ pkgPath string
+
+ // elemData is the full data payload of the encoded package.
+ // Elements are densely and contiguously packed together.
+ //
+ // The last 8 bytes of elemData are the package fingerprint.
+ elemData string
+
+ // elemEnds stores the byte-offset end positions of element
+ // bitstreams within elemData.
+ //
+ // For example, element I's bitstream data starts at elemEnds[I-1]
+ // (or 0, if I==0) and ends at elemEnds[I].
+ //
+ // Note: elemEnds is indexed by absolute indices, not
+ // section-relative indices.
+ elemEnds []uint32
+
+ // elemEndsEnds stores the index-offset end positions of relocation
+ // sections within elemEnds.
+ //
+ // For example, section K's end positions start at elemEndsEnds[K-1]
+ // (or 0, if K==0) and end at elemEndsEnds[K].
+ elemEndsEnds [numRelocs]uint32
+
+ scratchRelocEnt []RelocEnt
+}
+
+// PkgPath returns the package path for the package
+//
+// TODO(mdempsky): Remove; unneeded since CL 391014.
+func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath }
+
+// SyncMarkers reports whether pr uses sync markers.
+func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }
+
+// NewPkgDecoder returns a PkgDecoder initialized to read the Unified
+// IR export data from input. pkgPath is the package path for the
+// compilation unit that produced the export data.
+//
+// TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014.
+func NewPkgDecoder(pkgPath, input string) PkgDecoder {
+ pr := PkgDecoder{
+ pkgPath: pkgPath,
+ }
+
+ // TODO(mdempsky): Implement direct indexing of input string to
+ // avoid copying the position information.
+
+ r := strings.NewReader(input)
+
+ assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil)
+
+ switch pr.version {
+ default:
+ panic(fmt.Errorf("unsupported version: %v", pr.version))
+ case 0:
+ // no flags
+ case 1:
+ var flags uint32
+ assert(binary.Read(r, binary.LittleEndian, &flags) == nil)
+ pr.sync = flags&flagSyncMarkers != 0
+ }
+
+ assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil)
+
+ pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1])
+ assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil)
+
+ pos, err := r.Seek(0, io.SeekCurrent)
+ assert(err == nil)
+
+ pr.elemData = input[pos:]
+ assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1]))
+
+ return pr
+}
+
+// NumElems returns the number of elements in section k.
+func (pr *PkgDecoder) NumElems(k RelocKind) int {
+ count := int(pr.elemEndsEnds[k])
+ if k > 0 {
+ count -= int(pr.elemEndsEnds[k-1])
+ }
+ return count
+}
+
+// TotalElems returns the total number of elements across all sections.
+func (pr *PkgDecoder) TotalElems() int {
+ return len(pr.elemEnds)
+}
+
+// Fingerprint returns the package fingerprint.
+func (pr *PkgDecoder) Fingerprint() [8]byte {
+ var fp [8]byte
+ copy(fp[:], pr.elemData[len(pr.elemData)-8:])
+ return fp
+}
+
+// AbsIdx returns the absolute index for the given (section, index)
+// pair.
+func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int {
+ absIdx := int(idx)
+ if k > 0 {
+ absIdx += int(pr.elemEndsEnds[k-1])
+ }
+ if absIdx >= int(pr.elemEndsEnds[k]) {
+ errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds)
+ }
+ return absIdx
+}
+
+// DataIdx returns the raw element bitstream for the given (section,
+// index) pair.
+func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string {
+ absIdx := pr.AbsIdx(k, idx)
+
+ var start uint32
+ if absIdx > 0 {
+ start = pr.elemEnds[absIdx-1]
+ }
+ end := pr.elemEnds[absIdx]
+
+ return pr.elemData[start:end]
+}
+
+// StringIdx returns the string value for the given string index.
+func (pr *PkgDecoder) StringIdx(idx Index) string {
+ return pr.DataIdx(RelocString, idx)
+}
+
+// NewDecoder returns a Decoder for the given (section, index) pair,
+// and decodes the given SyncMarker from the element bitstream.
+func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
+ r := pr.NewDecoderRaw(k, idx)
+ r.Sync(marker)
+ return r
+}
+
+// TempDecoder returns a Decoder for the given (section, index) pair,
+// and decodes the given SyncMarker from the element bitstream.
+// If possible the Decoder should be RetireDecoder'd when it is no longer
+// needed, this will avoid heap allocations.
+func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
+ r := pr.TempDecoderRaw(k, idx)
+ r.Sync(marker)
+ return r
+}
+
+func (pr *PkgDecoder) RetireDecoder(d *Decoder) {
+ pr.scratchRelocEnt = d.Relocs
+ d.Relocs = nil
+}
+
+// NewDecoderRaw returns a Decoder for the given (section, index) pair.
+//
+// Most callers should use NewDecoder instead.
+func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder {
+ r := Decoder{
+ common: pr,
+ k: k,
+ Idx: idx,
+ }
+
+ // TODO(mdempsky) r.data.Reset(...) after #44505 is resolved.
+ r.Data = *strings.NewReader(pr.DataIdx(k, idx))
+
+ r.Sync(SyncRelocs)
+ r.Relocs = make([]RelocEnt, r.Len())
+ for i := range r.Relocs {
+ r.Sync(SyncReloc)
+ r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
+ }
+
+ return r
+}
+
+func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder {
+ r := Decoder{
+ common: pr,
+ k: k,
+ Idx: idx,
+ }
+
+ r.Data.Reset(pr.DataIdx(k, idx))
+ r.Sync(SyncRelocs)
+ l := r.Len()
+ if cap(pr.scratchRelocEnt) >= l {
+ r.Relocs = pr.scratchRelocEnt[:l]
+ pr.scratchRelocEnt = nil
+ } else {
+ r.Relocs = make([]RelocEnt, l)
+ }
+ for i := range r.Relocs {
+ r.Sync(SyncReloc)
+ r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
+ }
+
+ return r
+}
+
+// A Decoder provides methods for decoding an individual element's
+// bitstream data.
+type Decoder struct {
+ common *PkgDecoder
+
+ Relocs []RelocEnt
+ Data strings.Reader
+
+ k RelocKind
+ Idx Index
+}
+
+func (r *Decoder) checkErr(err error) {
+ if err != nil {
+ errorf("unexpected decoding error: %w", err)
+ }
+}
+
+func (r *Decoder) rawUvarint() uint64 {
+ x, err := readUvarint(&r.Data)
+ r.checkErr(err)
+ return x
+}
+
+// readUvarint is a type-specialized copy of encoding/binary.ReadUvarint.
+// This avoids the interface conversion and thus has better escape properties,
+// which flows up the stack.
+func readUvarint(r *strings.Reader) (uint64, error) {
+ var x uint64
+ var s uint
+ for i := 0; i < binary.MaxVarintLen64; i++ {
+ b, err := r.ReadByte()
+ if err != nil {
+ if i > 0 && err == io.EOF {
+ err = io.ErrUnexpectedEOF
+ }
+ return x, err
+ }
+ if b < 0x80 {
+ if i == binary.MaxVarintLen64-1 && b > 1 {
+ return x, overflow
+ }
+ return x | uint64(b)<<s, nil
+ }
+ x |= uint64(b&0x7f) << s
+ s += 7
+ }
+ return x, overflow
+}
+
+var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer")
+
+func (r *Decoder) rawVarint() int64 {
+ ux := r.rawUvarint()
+
+ // Zig-zag decode.
+ x := int64(ux >> 1)
+ if ux&1 != 0 {
+ x = ^x
+ }
+ return x
+}
+
+func (r *Decoder) rawReloc(k RelocKind, idx int) Index {
+ e := r.Relocs[idx]
+ assert(e.Kind == k)
+ return e.Idx
+}
+
+// Sync decodes a sync marker from the element bitstream and asserts
+// that it matches the expected marker.
+//
+// If r.common.sync is false, then Sync is a no-op.
+func (r *Decoder) Sync(mWant SyncMarker) {
+ if !r.common.sync {
+ return
+ }
+
+ pos, _ := r.Data.Seek(0, io.SeekCurrent)
+ mHave := SyncMarker(r.rawUvarint())
+ writerPCs := make([]int, r.rawUvarint())
+ for i := range writerPCs {
+ writerPCs[i] = int(r.rawUvarint())
+ }
+
+ if mHave == mWant {
+ return
+ }
+
+ // There's some tension here between printing:
+ //
+ // (1) full file paths that tools can recognize (e.g., so emacs
+ // hyperlinks the "file:line" text for easy navigation), or
+ //
+ // (2) short file paths that are easier for humans to read (e.g., by
+ // omitting redundant or irrelevant details, so it's easier to
+ // focus on the useful bits that remain).
+ //
+ // The current formatting favors the former, as it seems more
+ // helpful in practice. But perhaps the formatting could be improved
+ // to better address both concerns. For example, use relative file
+ // paths if they would be shorter, or rewrite file paths to contain
+ // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how
+ // to reliably expand that again.
+
+ fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos)
+
+ fmt.Printf("\nfound %v, written at:\n", mHave)
+ if len(writerPCs) == 0 {
+ fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath)
+ }
+ for _, pc := range writerPCs {
+ fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc)))
+ }
+
+ fmt.Printf("\nexpected %v, reading at:\n", mWant)
+ var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size?
+ n := runtime.Callers(2, readerPCs[:])
+ for _, pc := range fmtFrames(readerPCs[:n]...) {
+ fmt.Printf("\t%s\n", pc)
+ }
+
+ // We already printed a stack trace for the reader, so now we can
+ // simply exit. Printing a second one with panic or base.Fatalf
+ // would just be noise.
+ os.Exit(1)
+}
+
+// Bool decodes and returns a bool value from the element bitstream.
+func (r *Decoder) Bool() bool {
+ r.Sync(SyncBool)
+ x, err := r.Data.ReadByte()
+ r.checkErr(err)
+ assert(x < 2)
+ return x != 0
+}
+
+// Int64 decodes and returns an int64 value from the element bitstream.
+func (r *Decoder) Int64() int64 {
+ r.Sync(SyncInt64)
+ return r.rawVarint()
+}
+
+// Uint64 decodes and returns a uint64 value from the element bitstream.
+func (r *Decoder) Uint64() uint64 {
+ r.Sync(SyncUint64)
+ return r.rawUvarint()
+}
+
+// Len decodes and returns a non-negative int value from the element bitstream.
+func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v }
+
+// Int decodes and returns an int value from the element bitstream.
+func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v }
+
+// Uint decodes and returns a uint value from the element bitstream.
+func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v }
+
+// Code decodes a Code value from the element bitstream and returns
+// its ordinal value. It's the caller's responsibility to convert the
+// result to an appropriate Code type.
+//
+// TODO(mdempsky): Ideally this method would have signature "Code[T
+// Code] T" instead, but we don't allow generic methods and the
+// compiler can't depend on generics yet anyway.
+func (r *Decoder) Code(mark SyncMarker) int {
+ r.Sync(mark)
+ return r.Len()
+}
+
+// Reloc decodes a relocation of expected section k from the element
+// bitstream and returns an index to the referenced element.
+func (r *Decoder) Reloc(k RelocKind) Index {
+ r.Sync(SyncUseReloc)
+ return r.rawReloc(k, r.Len())
+}
+
+// String decodes and returns a string value from the element
+// bitstream.
+func (r *Decoder) String() string {
+ r.Sync(SyncString)
+ return r.common.StringIdx(r.Reloc(RelocString))
+}
+
+// Strings decodes and returns a variable-length slice of strings from
+// the element bitstream.
+func (r *Decoder) Strings() []string {
+ res := make([]string, r.Len())
+ for i := range res {
+ res[i] = r.String()
+ }
+ return res
+}
+
+// Value decodes and returns a constant.Value from the element
+// bitstream.
+func (r *Decoder) Value() constant.Value {
+ r.Sync(SyncValue)
+ isComplex := r.Bool()
+ val := r.scalar()
+ if isComplex {
+ val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar()))
+ }
+ return val
+}
+
+func (r *Decoder) scalar() constant.Value {
+ switch tag := CodeVal(r.Code(SyncVal)); tag {
+ default:
+ panic(fmt.Errorf("unexpected scalar tag: %v", tag))
+
+ case ValBool:
+ return constant.MakeBool(r.Bool())
+ case ValString:
+ return constant.MakeString(r.String())
+ case ValInt64:
+ return constant.MakeInt64(r.Int64())
+ case ValBigInt:
+ return constant.Make(r.bigInt())
+ case ValBigRat:
+ num := r.bigInt()
+ denom := r.bigInt()
+ return constant.Make(new(big.Rat).SetFrac(num, denom))
+ case ValBigFloat:
+ return constant.Make(r.bigFloat())
+ }
+}
+
+func (r *Decoder) bigInt() *big.Int {
+ v := new(big.Int).SetBytes([]byte(r.String()))
+ if r.Bool() {
+ v.Neg(v)
+ }
+ return v
+}
+
+func (r *Decoder) bigFloat() *big.Float {
+ v := new(big.Float).SetPrec(512)
+ assert(v.UnmarshalText([]byte(r.String())) == nil)
+ return v
+}
+
+// @@@ Helpers
+
+// TODO(mdempsky): These should probably be removed. I think they're a
+// smell that the export data format is not yet quite right.
+
+// PeekPkgPath returns the package path for the specified package
+// index.
+func (pr *PkgDecoder) PeekPkgPath(idx Index) string {
+ var path string
+ {
+ r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef)
+ path = r.String()
+ pr.RetireDecoder(&r)
+ }
+ if path == "" {
+ path = pr.pkgPath
+ }
+ return path
+}
+
+// PeekObj returns the package path, object name, and CodeObj for the
+// specified object index.
+func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) {
+ var ridx Index
+ var name string
+ var rcode int
+ {
+ r := pr.TempDecoder(RelocName, idx, SyncObject1)
+ r.Sync(SyncSym)
+ r.Sync(SyncPkg)
+ ridx = r.Reloc(RelocPkg)
+ name = r.String()
+ rcode = r.Code(SyncCodeObj)
+ pr.RetireDecoder(&r)
+ }
+
+ path := pr.PeekPkgPath(ridx)
+ assert(name != "")
+
+ tag := CodeObj(rcode)
+
+ return path, name, tag
+}