gopls/internal/lsp/cache/parse_cache.go


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298

// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
	"container/heap"
	"context"
	"go/token"
	"runtime"
	"sort"
	"sync"

	"golang.org/x/sync/errgroup"
	"golang.org/x/tools/gopls/internal/lsp/source"
	"golang.org/x/tools/internal/memoize"
)

// This file contains an implementation of a bounded-size parse cache, that
// offsets the base token.Pos value of each cached file so that they may be
// later described by a single dedicated FileSet.
//
// This is achieved by tracking a monotonic offset in the token.Pos space that
// is incremented before parsing, to allow room for the resulting parsed file.

// parseCacheMaxFiles is the maximum number of entries held by a parseCache.
// Once the cache holds this many entries, storing a new one evicts the entry
// with the oldest access time.
//
// Keep 200 recently parsed files, based on the following rationale:
//   - One of the most important benefits of caching is avoiding re-parsing
//     everything in a package when working on a single file. No packages in
//     Kubernetes have > 200 files (only one has > 100).
//   - Experience has shown that ~1000 parsed files can use noticeable space.
//     200 feels like a sweet spot between limiting cache size and optimizing
//     cache hits for low-latency operations.
const parseCacheMaxFiles = 200

// parsePadding is additional padding allocated between entries in the parse
// cache to allow for increases in length (such as appending missing braces)
// caused by fixAST.
//
// Each new cache entry reserves len(src) + parsePadding + 1 positions in the
// token.Pos space.
//
// This is used to mitigate a chicken and egg problem: we must know the base
// offset of the file we're about to parse, before we start parsing, and yet
// src fixups may affect the actual size of the parsed content (and therefore
// the offsets of subsequent files).
//
// When we encounter a file that no longer fits in its allocated space in the
// fileset, we have no choice but to re-parse it. Leaving a generous padding
// reduces the likelihood of this "slow path".
//
// This value is mutable for testing, so that we can exercise the slow path.
var parsePadding = 1000 // mutable for testing

// A parseCache holds a bounded number of recently accessed parsed Go files. As
// new files are stored, older files may be evicted from the cache.
//
// The parseCache.parseFiles method exposes a batch API for parsing (and
// caching) multiple files. This is necessary for type-checking, where files
// must be parsed in a common fileset.
//
// startParse holds mu while it reads and updates the other fields. The map m
// (lookup by parseKey) and the heap lru (ordering by access time) are kept in
// sync: they always contain the same number of entries, and startParse panics
// if that invariant is violated. The map is allocated lazily on first insert.
type parseCache struct {
	mu         sync.Mutex
	m          map[parseKey]*parseCacheEntry
	lru        queue     // min-atime priority queue of *parseCacheEntry
	clock      uint64    // clock time, incremented when the cache is updated
	nextOffset token.Pos // token.Pos offset for the next parsed file
}

// parseKey uniquely identifies a parsed Go file.
//
// It pairs the identity of the file handle with the mode it was parsed in, so
// the same file parsed in two different modes occupies two cache entries.
type parseKey struct {
	file source.FileIdentity
	mode source.ParseMode
}

// A parseCacheEntry is a single slot of a parseCache: the key it is stored
// under, the promise producing its parsed file, and the bookkeeping needed to
// keep it ordered in the LRU heap.
//
// lruIndex is the entry's current position in the parseCache.lru slice. It is
// maintained by queue.Swap and queue.Push, and is passed to heap.Fix when the
// entry's access time changes.
type parseCacheEntry struct {
	key      parseKey
	promise  *memoize.Promise // memoize.Promise[*source.ParsedGoFile]
	atime    uint64           // clock time of last access
	lruIndex int
}

// startParse prepares a parsing pass, using the following steps:
//   - search for cache hits, refreshing their access time
//   - create new promises for cache misses, each with its own reserved range
//     of the token.Pos space
//   - store the new promises in the cache, evicting the entry with the oldest
//     access time whenever the cache already holds parseCacheMaxFiles entries
//
// The resulting slice has an entry for every given file handle, though some
// entries may be nil if there was an error reading the file (in which case the
// resulting error will be non-nil). The returned error is the first error
// returned by fh.Read, or nil if every file could be read.
//
// startParse only hands out promises; awaiting them is left to the caller
// (see parseFiles).
func (c *parseCache) startParse(mode source.ParseMode, fhs ...source.FileHandle) ([]*memoize.Promise, error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Any parsing pass increments the clock, as we'll update access times.
	// (technically, if fhs is empty this isn't necessary, but that's a degenerate case).
	//
	// All entries parsed from a single call get the same access time.
	c.clock++

	var (
		promises       = make([]*memoize.Promise, len(fhs)) // one slot per file handle; nil if unreadable
		firstReadError error                                // first error from fh.Read, or nil
	)
	for i, fh := range fhs {
		src, err := fh.Read()
		if err != nil {
			if firstReadError == nil {
				firstReadError = err
			}
			continue
		}

		key := parseKey{
			file: fh.FileIdentity(),
			mode: mode,
		}

		// Check for a cache hit.
		if e, ok := c.m[key]; ok {
			e.atime = c.clock
			heap.Fix(&c.lru, e.lruIndex) // restore heap order after the atime change
			promises[i] = e.promise
			continue
		}

		// ...otherwise, create a new promise to parse with a non-overlapping offset
		fset := token.NewFileSet()
		if c.nextOffset > 0 {
			// Add a dummy file so that this parsed file does not overlap with others.
			fset.AddFile("", 1, int(c.nextOffset))
		}
		c.nextOffset += token.Pos(len(src) + parsePadding + 1) // leave room for src fixes
		fh := fh                                               // capture the per-iteration value in the closure below
		promise := memoize.NewPromise(string(fh.URI()), func(ctx context.Context, _ interface{}) interface{} {
			return parseGoSrc(ctx, fset, fh.URI(), src, mode)
		})
		promises[i] = promise

		var e *parseCacheEntry
		if len(c.lru) < parseCacheMaxFiles {
			// add new entry
			e = new(parseCacheEntry)
			if c.m == nil {
				c.m = make(map[parseKey]*parseCacheEntry)
			}
		} else {
			// evict oldest entry, and recycle its struct for the new one
			e = heap.Pop(&c.lru).(*parseCacheEntry)
			delete(c.m, e.key)
		}
		e.key = key
		e.promise = promise
		e.atime = c.clock
		c.m[e.key] = e
		heap.Push(&c.lru, e)
	}

	// The map and the heap must always describe the same set of entries.
	if len(c.m) != len(c.lru) {
		panic("map and LRU are inconsistent")
	}

	return promises, firstReadError
}

// parseFiles returns a ParsedGoFile for each of the given file handles, parsed
// in the requested parse mode.
//
// The resulting slice is positionally aligned with fhs. If a file could not be
// read, its entry is nil, and the first such read error is returned alongside
// the remaining results. If awaiting any of the parse promises fails,
// parseFiles instead returns a nil slice and a nil FileSet together with that
// error.
//
// The second result is a FileSet describing all resulting parsed files. Every
// non-nil result is a copy of the cached ParsedGoFile whose Tok has been
// replaced by the corresponding file of that FileSet.
//
// For parsed files that already exist in the cache, access time will be
// updated. For others, parseFiles will parse and store the results in the
// cache, evicting older entries as needed.
func (c *parseCache) parseFiles(ctx context.Context, mode source.ParseMode, fhs ...source.FileHandle) ([]*source.ParsedGoFile, *token.FileSet, error) {
	promises, firstReadError := c.startParse(mode, fhs...)

	// Await all parsing.
	var g errgroup.Group
	g.SetLimit(runtime.GOMAXPROCS(-1)) // parsing is CPU-bound.
	pgfs := make([]*source.ParsedGoFile, len(fhs))
	for i, promise := range promises {
		if promise == nil {
			continue // unreadable file; already reflected in firstReadError
		}
		i := i
		promise := promise
		g.Go(func() error {
			result, err := promise.Get(ctx, nil)
			if err != nil {
				return err
			}
			pgfs[i] = result.(*source.ParsedGoFile)
			return nil
		})
	}
	if err := g.Wait(); err != nil {
		return nil, nil, err
	}

	// Construct a token.FileSet mapping all parsed files, and update their
	// Tok to the corresponding file in the new fileset.
	//
	// In the unlikely event that a parsed file no longer fits in its allocated
	// space in the FileSet range, it will need to be re-parsed.

	// The same file handle may be passed more than once, in which case several
	// results share a single token.File. Record each distinct token.File once,
	// remembering every result index that refers to it, so that a re-parse
	// below can update all of them.
	var tokenFiles []*token.File
	fileIndexes := make(map[*token.File][]int) // to look up original indexes after sorting
	for i, pgf := range pgfs {
		if pgf == nil {
			continue
		}
		if _, seen := fileIndexes[pgf.Tok]; !seen {
			tokenFiles = append(tokenFiles, pgf.Tok)
		}
		fileIndexes[pgf.Tok] = append(fileIndexes[pgf.Tok], i)
	}

	sort.Slice(tokenFiles, func(i, j int) bool {
		return tokenFiles[i].Base() < tokenFiles[j].Base()
	})

	// A file that runs into the base of its successor cannot be placed in the
	// stitched FileSet as-is; set it aside for re-parsing.
	var needReparse []*token.File // files requiring reparsing
	out := tokenFiles[:0]         // in-place filter; only writes at or before the read position
	for i, f := range tokenFiles {
		if i < len(tokenFiles)-1 && f.Base()+f.Size() >= tokenFiles[i+1].Base() {
			needReparse = append(needReparse, f)
		} else {
			out = append(out, f)
		}
	}
	fset := source.FileSetFor(out...)

	// Re-parse any remaining files using the stitched fileSet.
	for _, f := range needReparse {
		// Start from scratch, rather than using ParsedGoFile.Src, so that source
		// fixing operates exactly the same (note that fixing stops after a limited
		// number of tries).
		indexes := fileIndexes[f]
		// All indexes sharing f were served by the same cache entry, and hence
		// have the same file identity; read through the first of them.
		fh := fhs[indexes[0]]

		var reparsed *source.ParsedGoFile
		src, err := fh.Read()
		if err != nil {
			// Leave the result nil: the stale parse refers to a token.File that
			// is not part of fset, and must not be returned.
			if firstReadError == nil {
				firstReadError = err
			}
		} else {
			reparsed = parseGoSrc(ctx, fset, fh.URI(), src, mode)
		}
		for _, i := range indexes {
			pgfs[i] = reparsed
		}
	}

	// Ensure each PGF refers to a token.File from the new FileSet.
	for i, pgf := range pgfs {
		if pgf == nil {
			continue
		}
		newTok := fset.File(token.Pos(pgf.Tok.Base()))
		if newTok == nil {
			panic("internal error: missing tok for " + pgf.URI)
		}
		if newTok.Base() != pgf.Tok.Base() || newTok.Size() != pgf.Tok.Size() {
			panic("internal error: mismatching token.File in synthetic FileSet")
		}
		// Copy before updating Tok, so that the cached ParsedGoFile is not mutated.
		pgf2 := *pgf
		pgf2.Tok = newTok
		pgfs[i] = &pgf2
	}

	return pgfs, fset, firstReadError
}

// -- priority queue boilerplate --

// queue is a min-atime priority queue of cache entries. Its methods implement
// the interface expected by container/heap, and keep each entry's lruIndex
// equal to its position in the slice.
type queue []*parseCacheEntry

// Len reports the number of entries currently in the queue.
func (q queue) Len() int {
	n := len(q)
	return n
}

// Less reports whether the entry at index i was last accessed strictly before
// the entry at index j, so that the oldest entry sorts first.
func (q queue) Less(i, j int) bool {
	left, right := q[i], q[j]
	return left.atime < right.atime
}

// Swap exchanges the entries at indexes i and j, and updates their lruIndex
// fields to reflect their new positions.
func (q queue) Swap(i, j int) {
	atI, atJ := q[i], q[j]

	// Move each entry to the other slot, recording where it ended up.
	q[i] = atJ
	atJ.lruIndex = i

	q[j] = atI
	atI.lruIndex = j
}

// Push appends x, which must be a *parseCacheEntry, to the end of the queue
// and records its position in the entry's lruIndex. It is intended to be
// called through heap.Push, which then restores the heap ordering.
func (q *queue) Push(x interface{}) {
	entry := x.(*parseCacheEntry)
	entries := *q
	entry.lruIndex = len(entries)
	*q = append(entries, entry)
}

func (q *queue) Pop() interface{} {
	last := len(*q) - 1
	e := (*q)[last]
	(*q)[last] = nil // aid GC
	*q = (*q)[:last]
	return e
}