aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSasha Smundak <asmundak@google.com>2021-02-24 05:37:04 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2021-02-24 05:37:04 +0000
commitb1ac553fdc1a97171249fac35e4c743554cb8de4 (patch)
tree2efd7da6ec6557b9aaed7932d19e07e1beade288
parent7b6d2ef30437199c2b437649bc92fdf6f8379d09 (diff)
parent20e766c8368bbd769f453ce41f8571a75519c125 (diff)
downloadstarlark-go-b1ac553fdc1a97171249fac35e4c743554cb8de4.tar.gz
Import from upstream. am: 2ba90398f8 am: 0710991d7f am: 20e766c836
Original change: https://android-review.googlesource.com/c/platform/external/starlark-go/+/1592915 MUST ONLY BE SUBMITTED BY AUTOMERGER Change-Id: I7de853231e5f99ff3e6d803ba7d0e774f329fbc3
-rw-r--r--.travis.yml20
-rw-r--r--CNAME1
-rw-r--r--LICENSE29
-rw-r--r--README.md181
-rw-r--r--cmd/starlark/starlark.go141
-rw-r--r--doc/impl.md242
-rw-r--r--doc/spec.md4263
-rw-r--r--docs/CNAME1
-rw-r--r--docs/cmd/starlark/index.html9
-rw-r--r--docs/index.html11
-rw-r--r--docs/internal/chunkedfile/index.html9
-rw-r--r--docs/internal/compile/index.html9
-rw-r--r--docs/repl/index.html9
-rw-r--r--docs/resolve/index.html9
-rw-r--r--docs/starlark/index.html9
-rw-r--r--docs/starlarkstruct/index.html9
-rw-r--r--docs/starlarktest/index.html9
-rw-r--r--docs/syntax/index.html9
-rw-r--r--docs/update.go71
-rw-r--r--go.mod13
-rw-r--r--go.sum74
-rw-r--r--internal/chunkedfile/chunkedfile.go124
-rw-r--r--internal/compile/codegen_test.go118
-rw-r--r--internal/compile/compile.go1916
-rw-r--r--internal/compile/compile_test.go74
-rw-r--r--internal/compile/serial.go395
-rw-r--r--internal/spell/spell.go115
-rw-r--r--lib/proto/cmd/star2proto/star2proto.go142
-rw-r--r--lib/proto/proto.go1232
-rw-r--r--repl/repl.go185
-rw-r--r--resolve/binding.go74
-rw-r--r--resolve/resolve.go969
-rw-r--r--resolve/resolve_test.go89
-rw-r--r--resolve/testdata/resolve.star383
-rw-r--r--starlark/bench_test.go169
-rw-r--r--starlark/debug.go42
-rw-r--r--starlark/empty.s3
-rw-r--r--starlark/eval.go1618
-rw-r--r--starlark/eval_test.go945
-rw-r--r--starlark/example_test.go322
-rw-r--r--starlark/hashtable.go373
-rw-r--r--starlark/hashtable_test.go125
-rw-r--r--starlark/int.go436
-rw-r--r--starlark/int_generic.go33
-rw-r--r--starlark/int_posix64.go67
-rw-r--r--starlark/int_test.go102
-rw-r--r--starlark/interp.go669
-rw-r--r--starlark/library.go2251
-rw-r--r--starlark/profile.go449
-rw-r--r--starlark/profile_test.go83
-rw-r--r--starlark/testdata/assign.star354
-rw-r--r--starlark/testdata/benchmark.star62
-rw-r--r--starlark/testdata/bool.star62
-rw-r--r--starlark/testdata/builtins.star225
-rw-r--r--starlark/testdata/bytes.star159
-rw-r--r--starlark/testdata/control.star64
-rw-r--r--starlark/testdata/dict.star248
-rw-r--r--starlark/testdata/float.star504
-rw-r--r--starlark/testdata/function.star323
-rw-r--r--starlark/testdata/int.star260
-rw-r--r--starlark/testdata/json.star147
-rw-r--r--starlark/testdata/list.star276
-rw-r--r--starlark/testdata/misc.star139
-rw-r--r--starlark/testdata/module.star17
-rw-r--r--starlark/testdata/paths.star250
-rw-r--r--starlark/testdata/recursion.star43
-rw-r--r--starlark/testdata/set.star118
-rw-r--r--starlark/testdata/string.star472
-rw-r--r--starlark/testdata/tuple.star55
-rw-r--r--starlark/unpack.go319
-rw-r--r--starlark/value.go1431
-rw-r--r--starlark/value_test.go46
-rw-r--r--starlarkjson/json.go478
-rw-r--r--starlarkstruct/module.go43
-rw-r--r--starlarkstruct/struct.go281
-rw-r--r--starlarkstruct/struct_test.go69
-rw-r--r--starlarkstruct/testdata/struct.star63
-rw-r--r--starlarktest/assert.star51
-rw-r--r--starlarktest/starlarktest.go147
-rw-r--r--syntax/grammar.txt129
-rw-r--r--syntax/parse.go1028
-rw-r--r--syntax/parse_test.go487
-rw-r--r--syntax/quote.go309
-rw-r--r--syntax/quote_test.go65
-rw-r--r--syntax/scan.go1123
-rw-r--r--syntax/scan_test.go310
-rw-r--r--syntax/syntax.go529
-rw-r--r--syntax/testdata/errors.star212
-rw-r--r--syntax/testdata/scan.star1324
-rw-r--r--syntax/walk.go163
-rw-r--r--syntax/walk_test.go103
91 files changed, 31119 insertions, 0 deletions
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..23fcb4f
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,20 @@
+language: go
+
+go_import_path: go.starlark.net
+
+go:
+ - "1.13.x"
+ - "1.14.x"
+ - "1.15.x"
+ - "master"
+
+env:
+ - "GO111MODULE=on"
+
+script:
+ - "go test -mod=readonly ./..."
+ - "cp go.mod go.mod.orig"
+ - "cp go.sum go.sum.orig"
+ - "go mod tidy"
+ - "diff go.mod.orig go.mod"
+ - "diff go.sum.orig go.sum"
diff --git a/CNAME b/CNAME
new file mode 100644
index 0000000..7298e4c
--- /dev/null
+++ b/CNAME
@@ -0,0 +1 @@
+go.starlark.net
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..a6609a1
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,29 @@
+Copyright (c) 2017 The Bazel Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the
+ distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1ea6695
--- /dev/null
+++ b/README.md
@@ -0,0 +1,181 @@
+
+<!-- This file is the project homepage for go.starlark.net -->
+
+# Starlark in Go
+
+[![Travis CI](https://travis-ci.org/google/starlark-go.svg)](https://travis-ci.org/google/starlark-go)
+[![GoDoc](https://godoc.org/go.starlark.net/starlark?status.svg)](https://godoc.org/go.starlark.net/starlark)
+
+This is the home of the _Starlark in Go_ project.
+Starlark in Go is an interpreter for Starlark, implemented in Go.
+Starlark was formerly known as Skylark.
+The new import path for Go packages is `"go.starlark.net/starlark"`.
+
+Starlark is a dialect of Python intended for use as a configuration language.
+Like Python, it is an untyped dynamic language with high-level data
+types, first-class functions with lexical scope, and garbage collection.
+Unlike CPython, independent Starlark threads execute in parallel, so
+Starlark workloads scale well on parallel machines.
+Starlark is a small and simple language with a familiar and highly
+readable syntax. You can use it as an expressive notation for
+structured data, defining functions to eliminate repetition, or you
+can use it to add scripting capabilities to an existing application.
+
+A Starlark interpreter is typically embedded within a larger
+application, and the application may define additional domain-specific
+functions and data types beyond those provided by the core language.
+For example, Starlark was originally developed for the
+[Bazel build tool](https://bazel.build).
+Bazel uses Starlark as the notation both for its BUILD files (like
+Makefiles, these declare the executables, libraries, and tests in a
+directory) and for [its macro
+language](https://docs.bazel.build/versions/master/skylark/language.html),
+through which Bazel is extended with custom logic to support new
+languages and compilers.
+
+
+## Documentation
+
+* Language definition: [doc/spec.md](doc/spec.md)
+
+* About the Go implementation: [doc/impl.md](doc/impl.md)
+
+* API documentation: [godoc.org/go.starlark.net/starlark](https://godoc.org/go.starlark.net/starlark)
+
+* Mailing list: [starlark-go](https://groups.google.com/forum/#!forum/starlark-go)
+
+* Issue tracker: [https://github.com/google/starlark-go/issues](https://github.com/google/starlark-go/issues)
+
+### Getting started
+
+Build the code:
+
+```shell
+# check out the code and dependencies,
+# and install interpreter in $GOPATH/bin
+$ go get -u go.starlark.net/cmd/starlark
+```
+
+Run the interpreter:
+
+```console
+$ cat coins.star
+coins = {
+ 'dime': 10,
+ 'nickel': 5,
+ 'penny': 1,
+ 'quarter': 25,
+}
+print('By name:\t' + ', '.join(sorted(coins.keys())))
+print('By value:\t' + ', '.join(sorted(coins.keys(), key=coins.get)))
+
+$ starlark coins.star
+By name: dime, nickel, penny, quarter
+By value: penny, nickel, dime, quarter
+```
+
+Interact with the read-eval-print loop (REPL):
+
+```pycon
+$ starlark
+>>> def fibonacci(n):
+... res = list(range(n))
+... for i in res[2:]:
+... res[i] = res[i-2] + res[i-1]
+... return res
+...
+>>> fibonacci(10)
+[0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
+>>>
+```
+
+When you have finished, type `Ctrl-D` to close the REPL's input stream.
+
+Embed the interpreter in your Go program:
+
+```go
+import "go.starlark.net/starlark"
+
+// Execute Starlark program in a file.
+thread := &starlark.Thread{Name: "my thread"}
+globals, err := starlark.ExecFile(thread, "fibonacci.star", nil, nil)
+if err != nil { ... }
+
+// Retrieve a module global.
+fibonacci := globals["fibonacci"]
+
+// Call Starlark function from Go.
+v, err := starlark.Call(thread, fibonacci, starlark.Tuple{starlark.MakeInt(10)}, nil)
+if err != nil { ... }
+fmt.Printf("fibonacci(10) = %v\n", v) // fibonacci(10) = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
+```
+
+See [starlark/example_test.go](starlark/example_test.go) for more examples.
+
+### Contributing
+
+We welcome submissions but please let us know what you're working on
+if you want to change or add to the Starlark repository.
+
+Before undertaking to write something new for the Starlark project,
+please file an issue or claim an existing issue.
+All significant changes to the language or to the interpreter's Go
+API must be discussed before they can be accepted.
+This gives all participants a chance to validate the design and to
+avoid duplication of effort.
+
+Despite some differences, the Go implementation of Starlark strives to
+match the behavior of [the Java implementation](https://github.com/bazelbuild/bazel)
+used by Bazel and maintained by the Bazel team.
+For that reason, proposals to change the language itself should
+generally be directed to [the Starlark site](
+https://github.com/bazelbuild/starlark/), not to the maintainers of this
+project.
+Only once there is consensus that a language change is desirable may
+its Go implementation proceed.
+
+We use GitHub pull requests for contributions.
+
+Please complete Google's contributor license agreement (CLA) before
+sending your first change to the project. If you are the copyright
+holder, you will need to agree to the
+[individual contributor license agreement](https://cla.developers.google.com/about/google-individual),
+which can be completed online.
+If your organization is the copyright holder, the organization will
+need to agree to the [corporate contributor license agreement](https://cla.developers.google.com/about/google-corporate).
+If the copyright holder for your contribution has already completed
+the agreement in connection with another Google open source project,
+it does not need to be completed again.
+
+### Stability
+
+We reserve the right to make breaking language and API changes at this
+stage in the project, although we will endeavor to keep them to a minimum.
+Once the Bazel team has finalized the version 1 language specification,
+we will be more rigorous with interface stability.
+
+### Credits
+
+Starlark was designed and implemented in Java by
+Ulf Adams,
+Lukács Berki,
+Jon Brandvein,
+John Field,
+Laurent Le Brun,
+Dmitry Lomov,
+Damien Martin-Guillerez,
+Vladimir Moskva, and
+Florian Weikert,
+standing on the shoulders of the Python community.
+The Go implementation was written by Alan Donovan and Jay Conrod;
+its scanner was derived from one written by Russ Cox.
+
+### Legal
+
+Starlark in Go is Copyright (c) 2018 The Bazel Authors.
+All rights reserved.
+
+It is provided under a 3-clause BSD license:
+[LICENSE](https://github.com/google/starlark-go/blob/master/LICENSE).
+
+Starlark in Go is not an official Google product.
diff --git a/cmd/starlark/starlark.go b/cmd/starlark/starlark.go
new file mode 100644
index 0000000..3825f00
--- /dev/null
+++ b/cmd/starlark/starlark.go
@@ -0,0 +1,141 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The starlark command interprets a Starlark file.
+// With no arguments, it starts a read-eval-print loop (REPL).
+package main // import "go.starlark.net/cmd/starlark"
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "os"
+ "runtime"
+ "runtime/pprof"
+ "strings"
+
+ "go.starlark.net/internal/compile"
+ "go.starlark.net/repl"
+ "go.starlark.net/resolve"
+ "go.starlark.net/starlark"
+ "go.starlark.net/starlarkjson"
+)
+
+// flags
+var (
+ cpuprofile = flag.String("cpuprofile", "", "gather Go CPU profile in this file")
+ memprofile = flag.String("memprofile", "", "gather Go memory profile in this file")
+ profile = flag.String("profile", "", "gather Starlark time profile in this file")
+ showenv = flag.Bool("showenv", false, "on success, print final global environment")
+ execprog = flag.String("c", "", "execute program `prog`")
+)
+// init registers command-line flags that are bound directly to option variables exported by the compile and resolve packages.
+func init() {
+ flag.BoolVar(&compile.Disassemble, "disassemble", compile.Disassemble, "show disassembly during compilation of each function")
+
+ // Non-standard dialect flags: each is bound to an Allow* variable in package resolve and defaults to that variable's current value.
+ flag.BoolVar(&resolve.AllowFloat, "float", resolve.AllowFloat, "obsolete; no effect")
+ flag.BoolVar(&resolve.AllowSet, "set", resolve.AllowSet, "allow set data type")
+ flag.BoolVar(&resolve.AllowLambda, "lambda", resolve.AllowLambda, "allow lambda expressions")
+ flag.BoolVar(&resolve.AllowRecursion, "recursion", resolve.AllowRecursion, "allow while statements and recursive functions")
+ flag.BoolVar(&resolve.AllowGlobalReassign, "globalreassign", resolve.AllowGlobalReassign, "allow reassignment of globals, and if/for/while statements at top level")
+}
+// main exits the process with the status code returned by doMain.
+func main() {
+ os.Exit(doMain()) // doMain has fully returned (and run its deferred functions) before os.Exit is called
+}
+// doMain parses flags, starts any requested profiles, then executes a file or -c program or runs the REPL; it returns the process exit status (0 or 1).
+func doMain() int {
+ log.SetPrefix("starlark: ")
+ log.SetFlags(0) // no timestamp or other header fields on log output
+ flag.Parse()
+
+ if *cpuprofile != "" {
+ f, err := os.Create(*cpuprofile)
+ check(err)
+ err = pprof.StartCPUProfile(f)
+ check(err)
+ defer func() { // runs when doMain returns: stop profiling, then close the output file
+ pprof.StopCPUProfile()
+ err := f.Close()
+ check(err)
+ }()
+ }
+ if *memprofile != "" {
+ f, err := os.Create(*memprofile)
+ check(err)
+ defer func() { // the heap profile is written only when doMain returns
+ runtime.GC() // run a collection first so the heap profile written next is up to date
+ err := pprof.Lookup("heap").WriteTo(f, 0)
+ check(err)
+ err = f.Close()
+ check(err)
+ }()
+ }
+
+ if *profile != "" {
+ f, err := os.Create(*profile)
+ check(err)
+ err = starlark.StartProfile(f)
+ check(err)
+ defer func() { // NOTE(review): f is not closed here; confirm whether StopProfile closes it
+ err := starlark.StopProfile()
+ check(err)
+ }()
+ }
+
+ thread := &starlark.Thread{Load: repl.MakeLoad()}
+ globals := make(starlark.StringDict) // initially empty; passed to the REPL or replaced by ExecFile's result below
+
+ // Ideally this statement would update the predeclared environment.
+ // TODO(adonovan): plumb predeclared env through to the REPL.
+ starlark.Universe["json"] = starlarkjson.Module
+
+ switch {
+ case flag.NArg() == 1 || *execprog != "": // exactly one file argument, or a program given with -c
+ var (
+ filename string
+ src interface{} // left nil when a file name is given; set to the -c program text otherwise
+ err error
+ )
+ if *execprog != "" {
+ // Execute provided program.
+ filename = "cmdline"
+ src = *execprog
+ } else {
+ // Execute specified file.
+ filename = flag.Arg(0)
+ }
+ thread.Name = "exec " + filename
+ globals, err = starlark.ExecFile(thread, filename, src, nil)
+ if err != nil {
+ repl.PrintError(err)
+ return 1
+ }
+ case flag.NArg() == 0: // no file argument and no -c program: interactive mode
+ fmt.Println("Welcome to Starlark (go.starlark.net)")
+ thread.Name = "REPL"
+ repl.REPL(thread, globals)
+ default:
+ log.Print("want at most one Starlark file name")
+ return 1
+ }
+
+ // Print the global environment.
+ if *showenv {
+ for _, name := range globals.Keys() {
+ if !strings.HasPrefix(name, "_") { // skip names that begin with an underscore
+ fmt.Fprintf(os.Stderr, "%s = %s\n", name, globals[name]) // note: written to stderr, not stdout
+ }
+ }
+ }
+
+ return 0
+}
+// check does nothing if err is nil; otherwise it reports err with log.Fatal, which terminates the program.
+func check(err error) {
+ if err != nil {
+ log.Fatal(err) // log.Fatal exits the process, so any pending deferred functions are not run
+ }
+}
diff --git a/doc/impl.md b/doc/impl.md
new file mode 100644
index 0000000..380e2d6
--- /dev/null
+++ b/doc/impl.md
@@ -0,0 +1,242 @@
+
+# Starlark in Go: Implementation
+
+This document (a work in progress) describes some of the design
+choices of the Go implementation of Starlark.
+
+ * [Scanner](#scanner)
+ * [Parser](#parser)
+ * [Resolver](#resolver)
+ * [Evaluator](#evaluator)
+ * [Data types](#data-types)
+ * [Freezing](#freezing)
+ * [Testing](#testing)
+
+
+## Scanner
+
+The scanner is derived from Russ Cox's
+[buildifier](https://github.com/bazelbuild/buildtools/tree/master/buildifier)
+tool, which pretty-prints Bazel BUILD files.
+
+Most of the work happens in `(*scanner).nextToken`.
+
+## Parser
+
+The parser is a hand-written recursive-descent parser. It uses the
+technique of [precedence
+climbing](http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing)
+to reduce the number of productions.
+
+In some places the parser accepts a larger set of programs than are
+strictly valid, leaving the task of rejecting them to the subsequent
+resolver pass. For example, in the function call `f(a, b=c)` the
+parser accepts any expression for `a` and `b`, even though `b` may
+legally be only an identifier. For the parser to distinguish these
+cases would require additional lookahead.
+
+## Resolver
+
+The resolver reports structural errors in the program, such as the use
+of `break` and `continue` outside of a loop.
+
+Starlark has stricter syntactic limitations than Python. For example,
+it does not permit `for` loops or `if` statements at top level, nor
+does it permit global variables to be bound more than once.
+These limitations come from the Bazel project's desire to make it easy
+to identify the sole statement that defines each global, permitting
+accurate cross-reference documentation.
+
+In addition, the resolver validates all variable names, classifying
+them as references to universal, global, local, or free variables.
+Local and free variables are mapped to a small integer, allowing the
+evaluator to use an efficient (flat) representation for the
+environment.
+
+Not all features of the Go implementation are "standard" (that is,
+supported by Bazel's Java implementation), at least for now, so
+non-standard features such as `lambda`, `float`, and `set`
+are flag-controlled. The resolver reports
+any uses of dialect features that have not been enabled.
+
+
+## Evaluator
+
+### Data types
+
+<b>Integers:</b> Integers are represented using `big.Int`, an
+arbitrary precision integer. This representation was chosen because,
+for many applications, Starlark must be able to handle without loss
+protocol buffer values containing signed and unsigned 64-bit integers,
+which requires 65 bits of precision.
+
+Small integers (<256) are preallocated, but all other values require
+memory allocation. Integer performance is relatively poor, but it
+matters little for Bazel-like workloads which depend much
+more on lists of strings than on integers. (Recall that a typical loop
+over a list in Starlark does not materialize the loop index as an `int`.)
+
+An optimization worth trying would be to represent integers using
+either an `int32` or `big.Int`, with the `big.Int` used only when
+`int32` does not suffice. Using `int32`, not `int64`, for "small"
+numbers would make it easier to detect overflow from operations like
+`int32 * int32`, which would trigger the use of `big.Int`.
+
+<b>Floating point</b>:
+Floating point numbers are represented using Go's `float64`.
+Again, `float` support is required to support protocol buffers. The
+existence of floating-point NaN and its infamous comparison behavior
+(`NaN != NaN`) had many ramifications for the API, since we cannot
+assume the result of an ordered comparison is either less than,
+greater than, or equal: it may also fail.
+
+<b>Strings</b>:
+
+TODO: discuss UTF-8 and string.bytes method.
+
+<b>Dictionaries and sets</b>:
+Starlark dictionaries have predictable iteration order.
+Furthermore, many Starlark values are hashable in Starlark even though
+the Go values that represent them are not hashable in Go: big
+integers, for example.
+Consequently, we cannot use Go maps to implement Starlark's dictionary.
+
+We use a simple hash table whose buckets are linked lists, each
+element of which holds up to 8 key/value pairs. In a well-distributed
+table the list should rarely exceed length 1. In addition, each
+key/value item is part of a doubly-linked list that maintains the
+insertion order of the elements for iteration.
+
+<b>Struct:</b>
+The `starlarkstruct` Go package provides a non-standard Starlark
+extension data type, `struct`, that maps field identifiers to
+arbitrary values. Fields are accessed using dot notation: `y = s.f`.
+This data type is extensively used in Bazel, but its specification is
+currently evolving.
+
+Starlark has no `class` mechanism, nor equivalent of Python's
+`namedtuple`, though it is likely that future versions will support
+some way to define a record data type of several fields, with a
+representation more efficient than a hash table.
+
+
+### Freezing
+
+All mutable values created during module initialization are _frozen_
+upon its completion. It is this property that permits a Starlark module
+to be referenced by two Starlark threads running concurrently (such as
+the initialization threads of two other modules) without the
+possibility of a data race.
+
+The Go implementation supports freezing by storing an additional
+"frozen" Boolean variable in each mutable object. Once this flag is set,
+all subsequent attempts at mutation fail. Every value defines a
+Freeze method that sets its own frozen flag if not already set, and
+calls Freeze for each value that it contains.
+For example, when a list is frozen, it freezes each of its elements;
+when a dictionary is frozen, it freezes each of its keys and values;
+and when a function value is frozen, it freezes each of the free
+variables and parameter default values implicitly referenced by its closure.
+Application-defined types must also follow this discipline.
+
+The freeze mechanism in the Go implementation is finer grained than in
+the Java implementation: in effect, the latter has one "frozen" flag
+per module, and every value holds a reference to the frozen flag of
+its module. This makes setting the frozen flag more efficient---a
+simple bit flip, no need to traverse the object graph---but coarser
+grained. Also, it complicates the API slightly because to construct a
+list, say, requires a reference to the frozen flag it should use.
+
+The Go implementation would also permit the freeze operation to be
+exposed to the program, for example as a built-in function.
+This has proven valuable in writing tests of the freeze mechanism
+itself, but is otherwise mostly a curiosity.
+
+
+### Fail-fast iterators
+
+In some languages (such as Go), a program may mutate a data structure
+while iterating over it; for example, a range loop over a map may
+delete map elements. In other languages (such as Java), iterators do
+extra bookkeeping so that modification of the underlying collection
+invalidates the iterator, and the next attempt to use it fails.
+This often helps to detect subtle mistakes.
+
+Starlark takes this a step further. Instead of mutation of the
+collection invalidating the iterator, the act of iterating makes the
+collection temporarily immutable, so that an attempt to, say, delete a
+dict element while looping over the dict, will fail. The error is
+reported against the delete operation, not the iteration.
+
+This is implemented by having each mutable iterable value record a
+counter of active iterators. Starting a loop increments this counter,
+and completing a loop decrements it. A collection with a nonzero
+counter behaves as if frozen. If the collection is actually frozen,
+the counter bookkeeping is unnecessary. (Consequently, iterator
+bookkeeping is needed only while objects are still mutable, before
+they can have been published to another thread, and thus no
+synchronization is necessary.)
+
+A consequence of this design is that in the Go API, it is imperative
+to call `Done` on each iterator once it is no longer needed.
+
+```
+TODO
+starlark.Value interface and subinterfaces
+argument passing to builtins: UnpackArgs, UnpackPositionalArgs.
+```
+
+<b>Evaluation strategy:</b>
+The evaluator uses a simple recursive tree walk, returning a value or
+an error for each expression. We have experimented with just-in-time
+compilation of syntax trees to bytecode, but two limitations in the
+current Go compiler prevent this strategy from outperforming the
+tree-walking evaluator.
+
+First, the Go compiler does not generate a "computed goto" for a
+switch statement ([Go issue
+5496](https://github.com/golang/go/issues/5496)). A bytecode
+interpreter's main loop is a for-loop around a switch statement with
+dozens or hundreds of cases, and the speed with which each case can be
+dispatched strongly affects overall performance.
+Currently, a switch statement generates a binary tree of ordered
+comparisons, requiring several branches instead of one.
+
+Second, the Go compiler's escape analysis assumes that the underlying
+array from a `make([]Value, n)` allocation always escapes
+([Go issue 20533](https://github.com/golang/go/issues/20533)).
+Because the bytecode interpreter's operand stack has a non-constant
+length, it must be allocated with `make`. The resulting allocation
+adds to the cost of each Starlark function call; this can be tolerated
+by amortizing one very large stack allocation across many calls.
+More problematic appears to be the cost of the additional GC write
+barriers incurred by every VM operation: every intermediate result is
+saved to the VM's operand stack, which is on the heap.
+By contrast, intermediate results in the tree-walking evaluator are
+never stored to the heap.
+
+```
+TODO
+frames, backtraces, errors.
+threads
+Print
+Load
+```
+
+## Testing
+
+```
+TODO
+starlarktest package
+`assert` module
+starlarkstruct
+integration with Go testing.T
+```
+
+
+## TODO
+
+
+```
+Discuss practical separation of code and data.
+```
diff --git a/doc/spec.md b/doc/spec.md
new file mode 100644
index 0000000..15e4dc2
--- /dev/null
+++ b/doc/spec.md
@@ -0,0 +1,4263 @@
+# Starlark in Go: Language definition
+
+Starlark is a dialect of Python intended for use as a configuration
+language. A Starlark interpreter is typically embedded within a larger
+application, and this application may define additional
+domain-specific functions and data types beyond those provided by the
+core language. For example, Starlark is embedded within (and was
+originally developed for) the [Bazel build tool](https://bazel.build),
+and [Bazel's build language](https://docs.bazel.build/versions/master/starlark/language.html) is based on Starlark.
+
+This document describes the Go implementation of Starlark
+at go.starlark.net/starlark.
+The language it defines is similar but not identical to
+[the Java-based implementation](https://github.com/bazelbuild/bazel/blob/master/src/main/java/com/google/devtools/starlark/Starlark.java)
+used by Bazel.
+We identify places where their behaviors differ, and an
+[appendix](#dialect-differences) provides a summary of those
+differences.
+We plan to converge both implementations on a single specification.
+
+This document is maintained by Alan Donovan <adonovan@google.com>.
+It was influenced by the Python specification,
+Copyright 1990&ndash;2017, Python Software Foundation,
+and the Go specification, Copyright 2009&ndash;2017, The Go Authors.
+
+Starlark was designed and implemented in Java by Laurent Le Brun,
+Dmitry Lomov, Jon Brandvin, and Damien Martin-Guillerez, standing on
+the shoulders of the Python community.
+The Go implementation was written by Alan Donovan and Jay Conrod;
+its scanner was derived from one written by Russ Cox.
+
+## Overview
+
+Starlark is an untyped dynamic language with high-level data types,
+first-class functions with lexical scope, and automatic memory
+management or _garbage collection_.
+
+Starlark is strongly influenced by Python, and is almost a subset of
+that language. In particular, its data types and syntax for
+statements and expressions will be very familiar to any Python
+programmer.
+However, Starlark is intended not for writing applications but for
+expressing configuration: its programs are short-lived and have no
+external side effects and their main result is structured data or side
+effects on the host application.
+As a result, Starlark has no need for classes, exceptions, reflection,
+concurrency, and other such features of Python.
+
+Starlark execution is _deterministic_: all functions and operators
+in the core language produce the same execution each time the program
+is run; there are no sources of random numbers, clocks, or unspecified
+iterators. This makes Starlark suitable for use in applications where
+reproducibility is paramount, such as build tools.
+
+## Contents
+
+<!-- WTF? No automatic TOC? -->
+
+ * [Overview](#overview)
+ * [Contents](#contents)
+ * [Lexical elements](#lexical-elements)
+ * [Data types](#data-types)
+ * [None](#none)
+ * [Booleans](#booleans)
+ * [Integers](#integers)
+ * [Floating-point numbers](#floating-point-numbers)
+ * [Strings](#strings)
+ * [Lists](#lists)
+ * [Tuples](#tuples)
+ * [Dictionaries](#dictionaries)
+ * [Sets](#sets)
+ * [Functions](#functions)
+ * [Built-in functions](#built-in-functions)
+ * [Name binding and variables](#name-binding-and-variables)
+ * [Value concepts](#value-concepts)
+ * [Identity and mutation](#identity-and-mutation)
+ * [Freezing a value](#freezing-a-value)
+ * [Hashing](#hashing)
+ * [Sequence types](#sequence-types)
+ * [Indexing](#indexing)
+ * [Expressions](#expressions)
+ * [Identifiers](#identifiers)
+ * [Literals](#literals)
+ * [Parenthesized expressions](#parenthesized-expressions)
+ * [Dictionary expressions](#dictionary-expressions)
+ * [List expressions](#list-expressions)
+ * [Unary operators](#unary-operators)
+ * [Binary operators](#binary-operators)
+ * [Conditional expressions](#conditional-expressions)
+ * [Comprehensions](#comprehensions)
+ * [Function and method calls](#function-and-method-calls)
+ * [Dot expressions](#dot-expressions)
+ * [Index expressions](#index-expressions)
+ * [Slice expressions](#slice-expressions)
+ * [Lambda expressions](#lambda-expressions)
+ * [Statements](#statements)
+ * [Pass statements](#pass-statements)
+ * [Assignments](#assignments)
+ * [Augmented assignments](#augmented-assignments)
+ * [Function definitions](#function-definitions)
+ * [Return statements](#return-statements)
+ * [Expression statements](#expression-statements)
+ * [If statements](#if-statements)
+ * [For loops](#for-loops)
+ * [Break and Continue](#break-and-continue)
+ * [Load statements](#load-statements)
+ * [Module execution](#module-execution)
+ * [Built-in constants and functions](#built-in-constants-and-functions)
+ * [None](#none)
+ * [True and False](#true-and-false)
+ * [any](#any)
+ * [all](#all)
+ * [bool](#bool)
+ * [chr](#chr)
+ * [dict](#dict)
+ * [dir](#dir)
+ * [enumerate](#enumerate)
+ * [fail](#fail)
+ * [float](#float)
+ * [getattr](#getattr)
+ * [hasattr](#hasattr)
+ * [hash](#hash)
+ * [int](#int)
+ * [len](#len)
+ * [list](#list)
+ * [max](#max)
+ * [min](#min)
+ * [ord](#ord)
+ * [print](#print)
+ * [range](#range)
+ * [repr](#repr)
+ * [reversed](#reversed)
+ * [set](#set)
+ * [sorted](#sorted)
+ * [str](#str)
+ * [tuple](#tuple)
+ * [type](#type)
+ * [zip](#zip)
+ * [Built-in methods](#built-in-methods)
+ * [dict·clear](#dict·clear)
+ * [dict·get](#dict·get)
+ * [dict·items](#dict·items)
+ * [dict·keys](#dict·keys)
+ * [dict·pop](#dict·pop)
+ * [dict·popitem](#dict·popitem)
+ * [dict·setdefault](#dict·setdefault)
+ * [dict·update](#dict·update)
+ * [dict·values](#dict·values)
+ * [list·append](#list·append)
+ * [list·clear](#list·clear)
+ * [list·extend](#list·extend)
+ * [list·index](#list·index)
+ * [list·insert](#list·insert)
+ * [list·pop](#list·pop)
+ * [list·remove](#list·remove)
+ * [set·union](#set·union)
+ * [string·capitalize](#string·capitalize)
+ * [string·codepoint_ords](#string·codepoint_ords)
+ * [string·codepoints](#string·codepoints)
+ * [string·count](#string·count)
+ * [string·elem_ords](#string·elem_ords)
+ * [string·elems](#string·elems)
+ * [string·endswith](#string·endswith)
+ * [string·find](#string·find)
+ * [string·format](#string·format)
+ * [string·index](#string·index)
+ * [string·isalnum](#string·isalnum)
+ * [string·isalpha](#string·isalpha)
+ * [string·isdigit](#string·isdigit)
+ * [string·islower](#string·islower)
+ * [string·isspace](#string·isspace)
+ * [string·istitle](#string·istitle)
+ * [string·isupper](#string·isupper)
+ * [string·join](#string·join)
+ * [string·lower](#string·lower)
+ * [string·lstrip](#string·lstrip)
+ * [string·partition](#string·partition)
+ * [string·replace](#string·replace)
+ * [string·rfind](#string·rfind)
+ * [string·rindex](#string·rindex)
+ * [string·rpartition](#string·rpartition)
+ * [string·rsplit](#string·rsplit)
+ * [string·rstrip](#string·rstrip)
+ * [string·split](#string·split)
+ * [string·splitlines](#string·splitlines)
+ * [string·startswith](#string·startswith)
+ * [string·strip](#string·strip)
+ * [string·title](#string·title)
+ * [string·upper](#string·upper)
+ * [Dialect differences](#dialect-differences)
+
+
+## Lexical elements
+
+A Starlark program consists of one or more modules.
+Each module is defined by a single UTF-8-encoded text file.
+
+A complete grammar of Starlark can be found in [grammar.txt](../syntax/grammar.txt).
+That grammar is presented piecemeal throughout this document
+in boxes such as this one, which explains the notation:
+
+```grammar {.good}
+Grammar notation
+
+- lowercase and 'quoted' items are lexical tokens.
+- Capitalized names denote grammar productions.
+- (...) implies grouping.
+- x | y means either x or y.
+- [x] means x is optional.
+- {x} means x is repeated zero or more times.
+- The end of each declaration is marked with a period.
+```
+
+The contents of a Starlark file are broken into a sequence of tokens of
+five kinds: white space, punctuation, keywords, identifiers, and literals.
+Each token is formed from the longest sequence of characters that
+would form a valid token of each kind.
+
+```grammar {.good}
+File = {Statement | newline} eof .
+```
+
+*White space* consists of spaces (U+0020), tabs (U+0009), carriage
+returns (U+000D), and newlines (U+000A). Within a line, white space
+has no effect other than to delimit the previous token, but newlines,
+and spaces at the start of a line, are significant tokens.
+
+*Comments*: A hash character (`#`) appearing outside of a string
+literal marks the start of a comment; the comment extends to the end
+of the line, not including the newline character.
+Comments are treated like other white space.
+
+*Punctuation*: The following punctuation characters or sequences of
+characters are tokens:
+
+```text
++ - * / // % =
++= -= *= /= //= %= == !=
+^ < > << >> & |
+^= <= >= <<= >>= &= |=
+. , ; : ~ **
+( ) [ ] { }
+```
+
+*Keywords*: The following tokens are keywords and may not be used as
+identifiers:
+
+```text
+and elif in or
+break else lambda pass
+continue for load return
+def if not while
+```
+
+The tokens below also may not be used as identifiers although they do not
+appear in the grammar; they are reserved as possible future keywords:
+
+<!-- and to remain a syntactic subset of Python -->
+
+```text
+as finally nonlocal
+assert from raise
+class global try
+del import with
+except is yield
+```
+
+<b>Implementation note:</b>
+The Go implementation permits `assert` to be used as an identifier,
+and this feature is widely used in its tests.
+
+*Identifiers*: an identifier is a sequence of Unicode letters, decimal
+ digits, and underscores (`_`), not starting with a digit.
+Identifiers are used as names for values.
+
+Examples:
+
+```text
+None True len
+x index starts_with arg0
+```
+
+*Literals*: literals are tokens that denote specific values. Starlark
+has string, integer, and floating-point literals.
+
+```text
+0 # int
+123 # decimal int
+0x7f # hexadecimal int
+0o755 # octal int
+0b1011 # binary int
+
+0.0 0. .0 # float
+1e10 1e+10 1e-10
+1.1e10 1.1e+10 1.1e-10
+
+"hello" 'hello' # string
+'''hello''' """hello""" # triple-quoted string
+r'hello' r"hello" # raw string literal
+```
+
+Integer and floating-point literal tokens are defined by the following grammar:
+
+```grammar {.good}
+int = decimal_lit | octal_lit | hex_lit | binary_lit .
+decimal_lit = ('1' … '9') {decimal_digit} | '0' .
+octal_lit = '0' ('o'|'O') octal_digit {octal_digit} .
+hex_lit = '0' ('x'|'X') hex_digit {hex_digit} .
+binary_lit = '0' ('b'|'B') binary_digit {binary_digit} .
+
+float = decimals '.' [decimals] [exponent]
+ | decimals exponent
+ | '.' decimals [exponent]
+ .
+decimals = decimal_digit {decimal_digit} .
+exponent = ('e'|'E') ['+'|'-'] decimals .
+
+decimal_digit = '0' … '9' .
+octal_digit = '0' … '7' .
+hex_digit = '0' … '9' | 'A' … 'F' | 'a' … 'f' .
+binary_digit = '0' | '1' .
+```
+
+### String literals
+
+A Starlark string literal denotes a string value.
+In its simplest form, it consists of the desired text
+surrounded by matching single- or double-quotation marks:
+
+```python
+"abc"
+'abc'
+```
+
+Literal occurrences of the chosen quotation mark character must be
+escaped by a preceding backslash. So, if a string contains several
+of one kind of quotation mark, it may be convenient to quote the string
+using the other kind, as in these examples:
+
+```python
+'Have you read "To Kill a Mockingbird?"'
+"Yes, it's a classic."
+
+"Have you read \"To Kill a Mockingbird?\""
+'Yes, it\'s a classic.'
+```
+
+Literal occurrences of the _opposite_ kind of quotation mark, such as
+an apostrophe within a double-quoted string literal, may be escaped
+by a backslash, but this is not necessary: `"it's"` and `"it\'s"` are
+equivalent.
+
+
+#### String escapes
+
+Within a string literal, the backslash character `\` indicates the
+start of an _escape sequence_, a notation for expressing things that
+are impossible or awkward to write directly.
+
+The following *traditional escape sequences* represent the ASCII control
+codes 7-13:
+
+```
+\a \x07 alert or bell
+\b \x08 backspace
+\f \x0C form feed
+\n \x0A line feed
+\r \x0D carriage return
+\t \x09 horizontal tab
+\v \x0B vertical tab
+```
+
+A *literal backslash* is written using the escape `\\`.
+
+An *escaped newline*---that is, a backslash at the end of a line---is ignored,
+allowing a long string to be split across multiple lines of the source file.
+
+```python
+"abc\
+def" # "abcdef"
+```
+
+An *octal escape* encodes a single byte using its octal value.
+It consists of a backslash followed by one, two, or three octal digits [0-7].
+It is an error if the value is greater than decimal 255.
+
+```python
+'\0' # "\x00" a string containing a single NUL byte
+'\12' # "\n" octal 12 = decimal 10
+'\101-\132' # "A-Z"
+'\119' # "\t9" = "\11" + "9"
+```
+
+<b>Implementation note:</b>
+The Java implementation encodes strings using UTF-16,
+so an octal escape encodes a single UTF-16 code unit.
+Octal escapes for values above 127 are therefore not portable across implementations.
+There is little reason to use octal escapes in new code.
+
+A *hex escape* encodes a single byte using its hexadecimal value.
+It consists of `\x` followed by exactly two hexadecimal digits [0-9A-Fa-f].
+
+```python
+"\x00" # "\x00" a string containing a single NUL byte
+"(\x20)" # "( )" ASCII 0x20 = 32 = space
+
+red, reset = "\x1b[31m", "\x1b[0m" # ANSI terminal control codes for color
+"(" + red + "hello" + reset + ")" # "(hello)" with red text, if on a terminal
+```
+
+<b>Implementation note:</b>
+The Java implementation does not support hex escapes.
+
+An ordinary string literal may not contain an unescaped newline,
+but a *multiline string literal* may spread over multiple source lines.
+It is denoted using three quotation marks at start and end.
+Within it, unescaped newlines and quotation marks (or even pairs of
+quotation marks) have their literal meaning, but three quotation marks
+end the literal. This makes it easy to quote large blocks of text with
+few escapes.
+
+```
+haiku = '''
+Yesterday it worked.
+Today it is not working.
+That's computers. Sigh.
+'''
+```
+
+Regardless of the platform's convention for text line endings---for
+example, a linefeed (\n) on UNIX, or a carriage return followed by a
+linefeed (\r\n) on Microsoft Windows---an unescaped line ending in a
+multiline string literal always denotes a line feed (\n).
+
+Starlark also supports *raw string literals*, which look like an
+ordinary single- or double-quotation preceded by `r`. Within a raw
+string literal, there is no special processing of backslash escapes,
+other than an escaped quotation mark (which denotes a literal
+quotation mark), or an escaped newline (which denotes a backslash
+followed by a newline). This form of quotation is typically used when
+writing strings that contain many quotation marks or backslashes (such
+as regular expressions or shell commands) to reduce the burden of
+escaping:
+
+```python
+"a\nb" # "a\nb" = 'a' + '\n' + 'b'
+r"a\nb" # "a\\nb" = 'a' + '\\' + 'n' + 'b'
+
+"a\
+b" # "ab"
+r"a\
+b" # "a\\\nb"
+```
+
+It is an error for a backslash to appear within a string literal other
+than as part of one of the escapes described above.
+
+TODO: define indent, outdent, semicolon, newline, eof
+
+## Data types
+
+These are the main data types built in to the interpreter:
+
+```python
+NoneType # the type of None
+bool # True or False
+int # a signed integer of arbitrary magnitude
+float # an IEEE 754 double-precision floating point number
+string # a byte string
+list # a modifiable sequence of values
+tuple # an unmodifiable sequence of values
+dict # a mapping from values to values
+set # a set of values
+function # a function implemented in Starlark
+builtin_function_or_method # a function or method implemented by the interpreter or host application
+```
+
+Some functions, such as the iteration methods of `string`, or the
+`range` function, return instances of special-purpose types that don't
+appear in this list.
+Additional data types may be defined by the host application into
+which the interpreter is embedded, and those data types may
+participate in basic operations of the language such as arithmetic,
+comparison, indexing, and function calls.
+
+<!-- We needn't mention the stringIterable type here. -->
+
+Some operations can be applied to any Starlark value. For example,
+every value has a type string that can be obtained with the expression
+`type(x)`, and any value may be converted to a string using the
+expression `str(x)`, or to a Boolean truth value using the expression
+`bool(x)`. Other operations apply only to certain types. For
+example, the indexing operation `a[i]` works only with strings, lists,
+and tuples, and any application-defined types that are _indexable_.
+The [_value concepts_](#value-concepts) section explains the groupings of
+types by the operators they support.
+
+
+### None
+
+`None` is a distinguished value used to indicate the absence of any other value.
+For example, the result of a call to a function that contains no return statement is `None`.
+
+`None` is equal only to itself. Its [type](#type) is `"NoneType"`.
+The truth value of `None` is `False`.
+
+
+### Booleans
+
+There are two Boolean values, `True` and `False`, representing the
+truth or falsehood of a predicate. The [type](#type) of a Boolean is `"bool"`.
+
+Boolean values are typically used as conditions in `if`-statements,
+although any Starlark value used as a condition is implicitly
+interpreted as a Boolean.
+For example, the values `None`, `0`, `0.0`, and the empty sequences
+`""`, `()`, `[]`, and `{}` have a truth value of `False`, whereas non-zero
+numbers and non-empty sequences have a truth value of `True`.
+Application-defined types determine their own truth value.
+Any value may be explicitly converted to a Boolean using the built-in `bool`
+function.
+
+```python
+1 + 1 == 2 # True
+2 + 2 == 5 # False
+
+if 1 + 1:
+ print("True")
+else:
+ print("False")
+```
+
+### Integers
+
+The Starlark integer type represents integers. Its [type](#type) is `"int"`.
+
+Integers may be positive or negative, and arbitrarily large.
+Integer arithmetic is exact.
+Integers are totally ordered; comparisons follow mathematical
+tradition.
+
+The `+` and `-` operators perform addition and subtraction, respectively.
+The `*` operator performs multiplication.
+
+The `//` and `%` operations on integers compute floored division and
+remainder of floored division, respectively.
+If the signs of the operands differ, the sign of the remainder `x % y`
+matches that of the divisor, `y`.
+For all finite x and y (y ≠ 0), `(x // y) * y + (x % y) == x`.
+The `/` operator implements real division, and
+yields a `float` result even when its operands are both of type `int`.
+
+Integers, including negative values, may be interpreted as bit vectors.
+The `|`, `&`, and `^` operators implement bitwise OR, AND, and XOR,
+respectively. The unary `~` operator yields the bitwise inversion of its
+integer argument. The `<<` and `>>` operators shift the first argument
+to the left or right by the number of bits given by the second argument.
+
+Any bool, number, or string may be interpreted as an integer by using
+the `int` built-in function.
+
+An integer used in a Boolean context is considered true if it is
+non-zero.
+
+```python
+100 // 5 * 9 + 32 # 212
+3 // 2 # 1
+3 / 2 # 1.5
+111111111 * 111111111 # 12345678987654321
+"0x%x" % (0x1234 & 0xf00f) # "0x1004"
+int("ffff", 16) # 65535, 0xffff
+```
+
+### Floating-point numbers
+
+The Starlark floating-point data type represents an IEEE 754
+double-precision floating-point number. Its [type](#type) is `"float"`.
+
+Arithmetic on floats using the `+`, `-`, `*`, `/`, `//`, and `%`
+ operators follows the IEEE 754 standard.
+However, computing the division or remainder of division by zero is a dynamic error.
+
+An arithmetic operation applied to a mixture of `float` and `int`
+operands works as if the `int` operand is first converted to a
+`float`. For example, `3.141 + 1` is equivalent to `3.141 +
+float(1)`.
+There are two floating-point division operators:
+`x / y` yields the floating-point quotient of `x` and `y`,
+whereas `x // y` yields `floor(x / y)`, that is, the largest
+integer value not greater than `x / y`.
+Although the resulting number is integral, it is represented as a
+`float` if either operand is a `float`.
+
+The `%` operation computes the remainder of floored division.
+As with the corresponding operation on integers,
+if the signs of the operands differ, the sign of the remainder `x % y`
+matches that of the divisor, `y`.
+
+The infinite float values `+Inf` and `-Inf` represent numbers
+greater/less than all finite float values.
+
+The non-finite `NaN` value represents the result of dubious operations
+such as `Inf/Inf`. A NaN value compares neither less than, nor
+greater than, nor equal to any value, including itself.
+
+All floats other than NaN are totally ordered, so they may be compared
+using operators such as `==` and `<`.
+
+Any bool, number, or string may be interpreted as a floating-point
+number by using the `float` built-in function.
+
+A float used in a Boolean context is considered true if it is
+non-zero.
+
+```python
+1.23e45 * 1.23e45 # 1.5129e+90
+1.111111111111111 * 1.111111111111111 # 1.23457
+3.0 / 2 # 1.5
+3 / 2.0 # 1.5
+float(3) / 2 # 1.5
+3.0 // 2.0 # 1
+```
+
+### Strings
+
+A string represents an immutable sequence of bytes.
+The [type](#type) of a string is `"string"`.
+
+Strings can represent arbitrary binary data, including zero bytes, but
+most strings contain text, encoded by convention using UTF-8.
+
+The built-in `len` function returns the number of bytes in a string.
+
+Strings may be concatenated with the `+` operator.
+
+The substring expression `s[i:j]` returns the substring of `s` from
+index `i` up to index `j`. The index expression `s[i]` returns the
+1-byte substring `s[i:i+1]`.
+
+Strings are hashable, and thus may be used as keys in a dictionary.
+
+Strings are totally ordered lexicographically, so strings may be
+compared using operators such as `==` and `<`.
+
+Strings are _not_ iterable sequences, so they cannot be used as the operand of
+a `for`-loop, list comprehension, or any other operation that requires
+an iterable sequence.
+To obtain a view of a string as an iterable sequence of numeric byte
+values, 1-byte substrings, numeric Unicode code points, or 1-code
+point substrings, you must explicitly call one of its four methods:
+`elem_ords`, `elems`, `codepoint_ords`, or `codepoints`, respectively.
+
+Any value may be formatted as a string using the `str` or `repr` built-in
+functions, the `str % tuple` operator, or the `str.format` method.
+
+A string used in a Boolean context is considered true if it is
+non-empty.
+
+Strings have several built-in methods:
+
+* [`capitalize`](#string·capitalize)
+* [`codepoint_ords`](#string·codepoint_ords)
+* [`codepoints`](#string·codepoints)
+* [`count`](#string·count)
+* [`elem_ords`](#string·elem_ords)
+* [`elems`](#string·elems)
+* [`endswith`](#string·endswith)
+* [`find`](#string·find)
+* [`format`](#string·format)
+* [`index`](#string·index)
+* [`isalnum`](#string·isalnum)
+* [`isalpha`](#string·isalpha)
+* [`isdigit`](#string·isdigit)
+* [`islower`](#string·islower)
+* [`isspace`](#string·isspace)
+* [`istitle`](#string·istitle)
+* [`isupper`](#string·isupper)
+* [`join`](#string·join)
+* [`lower`](#string·lower)
+* [`lstrip`](#string·lstrip)
+* [`partition`](#string·partition)
+* [`replace`](#string·replace)
+* [`rfind`](#string·rfind)
+* [`rindex`](#string·rindex)
+* [`rpartition`](#string·rpartition)
+* [`rsplit`](#string·rsplit)
+* [`rstrip`](#string·rstrip)
+* [`split`](#string·split)
+* [`splitlines`](#string·splitlines)
+* [`startswith`](#string·startswith)
+* [`strip`](#string·strip)
+* [`title`](#string·title)
+* [`upper`](#string·upper)
+
+<b>Implementation note:</b>
+The type of a string element varies across implementations.
+There is agreement that byte strings, with text conventionally encoded
+using UTF-8, is the ideal choice, but the Java implementation treats
+strings as sequences of UTF-16 codes and changing it appears
+intractable; see Google Issue b/36360490.
+
+<b>Implementation note:</b>
+The Java implementation does not consistently treat strings as
+iterable; see `testdata/string.star` in the test suite and Google Issue
+b/34385336 for further details.
+
+### Lists
+
+A list is a mutable sequence of values.
+The [type](#type) of a list is `"list"`.
+
+Lists are indexable sequences: the elements of a list may be iterated
+over by `for`-loops, list comprehensions, and various built-in
+functions.
+
+Lists may be constructed using bracketed list notation:
+
+```python
+[] # an empty list
+[1] # a 1-element list
+[1, 2] # a 2-element list
+```
+
+Lists can also be constructed from any iterable sequence by using the
+built-in `list` function.
+
+The built-in `len` function applied to a list returns the number of elements.
+The index expression `list[i]` returns the element at index i,
+and the slice expression `list[i:j]` returns a new list consisting of
+the elements at indices from i to j.
+
+List elements may be added using the `append` or `extend` methods,
+removed using the `remove` method, or reordered by assignments such as
+`list[i] = list[j]`.
+
+The concatenation operation `x + y` yields a new list containing all
+the elements of the two lists x and y.
+
+For most types, `x += y` is equivalent to `x = x + y`, except that it
+evaluates `x` only once, that is, it allocates a new list to hold
+the concatenation of `x` and `y`.
+However, if `x` refers to a list, the statement does not allocate a
+new list but instead mutates the original list in place, similar to
+`x.extend(y)`.
+
+Lists are not hashable, so may not be used in the keys of a dictionary.
+
+A list used in a Boolean context is considered true if it is
+non-empty.
+
+A [_list comprehension_](#comprehensions) creates a new list whose elements are the
+result of some expression applied to each element of another sequence.
+
+```python
+[x*x for x in [1, 2, 3, 4]] # [1, 4, 9, 16]
+```
+
+A list value has these methods:
+
+* [`append`](#list·append)
+* [`clear`](#list·clear)
+* [`extend`](#list·extend)
+* [`index`](#list·index)
+* [`insert`](#list·insert)
+* [`pop`](#list·pop)
+* [`remove`](#list·remove)
+
+### Tuples
+
+A tuple is an immutable sequence of values.
+The [type](#type) of a tuple is `"tuple"`.
+
+Tuples are constructed using parenthesized list notation:
+
+```python
+() # the empty tuple
+(1,) # a 1-tuple
+(1, 2) # a 2-tuple ("pair")
+(1, 2, 3) # a 3-tuple
+```
+
+Observe that for the 1-tuple, the trailing comma is necessary to
+distinguish it from the parenthesized expression `(1)`.
+1-tuples are seldom used.
+
+Starlark, unlike Python, does not permit a trailing comma to appear in
+an unparenthesized tuple expression:
+
+```python
+for k, v, in dict.items(): pass # syntax error at 'in'
+_ = [(v, k) for k, v, in dict.items()] # syntax error at 'in'
+f = lambda a, b, : None # syntax error at ':'
+
+sorted(3, 1, 4, 1,) # ok
+[1, 2, 3, ] # ok
+{1: 2, 3:4, } # ok
+```
+
+Any iterable sequence may be converted to a tuple by using the
+built-in `tuple` function.
+
+Like lists, tuples are indexed sequences, so they may be indexed and
+sliced. The index expression `tuple[i]` returns the tuple element at
+index i, and the slice expression `tuple[i:j]` returns a sub-sequence
+of a tuple.
+
+Tuples are iterable sequences, so they may be used as the operand of a
+`for`-loop, a list comprehension, or various built-in functions.
+
+Unlike lists, tuples cannot be modified.
+However, the mutable elements of a tuple may be modified.
+
+Tuples are hashable (assuming their elements are hashable),
+so they may be used as keys of a dictionary.
+
+Tuples may be concatenated using the `+` operator.
+
+A tuple used in a Boolean context is considered true if it is
+non-empty.
+
+
+### Dictionaries
+
+A dictionary is a mutable mapping from keys to values.
+The [type](#type) of a dictionary is `"dict"`.
+
+Dictionaries provide constant-time operations to insert an element, to
+look up the value for a key, or to remove an element. Dictionaries
+are implemented using hash tables, so keys must be hashable. Hashable
+values include `None`, Booleans, numbers, strings, and tuples
+composed from hashable values. Most mutable values, such as lists,
+dictionaries, and sets, are not hashable, even when frozen.
+Attempting to use a non-hashable value as a key in a dictionary
+results in a dynamic error.
+
+A [dictionary expression](#dictionary-expressions) specifies a
+dictionary as a set of key/value pairs enclosed in braces:
+
+```python
+coins = {
+ "penny": 1,
+ "nickel": 5,
+ "dime": 10,
+ "quarter": 25,
+}
+```
+
+The expression `d[k]`, where `d` is a dictionary and `k` is a key,
+retrieves the value associated with the key. If the dictionary
+contains no such item, the operation fails:
+
+```python
+coins["penny"] # 1
+coins["dime"] # 10
+coins["silver dollar"] # error: key not found
+```
+
+The number of items in a dictionary `d` is given by `len(d)`.
+A key/value item may be added to a dictionary, or updated if the key
+is already present, by using `d[k]` on the left side of an assignment:
+
+```python
+len(coins) # 4
+coins["shilling"] = 20
+len(coins) # 5, item was inserted
+coins["shilling"] = 5
+len(coins) # 5, existing item was updated
+```
+
+A dictionary can also be constructed using a [dictionary
+comprehension](#comprehensions), which evaluates a pair of expressions,
+the _key_ and the _value_, for every element of another iterable such
+as a list. This example builds a mapping from each word to its length
+in bytes:
+
+```python
+words = ["able", "baker", "charlie"]
+{x: len(x) for x in words} # {"able": 4, "baker": 5, "charlie": 7}
+```
+
+Dictionaries are iterable sequences, so they may be used as the
+operand of a `for`-loop, a list comprehension, or various built-in
+functions.
+Iteration yields the dictionary's keys in the order in which they were
+inserted; updating the value associated with an existing key does not
+affect the iteration order.
+
+```python
+x = dict([("a", 1), ("b", 2)]) # {"a": 1, "b": 2}
+x.update([("a", 3), ("c", 4)]) # {"a": 3, "b": 2, "c": 4}
+```
+
+```python
+for name in coins:
+ print(name, coins[name]) # prints "penny 1", "nickel 5", ...
+```
+
+Like all mutable values in Starlark, a dictionary can be frozen, and
+once frozen, all subsequent operations that attempt to update it will
+fail.
+
+A dictionary used in a Boolean context is considered true if it is
+non-empty.
+
+Dictionaries may be compared for equality using `==` and `!=`. Two
+dictionaries compare equal if they contain the same number of items
+and each key/value item (k, v) found in one dictionary is also present
+in the other. Dictionaries are not ordered; it is an error to compare
+two dictionaries with `<`.
+
+
+A dictionary value has these methods:
+
+* [`clear`](#dict·clear)
+* [`get`](#dict·get)
+* [`items`](#dict·items)
+* [`keys`](#dict·keys)
+* [`pop`](#dict·pop)
+* [`popitem`](#dict·popitem)
+* [`setdefault`](#dict·setdefault)
+* [`update`](#dict·update)
+* [`values`](#dict·values)
+
+### Sets
+
+A set is a mutable set of values.
+The [type](#type) of a set is `"set"`.
+
+Like dictionaries, sets are implemented using hash tables, so the
+elements of a set must be hashable.
+
+Sets may be compared for equality or inequality using `==` and `!=`.
+Two sets compare equal if they contain the same elements.
+
+Sets are iterable sequences, so they may be used as the operand of a
+`for`-loop, a list comprehension, or various built-in functions.
+Iteration yields the set's elements in the order in which they were
+inserted.
+
+The binary `|` and `&` operators compute union and intersection when
+applied to sets. The right operand of the `|` operator may be any
+iterable value. The binary `in` operator performs a set membership
+test when its right operand is a set.
+
+The binary `^` operator performs symmetric difference of two sets.
+
+Sets are instantiated by calling the built-in `set` function, which
+returns a set containing all the elements of its optional argument,
+which must be an iterable sequence. Sets have no literal syntax.
+
+The only method of a set is `union`, which is equivalent to the `|` operator.
+
+A set used in a Boolean context is considered true if it is non-empty.
+
+<b>Implementation note:</b>
+The Go implementation of Starlark requires the `-set` flag to
+enable support for sets.
+The Java implementation does not support sets.
+
+
+### Functions
+
+A function value represents a function defined in Starlark.
+Its [type](#type) is `"function"`.
+A function value used in a Boolean context is always considered true.
+
+Functions defined by a [`def` statement](#function-definitions) are named;
+functions defined by a [`lambda` expression](#lambda-expressions) are anonymous.
+
+Function definitions may be nested, and an inner function may refer to a local variable of an outer function.
+
+A function definition defines zero or more named parameters.
+Starlark has a rich mechanism for passing arguments to functions.
+
+<!-- TODO break up this explanation into caller-side and callee-side
+ parts, and put the former under function calls and the latter
+ under function definitions. Also try to convey that the Callable
+ interface sees the flattened-out args and kwargs and that's what
+ built-ins get.
+-->
+
+The example below shows a definition and call of a function of two
+required parameters, `x` and `y`.
+
+```python
+def idiv(x, y):
+ return x // y
+
+idiv(6, 3) # 2
+```
+
+A call may provide arguments to function parameters either by
+position, as in the example above, or by name, as in the first two calls
+below, or by a mixture of the two forms, as in the third call below.
+All the positional arguments must precede all the named arguments.
+Named arguments may improve clarity, especially in functions of
+several parameters.
+
+```python
+idiv(x=6, y=3) # 2
+idiv(y=3, x=6) # 2
+
+idiv(6, y=3) # 2
+```
+
+<b>Optional parameters:</b> A parameter declaration may specify a
+default value using `name=value` syntax; such a parameter is
+_optional_. The default value expression is evaluated during
+execution of the `def` statement or evaluation of the `lambda`
+expression, and the default value forms part of the function value.
+All optional parameters must follow all non-optional parameters.
+A function call may omit arguments for any suffix of the optional
+parameters; the effective values of those arguments are supplied by
+the function's parameter defaults.
+
+```python
+def f(x, y=3):
+ return x, y
+
+f(1, 2) # (1, 2)
+f(1) # (1, 3)
+```
+
+If a function parameter's default value is a mutable expression,
+modifications to the value during one call may be observed by
+subsequent calls.
+Beware of this when using lists or dicts as default values.
+If the function becomes frozen, its parameters' default values become
+frozen too.
+
+```python
+# module a.star
+def f(x, list=[]):
+ list.append(x)
+ return list
+
+f(4, [1,2,3]) # [1, 2, 3, 4]
+f(1) # [1]
+f(2) # [1, 2], not [2]!
+
+# module b.star
+load("a.star", "f")
+f(3) # error: cannot append to frozen list
+```
+
+<b>Variadic functions:</b> Some functions allow callers to provide an
+arbitrary number of arguments.
+After all required and optional parameters, a function definition may
+specify a _variadic arguments_ or _varargs_ parameter, indicated by a
+star preceding the parameter name: `*args`.
+Any surplus positional arguments provided by the caller are formed
+into a tuple and assigned to the `args` parameter.
+
+```python
+def f(x, y, *args):
+ return x, y, args
+
+f(1, 2) # (1, 2, ())
+f(1, 2, 3, 4) # (1, 2, (3, 4))
+```
+
+<b>Keyword-variadic functions:</b> Some functions allow callers to
+provide an arbitrary sequence of `name=value` keyword arguments.
+A function definition may include a final _keyword arguments_ or
+_kwargs_ parameter, indicated by a double-star preceding the parameter
+name: `**kwargs`.
+Any surplus named arguments that do not correspond to named parameters
+are collected in a new dictionary and assigned to the `kwargs` parameter:
+
+```python
+def f(x, y, **kwargs):
+ return x, y, kwargs
+
+f(1, 2) # (1, 2, {})
+f(x=2, y=1) # (2, 1, {})
+f(x=2, y=1, z=3) # (2, 1, {"z": 3})
+```
+
+It is a static error if any two parameters of a function have the same name.
+
+Just as a function definition may accept an arbitrary number of
+positional or named arguments, a function call may provide an
+arbitrary number of positional or named arguments supplied by a
+list or dictionary:
+
+```python
+def f(a, b, c=5):
+ return a * b + c
+
+f(*[2, 3]) # 11
+f(*[2, 3, 7]) # 13
+f(*[2]) # error: f takes at least 2 arguments (1 given)
+
+f(**dict(b=3, a=2)) # 11
+f(**dict(c=7, a=2, b=3)) # 13
+f(**dict(a=2)) # error: f takes at least 2 arguments (1 given)
+f(**dict(d=4)) # error: f got unexpected keyword argument "d"
+```
+
+Once the parameters have been successfully bound to the arguments
+supplied by the call, the sequence of statements that comprise the
+function body is executed.
+
+It is a static error if a function call has two named arguments of the
+same name, such as `f(x=1, x=2)`. A call that provides a `**kwargs`
+argument may yet have two values for the same name, such as
+`f(x=1, **dict(x=2))`. This results in a dynamic error.
+
+Function arguments are evaluated in the order they appear in the call.
+<!-- see https://github.com/bazelbuild/starlark/issues/13 -->
+
+Unlike Python, Starlark does not allow more than one `*args` argument in a
+call, and if a `*args` argument is present it must appear after all
+positional and named arguments.
+
+The final argument to a function call may be followed by a trailing comma.
+
+A function call completes normally after the execution of either a
+`return` statement, or of the last statement in the function body.
+The result of the function call is the value of the return statement's
+operand, or `None` if the return statement had no operand or if the
+function completed without executing a return statement.
+
+```python
+def f(x):
+ if x == 0:
+ return
+ if x < 0:
+ return -x
+ print(x)
+
+f(1) # returns None after printing "1"
+f(0) # returns None without printing
+f(-1) # returns 1 without printing
+```
+
+<b>Implementation note:</b>
+The Go implementation of Starlark requires the `-recursion`
+flag to allow recursive functions.
+
+
+If the `-recursion` flag is not specified it is a dynamic error for a
+function to call itself or another function value with the same
+declaration.
+
+```python
+def fib(x):
+ if x < 2:
+ return x
+ return fib(x-2) + fib(x-1) # dynamic error: function fib called recursively
+
+fib(5)
+```
+
+This rule, combined with the invariant that all loops are iterations
+over finite sequences, implies that Starlark programs can not be
+Turing complete unless the `-recursion` flag is specified.
+
+<!-- This rule is supposed to deter people from abusing Starlark for
+ inappropriate uses, especially in the build system.
+ It may work for that purpose, but it doesn't stop Starlark programs
+ from consuming too much time or space. Perhaps it should be a
+ dialect option.
+-->
+
+
+
+### Built-in functions
+
+A built-in function is a function or method implemented in Go by the interpreter
+or the application into which the interpreter is embedded.
+
+The [type](#type) of a built-in function is `"builtin_function_or_method"`.
+
+A built-in function value used in a Boolean context is always considered true.
+
+Many built-in functions are predeclared in the environment
+(see [Name Resolution](#name-resolution)).
+Some built-in functions such as `len` are _universal_, that is,
+available to all Starlark programs.
+The host application may predeclare additional built-in functions
+in the environment of a specific module.
+
+Except where noted, built-in functions accept only positional arguments.
+The parameter names serve merely as documentation.
+
+Most built-in functions that have a Boolean parameter require its
+argument to be `True` or `False`. Unlike `if` statements, other values
+are not implicitly converted to their truth value and instead cause a
+dynamic error.
+
+
+## Name binding and variables
+
+After a Starlark file is parsed, but before its execution begins, the
+Starlark interpreter checks statically that the program is well formed.
+For example, `break` and `continue` statements may appear only within
+a loop; a `return` statement may appear only within a
+function; and `load` statements may appear only outside any function.
+
+_Name resolution_ is the static checking process that
+resolves names to variable bindings.
+During execution, names refer to variables. Statically, names denote
+places in the code where variables are created; these places are
+called _bindings_. A name may denote different bindings at different
+places in the program. The region of text in which a particular name
+refers to the same binding is called that binding's _scope_.
+
+Four Starlark constructs bind names, as illustrated in the example below:
+`load` statements (`a` and `b`),
+`def` statements (`c`),
+function parameters (`d`),
+and assignments (`e`, `h`, including the augmented assignment `e += 1`).
+Variables may be assigned or re-assigned explicitly (`e`, `h`), or implicitly, as
+in a `for`-loop (`f`) or comprehension (`g`, `i`).
+
+```python
+load("lib.star", "a", b="B")
+
+def c(d):
+ e = 0
+ for f in d:
+ print([True for g in f])
+ e += 1
+
+h = [2*i for i in a]
+```
+
+The environment of a Starlark program is structured as a tree of
+_lexical blocks_, each of which may contain name bindings.
+The tree of blocks is parallel to the syntax tree.
+Blocks are of five kinds.
+
+<!-- Avoid the term "built-in" block since that's also a type. -->
+At the root of the tree is the _predeclared_ block,
+which binds several names implicitly.
+The set of predeclared names includes the universal
+constant values `None`, `True`, and `False`, and
+various built-in functions such as `len` and `list`;
+these functions are immutable and stateless.
+An application may pre-declare additional names
+to provide domain-specific functions to that file, for example.
+These additional functions may have side effects on the application.
+Starlark programs cannot change the set of predeclared bindings
+or assign new values to them.
+
+Nested beneath the predeclared block is the _module_ block,
+which contains the bindings of the current module.
+Bindings in the module block (such as `c` and `h` in the
+example) are called _global_ and may be visible to other modules.
+The module block is empty at the start of the file
+and is populated by top-level binding statements.
+
+Nested beneath the module block is the _file_ block,
+which contains bindings local to the current file.
+Names in this block (such as `a` and `b` in the example)
+are bound only by `load` statements.
+The sets of names bound in the file block and in the module block do not overlap:
+it is an error for a load statement to bind the name of a global,
+or for a top-level statement to bind a name bound by a load statement.
+
+A file block contains a _function_ block for each top-level
+function, and a _comprehension_ block for each top-level comprehension.
+Bindings in either of these kinds of block,
+and in the file block itself, are called _local_.
+(In the example, the bindings for `e`, `f`, `g`, and `i` are all local.)
+Additional functions and comprehensions, and their blocks, may be
+nested in any order, to any depth.
+
+If a name is bound anywhere within a block, all uses of the name within
+the block are treated as references to that binding,
+even if the use appears before the binding.
+This is true even at the top level, unlike Python.
+The binding of `y` on the last line of the example below makes `y`
+local to the function `hello`, so the use of `y` in the print
+statement also refers to the local `y`, even though it appears
+earlier.
+
+```python
+y = "goodbye"
+
+def hello():
+ for x in (1, 2):
+ if x == 2:
+ print(y) # prints "hello"
+ if x == 1:
+ y = "hello"
+```
+It is a dynamic error to evaluate a reference to a local variable
+before it has been bound:
+
+```python
+def f():
+ print(x) # dynamic error: local variable x referenced before assignment
+ x = "hello"
+```
+
+The same is true for global variables:
+
+```python
+print(x) # dynamic error: global variable x referenced before assignment
+x = "hello"
+```
+
+The same is also true for nested loops in comprehensions.
+In the (unnatural) examples below, the scope of the variables `x`, `y`,
+and `z` is the entire comprehension block, except the operand of the first
+loop (`[]` or `[1]`), which is resolved in the enclosing environment.
+The second loop may thus refer to variables defined by the third (`z`),
+even though such references would fail if actually executed.
+
+```
+[1//0 for x in [] for y in z for z in ()] # [] (no error)
+[1//0 for x in [1] for y in z for z in ()] # dynamic error: local variable z referenced before assignment
+```
+
+
+<!-- This is similar to Python[23]. Presumed rationale: it resembles
+ the desugaring to nested loop statements, in which the scope
+ of all three variables is the entire enclosing function,
+ including the portion before the bindings.
+
+ def f():
+ ...
+ for x in []:
+ for y in z:
+ for z in ():
+ 1//0
+-->
+
+It is a static error to refer to a name that has no binding at all.
+```
+def f():
+ if False:
+ g() # static error: undefined: g
+```
+(This behavior differs from Python, which treats such references as global,
+and thus does not report an error until the expression is evaluated.)
+
+<!-- Consequently, the REPL, which consumes one compound statement at a time,
+ cannot resolve forward references such as
+ def f(): return K
+ K = 1
+ because the first chunk has an unresolved reference to K.
+-->
+
+It is a static error to bind a global variable already explicitly bound in the file:
+
+```python
+x = 1
+x = 2 # static error: cannot reassign global x declared on line 1
+```
+
+<!-- The above rule, and the rule that forbids if-statements and loops at
+ top level, exist to ensure that there is exactly one statement
+ that binds each global variable, which makes cross-referenced
+ documentation more useful, the designers assure me, but
+ I am skeptical that it's worth the trouble. -->
+
+If a name was pre-bound by the application, the Starlark program may
+explicitly bind it, but only once.
+
+An augmented assignment statement such as `x += y` is considered both a
+reference to `x` and a binding use of `x`, so it may not be used at
+top level.
+
+<b>Implementation note:</b>
+The Go implementation of Starlark permits augmented assignments to appear
+at top level if the `-globalreassign` flag is enabled.
+
+A function may refer to variables defined in an enclosing function.
+In this example, the inner function `f` refers to a variable `x`
+that is local to the outer function `squarer`.
+`x` is a _free variable_ of `f`.
+The function value (`f`) created by a `def` statement holds a
+reference to each of its free variables so it may use
+them even after the enclosing function has returned.
+
+```python
+def squarer():
+ x = [0]
+ def f():
+ x[0] += 1
+ return x[0]*x[0]
+ return f
+
+sq = squarer()
+print(sq(), sq(), sq(), sq()) # "1 4 9 16"
+```
+
+An inner function cannot assign to a variable bound in an enclosing
+function, because the assignment would bind the variable in the
+inner function.
+In the example below, the `x += 1` statement binds `x` within `f`,
+hiding the outer `x`.
+Execution fails because the inner `x` has not been assigned before the
+attempt to increment it.
+
+```python
+def squarer():
+ x = 0
+ def f():
+ x += 1 # dynamic error: local variable x referenced before assignment
+ return x*x
+ return f
+
+sq = squarer()
+```
+
+(Starlark has no equivalent of Python's `nonlocal` or `global`
+declarations, but as the first version of `squarer` showed, this
+omission can be worked around by using a list of a single element.)
+
+
+A name appearing after a dot, such as `split` in
+`get_filename().split('/')`, is not resolved statically.
+The [dot expression](#dot-expressions) `.split` is a dynamic operation
+on the value returned by `get_filename()`.
+
+
+## Value concepts
+
+Starlark has eleven core [data types](#data-types). An application
+that embeds the Starlark interpreter may define additional types that
+behave like Starlark values. All values, whether core or
+application-defined, implement a few basic behaviors:
+
+```text
+str(x) -- return a string representation of x
+type(x) -- return a string describing the type of x
+bool(x) -- convert x to a Boolean truth value
+```
+
+### Identity and mutation
+
+Starlark is an imperative language: programs consist of sequences of
+statements executed for their side effects.
+For example, an assignment statement updates the value held by a
+variable, and calls to some built-in functions such as `print` change
+the state of the application that embeds the interpreter.
+
+Values of some data types, such as `NoneType`, `bool`, `int`, `float`, and
+`string`, are _immutable_; they can never change.
+Immutable values have no notion of _identity_: it is impossible for a
+Starlark program to tell whether two integers, for instance, are
+represented by the same object; it can tell only whether they are
+equal.
+
+Values of other data types, such as `list`, `dict`, and `set`, are
+_mutable_: they may be modified by a statement such as `a[i] = 0` or
+`items.clear()`. Although `tuple` and `function` values are not
+directly mutable, they may refer to mutable values indirectly, so for
+this reason we consider them mutable too. Starlark values of these
+types are actually _references_ to variables.
+
+Copying a reference to a variable, using an assignment statement for
+instance, creates an _alias_ for the variable, and the effects of
+operations applied to the variable through one alias are visible
+through all others.
+
+```python
+x = [] # x refers to a new empty list variable
+y = x # y becomes an alias for x
+x.append(1) # changes the variable referred to by x
+print(y) # "[1]"; y observes the mutation
+```
+
+Starlark uses _call-by-value_ parameter passing: in a function call,
+argument values are assigned to function parameters as if by
+assignment statements. If the values are references, the caller and
+callee may refer to the same variables, so if the called function
+changes the variable referred to by a parameter, the effect may also
+be observed by the caller:
+
+```python
+def f(y):
+ y.append(1) # changes the variable referred to by x
+
+x = [] # x refers to a new empty list variable
+f(x) # f's parameter y becomes an alias for x
+print(x) # "[1]"; x observes the mutation
+```
+
+
+As in all imperative languages, understanding _aliasing_, the
+relationship between reference values and the variables to which they
+refer, is crucial to writing correct programs.
+
+### Freezing a value
+
+Starlark has a feature unusual among imperative programming languages:
+a mutable value may be _frozen_ so that all subsequent attempts to
+mutate it fail with a dynamic error; the value, and all other values
+reachable from it, become _immutable_.
+
+Immediately after execution of a Starlark module, all values in its
+top-level environment are frozen. Because all the global variables of
+an initialized Starlark module are immutable, the module may be published to
+and used by other threads in a parallel program without the need for
+locks. For example, the Bazel build system loads and executes BUILD
+and .bzl files in parallel, and two modules being executed
+concurrently may freely access variables or call functions from a
+third without the possibility of a race condition.
+
+### Hashing
+
+The `dict` and `set` data types are implemented using hash tables, so
+only _hashable_ values are suitable as keys of a `dict` or elements of
+a `set`. Attempting to use a non-hashable value as the key in a hash
+table results in a dynamic error.
+
+The hash of a value is an unspecified integer chosen so that two equal
+values have the same hash, in other words, `x == y => hash(x) == hash(y)`.
+A hashable value has the same hash throughout its lifetime.
+
+Values of the types `NoneType`, `bool`, `int`, `float`, and `string`,
+which are all immutable, are hashable.
+
+Values of mutable types such as `list`, `dict`, and `set` are not
+hashable. These values remain unhashable even if they have become
+immutable due to _freezing_.
+
+A `tuple` value is hashable only if all its elements are hashable.
+Thus `("localhost", 80)` is hashable but `([127, 0, 0, 1], 80)` is not.
+
+Values of the types `function` and `builtin_function_or_method` are also hashable.
+Although functions are not necessarily immutable, as they may be
+closures that refer to mutable variables, instances of these types
+are compared by reference identity (see [Comparisons](#comparisons)),
+so their hash values are derived from their identity.
+
+
+### Sequence types
+
+Many Starlark data types represent a _sequence_ of values: lists,
+tuples, and sets are sequences of arbitrary values, and in many
+contexts dictionaries act like a sequence of their keys.
+
+We can classify different kinds of sequence types based on the
+operations they support.
+Each is listed below using the name of its corresponding interface in
+the interpreter's Go API.
+
+* `Iterable`: an _iterable_ value lets us process each of its elements in a fixed order.
+ Examples: `dict`, `set`, `list`, `tuple`, but not `string`.
+* `Sequence`: a _sequence of known length_ lets us know how many elements it
+ contains without processing them.
+ Examples: `dict`, `set`, `list`, `tuple`, but not `string`.
+* `Indexable`: an _indexed_ type has a fixed length and provides efficient
+ random access to its elements, which are identified by integer indices.
+ Examples: `string`, `tuple`, and `list`.
+* `SetIndexable`: a _settable indexed type_ additionally allows us to modify the
+ element at a given integer index. Example: `list`.
+* `Mapping`: a mapping is an association of keys to values. Example: `dict`.
+
+Although all of Starlark's core data types for sequences implement at
+least the `Sequence` contract, it's possible for an application
+that embeds the Starlark interpreter to define additional data types
+representing sequences of unknown length that implement only the `Iterable` contract.
+
+Strings are not iterable, though they do support the `len(s)` and
+`s[i]` operations. Starlark deviates from Python here to avoid a common
+pitfall in which a string is used by mistake where a list containing a
+single string was intended, resulting in its interpretation as a sequence
+of bytes.
+
+Most Starlark operators and built-in functions that need a sequence
+of values will accept any iterable.
+
+It is a dynamic error to mutate a sequence such as a list, set, or
+dictionary while iterating over it.
+
+```python
+def increment_values(dict):
+ for k in dict:
+ dict[k] += 1 # error: cannot insert into hash table during iteration
+
+dict = {"one": 1, "two": 2}
+increment_values(dict)
+```
+
+
+### Indexing
+
+Many Starlark operators and functions require an index operand `i`,
+such as `a[i]` or `list.insert(i, x)`. Others require two indices `i`
+and `j` that indicate the start and end of a sub-sequence, such as
+`a[i:j]`, `list.index(x, i, j)`, or `string.find(x, i, j)`.
+All such operations follow similar conventions, described here.
+
+Indexing in Starlark is *zero-based*. The first element of a string
+or list has index 0, the next 1, and so on. The last element of a
+sequence of length `n` has index `n-1`.
+
+```python
+"hello"[0] # "h"
+"hello"[4] # "o"
+"hello"[5] # error: index out of range
+```
+
+For sub-sequence operations that require two indices, the first is
+_inclusive_ and the second _exclusive_. Thus `a[i:j]` indicates the
+sequence starting with element `i` up to but not including element
+`j`. The length of this sub-sequence is `j-i`. This convention is known
+as *half-open indexing*.
+
+```python
+"hello"[1:4] # "ell"
+```
+
+Either or both of the index operands may be omitted. If omitted, the
+first is treated as equivalent to 0 and the second is equivalent to the
+length of the sequence:
+
+```python
+"hello"[1:] # "ello"
+"hello"[:4] # "hell"
+```
+
+It is permissible to supply a negative integer to an indexing
+operation. The effective index is computed from the supplied value by
+the following two-step procedure. First, if the value is negative, the
+length of the sequence is added to it. This provides a convenient way
+to address the final elements of the sequence:
+
+```python
+"hello"[-1] # "o", like "hello"[4]
+"hello"[-3:-1] # "ll", like "hello"[2:4]
+```
+
+Second, for sub-sequence operations, if the value is still negative, it
+is replaced by zero, or if it is greater than the length `n` of the
+sequence, it is replaced by `n`. In effect, the index is "truncated" to
+the nearest value in the range `[0:n]`.
+
+```python
+"hello"[-1000:+1000] # "hello"
+```
+
+This truncation step does not apply to indices of individual elements:
+
+```python
+"hello"[-6] # error: index out of range
+"hello"[-5] # "h"
+"hello"[4] # "o"
+"hello"[5] # error: index out of range
+```
+
+
+## Expressions
+
+An expression specifies the computation of a value.
+
+The Starlark grammar defines several categories of expression.
+An _operand_ is an expression consisting of a single token (such as an
+identifier or a literal), or a bracketed expression.
+Operands are self-delimiting.
+An operand may be followed by any number of dot, call, or slice
+suffixes, to form a _primary_ expression.
+In some places in the Starlark grammar where an expression is expected,
+it is legal to provide a comma-separated list of expressions denoting
+a tuple.
+The grammar uses `Expression` where a multiple-component expression is allowed,
+and `Test` where it accepts an expression of only a single component.
+
+```grammar {.good}
+Expression = Test {',' Test} .
+
+Test = LambdaExpr | IfExpr | PrimaryExpr | UnaryExpr | BinaryExpr .
+
+PrimaryExpr = Operand
+ | PrimaryExpr DotSuffix
+ | PrimaryExpr CallSuffix
+ | PrimaryExpr SliceSuffix
+ .
+
+Operand = identifier
+ | int | float | string
+ | ListExpr | ListComp
+ | DictExpr | DictComp
+ | '(' [Expression [',']] ')'
+ | ('-' | '+') PrimaryExpr
+ .
+
+DotSuffix = '.' identifier .
+CallSuffix = '(' [Arguments [',']] ')' .
+SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' .
+```
+
+TODO: resolve position of +x, -x, and 'not x' in grammar: Operand or UnaryExpr?
+
+### Identifiers
+
+```grammar {.good}
+Primary = identifier
+```
+
+An identifier is a name that identifies a value.
+
+Lookup of locals and globals may fail if not yet defined.
+
+### Literals
+
+Starlark supports literals of three different kinds:
+
+```grammar {.good}
+Primary = int | float | string
+```
+
+Evaluation of a literal yields a value of the given type (string, int,
+or float) with the given value.
+See [Literals](#lexical-elements) for details.
+
+### Parenthesized expressions
+
+```grammar {.good}
+Primary = '(' [Expression] ')'
+```
+
+A single expression enclosed in parentheses yields the result of that expression.
+Explicit parentheses may be used for clarity,
+or to override the default association of subexpressions.
+
+```python
+1 + 2 * 3 + 4 # 11
+(1 + 2) * (3 + 4) # 21
+```
+
+If the parentheses are empty, or contain a single expression followed
+by a comma, or contain two or more expressions, the expression yields a tuple.
+
+```python
+() # (), the empty tuple
+(1,) # (1,), a tuple of length 1
+(1, 2) # (1, 2), a 2-tuple or pair
+(1, 2, 3) # (1, 2, 3), a 3-tuple or triple
+```
+
+In some contexts, such as a `return` or assignment statement or the
+operand of a `for` statement, a tuple may be expressed without
+parentheses.
+
+```python
+x, y = 1, 2
+
+return 1, 2
+
+for x in 1, 2:
+ print(x)
+```
+
+Starlark (like Python 3) does not accept an unparenthesized tuple
+expression as the operand of a list comprehension:
+
+```python
+[2*x for x in 1, 2, 3] # parse error: unexpected ','
+```
+
+### Dictionary expressions
+
+A dictionary expression is a comma-separated list of colon-separated
+key/value expression pairs, enclosed in curly brackets, and it yields
+a new dictionary object.
+An optional comma may follow the final pair.
+
+```grammar {.good}
+DictExpr = '{' [Entries [',']] '}' .
+Entries = Entry {',' Entry} .
+Entry = Test ':' Test .
+```
+
+Examples:
+
+
+```python
+{}
+{"one": 1}
+{"one": 1, "two": 2,}
+```
+
+The key and value expressions are evaluated in left-to-right order.
+Evaluation fails if the same key is used multiple times.
+
+Only [hashable](#hashing) values may be used as the keys of a dictionary.
+This includes all built-in types except dictionaries, sets, and lists;
+a tuple is hashable only if its elements are hashable.
+
+
+### List expressions
+
+A list expression is a comma-separated list of element expressions,
+enclosed in square brackets, and it yields a new list object.
+An optional comma may follow the last element expression.
+
+```grammar {.good}
+ListExpr = '[' [Expression [',']] ']' .
+```
+
+Element expressions are evaluated in left-to-right order.
+
+Examples:
+
+```python
+[] # [], empty list
+[1] # [1], a 1-element list
+[1, 2, 3,] # [1, 2, 3], a 3-element list
+```
+
+### Unary operators
+
+There are four unary operators, all appearing before their operand:
+`+`, `-`, `~`, and `not`.
+
+```grammar {.good}
+UnaryExpr = '+' PrimaryExpr
+ | '-' PrimaryExpr
+ | '~' PrimaryExpr
+ | 'not' Test
+ .
+```
+
+```text
++ number unary positive (int, float)
+- number unary negation (int, float)
+~ number unary bitwise inversion (int)
+not x logical negation (any type)
+```
+
+The `+` and `-` operators may be applied to any number
+(`int` or `float`); `+` returns the number unchanged and `-` returns its negation.
+Unary `+` is never necessary in a correct program,
+but may serve as an assertion that its operand is a number,
+or as documentation.
+
+```python
+if x > 0:
+ return +1
+elif x < 0:
+ return -1
+else:
+ return 0
+```
+
+The `not` operator returns the negation of the truth value of its
+operand.
+
+```python
+not True # False
+not False # True
+not [1, 2, 3] # False
+not "" # True
+not 0 # True
+```
+
+The `~` operator yields the bitwise inversion of its integer argument.
+The bitwise inversion of x is defined as -(x+1).
+
+```python
+~1 # -2
+~-1 # 0
+~0 # -1
+```
+
+
+### Binary operators
+
+Starlark has the following binary operators, arranged in order of increasing precedence:
+
+```text
+or
+and
+== != < > <= >= in not in
+|
+^
+&
+<< >>
+- +
+* / // %
+```
+
+Comparison operators, `in`, and `not in` are non-associative,
+so the parser will not accept `0 <= i < n`.
+All other binary operators of equal precedence associate to the left.
+
+```grammar {.good}
+BinaryExpr = Test {Binop Test} .
+
+Binop = 'or'
+ | 'and'
+ | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in'
+ | '|'
+ | '^'
+ | '&'
+ | '-' | '+'
+ | '*' | '%' | '/' | '//'
+ | '<<' | '>>'
+ .
+```
+
+#### `or` and `and`
+
+The `or` and `and` operators yield, respectively, the logical disjunction and
+conjunction of their arguments, which need not be Booleans.
+The expression `x or y` yields the value of `x` if its truth value is `True`,
+or the value of `y` otherwise.
+
+```starlark
+False or False # False
+False or True # True
+True or False # True
+True or True # True
+
+0 or "hello" # "hello"
+1 or "hello" # 1
+```
+
+Similarly, `x and y` yields the value of `x` if its truth value is
+`False`, or the value of `y` otherwise.
+
+```starlark
+False and False # False
+False and True # False
+True and False # False
+True and True # True
+
+0 and "hello" # 0
+1 and "hello" # "hello"
+```
+
+These operators use "short circuit" evaluation, so the second
+expression is not evaluated if the value of the first expression has
+already determined the result, allowing constructions like these:
+
+```python
+len(x) > 0 and x[0] == 1 # x[0] is not evaluated if x is empty
+x and x[0] == 1
+len(x) == 0 or x[0] == ""
+not x or not x[0]
+```
+
+#### Comparisons
+
+The `==` operator reports whether its operands are equal; the `!=`
+operator is its negation.
+
+The operators `<`, `>`, `<=`, and `>=` perform an ordered comparison
+of their operands. It is an error to apply these operators to
+operands of unequal type, unless one of the operands is an `int` and
+the other is a `float`. Of the built-in types, only the following
+support ordered comparison, using the ordering relation shown:
+
+```shell
+NoneType # None <= None
+bool # False < True
+int # mathematical
+float # as defined by IEEE 754
+string # lexicographical
+tuple # lexicographical
+list # lexicographical
+```
+
+Comparison of floating point values follows the IEEE 754 standard,
+which breaks several mathematical identities. For example, if `x` is
+a `NaN` value, the comparisons `x < y`, `x == y`, and `x > y` all
+yield false for all values of `y`.
+
+Applications may define additional types that support ordered
+comparison.
+
+The remaining built-in types support only equality comparisons.
+Values of type `dict` or `set` compare equal if their elements compare
+equal, and values of type `function` or `builtin_function_or_method` are equal only to
+themselves.
+
+```shell
+dict # equal contents
+set # equal contents
+function # identity
+builtin_function_or_method # identity
+```
+
+#### Arithmetic operations
+
+The following table summarizes the binary arithmetic operations
+available for built-in types:
+
+```shell
+Arithmetic (int or float; result has type float unless both operands have type int)
+ number + number # addition
+ number - number # subtraction
+ number * number # multiplication
+ number / number # real division (result is always a float)
+ number // number # floored division
+ number % number # remainder of floored division
+ int ^ int # bitwise XOR
+ int << int # bitwise left shift
+ int >> int # bitwise right shift
+
+Concatenation
+ string + string
+ list + list
+ tuple + tuple
+
+Repetition (string/list/tuple)
+ int * sequence
+ sequence * int
+
+String interpolation
+ string % any # see String Interpolation
+
+Sets
+ int | int # bitwise union (OR)
+ set | set # set union
+ int & int # bitwise intersection (AND)
+ set & set # set intersection
+ set ^ set # set symmetric difference
+```
+
+The operands of the arithmetic operators `+`, `-`, `*`, `//`, and
+`%` must both be numbers (`int` or `float`) but need not have the same type.
+The result has type `int` only if both operands have that type.
+The result of real division `/` always has type `float`.
+
+The `+` operator may be applied to non-numeric operands of the same
+type, such as two lists, two tuples, or two strings, in which case it
+computes the concatenation of the two operands and yields a new value of
+the same type.
+
+```python
+"Hello, " + "world" # "Hello, world"
+(1, 2) + (3, 4) # (1, 2, 3, 4)
+[1, 2] + [3, 4] # [1, 2, 3, 4]
+```
+
+The `*` operator may be applied to an integer _n_ and a value of type
+`string`, `list`, or `tuple`, in which case it yields a new value
+of the same sequence type consisting of _n_ repetitions of the original sequence.
+The order of the operands is immaterial.
+Negative values of _n_ behave like zero.
+
+```python
+'mur' * 2 # 'murmur'
+3 * range(3) # [0, 1, 2, 0, 1, 2, 0, 1, 2]
+```
+
+Applications may define additional types that support any subset of
+these operators.
+
+The `&` operator requires two operands of the same type, either `int` or `set`.
+For integers, it yields the bitwise intersection (AND) of its operands.
+For sets, it yields a new set containing the intersection of the
+elements of the operand sets, preserving the element order of the left
+operand.
+
+The `|` operator likewise computes bitwise or set unions.
+The result of `set | set` is a new set whose elements are the
+union of the operands, preserving the order of the elements of the
+operands, left before right.
+
+The `^` operator accepts operands of either `int` or `set` type.
+For integers, it yields the bitwise XOR (exclusive OR) of its operands.
+For sets, it yields a new set containing elements of either first or second
+operand but not both (symmetric difference).
+
+The `<<` and `>>` operators require both operands to be of type `int`. They shift
+the first operand to the left or right by the number of bits given by the
+second operand. It is a dynamic error if the second operand is negative.
+Implementations may impose a limit on the second operand of a left shift.
+
+```python
+0x12345678 & 0xFF # 0x00000078
+0x12345678 | 0xFF # 0x123456FF
+0b01011101 ^ 0b110101101 # 0b111110000
+0b01011101 >> 2 # 0b010111
+0b01011101 << 2 # 0b0101110100
+
+set([1, 2]) & set([2, 3]) # set([2])
+set([1, 2]) | set([2, 3]) # set([1, 2, 3])
+set([1, 2]) ^ set([2, 3]) # set([1, 3])
+```
+
+<b>Implementation note:</b>
+The Go implementation of Starlark requires the `-set` flag to
+enable support for sets.
+The Java implementation does not support sets.
+
+
+#### Membership tests
+
+```text
+ any in sequence (list, tuple, dict, set, string)
+ any not in sequence
+```
+
+The `in` operator reports whether its first operand is a member of its
+second operand, which must be a list, tuple, dict, set, or string.
+The `not in` operator is its negation.
+Both return a Boolean.
+
+The meaning of membership varies by the type of the second operand:
+the members of a list, tuple, or set are its elements;
+the members of a dict are its keys;
+the members of a string are all its substrings.
+
+```python
+1 in [1, 2, 3] # True
+4 in (1, 2, 3) # False
+4 not in set([1, 2, 3]) # True
+
+d = {"one": 1, "two": 2}
+"one" in d # True
+"three" in d # False
+1 in d # False
+[] in d # False
+
+"nasty" in "dynasty" # True
+"a" in "banana" # True
+"f" not in "way" # True
+```
+
+#### String interpolation
+
+The expression `format % args` performs _string interpolation_, a
+simple form of template expansion.
+The `format` string is interpreted as a sequence of literal portions
+and _conversions_.
+Each conversion, which starts with a `%` character, is replaced by its
+corresponding value from `args`.
+The characters following `%` in each conversion determine which
+argument it uses and how to convert it to a string.
+
+Each `%` character marks the start of a conversion specifier, unless
+it is immediately followed by another `%`, in which case both
+characters together denote a literal percent sign.
+
+If the `"%"` is immediately followed by `"(key)"`, the parenthesized
+substring specifies the key of the `args` dictionary whose
+corresponding value is the operand to convert.
+Otherwise, the conversion's operand is the next element of `args`,
+which must be a tuple with exactly one component per conversion,
+unless the format string contains only a single conversion, in which
+case `args` itself is its operand.
+
+Starlark does not support the flag, width, and padding specifiers
+supported by Python's `%` and other variants of C's `printf`.
+
+After the optional `(key)` comes a single letter indicating what
+operand types are valid and how to convert the operand `x` to a string:
+
+```text
+% none literal percent sign
+s any as if by str(x)
+r any as if by repr(x)
+d number signed integer decimal
+i number signed integer decimal
+o number signed octal
+x number signed hexadecimal, lowercase
+X number signed hexadecimal, uppercase
+e number float exponential format, lowercase
+E number float exponential format, uppercase
+f number float decimal format, lowercase
+F number float decimal format, uppercase
+g number like %e for large exponents, %f otherwise
+G number like %E for large exponents, %F otherwise
+c string x (string must encode a single Unicode code point)
+ int as if by chr(x)
+```
+
+It is an error if the argument does not have the type required by the
+conversion specifier. A Boolean argument is not considered a number.
+
+Examples:
+
+```python
+"Hello %s, your score is %d" % ("Bob", 75) # "Hello Bob, your score is 75"
+
+"%d %o %x %c" % (65, 65, 65, 65) # "65 101 41 A" (decimal, octal, hexadecimal, Unicode)
+
+"%(greeting)s, %(audience)s" % dict( # "Hello, world"
+ greeting="Hello",
+ audience="world",
+)
+
+"rate = %g%% APR" % 3.5 # "rate = 3.5% APR"
+```
+
+One subtlety: to use a tuple as the operand of a conversion in a format
+string containing only a single conversion, you must wrap the tuple in
+a singleton tuple:
+
+```python
+"coordinates=%s" % (40.741491, -74.003680) # error: too many arguments for format string
+"coordinates=%s" % ((40.741491, -74.003680),) # "coordinates=(40.741491, -74.003680)"
+```
+
+TODO: specify `%e` and `%f` more precisely.
+
+### Conditional expressions
+
+A conditional expression has the form `a if cond else b`.
+It first evaluates the condition `cond`.
+If it's true, it evaluates `a` and yields its value;
+otherwise it yields the value of `b`.
+
+```grammar {.good}
+IfExpr = Test 'if' Test 'else' Test .
+```
+
+Example:
+
+```python
+"yes" if enabled else "no"
+```
+
+### Comprehensions
+
+A comprehension constructs a new list or dictionary value by looping
+over one or more iterables and evaluating a _body_ expression that produces
+successive elements of the result.
+
+A list comprehension consists of a single expression followed by one
+or more _clauses_, the first of which must be a `for` clause.
+Each `for` clause resembles a `for` statement, and specifies an
+iterable operand and a set of variables to be assigned by successive
+values of the iterable.
+An `if` clause resembles an `if` statement, and specifies a condition
+that must be met for the body expression to be evaluated.
+A sequence of `for` and `if` clauses acts like a nested sequence of
+`for` and `if` statements.
+
+```grammar {.good}
+ListComp = '[' Test {CompClause} ']' .
+DictComp = '{' Entry {CompClause} '}' .
+
+CompClause = 'for' LoopVariables 'in' Test
+ | 'if' Test .
+
+LoopVariables = PrimaryExpr {',' PrimaryExpr} .
+```
+
+Examples:
+
+```python
+[x*x for x in range(5)] # [0, 1, 4, 9, 16]
+[x*x for x in range(5) if x%2 == 0] # [0, 4, 16]
+[(x, y) for x in range(5)
+ if x%2 == 0
+ for y in range(5)
+ if y > x] # [(0, 1), (0, 2), (0, 3), (0, 4), (2, 3), (2, 4)]
+```
+
+A dict comprehension resembles a list comprehension, but its body is a
+pair of expressions, `key: value`, separated by a colon,
+and its result is a dictionary containing the key/value pairs
+for which the body expression was evaluated.
+Evaluation fails if the value of any key is unhashable.
+
+As with a `for` loop, the loop variables may exploit compound
+assignment:
+
+```python
+[x*y+z for (x, y), z in [((2, 3), 5), (("o", 2), "!")]] # [11, 'oo!']
+```
+
+Starlark, following Python 3, does not accept an unparenthesized
+tuple or lambda expression as the operand of a `for` clause:
+
+```python
+[x*x for x in 1, 2, 3] # parse error: unexpected comma
+[x*x for x in lambda: 0] # parse error: unexpected lambda
+```
+
+Comprehensions in Starlark, again following Python 3, define a new lexical
+block, so assignments to loop variables have no effect on variables of
+the same name in an enclosing block:
+
+```python
+x = 1
+_ = [x for x in [2]] # new variable x is local to the comprehension
+print(x) # 1
+```
+
+The operand of a comprehension's first clause (always a `for`) is
+resolved in the lexical block enclosing the comprehension.
+In the examples below, identifiers referring to the outer variable
+named `x` have been distinguished by subscript.
+
+```python
+x₀ = (1, 2, 3)
+[x*x for x in x₀] # [1, 4, 9]
+[x*x for x in x₀ if x%2 == 0] # [4]
+```
+
+All subsequent `for` and `if` expressions are resolved within the
+comprehension's lexical block, as in this rather obscure example:
+
+```python
+x₀ = ([1, 2], [3, 4], [5, 6])
+[x*x for x in x₀ for x in x if x%2 == 0] # [4, 16, 36]
+```
+
+which would be more clearly rewritten as:
+
+```python
+x = ([1, 2], [3, 4], [5, 6])
+[z*z for y in x for z in y if z%2 == 0] # [4, 16, 36]
+```
+
+
+### Function and method calls
+
+```grammar {.good}
+CallSuffix = '(' [Arguments [',']] ')' .
+
+Arguments = Argument {',' Argument} .
+Argument = Test | identifier '=' Test | '*' Test | '**' Test .
+```
+
+A value `f` of type `function` or `builtin_function_or_method` may be called using the expression `f(...)`.
+Applications may define additional types whose values may be called in the same way.
+
+A method call such as `filename.endswith(".star")` is the composition
+of two operations, `m = filename.endswith` and `m(".star")`.
+The first, a dot operation, yields a _bound method_, a function value
+that pairs a receiver value (the `filename` string) with a choice of
+method ([string·endswith](#string·endswith)).
+
+Only built-in or application-defined types may have methods.
+
+See [Functions](#functions) for an explanation of function parameter passing.
+
+### Dot expressions
+
+A dot expression `x.f` selects the attribute `f` (a field or method)
+of the value `x`.
+
+Fields are possessed by none of the main Starlark [data types](#data-types),
+but some application-defined types have them.
+Methods belong to the built-in types `string`, `list`, `dict`, and
+`set`, and to many application-defined types.
+
+```grammar {.good}
+DotSuffix = '.' identifier .
+```
+
+A dot expression fails if the value does not have an attribute of the
+specified name.
+
+Use the built-in function `hasattr(x, "f")` to ascertain whether a
+value has a specific attribute, or `dir(x)` to enumerate all its
+attributes. The `getattr(x, "f")` function can be used to select an
+attribute when the name `"f"` is not known statically.
+
+A dot expression that selects a method typically appears within a call
+expression, as in these examples:
+
+```python
+["able", "baker", "charlie"].index("baker") # 1
+"banana".count("a") # 3
+"banana".reverse() # error: string has no .reverse field or method
+```
+
+But when not called immediately, the dot expression evaluates to a
+_bound method_, that is, a method coupled to a specific receiver
+value. A bound method can be called like an ordinary function,
+without a receiver argument:
+
+```python
+f = "banana".count
+f # <built-in method count of string value>
+f("a") # 3
+f("n") # 2
+```
+
+### Index expressions
+
+An index expression `a[i]` yields the `i`th element of an _indexable_
+type such as a string, tuple, or list. The index `i` must be an `int`
+value in the range -`n` ≤ `i` < `n`, where `n` is `len(a)`; any other
+index results in an error.
+
+```grammar {.good}
+SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' .
+```
+
+A valid negative index `i` behaves like the non-negative index `n+i`,
+allowing for convenient indexing relative to the end of the
+sequence.
+
+```python
+"abc"[0] # "a"
+"abc"[1] # "b"
+"abc"[-1] # "c"
+
+("zero", "one", "two")[0] # "zero"
+("zero", "one", "two")[1] # "one"
+("zero", "one", "two")[-1] # "two"
+```
+
+An index expression `d[key]` may also be applied to a dictionary `d`,
+to obtain the value associated with the specified key. It is an error
+if the dictionary contains no such key.
+
+An index expression appearing on the left side of an assignment causes
+the specified list or dictionary element to be updated:
+
+```starlark
+a = range(3) # a == [0, 1, 2]
+a[2] = 7 # a == [0, 1, 7]
+
+coins["suzie b"] = 100
+```
+
+It is a dynamic error to attempt to update an element of an immutable
+type, such as a tuple or string, or a frozen value of a mutable type.
+
+### Slice expressions
+
+A slice expression `a[start:stop:stride]` yields a new value containing a
+sub-sequence of `a`, which must be a string, tuple, or list.
+
+```grammar {.good}
+SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' .
+```
+
+Each of the `start`, `stop`, and `stride` operands is optional;
+if present, and not `None`, each must be an integer.
+The `stride` value defaults to 1.
+If the stride is not specified, the colon preceding it may be omitted too.
+It is an error to specify a stride of zero.
+
+Conceptually, these operands specify a sequence of values `i` starting
+at `start` and successively adding `stride` until `i` reaches or
+passes `stop`. The result consists of the concatenation of values of
+`a[i]` for which `i` is valid.
+
+The effective start and stop indices are computed from the three
+operands as follows. Let `n` be the length of the sequence.
+
+<b>If the stride is positive:</b>
+If the `start` operand was omitted, it defaults to -infinity.
+If the `stop` operand was omitted, it defaults to +infinity.
+For either operand, if a negative value was supplied, `n` is added to it.
+The `start` and `stop` values are then "clamped" to the
+nearest value in the range 0 to `n`, inclusive.
+
+<b>If the stride is negative:</b>
+If the `start` operand was omitted, it defaults to +infinity.
+If the `stop` operand was omitted, it defaults to -infinity.
+For either operand, if a negative value was supplied, `n` is added to it.
+The `start` and `stop` values are then "clamped" to the
+nearest value in the range -1 to `n`-1, inclusive.
+
+```python
+"abc"[1:] # "bc" (remove first element)
+"abc"[:-1] # "ab" (remove last element)
+"abc"[1:-1] # "b" (remove first and last element)
+"banana"[1::2] # "aaa" (select alternate elements starting at index 1)
+"banana"[4::-2] # "nnb" (select alternate elements in reverse, starting at index 4)
+```
+
+Unlike Python, Starlark does not allow a slice expression on the left
+side of an assignment.
+
+Slicing a tuple or string may be more efficient than slicing a list
+because tuples and strings are immutable, so the result of the
+operation can share the underlying representation of the original
+operand (when the stride is 1). By contrast, slicing a list requires
+the creation of a new list and copying of the necessary elements.
+
+<!-- TODO tighten up this section -->
+
+### Lambda expressions
+
+A `lambda` expression yields a new function value.
+
+```grammar {.good}
+LambdaExpr = 'lambda' [Parameters] ':' Test .
+
+Parameters = Parameter {',' Parameter} .
+Parameter = identifier
+ | identifier '=' Test
+ | '*'
+ | '*' identifier
+ | '**' identifier
+ .
+```
+
+Syntactically, a lambda expression consists of the keyword `lambda`,
+followed by a parameter list like that of a `def` statement but
+unparenthesized, then a colon `:`, and a single expression, the
+_function body_.
+
+Example:
+
+```python
+def map(f, list):
+ return [f(x) for x in list]
+
+map(lambda x: 2*x, range(3)) # [0, 2, 4]
+```
+
+As with functions created by a `def` statement, a lambda function
+captures the syntax of its body, the default values of any optional
+parameters, the value of each free variable appearing in its body, and
+the global dictionary of the current module.
+
+The name of a function created by a lambda expression is `"lambda"`.
+
+The two statements below are essentially equivalent, but the
+function created by the `def` statement is named `twice` and the
+function created by the lambda expression is named `lambda`.
+
+```python
+def twice(x):
+ return x * 2
+
+twice = lambda x: x * 2
+```
+
+## Statements
+
+```grammar {.good}
+Statement = DefStmt | IfStmt | ForStmt | SimpleStmt .
+SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' .
+SmallStmt = ReturnStmt
+ | BreakStmt | ContinueStmt | PassStmt
+ | AssignStmt
+ | ExprStmt
+ | LoadStmt
+ .
+```
+
+### Pass statements
+
+A `pass` statement does nothing. Use a `pass` statement when the
+syntax requires a statement but no behavior is required, such as the
+body of a function that does nothing.
+
+```grammar {.good}
+PassStmt = 'pass' .
+```
+
+Example:
+
+```python
+def noop():
+ pass
+
+def list_to_dict(items):
+ # Convert list of tuples to dict
+ m = {}
+ for k, m[k] in items:
+ pass
+ return m
+```
+
+### Assignments
+
+An assignment statement has the form `lhs = rhs`. It evaluates the
+expression on the right-hand side then assigns its value (or values) to
+the variable (or variables) on the left-hand side.
+
+```grammar {.good}
+AssignStmt = Expression '=' Expression .
+```
+
+The expression on the left-hand side is called a _target_. The
+simplest target is the name of a variable, but a target may also have
+the form of an index expression, to update the element of a list or
+dictionary, or a dot expression, to update the field of an object:
+
+```python
+k = 1
+a[i] = v
+m.f = ""
+```
+
+Compound targets may consist of a comma-separated list of
+subtargets, optionally surrounded by parentheses or square brackets,
+and targets may be nested arbitrarily in this way.
+An assignment to a compound target checks that the right-hand value is a
+sequence with the same number of elements as the target.
+Each element of the sequence is then assigned to the corresponding
+element of the target, recursively applying the same logic.
+
+```python
+pi, e = 3.141, 2.718
+(x, y) = f()
+[zero, one, two] = range(3)
+
+[(a, b), (c, d)] = {"a": "b", "c": "d"}.items()
+a, b = {"a": 1, "b": 2}
+```
+
+The same process for assigning a value to a target expression is used
+in `for` loops and in comprehensions.
+
+
+### Augmented assignments
+
+An augmented assignment, which has the form `lhs op= rhs`, updates the
+variable `lhs` by applying a binary arithmetic operator `op` (one of
+`+`, `-`, `*`, `/`, `//`, `%`, `&`, `|`, `^`, `<<`, `>>`) to the previous
+value of `lhs` and the value of `rhs`.
+
+```grammar {.good}
+AssignStmt = Expression ('+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression .
+```
+
+The left-hand side must be a simple target:
+a name, an index expression, or a dot expression.
+
+```python
+x -= 1
+x.filename += ".star"
+a[index()] *= 2
+```
+
+Any subexpressions in the target on the left-hand side are evaluated
+exactly once, before the evaluation of `rhs`.
+The first two assignments above are thus equivalent to:
+
+```python
+x = x - 1
+x.filename = x.filename + ".star"
+```
+
+and the third assignment is similar in effect to the following two
+statements but does not declare a new temporary variable `i`:
+
+```python
+i = index()
+a[i] = a[i] * 2
+```
+
+### Function definitions
+
+A `def` statement creates a named function and assigns it to a variable.
+
+```grammar {.good}
+DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite .
+```
+
+Example:
+
+```python
+def twice(x):
+ return x * 2
+
+str(twice) # "<function twice>"
+twice(2) # 4
+twice("two") # "twotwo"
+```
+
+The function's name is preceded by the `def` keyword and followed by
+the parameter list (which is enclosed in parentheses), a colon, and
+then an indented block of statements which form the body of the function.
+
+The parameter list is a comma-separated list whose elements are of
+several kinds. First come zero or more required parameters, which are
+simple identifiers; all calls must provide an argument value for these parameters.
+
+The required parameters are followed by zero or more optional
+parameters, of the form `name=expression`. The expression specifies
+the default value for the parameter for use in calls that do not
+provide an argument value for it.
+
+The required parameters are optionally followed by a single parameter
+name preceded by a `*`. This is called the _varargs_ parameter,
+and it accumulates surplus positional arguments specified by a call.
+It is conventionally named `*args`.
+
+The varargs parameter may be followed by zero or more
+parameters, again of the forms `name` or `name=expression`,
+but these parameters differ from earlier ones in that they are
+_keyword-only_: if a call provides their values, it must do so as
+keyword arguments, not positional ones.
+
+```python
+def f(a, *, b=2, c):
+ print(a, b, c)
+
+f(1) # error: function f missing 1 argument (c)
+f(1, 3) # error: function f accepts 1 positional argument (2 given)
+f(1, c=3) # "1 2 3"
+
+def g(a, *args, b=2, c):
+ print(a, b, c, args)
+
+g(1, 3) # error: function g missing 1 argument (c)
+g(1, 4, c=3) # "1 2 3 (4,)"
+
+```
+
+A non-variadic function may also declare keyword-only parameters,
+by using a bare `*` in place of the `*args` parameter.
+This form does not declare a parameter but marks the boundary
+between the earlier parameters and the keyword-only parameters.
+This form must be followed by at least one optional parameter.
+
+Finally, there may be an optional parameter name preceded by `**`.
+This is called the _keyword arguments_ parameter, and accumulates in a
+dictionary any surplus `name=value` arguments that do not match a
+prior parameter. It is conventionally named `**kwargs`.
+
+The final parameter may be followed by a trailing comma.
+
+Here are some example parameter lists:
+
+```python
+def f(): pass
+def f(a, b, c): pass
+def f(a, b, c=1): pass
+def f(a, b, c=1, *args): pass
+def f(a, b, c=1, *args, **kwargs): pass
+def f(**kwargs): pass
+def f(a, b, c=1, *, d=1): pass
+
+def f(
+ a,
+ *args,
+ **kwargs,
+): pass
+```
+
+Execution of a `def` statement creates a new function object. The
+function object contains: the syntax of the function body; the default
+value for each optional parameter; the value of each free variable
+referenced within the function body; and the global dictionary of the
+current module.
+
+<!-- this is too implementation-oriented; it's not a spec. -->
+
+
+### Return statements
+
+A `return` statement ends the execution of a function and returns a
+value to the caller of the function.
+
+```grammar {.good}
+ReturnStmt = 'return' [Expression] .
+```
+
+A return statement may have zero, one, or more
+result expressions separated by commas.
+With no expressions, the function has the result `None`.
+With a single expression, the function's result is the value of that expression.
+With multiple expressions, the function's result is a tuple.
+
+```python
+return # returns None
+return 1 # returns 1
+return 1, 2 # returns (1, 2)
+```
+
+### Expression statements
+
+An expression statement evaluates an expression and discards its result.
+
+```grammar {.good}
+ExprStmt = Expression .
+```
+
+Any expression may be used as a statement, but an expression statement is
+most often used to call a function for its side effects.
+
+```python
+list.append(1)
+```
+
+### If statements
+
+An `if` statement evaluates an expression (the _condition_), then, if
+the truth value of the condition is `True`, executes a list of
+statements.
+
+```grammar {.good}
+IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] .
+```
+
+Example:
+
+```python
+if score >= 100:
+ print("You win!")
+ return
+```
+
+An `if` statement may have an `else` block defining a second list of
+statements to be executed if the condition is false.
+
+```python
+if score >= 100:
+ print("You win!")
+ return
+else:
+ print("Keep trying...")
+ continue
+```
+
+It is common for the `else` block to contain another `if` statement.
+To avoid increasing the nesting depth unnecessarily, the `else` and
+following `if` may be combined as `elif`:
+
+```python
+if x > 0:
+ result = +1
+elif x < 0:
+ result = -1
+else:
+ result = 0
+```
+
+An `if` statement is permitted only within a function definition.
+An `if` statement at top level results in a static error.
+
+<b>Implementation note:</b>
+The Go implementation of Starlark permits `if`-statements to appear at top level
+if the `-globalreassign` flag is enabled.
+
+
+### While loops
+
+A `while` loop evaluates an expression (the _condition_) and if the truth
+value of the condition is `True`, it executes a list of statements and repeats
+the process until the truth value of the condition becomes `False`.
+
+```grammar {.good}
+WhileStmt = 'while' Test ':' Suite .
+```
+
+Example:
+
+```python
+while n > 0:
+ r = r + n
+ n = n - 1
+```
+
+A `while` statement is permitted only within a function definition.
+A `while` statement at top level results in a static error.
+
+<b>Implementation note:</b>
+The Go implementation of Starlark permits `while` loops only if the `-recursion` flag is enabled.
+A `while` statement is permitted at top level if the `-globalreassign` flag is enabled.
+
+
+### For loops
+
+A `for` loop evaluates its operand, which must be an iterable value.
+Then, for each element of the iterable's sequence, the loop assigns
+the successive element values to one or more variables and executes a
+list of statements, the _loop body_.
+
+```grammar {.good}
+ForStmt = 'for' LoopVariables 'in' Expression ':' Suite .
+```
+
+Example:
+
+```python
+for x in range(10):
+ print(10)
+```
+
+The assignment of each value to the loop variables follows the same
+rules as an ordinary assignment. In this example, two-element lists
+are repeatedly assigned to the pair of variables (a, i):
+
+```python
+for a, i in [["a", 1], ["b", 2], ["c", 3]]:
+ print(a, i) # prints "a 1", "b 2", "c 3"
+```
+
+Because Starlark loops always iterate over a finite sequence, they are
+guaranteed to terminate, unlike loops in most languages which can
+execute an arbitrary and perhaps unbounded number of iterations.
+
+Within the body of a `for` loop, `break` and `continue` statements may
+be used to stop the execution of the loop or advance to the next
+iteration.
+
+In Starlark, a `for` loop is permitted only within a function definition.
+A `for` loop at top level results in a static error.
+
+<b>Implementation note:</b>
+The Go implementation of Starlark permits loops to appear at top level
+if the `-globalreassign` flag is enabled.
+
+
+### Break and Continue
+
+The `break` and `continue` statements terminate the current iteration
+of a `for` loop. Whereas the `continue` statement resumes the loop at
+the next iteration, a `break` statement terminates the entire loop.
+
+```grammar {.good}
+BreakStmt = 'break' .
+ContinueStmt = 'continue' .
+```
+
+Example:
+
+```python
+for x in range(10):
+ if x%2 == 1:
+ continue # skip odd numbers
+ if x > 7:
+ break # stop at 8
+ print(x) # prints "0", "2", "4", "6"
+```
+
+Both statements affect only the innermost lexically enclosing loop.
+It is a static error to use a `break` or `continue` statement outside a
+loop.
+
+
+### Load statements
+
+The `load` statement loads another Starlark module, extracts one or
+more values from it, and binds them to names in the current module.
+
+<!--
+The awkwardness of load statements is a consequence of staying a
+strict subset of Python syntax, which allows reuse of existing tools
+such as editor support. Python import statements are inadequate for
+Starlark because they don't allow arbitrary file names for module names.
+-->
+
+Syntactically, a load statement looks like a function call `load(...)`.
+
+```grammar {.good}
+LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' .
+```
+
+A load statement requires at least two "arguments".
+The first must be a literal string; it identifies the module to load.
+Its interpretation is determined by the application into which the
+Starlark interpreter is embedded, and is not specified here.
+
+During execution, the application determines what action to take for a
+load statement.
+A typical implementation locates and executes a Starlark file,
+populating a cache of files executed so far to avoid duplicate work,
+to obtain a module, which is a mapping from global names to values.
+
+The remaining arguments are a mixture of literal strings, such as
+`"x"`, or named literal strings, such as `y="x"`.
+
+The literal string (`"x"`), which must denote a valid identifier not
+starting with `_`, specifies the name to extract from the loaded
+module. In effect, names starting with `_` are not exported.
+The name (`y`) specifies the local name;
+if no name is given, the local name matches the quoted name.
+
+```python
+load("module.star", "x", "y", "z") # assigns x, y, and z
+load("module.star", "x", y2="y", "z") # assigns x, y2, and z
+```
+
+A load statement may not be nested inside any other statement.
+
+
+## Module execution
+
+Each Starlark file defines a _module_, which is a mapping from the
+names of global variables to their values.
+When a Starlark file is executed, whether directly by the application
+or indirectly through a `load` statement, a new Starlark thread is
+created, and this thread executes all the top-level statements in the
+file.
+Because if-statements and for-loops cannot appear outside of a function,
+control flows from top to bottom.
+
+If execution reaches the end of the file, module initialization is
+successful.
+At that point, the value of each of the module's global variables is
+frozen, rendering subsequent mutation impossible.
+The module is then ready for use by another Starlark thread, such as
+one executing a load statement.
+Such threads may access values or call functions defined in the loaded
+module.
+
+A Starlark thread may carry state on behalf of the application into
+which it is embedded, and application-defined functions may behave
+differently depending on this thread state.
+Because module initialization always occurs in a new thread, thread
+state is never carried from a higher-level module into a lower-level
+one.
+The initialization behavior of a module is thus independent of
+whichever module triggered its initialization.
+
+If a Starlark thread encounters an error, execution stops and the error
+is reported to the application, along with a backtrace showing the
+stack of active function calls at the time of the error.
+If an error occurs during initialization of a Starlark module, any
+active `load` statements waiting for initialization of the module also
+fail.
+
+Starlark provides no mechanism by which errors can be handled within
+the language.
+
+
+## Built-in constants and functions
+
+The outermost block of the Starlark environment is known as the "predeclared" block.
+It defines a number of fundamental values and functions needed by all Starlark programs,
+such as `None`, `True`, `False`, and `len`, and possibly additional
+application-specific names.
+
+These names are not reserved words so Starlark programs are free to
+redefine them in a smaller block such as a function body or even at
+the top level of a module. However, doing so may be confusing to the
+reader. Nonetheless, this rule permits names to be added to the
+predeclared block in later versions of the language (or
+application-specific dialect) without breaking existing programs.
+
+
+### None
+
+`None` is the distinguished value of the type `NoneType`.
+
+### True and False
+
+`True` and `False` are the two values of type `bool`.
+
+### any
+
+`any(x)` returns `True` if any element of the iterable sequence x has a truth value of true.
+If the iterable is empty, it returns `False`.
+
+### all
+
+`all(x)` returns `False` if any element of the iterable sequence x has a truth value of false.
+If the iterable is empty, it returns `True`.
+
+### bool
+
+`bool(x)` interprets `x` as a Boolean value---`True` or `False`.
+With no argument, `bool()` returns `False`.
+
+
+### chr
+
+`chr(i)` returns a string that encodes the single Unicode code point
+whose value is specified by the integer `i`. `chr` fails unless 0 ≤
+`i` ≤ 0x10FFFF.
+
+Example:
+
+```python
+chr(65) # "A", LATIN CAPITAL LETTER A
+chr(1049) # "Й", CYRILLIC CAPITAL LETTER SHORT I
+chr(0x1F63F) # "😿", CRYING CAT FACE
+```
+
+See also: `ord`.
+
+<b>Implementation note:</b> `chr` is not provided by the Java implementation.
+
+### dict
+
+`dict` creates a dictionary. It accepts up to one positional
+argument, which is interpreted as an iterable of two-element
+sequences (pairs), each specifying a key/value pair in
+the resulting dictionary.
+
+`dict` also accepts any number of keyword arguments, each of which
+specifies a key/value pair in the resulting dictionary;
+each keyword is treated as a string.
+
+```python
+dict() # {}, empty dictionary
+dict([(1, 2), (3, 4)]) # {1: 2, 3: 4}
+dict([(1, 2), ["a", "b"]]) # {1: 2, "a": "b"}
+dict(one=1, two=2) # {"one": 1, "two": 2}
+dict([(1, 2)], x=3) # {1: 2, "x": 3}
+```
+
+With no arguments, `dict()` returns a new empty dictionary.
+
+`dict(x)` where x is a dictionary returns a new copy of x.
+
+### dir
+
+`dir(x)` returns a new sorted list of the names of the attributes (fields and methods) of its operand.
+The attributes of a value `x` are the names `f` such that `x.f` is a valid expression.
+
+For example,
+
+```python
+dir("hello") # ['capitalize', 'count', ...], the methods of a string
+```
+
+Several types known to the interpreter, such as list, string, and dict, have methods, but none have fields.
+However, an application may define types with fields that may be read or set by statements such as these:
+
+```text
+y = x.f
+x.f = y
+```
+
+### enumerate
+
+`enumerate(x)` returns a list of (index, value) pairs, each containing
+successive values of the iterable sequence x and the index of the value
+within the sequence.
+
+The optional second parameter, `start`, specifies an integer value to
+add to each index.
+
+```python
+enumerate(["zero", "one", "two"]) # [(0, "zero"), (1, "one"), (2, "two")]
+enumerate(["one", "two"], 1) # [(1, "one"), (2, "two")]
+```
+
+### fail
+
+The `fail(*args, sep=" ")` function causes execution to fail
+with the specified error message.
+Like `print`, arguments are formatted as if by `str(x)` and
+separated by a space, unless an alternative separator is
+specified by a `sep` named argument.
+
+```python
+fail("oops") # "fail: oops"
+fail("oops", 1, False, sep='/') # "fail: oops/1/False"
+```
+
+### float
+
+`float(x)` interprets its argument as a floating-point number.
+
+If x is a `float`, the result is x.
+If x is an `int`, the result is the nearest floating point value to x.
+If x is a string, the string is interpreted as a floating-point literal.
+With no arguments, `float()` returns `0.0`.
+
+
+### getattr
+
+`getattr(x, name)` returns the value of the attribute (field or method) of x named `name`.
+It is a dynamic error if x has no such attribute.
+
+`getattr(x, "f")` is equivalent to `x.f`.
+
+```python
+getattr("banana", "split")("a") # ["b", "n", "n", ""], equivalent to "banana".split("a")
+```
+
+The three-argument form `getattr(x, name, default)` returns the
+provided `default` value instead of failing.
+
+### hasattr
+
+`hasattr(x, name)` reports whether x has an attribute (field or method) named `name`.
+
+### hash
+
+`hash(x)` returns an integer hash of a string x
+such that two equal strings have the same hash.
+In other words `x == y` implies `hash(x) == hash(y)`.
+
+In the interests of reproducibility of Starlark program behavior over time and
+across implementations, the specific hash function is the same as that implemented by
+[java.lang.String.hashCode](https://docs.oracle.com/javase/7/docs/api/java/lang/String.html#hashCode),
+a simple polynomial accumulator over the UTF-16 transcoding of the string:
+ ```
+s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
+```
+
+`hash` fails if given a non-string operand,
+even if the value is hashable and thus suitable as the key of a dictionary.
+
+### int
+
+`int(x[, base])` interprets its argument as an integer.
+
+If x is an `int`, the result is x.
+If x is a `float`, the result is the integer value nearest to x,
+truncating towards zero; it is an error if x is not finite (`NaN`,
+`+Inf`, `-Inf`).
+If x is a `bool`, the result is 0 for `False` or 1 for `True`.
+
+If x is a string, it is interpreted as a sequence of digits in the
+specified base, decimal by default.
+If `base` is zero, x is interpreted like an integer literal, the base
+being inferred from an optional base prefix such as `0b`, `0o`, or
+`0x` preceding the first digit.
+When the `base` is provided explicitly, a matching base prefix is
+also permitted, and has no effect.
+Irrespective of base, the string may start with an optional `+` or `-`
+sign indicating the sign of the result.
+
+```python
+int("11") # 11
+int("11", 0) # 11
+int("11", 10) # 11
+int("11", 2) # 3
+int("11", 8) # 9
+int("11", 16) # 17
+
+int("0x11", 0) # 17
+int("0x11", 16) # 17
+int("0b1", 16) # 177 (0xb1)
+int("0b1", 2) # 1
+int("0b1", 0) # 1
+
+int("0x11") # error: invalid literal with base 10
+```
+
+### len
+
+`len(x)` returns the number of elements in its argument.
+
+It is a dynamic error if its argument is not a sequence.
+
+### list
+
+`list` constructs a list.
+
+`list(x)` returns a new list containing the elements of the
+iterable sequence x.
+
+With no argument, `list()` returns a new empty list.
+
+### max
+
+`max(x)` returns the greatest element in the iterable sequence x.
+
+It is an error if any element does not support ordered comparison,
+or if the sequence is empty.
+
+The optional named parameter `key` specifies a function to be applied
+to each element prior to comparison.
+
+```python
+max([3, 1, 4, 1, 5, 9]) # 9
+max("two", "three", "four") # "two", the lexicographically greatest
+max("two", "three", "four", key=len) # "three", the longest
+```
+
+### min
+
+`min(x)` returns the least element in the iterable sequence x.
+
+It is an error if any element does not support ordered comparison,
+or if the sequence is empty.
+
+```python
+min([3, 1, 4, 1, 5, 9]) # 1
+min("two", "three", "four") # "four", the lexicographically least
+min("two", "three", "four", key=len) # "two", the shortest
+```
+
+
+### ord
+
+`ord(s)` returns the integer value of the sole Unicode code point encoded by the string `s`.
+
+If `s` does not encode exactly one Unicode code point, `ord` fails.
+Each invalid code within the string is treated as if it encodes the
+Unicode replacement character, U+FFFD.
+
+Example:
+
+```python
+ord("A") # 65
+ord("Й") # 1049
+ord("😿") # 0x1F63F
+ord("Й"[1:]) # 0xFFFD (Unicode replacement character)
+```
+
+See also: `chr`.
+
+<b>Implementation note:</b> `ord` is not provided by the Java implementation.
+
+### print
+
+`print(*args, sep=" ")` prints its arguments, followed by a newline.
+Arguments are formatted as if by `str(x)` and separated with a space,
+unless an alternative separator is specified by a `sep` named argument.
+
+Example:
+
+```python
+print(1, "hi") # "1 hi\n"
+print("hello", "world") # "hello world\n"
+print("hello", "world", sep=", ") # "hello, world\n"
+```
+
+Typically the formatted string is printed to the standard error file,
+but the exact behavior is a property of the Starlark thread and is
+determined by the host application.
+
+### range
+
+`range` returns an immutable sequence of integers defined by the specified interval and stride.
+
+```python
+range(stop) # equivalent to range(0, stop)
+range(start, stop) # equivalent to range(start, stop, 1)
+range(start, stop, step)
+```
+
+`range` requires between one and three integer arguments.
+With one argument, `range(stop)` returns the ascending sequence of non-negative integers less than `stop`.
+With two arguments, `range(start, stop)` returns only integers not less than `start`.
+
+With three arguments, `range(start, stop, step)` returns integers
+formed by successively adding `step` to `start` until the value meets or passes `stop`.
+A call to `range` fails if the value of `step` is zero.
+
+A call to `range` does not materialize the entire sequence, but
+returns a fixed-size value of type `"range"` that represents the
+parameters that define the sequence.
+The `range` value is iterable and may be indexed efficiently.
+
+```python
+list(range(10)) # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+list(range(3, 10)) # [3, 4, 5, 6, 7, 8, 9]
+list(range(3, 10, 2)) # [3, 5, 7, 9]
+list(range(10, 3, -2)) # [10, 8, 6, 4]
+```
+
+The `len` function applied to a `range` value returns its length.
+The truth value of a `range` value is `True` if its length is non-zero.
+
+Range values are comparable: two `range` values compare equal if they
+denote the same sequence of integers, even if they were created using
+different parameters.
+
+Range values are not hashable. <!-- should they be? -->
+
+The `str` function applied to a `range` value yields a string of the
+form `range(10)`, `range(1, 10)`, or `range(1, 10, 2)`.
+
+The `x in y` operator, where `y` is a range, reports whether `x` is equal to
+some member of the sequence `y`; the operation fails unless `x` is a
+number.
+
+### repr
+
+`repr(x)` formats its argument as a string.
+
+All strings in the result are double-quoted.
+
+```python
+repr(1) # '1'
+repr("x") # '"x"'
+repr([1, "x"]) # '[1, "x"]'
+```
+
+### reversed
+
+`reversed(x)` returns a new list containing the elements of the iterable sequence x in reverse order.
+
+```python
+reversed(range(5)) # [4, 3, 2, 1, 0]
+reversed("stressed".codepoints()) # ["d", "e", "s", "s", "e", "r", "t", "s"]
+reversed({"one": 1, "two": 2}.keys()) # ["two", "one"]
+```
+
+### set
+
+`set(x)` returns a new set containing the elements of the iterable x.
+With no argument, `set()` returns a new empty set.
+
+```python
+set([3, 1, 4, 1, 5, 9]) # set([3, 1, 4, 5, 9])
+```
+
+<b>Implementation note:</b>
+Sets are an optional feature of the Go implementation of Starlark,
+enabled by the `-set` flag.
+
+
+### sorted
+
+`sorted(x)` returns a new list containing the elements of the iterable sequence x,
+in sorted order. The sort algorithm is stable.
+
+The optional named parameter `reverse`, if true, causes `sorted` to
+return results in reverse sorted order.
+
+The optional named parameter `key` specifies a function of one
+argument to apply to obtain the value's sort key.
+The default behavior is the identity function.
+
+```python
+sorted(set("harbors".codepoints())) # ['a', 'b', 'h', 'o', 'r', 's']
+sorted([3, 1, 4, 1, 5, 9]) # [1, 1, 3, 4, 5, 9]
+sorted([3, 1, 4, 1, 5, 9], reverse=True) # [9, 5, 4, 3, 1, 1]
+
+sorted(["two", "three", "four"], key=len) # ["two", "four", "three"], shortest to longest
+sorted(["two", "three", "four"], key=len, reverse=True) # ["three", "four", "two"], longest to shortest
+```
+
+
+### str
+
+`str(x)` formats its argument as a string.
+
+If x is a string, the result is x (without quotation).
+All other strings, such as elements of a list of strings, are double-quoted.
+
+```python
+str(1) # '1'
+str("x") # 'x'
+str([1, "x"]) # '[1, "x"]'
+```
+
+### tuple
+
+`tuple(x)` returns a tuple containing the elements of the iterable x.
+
+With no arguments, `tuple()` returns the empty tuple.
+
+### type
+
+`type(x)` returns a string describing the type of its operand.
+
+```python
+type(None) # "NoneType"
+type(0) # "int"
+type(0.0) # "float"
+```
+
+### zip
+
+`zip()` returns a new list of n-tuples formed from corresponding
+elements of each of the n iterable sequences provided as arguments to
+`zip`. That is, the first tuple contains the first element of each of
+the sequences, the second tuple contains the second element of each
+of the sequences, and so on. The result list is only as long as the
+shortest of the input sequences.
+
+```python
+zip() # []
+zip(range(5)) # [(0,), (1,), (2,), (3,), (4,)]
+zip(range(5), "abc") # [(0, "a"), (1, "b"), (2, "c")]
+```
+
+## Built-in methods
+
+This section lists the methods of built-in types. Methods are selected
+using [dot expressions](#dot-expressions).
+For example, strings have a `count` method that counts
+occurrences of a substring; `"banana".count("a")` yields `3`.
+
+As with built-in functions, built-in methods accept only positional
+arguments except where noted.
+The parameter names serve merely as documentation.
+
+
+<a id='dict·clear'></a>
+### dict·clear
+
+`D.clear()` removes all the entries of dictionary D and returns `None`.
+It fails if the dictionary is frozen or if there are active iterators.
+
+```python
+x = {"one": 1, "two": 2}
+x.clear() # None
+print(x) # {}
+```
+
+<a id='dict·get'></a>
+### dict·get
+
+`D.get(key[, default])` returns the dictionary value corresponding to the given key.
+If the dictionary contains no such value, `get` returns `None`, or the
+value of the optional `default` parameter if present.
+
+`get` fails if `key` is unhashable, or the dictionary is frozen or has active iterators.
+
+```python
+x = {"one": 1, "two": 2}
+x.get("one") # 1
+x.get("three") # None
+x.get("three", 0) # 0
+```
+
+<a id='dict·items'></a>
+### dict·items
+
+`D.items()` returns a new list of key/value pairs, one per element in
+dictionary D, in the same order as they would be returned by a `for` loop.
+
+```python
+x = {"one": 1, "two": 2}
+x.items() # [("one", 1), ("two", 2)]
+```
+
+<a id='dict·keys'></a>
+### dict·keys
+
+`D.keys()` returns a new list containing the keys of dictionary D, in the
+same order as they would be returned by a `for` loop.
+
+```python
+x = {"one": 1, "two": 2}
+x.keys() # ["one", "two"]
+```
+
+<a id='dict·pop'></a>
+### dict·pop
+
+`D.pop(key[, default])` returns the value corresponding to the specified
+key, and removes it from the dictionary. If the dictionary contains no
+such value, and the optional `default` parameter is present, `pop`
+returns that value; otherwise, it fails.
+
+`pop` fails if `key` is unhashable, or the dictionary is frozen or has active iterators.
+
+```python
+x = {"one": 1, "two": 2}
+x.pop("one") # 1
+x # {"two": 2}
+x.pop("three", 0) # 0
+x.pop("four") # error: missing key
+```
+
+<a id='dict·popitem'></a>
+### dict·popitem
+
+`D.popitem()` returns the first key/value pair, removing it from the dictionary.
+
+`popitem` fails if the dictionary is empty, frozen, or has active iterators.
+
+```python
+x = {"one": 1, "two": 2}
+x.popitem() # ("one", 1)
+x.popitem() # ("two", 2)
+x.popitem() # error: empty dict
+```
+
+<a id='dict·setdefault'></a>
+### dict·setdefault
+
+`D.setdefault(key[, default])` returns the dictionary value corresponding to the given key.
+If the dictionary contains no such value, `setdefault`, like `get`,
+returns `None` or the value of the optional `default` parameter if
+present; `setdefault` additionally inserts the new key/value entry into the dictionary.
+
+`setdefault` fails if the key is unhashable, or if the dictionary is frozen or has active iterators.
+
+```python
+x = {"one": 1, "two": 2}
+x.setdefault("one") # 1
+x.setdefault("three", 0) # 0
+x # {"one": 1, "two": 2, "three": 0}
+x.setdefault("four") # None
+x # {"one": 1, "two": 2, "three": 0, "four": None}
+```
+
+<a id='dict·update'></a>
+### dict·update
+
+`D.update([pairs][, name=value[, ...]])` makes a sequence of key/value
+insertions into dictionary D, then returns `None`.
+
+If the positional argument `pairs` is present, it must be `None`,
+another `dict`, or some other iterable.
+If it is another `dict`, then its key/value pairs are inserted into D.
+If it is an iterable, it must provide a sequence of pairs (or other iterables of length 2),
+each of which is treated as a key/value pair to be inserted into D.
+
+For each `name=value` argument present, the name is converted to a
+string and used as the key for an insertion into D, with its corresponding
+value being `value`.
+
+`update` fails if the dictionary is frozen or has active iterators.
+
+```python
+x = {}
+x.update([("a", 1), ("b", 2)], c=3)
+x.update({"d": 4})
+x.update(e=5)
+x # {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
+```
+
+<a id='dict·values'></a>
+### dict·values
+
+`D.values()` returns a new list containing the dictionary's values, in the
+same order as they would be returned by a `for` loop over the
+dictionary.
+
+```python
+x = {"one": 1, "two": 2}
+x.values() # [1, 2]
+```
+
+<a id='list·append'></a>
+### list·append
+
+`L.append(x)` appends `x` to the list L, and returns `None`.
+
+`append` fails if the list is frozen or has active iterators.
+
+```python
+x = []
+x.append(1) # None
+x.append(2) # None
+x.append(3) # None
+x # [1, 2, 3]
+```
+
+<a id='list·clear'></a>
+### list·clear
+
+`L.clear()` removes all the elements of the list L and returns `None`.
+It fails if the list is frozen or if there are active iterators.
+
+```python
+x = [1, 2, 3]
+x.clear() # None
+x # []
+```
+
+<a id='list·extend'></a>
+### list·extend
+
+`L.extend(x)` appends the elements of `x`, which must be iterable, to
+the list L, and returns `None`.
+
+`extend` fails if `x` is not iterable, or if the list L is frozen or has active iterators.
+
+```python
+x = []
+x.extend([1, 2, 3]) # None
+x.extend(["foo"]) # None
+x # [1, 2, 3, "foo"]
+```
+
+<a id='list·index'></a>
+### list·index
+
+`L.index(x[, start[, end]])` finds `x` within the list L and returns its index.
+
+The optional `start` and `end` parameters restrict the portion of
+list L that is inspected. If provided and not `None`, they must be list
+indices of type `int`. If an index is negative, `len(L)` is effectively
+added to it, then if the index is outside the range `[0:len(L)]`, the
+nearest value within that range is used; see [Indexing](#indexing).
+
+`index` fails if `x` is not found in L, or if `start` or `end`
+is not a valid index (`int` or `None`).
+
+```python
+x = list("banana".codepoints())
+x.index("a") # 1 (bAnana)
+x.index("a", 2) # 3 (banAna)
+x.index("a", -2) # 5 (bananA)
+```
+
+<a id='list·insert'></a>
+### list·insert
+
+`L.insert(i, x)` inserts the value `x` in the list L at index `i`, moving
+higher-numbered elements along by one. It returns `None`.
+
+As usual, the index `i` must be an `int`. If its value is negative,
+the length of the list is added, then its value is clamped to the
+nearest value in the range `[0:len(L)]` to yield the effective index.
+
+`insert` fails if the list is frozen or has active iterators.
+
+```python
+x = ["b", "c", "e"]
+x.insert(0, "a") # None
+x.insert(-1, "d") # None
+x # ["a", "b", "c", "d", "e"]
+```
+
+<a id='list·pop'></a>
+### list·pop
+
+`L.pop([index])` removes and returns the last element of the list L, or,
+if the optional index is provided, at that index.
+
+`pop` fails if the index is not valid for `L[i]`,
+or if the list is frozen or has active iterators.
+
+```python
+x = [1, 2, 3, 4, 5]
+x.pop() # 5
+x # [1, 2, 3, 4]
+x.pop(-2) # 3
+x # [1, 2, 4]
+x.pop(-3) # 1
+x # [2, 4]
+x.pop() # 4
+x # [2]
+```
+
+<a id='list·remove'></a>
+### list·remove
+
+`L.remove(x)` removes the first occurrence of the value `x` from the list L, and returns `None`.
+
+`remove` fails if the list does not contain `x`, is frozen, or has active iterators.
+
+```python
+x = [1, 2, 3, 2]
+x.remove(2) # None (x == [1, 3, 2])
+x.remove(2) # None (x == [1, 3])
+x.remove(2) # error: element not found
+```
+
+<a id='set·union'></a>
+### set·union
+
+`S.union(iterable)` returns a new set into which have been inserted
+all the elements of set S and all the elements of the argument, which
+must be iterable.
+
+`union` fails if any element of the iterable is not hashable.
+
+```python
+x = set([1, 2])
+y = set([2, 3])
+x.union(y) # set([1, 2, 3])
+```
+
+<a id='string·elem_ords'></a>
+### string·elem_ords
+
+`S.elem_ords()` returns an iterable value containing the
+sequence of numeric byte values in the string S.
+
+To materialize the entire sequence of bytes, apply `list(...)` to the result.
+
+Example:
+
+```python
+list("Hello, 世界".elem_ords()) # [72, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140]
+```
+
+See also: `string·elems`.
+
+<b>Implementation note:</b> `elem_ords` is not provided by the Java implementation.
+
+<a id='string·capitalize'></a>
+### string·capitalize
+
+`S.capitalize()` returns a copy of string S with its first code point
+changed to its title case and all subsequent letters changed to their
+lower case.
+
+```python
+"hello, world!".capitalize() # "Hello, world!"
+"hElLo, wOrLd!".capitalize() # "Hello, world!"
+"¿Por qué?".capitalize() # "¿por qué?"
+```
+
+<a id='string·codepoint_ords'></a>
+### string·codepoint_ords
+
+`S.codepoint_ords()` returns an iterable value containing the
+sequence of integer Unicode code points encoded by the string S.
+Each invalid code within the string is treated as if it encodes the
+Unicode replacement character, U+FFFD.
+
+By returning an iterable, not a list, the cost of decoding the string
+is deferred until actually needed; apply `list(...)` to the result to
+materialize the entire sequence.
+
+Example:
+
+```python
+list("Hello, 世界".codepoint_ords()) # [72, 101, 108, 108, 111, 44, 32, 19990, 30028]
+
+for cp in "Hello, 世界".codepoint_ords():
+ print(chr(cp)) # prints 'H', 'e', 'l', 'l', 'o', ',', ' ', '世', '界'
+```
+
+See also: `string·codepoints`.
+
+<b>Implementation note:</b> `codepoint_ords` is not provided by the Java implementation.
+
+<a id='string·count'></a>
+### string·count
+
+`S.count(sub[, start[, end]])` returns the number of occurrences of
+`sub` within the string S, or, if the optional substring indices
+`start` and `end` are provided, within the designated substring of S.
+They are interpreted according to Starlark's [indexing conventions](#indexing).
+
+```python
+"hello, world!".count("o") # 2
+"hello, world!".count("o", 7, 12) # 1 (in "world")
+```
+
+<a id='string·endswith'></a>
+### string·endswith
+
+`S.endswith(suffix[, start[, end]])` reports whether the string
+`S[start:end]` has the specified suffix.
+
+```python
+"filename.star".endswith(".star") # True
+```
+
+The `suffix` argument may be a tuple of strings, in which case the
+function reports whether any one of them is a suffix.
+
+```python
+'foo.cc'.endswith(('.cc', '.h')) # True
+```
+
+
+<a id='string·find'></a>
+### string·find
+
+`S.find(sub[, start[, end]])` returns the index of the first
+occurrence of the substring `sub` within S.
+
+If either or both of `start` or `end` are specified,
+they specify a subrange of S to which the search should be restricted.
+They are interpreted according to Starlark's [indexing conventions](#indexing).
+
+If no occurrence is found, `find` returns -1.
+
+```python
+"bonbon".find("on") # 1
+"bonbon".find("on", 2) # 4
+"bonbon".find("on", 2, 5) # -1
+```
+
+<a id='string·format'></a>
+### string·format
+
+`S.format(*args, **kwargs)` returns a version of the format string S
+in which bracketed portions `{...}` are replaced
+by arguments from `args` and `kwargs`.
+
+Within the format string, a pair of braces `{{` or `}}` is treated as
+a literal open or close brace.
+Each unpaired open brace must be matched by a close brace `}`.
+The optional text between corresponding open and close braces
+specifies which argument to use and how to format it, and consists of
+three components, all optional:
+a field name, a conversion preceded by '`!`', and a format specifier
+preceded by '`:`'.
+
+```text
+{field}
+{field:spec}
+{field!conv}
+{field!conv:spec}
+```
+
+The *field name* may be either a decimal number or a keyword.
+A number is interpreted as the index of a positional argument;
+a keyword specifies the value of a keyword argument.
+If all the numeric field names form the sequence 0, 1, 2, and so on,
+they may be omitted and those values will be implied; however,
+the explicit and implicit forms may not be mixed.
+
+The *conversion* specifies how to convert an argument value `x` to a
+string. It may be either `!r`, which converts the value using
+`repr(x)`, or `!s`, which converts the value using `str(x)` and is
+the default.
+
+The *format specifier*, after a colon, specifies field width,
+alignment, padding, and numeric precision.
+Currently it must be empty, but it is reserved for future use.
+
+```python
+"a{x}b{y}c{}".format(1, x=2, y=3) # "a2b3c1"
+"a{}b{}c".format(1, 2) # "a1b2c"
+"({1}, {0})".format("zero", "one") # "(one, zero)"
+"Is {0!r} {0!s}?".format('heterological') # 'Is "heterological" heterological?'
+```
+
+<a id='string·index'></a>
+### string·index
+
+`S.index(sub[, start[, end]])` returns the index of the first
+occurrence of the substring `sub` within S, like `S.find`, except
+that if the substring is not found, the operation fails.
+
+```python
+"bonbon".index("on") # 1
+"bonbon".index("on", 2) # 4
+"bonbon".index("on", 2, 5) # error: substring not found (in "nbo")
+```
+
+<a id='string·isalnum'></a>
+### string·isalnum
+
+`S.isalnum()` reports whether the string S is non-empty and consists only of
+Unicode letters and digits.
+
+```python
+"base64".isalnum() # True
+"Catch-22".isalnum() # False
+```
+
+<a id='string·isalpha'></a>
+### string·isalpha
+
+`S.isalpha()` reports whether the string S is non-empty and consists only of Unicode letters.
+
+```python
+"ABC".isalpha() # True
+"Catch-22".isalpha() # False
+"".isalpha() # False
+```
+
+<a id='string·isdigit'></a>
+### string·isdigit
+
+`S.isdigit()` reports whether the string S is non-empty and consists only of Unicode digits.
+
+```python
+"123".isdigit() # True
+"Catch-22".isdigit() # False
+"".isdigit() # False
+```
+
+<a id='string·islower'></a>
+### string·islower
+
+`S.islower()` reports whether the string S contains at least one cased Unicode
+letter, and all such letters are lowercase.
+
+```python
+"hello, world".islower() # True
+"Catch-22".islower() # False
+"123".islower() # False
+```
+
+<a id='string·isspace'></a>
+### string·isspace
+
+`S.isspace()` reports whether the string S is non-empty and consists only of Unicode spaces.
+
+```python
+" ".isspace() # True
+"\r\t\n".isspace() # True
+"".isspace() # False
+```
+
+<a id='string·istitle'></a>
+### string·istitle
+
+`S.istitle()` reports whether the string S contains at least one cased Unicode
+letter, and all such letters that begin a word are in title case.
+
+```python
+"Hello, World!".istitle() # True
+"Catch-22".istitle() # True
+"HAL-9000".istitle() # False
+"Dženan".istitle() # True
+"DŽenan".istitle() # False ("DŽ" is a single Unicode letter)
+"123".istitle() # False
+```
+
+<a id='string·isupper'></a>
+### string·isupper
+
+`S.isupper()` reports whether the string S contains at least one cased Unicode
+letter, and all such letters are uppercase.
+
+```python
+"HAL-9000".isupper() # True
+"Catch-22".isupper() # False
+"123".isupper() # False
+```
+
+<a id='string·join'></a>
+### string·join
+
+`S.join(iterable)` returns the string formed by concatenating each
+element of its argument, with a copy of the string S between
+successive elements. The argument must be an iterable whose elements
+are strings.
+
+```python
+", ".join(["one", "two", "three"]) # "one, two, three"
+"a".join("ctmrn".codepoints()) # "catamaran"
+```
+
+<a id='string·lower'></a>
+### string·lower
+
+`S.lower()` returns a copy of the string S with letters converted to lowercase.
+
+```python
+"Hello, World!".lower() # "hello, world!"
+```
+
+<a id='string·lstrip'></a>
+### string·lstrip
+
+`S.lstrip()` returns a copy of the string S with leading whitespace removed.
+
+Like `strip`, it accepts an optional string parameter that specifies an
+alternative set of Unicode code points to remove.
+
+```python
+" hello ".lstrip() # "hello "
+" hello ".lstrip("h o") # "ello "
+```
+
+<a id='string·partition'></a>
+### string·partition
+
+`S.partition(x)` splits string S into three parts and returns them as
+a tuple: the portion before the first occurrence of string `x`, `x` itself,
+and the portion following it.
+If S does not contain `x`, `partition` returns `(S, "", "")`.
+
+`partition` fails if `x` is not a string, or is the empty string.
+
+```python
+"one/two/three".partition("/") # ("one", "/", "two/three")
+```
+
+<a id='string·replace'></a>
+### string·replace
+
+`S.replace(old, new[, count])` returns a copy of string S with all
+occurrences of substring `old` replaced by `new`. If the optional
+argument `count`, which must be an `int`, is non-negative, it
+specifies a maximum number of occurrences to replace.
+
+```python
+"banana".replace("a", "o") # "bonono"
+"banana".replace("a", "o", 2) # "bonona"
+```
+
+<a id='string·rfind'></a>
+### string·rfind
+
+`S.rfind(sub[, start[, end]])` returns the index of the substring `sub` within
+S, like `S.find`, except that `rfind` returns the index of the substring's
+_last_ occurrence.
+
+```python
+"bonbon".rfind("on") # 4
+"bonbon".rfind("on", None, 5) # 1
+"bonbon".rfind("on", 2, 5) # -1
+```
+
+<a id='string·rindex'></a>
+### string·rindex
+
+`S.rindex(sub[, start[, end]])` returns the index of the substring `sub` within
+S, like `S.index`, except that `rindex` returns the index of the substring's
+_last_ occurrence.
+
+```python
+"bonbon".rindex("on") # 4
+"bonbon".rindex("on", None, 5) # 1 (in "bonbo")
+"bonbon".rindex("on", 2, 5) # error: substring not found (in "nbo")
+```
+
+<a id='string·rpartition'></a>
+### string·rpartition
+
+`S.rpartition(x)` is like `partition`, but splits `S` at the last occurrence of `x`.
+
+```python
+"one/two/three".rpartition("/") # ("one/two", "/", "three")
+```
+
+<a id='string·rsplit'></a>
+### string·rsplit
+
+`S.rsplit([sep[, maxsplit]])` splits a string into substrings like `S.split`,
+except that when a maximum number of splits is specified, `rsplit` chooses the
+rightmost splits.
+
+```python
+"banana".rsplit("n") # ["ba", "a", "a"]
+"banana".rsplit("n", 1) # ["bana", "a"]
+"one two three".rsplit(None, 1) # ["one two", "three"]
+"".rsplit("n") # [""]
+```
+
+<a id='string·rstrip'></a>
+### string·rstrip
+
+`S.rstrip()` returns a copy of the string S with trailing whitespace removed.
+
+Like `strip`, it accepts an optional string parameter that specifies an
+alternative set of Unicode code points to remove.
+
+```python
+" hello ".rstrip() # " hello"
+" hello ".rstrip("h o") # " hell"
+```
+
+<a id='string·split'></a>
+### string·split
+
+`S.split([sep [, maxsplit]])` returns the list of substrings of S,
+splitting at occurrences of the delimiter string `sep`.
+
+Consecutive occurrences of `sep` are considered to delimit empty
+strings, so `'food'.split('o')` returns `['f', '', 'd']`.
+Splitting an empty string with a specified separator returns `['']`.
+If `sep` is the empty string, `split` fails.
+
+If `sep` is not specified or is `None`, `split` uses a different
+algorithm: it removes all leading spaces from S
+(or trailing spaces in the case of `rsplit`),
+then splits the string around each consecutive non-empty sequence of
+Unicode white space characters.
+If S consists only of white space, `S.split()` returns the empty list.
+
+If `maxsplit` is given and non-negative, it specifies a maximum number of splits.
+
+```python
+"one two three".split() # ["one", "two", "three"]
+"one two  three".split(" ") # ["one", "two", "", "three"]
+"one two three".split(None, 1) # ["one", "two three"]
+"banana".split("n") # ["ba", "a", "a"]
+"banana".split("n", 1) # ["ba", "ana"]
+"".split("n") # [""]
+```
+
+<a id='string·elems'></a>
+### string·elems
+
+`S.elems()` returns an iterable value containing successive
+1-byte substrings of S.
+To materialize the entire sequence, apply `list(...)` to the result.
+
+Example:
+
+```python
+list('Hello, 世界'.elems()) # ["H", "e", "l", "l", "o", ",", " ", "\xe4", "\xb8", "\x96", "\xe7", "\x95", "\x8c"]
+```
+
+See also: `string·elem_ords`.
+
+
+<a id='string·codepoints'></a>
+### string·codepoints
+
+`S.codepoints()` returns an iterable value containing the sequence of
+substrings of S that each encode a single Unicode code point.
+Each invalid code within the string is treated as if it encodes the
+Unicode replacement character, U+FFFD.
+
+By returning an iterable, not a list, the cost of decoding the string
+is deferred until actually needed; apply `list(...)` to the result to
+materialize the entire sequence.
+
+Example:
+
+```python
+list('Hello, 世界'.codepoints()) # ['H', 'e', 'l', 'l', 'o', ',', ' ', '世', '界']
+
+for cp in 'Hello, 世界'.codepoints():
+ print(cp) # prints 'H', 'e', 'l', 'l', 'o', ',', ' ', '世', '界'
+```
+
+See also: `string·codepoint_ords`.
+
+<b>Implementation note:</b> `codepoints` is not provided by the Java implementation.
+
+<a id='string·splitlines'></a>
+### string·splitlines
+
+`S.splitlines([keepends])` returns a list whose elements are the
+successive lines of S, that is, the strings formed by splitting S at
+line terminators (currently assumed to be a single newline, `\n`,
+regardless of platform).
+
+The optional argument, `keepends`, is interpreted as a Boolean.
+If true, line terminators are preserved in the result, though
+the final element does not necessarily end with a line terminator.
+
+As a special case, if S is the empty string,
+`splitlines` returns the empty list.
+
+```python
+"one\n\ntwo".splitlines() # ["one", "", "two"]
+"one\n\ntwo".splitlines(True) # ["one\n", "\n", "two"]
+"".splitlines() # [] -- a special case
+```
+
+<a id='string·startswith'></a>
+### string·startswith
+
+`S.startswith(prefix[, start[, end]])` reports whether the string
+`S[start:end]` has the specified prefix.
+
+```python
+"filename.star".startswith("filename") # True
+```
+
+The `prefix` argument may be a tuple of strings, in which case the
+function reports whether any one of them is a prefix.
+
+```python
+'abc'.startswith(('a', 'A')) # True
+'ABC'.startswith(('a', 'A')) # True
+'def'.startswith(('a', 'A')) # False
+```
+
+<a id='string·strip'></a>
+### string·strip
+
+`S.strip()` returns a copy of the string S with leading and trailing whitespace removed.
+
+It accepts an optional string argument:
+`S.strip(cutset)` instead removes all leading
+and trailing Unicode code points contained in `cutset`.
+
+```python
+" hello ".strip() # "hello"
+" hello ".strip("h o") # "ell"
+```
+
+<a id='string·title'></a>
+### string·title
+
+`S.title()` returns a copy of the string S with letters converted to title case.
+
+Letters are converted to upper case at the start of words, lower case elsewhere.
+
+```python
+"hElLo, WoRlD!".title() # "Hello, World!"
+"dženan".title() # "Dženan" ("Dž" is a single Unicode letter)
+```
+
+<a id='string·upper'></a>
+### string·upper
+
+`S.upper()` returns a copy of the string S with letters converted to uppercase.
+
+```python
+"Hello, World!".upper() # "HELLO, WORLD!"
+```
+
+## Dialect differences
+
+The list below summarizes features of the Go implementation that are
+known to differ from the Java implementation of Starlark used by Bazel.
+Some of these features may be controlled by global options to allow
+applications to mimic the Bazel dialect more closely. Our goal is
+eventually to eliminate all such differences on a case-by-case basis.
+See [Starlark spec issue 20](https://github.com/bazelbuild/starlark/issues/20).
+
+* String interpolation supports the `[ioxXc]` conversions.
+* String elements are bytes.
+* Non-ASCII strings are encoded using UTF-8.
+* Strings support hex byte escapes.
+* Strings have the additional methods `elem_ords`, `codepoint_ords`, and `codepoints`.
+* The `chr` and `ord` built-in functions are supported.
+* The `set` built-in function is provided (option: `-set`).
+* `set & set` and `set | set` compute set intersection and union, respectively.
+* `assert` is a valid identifier.
+* `if`, `for`, and `while` are permitted at top level (option: `-globalreassign`).
+* Top-level rebindings are permitted (option: `-globalreassign`).
diff --git a/docs/CNAME b/docs/CNAME
new file mode 100644
index 0000000..63f0a06
--- /dev/null
+++ b/docs/CNAME
@@ -0,0 +1 @@
+go.starlark.net \ No newline at end of file
diff --git a/docs/cmd/starlark/index.html b/docs/cmd/starlark/index.html
new file mode 100644
index 0000000..29d9c83
--- /dev/null
+++ b/docs/cmd/starlark/index.html
@@ -0,0 +1,9 @@
+<html>
+<head>
+ <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta>
+ <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/cmd/starlark'" /></meta>
+</head>
+<body>
+ Redirecting to godoc.org page for go.starlark.net/cmd/starlark...
+</body>
+</html>
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..ec44a6e
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,11 @@
+<html>
+ <!-- This file will be served at go.starlark.net by GitHub pages. -->
+ <head>
+ <!-- This tag causes "go get go.starlark.net" to redirect to GitHub. -->
+ <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta>
+ <meta http-equiv="refresh" content="0;URL='http://github.com/google/starlark-go'" />
+ </head>
+ <body>
+ Redirecting to GitHub project github.com/google/starlark-go...
+ </body>
+</html>
diff --git a/docs/internal/chunkedfile/index.html b/docs/internal/chunkedfile/index.html
new file mode 100644
index 0000000..7710919
--- /dev/null
+++ b/docs/internal/chunkedfile/index.html
@@ -0,0 +1,9 @@
+<html>
+<head>
+ <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta>
+ <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/internal/chunkedfile'" /></meta>
+</head>
+<body>
+ Redirecting to godoc.org page for go.starlark.net/internal/chunkedfile...
+</body>
+</html>
diff --git a/docs/internal/compile/index.html b/docs/internal/compile/index.html
new file mode 100644
index 0000000..12eb87f
--- /dev/null
+++ b/docs/internal/compile/index.html
@@ -0,0 +1,9 @@
+<html>
+<head>
+ <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta>
+ <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/internal/compile'" /></meta>
+</head>
+<body>
+ Redirecting to godoc.org page for go.starlark.net/internal/compile...
+</body>
+</html>
diff --git a/docs/repl/index.html b/docs/repl/index.html
new file mode 100644
index 0000000..bbcc4b2
--- /dev/null
+++ b/docs/repl/index.html
@@ -0,0 +1,9 @@
+<html>
+<head>
+ <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta>
+ <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/repl'" /></meta>
+</head>
+<body>
+ Redirecting to godoc.org page for go.starlark.net/repl...
+</body>
+</html>
diff --git a/docs/resolve/index.html b/docs/resolve/index.html
new file mode 100644
index 0000000..6d63ca6
--- /dev/null
+++ b/docs/resolve/index.html
@@ -0,0 +1,9 @@
+<html>
+<head>
+ <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta>
+ <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/resolve'" /></meta>
+</head>
+<body>
+ Redirecting to godoc.org page for go.starlark.net/resolve...
+</body>
+</html>
diff --git a/docs/starlark/index.html b/docs/starlark/index.html
new file mode 100644
index 0000000..58e38f0
--- /dev/null
+++ b/docs/starlark/index.html
@@ -0,0 +1,9 @@
+<html>
+<head>
+ <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta>
+ <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/starlark'" /></meta>
+</head>
+<body>
+ Redirecting to godoc.org page for go.starlark.net/starlark...
+</body>
+</html>
diff --git a/docs/starlarkstruct/index.html b/docs/starlarkstruct/index.html
new file mode 100644
index 0000000..e187004
--- /dev/null
+++ b/docs/starlarkstruct/index.html
@@ -0,0 +1,9 @@
+<html>
+<head>
+ <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta>
+ <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/starlarkstruct'" /></meta>
+</head>
+<body>
+ Redirecting to godoc.org page for go.starlark.net/starlarkstruct...
+</body>
+</html>
diff --git a/docs/starlarktest/index.html b/docs/starlarktest/index.html
new file mode 100644
index 0000000..d808e12
--- /dev/null
+++ b/docs/starlarktest/index.html
@@ -0,0 +1,9 @@
+<html>
+<head>
+ <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta>
+ <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/starlarktest'" /></meta>
+</head>
+<body>
+ Redirecting to godoc.org page for go.starlark.net/starlarktest...
+</body>
+</html>
diff --git a/docs/syntax/index.html b/docs/syntax/index.html
new file mode 100644
index 0000000..a629e81
--- /dev/null
+++ b/docs/syntax/index.html
@@ -0,0 +1,9 @@
+<html>
+<head>
+ <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta>
+ <meta http-equiv="refresh" content="0;URL='http://godoc.org/go.starlark.net/syntax'" /></meta>
+</head>
+<body>
+ Redirecting to godoc.org page for go.starlark.net/syntax...
+</body>
+</html>
diff --git a/docs/update.go b/docs/update.go
new file mode 100644
index 0000000..be40427
--- /dev/null
+++ b/docs/update.go
@@ -0,0 +1,71 @@
+//+build ignore
+
+// The update command creates/updates the <html><head> elements of
+// each subpackage beneath docs so that "go get" requests redirect
+// to GitHub and other HTTP requests redirect to godoc.org.
+//
+// Usage:
+//
+// $ cd $GOPATH/src/go.starlark.net
+// $ go run docs/update.go
+//
+package main
+
+import (
+ "bytes"
+ "fmt"
+ "io/ioutil"
+ "log"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+)
+
+func main() {
+ log.SetFlags(0)
+ log.SetPrefix("update: ")
+
+ cwd, err := os.Getwd()
+ if err != nil {
+ log.Fatal(err)
+ }
+ if filepath.Base(cwd) != "go.starlark.net" {
+ log.Fatalf("must run from the go.starlark.net directory")
+ }
+
+ cmd := exec.Command("go", "list", "./...")
+ cmd.Stdout = new(bytes.Buffer)
+ cmd.Stderr = os.Stderr
+ if err := cmd.Run(); err != nil {
+ log.Fatal(err)
+ }
+ for _, pkg := range strings.Split(strings.TrimSpace(fmt.Sprint(cmd.Stdout)), "\n") {
+ rel := strings.TrimPrefix(pkg, "go.starlark.net/") // e.g. "cmd/starlark"
+ subdir := filepath.Join("docs", rel)
+ if err := os.MkdirAll(subdir, 0777); err != nil {
+ log.Fatal(err)
+ }
+
+ // Create missing docs/$rel/index.html files.
+ html := filepath.Join(subdir, "index.html")
+ if _, err := os.Stat(html); os.IsNotExist(err) {
+ data := strings.Replace(defaultHTML, "$PKG", pkg, -1)
+ if err := ioutil.WriteFile(html, []byte(data), 0666); err != nil {
+ log.Fatal(err)
+ }
+ log.Printf("created %s", html)
+ }
+ }
+}
+
+const defaultHTML = `<html>
+<head>
+ <meta name="go-import" content="go.starlark.net git https://github.com/google/starlark-go"></meta>
+ <meta http-equiv="refresh" content="0;URL='http://godoc.org/$PKG'" /></meta>
+</head>
+<body>
+ Redirecting to godoc.org page for $PKG...
+</body>
+</html>
+`
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..d14060e
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,13 @@
+module go.starlark.net
+
+go 1.13
+
+require (
+ github.com/chzyer/logex v1.1.10 // indirect
+ github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e
+ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 // indirect
+ github.com/google/go-cmp v0.5.1 // indirect
+ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f
+ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
+ google.golang.org/protobuf v1.25.0
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..90a8048
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,74 @@
+cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
+github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
+github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
+github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8=
+github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
+github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
+github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
+github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
+github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
+github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
+github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
+github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
+github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.1 h1:JFrFEBb2xKufg6XkJsJr+WbKb4FQlURi5RUcBveYu9k=
+github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
+golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
+golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
+google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
+google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
+google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
+google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
+google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
+google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
+google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
+google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
+google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
+google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
+google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
+honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
diff --git a/internal/chunkedfile/chunkedfile.go b/internal/chunkedfile/chunkedfile.go
new file mode 100644
index 0000000..a591524
--- /dev/null
+++ b/internal/chunkedfile/chunkedfile.go
@@ -0,0 +1,124 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package chunkedfile provides utilities for testing that source code
+// errors are reported in the appropriate places.
+//
+// A chunked file consists of several chunks of input text separated by
+// "---" lines. Each chunk is an input to the program under test, such
+// as an evaluator. Lines containing "###" are interpreted as
+// expectations of failure: the following text is a Go string literal
+// denoting a regular expression that should match the failure message.
+//
+// Example:
+//
+// x = 1 / 0 ### "division by zero"
+// ---
+// x = 1
+// print(x + "") ### "int + string not supported"
+//
+// A client test feeds each chunk of text into the program under test,
+// then calls chunk.GotError for each error that actually occurred. Any
+// discrepancy between the actual and expected errors is reported using
+// the client's reporter, which is typically a testing.T.
+package chunkedfile // import "go.starlark.net/internal/chunkedfile"
+
+import (
+ "fmt"
+ "io/ioutil"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+const debug = false
+
+// A Chunk is a portion of a source file.
+// It contains a set of expected errors.
+type Chunk struct {
+ Source string
+ filename string
+ report Reporter
+ wantErrs map[int]*regexp.Regexp
+}
+
+// Reporter is implemented by *testing.T.
+type Reporter interface {
+ Errorf(format string, args ...interface{})
+}
+
+// Read parses a chunked file and returns its chunks.
+// It reports failures using the reporter.
+//
+// Error messages of the form "file.star:line:col: ..." are prefixed
+// by a newline so that the Go source position added by (*testing.T).Errorf
+// appears on a separate line so as not to confuse editors.
+func Read(filename string, report Reporter) (chunks []Chunk) {
+ data, err := ioutil.ReadFile(filename)
+ if err != nil {
+ report.Errorf("%s", err)
+ return
+ }
+ linenum := 1
+ for i, chunk := range strings.Split(string(data), "\n---\n") {
+ if debug {
+ fmt.Printf("chunk %d at line %d: %s\n", i, linenum, chunk)
+ }
+ // Pad with newlines so the line numbers match the original file.
+ src := strings.Repeat("\n", linenum-1) + chunk
+
+ wantErrs := make(map[int]*regexp.Regexp)
+
+ // Parse comments of the form:
+ // ### "expected error".
+ lines := strings.Split(chunk, "\n")
+ for j := 0; j < len(lines); j, linenum = j+1, linenum+1 {
+ line := lines[j]
+ hashes := strings.Index(line, "###")
+ if hashes < 0 {
+ continue
+ }
+ rest := strings.TrimSpace(line[hashes+len("###"):])
+ pattern, err := strconv.Unquote(rest)
+ if err != nil {
+ report.Errorf("\n%s:%d: not a quoted regexp: %s", filename, linenum, rest)
+ continue
+ }
+ rx, err := regexp.Compile(pattern)
+ if err != nil {
+ report.Errorf("\n%s:%d: %v", filename, linenum, err)
+ continue
+ }
+ wantErrs[linenum] = rx
+ if debug {
+ fmt.Printf("\t%d\t%s\n", linenum, rx)
+ }
+ }
+ linenum++
+
+ chunks = append(chunks, Chunk{src, filename, report, wantErrs})
+ }
+ return chunks
+}
+
+// GotError should be called by the client to report an error at a particular line.
+// GotError reports unexpected errors to the chunk's reporter.
+func (chunk *Chunk) GotError(linenum int, msg string) {
+ if rx, ok := chunk.wantErrs[linenum]; ok {
+ delete(chunk.wantErrs, linenum)
+ if !rx.MatchString(msg) {
+ chunk.report.Errorf("\n%s:%d: error %q does not match pattern %q", chunk.filename, linenum, msg, rx)
+ }
+ } else {
+ chunk.report.Errorf("\n%s:%d: unexpected error: %v", chunk.filename, linenum, msg)
+ }
+}
+
+// Done should be called by the client to indicate that the chunk has no more errors.
+// Done reports expected errors that did not occur to the chunk's reporter.
+func (chunk *Chunk) Done() {
+ for linenum, rx := range chunk.wantErrs {
+ chunk.report.Errorf("\n%s:%d: expected error matching %q", chunk.filename, linenum, rx)
+ }
+}
diff --git a/internal/compile/codegen_test.go b/internal/compile/codegen_test.go
new file mode 100644
index 0000000..f67204f
--- /dev/null
+++ b/internal/compile/codegen_test.go
@@ -0,0 +1,118 @@
+package compile
+
+import (
+ "bytes"
+ "fmt"
+ "testing"
+
+ "go.starlark.net/resolve"
+ "go.starlark.net/syntax"
+)
+
+// TestPlusFolding ensures that the compiler generates optimized code for
+// n-ary addition of strings, lists, and tuples.
+func TestPlusFolding(t *testing.T) {
+ isPredeclared := func(name string) bool { return name == "x" }
+ isUniversal := func(name string) bool { return false }
+ for i, test := range []struct {
+ src string // source expression
+ want string // disassembled code
+ }{
+ {
+ // string folding
+ `"a" + "b" + "c" + "d"`,
+ `constant "abcd"; return`,
+ },
+ {
+ // string folding with variable:
+ `"a" + "b" + x + "c" + "d"`,
+ `constant "ab"; predeclared x; plus; constant "cd"; plus; return`,
+ },
+ {
+ // list folding
+ `[1] + [2] + [3]`,
+ `constant 1; constant 2; constant 3; makelist<3>; return`,
+ },
+ {
+ // list folding with variable
+ `[1] + [2] + x + [3]`,
+ `constant 1; constant 2; makelist<2>; ` +
+ `predeclared x; plus; ` +
+ `constant 3; makelist<1>; plus; ` +
+ `return`,
+ },
+ {
+ // tuple folding
+ `() + (1,) + (2, 3)`,
+ `constant 1; constant 2; constant 3; maketuple<3>; return`,
+ },
+ {
+ // tuple folding with variable
+ `() + (1,) + x + (2, 3)`,
+ `constant 1; maketuple<1>; predeclared x; plus; ` +
+ `constant 2; constant 3; maketuple<2>; plus; ` +
+ `return`,
+ },
+ } {
+ expr, err := syntax.ParseExpr("in.star", test.src, 0)
+ if err != nil {
+ t.Errorf("#%d: %v", i, err)
+ continue
+ }
+ locals, err := resolve.Expr(expr, isPredeclared, isUniversal)
+ if err != nil {
+ t.Errorf("#%d: %v", i, err)
+ continue
+ }
+ got := disassemble(Expr(expr, "<expr>", locals).Toplevel)
+ if test.want != got {
+ t.Errorf("expression <<%s>> generated <<%s>>, want <<%s>>",
+ test.src, got, test.want)
+ }
+ }
+}
+
+// disassemble is a trivial disassembler tailored to the accumulator test.
+func disassemble(f *Funcode) string {
+ out := new(bytes.Buffer)
+ code := f.Code
+ for pc := 0; pc < len(code); {
+ op := Opcode(code[pc])
+ pc++
+ // TODO(adonovan): factor in common with interpreter.
+ var arg uint32
+ if op >= OpcodeArgMin {
+ for s := uint(0); ; s += 7 {
+ b := code[pc]
+ pc++
+ arg |= uint32(b&0x7f) << s
+ if b < 0x80 {
+ break
+ }
+ }
+ }
+
+ if out.Len() > 0 {
+ out.WriteString("; ")
+ }
+ fmt.Fprintf(out, "%s", op)
+ if op >= OpcodeArgMin {
+ switch op {
+ case CONSTANT:
+ switch x := f.Prog.Constants[arg].(type) {
+ case string:
+ fmt.Fprintf(out, " %q", x)
+ default:
+ fmt.Fprintf(out, " %v", x)
+ }
+ case LOCAL:
+ fmt.Fprintf(out, " %s", f.Locals[arg].Name)
+ case PREDECLARED:
+ fmt.Fprintf(out, " %s", f.Prog.Names[arg])
+ default:
+ fmt.Fprintf(out, "<%d>", arg)
+ }
+ }
+ }
+ return out.String()
+}
diff --git a/internal/compile/compile.go b/internal/compile/compile.go
new file mode 100644
index 0000000..c314e6e
--- /dev/null
+++ b/internal/compile/compile.go
@@ -0,0 +1,1916 @@
+// Package compile defines the Starlark bytecode compiler.
+// It is an internal package of the Starlark interpreter and is not directly accessible to clients.
+//
+// The compiler generates byte code with optional uint32 operands for a
+// virtual machine with the following components:
+// - a program counter, which is an index into the byte code array.
+// - an operand stack, whose maximum size is computed for each function by the compiler.
+ // - a stack of active iterators.
+// - an array of local variables.
+// The number of local variables and their indices are computed by the resolver.
+// Locals (possibly including parameters) that are shared with nested functions
+// are 'cells': their locals array slot will contain a value of type 'cell',
+// an indirect value in a box that is explicitly read/updated by instructions.
+// - an array of free variables, for nested functions.
+// Free variables are a subset of the ancestors' cell variables.
+// As with locals and cells, these are computed by the resolver.
+// - an array of global variables, shared among all functions in the same module.
+// All elements are initially nil.
+// - two maps of predeclared and universal identifiers.
+//
+// Each function has a line number table that maps each program counter
+// offset to a source position, including the column number.
+//
+// Operands, logically uint32s, are encoded using little-endian 7-bit
+// varints, the top bit indicating that more bytes follow.
+//
+package compile // import "go.starlark.net/internal/compile"
+
+import (
+ "bytes"
+ "fmt"
+ "log"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "sync"
+
+ "go.starlark.net/resolve"
+ "go.starlark.net/syntax"
+)
+
+// Disassemble causes the assembly code for each function
+// to be printed to stderr as it is generated.
+var Disassemble = false
+
+const debug = false // make code generation verbose, for debugging the compiler
+
+// Increment this to force recompilation of saved bytecode files.
+const Version = 12
+
+ // An Opcode identifies a bytecode instruction.
+ // Each opcode occupies a single byte of the code array.
+ type Opcode uint8
+
+ // "x DUP x x" is a "stack picture" that describes the state of the
+ // stack before and after execution of the instruction.
+ //
+ // OP<index> indicates an immediate operand that is an index into the
+ // specified table: locals, names, freevars, constants.
+ const (
+ NOP Opcode = iota // - NOP -
+
+ // stack operations
+ DUP // x DUP x x
+ DUP2 // x y DUP2 x y x y
+ POP // x POP -
+ EXCH // x y EXCH y x
+
+ // binary comparisons
+ // (order must match Token)
+ // Each is "x y OP result": two operands popped, one result pushed.
+ LT
+ GT
+ GE
+ LE
+ EQL
+ NEQ
+
+ // binary arithmetic
+ // (order must match Token)
+ // Each is "x y OP result": two operands popped, one result pushed.
+ PLUS
+ MINUS
+ STAR
+ SLASH
+ SLASHSLASH
+ PERCENT
+ AMP
+ PIPE
+ CIRCUMFLEX
+ LTLT
+ GTGT
+
+ IN
+
+ // unary operators
+ UPLUS // x UPLUS x
+ UMINUS // x UMINUS -x
+ TILDE // x TILDE ~x
+
+ NONE // - NONE None
+ TRUE // - TRUE True
+ FALSE // - FALSE False
+ MANDATORY // - MANDATORY Mandatory [sentinel value for required kwonly args]
+
+ ITERPUSH // iterable ITERPUSH - [pushes the iterator stack]
+ ITERPOP // - ITERPOP - [pops the iterator stack]
+ NOT // value NOT bool
+ RETURN // value RETURN -
+ SETINDEX // a i new SETINDEX -
+ INDEX // a i INDEX elem
+ SETDICT // dict key value SETDICT -
+ SETDICTUNIQ // dict key value SETDICTUNIQ -
+ APPEND // list elem APPEND -
+ SLICE // x lo hi step SLICE slice
+ INPLACE_ADD // x y INPLACE_ADD z where z is x+y or x.extend(y)
+ MAKEDICT // - MAKEDICT dict
+
+ // --- opcodes with an argument must go below this line ---
+
+ // control flow
+ JMP // - JMP<addr> -
+ CJMP // cond CJMP<addr> -
+ ITERJMP // - ITERJMP<addr> elem (and fall through) [acts on topmost iterator]
+ // or: - ITERJMP<addr> - (and jump)
+
+ CONSTANT // - CONSTANT<constant> value
+ MAKETUPLE // x1 ... xn MAKETUPLE<n> tuple
+ MAKELIST // x1 ... xn MAKELIST<n> list
+ MAKEFUNC // defaults+freevars MAKEFUNC<func> fn
+ LOAD // from1 ... fromN module LOAD<n> v1 ... vN
+ SETLOCAL // value SETLOCAL<local> -
+ SETGLOBAL // value SETGLOBAL<global> -
+ LOCAL // - LOCAL<local> value
+ FREE // - FREE<freevar> cell
+ FREECELL // - FREECELL<freevar> value (content of FREE cell)
+ LOCALCELL // - LOCALCELL<local> value (content of LOCAL cell)
+ SETLOCALCELL // value SETLOCALCELL<local> - (set content of LOCAL cell)
+ GLOBAL // - GLOBAL<global> value
+ PREDECLARED // - PREDECLARED<name> value
+ UNIVERSAL // - UNIVERSAL<name> value
+ ATTR // x ATTR<name> y y = x.name
+ SETFIELD // x y SETFIELD<name> - x.name = y
+ UNPACK // iterable UNPACK<n> vn ... v1
+
+ // n>>8 is #positional args and n&0xff is #named args (pairs).
+ CALL // fn positional named CALL<n> result
+ CALL_VAR // fn positional named *args CALL_VAR<n> result
+ CALL_KW // fn positional named **kwargs CALL_KW<n> result
+ CALL_VAR_KW // fn positional named *args **kwargs CALL_VAR_KW<n> result
+
+ // OpcodeArgMin is the lowest-numbered opcode that carries an operand;
+ // OpcodeMax is the highest-numbered valid opcode.
+ OpcodeArgMin = JMP
+ OpcodeMax = CALL_VAR_KW
+ )
+
+// TODO(adonovan): add dynamic checks for missing opcodes in the tables below.
+
+ // opcodeNames maps each Opcode to the lower-case mnemonic
+ // returned by Opcode.String. An empty entry denotes an opcode
+ // without a name, which String reports as illegal.
+ var opcodeNames = [...]string{
+ AMP: "amp",
+ APPEND: "append",
+ ATTR: "attr",
+ CALL: "call",
+ CALL_KW: "call_kw", // (previously had a stray trailing space)
+ CALL_VAR: "call_var",
+ CALL_VAR_KW: "call_var_kw",
+ CIRCUMFLEX: "circumflex",
+ CJMP: "cjmp",
+ CONSTANT: "constant",
+ DUP2: "dup2",
+ DUP: "dup",
+ EQL: "eql",
+ EXCH: "exch",
+ FALSE: "false",
+ FREE: "free",
+ FREECELL: "freecell",
+ GE: "ge",
+ GLOBAL: "global",
+ GT: "gt",
+ GTGT: "gtgt",
+ IN: "in",
+ INDEX: "index",
+ INPLACE_ADD: "inplace_add",
+ ITERJMP: "iterjmp",
+ ITERPOP: "iterpop",
+ ITERPUSH: "iterpush",
+ JMP: "jmp",
+ LE: "le",
+ LOAD: "load",
+ LOCAL: "local",
+ LOCALCELL: "localcell",
+ LT: "lt",
+ LTLT: "ltlt",
+ MAKEDICT: "makedict",
+ MAKEFUNC: "makefunc",
+ MAKELIST: "makelist",
+ MAKETUPLE: "maketuple",
+ MANDATORY: "mandatory",
+ MINUS: "minus",
+ NEQ: "neq",
+ NONE: "none",
+ NOP: "nop",
+ NOT: "not",
+ PERCENT: "percent",
+ PIPE: "pipe",
+ PLUS: "plus",
+ POP: "pop",
+ PREDECLARED: "predeclared",
+ RETURN: "return",
+ SETDICT: "setdict",
+ SETDICTUNIQ: "setdictuniq",
+ SETFIELD: "setfield",
+ SETGLOBAL: "setglobal",
+ SETINDEX: "setindex",
+ SETLOCAL: "setlocal",
+ SETLOCALCELL: "setlocalcell",
+ SLASH: "slash",
+ SLASHSLASH: "slashslash",
+ SLICE: "slice",
+ STAR: "star",
+ TILDE: "tilde",
+ TRUE: "true",
+ UMINUS: "uminus",
+ UNIVERSAL: "universal",
+ UNPACK: "unpack",
+ UPLUS: "uplus",
+ }
+
+ // variableStackEffect is the sentinel stored in stackEffect for
+ // instructions whose effect depends on their operand; the actual
+ // value is computed by insn.stackeffect.
+ const variableStackEffect = 0x7f
+
+ // stackEffect records the effect on the size of the operand stack of
+ // each kind of instruction. For some instructions this requires computation.
+ //
+ // Every opcode is listed explicitly, including those whose effect is
+ // zero, so that a missing entry is easy to spot.
+ var stackEffect = [...]int8{
+ AMP: -1,
+ APPEND: -2,
+ ATTR: 0,
+ CALL: variableStackEffect,
+ CALL_KW: variableStackEffect,
+ CALL_VAR: variableStackEffect,
+ CALL_VAR_KW: variableStackEffect,
+ CIRCUMFLEX: -1,
+ CJMP: -1,
+ CONSTANT: +1,
+ DUP2: +2,
+ DUP: +1,
+ EQL: -1,
+ EXCH: 0, // x y EXCH y x
+ FALSE: +1,
+ FREE: +1,
+ FREECELL: +1,
+ GE: -1,
+ GLOBAL: +1,
+ GT: -1,
+ GTGT: -1,
+ IN: -1,
+ INDEX: -1,
+ INPLACE_ADD: -1,
+ ITERJMP: variableStackEffect,
+ ITERPOP: 0,
+ ITERPUSH: -1,
+ JMP: 0,
+ LE: -1,
+ LOAD: -1,
+ LOCAL: +1,
+ LOCALCELL: +1,
+ LT: -1,
+ LTLT: -1,
+ MAKEDICT: +1,
+ MAKEFUNC: 0,
+ MAKELIST: variableStackEffect,
+ MAKETUPLE: variableStackEffect,
+ MANDATORY: +1,
+ MINUS: -1,
+ NEQ: -1,
+ NONE: +1,
+ NOP: 0,
+ NOT: 0,
+ PERCENT: -1,
+ PIPE: -1,
+ PLUS: -1,
+ POP: -1,
+ PREDECLARED: +1,
+ RETURN: -1,
+ SETDICT: -3,
+ SETDICTUNIQ: -3,
+ SETFIELD: -2,
+ SETGLOBAL: -1,
+ SETINDEX: -3,
+ SETLOCAL: -1,
+ SETLOCALCELL: -1,
+ SLASH: -1,
+ SLASHSLASH: -1,
+ SLICE: -3,
+ STAR: -1,
+ TILDE: 0, // x TILDE ~x
+ TRUE: +1,
+ UMINUS: 0,
+ UNIVERSAL: +1,
+ UNPACK: variableStackEffect,
+ UPLUS: 0,
+ }
+
+ // String returns the mnemonic of op as recorded in opcodeNames,
+ // or "illegal op (N)" if op is out of range or has no name.
+ func (op Opcode) String() string {
+ // OpcodeMax is itself a valid opcode (CALL_VAR_KW),
+ // so the comparison must be inclusive.
+ if op <= OpcodeMax {
+ if name := opcodeNames[op]; name != "" {
+ return name
+ }
+ }
+ return fmt.Sprintf("illegal op (%d)", op)
+ }
+
+ // A Program is a Starlark file in executable form.
+ //
+ // Programs are serialized by the Program.Encode method,
+ // which must be updated whenever this declaration is changed.
+ type Program struct {
+ Loads []Binding // name (really, string) and position of each load stmt
+ Names []string // names of attributes and predeclared variables
+ Constants []interface{} // = string | int64 | float64 | *big.Int | Bytes
+ Functions []*Funcode
+ Globals []Binding // for error messages and tracing
+ Toplevel *Funcode // module initialization function
+ }
+
+ // The type of a bytes literal value, to distinguish from text string.
+ type Bytes string
+
+ // A Funcode is the code of a compiled Starlark function.
+ //
+ // Funcodes are serialized by the encoder.function method,
+ // which must be updated whenever this declaration is changed.
+ type Funcode struct {
+ Prog *Program
+ Pos syntax.Position // position of def or lambda token
+ Name string // name of this function
+ Doc string // docstring of this function
+ Code []byte // the byte code
+ pclinetab []uint16 // mapping from pc to linenum (delta-encoded; see decodeLNT)
+ Locals []Binding // locals, parameters first
+ Cells []int // indices of Locals that require cells
+ Freevars []Binding // for tracing
+ MaxStack int
+ NumParams int
+ NumKwonlyParams int
+ HasVarargs, HasKwargs bool
+
+ // -- transient state --
+
+ lntOnce sync.Once
+ lnt []pclinecol // decoded line number table
+ }
+
+ // A pclinecol is one row of the decoded line number table:
+ // the source line and column in effect from program counter pc.
+ type pclinecol struct {
+ pc uint32
+ line, col int32
+ }
+
+ // A Binding is the name and position of a binding identifier.
+ type Binding struct {
+ Name string
+ Pos syntax.Position
+ }
+
+ // A pcomp holds the compiler state for a Program.
+ type pcomp struct {
+ prog *Program // what we're building
+
+ // Indices of entries already added to the corresponding
+ // prog.Names, prog.Constants, and prog.Functions pools.
+ names map[string]uint32
+ constants map[interface{}]uint32
+ functions map[*Funcode]uint32
+ }
+
+ // An fcomp holds the compiler state for a Funcode.
+ type fcomp struct {
+ fn *Funcode // what we're building
+
+ pcomp *pcomp
+ pos syntax.Position // current position of generated code
+ loops []loop // enclosing loops, innermost last
+ block *block // block receiving emitted instructions; nil after a jump
+ }
+
+ // A loop records the jump targets of the break and continue
+ // statements within one enclosing loop.
+ type loop struct {
+ break_, continue_ *block
+ }
+
+ // A block is a node of the control-flow graph: a sequence of
+ // instructions followed by up to two successor edges.
+ type block struct {
+ insns []insn
+
+ // If the last insn is a RETURN, jmp and cjmp are nil.
+ // If the last insn is a CJMP or ITERJMP,
+ // cjmp and jmp are the "true" and "false" successors.
+ // Otherwise, jmp is the sole successor.
+ jmp, cjmp *block
+
+ initialstack int // for stack depth computation; -1 => not yet known
+
+ // Used during encoding
+ index int // -1 => not encoded yet
+ addr uint32
+ }
+
+ // An insn is a single instruction: its opcode, its operand (if any),
+ // and the source position to record for it (line == 0 means none).
+ type insn struct {
+ op Opcode
+ arg uint32
+ line, col int32
+ }
+
+ // Position returns the source position for program counter pc.
+ //
+ // The result is a copy of fn.Pos whose Line and Col are replaced by
+ // those of the last line number table entry whose pc does not exceed
+ // the argument. If the table is empty, Line and Col are both zero.
+ // The table is decoded lazily, on the first call.
+ func (fn *Funcode) Position(pc uint32) syntax.Position {
+ fn.lntOnce.Do(fn.decodeLNT)
+
+ // Binary search to find last LNT entry not greater than pc.
+ // To avoid dynamic dispatch, this is a specialization of
+ // sort.Search using this predicate:
+ // !(i < len(fn.lnt)-1 && fn.lnt[i+1].pc <= pc)
+ n := len(fn.lnt)
+ i, j := 0, n
+ for i < j {
+ h := int(uint(i+j) >> 1) // midpoint, computed without signed overflow
+ if !(h >= n-1 || fn.lnt[h+1].pc > pc) {
+ i = h + 1
+ } else {
+ j = h
+ }
+ }
+
+ var line, col int32
+ if i < n {
+ line = fn.lnt[i].line
+ col = fn.lnt[i].col
+ }
+
+ pos := fn.Pos // copy the (annoyingly inaccessible) filename
+ pos.Col = col
+ pos.Line = line
+ return pos
+ }
+
+ // decodeLNT decodes the line number table and populates fn.lnt.
+ // It is called at most once.
+ // The encoder is the loop in fcomp.generate; the two must agree.
+ func (fn *Funcode) decodeLNT() {
+ // Conceptually the table contains rows of the form
+ // (pc uint32, line int32, col int32), sorted by pc.
+ // We use a delta encoding, since the differences
+ // between successive pc, line, and column values
+ // are typically small and positive (though line and
+ // especially column differences may be negative).
+ // The delta encoding starts from
+ // {pc: 0, line: fn.Pos.Line, col: fn.Pos.Col}.
+ //
+ // Each entry is packed into one or more 16-bit values:
+ // Δpc uint4
+ // Δline int5
+ // Δcol int6
+ // incomplete uint1
+ // The top 4 bits are the unsigned delta pc.
+ // The next 5 bits are the signed line number delta.
+ // The next 6 bits are the signed column number delta.
+ // The bottom bit indicates that more rows follow because
+ // one of the deltas was maxed out.
+ // These field widths were chosen from a sample of real programs,
+ // and allow >97% of rows to be encoded in a single uint16.
+
+ fn.lnt = make([]pclinecol, 0, len(fn.pclinetab)) // a minor overapproximation
+ entry := pclinecol{
+ pc: 0,
+ line: fn.Pos.Line,
+ col: fn.Pos.Col,
+ }
+ for _, x := range fn.pclinetab {
+ // Shift each field up to the top of an int16, then arithmetic-shift
+ // it back down, so that its own top bit becomes the sign.
+ entry.pc += uint32(x) >> 12
+ entry.line += int32((int16(x) << 4) >> (16 - 5)) // sign extend Δline
+ entry.col += int32((int16(x) << 9) >> (16 - 6)) // sign extend Δcol
+ // Accumulate deltas across incomplete values;
+ // record a row only once the entry is complete.
+ if (x & 1) == 0 {
+ fn.lnt = append(fn.lnt, entry)
+ }
+ }
+ }
+
+ // bindings returns the compiled form of a list of resolver bindings:
+ // for each one, the name and position of its first identifier.
+ func bindings(bindings []*resolve.Binding) []Binding {
+ out := make([]Binding, len(bindings))
+ for i := range bindings {
+ first := bindings[i].First
+ out[i] = Binding{Name: first.Name, Pos: first.NamePos}
+ }
+ return out
+ }
+
+ // Expr compiles an expression to a program whose toplevel function evaluates it.
+ // The expression is wrapped in a synthetic return statement and
+ // compiled as a one-statement file positioned at the start of expr.
+ func Expr(expr syntax.Expr, name string, locals []*resolve.Binding) *Program {
+ ret := &syntax.ReturnStmt{Result: expr}
+ return File([]syntax.Stmt{ret}, syntax.Start(expr), name, locals, nil)
+ }
+
+ // File compiles the statements of a file into a program.
+ // The statements become the body of the program's Toplevel function.
+ func File(stmts []syntax.Stmt, pos syntax.Position, name string, locals, globals []*resolve.Binding) *Program {
+ prog := &Program{Globals: bindings(globals)}
+ pc := &pcomp{
+ prog: prog,
+ names: make(map[string]uint32),
+ constants: make(map[interface{}]uint32),
+ functions: make(map[*Funcode]uint32),
+ }
+ prog.Toplevel = pc.function(name, pos, stmts, locals, nil)
+ return prog
+ }
+
+ // function compiles the statements of one function body (or of the
+ // toplevel) into a new Funcode belonging to pcomp.prog.
+ //
+ // It works in three phases: the AST is converted to a control-flow
+ // graph of blocks; the graph is linearized, which assigns each
+ // reachable block an order, an address, and an initial stack depth
+ // and computes the maximum stack depth; and finally the bytecode and
+ // line number table are generated.
+ //
+ // It panics with "internal error" if the stack depth computation
+ // detects an underflow or an inconsistent depth at a block.
+ func (pcomp *pcomp) function(name string, pos syntax.Position, stmts []syntax.Stmt, locals, freevars []*resolve.Binding) *Funcode {
+ fcomp := &fcomp{
+ pcomp: pcomp,
+ pos: pos,
+ fn: &Funcode{
+ Prog: pcomp.prog,
+ Pos: pos,
+ Name: name,
+ Doc: docStringFromBody(stmts),
+ Locals: bindings(locals),
+ Freevars: bindings(freevars),
+ },
+ }
+
+ // Record indices of locals that require cells.
+ for i, local := range locals {
+ if local.Scope == resolve.Cell {
+ fcomp.fn.Cells = append(fcomp.fn.Cells, i)
+ }
+ }
+
+ if debug {
+ fmt.Fprintf(os.Stderr, "start function(%s @ %s)\n", name, pos)
+ }
+
+ // Convert AST to a CFG of instructions.
+ entry := fcomp.newBlock()
+ fcomp.block = entry
+ fcomp.stmts(stmts)
+ if fcomp.block != nil {
+ // Control can reach the end of the body: return None implicitly.
+ fcomp.emit(NONE)
+ fcomp.emit(RETURN)
+ }
+
+ var oops bool // something bad happened
+
+ // setinitialstack records the stack depth on entry to b,
+ // and reports a mismatch if b was already reached at a different depth.
+ setinitialstack := func(b *block, depth int) {
+ if b.initialstack == -1 {
+ b.initialstack = depth
+ } else if b.initialstack != depth {
+ fmt.Fprintf(os.Stderr, "%d: setinitialstack: depth mismatch: %d vs %d\n",
+ b.index, b.initialstack, depth)
+ oops = true
+ }
+ }
+
+ // Linearize the CFG:
+ // compute order, address, and initial
+ // stack depth of each reachable block.
+ var pc uint32
+ var blocks []*block
+ var maxstack int
+ var visit func(b *block)
+ visit = func(b *block) {
+ if b.index >= 0 {
+ return // already visited
+ }
+ b.index = len(blocks)
+ b.addr = pc
+ blocks = append(blocks, b)
+
+ stack := b.initialstack
+ if debug {
+ fmt.Fprintf(os.Stderr, "%s block %d: (stack = %d)\n", name, b.index, stack)
+ }
+ var cjmpAddr *uint32
+ var isiterjmp int
+ for i, insn := range b.insns {
+ pc++
+
+ // Compute size of argument.
+ if insn.op >= OpcodeArgMin {
+ switch insn.op {
+ case ITERJMP:
+ isiterjmp = 1
+ fallthrough
+ case CJMP:
+ // The target address is not yet known,
+ // so the operand is always padded to 4 bytes.
+ cjmpAddr = &b.insns[i].arg
+ pc += 4
+ default:
+ pc += uint32(argLen(insn.arg))
+ }
+ }
+
+ // Compute effect on stack.
+ se := insn.stackeffect()
+ if debug {
+ fmt.Fprintln(os.Stderr, "\t", insn.op, stack, stack+se)
+ }
+ stack += se
+ if stack < 0 {
+ fmt.Fprintf(os.Stderr, "After pc=%d: stack underflow\n", pc)
+ oops = true
+ }
+ if stack+isiterjmp > maxstack {
+ maxstack = stack + isiterjmp
+ }
+ }
+
+ if debug {
+ // (The arguments were previously passed in swapped order.)
+ fmt.Fprintf(os.Stderr, "successors of block %d (start=%d):\n",
+ b.index, b.addr)
+ if b.jmp != nil {
+ fmt.Fprintf(os.Stderr, "jmp to %d\n", b.jmp.index)
+ }
+ if b.cjmp != nil {
+ fmt.Fprintf(os.Stderr, "cjmp to %d\n", b.cjmp.index)
+ }
+ }
+
+ // Place the jmp block next.
+ if b.jmp != nil {
+ // jump threading (empty cycles are impossible)
+ for b.jmp.insns == nil {
+ b.jmp = b.jmp.jmp
+ }
+
+ // On the fall-through edge of an ITERJMP,
+ // the element has been pushed.
+ setinitialstack(b.jmp, stack+isiterjmp)
+ if b.jmp.index < 0 {
+ // Successor is not yet visited:
+ // place it next and fall through.
+ visit(b.jmp)
+ } else {
+ // Successor already visited;
+ // explicit backward jump required.
+ pc += 5
+ }
+ }
+
+ // Then the cjmp block.
+ if b.cjmp != nil {
+ // jump threading (empty cycles are impossible)
+ for b.cjmp.insns == nil {
+ b.cjmp = b.cjmp.jmp
+ }
+
+ setinitialstack(b.cjmp, stack)
+ visit(b.cjmp)
+
+ // Patch the CJMP/ITERJMP, if present.
+ if cjmpAddr != nil {
+ *cjmpAddr = b.cjmp.addr
+ }
+ }
+ }
+ setinitialstack(entry, 0)
+ visit(entry)
+
+ fn := fcomp.fn
+ fn.MaxStack = maxstack
+
+ // Emit bytecode (and position table).
+ if Disassemble {
+ fmt.Fprintf(os.Stderr, "Function %s: (%d blocks, %d bytes)\n", name, len(blocks), pc)
+ }
+ fcomp.generate(blocks, pc)
+
+ if debug {
+ fmt.Fprintf(os.Stderr, "code=%d maxstack=%d\n", fn.Code, fn.MaxStack)
+ }
+
+ // Don't panic until we've completed printing of the function.
+ if oops {
+ panic("internal error")
+ }
+
+ if debug {
+ fmt.Fprintf(os.Stderr, "end function(%s @ %s)\n", name, pos)
+ }
+
+ return fn
+ }
+
+ // docStringFromBody returns the value of the string literal that
+ // forms the first statement of body, or "" if the body is empty or
+ // does not begin with a string literal expression statement.
+ func docStringFromBody(body []syntax.Stmt) string {
+ if len(body) > 0 {
+ if first, isExpr := body[0].(*syntax.ExprStmt); isExpr {
+ if lit, isLit := first.X.(*syntax.Literal); isLit && lit.Token == syntax.STRING {
+ return lit.Value.(string)
+ }
+ }
+ }
+ return ""
+ }
+
+ // stackeffect returns the net change in operand stack depth caused by
+ // the instruction. Fixed effects come straight from the stackEffect
+ // table; entries marked variableStackEffect are derived from the operand.
+ // It panics if a variable-effect opcode has no rule here.
+ func (insn *insn) stackeffect() int {
+ fixed := int(stackEffect[insn.op])
+ if fixed != variableStackEffect {
+ return fixed
+ }
+
+ n := int(insn.arg)
+ switch insn.op {
+ case CALL, CALL_KW, CALL_VAR, CALL_VAR_KW:
+ // The operand packs #positional in the high bits and #named in
+ // the low byte; each named argument occupies two stack slots.
+ named, positional := insn.arg&0xff, insn.arg>>8
+ effect := -int(2*named + positional)
+ switch insn.op {
+ case CALL_VAR, CALL_KW:
+ effect -= 1 // one extra operand: *args or **kwargs
+ case CALL_VAR_KW:
+ effect -= 2 // two extra operands: *args and **kwargs
+ }
+ return effect
+ case ITERJMP:
+ // Stack effect differs by successor:
+ // +1 for jmp/false/ok
+ // 0 for cjmp/true/exhausted
+ // Handled specially in caller.
+ return 0
+ case MAKELIST, MAKETUPLE:
+ return 1 - n
+ case UNPACK:
+ return n - 1
+ }
+ panic(insn.op)
+ }
+
+ // generate emits the linear instruction stream from the CFG,
+ // and builds the PC-to-line number table.
+ //
+ // blocks must be in layout order, with the address of each already
+ // assigned, and codelen must be the total code size computed during
+ // linearization; generate panics if the emitted code has a different
+ // length. The results are stored in fcomp.fn.Code and fcomp.fn.pclinetab.
+ func (fcomp *fcomp) generate(blocks []*block, codelen uint32) {
+ code := make([]byte, 0, codelen)
+ var pclinetab []uint16
+ prev := pclinecol{
+ pc: 0,
+ line: fcomp.fn.Pos.Line,
+ col: fcomp.fn.Pos.Col,
+ }
+
+ for _, b := range blocks {
+ if Disassemble {
+ fmt.Fprintf(os.Stderr, "%d:\n", b.index)
+ }
+ pc := b.addr
+ for _, insn := range b.insns {
+ if insn.line != 0 {
+ // Instruction has a source position. Delta-encode it.
+ // See Funcode.decodeLNT for the encoding.
+ // Each iteration emits one 16-bit value; a delta too large
+ // for its field is clipped and the remainder carried into
+ // a further value, flagged by the incomplete bit.
+ for {
+ var incomplete uint16
+
+ // Δpc, uint4
+ deltapc := pc - prev.pc
+ if deltapc > 0x0f {
+ deltapc = 0x0f
+ incomplete = 1
+ }
+ prev.pc += deltapc
+
+ // Δline, int5
+ deltaline, ok := clip(insn.line-prev.line, -0x10, 0x0f)
+ if !ok {
+ incomplete = 1
+ }
+ prev.line += deltaline
+
+ // Δcol, int6
+ deltacol, ok := clip(insn.col-prev.col, -0x20, 0x1f)
+ if !ok {
+ incomplete = 1
+ }
+ prev.col += deltacol
+
+ entry := uint16(deltapc<<12) | uint16(deltaline&0x1f)<<7 | uint16(deltacol&0x3f)<<1 | incomplete
+ pclinetab = append(pclinetab, entry)
+ if incomplete == 0 {
+ break
+ }
+ }
+
+ if Disassemble {
+ fmt.Fprintf(os.Stderr, "\t\t\t\t\t; %s:%d:%d\n",
+ filepath.Base(fcomp.fn.Pos.Filename()), insn.line, insn.col)
+ }
+ }
+ if Disassemble {
+ PrintOp(fcomp.fn, pc, insn.op, insn.arg)
+ }
+ code = append(code, byte(insn.op))
+ pc++
+ if insn.op >= OpcodeArgMin {
+ if insn.op == CJMP || insn.op == ITERJMP {
+ code = addUint32(code, insn.arg, 4) // pad arg to 4 bytes
+ } else {
+ code = addUint32(code, insn.arg, 0)
+ }
+ pc = uint32(len(code))
+ }
+ }
+
+ // If the jmp successor does not immediately follow this block,
+ // an explicit JMP (with a 4-byte padded operand) is needed.
+ if b.jmp != nil && b.jmp.index != b.index+1 {
+ addr := b.jmp.addr
+ if Disassemble {
+ fmt.Fprintf(os.Stderr, "\t%d\tjmp\t\t%d\t; block %d\n",
+ pc, addr, b.jmp.index)
+ }
+ code = append(code, byte(JMP))
+ code = addUint32(code, addr, 4)
+ }
+ }
+ if len(code) != int(codelen) {
+ panic("internal error: wrong code length")
+ }
+
+ fcomp.fn.pclinetab = pclinetab
+ fcomp.fn.Code = code
+ }
+
+ // clip limits x to the closed interval [min, max].
+ // It returns the limited value and reports whether x was already
+ // within the interval (that is, whether the result equals x).
+ func clip(x, min, max int32) (int32, bool) {
+ switch {
+ case x > max:
+ return max, false
+ case x < min:
+ return min, false
+ }
+ return x, true
+ }
+
+ // addUint32 appends x to code as a little-endian 7-bit varint and
+ // returns the extended slice. If the encoding is shorter than min
+ // bytes, NOP bytes are appended after it until the operand occupies
+ // min bytes.
+ // TODO(adonovan): opt: steal top two bits of opcode
+ // to encode the number of complete bytes that follow.
+ func addUint32(code []byte, x uint32, min int) []byte {
+ padTo := len(code) + min
+ // Emit 7 bits at a time, low-order first; the top bit of each
+ // byte signals that more bytes follow.
+ for ; x >= 0x80; x >>= 7 {
+ code = append(code, byte(x)|0x80)
+ }
+ code = append(code, byte(x))
+ for n := len(code); n < padTo; n++ {
+ code = append(code, byte(NOP))
+ }
+ return code
+ }
+
+ // argLen returns the number of bytes in the unpadded 7-bit varint
+ // encoding of x (between 1 and 5).
+ func argLen(x uint32) int {
+ length := 1
+ for ; x >= 0x80; x >>= 7 {
+ length++
+ }
+ return length
+ }
+
+ // PrintOp prints an instruction to stderr.
+ // It is provided for debugging.
+ //
+ // The output is the pc, the mnemonic, and the numeric operand (if any),
+ // followed, where the operand is an index into one of fn's tables,
+ // by a comment giving the entry it refers to.
+ func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) {
+ if op < OpcodeArgMin {
+ fmt.Fprintf(os.Stderr, "\t%d\t%s\n", pc, op)
+ return
+ }
+
+ var comment string
+ switch op {
+ case CONSTANT:
+ switch x := fn.Prog.Constants[arg].(type) {
+ case string:
+ comment = strconv.Quote(x)
+ case Bytes:
+ comment = "b" + strconv.Quote(string(x))
+ default:
+ comment = fmt.Sprint(x)
+ }
+ case MAKEFUNC:
+ comment = fn.Prog.Functions[arg].Name
+ case SETLOCAL, LOCAL, SETLOCALCELL, LOCALCELL:
+ // Cell instructions take a local index too.
+ comment = fn.Locals[arg].Name
+ case SETGLOBAL, GLOBAL:
+ comment = fn.Prog.Globals[arg].Name
+ case ATTR, SETFIELD, PREDECLARED, UNIVERSAL:
+ comment = fn.Prog.Names[arg]
+ case FREE, FREECELL:
+ comment = fn.Freevars[arg].Name
+ case CALL, CALL_VAR, CALL_KW, CALL_VAR_KW:
+ comment = fmt.Sprintf("%d pos, %d named", arg>>8, arg&0xff)
+ default:
+ // JMP, CJMP, ITERJMP, MAKETUPLE, MAKELIST, LOAD, UNPACK:
+ // arg is just a number
+ }
+ // Build the line in a buffer so that it reaches stderr in one write.
+ var buf bytes.Buffer
+ fmt.Fprintf(&buf, "\t%d\t%-10s\t%d", pc, op, arg)
+ if comment != "" {
+ fmt.Fprint(&buf, "\t; ", comment)
+ }
+ fmt.Fprintln(&buf)
+ os.Stderr.Write(buf.Bytes())
+ }
+
+ // newBlock returns a new, empty block that has not yet been
+ // assigned an index or an initial stack depth (both are -1).
+ func (fcomp) newBlock() *block {
+ b := new(block)
+ b.index = -1
+ b.initialstack = -1
+ return b
+ }
+
+ // emit appends an operand-less instruction to the current block,
+ // tagged with the current source position, which is then cleared.
+ // It panics if op requires an operand.
+ func (fcomp *fcomp) emit(op Opcode) {
+ if op >= OpcodeArgMin {
+ panic("missing arg: " + op.String())
+ }
+ cur := fcomp.block
+ cur.insns = append(cur.insns, insn{
+ op: op,
+ line: fcomp.pos.Line,
+ col: fcomp.pos.Col,
+ })
+ fcomp.pos.Line, fcomp.pos.Col = 0, 0
+ }
+
+ // emit1 appends an instruction with an immediate operand to the
+ // current block, tagged with the current source position, which is
+ // then cleared. It panics if op does not take an operand.
+ func (fcomp *fcomp) emit1(op Opcode, arg uint32) {
+ if op < OpcodeArgMin {
+ panic("unwanted arg: " + op.String())
+ }
+ cur := fcomp.block
+ cur.insns = append(cur.insns, insn{
+ op: op,
+ arg: arg,
+ line: fcomp.pos.Line,
+ col: fcomp.pos.Col,
+ })
+ fcomp.pos.Line, fcomp.pos.Col = 0, 0
+ }
+
+ // jump makes b the unconditional successor of the current block.
+ // On return, the current block is unset.
+ // It panics if b is the current block.
+ func (fcomp *fcomp) jump(b *block) {
+ cur := fcomp.block
+ if cur == b {
+ panic("self-jump") // unreachable: Starlark has no arbitrary looping constructs
+ }
+ cur.jmp = b
+ fcomp.block = nil
+ }
+
+ // condjump ends the current block with a conditional jump
+ // (CJMP or ITERJMP) whose successors are t (taken) and f (fall through).
+ // (For ITERJMP, the cases are jmp/f/ok and cjmp/t/exhausted.)
+ // On return, the current block is unset.
+ // It panics if op is any other opcode.
+ func (fcomp *fcomp) condjump(op Opcode, t, f *block) {
+ switch op {
+ case CJMP, ITERJMP:
+ // ok
+ default:
+ panic("not a conditional jump: " + op.String())
+ }
+ fcomp.emit1(op, 0) // fill in address later
+ fcomp.block.cjmp = t
+ fcomp.jump(f)
+ }
+
+ // nameIndex returns the position of name in the program's name pool,
+ // appending it to the pool on first use.
+ func (pcomp *pcomp) nameIndex(name string) uint32 {
+ if index, found := pcomp.names[name]; found {
+ return index
+ }
+ index := uint32(len(pcomp.prog.Names))
+ pcomp.prog.Names = append(pcomp.prog.Names, name)
+ pcomp.names[name] = index
+ return index
+ }
+
+ // constantIndex returns the position of the constant v in the
+ // program's constant pool, appending it to the pool on first use.
+ func (pcomp *pcomp) constantIndex(v interface{}) uint32 {
+ if index, found := pcomp.constants[v]; found {
+ return index
+ }
+ index := uint32(len(pcomp.prog.Constants))
+ pcomp.prog.Constants = append(pcomp.prog.Constants, v)
+ pcomp.constants[v] = index
+ return index
+ }
+
+ // functionIndex returns the position of fn in the program's
+ // function pool, appending it to the pool on first use.
+ func (pcomp *pcomp) functionIndex(fn *Funcode) uint32 {
+ if index, found := pcomp.functions[fn]; found {
+ return index
+ }
+ index := uint32(len(pcomp.prog.Functions))
+ pcomp.prog.Functions = append(pcomp.prog.Functions, fn)
+ pcomp.functions[fn] = index
+ return index
+ }
+
+ // string emits a CONSTANT instruction that pushes the string s,
+ // interning s in the program's constant pool.
+ func (fcomp *fcomp) string(s string) {
+ index := fcomp.pcomp.constantIndex(s)
+ fcomp.emit1(CONSTANT, index)
+ }
+
+ // setPos sets the current source position.
+ // It should be called prior to any operation that can fail dynamically.
+ // All positions are assumed to belong to the same file.
+ //
+ // The position is attached to the next instruction emitted
+ // and then cleared (see emit and emit1).
+ func (fcomp *fcomp) setPos(pos syntax.Position) {
+ fcomp.pos = pos
+ }
+
+// set emits code to store the top-of-stack value
+// to the specified local, cell, or global variable.
+func (fcomp *fcomp) set(id *syntax.Ident) {
+ bind := id.Binding.(*resolve.Binding)
+ switch bind.Scope {
+ case resolve.Local:
+ fcomp.emit1(SETLOCAL, uint32(bind.Index))
+ case resolve.Cell:
+ fcomp.emit1(SETLOCALCELL, uint32(bind.Index))
+ case resolve.Global:
+ fcomp.emit1(SETGLOBAL, uint32(bind.Index))
+ default:
+ log.Panicf("%s: set(%s): not global/local/cell (%d)", id.NamePos, id.Name, bind.Scope)
+ }
+}
+
+// lookup emits code to push the value of the specified variable.
+func (fcomp *fcomp) lookup(id *syntax.Ident) {
+ bind := id.Binding.(*resolve.Binding)
+ if bind.Scope != resolve.Universal { // (universal lookup can't fail)
+ fcomp.setPos(id.NamePos)
+ }
+ switch bind.Scope {
+ case resolve.Local:
+ fcomp.emit1(LOCAL, uint32(bind.Index))
+ case resolve.Free:
+ fcomp.emit1(FREECELL, uint32(bind.Index))
+ case resolve.Cell:
+ fcomp.emit1(LOCALCELL, uint32(bind.Index))
+ case resolve.Global:
+ fcomp.emit1(GLOBAL, uint32(bind.Index))
+ case resolve.Predeclared:
+ fcomp.emit1(PREDECLARED, fcomp.pcomp.nameIndex(id.Name))
+ case resolve.Universal:
+ fcomp.emit1(UNIVERSAL, fcomp.pcomp.nameIndex(id.Name))
+ default:
+ log.Panicf("%s: compiler.lookup(%s): scope = %d", id.NamePos, id.Name, bind.Scope)
+ }
+}
+
+ // stmts emits code for each statement of the list, in order.
+ func (fcomp *fcomp) stmts(stmts []syntax.Stmt) {
+ for i := range stmts {
+ fcomp.stmt(stmts[i])
+ }
+ }
+
+ // stmt emits code for a single statement into the current block,
+ // creating and linking further blocks for control flow as needed.
+ // It panics on a statement type it does not recognize.
+ func (fcomp *fcomp) stmt(stmt syntax.Stmt) {
+ switch stmt := stmt.(type) {
+ case *syntax.ExprStmt:
+ if _, ok := stmt.X.(*syntax.Literal); ok {
+ // Opt: don't compile doc comments only to pop them.
+ // (This skips every literal expression statement.)
+ return
+ }
+ fcomp.expr(stmt.X)
+ fcomp.emit(POP)
+
+ case *syntax.BranchStmt:
+ // Resolver invariant: break/continue appear only within loops.
+ switch stmt.Token {
+ case syntax.PASS:
+ // no-op
+ case syntax.BREAK:
+ b := fcomp.loops[len(fcomp.loops)-1].break_
+ fcomp.jump(b)
+ fcomp.block = fcomp.newBlock() // dead code
+ case syntax.CONTINUE:
+ b := fcomp.loops[len(fcomp.loops)-1].continue_
+ fcomp.jump(b)
+ fcomp.block = fcomp.newBlock() // dead code
+ }
+
+ case *syntax.IfStmt:
+ // Keep consistent with CondExpr.
+ t := fcomp.newBlock()
+ f := fcomp.newBlock()
+ done := fcomp.newBlock()
+
+ fcomp.ifelse(stmt.Cond, t, f)
+
+ fcomp.block = t
+ fcomp.stmts(stmt.True)
+ fcomp.jump(done)
+
+ fcomp.block = f
+ fcomp.stmts(stmt.False)
+ fcomp.jump(done)
+
+ fcomp.block = done
+
+ case *syntax.AssignStmt:
+ switch stmt.Op {
+ case syntax.EQ:
+ // simple assignment: x = y
+ fcomp.expr(stmt.RHS)
+ fcomp.assign(stmt.OpPos, stmt.LHS)
+
+ case syntax.PLUS_EQ,
+ syntax.MINUS_EQ,
+ syntax.STAR_EQ,
+ syntax.SLASH_EQ,
+ syntax.SLASHSLASH_EQ,
+ syntax.PERCENT_EQ,
+ syntax.AMP_EQ,
+ syntax.PIPE_EQ,
+ syntax.CIRCUMFLEX_EQ,
+ syntax.LTLT_EQ,
+ syntax.GTGT_EQ:
+ // augmented assignment: x += y
+
+ // set emits the final store; it is chosen below
+ // according to the form of the left-hand side.
+ var set func()
+
+ // Evaluate "address" of x exactly once to avoid duplicate side-effects.
+ switch lhs := unparen(stmt.LHS).(type) {
+ case *syntax.Ident:
+ // x = ...
+ fcomp.lookup(lhs)
+ set = func() {
+ fcomp.set(lhs)
+ }
+
+ case *syntax.IndexExpr:
+ // x[y] = ...
+ fcomp.expr(lhs.X)
+ fcomp.expr(lhs.Y)
+ fcomp.emit(DUP2) // keep x and y on the stack for the SETINDEX
+ fcomp.setPos(lhs.Lbrack)
+ fcomp.emit(INDEX)
+ set = func() {
+ fcomp.setPos(lhs.Lbrack)
+ fcomp.emit(SETINDEX)
+ }
+
+ case *syntax.DotExpr:
+ // x.f = ...
+ fcomp.expr(lhs.X)
+ fcomp.emit(DUP) // keep x on the stack for the SETFIELD
+ name := fcomp.pcomp.nameIndex(lhs.Name.Name)
+ fcomp.setPos(lhs.Dot)
+ fcomp.emit1(ATTR, name)
+ set = func() {
+ fcomp.setPos(lhs.Dot)
+ fcomp.emit1(SETFIELD, name)
+ }
+
+ default:
+ panic(lhs)
+ }
+
+ fcomp.expr(stmt.RHS)
+
+ if stmt.Op == syntax.PLUS_EQ {
+ // Allow the runtime to optimize list += iterable.
+ fcomp.setPos(stmt.OpPos)
+ fcomp.emit(INPLACE_ADD)
+ } else {
+ // Map the augmented operator to its binary counterpart.
+ // NOTE(review): presumably relies on the X_EQ tokens being
+ // declared in the same order as the binary tokens; confirm
+ // against the syntax package.
+ fcomp.binop(stmt.OpPos, stmt.Op-syntax.PLUS_EQ+syntax.PLUS)
+ }
+ set()
+ }
+
+ case *syntax.DefStmt:
+ fcomp.function(stmt.Function.(*resolve.Function))
+ fcomp.set(stmt.Name)
+
+ case *syntax.ForStmt:
+ // Keep consistent with ForClause.
+ head := fcomp.newBlock()
+ body := fcomp.newBlock()
+ tail := fcomp.newBlock()
+
+ fcomp.expr(stmt.X)
+ fcomp.setPos(stmt.For)
+ fcomp.emit(ITERPUSH)
+ fcomp.jump(head)
+
+ // head: fetch the next element, or jump to tail when exhausted.
+ fcomp.block = head
+ fcomp.condjump(ITERJMP, tail, body)
+
+ fcomp.block = body
+ fcomp.assign(stmt.For, stmt.Vars)
+ fcomp.loops = append(fcomp.loops, loop{break_: tail, continue_: head})
+ fcomp.stmts(stmt.Body)
+ fcomp.loops = fcomp.loops[:len(fcomp.loops)-1]
+ fcomp.jump(head)
+
+ fcomp.block = tail
+ fcomp.emit(ITERPOP)
+
+ case *syntax.WhileStmt:
+ head := fcomp.newBlock()
+ body := fcomp.newBlock()
+ done := fcomp.newBlock()
+
+ fcomp.jump(head)
+ fcomp.block = head
+ fcomp.ifelse(stmt.Cond, body, done)
+
+ fcomp.block = body
+ fcomp.loops = append(fcomp.loops, loop{break_: done, continue_: head})
+ fcomp.stmts(stmt.Body)
+ fcomp.loops = fcomp.loops[:len(fcomp.loops)-1]
+ fcomp.jump(head)
+
+ fcomp.block = done
+
+ case *syntax.ReturnStmt:
+ if stmt.Result != nil {
+ fcomp.expr(stmt.Result)
+ } else {
+ fcomp.emit(NONE)
+ }
+ fcomp.emit(RETURN)
+ fcomp.block = fcomp.newBlock() // dead code
+
+ case *syntax.LoadStmt:
+ // Push the name of each imported symbol, then the module name.
+ for i := range stmt.From {
+ fcomp.string(stmt.From[i].Name)
+ }
+ module := stmt.Module.Value.(string)
+ fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Binding{
+ Name: module,
+ Pos: stmt.Module.TokenPos,
+ })
+ fcomp.string(module)
+ fcomp.setPos(stmt.Load)
+ fcomp.emit1(LOAD, uint32(len(stmt.From)))
+ // LOAD leaves v1 ... vN on the stack with vN on top,
+ // so bind the local names in reverse order.
+ for i := range stmt.To {
+ fcomp.set(stmt.To[len(stmt.To)-1-i])
+ }
+
+ default:
+ start, _ := stmt.Span()
+ log.Panicf("%s: exec: unexpected statement %T", start, stmt)
+ }
+ }
+
+ // assign implements lhs = rhs for arbitrary expressions lhs.
+ // RHS is on top of stack, consumed.
+ //
+ // pos is the position recorded for the UNPACK instruction when lhs
+ // is a tuple or list. It panics if lhs is not an assignable form.
+ func (fcomp *fcomp) assign(pos syntax.Position, lhs syntax.Expr) {
+ switch lhs := lhs.(type) {
+ case *syntax.ParenExpr:
+ // (lhs) = rhs
+ fcomp.assign(pos, lhs.X)
+
+ case *syntax.Ident:
+ // x = rhs
+ fcomp.set(lhs)
+
+ case *syntax.TupleExpr:
+ // x, y = rhs
+ fcomp.assignSequence(pos, lhs.List)
+
+ case *syntax.ListExpr:
+ // [x, y] = rhs
+ fcomp.assignSequence(pos, lhs.List)
+
+ case *syntax.IndexExpr:
+ // x[y] = rhs
+ // The rhs is already on the stack, so each operand is exchanged
+ // beneath it to produce the "a i new" order SETINDEX expects.
+ fcomp.expr(lhs.X)
+ fcomp.emit(EXCH)
+ fcomp.expr(lhs.Y)
+ fcomp.emit(EXCH)
+ fcomp.setPos(lhs.Lbrack)
+ fcomp.emit(SETINDEX)
+
+ case *syntax.DotExpr:
+ // x.f = rhs
+ // Exchange x beneath the rhs to produce the "x y" order SETFIELD expects.
+ fcomp.expr(lhs.X)
+ fcomp.emit(EXCH)
+ fcomp.setPos(lhs.Dot)
+ fcomp.emit1(SETFIELD, fcomp.pcomp.nameIndex(lhs.Name.Name))
+
+ default:
+ panic(lhs)
+ }
+ }
+
+ // assignSequence unpacks the iterable on top of the stack into
+ // len(lhs) values and assigns them to the elements of lhs in order.
+ // pos is the position recorded for the UNPACK instruction.
+ func (fcomp *fcomp) assignSequence(pos syntax.Position, lhs []syntax.Expr) {
+ fcomp.setPos(pos)
+ n := len(lhs)
+ fcomp.emit1(UNPACK, uint32(n))
+ for _, target := range lhs {
+ fcomp.assign(pos, target)
+ }
+ }
+
+func (fcomp *fcomp) expr(e syntax.Expr) {
+ switch e := e.(type) {
+ case *syntax.ParenExpr:
+ fcomp.expr(e.X)
+
+ case *syntax.Ident:
+ fcomp.lookup(e)
+
+ case *syntax.Literal:
+ // e.Value is int64, float64, *bigInt, string
+ v := e.Value
+ if e.Token == syntax.BYTES {
+ v = Bytes(v.(string))
+ }
+ fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(v))
+
+ case *syntax.ListExpr:
+ for _, x := range e.List {
+ fcomp.expr(x)
+ }
+ fcomp.emit1(MAKELIST, uint32(len(e.List)))
+
+ case *syntax.CondExpr:
+ // Keep consistent with IfStmt.
+ t := fcomp.newBlock()
+ f := fcomp.newBlock()
+ done := fcomp.newBlock()
+
+ fcomp.ifelse(e.Cond, t, f)
+
+ fcomp.block = t
+ fcomp.expr(e.True)
+ fcomp.jump(done)
+
+ fcomp.block = f
+ fcomp.expr(e.False)
+ fcomp.jump(done)
+
+ fcomp.block = done
+
+ case *syntax.IndexExpr:
+ fcomp.expr(e.X)
+ fcomp.expr(e.Y)
+ fcomp.setPos(e.Lbrack)
+ fcomp.emit(INDEX)
+
+ case *syntax.SliceExpr:
+ fcomp.setPos(e.Lbrack)
+ fcomp.expr(e.X)
+ if e.Lo != nil {
+ fcomp.expr(e.Lo)
+ } else {
+ fcomp.emit(NONE)
+ }
+ if e.Hi != nil {
+ fcomp.expr(e.Hi)
+ } else {
+ fcomp.emit(NONE)
+ }
+ if e.Step != nil {
+ fcomp.expr(e.Step)
+ } else {
+ fcomp.emit(NONE)
+ }
+ fcomp.emit(SLICE)
+
+ case *syntax.Comprehension:
+ if e.Curly {
+ fcomp.emit(MAKEDICT)
+ } else {
+ fcomp.emit1(MAKELIST, 0)
+ }
+ fcomp.comprehension(e, 0)
+
+ case *syntax.TupleExpr:
+ fcomp.tuple(e.List)
+
+ case *syntax.DictExpr:
+ fcomp.emit(MAKEDICT)
+ for _, entry := range e.List {
+ entry := entry.(*syntax.DictEntry)
+ fcomp.emit(DUP)
+ fcomp.expr(entry.Key)
+ fcomp.expr(entry.Value)
+ fcomp.setPos(entry.Colon)
+ fcomp.emit(SETDICTUNIQ)
+ }
+
+ case *syntax.UnaryExpr:
+ fcomp.expr(e.X)
+ fcomp.setPos(e.OpPos)
+ switch e.Op {
+ case syntax.MINUS:
+ fcomp.emit(UMINUS)
+ case syntax.PLUS:
+ fcomp.emit(UPLUS)
+ case syntax.NOT:
+ fcomp.emit(NOT)
+ case syntax.TILDE:
+ fcomp.emit(TILDE)
+ default:
+ log.Panicf("%s: unexpected unary op: %s", e.OpPos, e.Op)
+ }
+
+ case *syntax.BinaryExpr:
+ switch e.Op {
+ // short-circuit operators
+ // TODO(adonovan): use ifelse to simplify conditions.
+ case syntax.OR:
+ // x or y => if x then x else y
+ done := fcomp.newBlock()
+ y := fcomp.newBlock()
+
+ fcomp.expr(e.X)
+ fcomp.emit(DUP)
+ fcomp.condjump(CJMP, done, y)
+
+ fcomp.block = y
+ fcomp.emit(POP) // discard X
+ fcomp.expr(e.Y)
+ fcomp.jump(done)
+
+ fcomp.block = done
+
+ case syntax.AND:
+ // x and y => if x then y else x
+ done := fcomp.newBlock()
+ y := fcomp.newBlock()
+
+ fcomp.expr(e.X)
+ fcomp.emit(DUP)
+ fcomp.condjump(CJMP, y, done)
+
+ fcomp.block = y
+ fcomp.emit(POP) // discard X
+ fcomp.expr(e.Y)
+ fcomp.jump(done)
+
+ fcomp.block = done
+
+ case syntax.PLUS:
+ fcomp.plus(e)
+
+ default:
+ // all other strict binary operator (includes comparisons)
+ fcomp.expr(e.X)
+ fcomp.expr(e.Y)
+ fcomp.binop(e.OpPos, e.Op)
+ }
+
+ case *syntax.DotExpr:
+ fcomp.expr(e.X)
+ fcomp.setPos(e.Dot)
+ fcomp.emit1(ATTR, fcomp.pcomp.nameIndex(e.Name.Name))
+
+ case *syntax.CallExpr:
+ fcomp.call(e)
+
+ case *syntax.LambdaExpr:
+ fcomp.function(e.Function.(*resolve.Function))
+
+ default:
+ start, _ := e.Span()
+ log.Panicf("%s: unexpected expr %T", start, e)
+ }
+}
+
+type summand struct {
+ x syntax.Expr
+ plusPos syntax.Position
+}
+
+// plus emits optimized code for ((a+b)+...)+z that avoids naive
+// quadratic behavior for strings, tuples, and lists,
+// and folds together adjacent literals of the same type.
+func (fcomp *fcomp) plus(e *syntax.BinaryExpr) {
+ // Gather all the right operands of the left tree of plusses.
+ // A tree (((a+b)+c)+d) becomes args=[a +b +c +d].
+ args := make([]summand, 0, 2) // common case: 2 operands
+ for plus := e; ; {
+ args = append(args, summand{unparen(plus.Y), plus.OpPos})
+ left := unparen(plus.X)
+ x, ok := left.(*syntax.BinaryExpr)
+ if !ok || x.Op != syntax.PLUS {
+ args = append(args, summand{x: left})
+ break
+ }
+ plus = x
+ }
+ // Reverse args to syntactic order.
+ for i, n := 0, len(args)/2; i < n; i++ {
+ j := len(args) - 1 - i
+ args[i], args[j] = args[j], args[i]
+ }
+
+ // Fold sums of adjacent literals of the same type: ""+"", []+[], ()+().
+ out := args[:0] // compact in situ
+ for i := 0; i < len(args); {
+ j := i + 1
+ if code := addable(args[i].x); code != 0 {
+ for j < len(args) && addable(args[j].x) == code {
+ j++
+ }
+ if j > i+1 {
+ args[i].x = add(code, args[i:j])
+ }
+ }
+ out = append(out, args[i])
+ i = j
+ }
+ args = out
+
+ // Emit code for an n-ary sum (n > 0).
+ fcomp.expr(args[0].x)
+ for _, summand := range args[1:] {
+ fcomp.expr(summand.x)
+ fcomp.setPos(summand.plusPos)
+ fcomp.emit(PLUS)
+ }
+
+ // If len(args) > 2, use of an accumulator instead of a chain of
+ // PLUS operations may be more efficient.
+ // However, no gain was measured on a workload analogous to Bazel loading;
+ // TODO(adonovan): opt: re-evaluate on a Bazel analysis-like workload.
+ //
+ // We cannot use a single n-ary SUM operation
+ // a b c SUM<3>
+ // because we need to report a distinct error for each
+ // individual '+' operation, so three additional operations are
+ // needed:
+ //
+ // ACCSTART => create buffer and append to it
+ // ACCUM => append to buffer
+ // ACCEND => get contents of buffer
+ //
+ // For string, list, and tuple values, the interpreter can
+ // optimize these operations by using a mutable buffer.
+ // For all other types, ACCSTART and ACCEND would behave like
+ // the identity function and ACCUM behaves like PLUS.
+ // ACCUM must correctly support user-defined operations
+ // such as list+foo.
+ //
+ // fcomp.emit(ACCSTART)
+ // for _, summand := range args[1:] {
+ // fcomp.expr(summand.x)
+ // fcomp.setPos(summand.plusPos)
+ // fcomp.emit(ACCUM)
+ // }
+ // fcomp.emit(ACCEND)
+}
+
+// addable reports whether e is a statically addable
+// expression: a [s]tring, [b]ytes, [l]ist, or [t]uple.
+func addable(e syntax.Expr) rune {
+ switch e := e.(type) {
+ case *syntax.Literal:
+ // TODO(adonovan): opt: support INT/FLOAT/BIGINT constant folding.
+ switch e.Token {
+ case syntax.STRING:
+ return 's'
+ case syntax.BYTES:
+ return 'b'
+ }
+ case *syntax.ListExpr:
+ return 'l'
+ case *syntax.TupleExpr:
+ return 't'
+ }
+ return 0
+}
+
+// add returns an expression denoting the sum of args,
+// which are all addable values of the type indicated by code.
+// The resulting syntax is degenerate, lacking position, etc.
+func add(code rune, args []summand) syntax.Expr {
+ switch code {
+ case 's', 'b':
+ var buf strings.Builder
+ for _, arg := range args {
+ buf.WriteString(arg.x.(*syntax.Literal).Value.(string))
+ }
+ tok := syntax.STRING
+ if code == 'b' {
+ tok = syntax.BYTES
+ }
+ return &syntax.Literal{Token: tok, Value: buf.String()}
+ case 'l':
+ var elems []syntax.Expr
+ for _, arg := range args {
+ elems = append(elems, arg.x.(*syntax.ListExpr).List...)
+ }
+ return &syntax.ListExpr{List: elems}
+ case 't':
+ var elems []syntax.Expr
+ for _, arg := range args {
+ elems = append(elems, arg.x.(*syntax.TupleExpr).List...)
+ }
+ return &syntax.TupleExpr{List: elems}
+ }
+ panic(code)
+}
+
+func unparen(e syntax.Expr) syntax.Expr {
+ if p, ok := e.(*syntax.ParenExpr); ok {
+ return unparen(p.X)
+ }
+ return e
+}
+
+func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) {
+ // TODO(adonovan): simplify by assuming syntax and compiler constants align.
+ fcomp.setPos(pos)
+ switch op {
+ // arithmetic
+ case syntax.PLUS:
+ fcomp.emit(PLUS)
+ case syntax.MINUS:
+ fcomp.emit(MINUS)
+ case syntax.STAR:
+ fcomp.emit(STAR)
+ case syntax.SLASH:
+ fcomp.emit(SLASH)
+ case syntax.SLASHSLASH:
+ fcomp.emit(SLASHSLASH)
+ case syntax.PERCENT:
+ fcomp.emit(PERCENT)
+ case syntax.AMP:
+ fcomp.emit(AMP)
+ case syntax.PIPE:
+ fcomp.emit(PIPE)
+ case syntax.CIRCUMFLEX:
+ fcomp.emit(CIRCUMFLEX)
+ case syntax.LTLT:
+ fcomp.emit(LTLT)
+ case syntax.GTGT:
+ fcomp.emit(GTGT)
+ case syntax.IN:
+ fcomp.emit(IN)
+ case syntax.NOT_IN:
+ fcomp.emit(IN)
+ fcomp.emit(NOT)
+
+ // comparisons
+ case syntax.EQL,
+ syntax.NEQ,
+ syntax.GT,
+ syntax.LT,
+ syntax.LE,
+ syntax.GE:
+ fcomp.emit(Opcode(op-syntax.EQL) + EQL)
+
+ default:
+ log.Panicf("%s: unexpected binary op: %s", pos, op)
+ }
+}
+
+func (fcomp *fcomp) call(call *syntax.CallExpr) {
+ // TODO(adonovan): opt: Use optimized path for calling methods
+ // of built-ins: x.f(...) to avoid materializing a closure.
+ // if dot, ok := call.Fcomp.(*syntax.DotExpr); ok {
+ // fcomp.expr(dot.X)
+ // fcomp.args(call)
+ // fcomp.emit1(CALL_ATTR, fcomp.name(dot.Name.Name))
+ // return
+ // }
+
+ // usual case
+ fcomp.expr(call.Fn)
+ op, arg := fcomp.args(call)
+ fcomp.setPos(call.Lparen)
+ fcomp.emit1(op, arg)
+}
+
+// args emits code to push a tuple of positional arguments
+// and a tuple of named arguments containing alternating keys and values.
+// Either or both tuples may be empty (TODO(adonovan): optimize).
+func (fcomp *fcomp) args(call *syntax.CallExpr) (op Opcode, arg uint32) {
+ var callmode int
+ // Compute the number of each kind of parameter.
+ var p, n int // number of positional, named arguments
+ var varargs, kwargs syntax.Expr
+ for _, arg := range call.Args {
+ if binary, ok := arg.(*syntax.BinaryExpr); ok && binary.Op == syntax.EQ {
+
+ // named argument (name, value)
+ fcomp.string(binary.X.(*syntax.Ident).Name)
+ fcomp.expr(binary.Y)
+ n++
+ continue
+ }
+ if unary, ok := arg.(*syntax.UnaryExpr); ok {
+ if unary.Op == syntax.STAR {
+ callmode |= 1
+ varargs = unary.X
+ continue
+ } else if unary.Op == syntax.STARSTAR {
+ callmode |= 2
+ kwargs = unary.X
+ continue
+ }
+ }
+
+ // positional argument
+ fcomp.expr(arg)
+ p++
+ }
+
+ // Python2 and Python3 both permit named arguments
+ // to appear both before and after a *args argument:
+ // f(1, 2, x=3, *[4], y=5, **dict(z=6))
+ //
+ // They also differ in their evaluation order:
+ // Python2: 1 2 3 5 4 6 (*args and **kwargs evaluated last)
+ // Python3: 1 2 4 3 5 6 (positional args evaluated before named args)
+ // Starlark-in-Java historically used a third order:
+ // Lexical: 1 2 3 4 5 6 (all args evaluated left-to-right)
+ //
+ // After discussion in github.com/bazelbuild/starlark#13, the
+ // spec now requires Starlark to statically reject named
+ // arguments after *args (e.g. y=5), and to use Python2-style
+ // evaluation order. This is both easy to implement and
+ // consistent with lexical order:
+ //
+ // f(1, 2, x=3, *[4], **dict(z=6)) # 1 2 3 4 6
+
+ // *args
+ if varargs != nil {
+ fcomp.expr(varargs)
+ }
+
+ // **kwargs
+ if kwargs != nil {
+ fcomp.expr(kwargs)
+ }
+
+ // TODO(adonovan): avoid this with a more flexible encoding.
+ if p >= 256 || n >= 256 {
+ // resolve already checked this; should be unreachable
+ panic("too many arguments in call")
+ }
+
+ return CALL + Opcode(callmode), uint32(p<<8 | n)
+}
+
+func (fcomp *fcomp) tuple(elems []syntax.Expr) {
+ for _, elem := range elems {
+ fcomp.expr(elem)
+ }
+ fcomp.emit1(MAKETUPLE, uint32(len(elems)))
+}
+
+func (fcomp *fcomp) comprehension(comp *syntax.Comprehension, clauseIndex int) {
+ if clauseIndex == len(comp.Clauses) {
+ fcomp.emit(DUP) // accumulator
+ if comp.Curly {
+ // dict: {k:v for ...}
+ // Parser ensures that body is of form k:v.
+ // Python-style set comprehensions {body for vars in x}
+ // are not supported.
+ entry := comp.Body.(*syntax.DictEntry)
+ fcomp.expr(entry.Key)
+ fcomp.expr(entry.Value)
+ fcomp.setPos(entry.Colon)
+ fcomp.emit(SETDICT)
+ } else {
+ // list: [body for vars in x]
+ fcomp.expr(comp.Body)
+ fcomp.emit(APPEND)
+ }
+ return
+ }
+
+ clause := comp.Clauses[clauseIndex]
+ switch clause := clause.(type) {
+ case *syntax.IfClause:
+ t := fcomp.newBlock()
+ done := fcomp.newBlock()
+ fcomp.ifelse(clause.Cond, t, done)
+
+ fcomp.block = t
+ fcomp.comprehension(comp, clauseIndex+1)
+ fcomp.jump(done)
+
+ fcomp.block = done
+ return
+
+ case *syntax.ForClause:
+ // Keep consistent with ForStmt.
+ head := fcomp.newBlock()
+ body := fcomp.newBlock()
+ tail := fcomp.newBlock()
+
+ fcomp.expr(clause.X)
+ fcomp.setPos(clause.For)
+ fcomp.emit(ITERPUSH)
+ fcomp.jump(head)
+
+ fcomp.block = head
+ fcomp.condjump(ITERJMP, tail, body)
+
+ fcomp.block = body
+ fcomp.assign(clause.For, clause.Vars)
+ fcomp.comprehension(comp, clauseIndex+1)
+ fcomp.jump(head)
+
+ fcomp.block = tail
+ fcomp.emit(ITERPOP)
+ return
+ }
+
+ start, _ := clause.Span()
+ log.Panicf("%s: unexpected comprehension clause %T", start, clause)
+}
+
+func (fcomp *fcomp) function(f *resolve.Function) {
+ // Evaluation of the defaults may fail, so record the position.
+ fcomp.setPos(f.Pos)
+
+ // To reduce allocation, we emit a combined tuple
+ // for the defaults and the freevars.
+ // The function knows where to split it at run time.
+
+ // Generate tuple of parameter defaults. For:
+ // def f(p1, p2=dp2, p3=dp3, *, k1, k2=dk2, k3, **kwargs)
+ // the tuple is:
+ // (dp2, dp3, MANDATORY, dk2, MANDATORY).
+ ndefaults := 0
+ seenStar := false
+ for _, param := range f.Params {
+ switch param := param.(type) {
+ case *syntax.BinaryExpr:
+ fcomp.expr(param.Y)
+ ndefaults++
+ case *syntax.UnaryExpr:
+ seenStar = true // * or *args (also **kwargs)
+ case *syntax.Ident:
+ if seenStar {
+ fcomp.emit(MANDATORY)
+ ndefaults++
+ }
+ }
+ }
+
+ // Capture the cells of the function's
+ // free variables from the lexical environment.
+ for _, freevar := range f.FreeVars {
+ // Don't call fcomp.lookup because we want
+ // the cell itself, not its content.
+ switch freevar.Scope {
+ case resolve.Free:
+ fcomp.emit1(FREE, uint32(freevar.Index))
+ case resolve.Cell:
+ fcomp.emit1(LOCAL, uint32(freevar.Index))
+ }
+ }
+
+ fcomp.emit1(MAKETUPLE, uint32(ndefaults+len(f.FreeVars)))
+
+ funcode := fcomp.pcomp.function(f.Name, f.Pos, f.Body, f.Locals, f.FreeVars)
+
+ if debug {
+ // TODO(adonovan): do compilations sequentially not as a tree,
+ // to make the log easier to read.
+ // Simplify by identifying Toplevel and functionIndex 0.
+ fmt.Fprintf(os.Stderr, "resuming %s @ %s\n", fcomp.fn.Name, fcomp.pos)
+ }
+
+ // def f(a, *, b=1) has only 2 parameters.
+ numParams := len(f.Params)
+ if f.NumKwonlyParams > 0 && !f.HasVarargs {
+ numParams--
+ }
+
+ funcode.NumParams = numParams
+ funcode.NumKwonlyParams = f.NumKwonlyParams
+ funcode.HasVarargs = f.HasVarargs
+ funcode.HasKwargs = f.HasKwargs
+ fcomp.emit1(MAKEFUNC, fcomp.pcomp.functionIndex(funcode))
+}
+
+// ifelse emits a Boolean control flow decision.
+// On return, the current block is unset.
+func (fcomp *fcomp) ifelse(cond syntax.Expr, t, f *block) {
+ switch cond := cond.(type) {
+ case *syntax.UnaryExpr:
+ if cond.Op == syntax.NOT {
+ // if not x then goto t else goto f
+ // =>
+ // if x then goto f else goto t
+ fcomp.ifelse(cond.X, f, t)
+ return
+ }
+
+ case *syntax.BinaryExpr:
+ switch cond.Op {
+ case syntax.AND:
+ // if x and y then goto t else goto f
+ // =>
+ // if x then ifelse(y, t, f) else goto f
+ fcomp.expr(cond.X)
+ y := fcomp.newBlock()
+ fcomp.condjump(CJMP, y, f)
+
+ fcomp.block = y
+ fcomp.ifelse(cond.Y, t, f)
+ return
+
+ case syntax.OR:
+ // if x or y then goto t else goto f
+ // =>
+ // if x then goto t else ifelse(y, t, f)
+ fcomp.expr(cond.X)
+ y := fcomp.newBlock()
+ fcomp.condjump(CJMP, t, y)
+
+ fcomp.block = y
+ fcomp.ifelse(cond.Y, t, f)
+ return
+ case syntax.NOT_IN:
+ // if x not in y then goto t else goto f
+ // =>
+ // if x in y then goto f else goto t
+ copy := *cond
+ copy.Op = syntax.IN
+ fcomp.expr(&copy)
+ fcomp.condjump(CJMP, f, t)
+ return
+ }
+ }
+
+ // general case
+ fcomp.expr(cond)
+ fcomp.condjump(CJMP, t, f)
+}
diff --git a/internal/compile/compile_test.go b/internal/compile/compile_test.go
new file mode 100644
index 0000000..2c9917a
--- /dev/null
+++ b/internal/compile/compile_test.go
@@ -0,0 +1,74 @@
+package compile_test
+
+import (
+ "bytes"
+ "strings"
+ "testing"
+
+ "go.starlark.net/starlark"
+)
+
+// TestSerialization verifies that a serialized program can be loaded,
+// deserialized, and executed.
+func TestSerialization(t *testing.T) {
+ predeclared := starlark.StringDict{
+ "x": starlark.String("mur"),
+ "n": starlark.MakeInt(2),
+ }
+ const src = `
+def mul(a, b):
+ return a * b
+
+y = mul(x, n)
+`
+ _, oldProg, err := starlark.SourceProgram("mul.star", src, predeclared.Has)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ buf := new(bytes.Buffer)
+ if err := oldProg.Write(buf); err != nil {
+ t.Fatalf("oldProg.WriteTo: %v", err)
+ }
+
+ newProg, err := starlark.CompiledProgram(buf)
+ if err != nil {
+ t.Fatalf("CompiledProgram: %v", err)
+ }
+
+ thread := new(starlark.Thread)
+ globals, err := newProg.Init(thread, predeclared)
+ if err != nil {
+ t.Fatalf("newProg.Init: %v", err)
+ }
+ if got, want := globals["y"], starlark.String("murmur"); got != want {
+ t.Errorf("Value of global was %s, want %s", got, want)
+ t.Logf("globals: %v", globals)
+ }
+
+ // Verify stack frame.
+ predeclared["n"] = starlark.None
+ _, err = newProg.Init(thread, predeclared)
+ evalErr, ok := err.(*starlark.EvalError)
+ if !ok {
+ t.Fatalf("newProg.Init call returned err %v, want *EvalError", err)
+ }
+ const want = `Traceback (most recent call last):
+ mul.star:5:8: in <toplevel>
+ mul.star:3:14: in mul
+Error: unknown binary op: string * NoneType`
+ if got := evalErr.Backtrace(); got != want {
+ t.Fatalf("got <<%s>>, want <<%s>>", got, want)
+ }
+}
+
+func TestGarbage(t *testing.T) {
+ const garbage = "This is not a compiled Starlark program."
+ _, err := starlark.CompiledProgram(strings.NewReader(garbage))
+ if err == nil {
+ t.Fatalf("CompiledProgram did not report an error when decoding garbage")
+ }
+ if !strings.Contains(err.Error(), "not a compiled module") {
+ t.Fatalf("CompiledProgram reported the wrong error when decoding garbage: %v", err)
+ }
+}
diff --git a/internal/compile/serial.go b/internal/compile/serial.go
new file mode 100644
index 0000000..adadabf
--- /dev/null
+++ b/internal/compile/serial.go
@@ -0,0 +1,395 @@
+package compile
+
+// This file defines functions to read and write a compile.Program to a file.
+//
+// It is the client's responsibility to avoid version skew between the
+// compiler used to produce a file and the interpreter that consumes it.
+// The version number is provided as a constant.
+// Incompatible protocol changes should also increment the version number.
+//
+// Encoding
+//
+// Program:
+// "!sky" [4]byte # magic number
+// str uint32le # offset of <strings> section
+// version varint # must match Version
+// filename string
+// numloads varint
+// loads []Ident
+// numnames varint
+// names []string
+// numconsts varint
+// consts []Constant
+// numglobals varint
+// globals []Ident
+// toplevel Funcode
+// numfuncs varint
+// funcs []Funcode
+// <strings> []byte # concatenation of all referenced strings
+// EOF
+//
+// Funcode:
+// id Ident
+// doc string
+// code []byte
+// pclinetablen varint
+// pclinetab []varint
+// numlocals varint
+// locals []Ident
+// numcells varint
+// cells []int
+// numfreevars varint
+// freevar []Ident
+// maxstack varint
+// numparams varint
+// numkwonlyparams varint
+// hasvarargs varint (0 or 1)
+// haskwargs varint (0 or 1)
+//
+// Ident:
+// name string
+// line, col varint
+//
+// Constant: # type data
+// type varint # 0=string string
+// data ... # 1=bytes string
+// # 2=int varint
+// # 3=float varint (bits as uint64)
+// # 4=bigint string (decimal ASCII text)
+//
+// The encoding starts with a four-byte magic number.
+// The next four bytes are a little-endian uint32
+// that provides the offset of the string section
+// at the end of the file, which contains the ordered
+// concatenation of all strings referenced by the
+// program. This design permits the decoder to read
+// the first and second parts of the file into different
+// memory allocations: the first (the encoded program)
+// is transient, but the second (the strings) persists
+// for the life of the Program.
+//
+// Within the encoded program, all strings are referred
+// to by their length. As the encoder and decoder process
+// the entire file sequentially, they are in lock step,
+// so the start offset of each string is implicit.
+//
+// Funcode.Code is represented as a []byte slice to permit
+// modification when breakpoints are set. All other strings
+// are represented as strings. They all (unsafely) share the
+// same backing byte slice.
+//
+// Aside from the str field, all integers are encoded as varints.
+
+import (
+ "encoding/binary"
+ "fmt"
+ "math"
+ "math/big"
+ debugpkg "runtime/debug"
+ "unsafe"
+
+ "go.starlark.net/syntax"
+)
+
+const magic = "!sky"
+
+// Encode encodes a compiled Starlark program.
+func (prog *Program) Encode() []byte {
+ var e encoder
+ e.p = append(e.p, magic...)
+ e.p = append(e.p, "????"...) // string data offset; filled in later
+ e.int(Version)
+ e.string(prog.Toplevel.Pos.Filename())
+ e.bindings(prog.Loads)
+ e.int(len(prog.Names))
+ for _, name := range prog.Names {
+ e.string(name)
+ }
+ e.int(len(prog.Constants))
+ for _, c := range prog.Constants {
+ switch c := c.(type) {
+ case string:
+ e.int(0)
+ e.string(c)
+ case Bytes:
+ e.int(1)
+ e.string(string(c))
+ case int64:
+ e.int(2)
+ e.int64(c)
+ case float64:
+ e.int(3)
+ e.uint64(math.Float64bits(c))
+ case *big.Int:
+ e.int(4)
+ e.string(c.Text(10))
+ }
+ }
+ e.bindings(prog.Globals)
+ e.function(prog.Toplevel)
+ e.int(len(prog.Functions))
+ for _, fn := range prog.Functions {
+ e.function(fn)
+ }
+
+ // Patch in the offset of the string data section.
+ binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p)))
+
+ return append(e.p, e.s...)
+}
+
// An encoder accumulates the two sections of an encoded program:
// the varint-encoded program proper, and the string data to which
// it refers by length.
type encoder struct {
	p   []byte                      // encoded program
	s   []byte                      // strings
	tmp [binary.MaxVarintLen64]byte // scratch buffer for varint encoding
}

// int appends x to the program section as a signed varint.
func (e *encoder) int(x int) { e.int64(int64(x)) }

// int64 appends x to the program section as a signed varint.
func (e *encoder) int64(x int64) {
	scratch := e.tmp[:]
	width := binary.PutVarint(scratch, x)
	e.p = append(e.p, scratch[:width]...)
}

// uint64 appends x to the program section as an unsigned varint.
func (e *encoder) uint64(x uint64) {
	scratch := e.tmp[:]
	width := binary.PutUvarint(scratch, x)
	e.p = append(e.p, scratch[:width]...)
}

// string appends the length of s to the program section
// and its contents to the string section.
func (e *encoder) string(s string) {
	e.int(len(s))
	e.s = append(e.s, s...)
}

// bytes appends the length of b to the program section
// and its contents to the string section.
func (e *encoder) bytes(b []byte) {
	e.int(len(b))
	e.s = append(e.s, b...)
}
+
+func (e *encoder) binding(bind Binding) {
+ e.string(bind.Name)
+ e.int(int(bind.Pos.Line))
+ e.int(int(bind.Pos.Col))
+}
+
+func (e *encoder) bindings(binds []Binding) {
+ e.int(len(binds))
+ for _, bind := range binds {
+ e.binding(bind)
+ }
+}
+
+func (e *encoder) function(fn *Funcode) {
+ e.binding(Binding{fn.Name, fn.Pos})
+ e.string(fn.Doc)
+ e.bytes(fn.Code)
+ e.int(len(fn.pclinetab))
+ for _, x := range fn.pclinetab {
+ e.int64(int64(x))
+ }
+ e.bindings(fn.Locals)
+ e.int(len(fn.Cells))
+ for _, index := range fn.Cells {
+ e.int(index)
+ }
+ e.bindings(fn.Freevars)
+ e.int(fn.MaxStack)
+ e.int(fn.NumParams)
+ e.int(fn.NumKwonlyParams)
+ e.int(b2i(fn.HasVarargs))
+ e.int(b2i(fn.HasKwargs))
+}
+
// b2i converts a boolean to an integer: 1 for true, 0 for false.
func b2i(b bool) int {
	if b {
		return 1
	}
	return 0
}
+
+// DecodeProgram decodes a compiled Starlark program from data.
+func DecodeProgram(data []byte) (_ *Program, err error) {
+ if len(data) < len(magic) {
+ return nil, fmt.Errorf("not a compiled module: no magic number")
+ }
+ if got := string(data[:4]); got != magic {
+ return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q",
+ got, magic)
+ }
+ defer func() {
+ if x := recover(); x != nil {
+ debugpkg.PrintStack()
+ err = fmt.Errorf("internal error while decoding program: %v", x)
+ }
+ }()
+
+ offset := binary.LittleEndian.Uint32(data[4:8])
+ d := decoder{
+ p: data[8:offset],
+ s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist
+ }
+
+ if v := d.int(); v != Version {
+ return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version)
+ }
+
+ filename := d.string()
+ d.filename = &filename
+
+ loads := d.bindings()
+
+ names := make([]string, d.int())
+ for i := range names {
+ names[i] = d.string()
+ }
+
+ // constants
+ constants := make([]interface{}, d.int())
+ for i := range constants {
+ var c interface{}
+ switch d.int() {
+ case 0:
+ c = d.string()
+ case 1:
+ c = Bytes(d.string())
+ case 2:
+ c = d.int64()
+ case 3:
+ c = math.Float64frombits(d.uint64())
+ case 4:
+ c, _ = new(big.Int).SetString(d.string(), 10)
+ }
+ constants[i] = c
+ }
+
+ globals := d.bindings()
+ toplevel := d.function()
+ funcs := make([]*Funcode, d.int())
+ for i := range funcs {
+ funcs[i] = d.function()
+ }
+
+ prog := &Program{
+ Loads: loads,
+ Names: names,
+ Constants: constants,
+ Globals: globals,
+ Functions: funcs,
+ Toplevel: toplevel,
+ }
+ toplevel.Prog = prog
+ for _, f := range funcs {
+ f.Prog = prog
+ }
+
+ if len(d.p)+len(d.s) > 0 {
+ return nil, fmt.Errorf("internal error: unconsumed data during decoding")
+ }
+
+ return prog, nil
+}
+
// A decoder consumes the two sections of an encoded program in step:
// each string or byte slice is described by a length in the program
// section and stored in the string section.
//
// Its methods panic on truncated or malformed input.
type decoder struct {
	p        []byte  // encoded program
	s        []byte  // strings
	filename *string // (indirect to avoid keeping decoder live)
}

// int reads a signed varint from the program section.
func (d *decoder) int() int {
	return int(d.int64())
}

// int64 reads a signed varint from the program section.
// It panics if the section is exhausted or the varint is malformed.
func (d *decoder) int64() int64 {
	x, n := binary.Varint(d.p)
	if n <= 0 {
		// n == 0: buffer too small; n < 0: value overflows 64 bits.
		panic("truncated or malformed varint")
	}
	d.p = d.p[n:]
	return x
}

// uint64 reads an unsigned varint from the program section.
// It panics if the section is exhausted or the varint is malformed.
func (d *decoder) uint64() uint64 {
	x, n := binary.Uvarint(d.p)
	if n <= 0 {
		// n == 0: buffer too small; n < 0: value overflows 64 bits.
		panic("truncated or malformed uvarint")
	}
	d.p = d.p[n:]
	return x
}

// string reads a length from the program section and returns that
// many bytes of the string section as a string.
// The result shares memory with the string section.
func (d *decoder) string() (s string) {
	if slice := d.bytes(); len(slice) > 0 {
		// Avoid a memory allocation for each string
		// by unsafely aliasing slice.
		type stringHeader struct {
			data *byte
			len  int
		}
		hdr := (*stringHeader)(unsafe.Pointer(&s))
		hdr.data = &slice[0]
		hdr.len = len(slice)
	}
	return s
}

// bytes reads a length from the program section and returns that
// many bytes of the string section. The capacity of the result is
// limited to its length so that appending to it cannot overwrite
// the data that follows.
// It panics if the length is negative or exceeds the remaining data.
func (d *decoder) bytes() []byte {
	n := d.int()
	if n < 0 || n > len(d.s) {
		panic(fmt.Sprintf("string length %d out of range [0, %d]", n, len(d.s)))
	}
	r := d.s[:n:n]
	d.s = d.s[n:]
	return r
}
+
+func (d *decoder) binding() Binding {
+ name := d.string()
+ line := int32(d.int())
+ col := int32(d.int())
+ return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)}
+}
+
+func (d *decoder) bindings() []Binding {
+ bindings := make([]Binding, d.int())
+ for i := range bindings {
+ bindings[i] = d.binding()
+ }
+ return bindings
+}
+
+func (d *decoder) ints() []int {
+ ints := make([]int, d.int())
+ for i := range ints {
+ ints[i] = d.int()
+ }
+ return ints
+}
+
+func (d *decoder) bool() bool { return d.int() != 0 }
+
+func (d *decoder) function() *Funcode {
+ id := d.binding()
+ doc := d.string()
+ code := d.bytes()
+ pclinetab := make([]uint16, d.int())
+ for i := range pclinetab {
+ pclinetab[i] = uint16(d.int())
+ }
+ locals := d.bindings()
+ cells := d.ints()
+ freevars := d.bindings()
+ maxStack := d.int()
+ numParams := d.int()
+ numKwonlyParams := d.int()
+ hasVarargs := d.int() != 0
+ hasKwargs := d.int() != 0
+ return &Funcode{
+ // Prog is filled in later.
+ Pos: id.Pos,
+ Name: id.Name,
+ Doc: doc,
+ Code: code,
+ pclinetab: pclinetab,
+ Locals: locals,
+ Cells: cells,
+ Freevars: freevars,
+ MaxStack: maxStack,
+ NumParams: numParams,
+ NumKwonlyParams: numKwonlyParams,
+ HasVarargs: hasVarargs,
+ HasKwargs: hasKwargs,
+ }
+}
diff --git a/internal/spell/spell.go b/internal/spell/spell.go
new file mode 100644
index 0000000..7739fab
--- /dev/null
+++ b/internal/spell/spell.go
@@ -0,0 +1,115 @@
+// Package spell defines a simple spelling checker for use in attribute errors
+// such as "no such field .foo; did you mean .food?".
+package spell
+
+import (
+ "strings"
+ "unicode"
+)
+
+// Nearest returns the element of candidates
+// nearest to x using the Levenshtein metric,
+// or "" if none were promising.
+func Nearest(x string, candidates []string) string {
+ // Ignore underscores and case when matching.
+ fold := func(s string) string {
+ return strings.Map(func(r rune) rune {
+ if r == '_' {
+ return -1
+ }
+ return unicode.ToLower(r)
+ }, s)
+ }
+
+ x = fold(x)
+
+ var best string
+ bestD := (len(x) + 1) / 2 // allow up to 50% typos
+ for _, c := range candidates {
+ d := levenshtein(x, fold(c), bestD)
+ if d < bestD {
+ bestD = d
+ best = c
+ }
+ }
+ return best
+}
+
// levenshtein returns the non-negative Levenshtein edit distance
// between the byte strings x and y.
//
// If the computed distance exceeds max,
// the function may return early with an approximate value > max.
func levenshtein(x, y string, max int) int {
	// This implementation is derived from one by Laurent Le Brun in
	// Bazel that uses the single-row space efficiency trick
	// described at bitbucket.org/clearer/iosifovich.

	// Ensure x is the shorter string.
	if len(y) < len(x) {
		x, y = y, x
	}

	// Drop the common prefix, up to the first mismatch.
	for i := 0; i < len(x); i++ {
		if x[i] == y[i] {
			continue
		}
		x, y = x[i:], y[i:]
		break
	}
	if len(x) == 0 {
		return len(y)
	}

	// y is at least as long as x, so the difference is non-negative.
	if gap := len(y) - len(x); gap > max {
		return gap // excessive length divergence
	}

	// row[j] holds the distance between the prefix of x processed
	// so far and the first j bytes of y.
	row := make([]int, len(y)+1)
	for j := range row {
		row[j] = j
	}

	for i := 1; i <= len(x); i++ {
		diag := i - 1 // value of row[j-1] from the previous row
		row[0] = i
		rowMin := i
		for j := 1; j <= len(y); j++ {
			cost := diag // substitution
			if x[i-1] != y[j-1] {
				cost++
			}
			if del := row[j-1] + 1; del < cost { // deletion
				cost = del
			}
			if ins := row[j] + 1; ins < cost { // insertion
				cost = ins
			}
			diag, row[j] = row[j], cost
			if cost < rowMin {
				rowMin = cost
			}
		}
		// Every entry of this row already exceeds max: give up early.
		if rowMin > max {
			return rowMin
		}
	}
	return row[len(y)]
}
+
// b2i returns 1 if b is true and 0 otherwise.
func b2i(b bool) int {
	var n int
	if b {
		n = 1
	}
	return n
}
+
// min returns the smaller of x and y.
func min(x, y int) int {
	if y < x {
		return y
	}
	return x
}
+
// abs returns the absolute value of x.
func abs(x int) int {
	if x < 0 {
		return -x
	}
	return x
}
diff --git a/lib/proto/cmd/star2proto/star2proto.go b/lib/proto/cmd/star2proto/star2proto.go
new file mode 100644
index 0000000..7911723
--- /dev/null
+++ b/lib/proto/cmd/star2proto/star2proto.go
@@ -0,0 +1,142 @@
+// The star2proto command executes a Starlark file and prints a protocol
+// message, which it expects to find in a module-level variable named 'result'.
+//
+// THIS COMMAND IS EXPERIMENTAL AND ITS INTERFACE MAY CHANGE.
+package main
+
+// TODO(adonovan): add features to make this a useful tool for querying,
+// converting, and building messages in proto, JSON, and YAML.
+// - define operations for reading and writing files.
+// - support (e.g.) querying a proto file given a '-e expr' flag.
+// This will need a convenient way to put the relevant descriptors in scope.
+
+import (
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "log"
+ "os"
+ "strings"
+
+ starlarkproto "go.starlark.net/lib/proto"
+ "go.starlark.net/resolve"
+ "go.starlark.net/starlark"
+ "go.starlark.net/starlarkjson"
+ "google.golang.org/protobuf/encoding/protojson"
+ "google.golang.org/protobuf/encoding/prototext"
+ "google.golang.org/protobuf/proto"
+ "google.golang.org/protobuf/reflect/protodesc"
+ "google.golang.org/protobuf/reflect/protoreflect"
+ "google.golang.org/protobuf/reflect/protoregistry"
+ "google.golang.org/protobuf/types/descriptorpb"
+)
+
+ // Command-line flags. They are registered here and read by main after flag.Parse.
+ var (
+ outputFlag = flag.String("output", "text", "output format (text, wire, json)") // selects the marshaller in main
+ varFlag = flag.String("var", "result", "the variable to output") // name of the module-level variable to print
+ // NOTE(review): the help text below says FileDescriptorProto, but main decodes
+ // each file as a descriptorpb.FileDescriptorSet; confirm which wording is intended.
+ descriptors = flag.String("descriptors", "", "comma-separated list of names of files containing proto.FileDescriptorProto messages")
+ )
+
+ // Starlark dialect flags: each flag is bound directly to an option variable of the resolve package.
+ func init() {
+ flag.BoolVar(&resolve.AllowFloat, "fp", true, "allow floating-point numbers") // defaults to true, unlike the flags below
+ flag.BoolVar(&resolve.AllowSet, "set", resolve.AllowSet, "allow set data type") // default is the package's current setting
+ flag.BoolVar(&resolve.AllowLambda, "lambda", resolve.AllowLambda, "allow lambda expressions")
+ flag.BoolVar(&resolve.AllowNestedDef, "nesteddef", resolve.AllowNestedDef, "allow nested def statements")
+ }
+
+ // main parses the command line, optionally loads a descriptor set,
+ // executes the Starlark file named by the sole positional argument,
+ // and writes the message held in the -var variable to standard output
+ // in the encoding selected by -output.
+ // Every failure is reported on standard error and exits with status 1.
+ func main() {
+ 	log.SetPrefix("star2proto: ")
+ 	log.SetFlags(0)
+ 	flag.Parse()
+ 	if flag.NArg() != 1 {
+ 		fatalf("requires a single Starlark file name")
+ 	}
+ 	filename := flag.Arg(0)
+
+ 	// By default, use the linked-in descriptors
+ 	// (very few in star2proto, e.g. descriptorpb itself).
+ 	pool := protoregistry.GlobalFiles
+
+ 	// Load a user-provided FileDescriptorSet produced by a command such as:
+ 	// $ protoc --descriptor_set_out=foo.fds foo.proto
+ 	if *descriptors != "" {
+ 		var fdset descriptorpb.FileDescriptorSet
+ 		// The loop variable has its own name so that it does not
+ 		// shadow the Starlark file name declared above.
+ 		for i, descFile := range strings.Split(*descriptors, ",") {
+ 			data, err := ioutil.ReadFile(descFile)
+ 			if err != nil {
+ 				fatalf("--descriptors[%d]: %s", i, err)
+ 			}
+ 			// Accumulate into the repeated field of FileDescriptors.
+ 			if err := (proto.UnmarshalOptions{Merge: true}).Unmarshal(data, &fdset); err != nil {
+ 				fatalf("%s does not contain a proto2.FileDescriptorSet: %v", descFile, err)
+ 			}
+ 		}
+
+ 		files, err := protodesc.NewFiles(&fdset)
+ 		if err != nil {
+ 			fatalf("protodesc.NewFiles: could not build FileDescriptor index: %v", err)
+ 		}
+ 		pool = files
+ 	}
+
+ 	// Execute the Starlark file.
+ 	thread := &starlark.Thread{
+ 		Print: func(_ *starlark.Thread, msg string) { fmt.Println(msg) },
+ 	}
+ 	starlarkproto.SetPool(thread, pool)
+ 	predeclared := starlark.StringDict{
+ 		"proto": starlarkproto.Module,
+ 		"json":  starlarkjson.Module,
+ 	}
+ 	globals, err := starlark.ExecFile(thread, filename, nil, predeclared)
+ 	if err != nil {
+ 		// An evaluation error carries a Starlark backtrace; prefer it.
+ 		if evalErr, ok := err.(*starlark.EvalError); ok {
+ 			fatalf("%s", evalErr.Backtrace())
+ 		}
+ 		fatalf("%s", err)
+ 	}
+
+ 	// Print the output variable as a message.
+ 	// TODO(adonovan): this is clumsy.
+ 	// Let the user call print(), or provide an expression on the command line.
+ 	result, ok := globals[*varFlag]
+ 	if !ok {
+ 		fatalf("%s must define a module-level variable named %q", filename, *varFlag)
+ 	}
+ 	msgwrap, ok := result.(*starlarkproto.Message)
+ 	if !ok {
+ 		fatalf("got %s, want proto.Message, for %q", result.Type(), *varFlag)
+ 	}
+ 	msg := msgwrap.Message()
+
+ 	// -output
+ 	var marshal func(protoreflect.ProtoMessage) ([]byte, error)
+ 	switch *outputFlag {
+ 	case "wire":
+ 		marshal = proto.Marshal
+
+ 	case "text":
+ 		marshal = prototext.MarshalOptions{Multiline: true, Indent: "\t"}.Marshal
+
+ 	case "json":
+ 		marshal = protojson.MarshalOptions{Multiline: true, Indent: "\t"}.Marshal
+
+ 	default:
+ 		fatalf("unsupported -output format: %s", *outputFlag)
+ 	}
+ 	data, err := marshal(msg)
+ 	if err != nil {
+ 		fatalf("%s", err)
+ 	}
+ 	// Report a failed write (for example a closed pipe) instead of
+ 	// exiting successfully with truncated output.
+ 	if _, err := os.Stdout.Write(data); err != nil {
+ 		fatalf("writing output: %v", err)
+ 	}
+ }
+
+ // fatalf prints the formatted message to standard error, prefixed by
+ // "star2proto: " and followed by a newline, then exits with status 1.
+ func fatalf(format string, args ...interface{}) {
+ 	msg := fmt.Sprintf(format, args...)
+ 	fmt.Fprintln(os.Stderr, "star2proto: "+msg)
+ 	os.Exit(1)
+ }
diff --git a/lib/proto/proto.go b/lib/proto/proto.go
new file mode 100644
index 0000000..149162d
--- /dev/null
+++ b/lib/proto/proto.go
@@ -0,0 +1,1232 @@
+// Copyright 2020 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package proto defines a module of utilities for constructing and
+// accessing protocol messages within Starlark programs.
+//
+// THIS PACKAGE IS EXPERIMENTAL AND ITS INTERFACE MAY CHANGE.
+//
+// This package defines several types of Starlark value:
+//
+// Message -- a protocol message
+// RepeatedField -- a repeated field of a message, like a list
+//
+// FileDescriptor -- information about a .proto file
+// FieldDescriptor -- information about a message field (or extension field)
+// MessageDescriptor -- information about the type of a message
+// EnumDescriptor -- information about an enumerated type
+// EnumValueDescriptor -- a value of an enumerated type
+//
+// A Message value is a wrapper around a protocol message instance.
+// Starlark programs may access and update Messages using dot notation:
+//
+// x = msg.field
+// msg.field = x + 1
+// msg.field += 1
+//
+// Assignments to message fields perform dynamic checks on the type and
+// range of the value to ensure that the message is at all times valid.
+//
+// The value of a repeated field of a message is represented by the
+// list-like data type, RepeatedField. Its elements may be accessed,
+// iterated, and updated in the usual ways. As with assignments to
+// message fields, an assignment to an element of a RepeatedField
+// performs a dynamic check to ensure that the RepeatedField holds
+// only elements of the correct type.
+//
+// type(msg.uint32s) # "proto.repeated<uint32>"
+// msg.uint32s[0] = 1
+// msg.uint32s[0] = -1 # error: invalid uint32: -1
+//
+// Any iterable may be assigned to a repeated field of a message. If
+// the iterable is itself a value of type RepeatedField, the message
+// field holds a reference to it.
+//
+// msg2.uint32s = msg.uint32s # both messages share one RepeatedField
+// msg.uint32s[0] = 123
+// print(msg2.uint32s[0]) # "123"
+//
+// The RepeatedFields' element types must match.
+// It is not enough for the values to be merely valid:
+//
+// msg.uint32s = [1, 2, 3] # makes a copy
+// msg.uint64s = msg.uint32s # error: repeated field has wrong type
+// msg.uint64s = list(msg.uint32s) # ok; makes a copy
+//
+// For all other iterables, a new RepeatedField is constructed from the
+// elements of the iterable.
+//
+ // msg.uint32s = [1, 2, 3]
+ // print(type(msg.uint32s)) # "proto.repeated<uint32>"
+//
+//
+// To construct a Message from encoded binary or text data, call
+// Unmarshal or UnmarshalText. These two functions are exposed to
+// Starlark programs as proto.unmarshal{,_text}.
+//
+// To construct a Message from an existing Go proto.Message instance,
+// you must first encode the Go message to binary, then decode it using
+// Unmarshal. This ensures that messages visible to Starlark are
+// encapsulated and cannot be mutated once their Starlark wrapper values
+// are frozen.
+//
+// TODO(adonovan): document descriptors, enums, message instantiation.
+//
+// See proto_test.go for an example of how to use the 'proto'
+// module in an application that embeds Starlark.
+//
+package proto
+
+// TODO(adonovan): Go and Starlark API improvements:
+// - Make Message and RepeatedField comparable.
+// (NOTE: proto.Equal works only with generated message types.)
+// - Support maps, oneof, any. But not messageset if we can avoid it.
+// - Support "well-known types".
+// - Defend against cycles in object graph.
+// - Test missing required fields in marshalling.
+
+import (
+ "bytes"
+ "fmt"
+ "sort"
+ "strings"
+ "unsafe"
+ _ "unsafe" // for linkname hack
+
+ "google.golang.org/protobuf/encoding/prototext"
+ "google.golang.org/protobuf/proto"
+ "google.golang.org/protobuf/reflect/protoreflect"
+ "google.golang.org/protobuf/reflect/protoregistry"
+ "google.golang.org/protobuf/types/dynamicpb"
+
+ "go.starlark.net/starlark"
+ "go.starlark.net/starlarkstruct"
+ "go.starlark.net/syntax"
+)
+
+ // SetPool associates with the specified Starlark thread the
+ // descriptor pool used to find descriptors for .proto files and to
+ // instantiate messages from descriptors. Clients must call SetPool
+ // for a Starlark thread to use this package.
+ //
+ // The pool is stored as a thread-local value and can be read back with Pool.
+ //
+ // For example:
+ // SetPool(thread, protoregistry.GlobalFiles)
+ //
+ func SetPool(thread *starlark.Thread, pool DescriptorPool) {
+ thread.SetLocal(contextKey, pool)
+ }
+
+ // Pool returns the descriptor pool previously associated with this
+ // thread by SetPool, or nil if the thread has none.
+ func Pool(thread *starlark.Thread) DescriptorPool {
+ 	if pool, ok := thread.Local(contextKey).(DescriptorPool); ok {
+ 		return pool
+ 	}
+ 	return nil
+ }
+
+ // contextKey is the thread-local key under which the pool is stored.
+ const contextKey = "proto.DescriptorPool"
+
+ // A DescriptorPool loads FileDescriptors by path name or package name,
+ // possibly on demand.
+ //
+ // It is a superinterface of protodesc.Resolver, so any Resolver
+ // implementation is a valid pool. Examples include
+ // protoregistry.GlobalFiles, which loads FileDescriptors from the
+ // compressed binary information in all the *.pb.go files linked into
+ // the process; and protodesc.NewFiles, which holds a set of
+ // FileDescriptorSet messages. See star2proto for example usage.
+ type DescriptorPool interface {
+ FindFileByPath(string) (protoreflect.FileDescriptor, error)
+ }
+
+ // Module is the Starlark module named "proto" whose members are the
+ // built-in functions defined in this package.
+ var Module = &starlarkstruct.Module{
+ Name: "proto",
+ Members: starlark.StringDict{
+ "file": starlark.NewBuiltin("proto.file", file),
+ "has": starlark.NewBuiltin("proto.has", has),
+ "marshal": starlark.NewBuiltin("proto.marshal", marshal),
+ "marshal_text": starlark.NewBuiltin("proto.marshal_text", marshal), // same Go function; it selects the encoding from the builtin's name
+ "set_field": starlark.NewBuiltin("proto.set_field", setFieldStarlark),
+ "get_field": starlark.NewBuiltin("proto.get_field", getFieldStarlark),
+ "unmarshal": starlark.NewBuiltin("proto.unmarshal", unmarshal),
+ "unmarshal_text": starlark.NewBuiltin("proto.unmarshal_text", unmarshal_text),
+
+ // TODO(adonovan):
+ // - merge(msg, msg) -> msg
+ // - equals(msg, msg) -> bool
+ // - diff(msg, msg) -> string
+ // - clone(msg) -> msg
+ },
+ }
+
+ // file(filename) loads the FileDescriptor of the given name, or the
+ // first if the pool contains more than one.
+ // It fails if no pool has been associated with the thread.
+ //
+ // It's unfortunate that renaming a .proto file in effect breaks the
+ // interface it presents to Starlark. Ideally one would import
+ // descriptors by package name, but there may be many FileDescriptors
+ // for the same package name, and there is no "package descriptor".
+ // (Technically a pool may also have many FileDescriptors with the same
+ // file name, but this can't happen with a single consistent snapshot.)
+ func file(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ 	var path string
+ 	err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 1, &path)
+ 	if err != nil {
+ 		return nil, err
+ 	}
+
+ 	pool := Pool(thread)
+ 	if pool == nil {
+ 		return nil, fmt.Errorf("internal error: SetPool was not called")
+ 	}
+
+ 	fd, err := pool.FindFileByPath(path)
+ 	if err != nil {
+ 		return nil, err
+ 	}
+ 	return FileDescriptor{Desc: fd}, nil
+ }
+
+ // has(msg, field) reports whether the specified field of the message is present.
+ // A field may be specified by name (string) or FieldDescriptor.
+ // has reports an error if the message type has no such field.
+ func has(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ 	var msgArg, fieldArg starlark.Value
+ 	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &msgArg, &fieldArg); err != nil {
+ 		return nil, err
+ 	}
+ 	msg, ok := msgArg.(*Message)
+ 	if !ok {
+ 		return nil, fmt.Errorf("%s: got %s, want proto.Message", fn.Name(), msgArg.Type())
+ 	}
+
+ 	// Resolve the field argument to a descriptor of this message type.
+ 	var fdesc protoreflect.FieldDescriptor
+ 	switch f := fieldArg.(type) {
+ 	case starlark.String:
+ 		byName, err := fieldDesc(msg.desc(), string(f))
+ 		if err != nil {
+ 			return nil, err
+ 		}
+ 		fdesc = byName
+
+ 	case FieldDescriptor:
+ 		// A descriptor belonging to another message type is rejected.
+ 		if f.Desc.ContainingMessage() != msg.desc() {
+ 			return nil, fmt.Errorf("%s: %v does not have field %v", fn.Name(), msg.desc().FullName(), f)
+ 		}
+ 		fdesc = f.Desc
+
+ 	default:
+ 		return nil, fmt.Errorf("%s: for field argument, got %s, want string or proto.FieldDescriptor", fn.Name(), fieldArg.Type())
+ 	}
+
+ 	present := msg.msg.Has(fdesc)
+ 	return starlark.Bool(present), nil
+ }
+
+ // marshal{,_text}(msg) encodes a Message value to binary or text form.
+ // One Go function serves both builtins: the name "proto.marshal"
+ // selects the binary encoding and any other name the text encoding.
+ func marshal(_ *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ 	var m *Message
+ 	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 1, &m); err != nil {
+ 		return nil, err
+ 	}
+
+ 	if fn.Name() != "proto.marshal" {
+ 		// Text form, returned as a Starlark string.
+ 		text, err := prototext.MarshalOptions{Indent: " "}.Marshal(m.Message())
+ 		if err != nil {
+ 			return nil, fmt.Errorf("%s: %v", fn.Name(), err)
+ 		}
+ 		return starlark.String(text), nil
+ 	}
+
+ 	// Binary form, returned as Starlark bytes.
+ 	data, err := proto.Marshal(m.Message())
+ 	if err != nil {
+ 		return nil, fmt.Errorf("%s: %v", fn.Name(), err)
+ 	}
+ 	return starlark.Bytes(data), nil
+ }
+
+ // unmarshal(desc, data) decodes a binary protocol message of the type described by the MessageDescriptor desc to a Message.
+ func unmarshal(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ var desc MessageDescriptor
+ var data starlark.Bytes
+ if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &desc, &data); err != nil {
+ return nil, err
+ }
+ return unmarshalData(desc.Desc, []byte(data), true)
+ }
+
+ // unmarshal_text(desc, data) decodes a text protocol message of the type described by the MessageDescriptor desc to a Message.
+ func unmarshal_text(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ var desc MessageDescriptor
+ var data string
+ if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &desc, &data); err != nil {
+ return nil, err
+ }
+ return unmarshalData(desc.Desc, []byte(data), false)
+ }
+
+ // set_field(msg, field, value) updates the value of a field.
+ // It is typically used for extensions, which cannot be updated using msg.field = v notation.
+ // It fails if the message is frozen or if the field does not belong
+ // to the message's type; on success it returns None.
+ func setFieldStarlark(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ 	// TODO(adonovan): allow field to be specified by name (for non-extension fields), like has?
+ 	var (
+ 		msg   *Message
+ 		field FieldDescriptor
+ 		value starlark.Value
+ 	)
+ 	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 3, &msg, &field, &value); err != nil {
+ 		return nil, err
+ 	}
+
+ 	msgType := msg.desc()
+ 	if *msg.frozen {
+ 		return nil, fmt.Errorf("%s: cannot set %v field of frozen %v message", fn.Name(), field, msgType.FullName())
+ 	}
+ 	if field.Desc.ContainingMessage() != msgType {
+ 		return nil, fmt.Errorf("%s: %v does not have field %v", fn.Name(), msgType.FullName(), field)
+ 	}
+
+ 	err := setField(msg.msg, field.Desc, value)
+ 	return starlark.None, err
+ }
+
+ // get_field(msg, field) retrieves the value of a field.
+ // It is typically used for extension fields, which cannot be accessed using msg.field notation.
+ // It fails if the field does not belong to the message's type.
+ func getFieldStarlark(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ 	// TODO(adonovan): allow field to be specified by name (for non-extension fields), like has?
+ 	var (
+ 		m     *Message
+ 		field FieldDescriptor
+ 	)
+ 	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &m, &field); err != nil {
+ 		return nil, err
+ 	}
+
+ 	if owner := field.Desc.ContainingMessage(); owner != m.desc() {
+ 		return nil, fmt.Errorf("%s: %v does not have field %v", fn.Name(), m.desc().FullName(), field)
+ 	}
+ 	return m.getField(field.Desc), nil
+ }
+
+ // The CallInternal method implements the starlark.Callable interface.
+ // When a message descriptor is called, it returns a new instance of the
+ // protocol message it describes.
+ //
+ // Message(msg) -- return a shallow copy of an existing message
+ // Message(k=v, ...) -- return a new message with the specified fields
+ // Message(dict(...)) -- return a new message with the specified fields
+ //
+ func (d MessageDescriptor) CallInternal(thread *starlark.Thread, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ dest := &Message{
+ msg: newMessage(d.Desc),
+ frozen: new(bool),
+ }
+
+ // Single positional argument? It may not be combined with named arguments.
+ if len(args) > 0 {
+ if len(kwargs) > 0 {
+ return nil, fmt.Errorf("%s: got both positional and named arguments", d.Desc.Name())
+ }
+ if len(args) > 1 {
+ return nil, fmt.Errorf("%s: got %d positional arguments, want at most 1", d.Desc.Name(), len(args))
+ }
+
+ // Keep consistent with MessageKind case of toProto.
+ // (support the same argument types).
+ switch src := args[0].(type) {
+ case *Message:
+ if dest.desc() != src.desc() {
+ return nil, fmt.Errorf("%s: got message of type %s, want type %s", d.Desc.Name(), src.desc().FullName(), dest.desc().FullName())
+ }
+
+ // Make shallow copy of message: each populated field value of src is set on dest as is.
+ // TODO(adonovan): How does frozen work if we have shallow copy?
+ src.msg.Range(func(fdesc protoreflect.FieldDescriptor, v protoreflect.Value) bool {
+ dest.msg.Set(fdesc, v)
+ return true
+ })
+ return dest, nil
+
+ case *starlark.Dict:
+ kwargs = src.Items()
+ // Control leaves the switch here and continues at setFields below.
+
+ default:
+ return nil, fmt.Errorf("%s: got %s, want dict or message", d.Desc.Name(), src.Type())
+ }
+ }
+
+ // Convert named arguments (or dict items) to field values.
+ err := setFields(dest.msg, kwargs)
+ return dest, err
+ }
+
+ // setFields updates msg as if by msg.name=value for each (name, value) in items.
+ // It stops at the first failure, leaving earlier assignments in place.
+ func setFields(msg protoreflect.Message, items []starlark.Tuple) error {
+ 	desc := msg.Descriptor()
+ 	for _, kv := range items {
+ 		key := kv[0]
+ 		name, isString := starlark.AsString(key)
+ 		if !isString {
+ 			return fmt.Errorf("got %s, want string", key.Type())
+ 		}
+
+ 		fdesc, err := fieldDesc(desc, name)
+ 		if err != nil {
+ 			return err
+ 		}
+
+ 		err = setField(msg, fdesc, kv[1])
+ 		if err != nil {
+ 			return err
+ 		}
+ 	}
+ 	return nil
+ }
+
+ // setField validates a Starlark field value, converts it to canonical form,
+ // and assigns to the field of msg. If value is None, the field is unset.
+ //
+ // For a repeated field, value must be iterable; its elements replace
+ // the field's previous contents. If any element fails conversion the
+ // message is left unchanged.
+ func setField(msg protoreflect.Message, fdesc protoreflect.FieldDescriptor, value starlark.Value) error {
+ 	// None unsets a field.
+ 	if value == starlark.None {
+ 		msg.Clear(fdesc)
+ 		return nil
+ 	}
+
+ 	// Assigning to a repeated field must make a copy,
+ 	// because the fields.Set doesn't specify whether
+ 	// it aliases the list or not, so we cannot assume.
+ 	//
+ 	// This is potentially surprising as
+ 	// x = []; msg.x = x; y = msg.x
+ 	// causes x and y not to alias.
+ 	if fdesc.IsList() {
+ 		iter := starlark.Iterate(value)
+ 		if iter == nil {
+ 			return fmt.Errorf("got %s for .%s field, want iterable", value.Type(), fdesc.Name())
+ 		}
+ 		defer iter.Done()
+
+ 		// Convert every element before touching the message, so
+ 		// that a conversion error leaves the field unmodified and
+ 		// so that assigning a field to itself reads the old
+ 		// elements before they are discarded.
+ 		var elems []protoreflect.Value
+ 		var x starlark.Value
+ 		for i := 0; iter.Next(&x); i++ {
+ 			v, err := toProto(fdesc, x)
+ 			if err != nil {
+ 				return fmt.Errorf("index %d: %v", i, err)
+ 			}
+ 			elems = append(elems, v)
+ 		}
+
+ 		// TODO(adonovan): handle maps
+ 		list := msg.Mutable(fdesc).List()
+ 		// Assignment replaces the old contents; without the
+ 		// truncation, the new elements would be appended to them.
+ 		list.Truncate(0)
+ 		for _, v := range elems {
+ 			list.Append(v)
+ 		}
+ 		return nil
+ 	}
+
+ 	v, err := toProto(fdesc, value)
+ 	if err != nil {
+ 		return fmt.Errorf("in field %s: %v", fdesc.Name(), err)
+ 	}
+
+ 	if fdesc.IsExtension() {
+ 		// The protoreflect.Message.NewField method must be able
+ 		// to return a new instance of the field type. Without
+ 		// having the Go type information available for extensions,
+ 		// the implementation of NewField won't know what to do.
+ 		//
+ 		// Thus we must augment the FieldDescriptor to one that
+ 		// additionally holds Go representation type information
+ 		// (based in this case on dynamicpb).
+ 		fdesc = dynamicpb.NewExtensionType(fdesc).TypeDescriptor()
+ 		_ = fdesc.(protoreflect.ExtensionTypeDescriptor)
+ 	}
+
+ 	msg.Set(fdesc, v)
+ 	return nil
+ }
+
+ // toProto converts a Starlark value for a message field into protoreflect form.
+ //
+ // The kind of fdesc determines which Starlark types are accepted.
+ // Integers are range-checked against the field's width. A value of an
+ // unacceptable type yields an error of the form "got T, want K".
+ func toProto(fdesc protoreflect.FieldDescriptor, v starlark.Value) (protoreflect.Value, error) {
+ 	switch fdesc.Kind() {
+ 	case protoreflect.BoolKind:
+ 		// To avoid mistakes, we require v be exactly a bool.
+ 		if v, ok := v.(starlark.Bool); ok {
+ 			return protoreflect.ValueOfBool(bool(v)), nil
+ 		}
+
+ 	case protoreflect.Fixed32Kind,
+ 		protoreflect.Uint32Kind:
+ 		// uint32
+ 		if i, ok := v.(starlark.Int); ok {
+ 			if u, ok := i.Uint64(); ok && uint64(uint32(u)) == u {
+ 				return protoreflect.ValueOfUint32(uint32(u)), nil
+ 			}
+ 			return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i)
+ 		}
+
+ 	case protoreflect.Int32Kind,
+ 		protoreflect.Sfixed32Kind,
+ 		protoreflect.Sint32Kind:
+ 		// int32
+ 		if i, ok := v.(starlark.Int); ok {
+ 			if i, ok := i.Int64(); ok && int64(int32(i)) == i {
+ 				return protoreflect.ValueOfInt32(int32(i)), nil
+ 			}
+ 			return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i)
+ 		}
+
+ 	case protoreflect.Uint64Kind,
+ 		protoreflect.Fixed64Kind:
+ 		// uint64
+ 		if i, ok := v.(starlark.Int); ok {
+ 			if u, ok := i.Uint64(); ok {
+ 				return protoreflect.ValueOfUint64(u), nil
+ 			}
+ 			return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i)
+ 		}
+
+ 	case protoreflect.Int64Kind,
+ 		protoreflect.Sfixed64Kind,
+ 		protoreflect.Sint64Kind:
+ 		// int64
+ 		if i, ok := v.(starlark.Int); ok {
+ 			if i, ok := i.Int64(); ok {
+ 				return protoreflect.ValueOfInt64(i), nil
+ 			}
+ 			return noValue, fmt.Errorf("invalid %s: %v", typeString(fdesc), i)
+ 		}
+
+ 	case protoreflect.StringKind:
+ 		if s, ok := starlark.AsString(v); ok {
+ 			return protoreflect.ValueOfString(s), nil
+ 		} else if b, ok := v.(starlark.Bytes); ok {
+ 			// TODO(adonovan): allow bytes for string? Not friendly to a Java port.
+ 			//
+ 			// The result must hold a Go string, the representation
+ 			// of a string field, rather than a []byte.
+ 			return protoreflect.ValueOfString(string(b)), nil
+ 		}
+
+ 	case protoreflect.BytesKind:
+ 		if s, ok := starlark.AsString(v); ok {
+ 			// TODO(adonovan): don't allow string for bytes: it's hostile to a Java port.
+ 			// Instead provide b"..." literals in the core
+ 			// and a bytes(str) conversion.
+ 			return protoreflect.ValueOfBytes([]byte(s)), nil
+ 		} else if b, ok := v.(starlark.Bytes); ok {
+ 			return protoreflect.ValueOfBytes([]byte(b)), nil
+ 		}
+
+ 	case protoreflect.DoubleKind:
+ 		switch v := v.(type) {
+ 		case starlark.Float:
+ 			return protoreflect.ValueOfFloat64(float64(v)), nil
+ 		case starlark.Int:
+ 			return protoreflect.ValueOfFloat64(float64(v.Float())), nil
+ 		}
+
+ 	case protoreflect.FloatKind:
+ 		switch v := v.(type) {
+ 		case starlark.Float:
+ 			return protoreflect.ValueOfFloat32(float32(v)), nil
+ 		case starlark.Int:
+ 			return protoreflect.ValueOfFloat32(float32(v.Float())), nil
+ 		}
+
+ 	case protoreflect.GroupKind,
+ 		protoreflect.MessageKind:
+ 		// Keep consistent with MessageDescriptor.CallInternal!
+ 		desc := fdesc.Message()
+ 		switch v := v.(type) {
+ 		case *Message:
+ 			if desc != v.desc() {
+ 				return noValue, fmt.Errorf("got %s, want %s", v.desc().FullName(), desc.FullName())
+ 			}
+ 			return protoreflect.ValueOfMessage(v.msg), nil // alias it directly
+
+ 		case *starlark.Dict:
+ 			// A dict is converted to a new message of the field's type.
+ 			dest := newMessage(desc)
+ 			err := setFields(dest, v.Items())
+ 			return protoreflect.ValueOfMessage(dest), err
+ 		}
+
+ 	case protoreflect.EnumKind:
+ 		enumval, err := enumValueOf(fdesc.Enum(), v)
+ 		if err != nil {
+ 			return noValue, err
+ 		}
+ 		return protoreflect.ValueOfEnum(enumval.Number()), nil
+ 	}
+
+ 	// v's type is not acceptable for this kind of field.
+ 	return noValue, fmt.Errorf("got %s, want %s", v.Type(), typeString(fdesc))
+ }
+
+ // noValue is the zero protoreflect.Value, returned alongside an error.
+ var noValue protoreflect.Value
+
+ // toStarlark returns a Starlark value for the value x of a message field.
+ // If the result is a repeated field or message,
+ // the result aliases the original and has the specified "frozenness" flag.
+ //
+ // typ is only used for the type, not other properties of the field.
+ func toStarlark(typ protoreflect.FieldDescriptor, x protoreflect.Value, frozen *bool) starlark.Value {
+ if list, ok := x.Interface().(protoreflect.List); ok { // repeated field: wrap the list itself
+ return &RepeatedField{
+ typ: typ,
+ list: list,
+ frozen: frozen,
+ }
+ }
+ return toStarlark1(typ, x, frozen)
+ }
+
+ // toStarlark1 is like toStarlark, but for scalar (non-repeated) values only. It panics if typ has a kind not handled below.
+ func toStarlark1(typ protoreflect.FieldDescriptor, x protoreflect.Value, frozen *bool) starlark.Value {
+
+ switch typ.Kind() {
+ case protoreflect.BoolKind:
+ return starlark.Bool(x.Bool())
+
+ case protoreflect.Fixed32Kind,
+ protoreflect.Uint32Kind,
+ protoreflect.Uint64Kind,
+ protoreflect.Fixed64Kind:
+ return starlark.MakeUint64(x.Uint()) // all unsigned kinds, 32- or 64-bit
+
+ case protoreflect.Int32Kind,
+ protoreflect.Sfixed32Kind,
+ protoreflect.Sint32Kind,
+ protoreflect.Int64Kind,
+ protoreflect.Sfixed64Kind,
+ protoreflect.Sint64Kind:
+ return starlark.MakeInt64(x.Int()) // all signed kinds, 32- or 64-bit
+
+ case protoreflect.StringKind:
+ return starlark.String(x.String())
+
+ case protoreflect.BytesKind:
+ return starlark.Bytes(x.Bytes())
+
+ case protoreflect.DoubleKind, protoreflect.FloatKind:
+ return starlark.Float(x.Float())
+
+ case protoreflect.GroupKind, protoreflect.MessageKind:
+ return &Message{ // wraps the message itself; the wrapper shares the caller's frozen flag
+ msg: x.Message(),
+ frozen: frozen,
+ }
+
+ case protoreflect.EnumKind:
+ // Invariant: only EnumValueDescriptor may appear here.
+ // NOTE(review): ByNumber presumably yields nil for a number with no declared enum value; confirm.
+ enumval := typ.Enum().Values().ByNumber(x.Enum())
+ return EnumValueDescriptor{Desc: enumval}
+ }
+
+ panic(fmt.Sprintf("got %T, want %s", x, typeString(typ)))
+ }
+
+ // A Message is a Starlark value that wraps a protocol message.
+ //
+ // Two Messages are equivalent if and only if they are identical.
+ //
+ // When a Message value becomes frozen, a Starlark program may
+ // not modify the underlying protocol message, nor any Message
+ // or RepeatedField wrapper values derived from it.
+ type Message struct {
+ msg protoreflect.Message // any concrete type is allowed
+ frozen *bool // shared by a group of related Message/RepeatedField wrappers
+ }
+
+ // Message returns the wrapped message. The result is the wrapped message itself, not a copy.
+ func (m *Message) Message() protoreflect.ProtoMessage { return m.msg.Interface() }
+
+ // desc returns the descriptor of the wrapped message's type.
+ func (m *Message) desc() protoreflect.MessageDescriptor { return m.msg.Descriptor() }
+
+ // Compile-time check that *Message implements starlark.HasSetField.
+ var _ starlark.HasSetField = (*Message)(nil)
+
+ // Unmarshal parses the data as a binary protocol message of the specified type,
+ // and returns it as a new, unfrozen Starlark message value.
+ func Unmarshal(desc protoreflect.MessageDescriptor, data []byte) (*Message, error) {
+ return unmarshalData(desc, data, true)
+ }
+
+ // UnmarshalText parses the data as a text protocol message of the specified type,
+ // and returns it as a new, unfrozen Starlark message value.
+ func UnmarshalText(desc protoreflect.MessageDescriptor, data []byte) (*Message, error) {
+ return unmarshalData(desc, data, false)
+ }
+
+// unmarshalData constructs a Starlark proto.Message by decoding binary or text data.
+func unmarshalData(desc protoreflect.MessageDescriptor, data []byte, binary bool) (*Message, error) {
+ m := &Message{
+ msg: newMessage(desc),
+ frozen: new(bool),
+ }
+ var err error
+ if binary {
+ err = proto.Unmarshal(data, m.Message())
+ } else {
+ err = prototext.Unmarshal(data, m.Message())
+ }
+ if err != nil {
+ return nil, fmt.Errorf("unmarshalling %s failed: %v", desc.FullName(), err)
+ }
+ return m, nil
+}
+
+ // String returns a description of the message of the form
+ // "pkg.Msg(field=value, ...)", listing the populated fields
+ // (including extensions) in order of field number.
+ func (m *Message) String() string {
+ 	var buf bytes.Buffer
+ 	buf.WriteString(string(m.desc().FullName()))
+ 	buf.WriteString("(")
+
+ 	// Gather the populated fields, then sort them by number.
+ 	var populated []protoreflect.FieldDescriptor
+ 	m.msg.Range(func(fdesc protoreflect.FieldDescriptor, _ protoreflect.Value) bool {
+ 		// TODO(adonovan): opt: save v in table too.
+ 		populated = append(populated, fdesc)
+ 		return true
+ 	})
+ 	sort.Slice(populated, func(i, j int) bool {
+ 		return populated[i].Number() < populated[j].Number()
+ 	})
+
+ 	sep := ""
+ 	for _, fdesc := range populated {
+ 		buf.WriteString(sep)
+ 		sep = ", "
+
+ 		switch {
+ 		case fdesc.IsExtension():
+ 			// extension field: written using its full name
+ 			buf.WriteString(string(fdesc.FullName()))
+
+ 		case fdesc.Kind() == protoreflect.GroupKind:
+ 			// group field: "MyGroup"
+ 			//
+ 			// The name of a group is the mangled version,
+ 			// while the true name of a group is the message itself.
+ 			// For example, for a group called "MyGroup",
+ 			// the inlined message will be called "MyGroup",
+ 			// but the field will be named "mygroup".
+ 			// This rule complicates name logic everywhere.
+ 			buf.WriteString(string(fdesc.Message().Name()))
+
+ 		default:
+ 			// ordinary field: "field"
+ 			buf.WriteString(string(fdesc.Name()))
+ 		}
+ 		buf.WriteString("=")
+ 		writeString(&buf, fdesc, m.msg.Get(fdesc))
+ 	}
+ 	buf.WriteString(")")
+ 	return buf.String()
+ }
+
+ // Basic Starlark value methods of Message.
+ func (m *Message) Type() string { return "proto.Message" }
+ func (m *Message) Truth() starlark.Bool { return true } // a message is always truthy, even if empty
+ func (m *Message) Freeze() { *m.frozen = true } // sets the flag shared with related wrappers
+ func (m *Message) Hash() (h uint32, err error) { return uint32(uintptr(unsafe.Pointer(m))), nil } // identity hash
+
+ // Attr returns the value of this message's field of the specified name.
+ // Extension fields are not accessible this way as their names are not unique.
+ func (m *Message) Attr(name string) (starlark.Value, error) {
+ // The name 'descriptor' is already effectively reserved
+ // by the Go API for generated message types.
+ if name == "descriptor" {
+ return MessageDescriptor{Desc: m.desc()}, nil
+ }
+
+ fdesc, err := fieldDesc(m.desc(), name) // fails with a NoSuchAttrError for an unknown name
+ if err != nil {
+ return nil, err
+ }
+ return m.getField(fdesc), nil // an unset field yields its default value
+ }
+
+ // getField returns the Starlark value of the field described by fdesc,
+ // or the field's default value if the field is not present in the message.
+ func (m *Message) getField(fdesc protoreflect.FieldDescriptor) starlark.Value {
+ 	if fdesc.IsExtension() {
+ 		// See explanation in setField.
+ 		fdesc = dynamicpb.NewExtensionType(fdesc).TypeDescriptor()
+ 	}
+
+ 	if !m.msg.Has(fdesc) {
+ 		return defaultValue(fdesc)
+ 	}
+ 	value := m.msg.Get(fdesc)
+ 	return toStarlark(fdesc, value, m.frozen)
+ }
+
+ //go:linkname detrandDisable google.golang.org/protobuf/internal/detrand.Disable
+ func detrandDisable()
+
+ func init() {
+ // Nasty hack to disable the randomization of output that occurs in textproto.
+ // The linkname directive above binds detrandDisable to an internal protobuf function.
+ // TODO(adonovan): once go/proto-proposals/canonical-serialization
+ // is resolved the need for the hack should go away. See also go/go-proto-stability.
+ // If the proposal is rejected, we will need our own text-mode formatter.
+ detrandDisable()
+ }
+
+ // defaultValue returns the (frozen) default Starlark value for a given message field.
+ func defaultValue(fdesc protoreflect.FieldDescriptor) starlark.Value {
+ 	// Each result gets its own frozenness flag, already set.
+ 	frozen := true
+
+ 	switch {
+ 	case fdesc.IsList():
+ 		// The default value of a repeated field is an empty list.
+ 		return &RepeatedField{typ: fdesc, list: emptyList{}, frozen: &frozen}
+
+ 	case fdesc.Message() != nil:
+ 		// The zero value for a message type is an empty instance of that message.
+ 		return &Message{msg: newMessage(fdesc.Message()), frozen: &frozen}
+ 	}
+
+ 	// Convert the default value, which is not necessarily zero, to Starlark.
+ 	// The frozenness isn't used as the remaining types are all immutable.
+ 	return toStarlark1(fdesc, fdesc.Default(), &frozen)
+ }
+
+ // A frozen empty implementation of protoreflect.List. Only Len is implemented; the embedded List is left nil where defaultValue builds one.
+ type emptyList struct{ protoreflect.List }
+
+ func (emptyList) Len() int { return 0 }
+
+ // newMessage returns a new empty instance of the message type described by desc.
+ func newMessage(desc protoreflect.MessageDescriptor) protoreflect.Message {
+ 	// If desc refers to a built-in message,
+ 	// use the more efficient generated type descriptor (a Go struct).
+ 	// The registered type is used only if its descriptor is desc itself.
+ 	if mt, err := protoregistry.GlobalTypes.FindMessageByName(desc.FullName()); err == nil && mt.Descriptor() == desc {
+ 		return mt.New()
+ 	}
+
+ 	// For all others, use the generic dynamicpb representation.
+ 	dyn := dynamicpb.NewMessage(desc)
+ 	return dyn.ProtoReflect()
+ }
+
+ // fieldDesc returns the descriptor for the named non-extension field.
+ // If desc has no such field, it returns a starlark.NoSuchAttrError.
+ func fieldDesc(desc protoreflect.MessageDescriptor, name string) (protoreflect.FieldDescriptor, error) {
+ 	fdesc := desc.Fields().ByName(protoreflect.Name(name))
+ 	if fdesc == nil {
+ 		return nil, starlark.NoSuchAttrError(fmt.Sprintf("%s has no .%s field", desc.FullName(), name))
+ 	}
+ 	return fdesc, nil
+ }
+
+// SetField updates a non-extension field of this message.
+// It implements the HasSetField interface.
+//
+// The field name is resolved before the frozen check, so an unknown
+// field is reported as such even when the message is frozen.
+func (m *Message) SetField(name string, v starlark.Value) error {
+ fdesc, err := fieldDesc(m.desc(), name)
+ if err != nil {
+ return err
+ }
+ if *m.frozen {
+ return fmt.Errorf("cannot set .%s field of frozen %s message",
+ name, m.desc().FullName())
+ }
+ return setField(m.msg, fdesc, v)
+}
+
+// AttrNames returns the set of field names defined for this message.
+// It satisfies the starlark.HasAttrs interface.
+//
+// The result is sorted and always includes the standard "descriptor" attribute.
+func (m *Message) AttrNames() []string {
+ // Collect into a set so that a field that happens to be
+ // named "descriptor" is not listed twice.
+ seen := make(map[string]bool)
+
+ // standard fields
+ seen["descriptor"] = true
+
+ // non-extension fields
+ fields := m.desc().Fields()
+ for i := 0; i < fields.Len(); i++ {
+ fdesc := fields.Get(i)
+ if !fdesc.IsExtension() {
+ seen[string(fdesc.Name())] = true
+ }
+ }
+
+ // Map iteration order is random, hence the sort.
+ names := make([]string, 0, len(seen))
+ for name := range seen {
+ names = append(names, name)
+ }
+ sort.Strings(names)
+ return names
+}
+
+// typeString returns a user-friendly description of the type of a
+// protocol message field (or element of a repeated field).
+//
+// Message, group, and enum fields are described by the full name of their
+// type; every other kind by the lower-cased name of the kind itself.
+func typeString(fdesc protoreflect.FieldDescriptor) string {
+ switch fdesc.Kind() {
+ case protoreflect.GroupKind,
+ protoreflect.MessageKind:
+ return string(fdesc.Message().FullName())
+
+ case protoreflect.EnumKind:
+ return string(fdesc.Enum().FullName())
+
+ default:
+ // Scalar kinds: strip a leading "TYPE_", if the kind name has one.
+ return strings.ToLower(strings.TrimPrefix(fdesc.Kind().String(), "TYPE_"))
+ }
+}
+
+// A RepeatedField is a Starlark value that wraps a repeated field of a protocol message.
+//
+// An assignment to an element of a repeated field incurs a dynamic
+// check that the new value has (or can be converted to) the correct
+// type using conversions similar to those done when calling a
+// MessageDescriptor to construct a message.
+//
+// TODO(adonovan): make RepeatedField implement starlark.Comparable.
+// Should the comparison include type, or be defined on the elements alone?
+type RepeatedField struct {
+ typ protoreflect.FieldDescriptor // only for type information, not field name
+ list protoreflect.List // the underlying elements
+ frozen *bool // set by Freeze; when true, SetIndex is rejected
+ itercount int // number of active iterators; SetIndex is rejected while > 0
+}
+
+var _ starlark.HasSetIndex = (*RepeatedField)(nil)
+
+func (rf *RepeatedField) Type() string {
+ return fmt.Sprintf("proto.repeated<%s>", typeString(rf.typ))
+}
+
+// SetIndex replaces element i of the repeated field with v, after
+// converting v to the field's element type with toProto.
+// It fails if the field is frozen, if any iterator is active,
+// or if v cannot be converted. The index i is passed to the
+// underlying list's Set without a bounds check here.
+func (rf *RepeatedField) SetIndex(i int, v starlark.Value) error {
+ if *rf.frozen {
+ return fmt.Errorf("cannot insert value in frozen repeated field")
+ }
+ if rf.itercount > 0 {
+ return fmt.Errorf("cannot insert value in repeated field with active iterators")
+ }
+ x, err := toProto(rf.typ, v)
+ if err != nil {
+ // The repeated field value cannot know which field it
+ // belongs to---it might be shared by several of the
+ // same type---so the error message is suboptimal.
+ return fmt.Errorf("setting element of repeated field: %v", err)
+ }
+ rf.list.Set(i, x)
+ return nil
+}
+
+// Freeze sets the frozen flag that rf points to.
+func (rf *RepeatedField) Freeze() { *rf.frozen = true }
+func (rf *RepeatedField) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", rf.Type()) }
+
+// Index returns element i converted to a Starlark value.
+// The element is given the same frozen flag as rf.
+func (rf *RepeatedField) Index(i int) starlark.Value {
+ return toStarlark1(rf.typ, rf.list.Get(i), rf.frozen)
+}
+
+// Iterate returns an iterator over the elements. While the field is not
+// frozen, each live iterator is counted so that SetIndex can refuse
+// updates during iteration; the iterator's Done method undoes the count.
+func (rf *RepeatedField) Iterate() starlark.Iterator {
+ if !*rf.frozen {
+ rf.itercount++
+ }
+ return &repeatedFieldIterator{rf, 0}
+}
+func (rf *RepeatedField) Len() int { return rf.list.Len() }
+
+// String formats the elements as a comma-separated, bracketed sequence.
+func (rf *RepeatedField) String() string {
+ // We use list [...] notation even though it is not exactly a list.
+ buf := new(bytes.Buffer)
+ buf.WriteByte('[')
+ for i := 0; i < rf.list.Len(); i++ {
+ if i > 0 {
+ buf.WriteString(", ")
+ }
+ writeString(buf, rf.typ, rf.list.Get(i))
+ }
+ buf.WriteByte(']')
+ return buf.String()
+}
+
+// Truth reports whether the field has at least one element.
+func (rf *RepeatedField) Truth() starlark.Bool { return rf.list.Len() > 0 }
+
+// A repeatedFieldIterator yields the elements of a RepeatedField in index order.
+type repeatedFieldIterator struct {
+ rf *RepeatedField
+ i int // index of the next element to yield
+}
+
+// Next stores the next element in *p and reports true,
+// or reports false once all elements have been yielded.
+// The length is re-read from the field on every call.
+func (it *repeatedFieldIterator) Next(p *starlark.Value) bool {
+ if it.i < it.rf.Len() {
+ *p = it.rf.Index(it.i)
+ it.i++
+ return true
+ }
+ return false
+}
+
+// Done releases the iterator, decrementing the field's active-iterator
+// count unless the field is frozen at the time of the call.
+func (it *repeatedFieldIterator) Done() {
+ if !*it.rf.frozen {
+ it.rf.itercount--
+ }
+}
+
+// writeString appends to buf the Starlark string form of the protocol
+// value v, whose type is described by fdesc. It does so by converting v
+// to a Starlark value and calling its String method.
+func writeString(buf *bytes.Buffer, fdesc protoreflect.FieldDescriptor, v protoreflect.Value) {
+ // TODO(adonovan): opt: don't materialize the Starlark value.
+ // TODO(adonovan): skip message type when printing submessages? {...}?
+ var frozen bool // ignored
+ x := toStarlark(fdesc, v, &frozen)
+ buf.WriteString(x.String())
+}
+
+// -------- descriptor values --------
+
+// A FileDescriptor is an immutable Starlark value that describes a
+// .proto file. It is a reference to a protoreflect.FileDescriptor.
+// Two FileDescriptor values compare equal if and only if they refer to
+// the same protoreflect.FileDescriptor.
+//
+// Its fields are the names of the message types (MessageDescriptor),
+// extension fields (FieldDescriptor), and enum types (EnumDescriptor)
+// declared at the top level of the file.
+type FileDescriptor struct {
+ Desc protoreflect.FileDescriptor // TODO(adonovan): hide field, expose method?
+}
+
+var _ starlark.HasAttrs = FileDescriptor{}
+
+func (f FileDescriptor) String() string { return string(f.Desc.Path()) }
+func (f FileDescriptor) Type() string { return "proto.FileDescriptor" }
+func (f FileDescriptor) Truth() starlark.Bool { return true }
+func (f FileDescriptor) Freeze() {} // immutable
+func (f FileDescriptor) Hash() (h uint32, err error) { return starlark.String(f.Desc.Path()).Hash() }
+// Attr looks up name among the file's top-level declarations, trying
+// message types, then extension fields, then enum types.
+// It returns (nil, nil) if nothing by that name is declared.
+func (f FileDescriptor) Attr(name string) (starlark.Value, error) {
+ if desc := f.Desc.Messages().ByName(protoreflect.Name(name)); desc != nil {
+ return MessageDescriptor{Desc: desc}, nil
+ }
+ if desc := f.Desc.Extensions().ByName(protoreflect.Name(name)); desc != nil {
+ return FieldDescriptor{desc}, nil
+ }
+ if enum := f.Desc.Enums().ByName(protoreflect.Name(name)); enum != nil {
+ return EnumDescriptor{Desc: enum}, nil
+ }
+ return nil, nil
+}
+// AttrNames returns the sorted names of the file's top-level message
+// types, extension fields, and enum types, i.e. the names Attr resolves.
+func (f FileDescriptor) AttrNames() []string {
+ var names []string
+ messages := f.Desc.Messages()
+ for i, n := 0, messages.Len(); i < n; i++ {
+ names = append(names, string(messages.Get(i).Name()))
+ }
+ extensions := f.Desc.Extensions()
+ for i, n := 0, extensions.Len(); i < n; i++ {
+ names = append(names, string(extensions.Get(i).Name()))
+ }
+ enums := f.Desc.Enums()
+ for i, n := 0, enums.Len(); i < n; i++ {
+ names = append(names, string(enums.Get(i).Name()))
+ }
+ sort.Strings(names)
+ return names
+}
+
+// A MessageDescriptor is an immutable Starlark value that describes a protocol
+// message type.
+//
+// A MessageDescriptor value contains a reference to a protoreflect.MessageDescriptor.
+// Two MessageDescriptor values compare equal if and only if they refer to the
+// same protoreflect.MessageDescriptor.
+//
+// The fields of a MessageDescriptor value are the names of any message types
+// (MessageDescriptor), fields or extension fields (FieldDescriptor),
+// and enum types (EnumDescriptor) nested within the declaration of this message type.
+type MessageDescriptor struct {
+ Desc protoreflect.MessageDescriptor
+}
+
+var (
+ _ starlark.Callable = MessageDescriptor{}
+ _ starlark.HasAttrs = MessageDescriptor{}
+)
+
+func (d MessageDescriptor) String() string { return string(d.Desc.FullName()) }
+func (d MessageDescriptor) Type() string { return "proto.MessageDescriptor" }
+func (d MessageDescriptor) Truth() starlark.Bool { return true }
+func (d MessageDescriptor) Freeze() {} // immutable
+func (d MessageDescriptor) Hash() (h uint32, err error) {
+ return starlark.String(d.Desc.FullName()).Hash()
+}
+// Attr looks up name among the declarations nested in this message type,
+// trying nested message types, then extension fields, then ordinary
+// fields, then nested enum types.
+// It returns (nil, nil) if nothing by that name is declared.
+func (d MessageDescriptor) Attr(name string) (starlark.Value, error) {
+ if desc := d.Desc.Messages().ByName(protoreflect.Name(name)); desc != nil {
+ return MessageDescriptor{desc}, nil
+ }
+ if desc := d.Desc.Extensions().ByName(protoreflect.Name(name)); desc != nil {
+ return FieldDescriptor{desc}, nil
+ }
+ if desc := d.Desc.Fields().ByName(protoreflect.Name(name)); desc != nil {
+ return FieldDescriptor{desc}, nil
+ }
+ if desc := d.Desc.Enums().ByName(protoreflect.Name(name)); desc != nil {
+ return EnumDescriptor{desc}, nil
+ }
+ return nil, nil
+}
+// AttrNames returns the sorted names of every declaration that Attr can
+// resolve: nested message types, extension fields, ordinary fields, and
+// nested enum types.
+func (d MessageDescriptor) AttrNames() []string {
+ var names []string
+ messages := d.Desc.Messages()
+ for i, n := 0, messages.Len(); i < n; i++ {
+ names = append(names, string(messages.Get(i).Name()))
+ }
+ // Extensions and fields are resolved by Attr, so they must be listed
+ // here too; otherwise dir() hides attributes that lookup accepts.
+ extensions := d.Desc.Extensions()
+ for i, n := 0, extensions.Len(); i < n; i++ {
+ names = append(names, string(extensions.Get(i).Name()))
+ }
+ fields := d.Desc.Fields()
+ for i, n := 0, fields.Len(); i < n; i++ {
+ names = append(names, string(fields.Get(i).Name()))
+ }
+ enums := d.Desc.Enums()
+ for i, n := 0, enums.Len(); i < n; i++ {
+ names = append(names, string(enums.Get(i).Name()))
+ }
+ sort.Strings(names)
+ return names
+}
+func (d MessageDescriptor) Name() string { return string(d.Desc.Name()) } // for Callable
+
+// A FieldDescriptor is an immutable Starlark value that describes
+// a field (possibly an extension field) of a protocol message.
+//
+// A FieldDescriptor value contains a reference to a protoreflect.FieldDescriptor.
+// Two FieldDescriptor values compare equal if and only if they refer to the
+// same protoreflect.FieldDescriptor.
+//
+// The primary use for FieldDescriptors is to access extension fields of a message.
+//
+// A FieldDescriptor value has no attributes.
+// TODO(adonovan): expose metadata fields (e.g. name, type).
+type FieldDescriptor struct {
+ Desc protoreflect.FieldDescriptor
+}
+
+var (
+ _ starlark.HasAttrs = FieldDescriptor{}
+)
+
+func (d FieldDescriptor) String() string { return string(d.Desc.FullName()) }
+func (d FieldDescriptor) Type() string { return "proto.FieldDescriptor" }
+func (d FieldDescriptor) Truth() starlark.Bool { return true }
+func (d FieldDescriptor) Freeze() {} // immutable
+func (d FieldDescriptor) Hash() (h uint32, err error) {
+ return starlark.String(d.Desc.FullName()).Hash()
+}
+// Attr always returns (nil, nil): a FieldDescriptor currently has no attributes.
+func (d FieldDescriptor) Attr(name string) (starlark.Value, error) {
+ // TODO(adonovan): expose metadata fields of Desc?
+ return nil, nil
+}
+
+// AttrNames currently returns an empty (nil) list, matching Attr.
+func (d FieldDescriptor) AttrNames() []string {
+ var names []string
+ // TODO(adonovan): expose metadata fields of Desc?
+ sort.Strings(names)
+ return names
+}
+
+// An EnumDescriptor is an immutable Starlark value that describes a
+// protocol enum type.
+//
+// An EnumDescriptor contains a reference to a protoreflect.EnumDescriptor.
+// Two EnumDescriptor values compare equal if and only if they
+// refer to the same protoreflect.EnumDescriptor.
+//
+// An EnumDescriptor may be called like a function. It converts its
+// sole argument, which must be an int, string, or EnumValueDescriptor,
+// to an EnumValueDescriptor.
+//
+// The fields of an EnumDescriptor value are the values of the
+// enumeration, each of type EnumValueDescriptor.
+type EnumDescriptor struct {
+ Desc protoreflect.EnumDescriptor
+}
+
+var (
+ _ starlark.HasAttrs = EnumDescriptor{}
+ _ starlark.Callable = EnumDescriptor{}
+)
+
+func (e EnumDescriptor) String() string { return string(e.Desc.FullName()) }
+func (e EnumDescriptor) Type() string { return "proto.EnumDescriptor" }
+func (e EnumDescriptor) Truth() starlark.Bool { return true }
+func (e EnumDescriptor) Freeze() {} // immutable
+// Hash returns a hash of the enum type's full name, as the message and
+// field descriptor types do. Values that refer to the same descriptor
+// share a full name, so equal values still hash alike, while distinct
+// enum types no longer all collide on a constant hash.
+func (e EnumDescriptor) Hash() (h uint32, err error) {
+ return starlark.String(e.Desc.FullName()).Hash()
+}
+// Attr returns the EnumValueDescriptor for the enum value called name,
+// or (nil, nil) if the enum declares no such value.
+func (e EnumDescriptor) Attr(name string) (starlark.Value, error) {
+ if v := e.Desc.Values().ByName(protoreflect.Name(name)); v != nil {
+ return EnumValueDescriptor{v}, nil
+ }
+ return nil, nil
+}
+
+// AttrNames returns the sorted names of the enum's values.
+func (e EnumDescriptor) AttrNames() []string {
+ var names []string
+ values := e.Desc.Values()
+ for i, n := 0, values.Len(); i < n; i++ {
+ names = append(names, string(values.Get(i).Name()))
+ }
+ sort.Strings(names)
+ return names
+}
+func (e EnumDescriptor) Name() string { return string(e.Desc.Name()) } // for Callable
+
+// The CallInternal method implements the starlark.Callable interface.
+// A call to an enum descriptor converts its argument to a value of that enum type.
+// Exactly one positional argument is required; conversion errors from
+// enumValueOf are prefixed with the enum's name.
+func (e EnumDescriptor) CallInternal(_ *starlark.Thread, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ var x starlark.Value
+ if err := starlark.UnpackPositionalArgs(string(e.Desc.Name()), args, kwargs, 1, &x); err != nil {
+ return nil, err
+ }
+ v, err := enumValueOf(e.Desc, x)
+ if err != nil {
+ return nil, fmt.Errorf("%s: %v", e.Desc.Name(), err)
+ }
+ return EnumValueDescriptor{Desc: v}, nil
+}
+
+// enumValueOf converts an int, string, or enum value to a value of the specified enum type.
+//
+// An int must fit in 32 bits and match the number of a declared value;
+// a string must match the name of a declared value; an EnumValueDescriptor
+// must already belong to enum. Any other operand type is an error.
+func enumValueOf(enum protoreflect.EnumDescriptor, x starlark.Value) (protoreflect.EnumValueDescriptor, error) {
+ switch x := x.(type) {
+ case starlark.Int:
+ i, err := starlark.AsInt32(x)
+ if err != nil {
+ return nil, fmt.Errorf("invalid number %s for %s enum", x, enum.Name())
+ }
+ desc := enum.Values().ByNumber(protoreflect.EnumNumber(i))
+ if desc == nil {
+ return nil, fmt.Errorf("invalid number %d for %s enum", i, enum.Name())
+ }
+ return desc, nil
+
+ case starlark.String:
+ name := protoreflect.Name(x)
+ desc := enum.Values().ByName(name)
+ if desc == nil {
+ return nil, fmt.Errorf("invalid name %q for %s enum", name, enum.Name())
+ }
+ return desc, nil
+
+ case EnumValueDescriptor:
+ // Reject a value whose parent is a different enum descriptor.
+ if parent := x.Desc.Parent(); parent != enum {
+ return nil, fmt.Errorf("invalid value %s.%s for %s enum",
+ parent.Name(), x.Desc.Name(), enum.Name())
+ }
+ return x.Desc, nil
+ }
+
+ return nil, fmt.Errorf("cannot convert %s to %s enum", x.Type(), enum.Name())
+}
+
+// An EnumValueDescriptor is an immutable Starlark value that represents one value of an enumeration.
+//
+// An EnumValueDescriptor contains a reference to a protoreflect.EnumValueDescriptor.
+// Two EnumValueDescriptor values compare equal if and only if they
+// refer to the same protoreflect.EnumValueDescriptor.
+//
+// An EnumValueDescriptor has the following fields:
+//
+// index -- int, index of this value within the enum sequence
+// name -- string, name of this enum value
+// number -- int, numeric value of this enum value
+// type -- EnumDescriptor, the enum type to which this value belongs
+//
+type EnumValueDescriptor struct {
+ Desc protoreflect.EnumValueDescriptor
+}
+
+var (
+ _ starlark.HasAttrs = EnumValueDescriptor{}
+ _ starlark.Comparable = EnumValueDescriptor{}
+)
+
+// String returns the value's name qualified by the short (not full)
+// name of its enum type.
+func (e EnumValueDescriptor) String() string {
+ enum := e.Desc.Parent()
+ return string(enum.Name() + "." + e.Desc.Name()) // "Enum.EnumValue"
+}
+func (e EnumValueDescriptor) Type() string { return "proto.EnumValueDescriptor" }
+func (e EnumValueDescriptor) Truth() starlark.Bool { return true }
+func (e EnumValueDescriptor) Freeze() {} // immutable
+func (e EnumValueDescriptor) Hash() (h uint32, err error) { return uint32(e.Desc.Number()), nil }
+func (e EnumValueDescriptor) AttrNames() []string {
+ return []string{"index", "name", "number", "type"}
+}
+// Attr returns the value of the index, name, number, or type attribute,
+// or (nil, nil) for any other name.
+func (e EnumValueDescriptor) Attr(name string) (starlark.Value, error) {
+ switch name {
+ case "index":
+ return starlark.MakeInt(e.Desc.Index()), nil
+ case "name":
+ return starlark.String(e.Desc.Name()), nil
+ case "number":
+ return starlark.MakeInt(int(e.Desc.Number())), nil
+ case "type":
+ // The unchecked assertion panics if the parent is not an enum descriptor.
+ enum := e.Desc.Parent()
+ return EnumDescriptor{Desc: enum.(protoreflect.EnumDescriptor)}, nil
+ }
+ return nil, nil
+}
+// CompareSameType implements starlark.Comparable. Only == and != are
+// supported, and they compare the identity of the underlying descriptors;
+// any other operator is an error. y_ must be an EnumValueDescriptor
+// (the type assertion is unchecked).
+func (x EnumValueDescriptor) CompareSameType(op syntax.Token, y_ starlark.Value, depth int) (bool, error) {
+ y := y_.(EnumValueDescriptor)
+ switch op {
+ case syntax.EQL:
+ return x.Desc == y.Desc, nil
+ case syntax.NEQ:
+ return x.Desc != y.Desc, nil
+ default:
+ return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y_.Type())
+ }
+}
diff --git a/repl/repl.go b/repl/repl.go
new file mode 100644
index 0000000..97109c6
--- /dev/null
+++ b/repl/repl.go
@@ -0,0 +1,185 @@
+// Package repl provides a read/eval/print loop for Starlark.
+//
+// It supports readline-style command editing,
+// and interrupts through Control-C.
+//
+// If an input line can be parsed as an expression,
+// the REPL parses and evaluates it and prints its result.
+// Otherwise the REPL reads lines until a blank line,
+// then tries again to parse the multi-line input as an
+// expression. If the input still cannot be parsed as an expression,
+// the REPL parses and executes it as a file (a list of statements),
+// for side effects.
+package repl // import "go.starlark.net/repl"
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "os"
+ "os/signal"
+
+ "github.com/chzyer/readline"
+ "go.starlark.net/resolve"
+ "go.starlark.net/starlark"
+ "go.starlark.net/syntax"
+)
+
+var interrupted = make(chan os.Signal, 1)
+
+// REPL executes a read, eval, print loop.
+//
+// Before evaluating each expression, it sets the Starlark thread local
+// variable named "context" to a context.Context that is cancelled by a
+// SIGINT (Control-C). Client-supplied global functions may use this
+// context to make long-running operations interruptible.
+//
+// The loop ends when rep returns an error other than readline.ErrInterrupt.
+//
+func REPL(thread *starlark.Thread, globals starlark.StringDict) {
+ // Route SIGINT to the package-level channel for the duration of the session.
+ signal.Notify(interrupted, os.Interrupt)
+ defer signal.Stop(interrupted)
+
+ rl, err := readline.New(">>> ")
+ if err != nil {
+ PrintError(err)
+ return
+ }
+ defer rl.Close()
+ for {
+ if err := rep(rl, thread, globals); err != nil {
+ // An interrupt abandons the current item but keeps the session alive.
+ if err == readline.ErrInterrupt {
+ fmt.Println(err)
+ continue
+ }
+ break
+ }
+ }
+ fmt.Println()
+}
+
+// rep reads, evaluates, and prints one item.
+//
+// It returns io.EOF once readline has reported end of input.
+// Any other failure reported by the parser or by evaluation
+// is printed with PrintError, and rep returns nil.
+//
+// NOTE(review): as written, no path returns readline.ErrInterrupt;
+// confirm whether callers still need to check for it.
+func rep(rl *readline.Instance, thread *starlark.Thread, globals starlark.StringDict) error {
+ // Each item gets its own context,
+ // which is cancelled by a SIGINT.
+ //
+ // Note: during Readline calls, Control-C causes Readline to return
+ // ErrInterrupt but does not generate a SIGINT.
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+ // The deferred cancel also releases this goroutine when no signal arrives.
+ go func() {
+ select {
+ case <-interrupted:
+ cancel()
+ case <-ctx.Done():
+ }
+ }()
+
+ thread.SetLocal("context", ctx)
+
+ eof := false
+
+ // readline returns EOF, ErrInterrupt, or a line including "\n".
+ // After the first line, the prompt switches to the continuation prompt.
+ rl.SetPrompt(">>> ")
+ readline := func() ([]byte, error) {
+ line, err := rl.Readline()
+ rl.SetPrompt("... ")
+ if err != nil {
+ if err == io.EOF {
+ eof = true
+ }
+ return nil, err
+ }
+ return []byte(line + "\n"), nil
+ }
+
+ // parse
+ f, err := syntax.ParseCompoundStmt("<stdin>", readline)
+ if err != nil {
+ if eof {
+ return io.EOF
+ }
+ PrintError(err)
+ return nil
+ }
+
+ // Treat load bindings as global (like they used to be) in the REPL.
+ // This is a workaround for github.com/google/starlark-go/issues/224.
+ // TODO(adonovan): not safe wrt concurrent interpreters.
+ // Come up with a more principled solution (or plumb options everywhere).
+ defer func(prev bool) { resolve.LoadBindsGlobally = prev }(resolve.LoadBindsGlobally)
+ resolve.LoadBindsGlobally = true
+
+ if expr := soleExpr(f); expr != nil {
+ // eval
+ v, err := starlark.EvalExpr(thread, expr, globals)
+ if err != nil {
+ PrintError(err)
+ return nil
+ }
+
+ // print
+ if v != starlark.None {
+ fmt.Println(v)
+ }
+ } else if err := starlark.ExecREPLChunk(f, thread, globals); err != nil {
+ PrintError(err)
+ return nil
+ }
+
+ return nil
+}
+
+// soleExpr returns the expression of f's only statement if f consists
+// of exactly one expression statement, and nil otherwise.
+func soleExpr(f *syntax.File) syntax.Expr {
+ if len(f.Stmts) == 1 {
+ if stmt, ok := f.Stmts[0].(*syntax.ExprStmt); ok {
+ return stmt.X
+ }
+ }
+ return nil
+}
+
+// PrintError prints the error to stderr,
+// or its backtrace if it is a Starlark evaluation error.
+func PrintError(err error) {
+ if evalErr, ok := err.(*starlark.EvalError); ok {
+ fmt.Fprintln(os.Stderr, evalErr.Backtrace())
+ } else {
+ fmt.Fprintln(os.Stderr, err)
+ }
+}
+
+// MakeLoad returns a simple sequential implementation of module loading
+// suitable for use in the REPL.
+// Each function returned by MakeLoad accesses a distinct private cache.
+//
+// The outcome of each load, including a failure, is cached, so a module
+// is executed at most once per cache. The cache is a plain map with no
+// locking. A load of a module whose own load is still in progress is
+// reported as a cycle.
+func MakeLoad() func(thread *starlark.Thread, module string) (starlark.StringDict, error) {
+ type entry struct {
+ globals starlark.StringDict
+ err error
+ }
+
+ // A key mapped to a nil entry marks a load that is in progress.
+ var cache = make(map[string]*entry)
+
+ return func(thread *starlark.Thread, module string) (starlark.StringDict, error) {
+ e, ok := cache[module]
+ if e == nil {
+ if ok {
+ // request for package whose loading is in progress
+ return nil, fmt.Errorf("cycle in load graph")
+ }
+
+ // Add a placeholder to indicate "load in progress".
+ cache[module] = nil
+
+ // Load it.
+ // The module runs in a fresh thread that shares the caller's Load function.
+ thread := &starlark.Thread{Name: "exec " + module, Load: thread.Load}
+ globals, err := starlark.ExecFile(thread, module, nil, nil)
+ e = &entry{globals, err}
+
+ // Update the cache.
+ cache[module] = e
+ }
+ return e.globals, e.err
+ }
+}
diff --git a/resolve/binding.go b/resolve/binding.go
new file mode 100644
index 0000000..6b99f4b
--- /dev/null
+++ b/resolve/binding.go
@@ -0,0 +1,74 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package resolve
+
+import "go.starlark.net/syntax"
+
+// This file defines resolver data types saved in the syntax tree.
+// We cannot guarantee API stability for these types
+// as they are closely tied to the implementation.
+
+// A Binding contains resolver information about an identifier.
+// The resolver populates the Binding field of each syntax.Ident.
+// The Binding ties together all identifiers that denote the same variable.
+type Binding struct {
+ Scope Scope
+
+ // Index records the index into the enclosing
+ // - {DefStmt,File}.Locals, if Scope==Local
+ // - DefStmt.FreeVars, if Scope==Free
+ // - File.Globals, if Scope==Global.
+ // It is zero if Scope is Predeclared, Universal, or Undefined.
+ Index int
+
+ First *syntax.Ident // first binding use (iff Scope==Local/Free/Global)
+}
+
+// The Scope of a Binding indicates what kind of scope it has.
+type Scope uint8
+
+const (
+ Undefined Scope = iota // name is not defined
+ Local // name is local to its function or file
+ Cell // name is function-local but shared with a nested function
+ Free // name is cell of some enclosing function
+ Global // name is global to module
+ Predeclared // name is predeclared for this module (e.g. glob)
+ Universal // name is universal (e.g. len)
+)
+
+// scopeNames maps each Scope constant to its lower-case name.
+var scopeNames = [...]string{
+ Undefined: "undefined",
+ Local: "local",
+ Cell: "cell",
+ Free: "free",
+ Global: "global",
+ Predeclared: "predeclared",
+ Universal: "universal",
+}
+
+// String returns the lower-case name of the scope.
+// It panics (index out of range) if scope is not one of the constants above.
+func (scope Scope) String() string { return scopeNames[scope] }
+
+// A Module contains resolver information about a file.
+// The resolver populates the Module field of each syntax.File.
+type Module struct {
+ Locals []*Binding // the file's (comprehension-)local variables
+ Globals []*Binding // the file's global variables
+}
+
+// A Function contains resolver information about a named or anonymous function.
+// The resolver populates the Function field of each syntax.DefStmt and syntax.LambdaExpr.
+type Function struct {
+ Pos syntax.Position // of DEF or LAMBDA
+ Name string // name of def, or "lambda"
+ Params []syntax.Expr // param = ident | ident=expr | * | *ident | **ident
+ Body []syntax.Stmt // contains synthetic 'return expr' for lambda
+
+ HasVarargs bool // whether params includes *args (convenience)
+ HasKwargs bool // whether params includes **kwargs (convenience)
+ NumKwonlyParams int // number of keyword-only optional parameters
+ Locals []*Binding // this function's local/cell variables, parameters first
+ FreeVars []*Binding // enclosing cells to capture in closure
+}
diff --git a/resolve/resolve.go b/resolve/resolve.go
new file mode 100644
index 0000000..56e33ba
--- /dev/null
+++ b/resolve/resolve.go
@@ -0,0 +1,969 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package resolve defines a name-resolution pass for Starlark abstract
+// syntax trees.
+//
+ // The resolver sets the Function field of each DefStmt and LambdaExpr
+ // (recording the function's Locals and FreeVars) and the Binding field
+ // of each syntax.Ident that refers to a variable. It also sets the
+ // Module field of a File, whose Locals are those bound by top-level
+ // comprehensions and load statements and whose Globals are the
+ // module's global variables.
+package resolve // import "go.starlark.net/resolve"
+
+// All references to names are statically resolved. Names may be
+// predeclared, global, or local to a function or file.
+// File-local variables include those bound by top-level comprehensions
+// and by load statements. ("Top-level" means "outside of any function".)
+// The resolver maps each global name to a small integer and each local
+// name to a small integer; these integers enable a fast and compact
+// representation of globals and locals in the evaluator.
+//
+// As an optimization, the resolver classifies each predeclared name as
+// either universal (e.g. None, len) or per-module (e.g. glob in Bazel's
+// build language), enabling the evaluator to share the representation
+// of the universal environment across all modules.
+//
+// The lexical environment is a tree of blocks with the file block at
+// its root. The file's child blocks may be of two kinds: functions
+// and comprehensions, and these may have further children of either
+// kind.
+//
+// Python-style resolution requires multiple passes because a name is
+// determined to be local to a function only if the function contains a
+// "binding" use of it; similarly, a name is determined to be global (as
+// opposed to predeclared) if the module contains a top-level binding use.
+// Unlike ordinary top-level assignments, the bindings created by load
+// statements are local to the file block.
+// A non-binding use may lexically precede the binding to which it is resolved.
+// In the first pass, we inspect each function, recording in
+// 'uses' each identifier and the environment block in which it occurs.
+// If a use of a name is binding, such as a function parameter or
+// assignment, we add the name to the block's bindings mapping and add a
+// local variable to the enclosing function.
+//
+// As we finish resolving each function, we inspect all the uses within
+// that function and discard ones that were found to be function-local. The
+// remaining ones must be either free (local to some lexically enclosing
+// function), or top-level (global, predeclared, or file-local), but we cannot tell
+// which until we have finished inspecting the outermost enclosing
+// function. At that point, we can distinguish local from top-level names
+// (and this is when Python would compute free variables).
+//
+// However, Starlark additionally requires that all references to global
+// names are satisfied by some declaration in the current module;
+// Starlark permits a function to forward-reference a global or file-local
+// that has not
+// been declared yet so long as it is declared before the end of the
+// module. So, instead of re-resolving the unresolved references after
+// each top-level function, we defer this until the end of the module
+// and ensure that all such references are satisfied by some definition.
+//
+// At the end of the module, we visit each of the nested function blocks
+// in bottom-up order, doing a recursive lexical lookup for each
+// unresolved name. If the name is found to be local to some enclosing
+// function, we must create a DefStmt.FreeVar (capture) parameter for
+// each intervening function. We enter these synthetic bindings into
+// the bindings map so that we create at most one freevar per name. If
+// the name was not local, we check that it was defined at module level.
+//
+// We resolve all uses of locals in the module (due to load statements
+// and comprehensions) in a similar way and compute the file's set of
+// local variables.
+//
+// Starlark enforces that all global names are assigned at most once on
+// all control flow paths by forbidding if/else statements and loops at
+// top level. A global may be used before it is defined, leading to a
+ // dynamic error. However, the AllowGlobalReassign flag (really: allow
+ // top-level reassign) makes the resolver allow multiple assignments to
+ // a variable at top-level. It also allows if statements and for- and
+ // while-loops at top-level, which in turn may make the evaluator
+ // dynamically assign multiple values to a variable at top-level.
+ // (These two roles should be separated.)
+
+import (
+ "fmt"
+ "log"
+ "sort"
+ "strings"
+
+ "go.starlark.net/internal/spell"
+ "go.starlark.net/syntax"
+)
+
+ // debug enables trace output from lookupLexical.
+ const debug = false
+
+ // doesnt is the common prefix of errors reporting a feature that is disabled by an option.
+ const doesnt = "this Starlark dialect does not "
+
+ // global options
+ // These features are either not standard Starlark (yet), or deprecated
+ // features of the BUILD language, so we put them behind flags.
+ //
+ // These are package-level variables consulted during resolution, so a
+ // change affects every subsequent call into this package.
+ var (
+ AllowSet = false // allow the 'set' built-in
+ AllowGlobalReassign = false // allow reassignment to top-level names; also, allow if/for/while at top-level
+ AllowRecursion = false // allow while statements and recursive functions (this package checks only while statements)
+ LoadBindsGlobally = false // load creates global not file-local bindings (deprecated)
+
+ // obsolete flags for features that are now standard. No effect.
+ AllowNestedDef = true
+ AllowLambda = true
+ AllowFloat = true
+ AllowBitwise = true
+ )
+
+ // File resolves the specified file and records information about the
+ // module in file.Module.
+ //
+ // The isPredeclared and isUniversal predicates report whether a name is
+ // a pre-declared identifier (visible in the current module) or a
+ // universal identifier (visible in every module).
+ // Clients should typically pass predeclared.Has for the first and
+ // starlark.Universe.Has for the second, where predeclared is the
+ // module's StringDict of predeclared names and starlark.Universe is the
+ // standard set of built-ins.
+ // The isUniversal predicate is supplied as a parameter to avoid a cyclic
+ // dependency upon starlark.Universe, not because users should ever need
+ // to redefine it.
+ //
+ // File is equivalent to REPLChunk with a nil isGlobal predicate.
+ func File(file *syntax.File, isPredeclared, isUniversal func(name string) bool) error {
+ return REPLChunk(file, nil, isPredeclared, isUniversal)
+ }
+
+ // REPLChunk is a generalization of the File function that supports a
+ // non-empty initial global block, as occurs in a REPL.
+ //
+ // The isGlobal predicate reports whether a name is already defined as a
+ // global (for example by an earlier chunk); it may be nil.
+ // file.Module is populated even when errors are reported.
+ // A non-nil error result is an ErrorList.
+ func REPLChunk(file *syntax.File, isGlobal, isPredeclared, isUniversal func(name string) bool) error {
+ r := newResolver(isGlobal, isPredeclared, isUniversal)
+ r.stmts(file.Stmts)
+
+ // Resolve uses of file-local variables (loads, top-level comprehensions).
+ r.env.resolveLocalUses()
+
+ // At the end of the module, resolve all non-local variable references,
+ // computing closures.
+ // Function bodies may contain forward references to later global declarations.
+ r.resolveNonLocalUses(r.env)
+
+ file.Module = &Module{
+ Locals: r.moduleLocals,
+ Globals: r.moduleGlobals,
+ }
+
+ if len(r.errors) > 0 {
+ return r.errors
+ }
+ return nil
+ }
+
+ // Expr resolves the specified expression.
+ // It returns the local variables bound within the expression.
+ //
+ // The isPredeclared and isUniversal predicates behave as for the File function.
+ // If any errors are reported, Expr returns a nil slice and an ErrorList.
+ func Expr(expr syntax.Expr, isPredeclared, isUniversal func(name string) bool) ([]*Binding, error) {
+ r := newResolver(nil, isPredeclared, isUniversal)
+ r.expr(expr)
+ r.env.resolveLocalUses()
+ r.resolveNonLocalUses(r.env) // globals & universals
+ if len(r.errors) > 0 {
+ return nil, r.errors
+ }
+ return r.moduleLocals, nil
+ }
+
+ // An ErrorList is a non-empty list of resolver error messages.
+ type ErrorList []Error // len > 0
+
+ // Error returns the message of the first error in the list only.
+ // It indexes e[0], so it must not be called on an empty list.
+ func (e ErrorList) Error() string { return e[0].Error() }
+
+ // An Error describes the nature and position of a resolver error.
+ type Error struct {
+ Pos syntax.Position
+ Msg string
+ }
+
+ // Error returns the error formatted as the position, a colon and a space, then the message.
+ func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg }
+
+ // newResolver returns a resolver whose current environment is a new,
+ // empty file block and whose global and predeclared maps are empty.
+ // isGlobal may be nil.
+ func newResolver(isGlobal, isPredeclared, isUniversal func(name string) bool) *resolver {
+ file := new(block)
+ return &resolver{
+ file: file,
+ env: file,
+ isGlobal: isGlobal,
+ isPredeclared: isPredeclared,
+ isUniversal: isUniversal,
+ globals: make(map[string]*Binding),
+ predeclared: make(map[string]*Binding),
+ }
+ }
+
+ // A resolver holds the state of one resolution of a file or expression.
+ type resolver struct {
+ // env is the current local environment:
+ // a linked list of blocks, innermost first.
+ // The tail of the list is the file block.
+ env *block
+ file *block // file block (contains load bindings)
+
+ // moduleLocals contains the local variables of the module
+ // (due to load statements and comprehensions outside any function).
+ // moduleGlobals contains the global variables of the module.
+ moduleLocals []*Binding
+ moduleGlobals []*Binding
+
+ // globals maps each global name in the module to its binding.
+ // predeclared does the same for predeclared and universal names.
+ globals map[string]*Binding
+ predeclared map[string]*Binding
+
+ // These predicates report whether a name is
+ // pre-declared, either in this module or universally,
+ // or already declared in the module globals (as in a REPL).
+ // isGlobal may be nil.
+ isGlobal, isPredeclared, isUniversal func(name string) bool
+
+ loops int // number of enclosing for/while loops
+ ifstmts int // number of enclosing if statements
+
+ errors ErrorList // errors reported so far, in order of discovery
+ }
+
+ // container returns the innermost enclosing "container" block:
+ // a function (function != nil) or file (function == nil).
+ // Container blocks accumulate local variable bindings.
+ func (r *resolver) container() *block {
+ for b := r.env; ; b = b.parent { // no loop condition: the walk ends at a function block or the file block
+ if b.function != nil || b == r.file {
+ return b
+ }
+ }
+ }
+
+ // push enters block b: it records b as a child of the current
+ // environment and makes b the current environment.
+ func (r *resolver) push(b *block) {
+ r.env.children = append(r.env.children, b)
+ b.parent = r.env
+ r.env = b
+ }
+
+ // pop leaves the current block, making its parent the current environment.
+ func (r *resolver) pop() { r.env = r.env.parent }
+
+ // A block is a node in the tree of lexical blocks: the file block at
+ // the root, or a function or comprehension block nested within it.
+ type block struct {
+ parent *block // nil for file block
+
+ // In the file (root) block, both these fields are nil.
+ function *Function // only for function blocks
+ comp *syntax.Comprehension // only for comprehension blocks
+
+ // bindings maps a name to its binding.
+ // A local binding has an index into its innermost enclosing container's locals array.
+ // A free binding has an index into its innermost enclosing function's freevars array.
+ bindings map[string]*Binding
+
+ // children records the child blocks of the current one.
+ children []*block
+
+ // uses records all identifiers seen in this container (function or file),
+ // and a reference to the environment in which they appear.
+ // As we leave each container block, we resolve them,
+ // so that only free and global ones remain.
+ // The remaining uses are resolved, and closures computed,
+ // by resolveNonLocalUses at the end of the module.
+ uses []use
+ }
+
+ // bind records bind as the binding of name in block b,
+ // allocating the bindings map on first use and
+ // replacing any existing entry for name.
+ func (b *block) bind(name string, bind *Binding) {
+ if b.bindings == nil {
+ b.bindings = make(map[string]*Binding)
+ }
+ b.bindings[name] = bind
+ }
+
+ // String returns a description of the block's kind (function,
+ // comprehension, or file) and, for the first two, its position.
+ func (b *block) String() string {
+ if b.function != nil {
+ return "function block at " + fmt.Sprint(b.function.Pos)
+ }
+ if b.comp != nil {
+ return "comprehension block at " + fmt.Sprint(b.comp.Span())
+ }
+ return "file block"
+ }
+
+ // errorf appends to r.errors an Error at position posn whose message
+ // is formatted from format and args as by fmt.Sprintf.
+ // Resolution continues after an error is recorded.
+ func (r *resolver) errorf(posn syntax.Position, format string, args ...interface{}) {
+ r.errors = append(r.errors, Error{posn, fmt.Sprintf(format, args...)})
+ }
+
+ // A use records an identifier and the environment in which it appears.
+ type use struct {
+ id *syntax.Ident // the identifier
+ env *block // the innermost block enclosing id
+ }
+
+ // bind creates a binding for id: a global (not file-local)
+ // binding at top-level, a local binding otherwise.
+ // At top-level, it reports an error if a global or file-local
+ // binding already exists, unless AllowGlobalReassign.
+ // At top-level it sets id.Binding to the binding (whether old or new);
+ // otherwise it delegates to bindLocal, which records a use of id
+ // to be resolved later.
+ // It returns whether a binding already existed.
+ func (r *resolver) bind(id *syntax.Ident) bool {
+ // Binding outside any local (comprehension/function) block?
+ if r.env == r.file {
+ // File-local bindings take precedence over globals of the same name.
+ bind, ok := r.file.bindings[id.Name]
+ if !ok {
+ bind, ok = r.globals[id.Name]
+ if !ok {
+ // first global binding of this name
+ bind = &Binding{
+ First: id,
+ Scope: Global,
+ Index: len(r.moduleGlobals),
+ }
+ r.globals[id.Name] = bind
+ r.moduleGlobals = append(r.moduleGlobals, bind)
+ }
+ }
+ if ok && !AllowGlobalReassign {
+ r.errorf(id.NamePos, "cannot reassign %s %s declared at %s",
+ bind.Scope, id.Name, bind.First.NamePos)
+ }
+ id.Binding = bind
+ return ok
+ }
+
+ return r.bindLocal(id)
+ }
+
+ // bindLocal binds id as a local variable of the current block unless
+ // the block already binds that name, then records a use of id.
+ // It reports whether a binding already existed in the current block.
+ func (r *resolver) bindLocal(id *syntax.Ident) bool {
+ // Mark this name as local to current block.
+ // Assign it the next free index in the current container's locals.
+ _, ok := r.env.bindings[id.Name]
+ if !ok {
+ // The slot lives in the enclosing function's Locals,
+ // or in the module's locals when outside any function.
+ var locals *[]*Binding
+ if fn := r.container().function; fn != nil {
+ locals = &fn.Locals
+ } else {
+ locals = &r.moduleLocals
+ }
+ bind := &Binding{
+ First: id,
+ Scope: Local,
+ Index: len(*locals),
+ }
+ r.env.bind(id.Name, bind)
+ *locals = append(*locals, bind)
+ }
+
+ r.use(id)
+ return ok
+ }
+
+ // use records a reference to id in the current environment.
+ // Ordinarily the use is appended to the uses of the enclosing
+ // container, to be resolved later; but if AllowGlobalReassign is set
+ // and the use is in the file block, it is resolved immediately by
+ // useToplevel.
+ func (r *resolver) use(id *syntax.Ident) {
+ use := use{id, r.env}
+
+ // The spec says that if there is a global binding of a name
+ // then all references to that name in that block refer to the
+ // global, even if the use precedes the def---just as for locals.
+ // For example, in this code,
+ //
+ // print(len); len=1; print(len)
+ //
+ // both occurrences of len refer to the len=1 binding, which
+ // completely shadows the predeclared len function.
+ //
+ // The rationale for these semantics, which differ from Python,
+ // is that the static meaning of len (a reference to a global)
+ // does not change depending on where it appears in the file.
+ // Of course, its dynamic meaning does change, from an error
+ // into a valid reference, so it's not clear these semantics
+ // have any practical advantage.
+ //
+ // In any case, the Bazel implementation lags behind the spec
+ // and follows Python behavior, so the first use of len refers
+ // to the predeclared function. This is typically used in a BUILD
+ // file that redefines a predeclared name half way through,
+ // for example:
+ //
+ // proto_library(...) # built-in rule
+ // load("myproto.bzl", "proto_library")
+ // proto_library(...) # user-defined rule
+ //
+ // We will piggyback support for the legacy semantics on the
+ // AllowGlobalReassign flag, which is loosely related and also
+ // required for Bazel.
+ if AllowGlobalReassign && r.env == r.file {
+ r.useToplevel(use)
+ return
+ }
+
+ b := r.container()
+ b.uses = append(b.uses, use)
+ }
+
+ // useToplevel resolves use.id as a reference to a name visible at top-level.
+ // The use.env field captures the original environment for error reporting.
+ //
+ // Candidates are tried in this order: file-local bindings, module
+ // globals, globals known to isGlobal, previously seen predeclared or
+ // universal names, isPredeclared, then isUniversal. If none matches,
+ // an "undefined" error is reported and the binding has Scope Undefined.
+ // It sets use.id.Binding to the result and returns it.
+ func (r *resolver) useToplevel(use use) (bind *Binding) {
+ id := use.id
+
+ if prev, ok := r.file.bindings[id.Name]; ok {
+ // use of load-defined name in file block
+ bind = prev
+ } else if prev, ok := r.globals[id.Name]; ok {
+ // use of global declared by module
+ bind = prev
+ } else if r.isGlobal != nil && r.isGlobal(id.Name) {
+ // use of global defined in a previous REPL chunk
+ bind = &Binding{
+ First: id, // wrong: this is not even a binding use
+ Scope: Global,
+ Index: len(r.moduleGlobals),
+ }
+ r.globals[id.Name] = bind
+ r.moduleGlobals = append(r.moduleGlobals, bind)
+ } else if prev, ok := r.predeclared[id.Name]; ok {
+ // repeated use of predeclared or universal
+ bind = prev
+ } else if r.isPredeclared(id.Name) {
+ // use of pre-declared name
+ bind = &Binding{Scope: Predeclared}
+ r.predeclared[id.Name] = bind // save it
+ } else if r.isUniversal(id.Name) {
+ // use of universal name
+ if !AllowSet && id.Name == "set" {
+ r.errorf(id.NamePos, doesnt+"support sets")
+ }
+ bind = &Binding{Scope: Universal}
+ r.predeclared[id.Name] = bind // save it
+ } else {
+ bind = &Binding{Scope: Undefined}
+ var hint string
+ if n := r.spellcheck(use); n != "" {
+ hint = fmt.Sprintf(" (did you mean %s?)", n)
+ }
+ r.errorf(id.NamePos, "undefined: %s%s", id.Name, hint)
+ }
+ id.Binding = bind
+ return bind
+ }
+
+ // spellcheck returns the name that spell.Nearest selects for
+ // use.id.Name from among the names bound in the environment use.env
+ // and the module's globals. The caller treats an empty result as
+ // "no suggestion".
+ func (r *resolver) spellcheck(use use) string {
+ var names []string
+
+ // locals
+ for b := use.env; b != nil; b = b.parent {
+ for name := range b.bindings {
+ names = append(names, name)
+ }
+ }
+
+ // globals
+ //
+ // We have no way to enumerate the sets whose membership
+ // tests are isPredeclared, isUniversal, and isGlobal,
+ // which includes prior names in the REPL session.
+ for _, bind := range r.moduleGlobals {
+ names = append(names, bind.First.Name)
+ }
+
+ // Map iteration order is random; sort so the candidate order is deterministic.
+ sort.Strings(names)
+ return spell.Nearest(use.id.Name, names)
+ }
+
+ // resolveLocalUses is called when leaving a container (function/module)
+ // block. It resolves all uses of locals/cells within that block,
+ // and retains in b.uses only the uses that remain unresolved.
+ func (b *block) resolveLocalUses() {
+ unresolved := b.uses[:0] // filter in place, reusing the backing array
+ for _, use := range b.uses {
+ if bind := lookupLocal(use); bind != nil && (bind.Scope == Local || bind.Scope == Cell) {
+ use.id.Binding = bind
+ } else {
+ unresolved = append(unresolved, use)
+ }
+ }
+ b.uses = unresolved
+ }
+
+ // stmts resolves each statement of stmts in order.
+ func (r *resolver) stmts(stmts []syntax.Stmt) {
+ for _, stmt := range stmts {
+ r.stmt(stmt)
+ }
+ }
+
+ // stmt resolves a single statement, recursively resolving its
+ // sub-expressions and nested statements, and reports an error for a
+ // statement that is not permitted in its context.
+ // It panics (via log.Panicf) if stmt has an unexpected type.
+ func (r *resolver) stmt(stmt syntax.Stmt) {
+ switch stmt := stmt.(type) {
+ case *syntax.ExprStmt:
+ r.expr(stmt.X)
+
+ case *syntax.BranchStmt:
+ if r.loops == 0 && (stmt.Token == syntax.BREAK || stmt.Token == syntax.CONTINUE) {
+ r.errorf(stmt.TokenPos, "%s not in a loop", stmt.Token)
+ }
+
+ case *syntax.IfStmt:
+ if !AllowGlobalReassign && r.container().function == nil {
+ r.errorf(stmt.If, "if statement not within a function")
+ }
+ r.expr(stmt.Cond)
+ r.ifstmts++
+ r.stmts(stmt.True)
+ r.stmts(stmt.False)
+ r.ifstmts--
+
+ case *syntax.AssignStmt:
+ // The right-hand side is resolved before the left-hand side is bound.
+ r.expr(stmt.RHS)
+ isAugmented := stmt.Op != syntax.EQ
+ r.assign(stmt.LHS, isAugmented)
+
+ case *syntax.DefStmt:
+ // The function's name is bound in the enclosing block before its body is resolved.
+ r.bind(stmt.Name)
+ fn := &Function{
+ Name: stmt.Name.Name,
+ Pos: stmt.Def,
+ Params: stmt.Params,
+ Body: stmt.Body,
+ }
+ stmt.Function = fn
+ r.function(fn, stmt.Def)
+
+ case *syntax.ForStmt:
+ if !AllowGlobalReassign && r.container().function == nil {
+ r.errorf(stmt.For, "for loop not within a function")
+ }
+ r.expr(stmt.X)
+ const isAugmented = false
+ r.assign(stmt.Vars, isAugmented)
+ r.loops++
+ r.stmts(stmt.Body)
+ r.loops--
+
+ case *syntax.WhileStmt:
+ if !AllowRecursion {
+ r.errorf(stmt.While, doesnt+"support while loops")
+ }
+ if !AllowGlobalReassign && r.container().function == nil {
+ r.errorf(stmt.While, "while loop not within a function")
+ }
+ r.expr(stmt.Cond)
+ r.loops++
+ r.stmts(stmt.Body)
+ r.loops--
+
+ case *syntax.ReturnStmt:
+ if r.container().function == nil {
+ r.errorf(stmt.Return, "return statement not within a function")
+ }
+ if stmt.Result != nil {
+ r.expr(stmt.Result)
+ }
+
+ case *syntax.LoadStmt:
+ // A load statement may not be nested in any other statement.
+ if r.container().function != nil {
+ r.errorf(stmt.Load, "load statement within a function")
+ } else if r.loops > 0 {
+ r.errorf(stmt.Load, "load statement within a loop")
+ } else if r.ifstmts > 0 {
+ r.errorf(stmt.Load, "load statement within a conditional")
+ }
+
+ // stmt.From[i] is the name in the loaded module;
+ // stmt.To[i] is the identifier bound to it in this file.
+ for i, from := range stmt.From {
+ if from.Name == "" {
+ r.errorf(from.NamePos, "load: empty identifier")
+ continue
+ }
+ if from.Name[0] == '_' {
+ r.errorf(from.NamePos, "load: names with leading underscores are not exported: %s", from.Name)
+ }
+
+ id := stmt.To[i]
+ if LoadBindsGlobally {
+ r.bind(id)
+ } else if r.bindLocal(id) && !AllowGlobalReassign {
+ // "Global" in AllowGlobalReassign is a misnomer for "toplevel".
+ // Sadly we can't report the previous declaration
+ // as id.Binding may not be set yet.
+ r.errorf(id.NamePos, "cannot reassign top-level %s", id.Name)
+ }
+ }
+
+ default:
+ log.Panicf("unexpected stmt %T", stmt)
+ }
+ }
+
+ // assign resolves lhs as the target of an assignment: it binds each
+ // identifier, resolves the operands of index and dot expressions, and
+ // recurs into tuple, list, and parenthesized targets.
+ // isAugmented reports whether the assignment is augmented, in which
+ // case tuple and list targets are rejected.
+ // Any other kind of expression is reported as unassignable.
+ func (r *resolver) assign(lhs syntax.Expr, isAugmented bool) {
+ switch lhs := lhs.(type) {
+ case *syntax.Ident:
+ // x = ...
+ r.bind(lhs)
+
+ case *syntax.IndexExpr:
+ // x[i] = ...
+ r.expr(lhs.X)
+ r.expr(lhs.Y)
+
+ case *syntax.DotExpr:
+ // x.f = ...
+ r.expr(lhs.X)
+
+ case *syntax.TupleExpr:
+ // (x, y) = ...
+ if isAugmented {
+ r.errorf(syntax.Start(lhs), "can't use tuple expression in augmented assignment")
+ }
+ for _, elem := range lhs.List {
+ r.assign(elem, isAugmented)
+ }
+
+ case *syntax.ListExpr:
+ // [x, y, z] = ...
+ if isAugmented {
+ r.errorf(syntax.Start(lhs), "can't use list expression in augmented assignment")
+ }
+ for _, elem := range lhs.List {
+ r.assign(elem, isAugmented)
+ }
+
+ case *syntax.ParenExpr:
+ r.assign(lhs.X, isAugmented)
+
+ default:
+ // Derive a readable name from the node's Go type, e.g. *syntax.CallExpr => callexpr.
+ name := strings.ToLower(strings.TrimPrefix(fmt.Sprintf("%T", lhs), "*syntax."))
+ r.errorf(syntax.Start(lhs), "can't assign to %s", name)
+ }
+ }
+
+ // expr resolves the expression e: it records a use of each identifier,
+ // recurs into sub-expressions, opens a new block for each
+ // comprehension and lambda, and checks the form of call arguments.
+ // It panics (via log.Panicf) if e has an unexpected type.
+ func (r *resolver) expr(e syntax.Expr) {
+ switch e := e.(type) {
+ case *syntax.Ident:
+ r.use(e)
+
+ case *syntax.Literal:
+
+ case *syntax.ListExpr:
+ for _, x := range e.List {
+ r.expr(x)
+ }
+
+ case *syntax.CondExpr:
+ r.expr(e.Cond)
+ r.expr(e.True)
+ r.expr(e.False)
+
+ case *syntax.IndexExpr:
+ r.expr(e.X)
+ r.expr(e.Y)
+
+ case *syntax.DictEntry:
+ r.expr(e.Key)
+ r.expr(e.Value)
+
+ case *syntax.SliceExpr:
+ r.expr(e.X)
+ if e.Lo != nil {
+ r.expr(e.Lo)
+ }
+ if e.Hi != nil {
+ r.expr(e.Hi)
+ }
+ if e.Step != nil {
+ r.expr(e.Step)
+ }
+
+ case *syntax.Comprehension:
+ // The 'in' operand of the first clause (always a ForClause)
+ // is resolved in the outer block; consider: [x for x in x].
+ clause := e.Clauses[0].(*syntax.ForClause)
+ r.expr(clause.X)
+
+ // A list/dict comprehension defines a new lexical block.
+ // Locals defined within the block will be allotted
+ // distinct slots in the locals array of the innermost
+ // enclosing container (function/module) block.
+ r.push(&block{comp: e})
+
+ const isAugmented = false
+ r.assign(clause.Vars, isAugmented)
+
+ // The remaining clauses are resolved inside the comprehension's block.
+ for _, clause := range e.Clauses[1:] {
+ switch clause := clause.(type) {
+ case *syntax.IfClause:
+ r.expr(clause.Cond)
+ case *syntax.ForClause:
+ r.assign(clause.Vars, isAugmented)
+ r.expr(clause.X)
+ }
+ }
+ r.expr(e.Body) // body may be *DictEntry
+ r.pop()
+
+ case *syntax.TupleExpr:
+ for _, x := range e.List {
+ r.expr(x)
+ }
+
+ case *syntax.DictExpr:
+ for _, entry := range e.List {
+ entry := entry.(*syntax.DictEntry)
+ r.expr(entry.Key)
+ r.expr(entry.Value)
+ }
+
+ case *syntax.UnaryExpr:
+ r.expr(e.X)
+
+ case *syntax.BinaryExpr:
+ r.expr(e.X)
+ r.expr(e.Y)
+
+ case *syntax.DotExpr:
+ r.expr(e.X)
+ // ignore e.Name
+
+ case *syntax.CallExpr:
+ r.expr(e.Fn)
+ var seenVarargs, seenKwargs bool
+ var seenName map[string]bool
+ var n, p int // number of keyword (n) and positional (p) arguments
+ for _, arg := range e.Args {
+ pos, _ := arg.Span()
+ if unop, ok := arg.(*syntax.UnaryExpr); ok && unop.Op == syntax.STARSTAR {
+ // **kwargs
+ if seenKwargs {
+ r.errorf(pos, "multiple **kwargs not allowed")
+ }
+ seenKwargs = true
+ r.expr(arg)
+ } else if ok && unop.Op == syntax.STAR { // unop and ok are still in scope from the type assertion above
+ // *args
+ if seenKwargs {
+ r.errorf(pos, "*args may not follow **kwargs")
+ } else if seenVarargs {
+ r.errorf(pos, "multiple *args not allowed")
+ }
+ seenVarargs = true
+ r.expr(arg)
+ } else if binop, ok := arg.(*syntax.BinaryExpr); ok && binop.Op == syntax.EQ {
+ // k=v
+ n++
+ if seenKwargs {
+ r.errorf(pos, "keyword argument may not follow **kwargs")
+ } else if seenVarargs {
+ r.errorf(pos, "keyword argument may not follow *args")
+ }
+ // NOTE(review): unchecked type assertion; presumably the parser
+ // guarantees the left operand of k=v is an identifier -- confirm.
+ x := binop.X.(*syntax.Ident)
+ if seenName[x.Name] {
+ r.errorf(x.NamePos, "keyword argument %s repeated", x.Name)
+ } else {
+ if seenName == nil {
+ seenName = make(map[string]bool)
+ }
+ seenName[x.Name] = true
+ }
+ // Only the value is resolved; the keyword name is not a variable reference.
+ r.expr(binop.Y)
+ } else {
+ // positional argument
+ p++
+ if seenVarargs {
+ r.errorf(pos, "positional argument may not follow *args")
+ } else if seenKwargs {
+ r.errorf(pos, "positional argument may not follow **kwargs")
+ } else if len(seenName) > 0 {
+ r.errorf(pos, "positional argument may not follow named")
+ }
+ r.expr(arg)
+ }
+ }
+
+ // Fail gracefully if compiler-imposed limit is exceeded.
+ if p >= 256 {
+ pos, _ := e.Span()
+ r.errorf(pos, "%v positional arguments in call, limit is 255", p)
+ }
+ if n >= 256 {
+ pos, _ := e.Span()
+ r.errorf(pos, "%v keyword arguments in call, limit is 255", n)
+ }
+
+ case *syntax.LambdaExpr:
+ // A lambda is resolved as a function whose body is a single synthetic return statement.
+ fn := &Function{
+ Name: "lambda",
+ Pos: e.Lambda,
+ Params: e.Params,
+ Body: []syntax.Stmt{&syntax.ReturnStmt{Result: e.Body}},
+ }
+ e.Function = fn
+ r.function(fn, e.Lambda)
+
+ case *syntax.ParenExpr:
+ r.expr(e.X)
+
+ default:
+ log.Panicf("unexpected expr %T", e)
+ }
+ }
+
+ // function resolves the parameters and body of function in a new
+ // function block. It checks the order and uniqueness of the
+ // parameters, binds them as locals, and sets function.HasVarargs,
+ // function.HasKwargs, and function.NumKwonlyParams.
+ // Uses within the body that are not local to the function remain
+ // recorded in the block until the end of the module.
+ // The pos parameter is not used by this body.
+ func (r *resolver) function(function *Function, pos syntax.Position) {
+ // Resolve defaults in enclosing environment.
+ for _, param := range function.Params {
+ if binary, ok := param.(*syntax.BinaryExpr); ok {
+ r.expr(binary.Y)
+ }
+ }
+
+ // Enter function block.
+ b := &block{function: function}
+ r.push(b)
+
+ var seenOptional bool
+ var star *syntax.UnaryExpr // * or *args param
+ var starStar *syntax.Ident // **kwargs ident
+ var numKwonlyParams int
+ for _, param := range function.Params {
+ switch param := param.(type) {
+ case *syntax.Ident:
+ // e.g. x
+ if starStar != nil {
+ r.errorf(param.NamePos, "required parameter may not follow **%s", starStar.Name)
+ } else if star != nil {
+ numKwonlyParams++
+ } else if seenOptional {
+ r.errorf(param.NamePos, "required parameter may not follow optional")
+ }
+ if r.bind(param) {
+ r.errorf(param.NamePos, "duplicate parameter: %s", param.Name)
+ }
+
+ case *syntax.BinaryExpr:
+ // e.g. y=dflt
+ if starStar != nil {
+ r.errorf(param.OpPos, "optional parameter may not follow **%s", starStar.Name)
+ } else if star != nil {
+ numKwonlyParams++
+ }
+ if id := param.X.(*syntax.Ident); r.bind(id) {
+ r.errorf(param.OpPos, "duplicate parameter: %s", id.Name)
+ }
+ seenOptional = true
+
+ case *syntax.UnaryExpr:
+ // * or *args or **kwargs
+ if param.Op == syntax.STAR {
+ if starStar != nil {
+ r.errorf(param.OpPos, "* parameter may not follow **%s", starStar.Name)
+ } else if star != nil {
+ r.errorf(param.OpPos, "multiple * parameters not allowed")
+ } else {
+ star = param
+ }
+ } else {
+ if starStar != nil {
+ r.errorf(param.OpPos, "multiple ** parameters not allowed")
+ }
+ starStar = param.X.(*syntax.Ident)
+ }
+ }
+ }
+
+ // Bind the *args and **kwargs parameters at the end,
+ // so that regular parameters a/b/c are contiguous and
+ // there is no hole for the "*":
+ // def f(a, b, *args, c=0, **kwargs)
+ // def f(a, b, *, c=0, **kwargs)
+ if star != nil {
+ if id, _ := star.X.(*syntax.Ident); id != nil {
+ // *args
+ if r.bind(id) {
+ r.errorf(id.NamePos, "duplicate parameter: %s", id.Name)
+ }
+ function.HasVarargs = true
+ } else if numKwonlyParams == 0 {
+ r.errorf(star.OpPos, "bare * must be followed by keyword-only parameters")
+ }
+ }
+ if starStar != nil {
+ if r.bind(starStar) {
+ r.errorf(starStar.NamePos, "duplicate parameter: %s", starStar.Name)
+ }
+ function.HasKwargs = true
+ }
+
+ function.NumKwonlyParams = numKwonlyParams
+ r.stmts(function.Body)
+
+ // Resolve all uses of this function's local vars,
+ // and keep just the remaining uses of free/global vars.
+ b.resolveLocalUses()
+
+ // Leave function block.
+ r.pop()
+
+ // References within the function body to globals are not
+ // resolved until the end of the module.
+ }
+
+ // resolveNonLocalUses resolves the uses that remain in b and in all
+ // its descendant blocks, setting the Binding of each identifier to
+ // the result of a lexical lookup from the use's own environment.
+ func (r *resolver) resolveNonLocalUses(b *block) {
+ // First resolve inner blocks.
+ for _, child := range b.children {
+ r.resolveNonLocalUses(child)
+ }
+ for _, use := range b.uses {
+ use.id.Binding = r.lookupLexical(use, use.env)
+ }
+ }
+
+ // lookupLocal looks up an identifier within its immediately enclosing function.
+ // It searches outwards from use.env, stopping after the first function
+ // block (or at the root if there is none), and returns nil if no block
+ // on that path binds the name.
+ // It panics if it finds a binding whose Scope is Free.
+ func lookupLocal(use use) *Binding {
+ for env := use.env; env != nil; env = env.parent {
+ if bind, ok := env.bindings[use.id.Name]; ok {
+ if bind.Scope == Free {
+ // shouldn't exist till later
+ log.Panicf("%s: internal error: %s, %v", use.id.NamePos, use.id.Name, bind)
+ }
+ return bind // found
+ }
+ if env.function != nil {
+ break
+ }
+ }
+ return nil // not found in this function
+ }
+
+ // lookupLexical looks up an identifier use.id within its lexically enclosing environment.
+ // The use.env field captures the original environment for error reporting.
+ //
+ // The search starts at env and proceeds outwards to the file block,
+ // where the name is resolved by useToplevel. When a name found in an
+ // enclosing function is reached from a function block, a Free binding
+ // is created for that function, the enclosing binding is appended to
+ // its FreeVars, and an enclosing Local binding becomes a Cell.
+ // The result is recorded in each block visited on the way.
+ func (r *resolver) lookupLexical(use use, env *block) (bind *Binding) {
+ if debug {
+ fmt.Printf("lookupLexical %s in %s = ...\n", use.id.Name, env)
+ defer func() { fmt.Printf("= %v\n", bind) }()
+ }
+
+ // Is this the file block?
+ if env == r.file {
+ return r.useToplevel(use) // file-local, global, predeclared, or not found
+ }
+
+ // Defined in this block?
+ bind, ok := env.bindings[use.id.Name]
+ if !ok {
+ // Defined in parent block?
+ bind = r.lookupLexical(use, env.parent)
+ if env.function != nil && (bind.Scope == Local || bind.Scope == Free || bind.Scope == Cell) {
+ // Found in parent block, which belongs to enclosing function.
+ // Add the parent's binding to the function's freevars,
+ // and add a new 'free' binding to the inner function's block,
+ // and turn the parent's local into cell.
+ if bind.Scope == Local {
+ bind.Scope = Cell
+ }
+ index := len(env.function.FreeVars)
+ env.function.FreeVars = append(env.function.FreeVars, bind)
+ bind = &Binding{
+ First: bind.First,
+ Scope: Free,
+ Index: index,
+ }
+ if debug {
+ fmt.Printf("creating freevar %v in function at %s: %s\n",
+ len(env.function.FreeVars), env.function.Pos, use.id.Name)
+ }
+ }
+
+ // Memoize, to avoid duplicate free vars
+ // and redundant global (failing) lookups.
+ env.bind(use.id.Name, bind)
+ }
+ return bind
+ }
diff --git a/resolve/resolve_test.go b/resolve/resolve_test.go
new file mode 100644
index 0000000..50d1cc5
--- /dev/null
+++ b/resolve/resolve_test.go
@@ -0,0 +1,89 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package resolve_test
+
+import (
+ "strings"
+ "testing"
+
+ "go.starlark.net/internal/chunkedfile"
+ "go.starlark.net/resolve"
+ "go.starlark.net/starlarktest"
+ "go.starlark.net/syntax"
+)
+
+ // setOptions sets each of the resolver's global option flags according
+ // to whether src contains the corresponding "option:" marker,
+ // so a flag is cleared when its marker is absent.
+ func setOptions(src string) {
+ resolve.AllowGlobalReassign = option(src, "globalreassign")
+ resolve.AllowRecursion = option(src, "recursion")
+ resolve.AllowSet = option(src, "set")
+ resolve.LoadBindsGlobally = option(src, "loadbindsglobally")
+ }
+
+ // option reports whether chunk contains the substring "option:" followed by name.
+ func option(chunk, name string) bool {
+ return strings.Contains(chunk, "option:"+name)
+ }
+
+ // TestResolve parses and resolves each chunk of testdata/resolve.star
+ // and reports every resolver error to the chunk, which is then marked done.
+ func TestResolve(t *testing.T) {
+ defer setOptions("") // clear all option flags when the test ends
+ filename := starlarktest.DataFile("resolve", "testdata/resolve.star")
+ for _, chunk := range chunkedfile.Read(filename, t) {
+ f, err := syntax.Parse(filename, chunk.Source, 0)
+ if err != nil {
+ t.Error(err)
+ continue
+ }
+
+ // A chunk may set options by containing e.g. "option:recursion".
+ setOptions(chunk.Source)
+
+ if err := resolve.File(f, isPredeclared, isUniversal); err != nil {
+ for _, err := range err.(resolve.ErrorList) {
+ chunk.GotError(int(err.Pos.Line), err.Msg)
+ }
+ }
+ chunk.Done()
+ }
+ }
+
+ // TestDefVarargsAndKwargsSet checks that resolving a def statement with
+ // *args and **kwargs parameters sets HasVarargs and HasKwargs on its Function.
+ func TestDefVarargsAndKwargsSet(t *testing.T) {
+ source := "def f(*args, **kwargs): pass\n"
+ file, err := syntax.Parse("foo.star", source, 0)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if err := resolve.File(file, isPredeclared, isUniversal); err != nil {
+ t.Fatal(err)
+ }
+ fn := file.Stmts[0].(*syntax.DefStmt).Function.(*resolve.Function)
+ if !fn.HasVarargs {
+ t.Error("HasVarargs not set")
+ }
+ if !fn.HasKwargs {
+ t.Error("HasKwargs not set")
+ }
+ }
+
+ // TestLambdaVarargsAndKwargsSet checks that resolving a lambda with
+ // *args and **kwargs parameters sets HasVarargs and HasKwargs on its Function.
+ func TestLambdaVarargsAndKwargsSet(t *testing.T) {
+ resolve.AllowLambda = true
+ source := "f = lambda *args, **kwargs: 0\n"
+ file, err := syntax.Parse("foo.star", source, 0)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if err := resolve.File(file, isPredeclared, isUniversal); err != nil {
+ t.Fatal(err)
+ }
+ lam := file.Stmts[0].(*syntax.AssignStmt).RHS.(*syntax.LambdaExpr).Function.(*resolve.Function)
+ if !lam.HasVarargs {
+ t.Error("HasVarargs not set")
+ }
+ if !lam.HasKwargs {
+ t.Error("HasKwargs not set")
+ }
+ }
+
+ // isPredeclared is the tests' module-specific predicate: only "M" is predeclared.
+ func isPredeclared(name string) bool { return name == "M" }
+
+ // isUniversal is the tests' universal predicate: only "U" and "float" are universal.
+ func isUniversal(name string) bool { return name == "U" || name == "float" }
diff --git a/resolve/testdata/resolve.star b/resolve/testdata/resolve.star
new file mode 100644
index 0000000..ce67110
--- /dev/null
+++ b/resolve/testdata/resolve.star
@@ -0,0 +1,383 @@
+# Tests of resolver errors.
+#
+# The initial environment contains the predeclared names "M"
+# (module-specific) and "U" (universal). This distinction
+# should be unobservable to the Starlark program.
+
+# use of declared global
+x = 1
+_ = x
+
+---
+# premature use of global is not a static error;
+# see github.com/google/skylark/issues/116.
+_ = x
+x = 1
+
+---
+# use of undefined global
+_ = x ### "undefined: x"
+
+---
+# redeclaration of global
+x = 1
+x = 2 ### "cannot reassign global x declared at .*resolve.star:23:1"
+
+---
+# Redeclaration of predeclared names is allowed.
+#
+# This rule permits tool maintainers to add members to the predeclared
+# environment without breaking existing programs.
+
+# module-specific predeclared name
+M = 1 # ok
+M = 2 ### "cannot reassign global M declared at .*/resolve.star"
+
+# universal predeclared name
+U = 1 # ok
+U = 1 ### "cannot reassign global U declared at .*/resolve.star"
+
+---
+# A global declaration shadows all references to a predeclared;
+# see github.com/google/skylark/issues/116.
+
+a = U # ok: U is a reference to the global defined on the next line.
+U = 1
+
+---
+# reference to predeclared name
+M()
+
+---
+# locals may be referenced before they are defined
+
+def f():
+ M(x) # dynamic error
+ x = 1
+
+---
+# Various forms of assignment:
+
+def f(x): # parameter
+ M(x)
+ M(y) ### "undefined: y"
+
+(a, b) = 1, 2
+M(a)
+M(b)
+M(c) ### "undefined: c"
+
+[p, q] = 1, 2
+M(p)
+M(q)
+M(r) ### "undefined: r"
+
+---
+# a comprehension introduces a separate lexical block
+
+_ = [x for x in "abc"]
+M(x) ### "undefined: x"
+
+---
+# Functions may have forward refs.
+def f():
+ g()
+ h() ### "undefined: h"
+ def inner():
+ i()
+ i = lambda: 0
+
+def g():
+ f()
+
+---
+# It is not permitted to rebind a global using a += assignment.
+
+x = [1]
+x.extend([2]) # ok
+x += [3] ### `cannot reassign global x`
+
+def f():
+ x += [4] # x is local to f
+
+y = 1
+y += 2 ### `cannot reassign global y`
+z += 3 # ok (but fails dynamically because z is undefined)
+
+---
+def f(a):
+ if 1==1:
+ b = 1
+ c = 1
+ M(a) # ok: param
+ M(b) # ok: maybe bound local
+ M(c) # ok: bound local
+ M(d) # NB: we don't do a use-before-def check on local vars!
+ M(e) # ok: global
+ M(f) # ok: global
+ d = 1
+
+e = 1
+
+---
+# This program should resolve successfully but fail dynamically.
+x = 1
+
+def f():
+ M(x) # dynamic error: reference to undefined local
+ x = 2
+
+f()
+
+---
+load("module", "name") # ok
+
+def f():
+ load("foo", "bar") ### "load statement within a function"
+
+load("foo",
+ "", ### "load: empty identifier"
+ "_a", ### "load: names with leading underscores are not exported: _a"
+ b="", ### "load: empty identifier"
+ c="_d", ### "load: names with leading underscores are not exported: _d"
+ _e="f") # ok
+
+---
+# option:globalreassign
+if M:
+ load("foo", "bar") ### "load statement within a conditional"
+
+---
+# option:globalreassign
+for x in M:
+ load("foo", "bar") ### "load statement within a loop"
+
+---
+# option:recursion option:globalreassign
+while M:
+ load("foo", "bar") ### "load statement within a loop"
+
+---
+# return statements must be within a function
+
+return ### "return statement not within a function"
+
+---
+# if-statements and for-loops at top-level are forbidden
+# (without globalreassign option)
+
+for x in "abc": ### "for loop not within a function"
+ pass
+
+if x: ### "if statement not within a function"
+ pass
+
+---
+# option:globalreassign
+
+for x in "abc": # ok
+ pass
+
+if x: # ok
+ pass
+
+---
+# while loops are forbidden (without -recursion option)
+
+def f():
+ while U: ### "dialect does not support while loops"
+ pass
+
+---
+# option:recursion
+
+def f():
+ while U: # ok
+ pass
+
+while U: ### "while loop not within a function"
+ pass
+
+---
+# option:globalreassign option:recursion
+
+while U: # ok
+ pass
+
+---
+# The parser allows any expression on the LHS of an assignment.
+
+1 = 0 ### "can't assign to literal"
+1+2 = 0 ### "can't assign to binaryexpr"
+f() = 0 ### "can't assign to callexpr"
+
+[a, b] = 0
+[c, d] += 0 ### "can't use list expression in augmented assignment"
+(e, f) += 0 ### "can't use tuple expression in augmented assignment"
+
+[] = 0 # ok
+() = 0 # ok
+
+---
+# break and continue statements must appear within a loop
+
+break ### "break not in a loop"
+
+continue ### "continue not in a loop"
+
+pass
+
+---
+# Positional arguments (and required parameters)
+# must appear before named arguments (and optional parameters).
+
+M(x=1, 2) ### `positional argument may not follow named`
+
+def f(x=1, y): pass ### `required parameter may not follow optional`
+---
+# No parameters may follow **kwargs in a declaration.
+
+def f(**kwargs, x): ### `parameter may not follow \*\*kwargs`
+ pass
+
+def g(**kwargs, *args): ### `\* parameter may not follow \*\*kwargs`
+ pass
+
+def h(**kwargs1, **kwargs2): ### `multiple \*\* parameters not allowed`
+ pass
+
+---
+# Only keyword-only params and **kwargs may follow *args in a declaration.
+
+def f(*args, x): # ok
+ pass
+
+def g(*args1, *args2): ### `multiple \* parameters not allowed`
+ pass
+
+def h(*, ### `bare \* must be followed by keyword-only parameters`
+ *): ### `multiple \* parameters not allowed`
+ pass
+
+def i(*args, *): ### `multiple \* parameters not allowed`
+ pass
+
+def j(*, ### `bare \* must be followed by keyword-only parameters`
+ *args): ### `multiple \* parameters not allowed`
+ pass
+
+def k(*, **kwargs): ### `bare \* must be followed by keyword-only parameters`
+ pass
+
+def l(*): ### `bare \* must be followed by keyword-only parameters`
+ pass
+
+def m(*args, a=1, **kwargs): # ok
+ pass
+
+def n(*, a=1, **kwargs): # ok
+ pass
+
+---
+# No arguments may follow **kwargs in a call.
+def f(*args, **kwargs):
+ pass
+
+f(**{}, 1) ### `argument may not follow \*\*kwargs`
+f(**{}, x=1) ### `argument may not follow \*\*kwargs`
+f(**{}, *[]) ### `\*args may not follow \*\*kwargs`
+f(**{}, **{}) ### `multiple \*\*kwargs not allowed`
+
+---
+# Only **kwargs may follow *args in a call.
+def f(*args, **kwargs):
+ pass
+
+f(*[], 1) ### `positional argument may not follow \*args`
+f(*[], a=1) ### `keyword argument may not follow \*args`
+f(*[], *[]) ### `multiple \*args not allowed`
+f(*[], **{}) # ok
+
+---
+# Parameter names must be unique.
+
+def f(a, b, a): pass ### "duplicate parameter: a"
+def g(args, b, *args): pass ### "duplicate parameter: args"
+def h(kwargs, a, **kwargs): pass ### "duplicate parameter: kwargs"
+def i(*x, **x): pass ### "duplicate parameter: x"
+
+---
+# Floating-point support is now standard.
+a = float("3.141")
+b = 1 / 2
+c = 3.141
+
+---
+# option:globalreassign
+# Legacy Bazel (and Python) semantics: def must precede use even for globals.
+
+_ = x ### `undefined: x`
+x = 1
+
+---
+# option:globalreassign
+# Legacy Bazel (and Python) semantics: reassignment of globals is allowed.
+x = 1
+x = 2 # ok
+
+---
+# option:globalreassign
+# Redeclaration of predeclared names is allowed.
+
+# module-specific predeclared name
+M = 1 # ok
+M = 2 # ok (legacy)
+
+# universal predeclared name
+U = 1 # ok
+U = 1 # ok (legacy)
+
+---
+# https://github.com/bazelbuild/starlark/issues/21
+def f(**kwargs): pass
+f(a=1, a=1) ### `keyword argument a repeated`
+
+
+---
+# spelling
+
+print = U
+
+hello = 1
+print(hollo) ### `undefined: hollo \(did you mean hello\?\)`
+
+def f(abc):
+ print(abd) ### `undefined: abd \(did you mean abc\?\)`
+ print(goodbye) ### `undefined: goodbye$`
+
+---
+load("module", "x") # ok
+x = 1 ### `cannot reassign local x`
+load("module", "x") ### `cannot reassign top-level x`
+
+---
+# option:loadbindsglobally
+load("module", "x") # ok
+x = 1 ### `cannot reassign global x`
+load("module", "x") ### `cannot reassign global x`
+
+---
+# option:globalreassign
+load("module", "x") # ok
+x = 1 # ok
+load("module", "x") # ok
+
+---
+# option:globalreassign option:loadbindsglobally
+load("module", "x") # ok
+x = 1
+load("module", "x") # ok
+
+---
+_ = x # forward ref to file-local
+load("module", "x") # ok
diff --git a/starlark/bench_test.go b/starlark/bench_test.go
new file mode 100644
index 0000000..7cfefe0
--- /dev/null
+++ b/starlark/bench_test.go
@@ -0,0 +1,169 @@
+// Copyright 2018 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark_test
+
+import (
+ "bytes"
+ "fmt"
+ "io/ioutil"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "go.starlark.net/starlark"
+ "go.starlark.net/starlarktest"
+)
+
+func Benchmark(b *testing.B) {
+ defer setOptions("")
+
+ testdata := starlarktest.DataFile("starlark", ".")
+ thread := new(starlark.Thread)
+ for _, file := range []string{
+ "testdata/benchmark.star",
+ // ...
+ } {
+
+ filename := filepath.Join(testdata, file)
+
+ src, err := ioutil.ReadFile(filename)
+ if err != nil {
+ b.Error(err)
+ continue
+ }
+ setOptions(string(src))
+
+ // Evaluate the file once.
+ globals, err := starlark.ExecFile(thread, filename, src, nil)
+ if err != nil {
+ reportEvalError(b, err)
+ }
+
+ // Repeatedly call each global function named bench_* as a benchmark.
+ for _, name := range globals.Keys() {
+ value := globals[name]
+ if fn, ok := value.(*starlark.Function); ok && strings.HasPrefix(name, "bench_") {
+ b.Run(name, func(b *testing.B) {
+ _, err := starlark.Call(thread, fn, starlark.Tuple{benchmark{b}}, nil)
+ if err != nil {
+ reportEvalError(b, err)
+ }
+ })
+ }
+ }
+ }
+}
+
+// A benchmark is passed to each bench_xyz(b) function in a bench_*.star file.
+// It provides b.n, the number of iterations that must be executed by the function,
+// which is typically of the form:
+//
+// def bench_foo(b):
+// for _ in range(b.n):
+// ...work...
+//
+// It also provides stop, start, and restart methods to stop the clock in case
+// there is significant set-up work that should not count against the measured
+// operation.
+//
+// (This interface is inspired by Go's testing.B, and is also implemented
+// by the java.starlark.net implementation; see
+// https://github.com/bazelbuild/starlark/pull/75#pullrequestreview-275604129.)
+type benchmark struct {
+ b *testing.B
+}
+
+func (benchmark) Freeze() {}
+func (benchmark) Truth() starlark.Bool { return true }
+func (benchmark) Type() string { return "benchmark" }
+func (benchmark) String() string { return "<benchmark>" }
+func (benchmark) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: benchmark") }
+func (benchmark) AttrNames() []string { return []string{"n", "restart", "start", "stop"} }
+func (b benchmark) Attr(name string) (starlark.Value, error) {
+ switch name {
+ case "n":
+ return starlark.MakeInt(b.b.N), nil
+ case "restart":
+ return benchmarkRestart.BindReceiver(b), nil
+ case "start":
+ return benchmarkStart.BindReceiver(b), nil
+ case "stop":
+ return benchmarkStop.BindReceiver(b), nil
+ }
+ return nil, nil
+}
+
+var (
+ benchmarkRestart = starlark.NewBuiltin("restart", benchmarkRestartImpl)
+ benchmarkStart = starlark.NewBuiltin("start", benchmarkStartImpl)
+ benchmarkStop = starlark.NewBuiltin("stop", benchmarkStopImpl)
+)
+
+func benchmarkRestartImpl(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ b.Receiver().(benchmark).b.ResetTimer()
+ return starlark.None, nil
+}
+
+func benchmarkStartImpl(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ b.Receiver().(benchmark).b.StartTimer()
+ return starlark.None, nil
+}
+
+func benchmarkStopImpl(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ b.Receiver().(benchmark).b.StopTimer()
+ return starlark.None, nil
+}
+
+// BenchmarkProgram measures operations relevant to compiled programs.
+// TODO(adonovan): use a bigger testdata program.
+func BenchmarkProgram(b *testing.B) {
+ // Measure time to read a source file (approx 600us but depends on hardware and file system).
+ filename := starlarktest.DataFile("starlark", "testdata/paths.star")
+ var src []byte
+ b.Run("read", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var err error
+ src, err = ioutil.ReadFile(filename)
+ if err != nil {
+ b.Fatal(err)
+ }
+ }
+ })
+
+ // Measure time to turn a source filename into a compiled program (approx 450us).
+ var prog *starlark.Program
+ b.Run("compile", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var err error
+ _, prog, err = starlark.SourceProgram(filename, src, starlark.StringDict(nil).Has)
+ if err != nil {
+ b.Fatal(err)
+ }
+ }
+ })
+
+ // Measure time to encode a compiled program to a memory buffer
+ // (approx 20us; was 75-120us with gob encoding).
+ var out bytes.Buffer
+ b.Run("encode", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ out.Reset()
+ if err := prog.Write(&out); err != nil {
+ b.Fatal(err)
+ }
+ }
+ })
+
+ // Measure time to decode a compiled program from a memory buffer
+ // (approx 20us; was 135-250us with gob encoding)
+ b.Run("decode", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ in := bytes.NewReader(out.Bytes())
+ if _, err := starlark.CompiledProgram(in); err != nil {
+ b.Fatal(err)
+ }
+ }
+ })
+}
diff --git a/starlark/debug.go b/starlark/debug.go
new file mode 100644
index 0000000..22a2124
--- /dev/null
+++ b/starlark/debug.go
@@ -0,0 +1,42 @@
+package starlark
+
+import "go.starlark.net/syntax"
+
+// This file defines an experimental API for the debugging tools.
+// Some of these declarations expose details of internal packages.
+// (The debugger makes liberal use of exported fields of unexported types.)
+// Breaking changes may occur without notice.
+
+// Local returns the value of the i'th local variable.
+// It may be nil if not yet assigned.
+//
+// Local may be called only for frames whose Callable is a *Function (a
+// function defined by Starlark source code), and only while the frame
+// is active; it will panic otherwise.
+//
+// This function is provided only for debugging tools.
+//
+// THIS API IS EXPERIMENTAL AND MAY CHANGE WITHOUT NOTICE.
+func (fr *frame) Local(i int) Value { return fr.locals[i] }
+
+// DebugFrame is the debugger API for a frame of the interpreter's call stack.
+//
+// Most applications have no need for this API; use CallFrame instead.
+//
+// Clients must not retain a DebugFrame nor call any of its methods once
+// the current built-in call has returned or execution has resumed
+// after a breakpoint as this may have unpredictable effects, including
+// but not limited to retention of objects that would otherwise be garbage.
+type DebugFrame interface {
+ Callable() Callable // returns the frame's function
+ Local(i int) Value // returns the value of the (Starlark) frame's ith local variable
+ Position() syntax.Position // returns the current position of execution in this frame
+}
+
+// DebugFrame returns the debugger interface for
+// the specified frame of the interpreter's call stack.
+// Frame numbering is as for Thread.CallFrame.
+//
+// This function is intended for use in debugging tools.
+// Most applications should have no need for it; use CallFrame instead.
+func (thread *Thread) DebugFrame(depth int) DebugFrame { return thread.frameAt(depth) }
diff --git a/starlark/empty.s b/starlark/empty.s
new file mode 100644
index 0000000..3b82169
--- /dev/null
+++ b/starlark/empty.s
@@ -0,0 +1,3 @@
+// The presence of this file allows the package to use the
+// "go:linkname" hack to call non-exported functions in the
+// Go runtime, such as hardware-accelerated string hashing.
diff --git a/starlark/eval.go b/starlark/eval.go
new file mode 100644
index 0000000..d0ad91f
--- /dev/null
+++ b/starlark/eval.go
@@ -0,0 +1,1618 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark
+
+import (
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "math/big"
+ "sort"
+ "strings"
+ "sync/atomic"
+ "time"
+ "unicode"
+ "unicode/utf8"
+ "unsafe"
+
+ "go.starlark.net/internal/compile"
+ "go.starlark.net/internal/spell"
+ "go.starlark.net/resolve"
+ "go.starlark.net/syntax"
+)
+
+// A Thread contains the state of a Starlark thread,
+// such as its call stack and thread-local storage.
+// The Thread is threaded throughout the evaluator.
+type Thread struct {
+ // Name is an optional name that describes the thread, for debugging.
+ Name string
+
+ // stack is the stack of (internal) call frames.
+ stack []*frame
+
+ // Print is the client-supplied implementation of the Starlark
+ // 'print' function. If nil, fmt.Fprintln(os.Stderr, msg) is
+ // used instead.
+ Print func(thread *Thread, msg string)
+
+ // Load is the client-supplied implementation of module loading.
+ // Repeated calls with the same module name must return the same
+ // module environment or error.
+ // The error message need not include the module name.
+ //
+ // See example_test.go for some example implementations of Load.
+ Load func(thread *Thread, module string) (StringDict, error)
+
+ // steps counts abstract computation steps executed by this thread.
+ steps, maxSteps uint64
+
+ // cancelReason records the reason from the first call to Cancel.
+ cancelReason *string
+
+ // locals holds arbitrary "thread-local" Go values belonging to the client.
+ // They are accessible to the client but not to any Starlark program.
+ locals map[string]interface{}
+
+ // proftime holds the accumulated execution time since the last profile event.
+ proftime time.Duration
+}
+
+// ExecutionSteps returns a count of abstract computation steps executed
+// by this thread. It is incremented by the interpreter. It may be used
+// as a measure of the approximate cost of Starlark execution, by
+// computing the difference in its value before and after a computation.
+//
+// The precise meaning of "step" is not specified and may change.
+func (thread *Thread) ExecutionSteps() uint64 {
+ return thread.steps
+}
+
+// SetMaxExecutionSteps sets a limit on the number of Starlark
+// computation steps that may be executed by this thread. If the
+// thread's step counter exceeds this limit, the interpreter calls
+// thread.Cancel("too many steps").
+func (thread *Thread) SetMaxExecutionSteps(max uint64) {
+ thread.maxSteps = max
+}
+
+// Cancel causes execution of Starlark code in the specified thread to
+// promptly fail with an EvalError that includes the specified reason.
+// There may be a delay before the interpreter observes the cancellation
+// if the thread is currently in a call to a built-in function.
+//
+// Cancellation cannot be undone.
+//
+// Unlike most methods of Thread, it is safe to call Cancel from any
+// goroutine, even if the thread is actively executing.
+func (thread *Thread) Cancel(reason string) {
+ // Atomically set cancelReason, preserving earlier reason if any.
+ atomic.CompareAndSwapPointer((*unsafe.Pointer)(unsafe.Pointer(&thread.cancelReason)), nil, unsafe.Pointer(&reason))
+}
+
+// SetLocal sets the thread-local value associated with the specified key.
+// It must not be called after execution begins.
+func (thread *Thread) SetLocal(key string, value interface{}) {
+ if thread.locals == nil {
+ thread.locals = make(map[string]interface{})
+ }
+ thread.locals[key] = value
+}
+
+// Local returns the thread-local value associated with the specified key.
+func (thread *Thread) Local(key string) interface{} {
+ return thread.locals[key]
+}
+
+// CallFrame returns a copy of the specified frame of the callstack.
+// It should only be used in built-ins called from Starlark code.
+// Depth 0 means the frame of the built-in itself, 1 is its caller, and so on.
+//
+// It is equivalent to CallStack().At(depth), but more efficient.
+func (thread *Thread) CallFrame(depth int) CallFrame {
+ return thread.frameAt(depth).asCallFrame()
+}
+
+func (thread *Thread) frameAt(depth int) *frame {
+ return thread.stack[len(thread.stack)-1-depth]
+}
+
+// CallStack returns a new slice containing the thread's stack of call frames.
+func (thread *Thread) CallStack() CallStack {
+ frames := make([]CallFrame, len(thread.stack))
+ for i, fr := range thread.stack {
+ frames[i] = fr.asCallFrame()
+ }
+ return frames
+}
+
+// CallStackDepth returns the number of frames in the current call stack.
+func (thread *Thread) CallStackDepth() int { return len(thread.stack) }
+
+// A StringDict is a mapping from names to values, and represents
+// an environment such as the global variables of a module.
+// It is not a true starlark.Value.
+type StringDict map[string]Value
+
+// Keys returns a new sorted slice of d's keys.
+func (d StringDict) Keys() []string {
+ names := make([]string, 0, len(d))
+ for name := range d {
+ names = append(names, name)
+ }
+ sort.Strings(names)
+ return names
+}
+
+func (d StringDict) String() string {
+ buf := new(strings.Builder)
+ buf.WriteByte('{')
+ sep := ""
+ for _, name := range d.Keys() {
+ buf.WriteString(sep)
+ buf.WriteString(name)
+ buf.WriteString(": ")
+ writeValue(buf, d[name], nil)
+ sep = ", "
+ }
+ buf.WriteByte('}')
+ return buf.String()
+}
+
+func (d StringDict) Freeze() {
+ for _, v := range d {
+ v.Freeze()
+ }
+}
+
+// Has reports whether the dictionary contains the specified key.
+func (d StringDict) Has(key string) bool { _, ok := d[key]; return ok }
+
+// A frame records a call to a Starlark function (including module toplevel)
+// or a built-in function or method.
+type frame struct {
+ callable Callable // current function (or toplevel) or built-in
+ pc uint32 // program counter (Starlark frames only)
+ locals []Value // local variables (Starlark frames only)
+ spanStart int64 // start time of current profiler span
+}
+
+// Position returns the source position of the current point of execution in this frame.
+func (fr *frame) Position() syntax.Position {
+ switch c := fr.callable.(type) {
+ case *Function:
+ // Starlark function
+ return c.funcode.Position(fr.pc)
+ case callableWithPosition:
+ // If a built-in Callable defines
+ // a Position method, use it.
+ return c.Position()
+ }
+ return syntax.MakePosition(&builtinFilename, 0, 0)
+}
+
+var builtinFilename = "<builtin>"
+
+// Callable returns the frame's function or built-in.
+func (fr *frame) Callable() Callable { return fr.callable }
+
+// A CallStack is a stack of call frames, outermost first.
+type CallStack []CallFrame
+
+// At returns a copy of the frame at depth i.
+// At(0) returns the topmost frame.
+func (stack CallStack) At(i int) CallFrame { return stack[len(stack)-1-i] }
+
+// Pop removes and returns the topmost frame.
+func (stack *CallStack) Pop() CallFrame {
+ last := len(*stack) - 1
+ top := (*stack)[last]
+ *stack = (*stack)[:last]
+ return top
+}
+
+// String returns a user-friendly description of the stack.
+func (stack CallStack) String() string {
+ out := new(strings.Builder)
+ if len(stack) > 0 {
+ fmt.Fprintf(out, "Traceback (most recent call last):\n")
+ }
+ for _, fr := range stack {
+ fmt.Fprintf(out, " %s: in %s\n", fr.Pos, fr.Name)
+ }
+ return out.String()
+}
+
+// An EvalError is a Starlark evaluation error and
+// a copy of the thread's stack at the moment of the error.
+type EvalError struct {
+ Msg string
+ CallStack CallStack
+ cause error
+}
+
+// A CallFrame represents the function name and current
+// position of execution of an enclosing call frame.
+type CallFrame struct {
+ Name string
+ Pos syntax.Position
+}
+
+func (fr *frame) asCallFrame() CallFrame {
+ return CallFrame{
+ Name: fr.Callable().Name(),
+ Pos: fr.Position(),
+ }
+}
+
+func (thread *Thread) evalError(err error) *EvalError {
+ return &EvalError{
+ Msg: err.Error(),
+ CallStack: thread.CallStack(),
+ cause: err,
+ }
+}
+
+func (e *EvalError) Error() string { return e.Msg }
+
+// Backtrace returns a user-friendly error message describing the stack
+// of calls that led to this error.
+func (e *EvalError) Backtrace() string {
+ // If the topmost stack frame is a built-in function,
+ // remove it from the stack and report it as "Error in fn:".
+ stack := e.CallStack
+ suffix := ""
+ if last := len(stack) - 1; last >= 0 && stack[last].Pos.Filename() == builtinFilename {
+ suffix = " in " + stack[last].Name
+ stack = stack[:last]
+ }
+ return fmt.Sprintf("%sError%s: %s", stack, suffix, e.Msg)
+}
+
+func (e *EvalError) Unwrap() error { return e.cause }
+
+// A Program is a compiled Starlark program.
+//
+// Programs are immutable, and contain no Values.
+// A Program may be created by parsing a source file (see SourceProgram)
+// or by loading a previously saved compiled program (see CompiledProgram).
+type Program struct {
+ compiled *compile.Program
+}
+
+// CompilerVersion is the version number of the protocol for compiled
+// files. Applications must not run programs compiled by one version
+// with an interpreter at another version, and should thus incorporate
+// the compiler version into the cache key when reusing compiled code.
+const CompilerVersion = compile.Version
+
+// Filename returns the name of the file from which this program was loaded.
+func (prog *Program) Filename() string { return prog.compiled.Toplevel.Pos.Filename() }
+
+func (prog *Program) String() string { return prog.Filename() }
+
+// NumLoads returns the number of load statements in the compiled program.
+func (prog *Program) NumLoads() int { return len(prog.compiled.Loads) }
+
+// Load(i) returns the name and position of the i'th module directly
+// loaded by this one, where 0 <= i < NumLoads().
+// The name is unresolved---exactly as it appears in the source.
+func (prog *Program) Load(i int) (string, syntax.Position) {
+ id := prog.compiled.Loads[i]
+ return id.Name, id.Pos
+}
+
+// Write writes the compiled module to the specified output stream.
+func (prog *Program) Write(out io.Writer) error {
+ data := prog.compiled.Encode()
+ _, err := out.Write(data)
+ return err
+}
+
+// ExecFile parses, resolves, and executes a Starlark file in the
+// specified global environment, which may be modified during execution.
+//
+// Thread is the state associated with the Starlark thread.
+//
+// The filename and src parameters are as for syntax.Parse:
+// filename is the name of the file to execute,
+// and the name that appears in error messages;
+// src is an optional source of bytes to use
+// instead of filename.
+//
+// predeclared defines the predeclared names specific to this module.
+// Execution does not modify this dictionary, though it may mutate
+// its values.
+//
+// If ExecFile fails during evaluation, it returns an *EvalError
+// containing a backtrace.
+func ExecFile(thread *Thread, filename string, src interface{}, predeclared StringDict) (StringDict, error) {
+ // Parse, resolve, and compile a Starlark source file.
+ _, mod, err := SourceProgram(filename, src, predeclared.Has)
+ if err != nil {
+ return nil, err
+ }
+
+ g, err := mod.Init(thread, predeclared)
+ g.Freeze()
+ return g, err
+}
+
+// SourceProgram produces a new program by parsing, resolving,
+// and compiling a Starlark source file.
+// On success, it returns the parsed file and the compiled program.
+// The filename and src parameters are as for syntax.Parse.
+//
+// The isPredeclared predicate reports whether a name is
+// a pre-declared identifier of the current module.
+// Its typical value is predeclared.Has,
+// where predeclared is a StringDict of pre-declared values.
+func SourceProgram(filename string, src interface{}, isPredeclared func(string) bool) (*syntax.File, *Program, error) {
+ f, err := syntax.Parse(filename, src, 0)
+ if err != nil {
+ return nil, nil, err
+ }
+ prog, err := FileProgram(f, isPredeclared)
+ return f, prog, err
+}
+
+// FileProgram produces a new program by resolving
+// and compiling the Starlark source file syntax tree.
+// On success, it returns the compiled program.
+//
+// Resolving a syntax tree mutates it.
+// Do not call FileProgram more than once on the same file.
+//
+// The isPredeclared predicate reports whether a name is
+// a pre-declared identifier of the current module.
+// Its typical value is predeclared.Has,
+// where predeclared is a StringDict of pre-declared values.
+func FileProgram(f *syntax.File, isPredeclared func(string) bool) (*Program, error) {
+ if err := resolve.File(f, isPredeclared, Universe.Has); err != nil {
+ return nil, err
+ }
+
+ var pos syntax.Position
+ if len(f.Stmts) > 0 {
+ pos = syntax.Start(f.Stmts[0])
+ } else {
+ pos = syntax.MakePosition(&f.Path, 1, 1)
+ }
+
+ module := f.Module.(*resolve.Module)
+ compiled := compile.File(f.Stmts, pos, "<toplevel>", module.Locals, module.Globals)
+
+ return &Program{compiled}, nil
+}
+
+// CompiledProgram produces a new program from the representation
+// of a compiled program previously saved by Program.Write.
+func CompiledProgram(in io.Reader) (*Program, error) {
+ data, err := ioutil.ReadAll(in)
+ if err != nil {
+ return nil, err
+ }
+ compiled, err := compile.DecodeProgram(data)
+ if err != nil {
+ return nil, err
+ }
+ return &Program{compiled}, nil
+}
+
+// Init creates a set of global variables for the program,
+// executes the toplevel code of the specified program,
+// and returns a new, unfrozen dictionary of the globals.
+func (prog *Program) Init(thread *Thread, predeclared StringDict) (StringDict, error) {
+ toplevel := makeToplevelFunction(prog.compiled, predeclared)
+
+ _, err := Call(thread, toplevel, nil, nil)
+
+ // Convert the global environment to a map.
+ // We return a (partial) map even in case of error.
+ return toplevel.Globals(), err
+}
+
+// ExecREPLChunk compiles and executes file f in the specified thread
+// and global environment. This is a variant of ExecFile specialized to
+// the needs of a REPL, in which a sequence of input chunks, each
+// syntactically a File, manipulates the same set of module globals,
+// which are not frozen after execution.
+//
+// This function is intended to support only go.starlark.net/repl.
+// Its API stability is not guaranteed.
+func ExecREPLChunk(f *syntax.File, thread *Thread, globals StringDict) error {
+ var predeclared StringDict
+
+ // -- variant of FileProgram --
+
+ if err := resolve.REPLChunk(f, globals.Has, predeclared.Has, Universe.Has); err != nil {
+ return err
+ }
+
+ var pos syntax.Position
+ if len(f.Stmts) > 0 {
+ pos = syntax.Start(f.Stmts[0])
+ } else {
+ pos = syntax.MakePosition(&f.Path, 1, 1)
+ }
+
+ module := f.Module.(*resolve.Module)
+ compiled := compile.File(f.Stmts, pos, "<toplevel>", module.Locals, module.Globals)
+ prog := &Program{compiled}
+
+ // -- variant of Program.Init --
+
+ toplevel := makeToplevelFunction(prog.compiled, predeclared)
+
+ // Initialize module globals from parameter.
+ for i, id := range prog.compiled.Globals {
+ if v := globals[id.Name]; v != nil {
+ toplevel.module.globals[i] = v
+ }
+ }
+
+ _, err := Call(thread, toplevel, nil, nil)
+
+ // Reflect changes to globals back to parameter, even after an error.
+ for i, id := range prog.compiled.Globals {
+ if v := toplevel.module.globals[i]; v != nil {
+ globals[id.Name] = v
+ }
+ }
+
+ return err
+}
+
+func makeToplevelFunction(prog *compile.Program, predeclared StringDict) *Function {
+ // Create the Starlark value denoted by each program constant c.
+ constants := make([]Value, len(prog.Constants))
+ for i, c := range prog.Constants {
+ var v Value
+ switch c := c.(type) {
+ case int64:
+ v = MakeInt64(c)
+ case *big.Int:
+ v = MakeBigInt(c)
+ case string:
+ v = String(c)
+ case compile.Bytes:
+ v = Bytes(c)
+ case float64:
+ v = Float(c)
+ default:
+ log.Panicf("unexpected constant %T: %v", c, c)
+ }
+ constants[i] = v
+ }
+
+ return &Function{
+ funcode: prog.Toplevel,
+ module: &module{
+ program: prog,
+ predeclared: predeclared,
+ globals: make([]Value, len(prog.Globals)),
+ constants: constants,
+ },
+ }
+}
+
+// Eval parses src as a single expression, then resolves and evaluates
+// it within the specified (predeclared) environment.
+//
+// The environment dictionary itself is never mutated by evaluation,
+// although values reachable from it may be.
+//
+// The filename and src parameters are as for syntax.Parse.
+//
+// A failure during evaluation is reported as an *EvalError
+// containing a backtrace.
+func Eval(thread *Thread, filename string, src interface{}, env StringDict) (Value, error) {
+	expr, err := syntax.ParseExpr(filename, src, 0)
+	if err != nil {
+		return nil, err
+	}
+	// Resolution, compilation and the call itself are shared with EvalExpr.
+	return EvalExpr(thread, expr, env)
+}
+
+// EvalExpr resolves and evaluates an already-parsed expression within
+// the specified (predeclared) environment.
+// A comma-separated list of expressions evaluates to a tuple value.
+//
+// Resolution mutates the expression, so EvalExpr must not be called
+// more than once for the same expression.
+//
+// The environment dictionary itself is never mutated by evaluation,
+// although values reachable from it may be.
+//
+// If EvalExpr fails during evaluation, it returns an *EvalError
+// containing a backtrace.
+func EvalExpr(thread *Thread, expr syntax.Expr, env StringDict) (Value, error) {
+	fn, err := makeExprFunc(expr, env)
+	if err == nil {
+		return Call(thread, fn, nil, nil)
+	}
+	return nil, err
+}
+
+// ExprFunc parses src as an expression and returns a no-argument
+// function that evaluates it in the environment env.
+// The filename and src parameters are passed to syntax.ParseExpr.
+func ExprFunc(filename string, src interface{}, env StringDict) (*Function, error) {
+	expr, err := syntax.ParseExpr(filename, src, 0)
+	if err == nil {
+		return makeExprFunc(expr, env)
+	}
+	return nil, err
+}
+
+// makeExprFunc resolves expr against env and the Universe, compiles it,
+// and returns a no-argument function whose body is expr.
+func makeExprFunc(expr syntax.Expr, env StringDict) (*Function, error) {
+	locals, err := resolve.Expr(expr, env.Has, Universe.Has)
+	if err != nil {
+		return nil, err
+	}
+
+	prog := compile.Expr(expr, "<expr>", locals)
+	return makeToplevelFunction(prog, env), nil
+}
+
+// The following functions are primitive operations of the byte code interpreter.
+
+// listExtend implements list += iterable by appending every element
+// of y to x in place.
+func listExtend(x *List, y Iterable) {
+	// Fast path: list += list appends the other list's elements directly.
+	if other, isList := y.(*List); isList {
+		x.elems = append(x.elems, other.elems...)
+		return
+	}
+
+	// General case: drain the iterator one element at a time.
+	it := y.Iterate()
+	defer it.Done()
+	var elem Value
+	for it.Next(&elem) {
+		x.elems = append(x.elems, elem)
+	}
+}
+
+// getAttr implements x.name.
+//
+// If x has no attributes at all, or its Attr method reports the
+// attribute as missing (either by returning (nil, nil) or a
+// NoSuchAttrError), the resulting error is augmented with a spelling
+// suggestion when a similar attribute name exists.
+// Any other error from Attr is returned unchanged.
+func getAttr(x Value, name string) (Value, error) {
+	obj, ok := x.(HasAttrs)
+	if !ok {
+		return nil, fmt.Errorf("%s has no .%s field or method", x.Type(), name)
+	}
+
+	v, err := obj.Attr(name)
+
+	var msg string
+	switch {
+	case err == nil && v != nil:
+		return v, nil // success
+	case err == nil:
+		// (nil, nil) => generic error
+		msg = fmt.Sprintf("%s has no .%s field or method", x.Type(), name)
+	default:
+		nsa, isNoSuchAttr := err.(NoSuchAttrError)
+		if !isNoSuchAttr {
+			return nil, err // return error as is
+		}
+		msg = string(nsa)
+	}
+
+	// Append a spelling hint if some attribute name is close to the one requested.
+	if hint := spell.Nearest(name, obj.AttrNames()); hint != "" {
+		msg = fmt.Sprintf("%s (did you mean .%s?)", msg, hint)
+	}
+
+	return nil, fmt.Errorf("%s", msg)
+}
+
+// setField implements x.name = y.
+//
+// Values that do not implement HasSetField are rejected. When SetField
+// fails with a NoSuchAttrError, the error is extended with a spelling
+// suggestion if a similar attribute name exists.
+func setField(x Value, name string, y Value) error {
+	target, ok := x.(HasSetField)
+	if !ok {
+		return fmt.Errorf("can't assign to .%s field of %s", name, x.Type())
+	}
+
+	err := target.SetField(name, y)
+	if _, noSuchField := err.(NoSuchAttrError); !noSuchField {
+		return err
+	}
+
+	// No such field: check spelling.
+	if hint := spell.Nearest(name, target.AttrNames()); hint != "" {
+		return fmt.Errorf("%s (did you mean .%s?)", err, hint)
+	}
+	return err
+}
+
+// getIndex implements x[y].
+func getIndex(x, y Value) (Value, error) {
+ switch x := x.(type) {
+ case Mapping: // dict
+ z, found, err := x.Get(y)
+ if err != nil {
+ return nil, err
+ }
+ if !found {
+ return nil, fmt.Errorf("key %v not in %s", y, x.Type())
+ }
+ return z, nil
+
+ case Indexable: // string, list, tuple
+ n := x.Len()
+ i, err := AsInt32(y)
+ if err != nil {
+ return nil, fmt.Errorf("%s index: %s", x.Type(), err)
+ }
+ origI := i
+ if i < 0 {
+ i += n
+ }
+ if i < 0 || i >= n {
+ return nil, outOfRange(origI, n, x)
+ }
+ return x.Index(i), nil
+ }
+ return nil, fmt.Errorf("unhandled index operation %s[%s]", x.Type(), y.Type())
+}
+
+// outOfRange returns the error for index i applied to the sequence x
+// of length n. The message for an empty sequence differs from the one
+// for a non-empty sequence, which reports the valid range [-n:n-1].
+func outOfRange(i, n int, x Value) error {
+	if n == 0 {
+		return fmt.Errorf("index %d out of range: empty %s", i, x.Type())
+	}
+	return fmt.Errorf("%s index %d out of range [%d:%d]", x.Type(), i, -n, n-1)
+}
+
+// setIndex implements x[y] = z.
+//
+// A HasSetKey value is updated by key; a HasSetIndex value is updated
+// at an integer index, where a negative index counts from the end.
+// HasSetKey takes precedence if x implements both.
+func setIndex(x, y, z Value) error {
+	switch c := x.(type) {
+	case HasSetKey:
+		return c.SetKey(y, z)
+
+	case HasSetIndex:
+		n := c.Len()
+		i, err := AsInt32(y)
+		if err != nil {
+			return err
+		}
+		// pos is the effective index; i is kept for the error message.
+		pos := i
+		if pos < 0 {
+			pos += n
+		}
+		if pos < 0 || pos >= n {
+			return outOfRange(i, n, c)
+		}
+		return c.SetIndex(pos, z)
+	}
+
+	return fmt.Errorf("%s value does not support item assignment", x.Type())
+}
+
+// Unary applies a unary operator (+, -, ~, not) to its operand.
+//
+// The not operator is computed from x.Truth() and cannot be customized.
+// Every other operator is delegated to x's HasUnary implementation, if
+// any; a (nil, nil) result from it means the operator is unhandled.
+func Unary(op syntax.Token, x Value) (Value, error) {
+	if op == syntax.NOT {
+		negated := !x.Truth()
+		return negated, nil
+	}
+
+	// Int, Float, and user-defined types
+	if custom, ok := x.(HasUnary); ok {
+		// (nil, nil) => unhandled
+		if y, err := custom.Unary(op); y != nil || err != nil {
+			return y, err
+		}
+	}
+
+	return nil, fmt.Errorf("unknown unary op: %s %s", op, x.Type())
+}
+
+// Binary applies a strict binary operator (not AND or OR) to its operands.
+// For equality tests or ordered comparisons, use Compare instead.
+//
+// The switch below handles the built-in operand types for each operator.
+// A case that matches returns directly; when no case matches the operand
+// types, control leaves the switch and the operation is offered first to
+// x's HasBinary implementation (as the Left operand) and then to y's
+// (as the Right operand). A (nil, nil) result from either means
+// "unhandled". If nothing handles the operation, or op is not one of the
+// operators listed in the switch, an "unknown binary op" error is returned.
+func Binary(op syntax.Token, x, y Value) (Value, error) {
+ switch op {
+ case syntax.PLUS:
+ switch x := x.(type) {
+ case String:
+ if y, ok := y.(String); ok {
+ return x + y, nil
+ }
+ case Int:
+ switch y := y.(type) {
+ case Int:
+ return x.Add(y), nil
+ case Float:
+ xf, err := x.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ return xf + y, nil
+ }
+ case Float:
+ switch y := y.(type) {
+ case Float:
+ return x + y, nil
+ case Int:
+ yf, err := y.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ return x + yf, nil
+ }
+ case *List:
+ if y, ok := y.(*List); ok {
+ // Concatenation builds a new list; neither operand is modified.
+ z := make([]Value, 0, x.Len()+y.Len())
+ z = append(z, x.elems...)
+ z = append(z, y.elems...)
+ return NewList(z), nil
+ }
+ case Tuple:
+ if y, ok := y.(Tuple); ok {
+ z := make(Tuple, 0, len(x)+len(y))
+ z = append(z, x...)
+ z = append(z, y...)
+ return z, nil
+ }
+ }
+
+ case syntax.MINUS:
+ switch x := x.(type) {
+ case Int:
+ switch y := y.(type) {
+ case Int:
+ return x.Sub(y), nil
+ case Float:
+ xf, err := x.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ return xf - y, nil
+ }
+ case Float:
+ switch y := y.(type) {
+ case Float:
+ return x - y, nil
+ case Int:
+ yf, err := y.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ return x - yf, nil
+ }
+ }
+
+ case syntax.STAR:
+ // Besides numeric multiplication, STAR repeats a string, bytes,
+ // list or tuple when the other operand is an Int (in either order).
+ switch x := x.(type) {
+ case Int:
+ switch y := y.(type) {
+ case Int:
+ return x.Mul(y), nil
+ case Float:
+ xf, err := x.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ return xf * y, nil
+ case String:
+ return stringRepeat(y, x)
+ case Bytes:
+ return bytesRepeat(y, x)
+ case *List:
+ elems, err := tupleRepeat(Tuple(y.elems), x)
+ if err != nil {
+ return nil, err
+ }
+ return NewList(elems), nil
+ case Tuple:
+ return tupleRepeat(y, x)
+ }
+ case Float:
+ switch y := y.(type) {
+ case Float:
+ return x * y, nil
+ case Int:
+ yf, err := y.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ return x * yf, nil
+ }
+ case String:
+ if y, ok := y.(Int); ok {
+ return stringRepeat(x, y)
+ }
+ case Bytes:
+ if y, ok := y.(Int); ok {
+ return bytesRepeat(x, y)
+ }
+ case *List:
+ if y, ok := y.(Int); ok {
+ elems, err := tupleRepeat(Tuple(x.elems), y)
+ if err != nil {
+ return nil, err
+ }
+ return NewList(elems), nil
+ }
+ case Tuple:
+ if y, ok := y.(Int); ok {
+ return tupleRepeat(x, y)
+ }
+
+ }
+
+ case syntax.SLASH:
+ // Both operands are converted to floats, so Int / Int also
+ // goes through floating-point division.
+ switch x := x.(type) {
+ case Int:
+ xf, err := x.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ switch y := y.(type) {
+ case Int:
+ yf, err := y.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ if yf == 0.0 {
+ return nil, fmt.Errorf("floating-point division by zero")
+ }
+ return xf / yf, nil
+ case Float:
+ if y == 0.0 {
+ return nil, fmt.Errorf("floating-point division by zero")
+ }
+ return xf / y, nil
+ }
+ case Float:
+ switch y := y.(type) {
+ case Float:
+ if y == 0.0 {
+ return nil, fmt.Errorf("floating-point division by zero")
+ }
+ return x / y, nil
+ case Int:
+ yf, err := y.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ if yf == 0.0 {
+ return nil, fmt.Errorf("floating-point division by zero")
+ }
+ return x / yf, nil
+ }
+ }
+
+ case syntax.SLASHSLASH:
+ switch x := x.(type) {
+ case Int:
+ switch y := y.(type) {
+ case Int:
+ if y.Sign() == 0 {
+ return nil, fmt.Errorf("floored division by zero")
+ }
+ return x.Div(y), nil
+ case Float:
+ xf, err := x.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ if y == 0.0 {
+ return nil, fmt.Errorf("floored division by zero")
+ }
+ return floor(xf / y), nil
+ }
+ case Float:
+ switch y := y.(type) {
+ case Float:
+ if y == 0.0 {
+ return nil, fmt.Errorf("floored division by zero")
+ }
+ return floor(x / y), nil
+ case Int:
+ yf, err := y.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ if yf == 0.0 {
+ return nil, fmt.Errorf("floored division by zero")
+ }
+ return floor(x / yf), nil
+ }
+ }
+
+ case syntax.PERCENT:
+ switch x := x.(type) {
+ case Int:
+ switch y := y.(type) {
+ case Int:
+ if y.Sign() == 0 {
+ return nil, fmt.Errorf("integer modulo by zero")
+ }
+ return x.Mod(y), nil
+ case Float:
+ xf, err := x.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ if y == 0 {
+ return nil, fmt.Errorf("floating-point modulo by zero")
+ }
+ return xf.Mod(y), nil
+ }
+ case Float:
+ switch y := y.(type) {
+ case Float:
+ if y == 0.0 {
+ return nil, fmt.Errorf("floating-point modulo by zero")
+ }
+ return x.Mod(y), nil
+ case Int:
+ if y.Sign() == 0 {
+ return nil, fmt.Errorf("floating-point modulo by zero")
+ }
+ yf, err := y.finiteFloat()
+ if err != nil {
+ return nil, err
+ }
+ return x.Mod(yf), nil
+ }
+ case String:
+ // string % value is string interpolation.
+ return interpolate(string(x), y)
+ }
+
+ case syntax.NOT_IN:
+ // x not in y is the negation of x in y.
+ z, err := Binary(syntax.IN, x, y)
+ if err != nil {
+ return nil, err
+ }
+ return !z.Truth(), nil
+
+ case syntax.IN:
+ // Membership dispatches on the right operand (the container).
+ switch y := y.(type) {
+ case *List:
+ for _, elem := range y.elems {
+ if eq, err := Equal(elem, x); err != nil {
+ return nil, err
+ } else if eq {
+ return True, nil
+ }
+ }
+ return False, nil
+ case Tuple:
+ for _, elem := range y {
+ if eq, err := Equal(elem, x); err != nil {
+ return nil, err
+ } else if eq {
+ return True, nil
+ }
+ }
+ return False, nil
+ case Mapping: // e.g. dict
+ // Ignore error from Get as we cannot distinguish true
+ // errors (value cycle, type error) from "key not found".
+ _, found, _ := y.Get(x)
+ return Bool(found), nil
+ case *Set:
+ ok, err := y.Has(x)
+ return Bool(ok), err
+ case String:
+ needle, ok := x.(String)
+ if !ok {
+ return nil, fmt.Errorf("'in <string>' requires string as left operand, not %s", x.Type())
+ }
+ return Bool(strings.Contains(string(y), string(needle))), nil
+ case Bytes:
+ switch needle := x.(type) {
+ case Bytes:
+ return Bool(strings.Contains(string(y), string(needle))), nil
+ case Int:
+ // An Int operand must fit in a single byte.
+ var b byte
+ if err := AsInt(needle, &b); err != nil {
+ return nil, fmt.Errorf("int in bytes: %s", err)
+ }
+ return Bool(strings.IndexByte(string(y), b) >= 0), nil
+ default:
+ return nil, fmt.Errorf("'in bytes' requires bytes or int as left operand, not %s", x.Type())
+ }
+ case rangeValue:
+ i, err := NumberToInt(x)
+ if err != nil {
+ return nil, fmt.Errorf("'in <range>' requires integer as left operand, not %s", x.Type())
+ }
+ return Bool(y.contains(i)), nil
+ }
+
+ case syntax.PIPE:
+ switch x := x.(type) {
+ case Int:
+ if y, ok := y.(Int); ok {
+ return x.Or(y), nil
+ }
+ case *Set: // union
+ if y, ok := y.(*Set); ok {
+ iter := Iterate(y)
+ defer iter.Done()
+ return x.Union(iter)
+ }
+ }
+
+ case syntax.AMP:
+ switch x := x.(type) {
+ case Int:
+ if y, ok := y.(Int); ok {
+ return x.And(y), nil
+ }
+ case *Set: // intersection
+ if y, ok := y.(*Set); ok {
+ set := new(Set)
+ if x.Len() > y.Len() {
+ x, y = y, x // opt: range over smaller set
+ }
+ for _, xelem := range x.elems() {
+ // Has, Insert cannot fail here.
+ if found, _ := y.Has(xelem); found {
+ set.Insert(xelem)
+ }
+ }
+ return set, nil
+ }
+ }
+
+ case syntax.CIRCUMFLEX:
+ switch x := x.(type) {
+ case Int:
+ if y, ok := y.(Int); ok {
+ return x.Xor(y), nil
+ }
+ case *Set: // symmetric difference
+ if y, ok := y.(*Set); ok {
+ // Collect the elements of x not in y, then those of y not in x.
+ set := new(Set)
+ for _, xelem := range x.elems() {
+ if found, _ := y.Has(xelem); !found {
+ set.Insert(xelem)
+ }
+ }
+ for _, yelem := range y.elems() {
+ if found, _ := x.Has(yelem); !found {
+ set.Insert(yelem)
+ }
+ }
+ return set, nil
+ }
+ }
+
+ case syntax.LTLT, syntax.GTGT:
+ if x, ok := x.(Int); ok {
+ y, err := AsInt32(y)
+ if err != nil {
+ return nil, err
+ }
+ if y < 0 {
+ return nil, fmt.Errorf("negative shift count: %v", y)
+ }
+ if op == syntax.LTLT {
+ // Left shifts by 512 bits or more are rejected;
+ // right shifts have no such limit here.
+ if y >= 512 {
+ return nil, fmt.Errorf("shift count too large: %v", y)
+ }
+ return x.Lsh(uint(y)), nil
+ } else {
+ return x.Rsh(uint(y)), nil
+ }
+ }
+
+ default:
+ // unknown operator
+ goto unknown
+ }
+
+ // user-defined types
+ // (nil, nil) => unhandled
+ if x, ok := x.(HasBinary); ok {
+ z, err := x.Binary(op, y, Left)
+ if z != nil || err != nil {
+ return z, err
+ }
+ }
+ if y, ok := y.(HasBinary); ok {
+ z, err := y.Binary(op, x, Right)
+ if z != nil || err != nil {
+ return z, err
+ }
+ }
+
+ // unsupported operand types
+unknown:
+ return nil, fmt.Errorf("unknown binary op: %s %s %s", x.Type(), op, y.Type())
+}
+
+// It's always possible to overeat in small bites but we'll
+// try to stop someone swallowing the world in one gulp.
+//
+// maxAlloc (1<<30) is the exclusive upper bound on the size of the
+// result of a single repeat operation: tupleRepeat and stringRepeat
+// fail if len(operand) * count would reach it.
+const maxAlloc = 1 << 30
+
+// tupleRepeat returns a sequence consisting of n copies of elems.
+// The result is nil if elems is empty or n is less than 1.
+// It fails if n does not fit AsInt32, or if the result would have
+// maxAlloc elements or more.
+func tupleRepeat(elems Tuple, n Int) (Tuple, error) {
+	unit := len(elems)
+	if unit == 0 {
+		return nil, nil
+	}
+	count, err := AsInt32(n)
+	if err != nil {
+		return nil, fmt.Errorf("repeat count %s too large", n)
+	}
+	if count <= 0 {
+		return nil, nil
+	}
+
+	// Inv: count > 0, unit > 0
+	total := unit * count
+	if total < 0 || total >= maxAlloc { // total < 0 => overflow
+		// Don't print total.
+		return nil, fmt.Errorf("excessive repeat (%d * %d elements)", unit, count)
+	}
+
+	// Fill the result by copying the already-filled prefix onto the
+	// remainder, doubling the filled length on every pass.
+	out := make([]Value, total)
+	for filled := copy(out, elems); filled < total; filled *= 2 {
+		copy(out[filled:], out[:filled])
+	}
+	return out, nil
+}
+
+// bytesRepeat returns n copies of b, concatenated.
+// It delegates to stringRepeat and so shares its limits and errors.
+func bytesRepeat(b Bytes, n Int) (Bytes, error) {
+	repeated, err := stringRepeat(String(b), n)
+	return Bytes(repeated), err
+}
+
+// stringRepeat returns n copies of s, concatenated.
+// The result is empty if s is empty or n is less than 1.
+// It fails if n does not fit AsInt32, or if the result would be
+// maxAlloc bytes or longer.
+func stringRepeat(s String, n Int) (String, error) {
+	unit := len(s)
+	if unit == 0 {
+		return "", nil
+	}
+	count, err := AsInt32(n)
+	if err != nil {
+		return "", fmt.Errorf("repeat count %s too large", n)
+	}
+	if count <= 0 {
+		return "", nil
+	}
+
+	// Inv: count > 0, unit > 0
+	total := unit * count
+	if total < 0 || total >= maxAlloc { // total < 0 => overflow
+		// Don't print total.
+		return "", fmt.Errorf("excessive repeat (%d * %d elements)", unit, count)
+	}
+
+	repeated := strings.Repeat(string(s), count)
+	return String(repeated), nil
+}
+
+// Call calls the function fn with the specified positional and keyword arguments.
+//
+// fn must implement Callable; otherwise an error is returned without
+// touching the thread. For the duration of the call a frame recording
+// the callable is pushed on thread.stack, and it is popped again before
+// Call returns, whether or not the call failed.
+//
+// Any error that is not already an *EvalError is converted to one by
+// thread.evalError while the frame is still on the stack. A callable that
+// returns (nil, nil) is reported as an internal error.
+func Call(thread *Thread, fn Value, args Tuple, kwargs []Tuple) (Value, error) {
+ c, ok := fn.(Callable)
+ if !ok {
+ return nil, fmt.Errorf("invalid call of non-function (%s)", fn.Type())
+ }
+
+ // Allocate and push a new frame.
+ var fr *frame
+ // Optimization: use slack portion of thread.stack
+ // slice as a freelist of empty frames.
+ if n := len(thread.stack); n < cap(thread.stack) {
+ // Peek at the slot just past the current length; it may hold a
+ // frame left behind (and cleared) by an earlier pop, or nil.
+ fr = thread.stack[n : n+1][0]
+ }
+ if fr == nil {
+ fr = new(frame)
+ }
+
+ if thread.stack == nil {
+ // one-time initialization of thread
+ if thread.maxSteps == 0 {
+ thread.maxSteps-- // (MaxUint64)
+ }
+ }
+
+ thread.stack = append(thread.stack, fr) // push
+
+ fr.callable = c
+
+ thread.beginProfSpan()
+ result, err := c.CallInternal(thread, args, kwargs)
+ thread.endProfSpan()
+
+ // Sanity check: nil is not a valid Starlark value.
+ if result == nil && err == nil {
+ err = fmt.Errorf("internal error: nil (not None) returned from %s", fn)
+ }
+
+ // Always return an EvalError with an accurate frame.
+ // This must happen before the pop below, while fr is still on the stack.
+ if err != nil {
+ if _, ok := err.(*EvalError); !ok {
+ err = thread.evalError(err)
+ }
+ }
+
+ *fr = frame{} // clear out any references
+ thread.stack = thread.stack[:len(thread.stack)-1] // pop
+
+ return result, err
+}
+
+func slice(x, lo, hi, step_ Value) (Value, error) {
+ sliceable, ok := x.(Sliceable)
+ if !ok {
+ return nil, fmt.Errorf("invalid slice operand %s", x.Type())
+ }
+
+ n := sliceable.Len()
+ step := 1
+ if step_ != None {
+ var err error
+ step, err = AsInt32(step_)
+ if err != nil {
+ return nil, fmt.Errorf("invalid slice step: %s", err)
+ }
+ if step == 0 {
+ return nil, fmt.Errorf("zero is not a valid slice step")
+ }
+ }
+
+ // TODO(adonovan): opt: preallocate result array.
+
+ var start, end int
+ if step > 0 {
+ // positive stride
+ // default indices are [0:n].
+ var err error
+ start, end, err = indices(lo, hi, n)
+ if err != nil {
+ return nil, err
+ }
+
+ if end < start {
+ end = start // => empty result
+ }
+ } else {
+ // negative stride
+ // default indices are effectively [n-1:-1], though to
+ // get this effect using explicit indices requires
+ // [n-1:-1-n:-1] because of the treatment of -ve values.
+ start = n - 1
+ if err := asIndex(lo, n, &start); err != nil {
+ return nil, fmt.Errorf("invalid start index: %s", err)
+ }
+ if start >= n {
+ start = n - 1
+ }
+
+ end = -1
+ if err := asIndex(hi, n, &end); err != nil {
+ return nil, fmt.Errorf("invalid end index: %s", err)
+ }
+ if end < -1 {
+ end = -1
+ }
+
+ if start < end {
+ start = end // => empty result
+ }
+ }
+
+ return sliceable.Slice(start, end, step), nil
+}
+
+// signum64 returns -1, 0 or +1 according to the sign of x, without branching.
+// From Hacker's Delight, section 2.8.
+func signum64(x int64) int {
+	neg := uint64(x >> 63)  // all ones if x < 0, otherwise zero
+	pos := uint64(-x) >> 63 // 1 if -x has its sign bit set, otherwise zero
+	return int(neg | pos)
+}
+
+// signum is signum64 for values of type int.
+func signum(x int) int { return signum64(int64(x)) }
+
+// indices converts start_ and end_ to indices in the range [0:len].
+// A missing start defaults to 0 and a missing end defaults to len.
+// A negative index has len added to it (see asIndex); any index that
+// is still outside [0:len] is then clamped to the nearest bound.
+// Beware: start may be greater than end.
+// This function is suitable only for slices with positive strides.
+func indices(start_, end_ Value, len int) (start, end int, err error) {
+	// clamp restricts i to [0:len].
+	clamp := func(i int) int {
+		switch {
+		case i < 0:
+			return 0
+		case i > len:
+			return len
+		}
+		return i
+	}
+
+	lo := 0
+	if e := asIndex(start_, len, &lo); e != nil {
+		return 0, 0, fmt.Errorf("invalid start index: %s", e)
+	}
+
+	hi := len
+	if e := asIndex(end_, len, &hi); e != nil {
+		return 0, 0, fmt.Errorf("invalid end index: %s", e)
+	}
+
+	return clamp(lo), clamp(hi), nil
+}
+
+// asIndex sets *result to the integer value of v, adding len to it
+// if it is negative. If v is nil or None, *result is unchanged.
+func asIndex(v Value, len int, result *int) error {
+ if v != nil && v != None {
+ var err error
+ *result, err = AsInt32(v)
+ if err != nil {
+ return err
+ }
+ if *result < 0 {
+ *result += len
+ }
+ }
+ return nil
+}
+
+// setArgs sets the values of the formal parameters of function fn
+// based on the actual parameter values in args and kwargs.
+//
+// The bound values are written into locals, indexed in parameter order:
+// ordinary parameters first, then the *args tuple (if any), then the
+// **kwargs dict (if any). A nil entry in locals is what marks a parameter
+// as not yet bound, so locals is presumably all-nil on entry
+// (NOTE(review): confirm against the caller).
+//
+// It returns an error for surplus, duplicate, unexpected or missing arguments.
+func setArgs(locals []Value, fn *Function, args Tuple, kwargs []Tuple) error {
+
+ // This is the general schema of a function:
+ //
+ // def f(p1, p2=dp2, p3=dp3, *args, k1, k2=dk2, k3, **kwargs)
+ //
+ // The p parameters are non-kwonly, and may be specified positionally.
+ // The k parameters are kwonly, and must be specified by name.
+ // The defaults tuple is (dp2, dp3, mandatory, dk2, mandatory).
+ //
+ // Arguments are processed as follows:
+ // - positional arguments are bound to a prefix of [p1, p2, p3].
+ // - surplus positional arguments are bound to *args.
+ // - keyword arguments are bound to any of {p1, p2, p3, k1, k2, k3};
+ // duplicate bindings are rejected.
+ // - surplus keyword arguments are bound to **kwargs.
+ // - defaults are bound to each parameter from p2 to k3 if no value was set.
+ // default values come from the tuple above.
+ // It is an error if the tuple entry for an unset parameter is 'mandatory'.
+
+ // Nullary function?
+ if fn.NumParams() == 0 {
+ if nactual := len(args) + len(kwargs); nactual > 0 {
+ return fmt.Errorf("function %s accepts no arguments (%d given)", fn.Name(), nactual)
+ }
+ return nil
+ }
+
+ // cond is a conditional expression: it yields y if x holds, else z.
+ // It is used only to assemble error messages.
+ cond := func(x bool, y, z interface{}) interface{} {
+ if x {
+ return y
+ }
+ return z
+ }
+
+ // nparams is the number of ordinary parameters (sans *args and **kwargs).
+ nparams := fn.NumParams()
+ var kwdict *Dict
+ if fn.HasKwargs() {
+ // **kwargs occupies the last parameter slot.
+ nparams--
+ kwdict = new(Dict)
+ locals[nparams] = kwdict
+ }
+ if fn.HasVarargs() {
+ // *args occupies the slot just after the ordinary parameters;
+ // it is filled in below, once the surplus is known.
+ nparams--
+ }
+
+ // nonkwonly is the number of non-kwonly parameters.
+ nonkwonly := nparams - fn.NumKwonlyParams()
+
+ // Too many positional args?
+ // n is the number of positional arguments bound to ordinary parameters.
+ n := len(args)
+ if len(args) > nonkwonly {
+ if !fn.HasVarargs() {
+ return fmt.Errorf("function %s accepts %s%d positional argument%s (%d given)",
+ fn.Name(),
+ cond(len(fn.defaults) > fn.NumKwonlyParams(), "at most ", ""),
+ nonkwonly,
+ cond(nonkwonly == 1, "", "s"),
+ len(args))
+ }
+ n = nonkwonly
+ }
+
+ // Bind positional arguments to non-kwonly parameters.
+ for i := 0; i < n; i++ {
+ locals[i] = args[i]
+ }
+
+ // Bind surplus positional arguments to *args parameter.
+ if fn.HasVarargs() {
+ tuple := make(Tuple, len(args)-n)
+ for i := n; i < len(args); i++ {
+ tuple[i-n] = args[i]
+ }
+ locals[nparams] = tuple
+ }
+
+ // Bind keyword arguments to parameters.
+ paramIdents := fn.funcode.Locals[:nparams]
+ for _, pair := range kwargs {
+ k, v := pair[0].(String), pair[1]
+ if i := findParam(paramIdents, string(k)); i >= 0 {
+ if locals[i] != nil {
+ return fmt.Errorf("function %s got multiple values for parameter %s", fn.Name(), k)
+ }
+ locals[i] = v
+ continue
+ }
+ if kwdict == nil {
+ return fmt.Errorf("function %s got an unexpected keyword argument %s", fn.Name(), k)
+ }
+ // A duplicate keyword is detected by the dict not growing on insertion.
+ oldlen := kwdict.Len()
+ kwdict.SetKey(k, v)
+ if kwdict.Len() == oldlen {
+ return fmt.Errorf("function %s got multiple values for parameter %s", fn.Name(), k)
+ }
+ }
+
+ // Are defaults required?
+ if n < nparams || fn.NumKwonlyParams() > 0 {
+ m := nparams - len(fn.defaults) // first default
+
+ // Report errors for missing required arguments.
+ var missing []string
+ var i int
+ for i = n; i < m; i++ {
+ if locals[i] == nil {
+ missing = append(missing, paramIdents[i].Name)
+ }
+ }
+
+ // Bind default values to parameters.
+ // i carries over from the loop above, so this starts at max(n, m).
+ for ; i < nparams; i++ {
+ if locals[i] == nil {
+ dflt := fn.defaults[i-m]
+ if _, ok := dflt.(mandatory); ok {
+ missing = append(missing, paramIdents[i].Name)
+ continue
+ }
+ locals[i] = dflt
+ }
+ }
+
+ if missing != nil {
+ return fmt.Errorf("function %s missing %d argument%s (%s)",
+ fn.Name(), len(missing), cond(len(missing) > 1, "s", ""), strings.Join(missing, ", "))
+ }
+ }
+ return nil
+}
+
+// findParam returns the index of the first binding in params whose
+// name is name, or -1 if there is none.
+func findParam(params []compile.Binding, name string) int {
+	for i := range params {
+		if params[i].Name == name {
+			return i
+		}
+	}
+	return -1
+}
+
+// interpolate implements string interpolation, format % x.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string-interpolation
+//
+// If x is a Tuple, its elements are the positional arguments; otherwise x
+// itself is the single positional argument. Conversions of the form
+// %(name)s instead look up name in x, which must then be a Mapping.
+// It is an error if the format consumes fewer or more positional
+// arguments than were supplied.
+func interpolate(format string, x Value) (Value, error) {
+ buf := new(strings.Builder)
+ index := 0 // number of conversions processed so far
+ nargs := 1 // number of positional arguments available
+ if tuple, ok := x.(Tuple); ok {
+ nargs = len(tuple)
+ }
+ for {
+ // Copy literal text up to the next '%'; format always holds
+ // the part of the format string not yet consumed.
+ i := strings.IndexByte(format, '%')
+ if i < 0 {
+ buf.WriteString(format)
+ break
+ }
+ buf.WriteString(format[:i])
+ format = format[i+1:]
+
+ // "%%" is a literal percent sign and consumes no argument.
+ if format != "" && format[0] == '%' {
+ buf.WriteByte('%')
+ format = format[1:]
+ continue
+ }
+
+ var arg Value
+ if format != "" && format[0] == '(' {
+ // keyword argument: %(name)s.
+ format = format[1:]
+ j := strings.IndexByte(format, ')')
+ if j < 0 {
+ return nil, fmt.Errorf("incomplete format key")
+ }
+ key := format[:j]
+ if dict, ok := x.(Mapping); !ok {
+ return nil, fmt.Errorf("format requires a mapping")
+ } else if v, found, _ := dict.Get(String(key)); found {
+ arg = v
+ } else {
+ return nil, fmt.Errorf("key not found: %s", key)
+ }
+ format = format[j+1:]
+ } else {
+ // positional argument: %s.
+ if index >= nargs {
+ return nil, fmt.Errorf("not enough arguments for format string")
+ }
+ if tuple, ok := x.(Tuple); ok {
+ arg = tuple[index]
+ } else {
+ arg = x
+ }
+ }
+
+ // NOTE: Starlark does not support any of these optional Python features:
+ // - optional conversion flags: [#0- +], etc.
+ // - optional minimum field width (number or *).
+ // - optional precision (.123 or *)
+ // - optional length modifier
+
+ // conversion type
+ if format == "" {
+ return nil, fmt.Errorf("incomplete format")
+ }
+ switch c := format[0]; c {
+ case 's', 'r':
+ // %s writes a string argument without quotation;
+ // everything else (and every %r) goes through writeValue.
+ if str, ok := AsString(arg); ok && c == 's' {
+ buf.WriteString(str)
+ } else {
+ writeValue(buf, arg, nil)
+ }
+ case 'd', 'i', 'o', 'x', 'X':
+ i, err := NumberToInt(arg)
+ if err != nil {
+ return nil, fmt.Errorf("%%%c format requires integer: %v", c, err)
+ }
+ switch c {
+ case 'd', 'i':
+ fmt.Fprintf(buf, "%d", i)
+ case 'o':
+ fmt.Fprintf(buf, "%o", i)
+ case 'x':
+ fmt.Fprintf(buf, "%x", i)
+ case 'X':
+ fmt.Fprintf(buf, "%X", i)
+ }
+ case 'e', 'f', 'g', 'E', 'F', 'G':
+ f, ok := AsFloat(arg)
+ if !ok {
+ return nil, fmt.Errorf("%%%c format requires float, not %s", c, arg.Type())
+ }
+ Float(f).format(buf, c)
+ case 'c':
+ switch arg := arg.(type) {
+ case Int:
+ // chr(int)
+ r, err := AsInt32(arg)
+ if err != nil || r < 0 || r > unicode.MaxRune {
+ return nil, fmt.Errorf("%%c format requires a valid Unicode code point, got %s", arg)
+ }
+ buf.WriteRune(rune(r))
+ case String:
+ // The string must consist of exactly one encoded rune.
+ r, size := utf8.DecodeRuneInString(string(arg))
+ if size != len(arg) || len(arg) == 0 {
+ return nil, fmt.Errorf("%%c format requires a single-character string")
+ }
+ buf.WriteRune(r)
+ default:
+ return nil, fmt.Errorf("%%c format requires int or single-character string, not %s", arg.Type())
+ }
+ case '%':
+ // Reachable only after a "(name)" key, since a '%' that directly
+ // follows another '%' is handled at the top of the loop.
+ buf.WriteByte('%')
+ default:
+ return nil, fmt.Errorf("unknown conversion %%%c", c)
+ }
+ format = format[1:]
+ index++
+ }
+
+ if index < nargs {
+ return nil, fmt.Errorf("too many arguments for format string")
+ }
+
+ return String(buf.String()), nil
+}
diff --git a/starlark/eval_test.go b/starlark/eval_test.go
new file mode 100644
index 0000000..9752fe8
--- /dev/null
+++ b/starlark/eval_test.go
@@ -0,0 +1,945 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark_test
+
+import (
+ "bytes"
+ "fmt"
+ "math"
+ "os/exec"
+ "path/filepath"
+ "reflect"
+ "sort"
+ "strings"
+ "testing"
+
+ "go.starlark.net/internal/chunkedfile"
+ "go.starlark.net/resolve"
+ "go.starlark.net/starlark"
+ "go.starlark.net/starlarkjson"
+ "go.starlark.net/starlarkstruct"
+ "go.starlark.net/starlarktest"
+ "go.starlark.net/syntax"
+)
+
+// setOptions assigns each resolver flag below from src: a flag becomes true iff src contains its "option:<name>" marker (e.g. "option:recursion"), and false otherwise.
+func setOptions(src string) {
+ resolve.AllowGlobalReassign = option(src, "globalreassign")
+ resolve.LoadBindsGlobally = option(src, "loadbindsglobally")
+ resolve.AllowRecursion = option(src, "recursion")
+ resolve.AllowSet = option(src, "set")
+}
+
+func option(chunk, name string) bool { // reports whether chunk contains the literal text "option:"+name
+ return strings.Contains(chunk, "option:"+name)
+}
+
+// Wrapper is the interface satisfied by errors that have an Unwrap method; see https://golang.org/pkg/errors.
+type Wrapper interface {
+ Unwrap() error
+}
+
+func TestEvalExpr(t *testing.T) { // table-driven check of starlark.Eval on single expressions
+ // This is mostly redundant with the new *.star tests.
+ // TODO(adonovan): move checks into *.star files and
+ // reduce this to a mere unit test of starlark.Eval.
+ thread := new(starlark.Thread)
+ for _, test := range []struct{ src, want string }{
+ {`123`, `123`},
+ {`-1`, `-1`},
+ {`"a"+"b"`, `"ab"`},
+ {`1+2`, `3`},
+
+ // lists
+ {`[]`, `[]`},
+ {`[1]`, `[1]`},
+ {`[1,]`, `[1]`},
+ {`[1, 2]`, `[1, 2]`},
+ {`[2 * x for x in [1, 2, 3]]`, `[2, 4, 6]`},
+ {`[2 * x for x in [1, 2, 3] if x > 1]`, `[4, 6]`},
+ {`[(x, y) for x in [1, 2] for y in [3, 4]]`,
+ `[(1, 3), (1, 4), (2, 3), (2, 4)]`},
+ {`[(x, y) for x in [1, 2] if x == 2 for y in [3, 4]]`,
+ `[(2, 3), (2, 4)]`},
+ // tuples
+ {`()`, `()`},
+ {`(1)`, `1`},
+ {`(1,)`, `(1,)`},
+ {`(1, 2)`, `(1, 2)`},
+ {`(1, 2, 3, 4, 5)`, `(1, 2, 3, 4, 5)`},
+ {`1, 2`, `(1, 2)`},
+ // dicts
+ {`{}`, `{}`},
+ {`{"a": 1}`, `{"a": 1}`},
+ {`{"a": 1,}`, `{"a": 1}`},
+
+ // conditional
+ {`1 if 3 > 2 else 0`, `1`},
+ {`1 if "foo" else 0`, `1`},
+ {`1 if "" else 0`, `0`},
+
+ // indexing
+ {`["a", "b"][0]`, `"a"`},
+ {`["a", "b"][1]`, `"b"`},
+ {`("a", "b")[0]`, `"a"`},
+ {`("a", "b")[1]`, `"b"`},
+ {`"aΩb"[0]`, `"a"`},
+ {`"aΩb"[1]`, `"\xce"`}, // string indexing is by byte: this is the first byte of Ω's UTF-8 encoding
+ {`"aΩb"[3]`, `"b"`},
+ {`{"a": 1}["a"]`, `1`},
+ {`{"a": 1}["b"]`, `key "b" not in dict`}, // for failing expressions, want is the error text
+ {`{}[[]]`, `unhashable type: list`},
+ {`{"a": 1}[[]]`, `unhashable type: list`},
+ {`[x for x in range(3)]`, "[0, 1, 2]"},
+ } {
+ var got string // printed value on success, error text on failure
+ if v, err := starlark.Eval(thread, "<expr>", test.src, nil); err != nil {
+ got = err.Error()
+ } else {
+ got = v.String()
+ }
+ if got != test.want {
+ t.Errorf("eval %s = %s, want %s", test.src, got, test.want)
+ }
+ }
+}
+
+func TestExecFile(t *testing.T) { // executes every chunk of each testdata file listed below
+ defer setOptions("") // on exit, reset the flags managed by setOptions to false
+ testdata := starlarktest.DataFile("starlark", ".")
+ thread := &starlark.Thread{Load: load}
+ starlarktest.SetReporter(thread, t)
+ for _, file := range []string{
+ "testdata/assign.star",
+ "testdata/bool.star",
+ "testdata/builtins.star",
+ "testdata/bytes.star",
+ "testdata/control.star",
+ "testdata/dict.star",
+ "testdata/float.star",
+ "testdata/function.star",
+ "testdata/int.star",
+ "testdata/json.star",
+ "testdata/list.star",
+ "testdata/misc.star",
+ "testdata/set.star",
+ "testdata/string.star",
+ "testdata/tuple.star",
+ "testdata/recursion.star",
+ "testdata/module.star",
+ } {
+ filename := filepath.Join(testdata, file)
+ for _, chunk := range chunkedfile.Read(filename, t) {
+ predeclared := starlark.StringDict{
+ "hasfields": starlark.NewBuiltin("hasfields", newHasFields),
+ "fibonacci": fib{},
+ "struct": starlark.NewBuiltin("struct", starlarkstruct.Make),
+ }
+
+ setOptions(chunk.Source) // options are chosen per chunk from markers in its source
+ resolve.AllowLambda = true // used extensively
+
+ _, err := starlark.ExecFile(thread, filename, chunk.Source, predeclared)
+ switch err := err.(type) {
+ case *starlark.EvalError:
+ found := false
+ for i := range err.CallStack {
+ posn := err.CallStack.At(i).Pos
+ if posn.Filename() == filename {
+ chunk.GotError(int(posn.Line), err.Error()) // attribute the error to the first call-stack frame located in this file
+ found = true
+ break
+ }
+ }
+ if !found {
+ t.Error(err.Backtrace()) // no frame in this file: report the whole backtrace
+ }
+ case nil:
+ // success
+ default:
+ t.Errorf("\n%s", err)
+ }
+ chunk.Done() // NOTE(review): presumably reports expected errors that never occurred — confirm in chunkedfile
+ }
+ }
+}
+
+// A fib is an iterable value representing the infinite Fibonacci sequence; it has no state, is always true, and is unhashable.
+type fib struct{}
+
+func (t fib) Freeze() {}
+func (t fib) String() string { return "fib" }
+func (t fib) Type() string { return "fib" }
+func (t fib) Truth() starlark.Bool { return true }
+func (t fib) Hash() (uint32, error) { return 0, fmt.Errorf("fib is unhashable") }
+func (t fib) Iterate() starlark.Iterator { return &fibIterator{0, 1} } // each iterator starts afresh at 0, 1
+
+type fibIterator struct{ x, y int } // x is the next value to yield; y is the one after it
+
+func (it *fibIterator) Next(p *starlark.Value) bool {
+ *p = starlark.MakeInt(it.x)
+ it.x, it.y = it.y, it.x+it.y
+ return true // the sequence never ends
+}
+func (it *fibIterator) Done() {} // nothing to release
+
+// load implements the 'load' operation as used in the evaluator tests: "assert.star" and "json.star" are provided from Go; any other module is executed from a file next to the file of thread.CallFrame(0).
+func load(thread *starlark.Thread, module string) (starlark.StringDict, error) {
+ if module == "assert.star" {
+ return starlarktest.LoadAssertModule()
+ }
+ if module == "json.star" {
+ return starlark.StringDict{"json": starlarkjson.Module}, nil
+ }
+
+ // TODO(adonovan): test load() using this execution path.
+ filename := filepath.Join(filepath.Dir(thread.CallFrame(0).Pos.Filename()), module)
+ return starlark.ExecFile(thread, filename, nil, nil)
+}
+
+func newHasFields(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ if len(args)+len(kwargs) > 0 { // the constructor accepts no arguments at all
+ return nil, fmt.Errorf("%s: unexpected arguments", b.Name())
+ }
+ return &hasfields{attrs: make(map[string]starlark.Value)}, nil // a fresh, unfrozen value with no fields
+}
+
+// hasfields is a test-only implementation of HasAttrs.
+// It permits any field to be set (except names rejected by SetField).
+// Clients will likely want to provide their own implementation,
+// so we don't have any public implementation.
+type hasfields struct {
+ attrs starlark.StringDict // field name -> value
+ frozen bool // set by Freeze; SetField fails once true
+}
+
+var ( // compile-time checks that *hasfields implements these interfaces
+ _ starlark.HasAttrs = (*hasfields)(nil)
+ _ starlark.HasBinary = (*hasfields)(nil)
+)
+
+func (hf *hasfields) String() string { return "hasfields" }
+func (hf *hasfields) Type() string { return "hasfields" }
+func (hf *hasfields) Truth() starlark.Bool { return true }
+func (hf *hasfields) Hash() (uint32, error) { return 42, nil } // constant: every hasfields value hashes alike
+
+func (hf *hasfields) Freeze() {
+ if !hf.frozen { // only the first call does any work
+ hf.frozen = true // set before recursing into the field values
+ for _, v := range hf.attrs {
+ v.Freeze()
+ }
+ }
+}
+
+func (hf *hasfields) Attr(name string) (starlark.Value, error) { return hf.attrs[name], nil } // yields (nil, nil) for a field that was never set
+
+func (hf *hasfields) SetField(name string, val starlark.Value) error {
+ if hf.frozen {
+ return fmt.Errorf("cannot set field on a frozen hasfields")
+ }
+ if strings.HasPrefix(name, "no") { // for testing: names beginning with "no" are always rejected
+ return starlark.NoSuchAttrError(fmt.Sprintf("no .%s field", name))
+ }
+ hf.attrs[name] = val // creates the field or overwrites its previous value
+ return nil
+}
+
+func (hf *hasfields) AttrNames() []string {
+ names := make([]string, 0, len(hf.attrs))
+ for key := range hf.attrs {
+ names = append(names, key)
+ }
+ sort.Strings(names) // map iteration order is unspecified; sort for a stable result
+ return names
+}
+
+func (hf *hasfields) Binary(op syntax.Token, y starlark.Value, side starlark.Side) (starlark.Value, error) {
+ // This method exists so we can exercise 'list += x'
+ // where x is not Iterable but defines list+x.
+ if op == syntax.PLUS {
+ if _, ok := y.(*starlark.List); ok { // side is not consulted: only the other operand's type matters
+ return starlark.MakeInt(42), nil // list+hasfields is 42
+ }
+ }
+ return nil, nil // neither a result nor an error for every other operator or operand
+}
+
+func TestParameterPassing(t *testing.T) { // calls functions a..j, defined in src, with assorted argument lists
+ const filename = "parameters.go"
+ const src = `
+def a():
+ return
+def b(a, b):
+ return a, b
+def c(a, b=42):
+ return a, b
+def d(*args):
+ return args
+def e(**kwargs):
+ return kwargs
+def f(a, b=42, *args, **kwargs):
+ return a, b, args, kwargs
+def g(a, b=42, *args, c=123, **kwargs):
+ return a, b, args, c, kwargs
+def h(a, b=42, *, c=123, **kwargs):
+ return a, b, c, kwargs
+def i(a, b=42, *, c, d=123, e, **kwargs):
+ return a, b, c, d, e, kwargs
+def j(a, b=42, *args, c, d=123, e, **kwargs):
+ return a, b, args, c, d, e, kwargs
+`
+
+ thread := new(starlark.Thread)
+ globals, err := starlark.ExecFile(thread, filename, src, nil) // globals holds the functions defined by src
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // All errors are dynamic; see resolver for static errors.
+ for _, test := range []struct{ src, want string }{
+ // a()
+ {`a()`, `None`},
+ {`a(1)`, `function a accepts no arguments (1 given)`},
+
+ // b(a, b)
+ {`b()`, `function b missing 2 arguments (a, b)`},
+ {`b(1)`, `function b missing 1 argument (b)`},
+ {`b(a=1)`, `function b missing 1 argument (b)`},
+ {`b(b=1)`, `function b missing 1 argument (a)`},
+ {`b(1, 2)`, `(1, 2)`},
+ {`b`, `<function b>`}, // asserts that b's parameter b was treated as a local variable
+ {`b(1, 2, 3)`, `function b accepts 2 positional arguments (3 given)`},
+ {`b(1, b=2)`, `(1, 2)`},
+ {`b(1, a=2)`, `function b got multiple values for parameter "a"`},
+ {`b(1, x=2)`, `function b got an unexpected keyword argument "x"`},
+ {`b(a=1, b=2)`, `(1, 2)`},
+ {`b(b=1, a=2)`, `(2, 1)`},
+ {`b(b=1, a=2, x=1)`, `function b got an unexpected keyword argument "x"`},
+ {`b(x=1, b=1, a=2)`, `function b got an unexpected keyword argument "x"`},
+
+ // c(a, b=42)
+ {`c()`, `function c missing 1 argument (a)`},
+ {`c(1)`, `(1, 42)`},
+ {`c(1, 2)`, `(1, 2)`},
+ {`c(1, 2, 3)`, `function c accepts at most 2 positional arguments (3 given)`},
+ {`c(1, b=2)`, `(1, 2)`},
+ {`c(1, a=2)`, `function c got multiple values for parameter "a"`},
+ {`c(a=1, b=2)`, `(1, 2)`},
+ {`c(b=1, a=2)`, `(2, 1)`},
+
+ // d(*args)
+ {`d()`, `()`},
+ {`d(1)`, `(1,)`},
+ {`d(1, 2)`, `(1, 2)`},
+ {`d(1, 2, k=3)`, `function d got an unexpected keyword argument "k"`},
+ {`d(args=[])`, `function d got an unexpected keyword argument "args"`},
+
+ // e(**kwargs)
+ {`e()`, `{}`},
+ {`e(1)`, `function e accepts 0 positional arguments (1 given)`},
+ {`e(k=1)`, `{"k": 1}`},
+ {`e(kwargs={})`, `{"kwargs": {}}`},
+
+ // f(a, b=42, *args, **kwargs)
+ {`f()`, `function f missing 1 argument (a)`},
+ {`f(0)`, `(0, 42, (), {})`},
+ {`f(0)`, `(0, 42, (), {})`}, // NOTE(review): exact duplicate of the previous case
+ {`f(0, 1)`, `(0, 1, (), {})`},
+ {`f(0, 1, 2)`, `(0, 1, (2,), {})`},
+ {`f(0, 1, 2, 3)`, `(0, 1, (2, 3), {})`},
+ {`f(a=0)`, `(0, 42, (), {})`},
+ {`f(0, b=1)`, `(0, 1, (), {})`},
+ {`f(0, a=1)`, `function f got multiple values for parameter "a"`},
+ {`f(0, b=1, c=2)`, `(0, 1, (), {"c": 2})`},
+
+ // g(a, b=42, *args, c=123, **kwargs)
+ {`g()`, `function g missing 1 argument (a)`},
+ {`g(0)`, `(0, 42, (), 123, {})`},
+ {`g(0, 1)`, `(0, 1, (), 123, {})`},
+ {`g(0, 1, 2)`, `(0, 1, (2,), 123, {})`},
+ {`g(0, 1, 2, 3)`, `(0, 1, (2, 3), 123, {})`},
+ {`g(a=0)`, `(0, 42, (), 123, {})`},
+ {`g(0, b=1)`, `(0, 1, (), 123, {})`},
+ {`g(0, a=1)`, `function g got multiple values for parameter "a"`},
+ {`g(0, b=1, c=2, d=3)`, `(0, 1, (), 2, {"d": 3})`},
+
+ // h(a, b=42, *, c=123, **kwargs)
+ {`h()`, `function h missing 1 argument (a)`},
+ {`h(0)`, `(0, 42, 123, {})`},
+ {`h(0, 1)`, `(0, 1, 123, {})`},
+ {`h(0, 1, 2)`, `function h accepts at most 2 positional arguments (3 given)`},
+ {`h(a=0)`, `(0, 42, 123, {})`},
+ {`h(0, b=1)`, `(0, 1, 123, {})`},
+ {`h(0, a=1)`, `function h got multiple values for parameter "a"`},
+ {`h(0, b=1, c=2)`, `(0, 1, 2, {})`},
+ {`h(0, b=1, d=2)`, `(0, 1, 123, {"d": 2})`},
+ {`h(0, b=1, c=2, d=3)`, `(0, 1, 2, {"d": 3})`},
+
+ // i(a, b=42, *, c, d=123, e, **kwargs)
+ {`i()`, `function i missing 3 arguments (a, c, e)`},
+ {`i(0)`, `function i missing 2 arguments (c, e)`},
+ {`i(0, 1)`, `function i missing 2 arguments (c, e)`},
+ {`i(0, 1, 2)`, `function i accepts at most 2 positional arguments (3 given)`},
+ {`i(0, 1, e=2)`, `function i missing 1 argument (c)`},
+ {`i(0, 1, 2, 3)`, `function i accepts at most 2 positional arguments (4 given)`},
+ {`i(a=0)`, `function i missing 2 arguments (c, e)`},
+ {`i(0, b=1)`, `function i missing 2 arguments (c, e)`},
+ {`i(0, a=1)`, `function i got multiple values for parameter "a"`},
+ {`i(0, b=1, c=2)`, `function i missing 1 argument (e)`},
+ {`i(0, b=1, d=2)`, `function i missing 2 arguments (c, e)`},
+ {`i(0, b=1, c=2, d=3)`, `function i missing 1 argument (e)`},
+ {`i(0, b=1, c=2, d=3, e=4)`, `(0, 1, 2, 3, 4, {})`},
+ {`i(0, 1, b=1, c=2, d=3, e=4)`, `function i got multiple values for parameter "b"`},
+
+ // j(a, b=42, *args, c, d=123, e, **kwargs)
+ {`j()`, `function j missing 3 arguments (a, c, e)`},
+ {`j(0)`, `function j missing 2 arguments (c, e)`},
+ {`j(0, 1)`, `function j missing 2 arguments (c, e)`},
+ {`j(0, 1, 2)`, `function j missing 2 arguments (c, e)`},
+ {`j(0, 1, e=2)`, `function j missing 1 argument (c)`},
+ {`j(0, 1, 2, 3)`, `function j missing 2 arguments (c, e)`},
+ {`j(a=0)`, `function j missing 2 arguments (c, e)`},
+ {`j(0, b=1)`, `function j missing 2 arguments (c, e)`},
+ {`j(0, a=1)`, `function j got multiple values for parameter "a"`},
+ {`j(0, b=1, c=2)`, `function j missing 1 argument (e)`},
+ {`j(0, b=1, d=2)`, `function j missing 2 arguments (c, e)`},
+ {`j(0, b=1, c=2, d=3)`, `function j missing 1 argument (e)`},
+ {`j(0, b=1, c=2, d=3, e=4)`, `(0, 1, (), 2, 3, 4, {})`},
+ {`j(0, 1, b=1, c=2, d=3, e=4)`, `function j got multiple values for parameter "b"`},
+ {`j(0, 1, 2, c=3, e=4)`, `(0, 1, (2,), 3, 123, 4, {})`},
+ } {
+ var got string // printed result on success, error text on failure
+ if v, err := starlark.Eval(thread, "<expr>", test.src, globals); err != nil {
+ got = err.Error()
+ } else {
+ got = v.String()
+ }
+ if got != test.want {
+ t.Errorf("eval %s = %s, want %s", test.src, got, test.want)
+ }
+ }
+}
+
+// TestPrint ensures that the Starlark print function calls
+// Thread.Print, if provided, once per print call with the fully formatted message.
+func TestPrint(t *testing.T) {
+ const src = `
+print("hello")
+def f(): print("hello", "world", sep=", ")
+f()
+`
+ buf := new(bytes.Buffer)
+ print := func(thread *starlark.Thread, msg string) {
+ caller := thread.CallFrame(1) // per the expected output below, the frame that called print
+ fmt.Fprintf(buf, "%s: %s: %s\n", caller.Pos, caller.Name, msg)
+ }
+ thread := &starlark.Thread{Print: print}
+ if _, err := starlark.ExecFile(thread, "foo.star", src, nil); err != nil {
+ t.Fatal(err)
+ }
+ want := "foo.star:2:6: <toplevel>: hello\n" +
+ "foo.star:3:15: f: hello, world\n"
+ if got := buf.String(); got != want {
+ t.Errorf("output was %s, want %s", got, want)
+ }
+}
+
+func reportEvalError(tb testing.TB, err error) { // fails tb with err, using the full backtrace when err is an *EvalError
+ if err, ok := err.(*starlark.EvalError); ok {
+ tb.Fatal(err.Backtrace())
+ }
+ tb.Fatal(err) // reached only for other error types, since Fatal does not return
+}
+
+// TestInt exercises the Int.Int64 and Int.Uint64 methods at and just beyond the limits of each range.
+// If we can move their logic into math/big, delete this test.
+func TestInt(t *testing.T) {
+ one := starlark.MakeInt(1)
+
+ for _, test := range []struct {
+ i starlark.Int
+ wantInt64 string
+ wantUint64 string
+ }{
+ {starlark.MakeInt64(math.MinInt64).Sub(one), "error", "error"},
+ {starlark.MakeInt64(math.MinInt64), "-9223372036854775808", "error"},
+ {starlark.MakeInt64(-1), "-1", "error"},
+ {starlark.MakeInt64(0), "0", "0"},
+ {starlark.MakeInt64(1), "1", "1"},
+ {starlark.MakeInt64(math.MaxInt64), "9223372036854775807", "9223372036854775807"},
+ {starlark.MakeUint64(math.MaxUint64), "error", "18446744073709551615"},
+ {starlark.MakeUint64(math.MaxUint64).Add(one), "error", "error"},
+ } {
+ gotInt64, gotUint64 := "error", "error" // "error" stands for a conversion that reported !ok
+ if i, ok := test.i.Int64(); ok {
+ gotInt64 = fmt.Sprint(i)
+ }
+ if u, ok := test.i.Uint64(); ok {
+ gotUint64 = fmt.Sprint(u)
+ }
+ if gotInt64 != test.wantInt64 {
+ t.Errorf("(%s).Int64() = %s, want %s", test.i, gotInt64, test.wantInt64)
+ }
+ if gotUint64 != test.wantUint64 {
+ t.Errorf("(%s).Uint64() = %s, want %s", test.i, gotUint64, test.wantUint64)
+ }
+ }
+}
+
+func backtrace(t *testing.T, err error) string { // returns err's backtrace; fails t unless err is an *EvalError
+ switch err := err.(type) {
+ case *starlark.EvalError:
+ return err.Backtrace()
+ case nil:
+ t.Fatalf("ExecFile succeeded unexpectedly")
+ default:
+ t.Fatalf("ExecFile failed with %v, wanted *EvalError", err)
+ }
+ panic("unreachable") // Fatalf does not return; this satisfies the compiler's missing-return check
+}
+
+func TestBacktrace(t *testing.T) {
+ // This test ensures continuity of the stack of active Starlark
+ // functions, including propagation through built-ins such as 'min'.
+ const src = `
+def f(x): return 1//x
+def g(x): return f(x)
+def h(): return min([1, 2, 0], key=g)
+def i(): return h()
+i()
+`
+ thread := new(starlark.Thread) // shared by all three executions in this test
+ _, err := starlark.ExecFile(thread, "crash.star", src, nil)
+ const want = `Traceback (most recent call last):
+ crash.star:6:2: in <toplevel>
+ crash.star:5:18: in i
+ crash.star:4:20: in h
+ <builtin>: in min
+ crash.star:3:19: in g
+ crash.star:2:19: in f
+Error: floored division by zero`
+ if got := backtrace(t, err); got != want {
+ t.Errorf("error was %s, want %s", got, want)
+ }
+
+ // Additionally, ensure that errors originating in
+ // Starlark and/or Go each have an accurate frame.
+ // The topmost frame, if built-in, is not shown,
+ // but the name of the built-in function is shown
+ // as "Error in fn: ...".
+ //
+ // This program fails in Starlark (f) if i==0,
+ // or in Go (string.join) if i is non-zero.
+ const src2 = `
+def f(): ''.join([1//i])
+f()
+`
+ for i, want := range []string{
+ 0: `Traceback (most recent call last):
+ crash.star:3:2: in <toplevel>
+ crash.star:2:20: in f
+Error: floored division by zero`,
+ 1: `Traceback (most recent call last):
+ crash.star:3:2: in <toplevel>
+ crash.star:2:17: in f
+Error in join: join: in list, want string, got int`,
+ } {
+ globals := starlark.StringDict{"i": starlark.MakeInt(i)} // the slice index doubles as the predeclared value of i
+ _, err := starlark.ExecFile(thread, "crash.star", src2, globals)
+ if got := backtrace(t, err); got != want {
+ t.Errorf("error was %s, want %s", got, want)
+ }
+ }
+}
+
+func TestLoadBacktrace(t *testing.T) {
+ // This test ensures that load() does NOT preserve stack traces,
+ // but that API callers can get them with Unwrap().
+ // For discussion, see:
+ // https://github.com/google/starlark-go/pull/244
+ const src = `
+load('crash.star', 'x')
+`
+ const loadedSrc = `
+def f(x):
+ return 1 // x
+
+f(0)
+`
+ thread := new(starlark.Thread)
+ thread.Load = func(t *starlark.Thread, module string) (starlark.StringDict, error) {
+ return starlark.ExecFile(new(starlark.Thread), module, loadedSrc, nil) // every module is loadedSrc, run on a fresh thread
+ }
+ _, err := starlark.ExecFile(thread, "root.star", src, nil)
+
+ const want = `Traceback (most recent call last):
+ root.star:2:1: in <toplevel>
+Error: cannot load crash.star: floored division by zero`
+ if got := backtrace(t, err); got != want {
+ t.Errorf("error was %s, want %s", got, want)
+ }
+
+ unwrapEvalError := func(err error) *starlark.EvalError { // returns the last *EvalError found along err's Unwrap chain, or nil
+ var result *starlark.EvalError
+ for {
+ if evalErr, ok := err.(*starlark.EvalError); ok {
+ result = evalErr // overwritten by any EvalError found deeper in the chain
+ }
+
+ // TODO: use errors.Unwrap when go >=1.13 is everywhere.
+ wrapper, isWrapper := err.(Wrapper)
+ if !isWrapper {
+ break
+ }
+ err = wrapper.Unwrap()
+ }
+ return result
+ }
+
+ unwrappedErr := unwrapEvalError(err)
+ const wantUnwrapped = `Traceback (most recent call last):
+ crash.star:5:2: in <toplevel>
+ crash.star:3:12: in f
+Error: floored division by zero`
+ if got := backtrace(t, unwrappedErr); got != wantUnwrapped {
+ t.Errorf("error was %s, want %s", got, wantUnwrapped)
+ }
+
+}
+
+// TestRepeatedExec parses and resolves a file syntax tree once (via SourceProgram) then
+// executes it repeatedly (via Program.Init) with different values of its predeclared variables.
+func TestRepeatedExec(t *testing.T) {
+ predeclared := starlark.StringDict{"x": starlark.None}
+ _, prog, err := starlark.SourceProgram("repeat.star", "y = 2 * x", predeclared.Has)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ for _, test := range []struct {
+ x, want starlark.Value
+ }{
+ {x: starlark.MakeInt(42), want: starlark.MakeInt(84)},
+ {x: starlark.String("mur"), want: starlark.String("murmur")},
+ {x: starlark.Tuple{starlark.None}, want: starlark.Tuple{starlark.None, starlark.None}},
+ } {
+ predeclared["x"] = test.x // update the values in dictionary
+ thread := new(starlark.Thread) // a fresh thread for every run
+ if globals, err := prog.Init(thread, predeclared); err != nil {
+ t.Errorf("x=%v: %v", test.x, err) // exec error
+ } else if eq, err := starlark.Equal(globals["y"], test.want); err != nil {
+ t.Errorf("x=%v: %v", test.x, err) // comparison error
+ } else if !eq {
+ t.Errorf("x=%v: got y=%v, want %v", test.x, globals["y"], test.want)
+ }
+ }
+}
+
+// TestEmptyPosition ensures that even Programs
+// from empty files report the file name they were compiled from.
+func TestEmptyPosition(t *testing.T) {
+ var predeclared starlark.StringDict
+ for _, content := range []string{"", "empty = False"} { // an empty file and a non-empty one
+ _, prog, err := starlark.SourceProgram("hello.star", content, predeclared.Has)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if got, want := prog.Filename(), "hello.star"; got != want {
+ t.Errorf("Program.Filename() = %q, want %q", got, want)
+ }
+ }
+}
+
+// TestUnpackUserDefined tests that user-defined
+// implementations of starlark.Value may be unpacked, and that a value of the wrong type is rejected.
+func TestUnpackUserDefined(t *testing.T) {
+ // success
+ want := new(hasfields)
+ var x *hasfields // destination whose static type is the user-defined *hasfields
+ if err := starlark.UnpackArgs("unpack", starlark.Tuple{want}, nil, "x", &x); err != nil {
+ t.Errorf("UnpackArgs failed: %v", err)
+ }
+ if x != want { // pointer identity: the very same value must come back
+ t.Errorf("for x, got %v, want %v", x, want)
+ }
+
+ // failure
+ err := starlark.UnpackArgs("unpack", starlark.Tuple{starlark.MakeInt(42)}, nil, "x", &x)
+ if want := "unpack: for parameter x: got int, want hasfields"; fmt.Sprint(err) != want {
+ t.Errorf("unpack args error = %q, want %q", err, want)
+ }
+}
+
+type optionalStringUnpacker struct { // a string destination that records whether Unpack stored a value
+ str string
+ isSet bool // true once Unpack has succeeded
+}
+
+func (o *optionalStringUnpacker) Unpack(v starlark.Value) error {
+ s, ok := starlark.AsString(v)
+ if !ok {
+ return fmt.Errorf("got %s, want string", v.Type())
+ }
+ o.str = s
+ o.isSet = ok // ok is always true here
+ return nil
+}
+
+func TestUnpackCustomUnpacker(t *testing.T) { // checks that UnpackArgs fills destinations through their Unpack method
+ a := optionalStringUnpacker{}
+ wantA := optionalStringUnpacker{str: "a", isSet: true}
+ b := optionalStringUnpacker{str: "b"} // no argument is supplied for b below, so it must keep this preset value
+ wantB := optionalStringUnpacker{str: "b"}
+
+ // Success
+ if err := starlark.UnpackArgs("unpack", starlark.Tuple{starlark.String("a")}, nil, "a?", &a, "b?", &b); err != nil {
+ t.Errorf("UnpackArgs failed: %v", err)
+ }
+ if a != wantA {
+ t.Errorf("for a, got %v, want %v", a, wantA)
+ }
+ if b != wantB {
+ t.Errorf("for b, got %v, want %v", b, wantB)
+ }
+
+ // failure
+ err := starlark.UnpackArgs("unpack", starlark.Tuple{starlark.MakeInt(42)}, nil, "a", &a)
+ if want := "unpack: for parameter a: got int, want string"; fmt.Sprint(err) != want {
+ t.Errorf("unpack args error = %q, want %q", err, want)
+ }
+}
+
+func TestAsInt(t *testing.T) { // checks starlark.AsInt conversions and its out-of-range errors
+ for _, test := range []struct {
+ val starlark.Value
+ ptr interface{} // pointer to the Go integer variable to store into
+ want string // stored value on success, error text on failure
+ }{
+ {starlark.MakeInt(42), new(int32), "42"},
+ {starlark.MakeInt(-1), new(int32), "-1"},
+ // Use Lsh not 1<<40 as the latter exceeds int if GOARCH=386.
+ {starlark.MakeInt(1).Lsh(40), new(int32), "1099511627776 out of range (want value in signed 32-bit range)"},
+ {starlark.MakeInt(-1).Lsh(40), new(int32), "-1099511627776 out of range (want value in signed 32-bit range)"},
+
+ {starlark.MakeInt(42), new(uint16), "42"},
+ {starlark.MakeInt(0xffff), new(uint16), "65535"},
+ {starlark.MakeInt(0x10000), new(uint16), "65536 out of range (want value in unsigned 16-bit range)"},
+ {starlark.MakeInt(-1), new(uint16), "-1 out of range (want value in unsigned 16-bit range)"},
+ } {
+ var got string
+ if err := starlark.AsInt(test.val, test.ptr); err != nil {
+ got = err.Error()
+ } else {
+ got = fmt.Sprint(reflect.ValueOf(test.ptr).Elem().Interface()) // read back the value stored through ptr
+ }
+ if got != test.want {
+ t.Errorf("AsInt(%s, %T): got %q, want %q", test.val, test.ptr, got, test.want)
+ }
+ }
+}
+
+// TestDocstring checks that Function.Doc returns the string literal
+// that opens the body of a Starlark function.
+func TestDocstring(t *testing.T) {
+	globals, err := starlark.ExecFile(&starlark.Thread{}, "doc.star", `
+def somefunc():
+ "somefunc doc"
+ return 0
+`, nil)
+	// Report an execution failure directly: otherwise the lookup below
+	// would panic on a missing global and hide the real cause.
+	if err != nil {
+		t.Fatalf("ExecFile failed: %v", err)
+	}
+
+	fn, ok := globals["somefunc"].(*starlark.Function)
+	if !ok {
+		t.Fatalf("somefunc is %T, want *starlark.Function", globals["somefunc"])
+	}
+	if fn.Doc() != "somefunc doc" {
+		t.Fatal("docstring not found")
+	}
+}
+
+func TestFrameLocals(t *testing.T) { // checks that a built-in can inspect the parameters of its Starlark callers
+ // trace prints a nice stack trace including argument
+ // values of calls to Starlark functions.
+ trace := func(thread *starlark.Thread) string {
+ buf := new(bytes.Buffer)
+ for i := 0; i < thread.CallStackDepth(); i++ {
+ fr := thread.DebugFrame(i) // per the expected output below, depth 0 is the innermost call
+ fmt.Fprintf(buf, "%s(", fr.Callable().Name())
+ if fn, ok := fr.Callable().(*starlark.Function); ok {
+ for i := 0; i < fn.NumParams(); i++ {
+ if i > 0 {
+ buf.WriteString(", ")
+ }
+ name, _ := fn.Param(i)
+ fmt.Fprintf(buf, "%s=%s", name, fr.Local(i)) // parameter i is read from local slot i
+ }
+ } else {
+ buf.WriteString("...") // a built-in function
+ }
+ buf.WriteString(")\n")
+ }
+ return buf.String()
+ }
+
+ var got string // set by the built-in when the Starlark program calls it
+ builtin := func(thread *starlark.Thread, _ *starlark.Builtin, _ starlark.Tuple, _ []starlark.Tuple) (starlark.Value, error) {
+ got = trace(thread)
+ return starlark.None, nil
+ }
+ predeclared := starlark.StringDict{
+ "builtin": starlark.NewBuiltin("builtin", builtin),
+ }
+ _, err := starlark.ExecFile(&starlark.Thread{}, "foo.star", `
+def f(x, y): builtin()
+def g(z): f(z, z*z)
+g(7)
+`, predeclared)
+ if err != nil {
+ t.Errorf("ExecFile failed: %v", err)
+ }
+
+ var want = `
+builtin(...)
+f(x=7, y=49)
+g(z=7)
+<toplevel>()
+`[1:] // drop the leading newline of the literal
+ if got != want {
+ t.Errorf("got <<%s>>, want <<%s>>", got, want)
+ }
+}
+
+type badType string // a Value whose Type method panics on a nil receiver
+
+func (b *badType) String() string { return "badType" }
+func (b *badType) Type() string { return "badType:" + string(*b) } // panics if b==nil
+func (b *badType) Truth() starlark.Bool { return true }
+func (b *badType) Hash() (uint32, error) { return 0, nil }
+func (b *badType) Freeze() {}
+
+var _ starlark.Value = new(badType) // compile-time check that *badType implements starlark.Value
+
+// TestUnpackErrorBadType verifies that the Unpack functions fail
+// gracefully when a parameter's default value's Type method panics.
+func TestUnpackErrorBadType(t *testing.T) {
+ for _, test := range []struct {
+ x *badType
+ want string
+ }{
+ {new(badType), "got NoneType, want badType"}, // Starlark type name
+ {nil, "got NoneType, want *starlark_test.badType"}, // Go type name
+ } {
+ err := starlark.UnpackArgs("f", starlark.Tuple{starlark.None}, nil, "x", &test.x) // None is not a *badType, so an error is expected
+ if err == nil {
+ t.Errorf("UnpackArgs succeeded unexpectedly")
+ continue
+ }
+ if !strings.Contains(err.Error(), test.want) {
+ t.Errorf("UnpackArgs error %q does not contain %q", err, test.want)
+ }
+ }
+}
+
+// Regression test for github.com/google/starlark-go/issues/233: a later REPL chunk must see and update globals bound by an earlier one.
+func TestREPLChunk(t *testing.T) {
+ thread := new(starlark.Thread)
+ globals := make(starlark.StringDict)
+ exec := func(src string) { // parses src and runs it as one REPL chunk against the shared globals
+ f, err := syntax.Parse("<repl>", src, 0)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if err := starlark.ExecREPLChunk(f, thread, globals); err != nil {
+ t.Fatal(err)
+ }
+ }
+
+ exec("x = 0; y = 0")
+ if got, want := fmt.Sprintf("%v %v", globals["x"], globals["y"]), "0 0"; got != want {
+ t.Fatalf("chunk1: got %s, want %s", got, want)
+ }
+
+ exec("x += 1; y = y + 1") // both forms read a global bound by the first chunk
+ if got, want := fmt.Sprintf("%v %v", globals["x"], globals["y"]), "1 1"; got != want {
+ t.Fatalf("chunk2: got %s, want %s", got, want)
+ }
+}
+
+func TestCancel(t *testing.T) {
+ // A thread cancelled before it begins executes no code.
+ {
+ thread := new(starlark.Thread)
+ thread.Cancel("nope")
+ _, err := starlark.ExecFile(thread, "precancel.star", `x = 1//0`, nil) // would fail with division by zero if it ran
+ if fmt.Sprint(err) != "Starlark computation cancelled: nope" {
+ t.Errorf("execution returned error %q, want cancellation", err)
+ }
+
+ // cancellation is sticky
+ _, err = starlark.ExecFile(thread, "precancel.star", `x = 1//0`, nil)
+ if fmt.Sprint(err) != "Starlark computation cancelled: nope" {
+ t.Errorf("execution returned error %q, want cancellation", err)
+ }
+ }
+ // A thread cancelled during a built-in executes no more code.
+ {
+ thread := new(starlark.Thread)
+ predeclared := starlark.StringDict{
+ "stopit": starlark.NewBuiltin("stopit", func(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ thread.Cancel(fmt.Sprint(args[0])) // args[0] is formatted in its quoted form, hence the quotes in the expected error below
+ return starlark.None, nil
+ }),
+ }
+ _, err := starlark.ExecFile(thread, "stopit.star", `msg = 'nope'; stopit(msg); x = 1//0`, predeclared)
+ if fmt.Sprint(err) != `Starlark computation cancelled: "nope"` {
+ t.Errorf("execution returned error %q, want cancellation", err)
+ }
+ }
+}
+
+// TestExecutionSteps checks that a Thread's step counter grows in
+// proportion to the work performed, and that a limit installed with
+// SetMaxExecutionSteps cancels the computation.
+func TestExecutionSteps(t *testing.T) {
+	// A Thread records the number of computation steps.
+	thread := new(starlark.Thread)
+	// countSteps runs a comprehension over range(n) on thread and returns
+	// how far the thread's step counter advanced during that run.
+	countSteps := func(n int) (uint64, error) {
+		predeclared := starlark.StringDict{"n": starlark.MakeInt(n)}
+		steps0 := thread.ExecutionSteps()
+		_, err := starlark.ExecFile(thread, "steps.star", `squares = [x*x for x in range(n)]`, predeclared)
+		return thread.ExecutionSteps() - steps0, err
+	}
+	// A failed run yields a meaningless step count, so stop rather than
+	// go on to report a bogus ratio.
+	steps1000, err := countSteps(1000)
+	if err != nil {
+		t.Fatalf("execution failed: %v", err)
+	}
+	steps100000, err := countSteps(100000)
+	if err != nil {
+		t.Fatalf("execution failed: %v", err)
+	}
+	// 100 times the iterations should cost about 100 times the steps.
+	if ratio := float64(steps100000) / float64(steps1000); ratio < 99 || ratio > 101 {
+		t.Errorf("computation steps did not increase linearly: f(1000)=%d, f(100000)=%d, ratio=%g, want ~100", steps1000, steps100000, ratio)
+	}
+
+	// Exceeding the step limit causes cancellation.
+	thread.SetMaxExecutionSteps(1000)
+	_, err = countSteps(1000)
+	if fmt.Sprint(err) != "Starlark computation cancelled: too many steps" {
+		t.Errorf("execution returned error %q, want cancellation", err)
+	}
+}
+
+// TestDeps fails if the interpreter proper (not the REPL, etc) sprouts new external dependencies; it is skipped when 'go list' cannot be run.
+// We may expand the list of permitted dependencies, but should do so deliberately, not casually.
+func TestDeps(t *testing.T) {
+ cmd := exec.Command("go", "list", "-deps")
+ out, err := cmd.Output()
+ if err != nil {
+ t.Skipf("'go list' failed: %s", err)
+ }
+ for _, pkg := range strings.Split(string(out), "\n") { // one import path per output line
+ // Does pkg have form "domain.name/dir"?
+ slash := strings.IndexByte(pkg, '/')
+ dot := strings.IndexByte(pkg, '.')
+ if 0 < dot && dot < slash { // a '.' before the first '/' means the first path element looks like a domain
+ if strings.HasPrefix(pkg, "go.starlark.net/") ||
+ strings.HasPrefix(pkg, "golang.org/x/sys/") {
+ continue // permitted dependencies
+ }
+ t.Errorf("new interpreter dependency: %s", pkg)
+ }
+ }
+}
diff --git a/starlark/example_test.go b/starlark/example_test.go
new file mode 100644
index 0000000..5feca38
--- /dev/null
+++ b/starlark/example_test.go
@@ -0,0 +1,322 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark_test
+
+import (
+ "fmt"
+ "log"
+ "reflect"
+ "sort"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "testing"
+ "unsafe"
+
+ "go.starlark.net/starlark"
+)
+
+// ExampleExecFile shows the minimal steps needed to embed the
+// Starlark interpreter in a Go program: define a built-in, create a
+// thread, supply a predeclared environment, and execute a file.
+func ExampleExecFile() {
+	const data = `
+print(greeting + ", world")
+print(repeat("one"))
+print(repeat("mur", 2))
+squares = [x*x for x in range(10)]
+`
+
+	// repeat(str, n=1) is a Go function called from Starlark.
+	// It behaves like the 'string * int' operation.
+	repeat := func(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+		var (
+			s string
+			n = 1 // default when the optional argument is omitted
+		)
+		err := starlark.UnpackArgs(b.Name(), args, kwargs, "s", &s, "n?", &n)
+		if err != nil {
+			return nil, err
+		}
+		return starlark.String(strings.Repeat(s, n)), nil
+	}
+
+	// The Thread defines the behavior of the built-in 'print' function.
+	thread := &starlark.Thread{Name: "example"}
+	thread.Print = func(_ *starlark.Thread, msg string) { fmt.Println(msg) }
+
+	// This dictionary defines the pre-declared environment.
+	predeclared := starlark.StringDict{
+		"greeting": starlark.String("hello"),
+		"repeat":   starlark.NewBuiltin("repeat", repeat),
+	}
+
+	// Execute a program, reporting a backtrace for evaluation errors.
+	globals, err := starlark.ExecFile(thread, "apparent/filename.star", data, predeclared)
+	switch err := err.(type) {
+	case nil:
+		// success
+	case *starlark.EvalError:
+		log.Fatal(err.Backtrace())
+	default:
+		log.Fatal(err)
+	}
+
+	// Print the global environment.
+	fmt.Println("\nGlobals:")
+	for _, name := range globals.Keys() {
+		value := globals[name]
+		fmt.Printf("%s (%s) = %s\n", name, value.Type(), value.String())
+	}
+
+	// Output:
+	// hello, world
+	// one
+	// murmur
+	//
+	// Globals:
+	// squares (list) = [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
+}
+
+// ExampleThread_Load_sequential shows a straightforward single-threaded
+// 'load' implementation that caches each module after its first execution
+// and reports cyclic loads as errors.
+func ExampleThread_Load_sequential() {
+	fakeFilesystem := map[string]string{
+		"c.star": `load("b.star", "b"); c = b + "!"`,
+		"b.star": `load("a.star", "a"); b = a + ", world"`,
+		"a.star": `a = "Hello"`,
+	}
+
+	// result records the outcome of executing one module.
+	type result struct {
+		globals starlark.StringDict
+		err     error
+	}
+
+	// A nil value stored in the cache marks a load that is in progress.
+	cache := make(map[string]*result)
+
+	var load func(_ *starlark.Thread, module string) (starlark.StringDict, error)
+	load = func(_ *starlark.Thread, module string) (starlark.StringDict, error) {
+		switch r, seen := cache[module]; {
+		case r != nil:
+			return r.globals, r.err // already loaded
+		case seen:
+			// request for package whose loading is in progress
+			return nil, fmt.Errorf("cycle in load graph")
+		}
+
+		// Add a placeholder to indicate "load in progress".
+		cache[module] = nil
+
+		// Load and initialize the module in a new thread.
+		thread := &starlark.Thread{Name: "exec " + module, Load: load}
+		globals, err := starlark.ExecFile(thread, module, fakeFilesystem[module], nil)
+
+		// Update the cache.
+		cache[module] = &result{globals: globals, err: err}
+		return globals, err
+	}
+
+	globals, err := load(nil, "c.star")
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Println(globals["c"])
+
+	// Output:
+	// "Hello, world!"
+}
+
+// ExampleThread_Load_parallel shows 'load' running on several goroutines
+// at once, backed by a cache that provides duplicate suppression and
+// cycle detection.
+func ExampleThread_Load_parallel() {
+	c := &cache{
+		cache: make(map[string]*entry),
+		fakeFilesystem: map[string]string{
+			"c.star": `load("a.star", "a"); c = a * 2`,
+			"b.star": `load("a.star", "a"); b = a * 3`,
+			"a.star": `a = 1; print("loaded a")`,
+		},
+	}
+
+	// Modules b and c are loaded concurrently. Each of them loads
+	// module a, yet a is executed only once, as shown by its print
+	// statement appearing a single time in the output.
+
+	modules := []string{"b", "c"}
+	ch := make(chan string)
+	for _, name := range modules {
+		name := name
+		go func() {
+			globals, err := c.Load(name + ".star")
+			if err != nil {
+				log.Fatal(err)
+			}
+			ch <- fmt.Sprintf("%s = %s", name, globals[name])
+		}()
+	}
+
+	// Collect one result per goroutine and sort for stable output.
+	got := make([]string, 0, len(modules))
+	for range modules {
+		got = append(got, <-ch)
+	}
+	sort.Strings(got)
+	fmt.Println(strings.Join(got, "\n"))
+
+	// Output:
+	// loaded a
+	// b = 3
+	// c = 2
+}
+
+// TestThreadLoad_ParallelCycle demonstrates detection
+// of cycles during parallel loading.
+func TestThreadLoad_ParallelCycle(t *testing.T) {
+	cache := &cache{
+		cache: make(map[string]*entry),
+		fakeFilesystem: map[string]string{
+			"c.star": `load("b.star", "b"); c = b * 2`,
+			"b.star": `load("a.star", "a"); b = a * 3`,
+			"a.star": `load("c.star", "c"); a = c * 5; print("loaded a")`,
+		},
+	}
+
+	ch := make(chan string)
+	for _, name := range "bc" {
+		name := string(name)
+		go func() {
+			_, err := cache.Load(name + ".star")
+			if err == nil {
+				// Report through t rather than log.Fatalf, which would
+				// terminate the whole test binary. t.Errorf is safe to
+				// call from a goroutine; still send a value so the
+				// receiver below does not block forever.
+				t.Errorf("Load of %s.star succeeded unexpectedly", name)
+				ch <- ""
+				return
+			}
+			ch <- err.Error()
+		}()
+	}
+	got := []string{<-ch, <-ch}
+	sort.Strings(got)
+
+	// Typically, the c goroutine quickly blocks behind b;
+	// b loads a, and a then fails to load c because it forms a cycle.
+	// The errors observed by the two goroutines are:
+	want1 := []string{
+		"cannot load a.star: cannot load c.star: cycle in load graph",                     // from b
+		"cannot load b.star: cannot load a.star: cannot load c.star: cycle in load graph", // from c
+	}
+	// But if the c goroutine is slow to start, b loads a,
+	// and a loads c; then c fails to load b because it forms a cycle.
+	// The errors this time are:
+	want2 := []string{
+		"cannot load a.star: cannot load c.star: cannot load b.star: cycle in load graph", // from b
+		"cannot load b.star: cycle in load graph",                                         // from c
+	}
+	if !reflect.DeepEqual(got, want1) && !reflect.DeepEqual(got, want2) {
+		t.Error(got)
+	}
+}
+
+// cache is a concurrency-safe, duplicate-suppressing,
+// non-blocking cache of the doLoad function.
+// See Section 9.7 of gopl.io for an explanation of this structure.
+// It also features online deadlock (load cycle) detection.
+type cache struct {
+ cacheMu sync.Mutex // held by get while it reads or writes the cache map
+ cache map[string]*entry // module name -> entry; an entry is added on first request
+
+ fakeFilesystem map[string]string // module name -> Starlark source text, read by doLoad
+}
+
+// entry is the cache record for one module.
+// globals and err are written by the goroutine that created the entry
+// before it closes ready; waiters read them after receiving from ready.
+type entry struct {
+ owner unsafe.Pointer // a *cycleChecker; see cycleCheck
+ globals starlark.StringDict
+ err error
+ ready chan struct{} // closed once globals and err have been set
+}
+
+// Load returns the globals of the named module, executing it on first
+// request. Each call starts with a fresh cycleChecker, which get and
+// doLoad pass along to every nested load made on its behalf.
+func (c *cache) Load(module string) (starlark.StringDict, error) {
+	cc := new(cycleChecker)
+	return c.get(cc, module)
+}
+
+// get loads and returns an entry (if not already loaded).
+//
+// cc identifies the logical "thread of loading" on whose behalf the
+// request is made. If another request already created the entry, get
+// waits for it to become ready, unless cycleCheck finds that waiting
+// would close a cycle, in which case that error is returned instead.
+// Otherwise get creates the entry, runs doLoad, and wakes all waiters.
+func (c *cache) get(cc *cycleChecker, module string) (starlark.StringDict, error) {
+ c.cacheMu.Lock()
+ e := c.cache[module]
+ if e != nil {
+ c.cacheMu.Unlock()
+ // Some other goroutine is getting this module.
+ // Wait for it to become ready.
+
+ // Detect load cycles to avoid deadlocks.
+ // NOTE(review): the check and the setWaitsFor call below are two
+ // separate steps, not one atomic operation — confirm this is acceptable.
+ if err := cycleCheck(e, cc); err != nil {
+ return nil, err
+ }
+
+ // Publish the waits-for edge only for the duration of the wait.
+ cc.setWaitsFor(e)
+ <-e.ready
+ cc.setWaitsFor(nil)
+ } else {
+ // First request for this module.
+ e = &entry{ready: make(chan struct{})}
+ c.cache[module] = e
+ c.cacheMu.Unlock()
+
+ // Publish the owner edge only while the module is being loaded.
+ e.setOwner(cc)
+ e.globals, e.err = c.doLoad(cc, module)
+ e.setOwner(nil)
+
+ // Broadcast that the entry is now ready.
+ close(e.ready)
+ }
+ return e.globals, e.err
+}
+
+// doLoad executes the named module's source from the fake filesystem
+// in a fresh Starlark thread and returns its globals.
+func (c *cache) doLoad(cc *cycleChecker, module string) (starlark.StringDict, error) {
+	// Nested load statements re-enter the cache, tunnelling the
+	// cycle-checker state for this "thread of loading".
+	load := func(_ *starlark.Thread, module string) (starlark.StringDict, error) {
+		return c.get(cc, module)
+	}
+	printMsg := func(_ *starlark.Thread, msg string) { fmt.Println(msg) }
+
+	thread := &starlark.Thread{Name: "exec " + module, Print: printMsg, Load: load}
+	return starlark.ExecFile(thread, module, c.fakeFilesystem[module], nil)
+}
+
+// -- concurrent cycle checking --
+
+// A cycleChecker is used for concurrent deadlock detection.
+// Each top-level call to Load creates its own cycleChecker,
+// which is passed to all recursive calls it makes.
+// It corresponds to a logical thread in the deadlock detection literature.
+type cycleChecker struct {
+ waitsFor unsafe.Pointer // an *entry; see cycleCheck
+}
+
+// setWaitsFor atomically records the entry that cc is waiting for;
+// a nil e clears the record.
+func (cc *cycleChecker) setWaitsFor(e *entry) {
+ atomic.StorePointer(&cc.waitsFor, unsafe.Pointer(e))
+}
+
+// setOwner atomically records the cycleChecker that is currently
+// readying e; a nil cc clears the record.
+func (e *entry) setOwner(cc *cycleChecker) {
+ atomic.StorePointer(&e.owner, unsafe.Pointer(cc))
+}
+
+// cycleCheck reports whether there is a path in the waits-for graph
+// from resource 'e' to thread 'me'.
+// It returns a non-nil "cycle in load graph" error if such a path
+// exists, and nil otherwise.
+//
+// The waits-for graph (WFG) is a bipartite graph whose nodes are
+// alternately of type entry and cycleChecker. Each node has at most
+// one outgoing edge. An entry has an "owner" edge to a cycleChecker
+// while it is being readied by that cycleChecker, and a cycleChecker
+// has a "waits-for" edge to an entry while it is waiting for that entry
+// to become ready.
+//
+// Before adding a waits-for edge, the cache checks whether the new edge
+// would form a cycle. If so, this indicates that the load graph is
+// cyclic and that the following wait operation would deadlock.
+func cycleCheck(e *entry, me *cycleChecker) error {
+ // Follow owner and waits-for edges alternately until the path ends.
+ for e != nil {
+ cc := (*cycleChecker)(atomic.LoadPointer(&e.owner))
+ if cc == nil {
+ break // e has no owner edge, so the path ends here
+ }
+ if cc == me {
+ return fmt.Errorf("cycle in load graph")
+ }
+ e = (*entry)(atomic.LoadPointer(&cc.waitsFor))
+ }
+ return nil
+}
diff --git a/starlark/hashtable.go b/starlark/hashtable.go
new file mode 100644
index 0000000..27990b5
--- /dev/null
+++ b/starlark/hashtable.go
@@ -0,0 +1,373 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark
+
+import (
+ "fmt"
+ _ "unsafe" // for go:linkname hack
+)
+
+// hashtable is used to represent Starlark dict and set values.
+// It is a hash table whose key/value entries form a doubly-linked list
+// in the order the entries were inserted.
+//
+// A nil table means the hashtable has not been initialized yet;
+// insert initializes it on first use, and lookup and delete treat it as empty.
+type hashtable struct {
+ table []bucket // len is zero or a power of two
+ bucket0 [1]bucket // inline allocation for small maps.
+ len uint32
+ itercount uint32 // number of active iterators (ignored if frozen)
+ head *entry // insertion order doubly-linked list; may be nil
+ tailLink **entry // address of nil link at end of list (perhaps &head)
+ frozen bool
+}
+
+// bucketSize is the number of entries stored inline in each bucket.
+const bucketSize = 8
+
+// A bucket holds up to bucketSize entries; further entries whose hash
+// selects the same table slot go into buckets chained through next.
+type bucket struct {
+ entries [bucketSize]entry
+ next *bucket // linked list of buckets
+}
+
+// An entry is one key/value slot of a bucket.
+// A hash of zero marks the slot as unused, which is why insert, lookup
+// and delete replace a computed hash of zero with one.
+type entry struct {
+ hash uint32 // nonzero => in use
+ key, value Value
+ next *entry // insertion order doubly-linked list; may be nil
+ prevLink **entry // address of link to this entry (perhaps &head)
+}
+
+// init prepares an empty table with enough buckets to hold size entries
+// without being overloaded. It panics if size is negative.
+func (ht *hashtable) init(size int) {
+	if size < 0 {
+		panic("size < 0")
+	}
+	// Double the bucket count until the requested size fits.
+	buckets := 1
+	for overloaded(size, buckets) {
+		buckets <<= 1
+	}
+	if buckets == 1 {
+		// A single bucket suffices: use the inline one.
+		ht.table = ht.bucket0[:1]
+	} else {
+		ht.table = make([]bucket, buckets)
+	}
+	ht.tailLink = &ht.head
+}
+
+// freeze marks the table as frozen and freezes the key and value of
+// every entry in use. Calling it on an already frozen table does nothing.
+func (ht *hashtable) freeze() {
+	if ht.frozen {
+		return
+	}
+	ht.frozen = true
+	for i := range ht.table {
+		// Walk the bucket chain hanging off this table slot.
+		for b := &ht.table[i]; b != nil; b = b.next {
+			for j := range b.entries {
+				if e := &b.entries[j]; e.hash != 0 {
+					e.key.Freeze()
+					e.value.Freeze()
+				}
+			}
+		}
+	}
+}
+
+// insert associates value v with key k, replacing the value of an
+// existing equal key or else appending a new entry at the end of the
+// insertion-order list.
+// It returns an error if the table is frozen, if an iteration is in
+// progress, if k is unhashable, or if comparing k with an existing key fails.
+func (ht *hashtable) insert(k, v Value) error {
+ if ht.frozen {
+ return fmt.Errorf("cannot insert into frozen hash table")
+ }
+ if ht.itercount > 0 {
+ return fmt.Errorf("cannot insert into hash table during iteration")
+ }
+ if ht.table == nil {
+ ht.init(1)
+ }
+ h, err := k.Hash()
+ if err != nil {
+ return err
+ }
+ if h == 0 {
+ h = 1 // zero is reserved
+ }
+
+retry:
+ // insert is reset on every pass because grow replaces the table,
+ // which invalidates any slot noted during the previous pass.
+ var insert *entry
+
+ // Inspect each bucket in the bucket list.
+ // The table length is a power of two, so masking selects a slot.
+ p := &ht.table[h&(uint32(len(ht.table)-1))]
+ for {
+ for i := range p.entries {
+ e := &p.entries[i]
+ if e.hash != h {
+ if e.hash == 0 {
+ // Found empty entry; make a note.
+ insert = e
+ }
+ continue
+ }
+ if eq, err := Equal(k, e.key); err != nil {
+ return err // e.g. excessively recursive tuple
+ } else if !eq {
+ continue
+ }
+ // Key already present; update value.
+ e.value = v
+ return nil
+ }
+ if p.next == nil {
+ break
+ }
+ p = p.next
+ }
+
+ // Key not found. p points to the last bucket.
+
+ // Does the number of elements exceed the buckets' load factor?
+ if overloaded(int(ht.len), len(ht.table)) {
+ ht.grow()
+ goto retry
+ }
+
+ if insert == nil {
+ // No space in existing buckets. Add a new one to the bucket list.
+ b := new(bucket)
+ p.next = b
+ insert = &b.entries[0]
+ }
+
+ // Insert key/value pair.
+ insert.hash = h
+ insert.key = k
+ insert.value = v
+
+ // Append entry to doubly-linked list.
+ insert.prevLink = ht.tailLink
+ *ht.tailLink = insert
+ ht.tailLink = &insert.next
+
+ ht.len++
+
+ return nil
+}
+
+// overloaded reports whether elems elements are too many for the given
+// number of buckets. Fewer than bucketSize elements are never too many.
+func overloaded(elems, buckets int) bool {
+	const loadFactor = 6.5 // just a guess
+	if elems < bucketSize {
+		return false
+	}
+	return float64(elems) >= loadFactor*float64(buckets)
+}
+
+// grow doubles the number of buckets and reinserts every entry,
+// walking the old insertion-order list so that order is preserved.
+func (ht *hashtable) grow() {
+ // Double the number of buckets and rehash.
+ // TODO(adonovan): opt:
+ // - avoid reentrant calls to ht.insert, and specialize it.
+ // e.g. we know the calls to Equals will return false since
+ // there are no duplicates among the old keys.
+ // - saving the entire hash in the bucket would avoid the need to
+ // recompute the hash.
+ // - save the old buckets on a free list.
+ ht.table = make([]bucket, len(ht.table)<<1)
+ // Detach the old list and reset the table to empty before reinserting.
+ oldhead := ht.head
+ ht.head = nil
+ ht.tailLink = &ht.head
+ ht.len = 0
+ for e := oldhead; e != nil; e = e.next {
+ // NOTE(review): the error from insert is discarded; presumably it
+ // cannot fail here because each key was already inserted once — confirm.
+ ht.insert(e.key, e.value)
+ }
+ ht.bucket0[0] = bucket{} // clear out unused initial bucket
+}
+
+// lookup returns the value associated with key k.
+// found reports whether k is present; when it is not, v is None.
+// An error is returned if k is unhashable (even when the table is
+// empty) or if comparing k with a stored key fails.
+func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) {
+	h, err := k.Hash()
+	if err != nil {
+		return nil, false, err // unhashable
+	}
+	if h == 0 {
+		h = 1 // zero is reserved
+	}
+	if ht.table == nil {
+		return None, false, nil // empty
+	}
+
+	// Walk the bucket chain selected by the low bits of the hash.
+	index := h & uint32(len(ht.table)-1)
+	for b := &ht.table[index]; b != nil; b = b.next {
+		for i := range b.entries {
+			e := &b.entries[i]
+			if e.hash != h {
+				continue
+			}
+			eq, err := Equal(k, e.key)
+			if err != nil {
+				return nil, false, err // e.g. excessively recursive tuple
+			}
+			if eq {
+				return e.value, true, nil // found
+			}
+		}
+	}
+	return None, false, nil // not found
+}
+
+// items returns all the items in the map (as key/value pairs) in insertion order.
+// All the pairs are carved out of one shared backing array.
+func (ht *hashtable) items() []Tuple {
+	n := ht.len
+	result := make([]Tuple, 0, n)
+	backing := make([]Value, n*2) // allocate a single backing array
+	for e := ht.head; e != nil; e = e.next {
+		backing[0], backing[1] = e.key, e.value
+		// Cap each pair at two elements so that appending to one
+		// pair cannot overwrite its neighbour.
+		result = append(result, Tuple(backing[:2:2]))
+		backing = backing[2:]
+	}
+	return result
+}
+
+// first returns the key at the head of the insertion-order list,
+// or (None, false) if the list is empty.
+func (ht *hashtable) first() (Value, bool) {
+	e := ht.head
+	if e == nil {
+		return None, false
+	}
+	return e.key, true
+}
+
+// keys returns a new slice containing all keys in insertion order.
+func (ht *hashtable) keys() []Value {
+	result := make([]Value, 0, ht.len)
+	e := ht.head
+	for e != nil {
+		result = append(result, e.key)
+		e = e.next
+	}
+	return result
+}
+
+// delete removes the entry for key k, if present, and returns its value.
+// found reports whether k was present; when it was not, v is None.
+// It returns an error if the table is frozen, if an iteration is in
+// progress, if k is unhashable, or if comparing k with a stored key fails.
+// Note that an uninitialized table is reported as empty before k is
+// hashed, so no unhashable-key error is returned in that case.
+func (ht *hashtable) delete(k Value) (v Value, found bool, err error) {
+ if ht.frozen {
+ return nil, false, fmt.Errorf("cannot delete from frozen hash table")
+ }
+ if ht.itercount > 0 {
+ return nil, false, fmt.Errorf("cannot delete from hash table during iteration")
+ }
+ if ht.table == nil {
+ return None, false, nil // empty
+ }
+ h, err := k.Hash()
+ if err != nil {
+ return nil, false, err // unhashable
+ }
+ if h == 0 {
+ h = 1 // zero is reserved
+ }
+
+ // Inspect each bucket in the bucket list.
+ for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
+ for i := range p.entries {
+ e := &p.entries[i]
+ if e.hash == h {
+ if eq, err := Equal(k, e.key); err != nil {
+ return nil, false, err
+ } else if eq {
+ // Remove e from doubly-linked list.
+ *e.prevLink = e.next
+ if e.next == nil {
+ ht.tailLink = e.prevLink // deletion of last entry
+ } else {
+ e.next.prevLink = e.prevLink
+ }
+
+ // Save the value, then zero the slot so that its
+ // hash of zero marks it as free for reuse.
+ v := e.value
+ *e = entry{}
+ ht.len--
+ return v, true, nil // found
+ }
+ }
+ }
+ }
+
+ // TODO(adonovan): opt: remove completely empty bucket from bucket list.
+
+ return None, false, nil // not found
+}
+
+// clear removes all entries while keeping the table's top-level buckets.
+// It returns an error if the table is frozen or is being iterated over.
+func (ht *hashtable) clear() error {
+	switch {
+	case ht.frozen:
+		return fmt.Errorf("cannot clear frozen hash table")
+	case ht.itercount > 0:
+		return fmt.Errorf("cannot clear hash table during iteration")
+	}
+	// Ranging over a nil table is a no-op, so no nil check is needed.
+	for i := range ht.table {
+		ht.table[i] = bucket{}
+	}
+	ht.head, ht.len = nil, 0
+	ht.tailLink = &ht.head
+	return nil
+}
+
+// dump prints the table's header fields followed by every slot of every
+// bucket chain to standard output. It exists purely as a debugging aid.
+func (ht *hashtable) dump() {
+	fmt.Printf("hashtable %p len=%d head=%p tailLink=%p",
+		ht, ht.len, ht.head, ht.tailLink)
+	if tail := ht.tailLink; tail != nil {
+		fmt.Printf(" *tailLink=%p", *tail)
+	}
+	fmt.Println()
+	for chain := range ht.table {
+		fmt.Printf("bucket chain %d\n", chain)
+		for b := &ht.table[chain]; b != nil; b = b.next {
+			fmt.Printf("bucket %p\n", b)
+			for slot := range b.entries {
+				e := &b.entries[slot]
+				fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n",
+					slot, e, e.hash, e.key, e.value)
+				fmt.Printf("\t\tnext=%p &next=%p prev=%p",
+					e.next, &e.next, e.prevLink)
+				if prev := e.prevLink; prev != nil {
+					fmt.Printf(" *prev=%p", *prev)
+				}
+				fmt.Println()
+			}
+		}
+	}
+}
+
+// iterate returns an iterator over the keys in insertion order.
+// Unless the table is frozen, it increments the active-iterator count,
+// which insert, delete and clear check before mutating the table;
+// Done decrements it again.
+func (ht *hashtable) iterate() *keyIterator {
+	if !ht.frozen {
+		ht.itercount++
+	}
+	it := new(keyIterator)
+	it.ht, it.e = ht, ht.head
+	return it
+}
+
+// A keyIterator yields the keys of a hashtable in insertion order.
+type keyIterator struct {
+	ht *hashtable
+	e  *entry // next entry to yield; nil once the list is exhausted
+}
+
+// Next stores the next key in *k and advances the iterator.
+// It returns false, leaving *k untouched, when no keys remain.
+func (it *keyIterator) Next(k *Value) bool {
+	cur := it.e
+	if cur == nil {
+		return false
+	}
+	*k = cur.key
+	it.e = cur.next
+	return true
+}
+
+// Done decrements the table's active-iterator count,
+// unless the table is frozen.
+func (it *keyIterator) Done() {
+	if ht := it.ht; !ht.frozen {
+		ht.itercount--
+	}
+}
+
+// hashString computes the hash of s.
+// Strings shorter than 12 bytes are hashed in software; longer ones are
+// handed to the Go runtime's string hash via goStringHash.
+func hashString(s string) uint32 {
+	if len(s) < 12 {
+		return softHashString(s)
+	}
+	// Call the Go runtime's optimized hash implementation,
+	// which uses the AESENC instruction on amd64 machines.
+	return uint32(goStringHash(s, 0))
+}
+
+// goStringHash is declared without a body; the go:linkname directive
+// below binds it to the runtime package's unexported stringHash function.
+//
+//go:linkname goStringHash runtime.stringHash
+func goStringHash(s string, seed uintptr) uintptr
+
+// softHashString computes the 32-bit FNV-1a hash of s in software:
+// starting from the offset basis, each byte is XORed into the hash,
+// which is then multiplied by the FNV prime.
+func softHashString(s string) uint32 {
+	const (
+		offsetBasis uint32 = 2166136261
+		prime       uint32 = 16777619
+	)
+	h := offsetBasis
+	for _, b := range []byte(s) {
+		h = (h ^ uint32(b)) * prime
+	}
+	return h
+}
diff --git a/starlark/hashtable_test.go b/starlark/hashtable_test.go
new file mode 100644
index 0000000..3649f14
--- /dev/null
+++ b/starlark/hashtable_test.go
@@ -0,0 +1,125 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark
+
+import (
+ "fmt"
+ "math/rand"
+ "sync"
+ "testing"
+)
+
+// TestHashtable runs testHashtable in checking mode, comparing the
+// hashtable against a Go map.
+func TestHashtable(t *testing.T) {
+	makeTestIntsOnce.Do(makeTestInts)
+	sane := make(map[int]bool)
+	testHashtable(t, sane)
+}
+
+// BenchmarkStringHash compares hashString ("hard") with softHashString
+// ("soft") on pseudo-random strings of lengths 1, 2, 4, ..., 1024.
+func BenchmarkStringHash(b *testing.B) {
+	for size := 1; size <= 1024; size *= 2 {
+		// A fixed seed gives the same input string on every run.
+		buf := make([]byte, size)
+		rand.New(rand.NewSource(0)).Read(buf)
+		s := string(buf)
+
+		b.Run(fmt.Sprintf("hard-%d", size), func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				hashString(s)
+			}
+		})
+		b.Run(fmt.Sprintf("soft-%d", size), func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				softHashString(s)
+			}
+		})
+	}
+}
+
+// BenchmarkHashtable times testHashtable with the map comparison
+// disabled. Generating the test data is excluded from the timing.
+func BenchmarkHashtable(b *testing.B) {
+	makeTestIntsOnce.Do(makeTestInts)
+	b.ResetTimer()
+	for iter := 0; iter < b.N; iter++ {
+		testHashtable(b, nil)
+	}
+}
+
+// testIters is the number of operations in each of the three phases
+// (insert, lookup, delete) of testHashtable.
+const testIters = 10000
+
+var (
+ // testInts is a zipf-distributed array of Ints and corresponding ints.
+ // This removes the cost of generating them on the fly during benchmarking.
+ // Without this, Zipf and MakeInt dominate CPU and memory costs, respectively.
+ // It holds 3*testIters elements: one run of testIters per phase.
+ // makeTestIntsOnce ensures that it is populated only once.
+ makeTestIntsOnce sync.Once
+ testInts [3 * testIters]struct {
+ Int Int
+ goInt int
+ }
+)
+
+// makeTestInts fills testInts with zipf-distributed values drawn from
+// a generator with a fixed seed, so the data is identical on every run.
+func makeTestInts() {
+	src := rand.New(rand.NewSource(0))
+	zipf := rand.NewZipf(src, 1.1, 1.0, 1000.0)
+	for i := range testInts {
+		r := int(zipf.Uint64())
+		elem := &testInts[i]
+		elem.goInt = r
+		elem.Int = MakeInt(r)
+	}
+}
+
+// testHashtable is both a test and a benchmark of hashtable.
+// It performs testIters inserts, then lookups, then deletes, each phase
+// drawing on its own third of testInts.
+// When sane != nil, it acts as a test against the semantics of Go's map.
+func testHashtable(tb testing.TB, sane map[int]bool) {
+	var ht hashtable
+
+	inserts := testInts[0*testIters : 1*testIters]
+	lookups := testInts[1*testIters : 2*testIters]
+	deletes := testInts[2*testIters : 3*testIters]
+
+	// Insert 10000 random ints into the map.
+	for _, k := range inserts {
+		if err := ht.insert(k.Int, None); err != nil {
+			tb.Fatal(err)
+		}
+		if sane != nil {
+			sane[k.goInt] = true
+		}
+	}
+
+	// Do 10000 random lookups in the map.
+	for _, k := range lookups {
+		_, found, err := ht.lookup(k.Int)
+		if err != nil {
+			tb.Fatal(err)
+		}
+		if sane == nil {
+			continue
+		}
+		if _, want := sane[k.goInt]; found != want {
+			tb.Fatal("sanity check failed")
+		}
+	}
+
+	// Do 10000 random deletes from the map.
+	for _, k := range deletes {
+		_, found, err := ht.delete(k.Int)
+		if err != nil {
+			tb.Fatal(err)
+		}
+		if sane == nil {
+			continue
+		}
+		if _, want := sane[k.goInt]; found != want {
+			tb.Fatal("sanity check failed")
+		}
+		delete(sane, k.goInt)
+	}
+
+	// Both containers must end up with the same number of elements.
+	if sane != nil && int(ht.len) != len(sane) {
+		tb.Fatal("sanity check failed")
+	}
+}
diff --git a/starlark/int.go b/starlark/int.go
new file mode 100644
index 0000000..9ee46f9
--- /dev/null
+++ b/starlark/int.go
@@ -0,0 +1,436 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark
+
+import (
+ "fmt"
+ "math"
+ "math/big"
+ "reflect"
+ "strconv"
+
+ "go.starlark.net/syntax"
+)
+
+// Int is the type of a Starlark int.
+//
+// The zero value is not a legal value; use MakeInt(0).
+type Int struct{ impl intImpl }
+
+// --- high-level accessors ---
+
+// MakeInt returns a Starlark int for the specified signed integer.
+func MakeInt(x int) Int {
+	return MakeInt64(int64(x))
+}
+
+// MakeInt64 returns a Starlark int for the specified int64.
+// Values within the int32 range use the small representation.
+func MakeInt64(x int64) Int {
+	if x < math.MinInt32 || x > math.MaxInt32 {
+		return makeBigInt(big.NewInt(x))
+	}
+	return makeSmallInt(x)
+}
+
+// MakeUint returns a Starlark int for the specified unsigned integer.
+func MakeUint(x uint) Int {
+	return MakeUint64(uint64(x))
+}
+
+// MakeUint64 returns a Starlark int for the specified uint64.
+// Values no greater than MaxInt32 use the small representation.
+func MakeUint64(x uint64) Int {
+	if x > math.MaxInt32 {
+		return makeBigInt(new(big.Int).SetUint64(x))
+	}
+	return makeSmallInt(int64(x))
+}
+
+// MakeBigInt returns a Starlark int for the specified big.Int.
+// The new Int value will contain a copy of x. The caller is safe to modify x.
+func MakeBigInt(x *big.Int) Int {
+	// A value fits in an int32 if it needs fewer than 32 bits,
+	// or is exactly MinInt32 (whose magnitude needs 32 bits).
+	bits := x.BitLen()
+	fitsInt32 := bits < 32 || (bits == 32 && x.Int64() == math.MinInt32)
+	if !fitsInt32 {
+		return makeBigInt(new(big.Int).Set(x))
+	}
+	return makeSmallInt(x.Int64())
+}
+
+var (
+	zero, one = makeSmallInt(0), makeSmallInt(1)
+	oneBig    = big.NewInt(1)
+
+	// Compile-time check that Int implements HasUnary.
+	_ HasUnary = Int{}
+)
+
+// Unary implements the operations +int, -int, and ~int.
+// For any other operator it returns (nil, nil).
+func (i Int) Unary(op syntax.Token) (Value, error) {
+	if op == syntax.MINUS {
+		return zero.Sub(i), nil
+	}
+	if op == syntax.PLUS {
+		return i, nil
+	}
+	if op == syntax.TILDE {
+		return i.Not(), nil
+	}
+	return nil, nil
+}
+
+// Int64 returns the value as an int64.
+// If it is not exactly representable the result is undefined and ok is false.
+func (i Int) Int64() (_ int64, ok bool) {
+	small, b := i.get()
+	if b == nil {
+		return small, true
+	}
+	x, acc := bigintToInt64(b)
+	if acc != big.Exact {
+		return 0, false // inexact
+	}
+	return x, true
+}
+
+// BigInt returns a new big.Int with the same value as the Int.
+// The result is always a fresh value that the caller may modify.
+func (i Int) BigInt() *big.Int {
+	small, b := i.get()
+	if b == nil {
+		return big.NewInt(small)
+	}
+	return new(big.Int).Set(b)
+}
+
+// bigInt returns the value as a big.Int.
+// Unlike BigInt, for a big value it returns the underlying reference
+// itself, so any modification will change the state of i.
+func (i Int) bigInt() *big.Int {
+	small, b := i.get()
+	if b == nil {
+		return big.NewInt(small)
+	}
+	return b
+}
+
+// Uint64 returns the value as a uint64.
+// If it is not exactly representable the result is undefined and ok is false.
+func (i Int) Uint64() (_ uint64, ok bool) {
+	small, b := i.get()
+	if b == nil {
+		if small < 0 {
+			return 0, false // inexact
+		}
+		return uint64(small), true
+	}
+	x, acc := bigintToUint64(b)
+	if acc != big.Exact {
+		return 0, false // inexact
+	}
+	return x, true
+}
+
+// bigintToInt64 converts i to an int64.
+// If i is out of range, it returns the nearest int64 bound together
+// with an accuracy telling whether that result is below or above i.
+// The math/big API should provide this function.
+func bigintToInt64(i *big.Int) (int64, big.Accuracy) {
+	switch sign := i.Sign(); {
+	case sign > 0 && i.Cmp(maxint64) > 0:
+		return math.MaxInt64, big.Below
+	case sign < 0 && i.Cmp(minint64) < 0:
+		return math.MinInt64, big.Above
+	}
+	return i.Int64(), big.Exact
+}
+
+// bigintToUint64 converts i to a uint64.
+// If i is negative or needs more than 64 bits, it returns the nearest
+// uint64 bound together with an accuracy telling whether that result
+// is below or above i.
+// The math/big API should provide this function.
+func bigintToUint64(i *big.Int) (uint64, big.Accuracy) {
+	switch sign := i.Sign(); {
+	case sign < 0:
+		return 0, big.Above
+	case sign > 0 && i.BitLen() > 64:
+		return math.MaxUint64, big.Below
+	}
+	return i.Uint64(), big.Exact
+}
+
+// Bounds of the int64 range, as big.Ints, for use by bigintToInt64.
+var (
+	minint64 = new(big.Int).SetInt64(math.MinInt64)
+	maxint64 = new(big.Int).SetInt64(math.MaxInt64)
+)
+
+// Format implements fmt.Formatter by delegating to big.Int's Format,
+// converting a small value to a big.Int first.
+func (i Int) Format(s fmt.State, ch rune) {
+	small, b := i.get()
+	if b == nil {
+		b = big.NewInt(small)
+	}
+	b.Format(s, ch)
+}
+
+// String returns the decimal representation of i.
+func (i Int) String() string {
+	small, b := i.get()
+	if b == nil {
+		return strconv.FormatInt(small, 10)
+	}
+	return b.Text(10)
+}
+
+// Type returns "int".
+func (i Int) Type() string {
+	return "int"
+}
+
+// Freeze does nothing.
+func (i Int) Freeze() {} // immutable
+
+// Truth reports whether i is nonzero.
+func (i Int) Truth() Bool {
+	return i.Sign() != 0
+}
+
+// Hash derives a hash from the low-order word of the value.
+// It never returns an error.
+func (i Int) Hash() (uint32, error) {
+	small, b := i.get()
+	lo := big.Word(small)
+	if b != nil {
+		lo = b.Bits()[0]
+	}
+	return 12582917 * uint32(lo+3), nil
+}
+
+// CompareSameType compares x with v, which must be an Int, using the
+// comparison operator op. The depth parameter is not used.
+func (x Int) CompareSameType(op syntax.Token, v Value, depth int) (bool, error) {
+	y := v.(Int)
+	xSmall, xBig := x.get()
+	ySmall, yBig := y.get()
+	var cmp int
+	if xBig == nil && yBig == nil {
+		cmp = signum64(xSmall - ySmall)
+	} else {
+		cmp = x.bigInt().Cmp(y.bigInt())
+	}
+	return threeway(op, cmp), nil
+}
+
+// Float returns the float value nearest i.
+func (i Int) Float() Float {
+	small, b := i.get()
+	if b == nil {
+		return Float(small)
+	}
+	f, _ := new(big.Float).SetInt(b).Float64()
+	return Float(f)
+}
+
+// finiteFloat returns the finite float value nearest i,
+// or an error if the magnitude is too large.
+func (i Int) finiteFloat() (Float, error) {
+	if f := i.Float(); !math.IsInf(float64(f), 0) {
+		return f, nil
+	}
+	return 0, fmt.Errorf("int too large to convert to float")
+}
+
+// Sign returns the sign of x as computed by signum64 for small values
+// and by big.Int.Sign for big ones.
+func (x Int) Sign() int {
+	small, b := x.get()
+	if b == nil {
+		return signum64(small)
+	}
+	return b.Sign()
+}
+
+func (x Int) Add(y Int) Int {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
+ return MakeBigInt(new(big.Int).Add(x.bigInt(), y.bigInt()))
+ }
+ return MakeInt64(xSmall + ySmall)
+}
+func (x Int) Sub(y Int) Int {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
+ return MakeBigInt(new(big.Int).Sub(x.bigInt(), y.bigInt()))
+ }
+ return MakeInt64(xSmall - ySmall)
+}
+func (x Int) Mul(y Int) Int {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
+ return MakeBigInt(new(big.Int).Mul(x.bigInt(), y.bigInt()))
+ }
+ return MakeInt64(xSmall * ySmall)
+}
+func (x Int) Or(y Int) Int {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
+ return MakeBigInt(new(big.Int).Or(x.bigInt(), y.bigInt()))
+ }
+ return makeSmallInt(xSmall | ySmall)
+}
+func (x Int) And(y Int) Int {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
+ return MakeBigInt(new(big.Int).And(x.bigInt(), y.bigInt()))
+ }
+ return makeSmallInt(xSmall & ySmall)
+}
+func (x Int) Xor(y Int) Int {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
+ return MakeBigInt(new(big.Int).Xor(x.bigInt(), y.bigInt()))
+ }
+ return makeSmallInt(xSmall ^ ySmall)
+}
+func (x Int) Not() Int {
+ xSmall, xBig := x.get()
+ if xBig != nil {
+ return MakeBigInt(new(big.Int).Not(xBig))
+ }
+ return makeSmallInt(^xSmall)
+}
+func (x Int) Lsh(y uint) Int { return MakeBigInt(new(big.Int).Lsh(x.bigInt(), y)) }
+func (x Int) Rsh(y uint) Int { return MakeBigInt(new(big.Int).Rsh(x.bigInt(), y)) }
+
+// Precondition: y is nonzero.
+func (x Int) Div(y Int) Int {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ // http://python-history.blogspot.com/2010/08/why-pythons-integer-division-floors.html
+ if xBig != nil || yBig != nil {
+ xb, yb := x.bigInt(), y.bigInt()
+
+ var quo, rem big.Int
+ quo.QuoRem(xb, yb, &rem)
+ if (xb.Sign() < 0) != (yb.Sign() < 0) && rem.Sign() != 0 {
+ quo.Sub(&quo, oneBig)
+ }
+ return MakeBigInt(&quo)
+ }
+ quo := xSmall / ySmall
+ rem := xSmall % ySmall
+ if (xSmall < 0) != (ySmall < 0) && rem != 0 {
+ quo -= 1
+ }
+ return MakeInt64(quo)
+}
+
+// Precondition: y is nonzero.
+func (x Int) Mod(y Int) Int {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
+ xb, yb := x.bigInt(), y.bigInt()
+
+ var quo, rem big.Int
+ quo.QuoRem(xb, yb, &rem)
+ if (xb.Sign() < 0) != (yb.Sign() < 0) && rem.Sign() != 0 {
+ rem.Add(&rem, yb)
+ }
+ return MakeBigInt(&rem)
+ }
+ rem := xSmall % ySmall
+ if (xSmall < 0) != (ySmall < 0) && rem != 0 {
+ rem += ySmall
+ }
+ return makeSmallInt(rem)
+}
+
+// rational returns the value of i as a new big.Rat.
+func (i Int) rational() *big.Rat {
+	small, b := i.get()
+	r := new(big.Rat)
+	if b == nil {
+		return r.SetInt64(small)
+	}
+	return r.SetInt(b)
+}
+
+// AsInt32 returns the value of x if it is representable as an int32.
+// It returns an error if x is not an Int or if its value is stored in
+// the big representation.
+func AsInt32(x Value) (int, error) {
+	i, ok := x.(Int)
+	if !ok {
+		return 0, fmt.Errorf("got %s, want int", x.Type())
+	}
+	small, b := i.get()
+	if b == nil {
+		return int(small), nil
+	}
+	return 0, fmt.Errorf("%s out of range", i)
+}
+
+// AsInt sets *ptr to the value of Starlark int x, if it is exactly representable,
+// otherwise it returns an error.
+// The type of ptr must be one of the pointer types *int, *int8, *int16, *int32, or *int64,
+// or one of their unsigned counterparts including *uintptr.
+// AsInt also returns an error if x is not an Int, and panics if ptr has
+// any other type.
+func AsInt(x Value, ptr interface{}) error {
+ xint, ok := x.(Int)
+ if !ok {
+ return fmt.Errorf("got %s, want int", x.Type())
+ }
+
+ // bits is the width of the pointed-to integer type.
+ bits := reflect.TypeOf(ptr).Elem().Size() * 8
+ switch ptr.(type) {
+ case *int, *int8, *int16, *int32, *int64:
+ i, ok := xint.Int64()
+ // For 64-bit targets, Int64's ok result is the whole range check.
+ if !ok || bits < 64 && !(-1<<(bits-1) <= i && i < 1<<(bits-1)) {
+ return fmt.Errorf("%s out of range (want value in signed %d-bit range)", xint, bits)
+ }
+ switch ptr := ptr.(type) {
+ case *int:
+ *ptr = int(i)
+ case *int8:
+ *ptr = int8(i)
+ case *int16:
+ *ptr = int16(i)
+ case *int32:
+ *ptr = int32(i)
+ case *int64:
+ *ptr = int64(i)
+ }
+
+ case *uint, *uint8, *uint16, *uint32, *uint64, *uintptr:
+ i, ok := xint.Uint64()
+ // For 64-bit targets, Uint64's ok result is the whole range check.
+ if !ok || bits < 64 && i >= 1<<bits {
+ return fmt.Errorf("%s out of range (want value in unsigned %d-bit range)", xint, bits)
+ }
+ switch ptr := ptr.(type) {
+ case *uint:
+ *ptr = uint(i)
+ case *uint8:
+ *ptr = uint8(i)
+ case *uint16:
+ *ptr = uint16(i)
+ case *uint32:
+ *ptr = uint32(i)
+ case *uint64:
+ *ptr = uint64(i)
+ case *uintptr:
+ *ptr = uintptr(i)
+ }
+ default:
+ panic(fmt.Sprintf("invalid argument type: %T", ptr))
+ }
+ return nil
+}
+
+// NumberToInt converts a number x to an integer value.
+// An int is returned unchanged, a float is truncated towards zero.
+// NumberToInt reports an error for all other values.
+func NumberToInt(x Value) (Int, error) {
+ switch x := x.(type) {
+ case Int:
+ return x, nil
+ case Float:
+ f := float64(x)
+ if math.IsInf(f, 0) {
+ return zero, fmt.Errorf("cannot convert float infinity to integer")
+ } else if math.IsNaN(f) {
+ return zero, fmt.Errorf("cannot convert float NaN to integer")
+ }
+ return finiteFloatToInt(x), nil
+
+ }
+ return zero, fmt.Errorf("cannot convert %s to int", x.Type())
+}
+
+// finiteFloatToInt converts f to an Int, truncating towards zero.
+// f must be finite; the function panics otherwise.
+func finiteFloatToInt(f Float) Int {
+	// The upper bound is written as '< MaxInt64+1' rather than
+	// '<= MaxInt64' so that both bounds are exactly representable as
+	// floats. MaxInt64 (2^63-1) is not: as a float constant it rounds
+	// up to 2^63, so '<=' would accept f == 2^63, a value whose
+	// conversion to int64 overflows.
+	if math.MinInt64 <= f && f < math.MaxInt64+1 {
+		// small values
+		return MakeInt64(int64(f))
+	}
+	rat := f.rational()
+	if rat == nil {
+		panic(f) // non-finite
+	}
+	return MakeBigInt(new(big.Int).Div(rat.Num(), rat.Denom()))
+}
diff --git a/starlark/int_generic.go b/starlark/int_generic.go
new file mode 100644
index 0000000..9e84d7f
--- /dev/null
+++ b/starlark/int_generic.go
@@ -0,0 +1,33 @@
+//+build !linux,!darwin,!dragonfly,!freebsd,!netbsd,!openbsd,!solaris darwin,arm64 !amd64,!arm64,!mips64x,!ppc64x
+
+package starlark
+
+// generic Int implementation as a union
+
+import "math/big"
+
+// intImpl is the generic representation of an Int: a pair of fields of
+// which only one is meaningful at a time, selected by whether big_ is nil.
+type intImpl struct {
+ // We use only the signed 32-bit range of small to ensure
+ // that small+small and small*small do not overflow.
+ small_ int64 // minint32 <= small <= maxint32
+ big_ *big.Int // big != nil <=> value is not representable as int32
+}
+
+// --- low-level accessors ---
+
+// get exposes both arms of the Int's representation.
+// The small result is meaningful only when the big result is nil;
+// it is already widened to 64 bits so callers can do arithmetic on it directly.
+func (i Int) get() (small int64, big *big.Int) {
+	impl := i.impl
+	return impl.small_, impl.big_
+}
+
+// makeSmallInt wraps x in an Int using the small arm of the union.
+// Precondition: math.MinInt32 <= x && x <= math.MaxInt32
+func makeSmallInt(x int64) Int {
+	impl := intImpl{small_: x}
+	return Int{impl}
+}
+
+// makeBigInt wraps x in an Int using the big arm of the union.
+// Precondition: x cannot be represented as int32.
+func makeBigInt(x *big.Int) Int {
+	impl := intImpl{big_: x}
+	return Int{impl}
+}
diff --git a/starlark/int_posix64.go b/starlark/int_posix64.go
new file mode 100644
index 0000000..1f13d66
--- /dev/null
+++ b/starlark/int_posix64.go
@@ -0,0 +1,67 @@
+//+build linux darwin dragonfly freebsd netbsd openbsd solaris
+//+build amd64 arm64,!darwin mips64x ppc64x
+
+package starlark
+
+// This file defines an optimized Int implementation for 64-bit machines
+// running POSIX. It reserves a 4GB portion of the address space using
+// mmap and represents int32 values as addresses within that range. This
+// disambiguates int32 values from *big.Int pointers, letting all Int
+// values be represented as an unsafe.Pointer, so that Int-to-Value
+// interface conversion need not allocate.
+
+// Although iOS (arm64,darwin) claims to be POSIX-compliant,
+// it limits each process to about 700MB of virtual address space,
+// which defeats the optimization.
+//
+// TODO(golang.org/issue/38485): darwin,arm64 may refer to macOS in the future.
+// Update this when there are distinct GOOS values for macOS, iOS, and other Apple
+// operating systems on arm64.
+
+import (
+ "log"
+ "math"
+ "math/big"
+ "unsafe"
+
+ "golang.org/x/sys/unix"
+)
+
+// intImpl represents a union of (int32, *big.Int) in a single pointer,
+// so that Int-to-Value conversions need not allocate.
+//
+// The pointer is either a *big.Int, if the value is big, or a pointer into a
+// reserved portion of the address space (smallints), if the value is small.
+// For a small value only the pointer's offset from smallints carries
+// information: get and makeSmallInt convert between that offset and the
+// int32 value without reading the memory it points to.
+//
+// See int_generic.go for the basic representation concepts.
+type intImpl unsafe.Pointer
+
+// get decodes the union, returning its (small, big) arms.
+// Exactly one arm is meaningful: big is nil for a small value,
+// and small is zero for a big value.
+func (i Int) get() (int64, *big.Int) {
+	addr := uintptr(i.impl)
+	if addr < smallints || addr >= smallints+1<<32 {
+		// Outside the reserved region: the pointer is a genuine *big.Int.
+		return 0, (*big.Int)(i.impl)
+	}
+	// Inside the reserved region: the offset encodes the int32 value,
+	// biased so that offset 0 corresponds to math.MinInt32.
+	return math.MinInt32 + int64(addr-smallints), nil
+}
+
+// makeSmallInt encodes x as an address inside the reserved smallints region.
+// Precondition: math.MinInt32 <= x && x <= math.MaxInt32
+func makeSmallInt(x int64) Int {
+	// Bias x so that math.MinInt32 maps to offset 0 of the region.
+	offset := uintptr(x - math.MinInt32)
+	return Int{intImpl(offset + smallints)}
+}
+
+// makeBigInt stores the *big.Int pointer itself as the Int's representation.
+// Precondition: x cannot be represented as int32.
+func makeBigInt(x *big.Int) Int {
+	ptr := intImpl(x)
+	return Int{ptr}
+}
+
+// smallints is the base address of a 2^32 byte memory region.
+// Pointers to addresses in this region represent int32 values.
+// We assume smallints is not at the very top of the address space.
+// The region is reserved by reserveAddresses when this package-level
+// variable is initialized.
+var smallints = reserveAddresses(1 << 32)
+
+func reserveAddresses(len int) uintptr {
+ b, err := unix.Mmap(-1, 0, len, unix.PROT_READ, unix.MAP_PRIVATE|unix.MAP_ANON)
+ if err != nil {
+ log.Fatalf("mmap: %v", err)
+ }
+ return uintptr(unsafe.Pointer(&b[0]))
+}
diff --git a/starlark/int_test.go b/starlark/int_test.go
new file mode 100644
index 0000000..ad1bf92
--- /dev/null
+++ b/starlark/int_test.go
@@ -0,0 +1,102 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark
+
+import (
+ "fmt"
+ "math"
+ "math/big"
+ "testing"
+)
+
+// TestIntOpts exercises integer arithmetic, especially at the boundaries.
+func TestIntOpts(t *testing.T) {
+ f := MakeInt64
+ left, right := big.NewInt(math.MinInt32), big.NewInt(math.MaxInt32)
+
+ for i, test := range []struct {
+ val Int
+ want string
+ }{
+ // Add
+ {f(math.MaxInt32).Add(f(1)), "80000000"},
+ {f(math.MinInt32).Add(f(-1)), "-80000001"},
+ // Mul
+ {f(math.MaxInt32).Mul(f(math.MaxInt32)), "3fffffff00000001"},
+ {f(math.MinInt32).Mul(f(math.MinInt32)), "4000000000000000"},
+ {f(math.MaxUint32).Mul(f(math.MaxUint32)), "fffffffe00000001"},
+ {f(math.MinInt32).Mul(f(-1)), "80000000"},
+ // Div
+ {f(math.MinInt32).Div(f(-1)), "80000000"},
+ {f(1 << 31).Div(f(2)), "40000000"},
+ // And
+ {f(math.MaxInt32).And(f(math.MaxInt32)), "7fffffff"},
+ {f(math.MinInt32).And(f(math.MinInt32)), "-80000000"},
+ {f(1 << 33).And(f(1 << 32)), "0"},
+ // Mod
+ {f(1 << 32).Mod(f(2)), "0"},
+ // Or
+ {f(1 << 32).Or(f(0)), "100000000"},
+ {f(math.MaxInt32).Or(f(0)), "7fffffff"},
+ {f(math.MaxUint32).Or(f(0)), "ffffffff"},
+ {f(math.MinInt32).Or(f(math.MinInt32)), "-80000000"},
+ // Xor
+ {f(math.MinInt32).Xor(f(-1)), "7fffffff"},
+ // Not
+ {f(math.MinInt32).Not(), "7fffffff"},
+ {f(math.MaxInt32).Not(), "-80000000"},
+ // Shift
+ {f(1).Lsh(31), "80000000"},
+ {f(1).Lsh(32), "100000000"},
+ {f(math.MaxInt32 + 1).Rsh(1), "40000000"},
+ {f(math.MinInt32 * 2).Rsh(1), "-80000000"},
+ } {
+ if got := fmt.Sprintf("%x", test.val); got != test.want {
+ t.Errorf("%d equals %s, want %s", i, got, test.want)
+ }
+ small, big := test.val.get()
+ if small < math.MinInt32 || math.MaxInt32 < small {
+ t.Errorf("expected big, %d %s", i, test.val)
+ }
+ if big == nil {
+ continue
+ }
+ if small != 0 {
+ t.Errorf("expected 0 small, %d %s with %d", i, test.val, small)
+ }
+ if big.Cmp(left) >= 0 && big.Cmp(right) <= 0 {
+ t.Errorf("expected small, %d %s", i, test.val)
+ }
+ }
+}
+
+func TestImmutabilityMakeBigInt(t *testing.T) {
+ // use max int64 for the test
+ expect := int64(^uint64(0) >> 1)
+
+ mutint := big.NewInt(expect)
+ value := MakeBigInt(mutint)
+ mutint.Set(big.NewInt(1))
+
+ got, _ := value.Int64()
+ if got != expect {
+ t.Errorf("expected %d, got %d", expect, got)
+ }
+}
+
+// TestImmutabilityBigInt checks that modifying the *big.Int returned by
+// Int.BigInt does not change the Int it came from.
+func TestImmutabilityBigInt(t *testing.T) {
+	// Test values: 1 and the largest int64.
+	inputs := []int64{1, math.MaxInt64}
+	for _, expect := range inputs {
+		value := MakeBigInt(big.NewInt(expect))
+
+		exported := value.BigInt()
+		exported.SetInt64(2)
+
+		if got, _ := value.Int64(); got != expect {
+			t.Errorf("expected %d, got %d", expect, got)
+		}
+	}
+}
diff --git a/starlark/interp.go b/starlark/interp.go
new file mode 100644
index 0000000..642d8f5
--- /dev/null
+++ b/starlark/interp.go
@@ -0,0 +1,669 @@
+package starlark
+
+// This file defines the bytecode interpreter.
+
+import (
+ "fmt"
+ "os"
+ "sync/atomic"
+ "unsafe"
+
+ "go.starlark.net/internal/compile"
+ "go.starlark.net/internal/spell"
+ "go.starlark.net/resolve"
+ "go.starlark.net/syntax"
+)
+
+// vmdebug, when true, makes the interpreter print a trace of its work:
+// frame entry and exit, the operand stack and each decoded instruction,
+// and every call it makes.
+const vmdebug = false // TODO(adonovan): use a bitfield of specific kinds of error.
+
+// TODO(adonovan):
+// - optimize position table.
+// - opt: record MaxIterStack during compilation and preallocate the stack.
+
+// CallInternal executes the compiled bytecode of fn using the frame
+// thread.frameAt(0). The arguments args and kwargs are bound to fn's
+// parameters by setArgs, then instructions are decoded and executed one at a
+// time until a RETURN instruction, an error, or cancellation of the thread.
+//
+// It returns the value left by RETURN, or a non-nil error if argument
+// binding fails, an instruction fails, or the thread has been cancelled.
+// Unless resolve.AllowRecursion is set, the call is rejected when fn's code
+// is already executing in an enclosing frame of the same thread.
+func (fn *Function) CallInternal(thread *Thread, args Tuple, kwargs []Tuple) (Value, error) {
+ // Postcondition: args is not mutated. This is stricter than required by Callable,
+ // but allows CALL to avoid a copy.
+
+ if !resolve.AllowRecursion {
+ // detect recursion
+ for _, fr := range thread.stack[:len(thread.stack)-1] {
+ // We look for the same function code,
+ // not function value, otherwise the user could
+ // defeat the check by writing the Y combinator.
+ if frfn, ok := fr.Callable().(*Function); ok && frfn.funcode == fn.funcode {
+ return nil, fmt.Errorf("function %s called recursively", fn.Name())
+ }
+ }
+ }
+
+ f := fn.funcode
+ fr := thread.frameAt(0)
+
+ // Allocate space for stack and locals.
+ // Logically these do not escape from this frame
+ // (See https://github.com/golang/go/issues/20533.)
+ //
+ // This heap allocation looks expensive, but I was unable to get
+ // more than 1% real time improvement in a large alloc-heavy
+ // benchmark (in which this alloc was 8% of alloc-bytes)
+ // by allocating space for 8 Values in each frame, or
+ // by allocating stack by slicing an array held by the Thread
+ // that is expanded in chunks of min(k, nspace), for k=256 or 1024.
+ nlocals := len(f.Locals)
+ nspace := nlocals + f.MaxStack
+ space := make([]Value, nspace)
+ locals := space[:nlocals:nlocals] // local variables, starting with parameters
+ stack := space[nlocals:] // operand stack
+
+ // Digest arguments and set parameters.
+ err := setArgs(locals, fn, args, kwargs)
+ if err != nil {
+ return nil, thread.evalError(err)
+ }
+
+ fr.locals = locals
+
+ if vmdebug {
+ fmt.Printf("Entering %s @ %s\n", f.Name, f.Position(0))
+ fmt.Printf("%d stack, %d locals\n", len(stack), len(locals))
+ defer fmt.Println("Leaving ", f.Name)
+ }
+
+ // Spill indicated locals to cells.
+ // Each cell is a separate alloc to avoid spurious liveness.
+ for _, index := range f.Cells {
+ locals[index] = &cell{locals[index]}
+ }
+
+ // TODO(adonovan): add static check that beneath this point
+ // - there is exactly one return statement
+ // - there is no redefinition of 'err'.
+
+ var iterstack []Iterator // stack of active iterators
+
+ // sp is the operand stack pointer: stack[:sp] holds the live operands.
+ // pc is the offset in code of the next byte to decode.
+ sp := 0
+ var pc uint32
+ var result Value
+ code := f.Code
+loop:
+ for {
+ // Count this instruction against the thread's step budget, then
+ // stop if a cancellation reason has been recorded on the thread.
+ thread.steps++
+ if thread.steps >= thread.maxSteps {
+ thread.Cancel("too many steps")
+ }
+ if reason := atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&thread.cancelReason))); reason != nil {
+ err = fmt.Errorf("Starlark computation cancelled: %s", *(*string)(reason))
+ break loop
+ }
+
+ fr.pc = pc
+
+ op := compile.Opcode(code[pc])
+ pc++
+ var arg uint32
+ if op >= compile.OpcodeArgMin {
+ // The operand is a varint: 7 payload bits per byte, least
+ // significant group first; a byte below 0x80 ends it.
+ // TODO(adonovan): opt: profile this.
+ // Perhaps compiling big endian would be less work to decode?
+ for s := uint(0); ; s += 7 {
+ b := code[pc]
+ pc++
+ arg |= uint32(b&0x7f) << s
+ if b < 0x80 {
+ break
+ }
+ }
+ }
+ if vmdebug {
+ fmt.Fprintln(os.Stderr, stack[:sp]) // very verbose!
+ compile.PrintOp(f, fr.pc, op, arg)
+ }
+
+ switch op {
+ case compile.NOP:
+ // nop
+
+ case compile.DUP:
+ stack[sp] = stack[sp-1]
+ sp++
+
+ case compile.DUP2:
+ stack[sp] = stack[sp-2]
+ stack[sp+1] = stack[sp-1]
+ sp += 2
+
+ case compile.POP:
+ sp--
+
+ case compile.EXCH:
+ stack[sp-2], stack[sp-1] = stack[sp-1], stack[sp-2]
+
+ case compile.EQL, compile.NEQ, compile.GT, compile.LT, compile.LE, compile.GE:
+ // Map the opcode to the corresponding syntax token by offset.
+ op := syntax.Token(op-compile.EQL) + syntax.EQL
+ y := stack[sp-1]
+ x := stack[sp-2]
+ sp -= 2
+ ok, err2 := Compare(op, x, y)
+ if err2 != nil {
+ err = err2
+ break loop
+ }
+ stack[sp] = Bool(ok)
+ sp++
+
+ case compile.PLUS,
+ compile.MINUS,
+ compile.STAR,
+ compile.SLASH,
+ compile.SLASHSLASH,
+ compile.PERCENT,
+ compile.AMP,
+ compile.PIPE,
+ compile.CIRCUMFLEX,
+ compile.LTLT,
+ compile.GTGT,
+ compile.IN:
+ binop := syntax.Token(op-compile.PLUS) + syntax.PLUS
+ if op == compile.IN {
+ binop = syntax.IN // IN token is out of order
+ }
+ y := stack[sp-1]
+ x := stack[sp-2]
+ sp -= 2
+ z, err2 := Binary(binop, x, y)
+ if err2 != nil {
+ err = err2
+ break loop
+ }
+ stack[sp] = z
+ sp++
+
+ case compile.UPLUS, compile.UMINUS, compile.TILDE:
+ var unop syntax.Token
+ if op == compile.TILDE {
+ unop = syntax.TILDE
+ } else {
+ unop = syntax.Token(op-compile.UPLUS) + syntax.PLUS
+ }
+ x := stack[sp-1]
+ y, err2 := Unary(unop, x)
+ if err2 != nil {
+ err = err2
+ break loop
+ }
+ stack[sp-1] = y
+
+ case compile.INPLACE_ADD:
+ y := stack[sp-1]
+ x := stack[sp-2]
+ sp -= 2
+
+ // It's possible that y is not Iterable but
+ // nonetheless defines x+y, in which case we
+ // should fall back to the general case.
+ var z Value
+ if xlist, ok := x.(*List); ok {
+ if yiter, ok := y.(Iterable); ok {
+ if err = xlist.checkMutable("apply += to"); err != nil {
+ break loop
+ }
+ listExtend(xlist, yiter)
+ z = xlist
+ }
+ }
+ if z == nil {
+ z, err = Binary(syntax.PLUS, x, y)
+ if err != nil {
+ break loop
+ }
+ }
+
+ stack[sp] = z
+ sp++
+
+ case compile.NONE:
+ stack[sp] = None
+ sp++
+
+ case compile.TRUE:
+ stack[sp] = True
+ sp++
+
+ case compile.FALSE:
+ stack[sp] = False
+ sp++
+
+ case compile.MANDATORY:
+ stack[sp] = mandatory{}
+ sp++
+
+ case compile.JMP:
+ pc = arg
+
+ case compile.CALL, compile.CALL_VAR, compile.CALL_KW, compile.CALL_VAR_KW:
+ // Operands are popped from the top of the stack downwards:
+ // **kwargs (if any), *args (if any), the named-argument pairs,
+ // the positional arguments, and finally the callee.
+ // arg packs two counts: the low 8 bits are the number of named
+ // pairs and the remaining high bits the number of positionals.
+ var kwargs Value
+ if op == compile.CALL_KW || op == compile.CALL_VAR_KW {
+ kwargs = stack[sp-1]
+ sp--
+ }
+
+ var args Value
+ if op == compile.CALL_VAR || op == compile.CALL_VAR_KW {
+ args = stack[sp-1]
+ sp--
+ }
+
+ // named args (pairs)
+ var kvpairs []Tuple
+ if nkvpairs := int(arg & 0xff); nkvpairs > 0 {
+ kvpairs = make([]Tuple, 0, nkvpairs)
+ kvpairsAlloc := make(Tuple, 2*nkvpairs) // allocate a single backing array
+ sp -= 2 * nkvpairs
+ for i := 0; i < nkvpairs; i++ {
+ pair := kvpairsAlloc[:2:2]
+ kvpairsAlloc = kvpairsAlloc[2:]
+ pair[0] = stack[sp+2*i] // name
+ pair[1] = stack[sp+2*i+1] // value
+ kvpairs = append(kvpairs, pair)
+ }
+ }
+ if kwargs != nil {
+ // Add key/value items from **kwargs dictionary.
+ dict, ok := kwargs.(IterableMapping)
+ if !ok {
+ err = fmt.Errorf("argument after ** must be a mapping, not %s", kwargs.Type())
+ break loop
+ }
+ items := dict.Items()
+ for _, item := range items {
+ if _, ok := item[0].(String); !ok {
+ err = fmt.Errorf("keywords must be strings, not %s", item[0].Type())
+ break loop
+ }
+ }
+ if len(kvpairs) == 0 {
+ kvpairs = items
+ } else {
+ kvpairs = append(kvpairs, items...)
+ }
+ }
+
+ // positional args
+ var positional Tuple
+ if npos := int(arg >> 8); npos > 0 {
+ positional = stack[sp-npos : sp]
+ sp -= npos
+
+ // Copy positional arguments into a new array,
+ // unless the callee is another Starlark function,
+ // in which case it can be trusted not to mutate them.
+ if _, ok := stack[sp-1].(*Function); !ok || args != nil {
+ positional = append(Tuple(nil), positional...)
+ }
+ }
+ if args != nil {
+ // Add elements from *args sequence.
+ iter := Iterate(args)
+ if iter == nil {
+ err = fmt.Errorf("argument after * must be iterable, not %s", args.Type())
+ break loop
+ }
+ var elem Value
+ for iter.Next(&elem) {
+ positional = append(positional, elem)
+ }
+ iter.Done()
+ }
+
+ // The callee is now on top of the stack; its slot is reused
+ // below to hold the result of the call.
+ function := stack[sp-1]
+
+ if vmdebug {
+ fmt.Printf("VM call %s args=%s kwargs=%s @%s\n",
+ function, positional, kvpairs, f.Position(fr.pc))
+ }
+
+ thread.endProfSpan()
+ z, err2 := Call(thread, function, positional, kvpairs)
+ thread.beginProfSpan()
+ if err2 != nil {
+ err = err2
+ break loop
+ }
+ if vmdebug {
+ fmt.Printf("Resuming %s @ %s\n", f.Name, f.Position(0))
+ }
+ stack[sp-1] = z
+
+ case compile.ITERPUSH:
+ x := stack[sp-1]
+ sp--
+ iter := Iterate(x)
+ if iter == nil {
+ err = fmt.Errorf("%s value is not iterable", x.Type())
+ break loop
+ }
+ iterstack = append(iterstack, iter)
+
+ case compile.ITERJMP:
+ // Push the next element of the innermost iterator,
+ // or jump to arg when it is exhausted.
+ iter := iterstack[len(iterstack)-1]
+ if iter.Next(&stack[sp]) {
+ sp++
+ } else {
+ pc = arg
+ }
+
+ case compile.ITERPOP:
+ n := len(iterstack) - 1
+ iterstack[n].Done()
+ iterstack = iterstack[:n]
+
+ case compile.NOT:
+ stack[sp-1] = !stack[sp-1].Truth()
+
+ case compile.RETURN:
+ result = stack[sp-1]
+ break loop
+
+ case compile.SETINDEX:
+ z := stack[sp-1]
+ y := stack[sp-2]
+ x := stack[sp-3]
+ sp -= 3
+ err = setIndex(x, y, z)
+ if err != nil {
+ break loop
+ }
+
+ case compile.INDEX:
+ y := stack[sp-1]
+ x := stack[sp-2]
+ sp -= 2
+ z, err2 := getIndex(x, y)
+ if err2 != nil {
+ err = err2
+ break loop
+ }
+ stack[sp] = z
+ sp++
+
+ case compile.ATTR:
+ x := stack[sp-1]
+ name := f.Prog.Names[arg]
+ y, err2 := getAttr(x, name)
+ if err2 != nil {
+ err = err2
+ break loop
+ }
+ stack[sp-1] = y
+
+ case compile.SETFIELD:
+ y := stack[sp-1]
+ x := stack[sp-2]
+ sp -= 2
+ name := f.Prog.Names[arg]
+ if err2 := setField(x, name, y); err2 != nil {
+ err = err2
+ break loop
+ }
+
+ case compile.MAKEDICT:
+ stack[sp] = new(Dict)
+ sp++
+
+ case compile.SETDICT, compile.SETDICTUNIQ:
+ dict := stack[sp-3].(*Dict)
+ k := stack[sp-2]
+ v := stack[sp-1]
+ sp -= 3
+ oldlen := dict.Len()
+ if err2 := dict.SetKey(k, v); err2 != nil {
+ err = err2
+ break loop
+ }
+ // An unchanged length after insertion means the key already existed.
+ if op == compile.SETDICTUNIQ && dict.Len() == oldlen {
+ err = fmt.Errorf("duplicate key: %v", k)
+ break loop
+ }
+
+ case compile.APPEND:
+ elem := stack[sp-1]
+ list := stack[sp-2].(*List)
+ sp -= 2
+ list.elems = append(list.elems, elem)
+
+ case compile.SLICE:
+ x := stack[sp-4]
+ lo := stack[sp-3]
+ hi := stack[sp-2]
+ step := stack[sp-1]
+ sp -= 4
+ res, err2 := slice(x, lo, hi, step)
+ if err2 != nil {
+ err = err2
+ break loop
+ }
+ stack[sp] = res
+ sp++
+
+ case compile.UNPACK:
+ n := int(arg)
+ iterable := stack[sp-1]
+ sp--
+ iter := Iterate(iterable)
+ if iter == nil {
+ err = fmt.Errorf("got %s in sequence assignment", iterable.Type())
+ break loop
+ }
+ // Reserve n slots and fill them from the top downwards, so the
+ // first element of the iterable ends up on top of the stack.
+ i := 0
+ sp += n
+ for i < n && iter.Next(&stack[sp-1-i]) {
+ i++
+ }
+ var dummy Value
+ if iter.Next(&dummy) {
+ // NB: Len may return -1 here in obscure cases.
+ // NOTE(review): this path leaves the loop without calling
+ // iter.Done(), unlike the paths below — confirm whether that is intended.
+ err = fmt.Errorf("too many values to unpack (got %d, want %d)", Len(iterable), n)
+ break loop
+ }
+ iter.Done()
+ if i < n {
+ err = fmt.Errorf("too few values to unpack (got %d, want %d)", i, n)
+ break loop
+ }
+
+ case compile.CJMP:
+ if stack[sp-1].Truth() {
+ pc = arg
+ }
+ sp--
+
+ case compile.CONSTANT:
+ stack[sp] = fn.module.constants[arg]
+ sp++
+
+ case compile.MAKETUPLE:
+ n := int(arg)
+ tuple := make(Tuple, n)
+ sp -= n
+ copy(tuple, stack[sp:])
+ stack[sp] = tuple
+ sp++
+
+ case compile.MAKELIST:
+ n := int(arg)
+ elems := make([]Value, n)
+ sp -= n
+ copy(elems, stack[sp:])
+ stack[sp] = NewList(elems)
+ sp++
+
+ case compile.MAKEFUNC:
+ // The tuple on top of the stack holds the default parameter
+ // values followed by the free variables of the new function.
+ funcode := f.Prog.Functions[arg]
+ tuple := stack[sp-1].(Tuple)
+ n := len(tuple) - len(funcode.Freevars)
+ defaults := tuple[:n:n]
+ freevars := tuple[n:]
+ stack[sp-1] = &Function{
+ funcode: funcode,
+ module: fn.module,
+ defaults: defaults,
+ freevars: freevars,
+ }
+
+ case compile.LOAD:
+ n := int(arg)
+ module := string(stack[sp-1].(String))
+ sp--
+
+ if thread.Load == nil {
+ err = fmt.Errorf("load not implemented by this application")
+ break loop
+ }
+
+ thread.endProfSpan()
+ dict, err2 := thread.Load(thread, module)
+ thread.beginProfSpan()
+ if err2 != nil {
+ err = wrappedError{
+ msg: fmt.Sprintf("cannot load %s: %v", module, err2),
+ cause: err2,
+ }
+ break loop
+ }
+
+ // The n names to load sit on the stack beneath the module name;
+ // each is replaced in place by the value it names in the module.
+ for i := 0; i < n; i++ {
+ from := string(stack[sp-1-i].(String))
+ v, ok := dict[from]
+ if !ok {
+ err = fmt.Errorf("load: name %s not found in module %s", from, module)
+ if n := spell.Nearest(from, dict.Keys()); n != "" {
+ err = fmt.Errorf("%s (did you mean %s?)", err, n)
+ }
+ break loop
+ }
+ stack[sp-1-i] = v
+ }
+
+ case compile.SETLOCAL:
+ locals[arg] = stack[sp-1]
+ sp--
+
+ case compile.SETLOCALCELL:
+ locals[arg].(*cell).v = stack[sp-1]
+ sp--
+
+ case compile.SETGLOBAL:
+ fn.module.globals[arg] = stack[sp-1]
+ sp--
+
+ case compile.LOCAL:
+ x := locals[arg]
+ if x == nil {
+ err = fmt.Errorf("local variable %s referenced before assignment", f.Locals[arg].Name)
+ break loop
+ }
+ stack[sp] = x
+ sp++
+
+ case compile.FREE:
+ stack[sp] = fn.freevars[arg]
+ sp++
+
+ case compile.LOCALCELL:
+ v := locals[arg].(*cell).v
+ if v == nil {
+ err = fmt.Errorf("local variable %s referenced before assignment", f.Locals[arg].Name)
+ break loop
+ }
+ stack[sp] = v
+ sp++
+
+ case compile.FREECELL:
+ v := fn.freevars[arg].(*cell).v
+ if v == nil {
+ err = fmt.Errorf("local variable %s referenced before assignment", f.Freevars[arg].Name)
+ break loop
+ }
+ stack[sp] = v
+ sp++
+
+ case compile.GLOBAL:
+ x := fn.module.globals[arg]
+ if x == nil {
+ err = fmt.Errorf("global variable %s referenced before assignment", f.Prog.Globals[arg].Name)
+ break loop
+ }
+ stack[sp] = x
+ sp++
+
+ case compile.PREDECLARED:
+ name := f.Prog.Names[arg]
+ x := fn.module.predeclared[name]
+ if x == nil {
+ err = fmt.Errorf("internal error: predeclared variable %s is uninitialized", name)
+ break loop
+ }
+ stack[sp] = x
+ sp++
+
+ case compile.UNIVERSAL:
+ stack[sp] = Universe[f.Prog.Names[arg]]
+ sp++
+
+ default:
+ err = fmt.Errorf("unimplemented: %s", op)
+ break loop
+ }
+ }
+
+ // ITERPOP the rest of the iterator stack.
+ for _, iter := range iterstack {
+ iter.Done()
+ }
+
+ // Drop the frame's reference to the locals now that execution has ended.
+ fr.locals = nil
+
+ return result, err
+}
+
+// wrappedError is an error with a fixed message that also remembers
+// the underlying error that caused it.
+type wrappedError struct {
+	msg   string
+	cause error
+}
+
+// Error returns the stored message; the cause is not appended to it.
+func (w wrappedError) Error() string { return w.msg }
+
+// Unwrap returns the underlying cause, which makes wrappedError satisfy
+// the xerrors.Wrapper interface
+// (https://godoc.org/golang.org/x/xerrors#Wrapper).
+func (w wrappedError) Unwrap() error { return w.cause }
+
+// mandatory is a placeholder Value stored in a function's defaults tuple
+// in the position of a (keyword-only) parameter that has no default and
+// must therefore be supplied by the caller.
+type mandatory struct{}
+
+// String returns the fixed name of the sentinel.
+func (mandatory) String() string {
+	return "mandatory"
+}
+
+// Type returns the fixed type name of the sentinel.
+func (mandatory) Type() string {
+	return "mandatory"
+}
+
+// Freeze does nothing: the sentinel is immutable.
+func (mandatory) Freeze() {}
+
+// Truth reports the sentinel as false.
+func (mandatory) Truth() Bool {
+	return False
+}
+
+// Hash returns a constant hash of zero.
+func (mandatory) Hash() (uint32, error) {
+	return 0, nil
+}
+
+// A cell boxes a single Value.
+// A local variable marked as a cell stores its value indirectly, through the
+// box, so that an outer function and the functions nested in it can share it.
+// Cells are only ever accessed through the indirect
+// {FREE,LOCAL,SETLOCAL}CELL instructions.
+// Every element of the FreeVars tuple is a cell,
+// and the FREE instruction always produces one.
+type cell struct{ v Value }
+
+// String returns the fixed name "cell".
+func (c *cell) String() string { return "cell" }
+
+// Type returns the fixed type name "cell".
+func (c *cell) Type() string { return "cell" }
+
+// Freeze freezes the boxed value; an empty cell is left alone.
+func (c *cell) Freeze() {
+	if c.v == nil {
+		return
+	}
+	c.v.Freeze()
+}
+
+// Truth must never be called on a cell.
+func (c *cell) Truth() Bool { panic("unreachable") }
+
+// Hash must never be called on a cell.
+func (c *cell) Hash() (uint32, error) { panic("unreachable") }
diff --git a/starlark/library.go b/starlark/library.go
new file mode 100644
index 0000000..5620426
--- /dev/null
+++ b/starlark/library.go
@@ -0,0 +1,2251 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark
+
+// This file defines the library of built-ins.
+//
+// Built-ins must explicitly check the "frozen" flag before updating
+// mutable types such as lists and dicts.
+
+import (
+ "errors"
+ "fmt"
+ "math"
+ "math/big"
+ "os"
+ "sort"
+ "strconv"
+ "strings"
+ "unicode"
+ "unicode/utf16"
+ "unicode/utf8"
+
+ "go.starlark.net/syntax"
+)
+
+// Universe defines the set of universal built-ins, such as None, True, and len.
+//
+// The Go application may add or remove items from the
+// universe dictionary before Starlark evaluation begins.
+// All values in the dictionary must be immutable.
+// Starlark programs cannot modify the dictionary.
+var Universe StringDict
+
+// init populates Universe with the built-in constants and functions.
+// NOTE(review): presumably this is done in init rather than in the variable's
+// initializer to avoid an initialization cycle between Universe and the
+// built-in functions that reach it — confirm before restructuring.
+func init() {
+ // https://github.com/google/starlark-go/blob/master/doc/spec.md#built-in-constants-and-functions
+ Universe = StringDict{
+ "None": None,
+ "True": True,
+ "False": False,
+ "any": NewBuiltin("any", any),
+ "all": NewBuiltin("all", all),
+ "bool": NewBuiltin("bool", bool_),
+ "bytes": NewBuiltin("bytes", bytes_),
+ "chr": NewBuiltin("chr", chr),
+ "dict": NewBuiltin("dict", dict),
+ "dir": NewBuiltin("dir", dir),
+ "enumerate": NewBuiltin("enumerate", enumerate),
+ "fail": NewBuiltin("fail", fail),
+ "float": NewBuiltin("float", float),
+ "getattr": NewBuiltin("getattr", getattr),
+ "hasattr": NewBuiltin("hasattr", hasattr),
+ "hash": NewBuiltin("hash", hash),
+ "int": NewBuiltin("int", int_),
+ "len": NewBuiltin("len", len_),
+ "list": NewBuiltin("list", list),
+ // max and min share one implementation, registered under two names.
+ "max": NewBuiltin("max", minmax),
+ "min": NewBuiltin("min", minmax),
+ "ord": NewBuiltin("ord", ord),
+ "print": NewBuiltin("print", print),
+ "range": NewBuiltin("range", range_),
+ "repr": NewBuiltin("repr", repr),
+ "reversed": NewBuiltin("reversed", reversed),
+ "set": NewBuiltin("set", set), // requires resolve.AllowSet
+ "sorted": NewBuiltin("sorted", sorted),
+ "str": NewBuiltin("str", str),
+ "tuple": NewBuiltin("tuple", tuple),
+ "type": NewBuiltin("type", type_),
+ "zip": NewBuiltin("zip", zip),
+ }
+}
+
+// methods of built-in types
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#built-in-methods
+//
+// Each table maps a method name to a Builtin that has no receiver yet.
+// Tables of this shape are what builtinAttr and builtinAttrNames accept.
+// Entries marked "sic" register an implementation under a name that differs
+// from the implementation's own; presumably the implementation inspects the
+// Builtin's name to pick the variant — confirm against the implementations.
+var (
+ bytesMethods = map[string]*Builtin{
+ "elems": NewBuiltin("elems", bytes_elems),
+ }
+
+ dictMethods = map[string]*Builtin{
+ "clear": NewBuiltin("clear", dict_clear),
+ "get": NewBuiltin("get", dict_get),
+ "items": NewBuiltin("items", dict_items),
+ "keys": NewBuiltin("keys", dict_keys),
+ "pop": NewBuiltin("pop", dict_pop),
+ "popitem": NewBuiltin("popitem", dict_popitem),
+ "setdefault": NewBuiltin("setdefault", dict_setdefault),
+ "update": NewBuiltin("update", dict_update),
+ "values": NewBuiltin("values", dict_values),
+ }
+
+ listMethods = map[string]*Builtin{
+ "append": NewBuiltin("append", list_append),
+ "clear": NewBuiltin("clear", list_clear),
+ "extend": NewBuiltin("extend", list_extend),
+ "index": NewBuiltin("index", list_index),
+ "insert": NewBuiltin("insert", list_insert),
+ "pop": NewBuiltin("pop", list_pop),
+ "remove": NewBuiltin("remove", list_remove),
+ }
+
+ stringMethods = map[string]*Builtin{
+ "capitalize": NewBuiltin("capitalize", string_capitalize),
+ "codepoint_ords": NewBuiltin("codepoint_ords", string_iterable),
+ "codepoints": NewBuiltin("codepoints", string_iterable), // sic
+ "count": NewBuiltin("count", string_count),
+ "elem_ords": NewBuiltin("elem_ords", string_iterable),
+ "elems": NewBuiltin("elems", string_iterable), // sic
+ "endswith": NewBuiltin("endswith", string_startswith), // sic
+ "find": NewBuiltin("find", string_find),
+ "format": NewBuiltin("format", string_format),
+ "index": NewBuiltin("index", string_index),
+ "isalnum": NewBuiltin("isalnum", string_isalnum),
+ "isalpha": NewBuiltin("isalpha", string_isalpha),
+ "isdigit": NewBuiltin("isdigit", string_isdigit),
+ "islower": NewBuiltin("islower", string_islower),
+ "isspace": NewBuiltin("isspace", string_isspace),
+ "istitle": NewBuiltin("istitle", string_istitle),
+ "isupper": NewBuiltin("isupper", string_isupper),
+ "join": NewBuiltin("join", string_join),
+ "lower": NewBuiltin("lower", string_lower),
+ "lstrip": NewBuiltin("lstrip", string_strip), // sic
+ "partition": NewBuiltin("partition", string_partition),
+ "replace": NewBuiltin("replace", string_replace),
+ "rfind": NewBuiltin("rfind", string_rfind),
+ "rindex": NewBuiltin("rindex", string_rindex),
+ "rpartition": NewBuiltin("rpartition", string_partition), // sic
+ "rsplit": NewBuiltin("rsplit", string_split), // sic
+ "rstrip": NewBuiltin("rstrip", string_strip), // sic
+ "split": NewBuiltin("split", string_split),
+ "splitlines": NewBuiltin("splitlines", string_splitlines),
+ "startswith": NewBuiltin("startswith", string_startswith),
+ "strip": NewBuiltin("strip", string_strip),
+ "title": NewBuiltin("title", string_title),
+ "upper": NewBuiltin("upper", string_upper),
+ }
+
+ setMethods = map[string]*Builtin{
+ "union": NewBuiltin("union", set_union),
+ }
+)
+
+// builtinAttr looks up the method called name in methods and, if present,
+// returns it bound to the receiver recv.
+// A missing method is reported as (nil, nil), not as an error.
+func builtinAttr(recv Value, name string, methods map[string]*Builtin) (Value, error) {
+	if method := methods[name]; method != nil {
+		return method.BindReceiver(recv), nil
+	}
+	return nil, nil // no such method
+}
+
+// builtinAttrNames returns the names of all methods in the table,
+// in ascending order, as a freshly allocated slice.
+func builtinAttrNames(methods map[string]*Builtin) []string {
+	sorted := make([]string, len(methods))
+	next := 0
+	for key := range methods {
+		sorted[next] = key
+		next++
+	}
+	sort.Strings(sorted)
+	return sorted
+}
+
+// ---- built-in functions ----
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#all
+//
+// all reports whether every element of its single iterable argument is true.
+// Iteration stops at the first false element; an empty iterable yields True.
+func all(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var seq Iterable
+	err := UnpackPositionalArgs("all", args, kwargs, 1, &seq)
+	if err != nil {
+		return nil, err
+	}
+
+	it := seq.Iterate()
+	defer it.Done()
+
+	verdict := True
+	var elem Value
+	for it.Next(&elem) {
+		if !elem.Truth() {
+			verdict = False
+			break
+		}
+	}
+	return verdict, nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#any
+//
+// any reports whether at least one element of its single iterable argument
+// is true. Iteration stops at the first true element; an empty iterable
+// yields False.
+func any(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var seq Iterable
+	err := UnpackPositionalArgs("any", args, kwargs, 1, &seq)
+	if err != nil {
+		return nil, err
+	}
+
+	it := seq.Iterate()
+	defer it.Done()
+
+	verdict := False
+	var elem Value
+	for it.Next(&elem) {
+		if elem.Truth() {
+			verdict = True
+			break
+		}
+	}
+	return verdict, nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#bool
+func bool_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+ var x Value = False
+ if err := UnpackPositionalArgs("bool", args, kwargs, 0, &x); err != nil {
+ return nil, err
+ }
+ return x.Truth(), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#bytes
+//
+// bytes_ converts its single positional argument to a Bytes value.
+// A Bytes operand is returned unchanged, a String is transcoded by
+// utf8Transcode, and any other iterable must yield integers that fit in a
+// byte. Every other kind of operand is an error, as are keyword arguments.
+func bytes_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	switch {
+	case len(kwargs) > 0:
+		return nil, fmt.Errorf("bytes does not accept keyword arguments")
+	case len(args) != 1:
+		return nil, fmt.Errorf("bytes: got %d arguments, want exactly 1", len(args))
+	}
+
+	operand := args[0]
+	if b, isBytes := operand.(Bytes); isBytes {
+		return b, nil
+	}
+	if s, isString := operand.(String); isString {
+		// Invalid encodings are replaced by that of U+FFFD.
+		return Bytes(utf8Transcode(string(s))), nil
+	}
+	seq, isIterable := operand.(Iterable)
+	if !isIterable {
+		// Unlike string(foo), which stringifies it, bytes(foo) is an error.
+		return nil, fmt.Errorf("bytes: got %s, want string, bytes, or iterable of ints", operand.Type())
+	}
+
+	// Iterable of numeric byte values.
+	var out strings.Builder
+	if size := Len(seq); size >= 0 {
+		// common case: known length
+		out.Grow(size)
+	}
+	it := seq.Iterate()
+	defer it.Done()
+
+	var (
+		elem Value
+		octet byte
+	)
+	index := 0
+	for it.Next(&elem) {
+		if err := AsInt(elem, &octet); err != nil {
+			return nil, fmt.Errorf("bytes: at index %d, %s", index, err)
+		}
+		out.WriteByte(octet)
+		index++
+	}
+	return Bytes(out.String()), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#chr
+//
+// chr returns the string holding the single Unicode code point given by its
+// one positional integer argument. Code points below zero or above
+// unicode.MaxRune are rejected, as are keyword arguments.
+func chr(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	switch {
+	case len(kwargs) > 0:
+		return nil, fmt.Errorf("chr does not accept keyword arguments")
+	case len(args) != 1:
+		return nil, fmt.Errorf("chr: got %d arguments, want 1", len(args))
+	}
+
+	code, err := AsInt32(args[0])
+	if err != nil {
+		return nil, fmt.Errorf("chr: %s", err)
+	}
+
+	switch {
+	case code < 0:
+		return nil, fmt.Errorf("chr: Unicode code point %d out of range (<0)", code)
+	case code > unicode.MaxRune:
+		return nil, fmt.Errorf("chr: Unicode code point U+%X out of range (>0x10FFFF)", code)
+	}
+	return String(string(rune(code))), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict
+//
+// dict builds a new dictionary from at most one positional argument and any
+// keyword arguments, delegating the population to updateDict.
+func dict(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if npos := len(args); npos > 1 {
+		return nil, fmt.Errorf("dict: got %d arguments, want at most 1", npos)
+	}
+
+	result := new(Dict)
+	err := updateDict(result, args, kwargs)
+	if err != nil {
+		return nil, fmt.Errorf("dict: %v", err)
+	}
+	return result, nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#dir
+//
+// dir returns a new list of the attribute names of its single positional
+// argument, in ascending order. A value that does not implement HasAttrs
+// yields an empty list. Keyword arguments are rejected.
+func dir(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if len(kwargs) > 0 {
+		return nil, fmt.Errorf("dir does not accept keyword arguments")
+	}
+	if len(args) != 1 {
+		return nil, fmt.Errorf("dir: got %d arguments, want 1", len(args))
+	}
+
+	// Sort a private copy of the names: the slice returned by AttrNames
+	// belongs to the value, and sorting it in place would reorder storage
+	// that the value may still be using.
+	var names []string
+	if x, ok := args[0].(HasAttrs); ok {
+		names = append(names, x.AttrNames()...)
+	}
+	sort.Strings(names)
+
+	elems := make([]Value, len(names))
+	for i, name := range names {
+		elems[i] = String(name)
+	}
+	return NewList(elems), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#enumerate
+//
+// enumerate returns a new list of (index, element) pairs for the elements of
+// its iterable argument. Indices count up from the optional second argument,
+// start, which defaults to zero.
+func enumerate(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var (
+		seq   Iterable
+		start int
+	)
+	if err := UnpackPositionalArgs("enumerate", args, kwargs, 1, &seq, &start); err != nil {
+		return nil, err
+	}
+
+	it := seq.Iterate()
+	defer it.Done()
+
+	var (
+		pairs []Value
+		elem  Value
+	)
+
+	size := Len(seq)
+	if size < 0 {
+		// non-sequence (unknown length): allocate each pair separately
+		for index := start; it.Next(&elem); index++ {
+			pairs = append(pairs, Tuple{MakeInt(index), elem})
+		}
+		return NewList(pairs), nil
+	}
+
+	// common case: known length, so all pairs can be carved
+	// out of a single backing array
+	pairs = make([]Value, 0, size)
+	backing := make(Tuple, 2*size)
+	for index := start; it.Next(&elem); index++ {
+		pair := backing[:2:2]
+		backing = backing[2:]
+		pair[0], pair[1] = MakeInt(index), elem
+		pairs = append(pairs, pair)
+	}
+	return NewList(pairs), nil
+}
+
+// fail implements the fail built-in. It always returns an error whose
+// message is "fail: " followed by the positional arguments joined by
+// sep (default a single space). String arguments are written verbatim;
+// all other values are formatted with writeValue.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#fail
+func fail(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	sep := " "
+	if err := UnpackArgs("fail", nil, kwargs, "sep?", &sep); err != nil {
+		return nil, err
+	}
+
+	msg := new(strings.Builder)
+	msg.WriteString("fail: ")
+	for i, arg := range args {
+		if i != 0 {
+			msg.WriteString(sep)
+		}
+		text, isString := AsString(arg)
+		if !isString {
+			writeValue(msg, arg, nil)
+			continue
+		}
+		msg.WriteString(text)
+	}
+
+	return nil, errors.New(msg.String())
+}
+
+// float implements the float built-in. With no argument it returns 0.0.
+// Otherwise it converts its single Bool, Int, Float or String argument
+// to a Float. Strings may spell NaN, Inf or Infinity (any case, with an
+// optional sign); anything else is parsed by strconv.ParseFloat, and a
+// result that overflows to infinity is reported as an error.
+func float(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if len(kwargs) > 0 {
+		return nil, fmt.Errorf("float does not accept keyword arguments")
+	}
+	switch len(args) {
+	case 0:
+		return Float(0.0), nil
+	case 1:
+		// converted below
+	default:
+		return nil, fmt.Errorf("float got %d arguments, wants 1", len(args))
+	}
+
+	switch arg := args[0].(type) {
+	case Bool:
+		if arg {
+			return Float(1.0), nil
+		}
+		return Float(0.0), nil
+
+	case Int:
+		return arg.finiteFloat()
+
+	case Float:
+		return arg, nil
+
+	case String:
+		text := string(arg)
+		if text == "" {
+			return nil, fmt.Errorf("float: empty string")
+		}
+		// Dispatch on the final byte so that ordinary numeric
+		// literals skip the case-insensitive comparisons below.
+		switch text[len(text)-1] {
+		case 'y', 'Y':
+			switch {
+			case strings.EqualFold(text, "infinity"), strings.EqualFold(text, "+infinity"):
+				return inf, nil
+			case strings.EqualFold(text, "-infinity"):
+				return neginf, nil
+			}
+		case 'f', 'F':
+			switch {
+			case strings.EqualFold(text, "inf"), strings.EqualFold(text, "+inf"):
+				return inf, nil
+			case strings.EqualFold(text, "-inf"):
+				return neginf, nil
+			}
+		case 'n', 'N':
+			switch {
+			case strings.EqualFold(text, "nan"),
+				strings.EqualFold(text, "+nan"),
+				strings.EqualFold(text, "-nan"):
+				return nan, nil
+			}
+		}
+
+		// The infinity check deliberately precedes the error check,
+		// so an infinite result always yields the "too large" message.
+		f, err := strconv.ParseFloat(text, 64)
+		switch {
+		case math.IsInf(f, 0):
+			return nil, fmt.Errorf("floating-point number too large")
+		case err != nil:
+			return nil, fmt.Errorf("invalid float literal: %s", text)
+		}
+		return Float(f), nil
+
+	default:
+		return nil, fmt.Errorf("float got %s, want number or string", arg.Type())
+	}
+}
+
+// Special Float values returned by float() for the NaN/Inf spellings.
+var (
+	inf    = Float(math.Inf(+1)) // positive infinity
+	neginf = Float(math.Inf(-1)) // negative infinity
+	nan    = Float(math.NaN())   // not-a-number
+)
+
+// getattr implements the getattr built-in: getattr(x, name[, default]).
+// It returns the named attribute of x. If x has no such attribute, or
+// looking it up fails, the default is returned when one was supplied;
+// otherwise an error is reported.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#getattr
+func getattr(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var (
+		object   Value
+		name     string
+		fallback Value
+	)
+	if err := UnpackPositionalArgs("getattr", args, kwargs, 2, &object, &name, &fallback); err != nil {
+		return nil, err
+	}
+
+	if holder, ok := object.(HasAttrs); ok {
+		attr, err := holder.Attr(name)
+		switch {
+		case err != nil && fallback != nil:
+			// The error may mean the field is absent, or that it
+			// exists but could not be computed; either way the
+			// caller-provided default wins.
+			return fallback, nil
+		case err != nil:
+			return nil, nameErr(b, err)
+		case attr != nil:
+			return attr, nil
+		}
+		// (nil, nil) means "no such field": fall through.
+	}
+
+	if fallback != nil {
+		return fallback, nil
+	}
+	return nil, fmt.Errorf("getattr: %s has no .%s field or method", object.Type(), name)
+}
+
+// hasattr implements the hasattr built-in: hasattr(x, name) reports
+// whether x has an attribute called name. Values that do not implement
+// HasAttrs never have attributes.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#hasattr
+func hasattr(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var (
+		object Value
+		name   string
+	)
+	if err := UnpackPositionalArgs("hasattr", args, kwargs, 2, &object, &name); err != nil {
+		return nil, err
+	}
+
+	holder, ok := object.(HasAttrs)
+	if !ok {
+		return False, nil
+	}
+
+	attr, err := holder.Attr(name)
+	if err == nil {
+		return Bool(attr != nil), nil
+	}
+
+	// An Attr error is inconclusive: the attribute may exist but have
+	// failed to compute, or the error may be a detailed "no such
+	// attribute" report. Consult the declared attribute names instead.
+	for _, candidate := range holder.AttrNames() {
+		if candidate == name {
+			return True, nil
+		}
+	}
+	return False, nil
+}
+
+// hash implements the hash built-in. It returns an integer hash of a
+// string or bytes value and rejects every other type.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#hash
+func hash(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var x Value
+	if err := UnpackPositionalArgs("hash", args, kwargs, 1, &x); err != nil {
+		return nil, err
+	}
+
+	switch v := x.(type) {
+	case String:
+		// The Starlark spec requires the hash function to be
+		// deterministic across all runs, motivated by the need for
+		// reproducible builds. String.Hash is therefore unsuitable:
+		// it uses the fastest implementation available, which varies
+		// across process restarts and may evolve with the
+		// implementation.
+		return MakeInt(int(javaStringHash(string(v)))), nil
+	case Bytes:
+		return MakeInt(int(softHashString(string(v)))), nil // FNV32
+	default:
+		return nil, fmt.Errorf("hash: got %s, want string or bytes", v.Type())
+	}
+}
+
+// javaStringHash returns the same hash as would be produced by
+// java.lang.String.hashCode, i.e. the polynomial accumulator
+// h = 31*h + c taken over the UTF-16 code units c of the string.
+// This requires transcoding the string to UTF-16; transcoding may
+// introduce Unicode replacement characters U+FFFD if s does not
+// contain valid UTF-8.
+//
+// Arithmetic is performed on int32 and wraps on overflow, matching
+// Java's int semantics.
+func javaStringHash(s string) (h int32) {
+	for _, r := range s {
+		// Ranging over a string never yields a surrogate code point
+		// (invalid input decodes to U+FFFD), so the question is not
+		// whether r *is* a surrogate but whether r lies outside the
+		// Basic Multilingual Plane and thus occupies two UTF-16 code
+		// units, each of which Java hashes separately.
+		if r >= 0x10000 {
+			c1, c2 := utf16.EncodeRune(r)
+			h = 31*h + c1
+			h = 31*h + c2
+		} else {
+			h = 31*h + r // r may be U+FFFD
+		}
+	}
+	return h
+}
+
+// int_ implements the int built-in: int(x[, base]).
+// A string x is parsed by parseInt in the given base (default 10; base
+// 0 selects the base from the literal's prefix). A non-string x may not
+// be combined with an explicit base; booleans map to 0/1 and other
+// values are converted by NumberToInt.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#int
+func int_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var x Value = zero
+	var base Value
+	if err := UnpackArgs("int", args, kwargs, "x", &x, "base?", &base); err != nil {
+		return nil, err
+	}
+
+	if text, isString := AsString(x); isString {
+		radix := 10
+		if base != nil {
+			var err error
+			radix, err = AsInt32(base)
+			if err != nil {
+				return nil, fmt.Errorf("int: for base, got %s, want int", base.Type())
+			}
+			validRadix := radix == 0 || (radix >= 2 && radix <= 36)
+			if !validRadix {
+				return nil, fmt.Errorf("int: base must be an integer >= 2 && <= 36")
+			}
+		}
+		if parsed := parseInt(text, radix); parsed != nil {
+			return parsed, nil
+		}
+		return nil, fmt.Errorf("int: invalid literal with base %d: %s", radix, text)
+	}
+
+	if base != nil {
+		return nil, fmt.Errorf("int: can't convert non-string with explicit base")
+	}
+
+	if flag, isBool := x.(Bool); isBool {
+		if flag {
+			return one, nil
+		}
+		return zero, nil
+	}
+
+	n, err := NumberToInt(x)
+	if err != nil {
+		return nil, fmt.Errorf("int: %s", err)
+	}
+	return n, nil
+}
+
+// parseInt defines the behavior of int(string, base=int). It returns nil on error.
+func parseInt(s string, base int) Value {
+ // remove sign
+ var neg bool
+ if s != "" {
+ if s[0] == '+' {
+ s = s[1:]
+ } else if s[0] == '-' {
+ neg = true
+ s = s[1:]
+ }
+ }
+
+ // remove optional base prefix
+ baseprefix := 0
+ if len(s) > 1 && s[0] == '0' {
+ if len(s) > 2 {
+ switch s[1] {
+ case 'o', 'O':
+ baseprefix = 8
+ case 'x', 'X':
+ baseprefix = 16
+ case 'b', 'B':
+ baseprefix = 2
+ }
+ }
+ if baseprefix != 0 {
+ // Remove the base prefix if it matches
+ // the explicit base, or if base=0.
+ if base == 0 || baseprefix == base {
+ base = baseprefix
+ s = s[2:]
+ }
+ } else {
+ // For automatic base detection,
+ // a string starting with zero
+ // must be all zeros.
+ // Thus we reject int("0755", 0).
+ if base == 0 {
+ for i := 1; i < len(s); i++ {
+ if s[i] != '0' {
+ return nil
+ }
+ }
+ return zero
+ }
+ }
+ }
+ if base == 0 {
+ base = 10
+ }
+
+ // we explicitly handled sign above.
+ // if a sign remains, it is invalid.
+ if s != "" && (s[0] == '-' || s[0] == '+') {
+ return nil
+ }
+
+ // s has no sign or base prefix.
+ if i, ok := new(big.Int).SetString(s, base); ok {
+ res := MakeBigInt(i)
+ if neg {
+ res = zero.Sub(res)
+ }
+ return res
+ }
+
+ return nil
+}
+
+// len_ implements the len built-in. It returns the length reported by
+// Len for its argument, or an error when Len reports a negative value.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#len
+func len_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var x Value
+	if err := UnpackPositionalArgs("len", args, kwargs, 1, &x); err != nil {
+		return nil, err
+	}
+	n := Len(x)
+	if n >= 0 {
+		return MakeInt(n), nil
+	}
+	return nil, fmt.Errorf("len: value of type %s has no len", x.Type())
+}
+
+// list implements the list built-in. It returns a new list containing
+// the elements of the optional iterable argument, or an empty list when
+// no argument is given.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#list
+func list(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var iterable Iterable
+	if err := UnpackPositionalArgs("list", args, kwargs, 0, &iterable); err != nil {
+		return nil, err
+	}
+	if iterable == nil {
+		return NewList(nil), nil
+	}
+
+	iter := iterable.Iterate()
+	defer iter.Done()
+
+	var elems []Value
+	if size := Len(iterable); size > 0 {
+		elems = make([]Value, 0, size) // preallocate if length known
+	}
+	for elem := Value(nil); iter.Next(&elem); {
+		elems = append(elems, elem)
+	}
+	return NewList(elems), nil
+}
+
+// minmax implements both the min and max built-ins; b.Name() selects
+// which. Given one positional argument it searches that iterable; given
+// several it searches the arguments themselves. The optional key
+// function maps each element to the value that is actually compared.
+// Among equal candidates the first one encountered is returned.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#min
+func minmax(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	name := b.Name()
+	if len(args) == 0 {
+		return nil, fmt.Errorf("%s requires at least one positional argument", name)
+	}
+	var keyFunc Callable
+	if err := UnpackArgs(name, nil, kwargs, "key?", &keyFunc); err != nil {
+		return nil, err
+	}
+
+	// A candidate replaces the current best when "candidate op best" holds.
+	op := syntax.LT
+	if name == "max" {
+		op = syntax.GT
+	}
+
+	var iterable Value = args
+	if len(args) == 1 {
+		iterable = args[0]
+	}
+	iter := Iterate(iterable)
+	if iter == nil {
+		return nil, fmt.Errorf("%s: %s value is not iterable", name, iterable.Type())
+	}
+	defer iter.Done()
+
+	var best Value
+	if !iter.Next(&best) {
+		return nil, nameErr(b, "argument is an empty sequence")
+	}
+
+	// keyOf maps an element to its comparison key. The one-element
+	// argument tuple is allocated on first use and reused thereafter.
+	// Errors from the key function are returned unmodified so that
+	// their backtrace is preserved.
+	var keyargs Tuple
+	keyOf := func(v Value) (Value, error) {
+		if keyFunc == nil {
+			return v, nil
+		}
+		if keyargs == nil {
+			keyargs = Tuple{v}
+		} else {
+			keyargs[0] = v
+		}
+		return Call(thread, keyFunc, keyargs, nil)
+	}
+
+	bestKey, err := keyOf(best)
+	if err != nil {
+		return nil, err
+	}
+
+	var cand Value
+	for iter.Next(&cand) {
+		candKey, err := keyOf(cand)
+		if err != nil {
+			return nil, err
+		}
+		better, err := Compare(op, candKey, bestKey)
+		if err != nil {
+			return nil, nameErr(b, err)
+		}
+		if better {
+			best, bestKey = cand, candKey
+		}
+	}
+	return best, nil
+}
+
+// ord implements the ord built-in. For a string that encodes exactly
+// one Unicode code point it returns that code point; for a bytes value
+// of length one it returns that byte. Any other input is an error.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#ord
+func ord(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	switch {
+	case len(kwargs) > 0:
+		return nil, fmt.Errorf("ord does not accept keyword arguments")
+	case len(args) != 1:
+		return nil, fmt.Errorf("ord: got %d arguments, want 1", len(args))
+	}
+
+	switch arg := args[0].(type) {
+	case String:
+		// ord(string) returns the int value of the sole rune.
+		text := string(arg)
+		r, width := utf8.DecodeRuneInString(text)
+		if width != 0 && width == len(text) {
+			return MakeInt(int(r)), nil
+		}
+		count := utf8.RuneCountInString(text)
+		return nil, fmt.Errorf("ord: string encodes %d Unicode code points, want 1", count)
+
+	case Bytes:
+		// ord(bytes) returns the int value of the sole byte.
+		if len(arg) == 1 {
+			return MakeInt(int(arg[0])), nil
+		}
+		return nil, fmt.Errorf("ord: bytes has length %d, want 1", len(arg))
+
+	default:
+		return nil, fmt.Errorf("ord: got %s, want string or bytes", arg.Type())
+	}
+}
+
+// print implements the print built-in. It joins the positional
+// arguments with sep (default a single space), writing strings and
+// bytes verbatim and formatting other values with writeValue. The
+// result is passed to thread.Print if that hook is set; otherwise it is
+// written to standard error followed by a newline.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#print
+func print(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	sep := " "
+	if err := UnpackArgs("print", nil, kwargs, "sep?", &sep); err != nil {
+		return nil, err
+	}
+
+	out := new(strings.Builder)
+	for i, arg := range args {
+		if i != 0 {
+			out.WriteString(sep)
+		}
+		if text, ok := AsString(arg); ok {
+			out.WriteString(text)
+			continue
+		}
+		if raw, ok := arg.(Bytes); ok {
+			out.WriteString(string(raw))
+			continue
+		}
+		writeValue(out, arg, nil)
+	}
+
+	line := out.String()
+	if printer := thread.Print; printer != nil {
+		printer(thread, line)
+	} else {
+		fmt.Fprintln(os.Stderr, line)
+	}
+	return None, nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#range
+func range_(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+ var start, stop, step int
+ step = 1
+ if err := UnpackPositionalArgs("range", args, kwargs, 1, &start, &stop, &step); err != nil {
+ return nil, err
+ }
+
+ if len(args) == 1 {
+ // range(stop)
+ start, stop = 0, start
+ }
+ if step == 0 {
+ // we were given range(start, stop, 0)
+ return nil, nameErr(b, "step argument must not be zero")
+ }
+
+ return rangeValue{start: start, stop: stop, step: step, len: rangeLen(start, stop, step)}, nil
+}
+
+// A rangeValue is a comparable, immutable, indexable sequence of integers
+// defined by the three parameters to a range(...) call.
+// The element count is precomputed and stored in len.
+// Invariant: step != 0.
+type rangeValue struct {
+	start, stop, step int
+	len               int
+}
+
+// Compile-time checks of the interfaces rangeValue implements.
+var (
+	_ Indexable  = rangeValue{}
+	_ Sequence   = rangeValue{}
+	_ Comparable = rangeValue{}
+	_ Sliceable  = rangeValue{}
+)
+
+// Len returns the number of elements in the range.
+func (r rangeValue) Len() int {
+	return r.len
+}
+
+// Index returns the i'th element, start + i*step, without bounds checking.
+func (r rangeValue) Index(i int) Value {
+	return MakeInt(r.start + i*r.step)
+}
+
+// Iterate returns an iterator positioned at the first element.
+func (r rangeValue) Iterate() Iterator {
+	return &rangeIterator{r: r, i: 0}
+}
+
+// rangeLen calculates the length of a range with the provided start, stop, and step.
+// The result is 0 when stepping from start never reaches a value before stop.
+// The caller must ensure that step is non-zero; a zero step panics.
+func rangeLen(start, stop, step int) int {
+	if step == 0 {
+		panic("rangeLen: zero step")
+	}
+	if step > 0 {
+		if stop <= start {
+			return 0
+		}
+		return (stop-1-start)/step + 1
+	}
+	// step < 0
+	if start <= stop {
+		return 0
+	}
+	return (start-1-stop)/-step + 1
+}
+
+// Slice returns the range denoting elements start, start+step, ... of r
+// up to (but excluding) index end. The arguments are indices into r and
+// are used as given, without validation.
+func (r rangeValue) Slice(start, end, step int) Value {
+	sliced := rangeValue{
+		start: r.start + r.step*start,
+		stop:  r.start + r.step*end,
+		step:  r.step * step,
+	}
+	sliced.len = rangeLen(sliced.start, sliced.stop, sliced.step)
+	return sliced
+}
+
+// Freeze is a no-op: a range is immutable.
+func (r rangeValue) Freeze() {}
+
+// String returns the shortest range(...) call expression that spells
+// out r's parameters, omitting a step of 1 and, if so, a start of 0.
+func (r rangeValue) String() string {
+	switch {
+	case r.step != 1:
+		return fmt.Sprintf("range(%d, %d, %d)", r.start, r.stop, r.step)
+	case r.start != 0:
+		return fmt.Sprintf("range(%d, %d)", r.start, r.stop)
+	default:
+		return fmt.Sprintf("range(%d)", r.stop)
+	}
+}
+
+// Type returns the Starlark type name, "range".
+func (r rangeValue) Type() string {
+	return "range"
+}
+
+// Truth reports whether the range is non-empty.
+func (r rangeValue) Truth() Bool {
+	return r.len > 0
+}
+
+// Hash always fails: ranges are unhashable.
+func (r rangeValue) Hash() (uint32, error) {
+	return 0, fmt.Errorf("unhashable: range")
+}
+
+// CompareSameType compares two ranges. Only == and != are supported;
+// any other operator yields an error. y_ must be a rangeValue.
+func (x rangeValue) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+	y := y_.(rangeValue)
+	if op == syntax.EQL || op == syntax.NEQ {
+		same := rangeEqual(x, y)
+		return same == (op == syntax.EQL), nil
+	}
+	return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type())
+}
+
+// rangeEqual reports whether x and y denote the same sequence of
+// integers, regardless of the parameters used to construct them.
+func rangeEqual(x, y rangeValue) bool {
+	switch {
+	case x.len != y.len:
+		return false // sequences differ in length
+	case x.len == 0:
+		return true // both sequences are empty
+	case x.start != y.start:
+		return false // first element differs
+	case x.len == 1:
+		return true // single, identical element; step is irrelevant
+	}
+	return x.step == y.step
+}
+
+// contains reports whether the integer x is an element of the range.
+// Values that AsInt32 cannot convert are never elements.
+func (r rangeValue) contains(x Int) bool {
+	v, err := AsInt32(x)
+	if err != nil {
+		return false // out of range
+	}
+	offset := v - r.start
+	if offset%r.step != 0 {
+		return false // not on the step grid
+	}
+	idx := offset / r.step
+	return idx >= 0 && idx < r.len
+}
+
+// A rangeIterator yields the elements of a rangeValue in order.
+type rangeIterator struct {
+	r rangeValue // the range being iterated
+	i int        // index of the next element to yield
+}
+
+// Next stores the next element in *p and advances, or returns false
+// once all r.len elements have been produced.
+func (it *rangeIterator) Next(p *Value) bool {
+	if it.i >= it.r.len {
+		return false
+	}
+	*p = it.r.Index(it.i)
+	it.i++
+	return true
+}
+
+// Done is a no-op.
+func (*rangeIterator) Done() {}
+
+// repr implements the repr built-in. It returns the result of the
+// argument's String method as a Starlark string.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#repr
+func repr(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var x Value
+	err := UnpackPositionalArgs("repr", args, kwargs, 1, &x)
+	if err != nil {
+		return nil, err
+	}
+	text := x.String()
+	return String(text), nil
+}
+
+// reversed implements the reversed built-in. It returns a new list
+// containing the elements of the iterable in reverse order.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#reversed
+func reversed(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var iterable Iterable
+	if err := UnpackPositionalArgs("reversed", args, kwargs, 1, &iterable); err != nil {
+		return nil, err
+	}
+	iter := iterable.Iterate()
+	defer iter.Done()
+
+	var elems []Value
+	if size := Len(args[0]); size >= 0 {
+		elems = make([]Value, 0, size) // preallocate if length known
+	}
+	for elem := Value(nil); iter.Next(&elem); {
+		elems = append(elems, elem)
+	}
+
+	// Reverse in place by swapping from both ends toward the middle.
+	for lo, hi := 0, len(elems)-1; lo < hi; lo, hi = lo+1, hi-1 {
+		elems[lo], elems[hi] = elems[hi], elems[lo]
+	}
+	return NewList(elems), nil
+}
+
+// set implements the set built-in. It returns a new set holding the
+// elements of the optional iterable argument; an element that cannot be
+// inserted causes an error.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#set
+func set(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var iterable Iterable
+	if err := UnpackPositionalArgs("set", args, kwargs, 0, &iterable); err != nil {
+		return nil, err
+	}
+
+	result := new(Set)
+	if iterable == nil {
+		return result, nil
+	}
+
+	iter := iterable.Iterate()
+	defer iter.Done()
+	for elem := Value(nil); iter.Next(&elem); {
+		if err := result.Insert(elem); err != nil {
+			return nil, nameErr(b, err)
+		}
+	}
+	return result, nil
+}
+
+// sorted implements the sorted built-in: sorted(iterable, key, reverse).
+// It returns a new list of the iterable's elements, stably sorted by
+// the optional key function, in descending order if reverse is true.
+// A comparison error recorded during sorting is returned as the error
+// result.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#sorted
+func sorted(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	// Oddly, Python's sorted permits all arguments to be positional, thus so do we.
+	var (
+		iterable Iterable
+		key      Callable
+		reverse  bool
+	)
+	err := UnpackArgs("sorted", args, kwargs,
+		"iterable", &iterable,
+		"key?", &key,
+		"reverse?", &reverse,
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	// Snapshot the elements.
+	iter := iterable.Iterate()
+	defer iter.Done()
+	var values []Value
+	if size := Len(iterable); size > 0 {
+		values = make([]Value, 0, size) // preallocate if length is known
+	}
+	for elem := Value(nil); iter.Next(&elem); {
+		values = append(values, elem)
+	}
+
+	// Compute the sort key of each element exactly once, up front.
+	var keys []Value
+	if key != nil {
+		keys = make([]Value, len(values))
+		for i := range values {
+			k, err := Call(thread, key, Tuple{values[i]}, nil)
+			if err != nil {
+				return nil, err // to preserve backtrace, don't modify error
+			}
+			keys[i] = k
+		}
+	}
+
+	state := &sortSlice{keys: keys, values: values}
+	var order sort.Interface = state
+	if reverse {
+		order = sort.Reverse(state)
+	}
+	sort.Stable(order)
+	return NewList(state.values), state.err
+}
+
+// A sortSlice adapts parallel slices of keys and values to
+// sort.Interface. Because Less cannot return an error, a failed
+// comparison is recorded in err for the caller to inspect afterwards.
+type sortSlice struct {
+	keys   []Value // nil => values[i] is key
+	values []Value
+	err    error // most recent comparison error, if any
+}
+
+// Len returns the number of values being sorted.
+func (s *sortSlice) Len() int {
+	return len(s.values)
+}
+
+// Less reports whether the key at i orders before the key at j,
+// recording any comparison error in s.err.
+func (s *sortSlice) Less(i, j int) bool {
+	operands := s.values
+	if s.keys != nil {
+		operands = s.keys
+	}
+	less, err := Compare(syntax.LT, operands[i], operands[j])
+	if err != nil {
+		s.err = err
+	}
+	return less
+}
+
+// Swap exchanges elements i and j, keeping keys aligned with values.
+func (s *sortSlice) Swap(i, j int) {
+	s.values[i], s.values[j] = s.values[j], s.values[i]
+	if s.keys != nil {
+		s.keys[i], s.keys[j] = s.keys[j], s.keys[i]
+	}
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#str
+func str(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+ if len(kwargs) > 0 {
+ return nil, fmt.Errorf("str does not accept keyword arguments")
+ }
+ if len(args) != 1 {
+ return nil, fmt.Errorf("str: got %d arguments, want exactly 1", len(args))
+ }
+ switch x := args[0].(type) {
+ case String:
+ return x, nil
+ case Bytes:
+ // Invalid encodings are replaced by that of U+FFFD.
+ return String(utf8Transcode(string(x))), nil
+ default:
+ return String(x.String()), nil
+ }
+}
+
+// utf8Transcode returns the UTF-8-to-UTF-8 transcoding of s.
+// The effect is that each code unit that is part of an
+// invalid sequence is replaced by U+FFFD.
+// A string that is already valid UTF-8 is returned as is.
+func utf8Transcode(s string) string {
+	if !utf8.ValidString(s) {
+		var sb strings.Builder
+		// Ranging over a string decodes invalid bytes as U+FFFD.
+		for _, r := range s {
+			sb.WriteRune(r)
+		}
+		return sb.String()
+	}
+	return s
+}
+
+// tuple implements the tuple built-in. It returns a tuple of the
+// elements of the optional iterable argument, or the empty tuple when
+// called without arguments.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#tuple
+func tuple(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var iterable Iterable
+	if err := UnpackPositionalArgs("tuple", args, kwargs, 0, &iterable); err != nil {
+		return nil, err
+	}
+	if len(args) == 0 {
+		return Tuple(nil), nil
+	}
+
+	iter := iterable.Iterate()
+	defer iter.Done()
+
+	var result Tuple
+	if size := Len(iterable); size > 0 {
+		result = make(Tuple, 0, size) // preallocate if length is known
+	}
+	for elem := Value(nil); iter.Next(&elem); {
+		result = append(result, elem)
+	}
+	return result, nil
+}
+
+// type_ implements the type built-in. It returns the type name of its
+// single argument as a string.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#type
+func type_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	switch {
+	case len(kwargs) > 0:
+		return nil, fmt.Errorf("type does not accept keyword arguments")
+	case len(args) != 1:
+		return nil, fmt.Errorf("type: got %d arguments, want exactly 1", len(args))
+	}
+	typeName := args[0].Type()
+	return String(typeName), nil
+}
+
+// zip implements the zip built-in. It returns a list of tuples whose
+// i'th tuple holds the i'th element of every argument iterable; the
+// result is as long as the shortest argument.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#zip
+func zip(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if len(kwargs) > 0 {
+		return nil, fmt.Errorf("zip does not accept keyword arguments")
+	}
+
+	cols := len(args)
+	rows := 0
+	iters := make([]Iterator, cols)
+	// Release every iterator that was opened, even on early return.
+	defer func() {
+		for _, it := range iters {
+			if it != nil {
+				it.Done()
+			}
+		}
+	}()
+
+	// Open an iterator per argument and track the smallest reported
+	// length; a negative length marks an argument of unknown size.
+	for i, seq := range args {
+		it := Iterate(seq)
+		if it == nil {
+			return nil, fmt.Errorf("zip: argument #%d is not iterable: %s", i+1, seq.Type())
+		}
+		iters[i] = it
+		if n := Len(seq); i == 0 || n < rows {
+			rows = n // possibly -1
+		}
+	}
+
+	if rows < 0 {
+		// Length not known: build rows until any iterator runs dry.
+		var result []Value
+		for {
+			row := make(Tuple, cols)
+			for j, it := range iters {
+				if !it.Next(&row[j]) {
+					return NewList(result), nil
+				}
+			}
+			result = append(result, row)
+		}
+	}
+
+	// Length known: carve all rows out of a single backing array.
+	result := make([]Value, rows)
+	backing := make(Tuple, cols*rows)
+	for i := range result {
+		row := backing[:cols:cols]
+		backing = backing[cols:]
+		for j, it := range iters {
+			it.Next(&row[j])
+		}
+		result[i] = row
+	}
+	return NewList(result), nil
+}
+
+// ---- methods of built-in types ---
+
+// dict_get implements dict.get(key[, default]). It returns the value
+// for key if present; otherwise the default if one was supplied, else
+// None. A failed lookup is reported as an error.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·get
+func dict_get(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var key, dflt Value
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &key, &dflt); err != nil {
+		return nil, err
+	}
+	recv := b.Receiver().(*Dict)
+	value, found, err := recv.Get(key)
+	switch {
+	case err != nil:
+		return nil, nameErr(b, err)
+	case found:
+		return value, nil
+	case dflt != nil:
+		return dflt, nil
+	}
+	return None, nil
+}
+
+// dict_clear implements dict.clear(). It returns None together with
+// whatever error the receiver's Clear method reports.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·clear
+func dict_clear(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	recv := b.Receiver().(*Dict)
+	err := recv.Clear()
+	return None, err
+}
+
+// dict_items implements dict.items(). It returns a new list containing
+// one entry per item reported by the receiver's Items method.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·items
+func dict_items(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	pairs := b.Receiver().(*Dict).Items()
+	out := make([]Value, 0, len(pairs))
+	for _, pair := range pairs {
+		out = append(out, pair) // each item is stored as a Value
+	}
+	return NewList(out), nil
+}
+
+// dict_keys implements dict.keys(). It returns a new list of the keys
+// reported by the receiver's Keys method.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·keys
+func dict_keys(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	err := UnpackPositionalArgs(b.Name(), args, kwargs, 0)
+	if err != nil {
+		return nil, err
+	}
+	recv := b.Receiver().(*Dict)
+	return NewList(recv.Keys()), nil
+}
+
+// dict_pop implements dict.pop(key[, default]). It removes key and
+// returns its value. If key is absent it returns the default when one
+// was supplied, and otherwise fails with "missing key".
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·pop
+func dict_pop(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var key, dflt Value
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &key, &dflt); err != nil {
+		return nil, err
+	}
+	recv := b.Receiver().(*Dict)
+	value, found, err := recv.Delete(key)
+	switch {
+	case err != nil:
+		return nil, nameErr(b, err) // dict is frozen or key is unhashable
+	case found:
+		return value, nil
+	case dflt != nil:
+		return dflt, nil
+	}
+	return nil, nameErr(b, "missing key")
+}
+
+// dict_popitem implements dict.popitem(). It removes the first entry
+// of the dictionary and returns it as a (key, value) tuple, failing
+// with "empty dict" when there is nothing to remove.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·popitem
+func dict_popitem(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	d := b.Receiver().(*Dict)
+	firstKey, nonEmpty := d.ht.first()
+	if !nonEmpty {
+		return nil, nameErr(b, "empty dict")
+	}
+	value, _, err := d.Delete(firstKey)
+	if err != nil {
+		return nil, nameErr(b, err) // dict is frozen
+	}
+	return Tuple{firstKey, value}, nil
+}
+
+// dict_setdefault implements dict.setdefault(key[, default]). It
+// returns the value for key if present; otherwise it stores default
+// (None if omitted) under key and returns it.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·setdefault
+func dict_setdefault(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var key, dflt Value = nil, None
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &key, &dflt); err != nil {
+		return nil, err
+	}
+	recv := b.Receiver().(*Dict)
+
+	existing, found, err := recv.Get(key)
+	if err != nil {
+		return nil, nameErr(b, err)
+	}
+	if found {
+		return existing, nil
+	}
+
+	if err := recv.SetKey(key, dflt); err != nil {
+		return nil, nameErr(b, err)
+	}
+	return dflt, nil
+}
+
+// dict_update implements dict.update([pairs], **kwargs). It accepts at
+// most one positional argument and forwards it, together with the
+// keyword arguments, to updateDict on the receiver.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·update
+func dict_update(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if n := len(args); n > 1 {
+		return nil, fmt.Errorf("update: got %d arguments, want at most 1", n)
+	}
+	recv := b.Receiver().(*Dict)
+	err := updateDict(recv, args, kwargs)
+	if err != nil {
+		return nil, fmt.Errorf("update: %v", err)
+	}
+	return None, nil
+}
+
+// dict_values implements dict.values(). It returns a new list holding
+// the value component of each item reported by the receiver's Items
+// method.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#dict·values
+func dict_values(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	pairs := b.Receiver().(*Dict).Items()
+	values := make([]Value, 0, len(pairs))
+	for _, pair := range pairs {
+		values = append(values, pair[1])
+	}
+	return NewList(values), nil
+}
+
+// list_append implements list.append(x). It appends x to the receiver
+// after checking that the list may be mutated, and returns None.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·append
+func list_append(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var elem Value
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &elem); err != nil {
+		return nil, err
+	}
+	l := b.Receiver().(*List)
+	err := l.checkMutable("append to")
+	if err != nil {
+		return nil, nameErr(b, err)
+	}
+	l.elems = append(l.elems, elem)
+	return None, nil
+}
+
+// list_clear implements list.clear(). It delegates to the receiver's
+// Clear method and returns None on success.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·clear
+func list_clear(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	l := b.Receiver().(*List)
+	err := l.Clear()
+	if err != nil {
+		return nil, nameErr(b, err)
+	}
+	return None, nil
+}
+
+// list_extend implements list.extend(iterable). After checking that
+// the receiver may be mutated it hands the iterable to listExtend, and
+// returns None.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·extend
+func list_extend(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	l := b.Receiver().(*List)
+	var src Iterable
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &src); err != nil {
+		return nil, err
+	}
+	err := l.checkMutable("extend")
+	if err != nil {
+		return nil, nameErr(b, err)
+	}
+	listExtend(l, src)
+	return None, nil
+}
+
+// list_index implements list.index(x[, start[, end]]). It returns the
+// index of the first element equal to x within the half-open interval
+// computed by indices from start and end, or fails with
+// "value not in list".
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·index
+func list_index(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var target, start_, end_ Value
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &target, &start_, &end_); err != nil {
+		return nil, err
+	}
+
+	l := b.Receiver().(*List)
+	lo, hi, err := indices(start_, end_, l.Len())
+	if err != nil {
+		return nil, nameErr(b, err)
+	}
+
+	for pos := lo; pos < hi; pos++ {
+		match, err := Equal(l.elems[pos], target)
+		if err != nil {
+			return nil, nameErr(b, err)
+		}
+		if match {
+			return MakeInt(pos), nil
+		}
+	}
+	return nil, nameErr(b, "value not in list")
+}
+
+// list_insert implements list.insert(index, x). A negative index is
+// taken relative to the end of the list; an index beyond either end is
+// clamped, so the element is then prepended or appended. Returns None.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·insert
+func list_insert(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	l := b.Receiver().(*List)
+	var (
+		index int
+		elem  Value
+	)
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 2, &index, &elem); err != nil {
+		return nil, err
+	}
+	if err := l.checkMutable("insert into"); err != nil {
+		return nil, nameErr(b, err)
+	}
+
+	n := l.Len()
+	if index < 0 {
+		index += n
+	}
+
+	if index >= n {
+		// At or past the end: plain append.
+		l.elems = append(l.elems, elem)
+		return None, nil
+	}
+
+	if index < 0 {
+		index = 0 // before the start: insert at the front
+	}
+	l.elems = append(l.elems, nil)
+	copy(l.elems[index+1:], l.elems[index:]) // slide up one
+	l.elems[index] = elem
+	return None, nil
+}
+
+// list_remove implements list.remove(x). It deletes the first element
+// equal to x and returns None, or fails if no element matches.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·remove
+func list_remove(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	l := b.Receiver().(*List)
+	var target Value
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &target); err != nil {
+		return nil, err
+	}
+	if err := l.checkMutable("remove from"); err != nil {
+		return nil, nameErr(b, err)
+	}
+	for pos := range l.elems {
+		match, err := Equal(l.elems[pos], target)
+		if err != nil {
+			return nil, fmt.Errorf("remove: %v", err)
+		}
+		if match {
+			l.elems = append(l.elems[:pos], l.elems[pos+1:]...)
+			return None, nil
+		}
+	}
+	return nil, fmt.Errorf("remove: element not found")
+}
+
+// list_pop implements list.pop([index]). It removes and returns the
+// element at index, which defaults to the last element; a negative
+// index counts from the end. An out-of-range index is an error.
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#list·pop
+func list_pop(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	l := b.Receiver().(*List)
+	size := l.Len()
+	index := size - 1 // default: last element
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &index); err != nil {
+		return nil, err
+	}
+
+	// Normalize a negative index, but keep the caller's value for the
+	// error message.
+	pos := index
+	if pos < 0 {
+		pos += size
+	}
+	if pos < 0 || pos >= size {
+		return nil, nameErr(b, outOfRange(index, size, l))
+	}
+	if err := l.checkMutable("pop from"); err != nil {
+		return nil, nameErr(b, err)
+	}
+
+	popped := l.elems[pos]
+	l.elems = append(l.elems[:pos], l.elems[pos+1:]...)
+	return popped, nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·capitalize
+func string_capitalize(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+ if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+ return nil, err
+ }
+ s := string(b.Receiver().(String))
+ res := new(strings.Builder)
+ res.Grow(len(s))
+ for i, r := range s {
+ if i == 0 {
+ r = unicode.ToTitle(r)
+ } else {
+ r = unicode.ToLower(r)
+ }
+ res.WriteRune(r)
+ }
+ return String(res.String()), nil
+}
+
+// string_iterable returns an unspecified iterable value whose iterator yields:
+// - elems: successive 1-byte substrings
+// - codepoints: successive substrings that encode a single Unicode code point.
+// - elem_ords: numeric values of successive bytes
+// - codepoint_ords: numeric values of successive Unicode code points
+//
+// The variant is selected from the name under which the builtin is
+// invoked: a leading 'c' selects code points over elements, and a 'd'
+// in the second-to-last position (the "_ords" suffix) selects numeric
+// values over substrings.
+func string_iterable(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	recv := b.Receiver().(String)
+	method := b.Name()
+	wantOrds := method[len(method)-2] == 'd'
+	if method[0] == 'c' {
+		return stringCodepoints{recv, wantOrds}, nil
+	}
+	return stringElems{recv, wantOrds}, nil
+}
+
+// bytes_elems returns an unspecified iterable value whose
+// iterator yields the int values of successive elements
+// of the receiver.
+func bytes_elems(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	err := UnpackPositionalArgs(b.Name(), args, kwargs, 0)
+	if err != nil {
+		return nil, err
+	}
+	recv := b.Receiver().(Bytes)
+	return bytesIterable{bytes: recv}, nil
+}
+
+// A bytesIterable is an iterable returned by bytes.elems(),
+// whose iterator yields a sequence of numeric bytes values.
+type bytesIterable struct {
+	bytes Bytes
+}
+
+var _ Iterable = (*bytesIterable)(nil)
+
+// String returns the receiver's representation followed by ".elems()".
+func (bi bytesIterable) String() string {
+	return bi.bytes.String() + ".elems()"
+}
+
+// Type returns the Starlark type name, "bytes.elems".
+func (bi bytesIterable) Type() string {
+	return "bytes.elems"
+}
+
+// Freeze is a no-op: the value is immutable.
+func (bi bytesIterable) Freeze() {}
+
+// Truth always reports True.
+func (bi bytesIterable) Truth() Bool {
+	return True
+}
+
+// Hash always fails: the value is unhashable.
+func (bi bytesIterable) Hash() (uint32, error) {
+	return 0, fmt.Errorf("unhashable: %s", bi.Type())
+}
+
+// Iterate returns an iterator over the bytes.
+func (bi bytesIterable) Iterate() Iterator {
+	return &bytesIterator{bytes: bi.bytes}
+}
+
+// A bytesIterator yields the int value of each byte in turn,
+// consuming its copy of the bytes as it goes.
+type bytesIterator struct {
+	bytes Bytes // bytes not yet yielded
+}
+
+// Next stores the next byte value in *p, or returns false when none remain.
+func (it *bytesIterator) Next(p *Value) bool {
+	if len(it.bytes) == 0 {
+		return false
+	}
+	head := it.bytes[0]
+	it.bytes = it.bytes[1:]
+	*p = MakeInt(int(head))
+	return true
+}
+
+// Done is a no-op.
+func (*bytesIterator) Done() {}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·count
+//
+// string_count returns the number of non-overlapping occurrences of
+// the required argument sub within the portion of the receiver chosen
+// by the optional start and end arguments. A range in which start is
+// not below end is treated as the empty string.
+func string_count(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var (
+		sub              string
+		startArg, endArg Value
+	)
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &sub, &startArg, &endArg); err != nil {
+		return nil, err
+	}
+
+	text := string(b.Receiver().(String))
+	lo, hi, err := indices(startArg, endArg, len(text))
+	if err != nil {
+		return nil, nameErr(b, err)
+	}
+
+	window := ""
+	if lo < hi {
+		window = text[lo:hi]
+	}
+	n := strings.Count(window, sub)
+	return MakeInt(n), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·isalnum
+//
+// string_isalnum reports whether the receiver is non-empty and every
+// code point in it is a Unicode letter or digit. It takes no arguments.
+func string_isalnum(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	recv := string(b.Receiver().(String))
+	if recv == "" {
+		return False, nil
+	}
+	for _, r := range recv {
+		if unicode.IsLetter(r) || unicode.IsDigit(r) {
+			continue
+		}
+		return False, nil
+	}
+	return True, nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·isalpha
+//
+// string_isalpha reports whether the receiver is non-empty and consists
+// solely of Unicode letters. It takes no arguments.
+func string_isalpha(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	recv := string(b.Receiver().(String))
+	// The empty string starts out as "not alphabetic"; any non-letter
+	// settles the answer for a non-empty one.
+	allLetters := recv != ""
+	for _, r := range recv {
+		if !unicode.IsLetter(r) {
+			allLetters = false
+			break
+		}
+	}
+	return Bool(allLetters), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·isdigit
+//
+// string_isdigit reports whether the receiver is non-empty and consists
+// solely of Unicode decimal digits. It takes no arguments.
+func string_isdigit(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	recv := string(b.Receiver().(String))
+	if len(recv) == 0 {
+		return False, nil
+	}
+	for _, r := range recv {
+		if digit := unicode.IsDigit(r); !digit {
+			return False, nil
+		}
+	}
+	return True, nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·islower
+//
+// string_islower reports whether the receiver contains at least one
+// cased code point and is unchanged by conversion to lower case.
+// It takes no arguments.
+func string_islower(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	recv := string(b.Receiver().(String))
+	if !isCasedString(recv) {
+		return False, nil
+	}
+	lowered := strings.ToLower(recv)
+	return Bool(recv == lowered), nil
+}
+
+// isCasedString reports whether s contains at least one code point
+// for which isCasedRune holds.
+func isCasedString(s string) bool {
+	found := false
+	for _, r := range s {
+		if found = isCasedRune(r); found {
+			break
+		}
+	}
+	return found
+}
+
+// isCasedRune reports whether r is an ASCII letter or has a simple
+// case folding to some other rune.
+//
+// It's unclear what the correct behavior is for a rune such as the
+// ligature 'ﬃ' (U+FB03), a lowercase letter with no upper or title
+// case and no SimpleFold.
+func isCasedRune(r rune) bool {
+	switch {
+	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z':
+		return true
+	}
+	return unicode.SimpleFold(r) != r
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·isspace
+//
+// string_isspace reports whether the receiver is non-empty and consists
+// solely of Unicode white space. It takes no arguments.
+func string_isspace(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	recv := string(b.Receiver().(String))
+	onlySpace := len(recv) > 0
+	for _, r := range recv {
+		if unicode.IsSpace(r) {
+			continue
+		}
+		onlySpace = false
+		break
+	}
+	return Bool(onlySpace), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·istitle
+func string_istitle(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+ if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+ return nil, err
+ }
+ recv := string(b.Receiver().(String))
+
+ // Python semantics differ from x==strings.{To,}Title(x) in Go:
+ // "uppercase characters may only follow uncased characters and
+ // lowercase characters only cased ones."
+ var cased, prevCased bool
+ for _, r := range recv {
+ if 'A' <= r && r <= 'Z' || unicode.IsTitle(r) { // e.g. "Dž"
+ if prevCased {
+ return False, nil
+ }
+ prevCased = true
+ cased = true
+ } else if unicode.IsLower(r) {
+ if !prevCased {
+ return False, nil
+ }
+ prevCased = true
+ cased = true
+ } else if unicode.IsUpper(r) {
+ return False, nil
+ } else {
+ prevCased = false
+ }
+ }
+ return Bool(cased), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·isupper
+//
+// string_isupper reports whether the receiver contains at least one
+// cased code point and is unchanged by conversion to upper case.
+// It takes no arguments.
+func string_isupper(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	recv := string(b.Receiver().(String))
+	if !isCasedString(recv) {
+		return False, nil
+	}
+	raised := strings.ToUpper(recv)
+	return Bool(recv == raised), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·find
+//
+// string_find searches for the first occurrence and, because of the
+// flags it passes to string_find_impl, yields -1 rather than an error
+// when the substring is absent.
+func string_find(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+ return string_find_impl(b, args, kwargs, true, false) // allowError=true, last=false
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·format
+//
+// string_format expands the replacement fields of the receiver, which
+// serves as the format string, and returns the resulting String.
+//
+// Outside a field, "{{" and "}}" stand for a literal brace. A field has
+// the form {name}, {name!conv}, {name:spec} or {name!conv:spec}:
+//   - an empty name takes the next positional argument (automatic numbering);
+//   - a name made of decimal digits indexes args (manual numbering);
+//   - any other name is looked up among the keyword arguments.
+// The conversion is "s" (the default) or "r". A non-empty spec is rejected.
+//
+// An error is returned for a lone '}', an unmatched '{', a switch
+// between automatic and manual numbering, an out-of-range index, an
+// unknown keyword, a non-empty spec, or an unknown conversion.
+func string_format(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+ format := string(b.Receiver().(String))
+ var auto, manual bool // kinds of positional indexing used
+ buf := new(strings.Builder)
+ index := 0
+ // Each iteration consumes the literal text up to the next '{' and
+ // then, if there is one, the field (or "{{" escape) that follows.
+ for {
+ literal := format
+ i := strings.IndexByte(format, '{')
+ if i >= 0 {
+ literal = format[:i]
+ }
+
+ // Replace "}}" with "}" in non-field portion, rejecting a lone '}'.
+ for {
+ j := strings.IndexByte(literal, '}')
+ if j < 0 {
+ buf.WriteString(literal)
+ break
+ }
+ if len(literal) == j+1 || literal[j+1] != '}' {
+ return nil, fmt.Errorf("format: single '}' in format")
+ }
+ // Emit the text up to and including the first '}' and skip the second.
+ buf.WriteString(literal[:j+1])
+ literal = literal[j+2:]
+ }
+
+ if i < 0 {
+ break // end of format string
+ }
+
+ if i+1 < len(format) && format[i+1] == '{' {
+ // "{{" means a literal '{'
+ buf.WriteByte('{')
+ format = format[i+2:]
+ continue
+ }
+
+ // Isolate the field text between '{' and the next '}'.
+ format = format[i+1:]
+ i = strings.IndexByte(format, '}')
+ if i < 0 {
+ return nil, fmt.Errorf("format: unmatched '{' in format")
+ }
+
+ var arg Value
+ conv := "s"
+ var spec string
+
+ field := format[:i]
+ format = format[i+1:]
+
+ var name string
+ if i := strings.IndexByte(field, '!'); i < 0 {
+ // "name" or "name:spec"
+ if i := strings.IndexByte(field, ':'); i < 0 {
+ name = field
+ } else {
+ name = field[:i]
+ spec = field[i+1:]
+ }
+ } else {
+ // "name!conv" or "name!conv:spec"
+ name = field[:i]
+ field = field[i+1:]
+ // "conv" or "conv:spec"
+ if i := strings.IndexByte(field, ':'); i < 0 {
+ conv = field
+ } else {
+ conv = field[:i]
+ spec = field[i+1:]
+ }
+ }
+
+ if name == "" {
+ // "{}": automatic indexing
+ if manual {
+ return nil, fmt.Errorf("format: cannot switch from manual field specification to automatic field numbering")
+ }
+ auto = true
+ if index >= len(args) {
+ return nil, fmt.Errorf("format: tuple index out of range")
+ }
+ arg = args[index]
+ index++
+ } else if num, ok := decimal(name); ok {
+ // positional argument
+ if auto {
+ return nil, fmt.Errorf("format: cannot switch from automatic field numbering to manual field specification")
+ }
+ manual = true
+ if num >= len(args) {
+ return nil, fmt.Errorf("format: tuple index out of range")
+ } else {
+ arg = args[num]
+ }
+ } else {
+ // keyword argument
+ for _, kv := range kwargs {
+ if string(kv[0].(String)) == name {
+ arg = kv[1]
+ break
+ }
+ }
+ if arg == nil {
+ // Starlark does not support Python's x.y or a[i] syntaxes,
+ // or nested use of {...}.
+ if strings.Contains(name, ".") {
+ return nil, fmt.Errorf("format: attribute syntax x.y is not supported in replacement fields: %s", name)
+ }
+ if strings.Contains(name, "[") {
+ return nil, fmt.Errorf("format: element syntax a[i] is not supported in replacement fields: %s", name)
+ }
+ if strings.Contains(name, "{") {
+ return nil, fmt.Errorf("format: nested replacement fields not supported")
+ }
+ return nil, fmt.Errorf("format: keyword %s not found", name)
+ }
+ }
+
+ if spec != "" {
+ // Starlark does not support Python's format_spec features.
+ return nil, fmt.Errorf("format spec features not supported in replacement fields: %s", spec)
+ }
+
+ // "s" writes a string argument's text as is; every other case
+ // goes through writeValue.
+ switch conv {
+ case "s":
+ if str, ok := AsString(arg); ok {
+ buf.WriteString(str)
+ } else {
+ writeValue(buf, arg, nil)
+ }
+ case "r":
+ writeValue(buf, arg, nil)
+ default:
+ return nil, fmt.Errorf("format: unknown conversion %q", conv)
+ }
+ }
+ return String(buf.String()), nil
+}
+
+// decimal interprets s as a sequence of decimal digits.
+func decimal(s string) (x int, ok bool) {
+ n := len(s)
+ for i := 0; i < n; i++ {
+ digit := s[i] - '0'
+ if digit > 9 {
+ return 0, false
+ }
+ x = x*10 + int(digit)
+ if x < 0 {
+ return 0, false // underflow
+ }
+ }
+ return x, true
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·index
+//
+// string_index searches for the first occurrence and, because of the
+// flags it passes to string_find_impl, fails with "substring not found"
+// when the substring is absent.
+func string_index(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+ return string_find_impl(b, args, kwargs, false, false) // allowError=false, last=false
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·join
+//
+// string_join concatenates the elements of its single iterable
+// argument, inserting the receiver between consecutive elements.
+// It fails if any element is not a string.
+func string_join(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	sep := string(b.Receiver().(String))
+	var iterable Iterable
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &iterable); err != nil {
+		return nil, err
+	}
+
+	it := iterable.Iterate()
+	defer it.Done()
+
+	out := new(strings.Builder)
+	first := true
+	var elem Value
+	for it.Next(&elem) {
+		// The separator goes before every element except the first.
+		if !first {
+			out.WriteString(sep)
+		}
+		first = false
+
+		text, isString := AsString(elem)
+		if !isString {
+			return nil, fmt.Errorf("join: in list, want string, got %s", elem.Type())
+		}
+		out.WriteString(text)
+	}
+	return String(out.String()), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·lower
+//
+// string_lower returns a copy of the receiver converted to lower case
+// by strings.ToLower. It takes no arguments.
+func string_lower(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	err := UnpackPositionalArgs(b.Name(), args, kwargs, 0)
+	if err != nil {
+		return nil, err
+	}
+	recv := string(b.Receiver().(String))
+	return String(strings.ToLower(recv)), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·partition
+//
+// string_partition implements both partition and rpartition; a builtin
+// whose name starts with 'p' splits at the first occurrence of the
+// separator, any other at the last. The result is the 3-tuple
+// (before, sep, after). If the separator does not occur, partition
+// returns (recv, "", "") and rpartition returns ("", "", recv).
+// An empty separator is an error.
+func string_partition(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	recv := string(b.Receiver().(String))
+	var sep string
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &sep); err != nil {
+		return nil, err
+	}
+	if sep == "" {
+		return nil, nameErr(b, "empty separator")
+	}
+
+	fromLeft := b.Name()[0] == 'p'
+	find := strings.LastIndex // rpartition
+	if fromLeft {
+		find = strings.Index // partition
+	}
+
+	switch at := find(recv, sep); {
+	case at >= 0:
+		return Tuple{String(recv[:at]), String(sep), String(recv[at+len(sep):])}, nil
+	case fromLeft:
+		return Tuple{String(recv), String(""), String("")}, nil
+	default:
+		return Tuple{String(""), String(""), String(recv)}, nil
+	}
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·replace
+//
+// string_replace returns a copy of the receiver in which occurrences
+// of the first argument are replaced by the second. The optional third
+// argument limits the number of replacements; it defaults to -1, which
+// strings.Replace treats as "no limit".
+func string_replace(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	recv := string(b.Receiver().(String))
+	var (
+		from, to string
+		limit    = -1
+	)
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 2, &from, &to, &limit); err != nil {
+		return nil, err
+	}
+	replaced := strings.Replace(recv, from, to, limit)
+	return String(replaced), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·rfind
+//
+// string_rfind searches for the last occurrence and, because of the
+// flags it passes to string_find_impl, yields -1 rather than an error
+// when the substring is absent.
+func string_rfind(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+ return string_find_impl(b, args, kwargs, true, true) // allowError=true, last=true
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·rindex
+//
+// string_rindex searches for the last occurrence and, because of the
+// flags it passes to string_find_impl, fails with "substring not found"
+// when the substring is absent.
+func string_rindex(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+ return string_find_impl(b, args, kwargs, false, true) // allowError=false, last=true
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·startswith
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·endswith
+//
+// string_startswith implements both startswith and endswith; a builtin
+// whose name starts with 'e' tests for a suffix, any other for a prefix.
+// The first argument is a string or a tuple of strings, any one of
+// which may match. The optional start and end arguments restrict the
+// test to a portion of the receiver.
+func string_startswith(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var x Value
+	var start, end Value = None, None
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x, &start, &end); err != nil {
+		return nil, err
+	}
+
+	// Narrow the receiver to the requested range.
+	s := string(b.Receiver().(String))
+	lo, hi, err := indices(start, end, len(s))
+	if err != nil {
+		return nil, nameErr(b, err)
+	}
+	if hi < lo {
+		hi = lo // => empty result
+	}
+	s = s[lo:hi]
+
+	matches := strings.HasPrefix
+	if b.Name()[0] == 'e' { // endswith
+		matches = strings.HasSuffix
+	}
+
+	switch x := x.(type) {
+	case String:
+		return Bool(matches(s, string(x))), nil
+	case Tuple:
+		for i, elem := range x {
+			affix, ok := AsString(elem)
+			if !ok {
+				return nil, fmt.Errorf("%s: want string, got %s, for element %d",
+					b.Name(), elem.Type(), i)
+			}
+			if matches(s, affix) {
+				return True, nil
+			}
+		}
+		return False, nil
+	}
+	return nil, fmt.Errorf("%s: got %s, want string or tuple of string", b.Name(), x.Type())
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·strip
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·lstrip
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·rstrip
+//
+// string_strip implements strip, lstrip and rstrip, selected by the
+// first letter of the builtin's name. With a non-empty chars argument
+// it removes any of those code points from the relevant end(s) of the
+// receiver; otherwise it removes Unicode white space.
+func string_strip(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var chars string
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &chars); err != nil {
+		return nil, err
+	}
+	recv := string(b.Receiver().(String))
+	which := b.Name()[0]
+
+	var out string
+	if chars == "" {
+		switch which {
+		case 's': // strip
+			out = strings.TrimSpace(recv)
+		case 'l': // lstrip
+			out = strings.TrimLeftFunc(recv, unicode.IsSpace)
+		case 'r': // rstrip
+			out = strings.TrimRightFunc(recv, unicode.IsSpace)
+		}
+	} else {
+		switch which {
+		case 's': // strip
+			out = strings.Trim(recv, chars)
+		case 'l': // lstrip
+			out = strings.TrimLeft(recv, chars)
+		case 'r': // rstrip
+			out = strings.TrimRight(recv, chars)
+		}
+	}
+	return String(out), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·title
+func string_title(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+ if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+ return nil, err
+ }
+
+ s := string(b.Receiver().(String))
+
+ // Python semantics differ from x==strings.{To,}Title(x) in Go:
+ // "uppercase characters may only follow uncased characters and
+ // lowercase characters only cased ones."
+ buf := new(strings.Builder)
+ buf.Grow(len(s))
+ var prevCased bool
+ for _, r := range s {
+ if prevCased {
+ r = unicode.ToLower(r)
+ } else {
+ r = unicode.ToTitle(r)
+ }
+ prevCased = isCasedRune(r)
+ buf.WriteRune(r)
+ }
+ return String(buf.String()), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·upper
+//
+// string_upper returns a copy of the receiver converted to upper case
+// by strings.ToUpper. It takes no arguments.
+func string_upper(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	err := UnpackPositionalArgs(b.Name(), args, kwargs, 0)
+	if err != nil {
+		return nil, err
+	}
+	recv := string(b.Receiver().(String))
+	return String(strings.ToUpper(recv)), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·split
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·rsplit
+//
+// string_split implements both split and rsplit, told apart by the
+// builtin's name. It returns a list of the pieces of the receiver.
+//
+// If the optional separator is absent or None, the receiver is split
+// on runs of white space; otherwise it must be a non-empty string.
+// A non-negative maxsplit limits the number of splits, counted from
+// the left for split and from the right for rsplit; the default, -1,
+// means no limit.
+//
+// Errors carry the name of the method actually called, so a failing
+// rsplit reports "rsplit: ..." rather than "split: ...".
+func string_split(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	recv := string(b.Receiver().(String))
+	var sep_ Value
+	maxsplit := -1
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &sep_, &maxsplit); err != nil {
+		return nil, err
+	}
+
+	var res []string
+
+	if sep_ == nil || sep_ == None {
+		// special case: split on whitespace
+		if maxsplit < 0 {
+			res = strings.Fields(recv)
+		} else if b.Name() == "split" {
+			res = splitspace(recv, maxsplit)
+		} else { // rsplit
+			res = rsplitspace(recv, maxsplit)
+		}
+
+	} else if sep, ok := AsString(sep_); ok {
+		if sep == "" {
+			return nil, nameErr(b, "empty separator")
+		}
+		// usual case: split on non-empty separator
+		if maxsplit < 0 {
+			res = strings.Split(recv, sep)
+		} else if b.Name() == "split" {
+			res = strings.SplitN(recv, sep, maxsplit+1)
+		} else { // rsplit
+			// Split everywhere, then glue the surplus leading
+			// pieces back together into the first element.
+			res = strings.Split(recv, sep)
+			if excess := len(res) - maxsplit; excess > 0 {
+				res[0] = strings.Join(res[:excess], sep)
+				res = append(res[:1], res[excess:]...)
+			}
+		}
+
+	} else {
+		return nil, nameErr(b, fmt.Sprintf("got %s for separator, want string", sep_.Type()))
+	}
+
+	list := make([]Value, len(res))
+	for i, x := range res {
+		list[i] = String(x)
+	}
+	return NewList(list), nil
+}
+
+// rsplitspace splits s on runs of white space, working from the right
+// and making at most max splits. Once the limit is reached the rest of
+// s, up to the end of the current field, becomes the first element,
+// leading white space included. Fields are returned in left-to-right
+// order.
+//
+// Precondition: max >= 0.
+func rsplitspace(s string, max int) []string {
+	fields := make([]string, 0, max+1)
+	stop := -1 // index just past the current field, or -1 in a region of spaces.
+	i := len(s)
+	for i > 0 {
+		r, width := utf8.DecodeLastRuneInString(s[:i])
+		space := unicode.IsSpace(r)
+		if space && stop >= 0 {
+			// r is the space immediately to the left of a field.
+			if len(fields) == max {
+				break // let this field run to the start
+			}
+			fields = append(fields, s[i:stop])
+			stop = -1
+		} else if !space && stop < 0 {
+			stop = i
+		}
+		i -= width
+	}
+	if stop >= 0 {
+		fields = append(fields, s[:stop])
+	}
+
+	// Fields were collected right to left; put them in reading order.
+	for lo, hi := 0, len(fields)-1; lo < hi; lo, hi = lo+1, hi-1 {
+		fields[lo], fields[hi] = fields[hi], fields[lo]
+	}
+	return fields
+}
+
+// splitspace splits s on runs of white space, working from the left
+// and making at most max splits. Once the limit is reached the rest of
+// s, from the start of the current field, becomes the last element,
+// trailing white space included.
+//
+// Precondition: max >= 0.
+func splitspace(s string, max int) []string {
+	var fields []string
+	begin := -1 // index of field start, or -1 in a region of spaces
+	for pos, r := range s {
+		if !unicode.IsSpace(r) {
+			if begin < 0 {
+				begin = pos
+			}
+			continue
+		}
+		if begin < 0 {
+			continue // still inside a run of spaces
+		}
+		// r is the space that ends the current field.
+		if len(fields) == max {
+			break // let this field run to the end
+		}
+		fields = append(fields, s[begin:pos])
+		begin = -1
+	}
+	if begin >= 0 {
+		fields = append(fields, s[begin:])
+	}
+	return fields
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#string·splitlines
+//
+// string_splitlines returns a list of the lines of the receiver, split
+// at "\n". If the optional keepends argument is true, each line keeps
+// its terminating newline. A final newline does not produce a trailing
+// empty line, and the empty string yields an empty list.
+func string_splitlines(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var keepends bool
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0, &keepends); err != nil {
+		return nil, err
+	}
+
+	var lines []string
+	text := string(b.Receiver().(String))
+	if text != "" {
+		// TODO(adonovan): handle CRLF correctly.
+		split := strings.Split
+		if keepends {
+			split = strings.SplitAfter
+		}
+		lines = split(text, "\n")
+		// Both splitters yield a final empty piece after a trailing newline.
+		if strings.HasSuffix(text, "\n") {
+			lines = lines[:len(lines)-1]
+		}
+	}
+
+	elems := make([]Value, 0, len(lines))
+	for _, line := range lines {
+		elems = append(elems, String(line))
+	}
+	return NewList(elems), nil
+}
+
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#set·union
+//
+// set_union returns the union of the receiver and the elements of its
+// single iterable argument, as computed by (*Set).Union. Any error
+// from Union is prefixed with the builtin's name.
+//
+// The argument is required. Were it optional, a call with no argument
+// would leave iterable nil and the Iterate call below would panic.
+func set_union(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	var iterable Iterable
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &iterable); err != nil {
+		return nil, err
+	}
+	iter := iterable.Iterate()
+	defer iter.Done()
+	union, err := b.Receiver().(*Set).Union(iter)
+	if err != nil {
+		return nil, nameErr(b, err)
+	}
+	return union, nil
+}
+
+// string_find_impl is the common implementation of
+// string_{r}{find,index}.
+//
+// It searches the portion of the receiver chosen by the optional start
+// and end arguments for the required argument sub, and returns the
+// index of the match relative to the whole receiver. If last is set,
+// the last occurrence is reported instead of the first.
+//
+// When there is no match, the result depends on allowError: if it is
+// set, the result is -1 with no error; otherwise a "substring not
+// found" error is returned.
+func string_find_impl(b *Builtin, args Tuple, kwargs []Tuple, allowError, last bool) (Value, error) {
+	var (
+		sub              string
+		startArg, endArg Value
+	)
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 1, &sub, &startArg, &endArg); err != nil {
+		return nil, err
+	}
+
+	text := string(b.Receiver().(String))
+	lo, hi, err := indices(startArg, endArg, len(text))
+	if err != nil {
+		return nil, nameErr(b, err)
+	}
+	// An empty or inverted range searches the empty string.
+	window := ""
+	if lo < hi {
+		window = text[lo:hi]
+	}
+
+	search := strings.Index
+	if last {
+		search = strings.LastIndex
+	}
+	pos := search(window, sub)
+	if pos >= 0 {
+		// Translate from window-relative to receiver-relative.
+		return MakeInt(pos + lo), nil
+	}
+	if allowError {
+		return MakeInt(-1), nil
+	}
+	return nil, nameErr(b, "substring not found")
+}
+
+// Common implementation of builtin dict function and dict.update method.
+//
+// If updates holds one value, its entries are added to dict first: an
+// IterableMapping contributes its key/value items; any other value
+// must be an iterable of two-element iterables, each taken as a
+// (key, value) pair. The keyword arguments are added afterwards. An
+// error is returned if an insertion fails, if an element of the
+// sequence is malformed, or if kwargs contains a duplicate keyword.
+//
+// Precondition: len(updates) == 0 or 1.
+func updateDict(dict *Dict, updates Tuple, kwargs []Tuple) error {
+	if len(updates) == 1 {
+		switch updates := updates[0].(type) {
+		case IterableMapping:
+			// Iterate over dict's key/value pairs, not just keys.
+			for _, item := range updates.Items() {
+				if err := dict.SetKey(item[0], item[1]); err != nil {
+					return err // dict is frozen
+				}
+			}
+		default:
+			// all other sequences
+			iter := Iterate(updates)
+			if iter == nil {
+				return fmt.Errorf("got %s, want iterable", updates.Type())
+			}
+			defer iter.Done()
+			var pair Value
+			for i := 0; iter.Next(&pair); i++ {
+				if err := updateDictPair(dict, i, pair); err != nil {
+					return err
+				}
+			}
+		}
+	}
+
+	// Then add the kwargs.
+	before := dict.Len()
+	for _, pair := range kwargs {
+		if err := dict.SetKey(pair[0], pair[1]); err != nil {
+			return err // dict is frozen
+		}
+	}
+	// In the common case, each kwarg will add another dict entry.
+	// If that's not so, check whether it is because there was a duplicate kwarg.
+	if dict.Len() < before+len(kwargs) {
+		keys := make(map[String]bool, len(kwargs))
+		for _, kv := range kwargs {
+			k := kv[0].(String)
+			if keys[k] {
+				return fmt.Errorf("duplicate keyword arg: %v", k)
+			}
+			keys[k] = true
+		}
+	}
+
+	return nil
+}
+
+// updateDictPair inserts into dict the (key, value) pair denoted by
+// pair, which is element number i of a dictionary update sequence and
+// must be an iterable of length 2. Doing this in its own function lets
+// the element's iterator be released as soon as the element has been
+// handled, rather than when the whole update finishes.
+func updateDictPair(dict *Dict, i int, pair Value) error {
+	iter := Iterate(pair)
+	if iter == nil {
+		return fmt.Errorf("dictionary update sequence element #%d is not iterable (%s)", i, pair.Type())
+	}
+	defer iter.Done()
+	n := Len(pair)
+	if n < 0 {
+		return fmt.Errorf("dictionary update sequence element #%d has unknown length (%s)", i, pair.Type())
+	} else if n != 2 {
+		return fmt.Errorf("dictionary update sequence element #%d has length %d, want 2", i, n)
+	}
+	var k, v Value
+	iter.Next(&k)
+	iter.Next(&v)
+	return dict.SetKey(k, v)
+}
+
+// nameErr builds an error whose text is "name: msg", where name is
+// the name of the builtin b and msg, a string or an error, is
+// formatted with %v.
+func nameErr(b *Builtin, msg interface{}) error {
+	name := b.Name()
+	return fmt.Errorf("%s: %v", name, msg)
+}
diff --git a/starlark/profile.go b/starlark/profile.go
new file mode 100644
index 0000000..38da2b2
--- /dev/null
+++ b/starlark/profile.go
@@ -0,0 +1,449 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark
+
+// This file defines a simple execution-time profiler for Starlark.
+// It measures the wall time spent executing Starlark code, and emits a
+// gzipped protocol message in pprof format (github.com/google/pprof).
+//
+// When profiling is enabled, the interpreter calls the profiler to
+// indicate the start and end of each "span" or time interval. A leaf
+// function (whether Go or Starlark) has a single span. A function that
+// calls another function has spans for each interval in which it is the
+// top of the stack. (A LOAD instruction also ends a span.)
+//
+// At the start of a span, the interpreter records the current time in
+// the thread's topmost frame. At the end of the span, it obtains the
+// time again and subtracts the span start time. The difference is added
+// to an accumulator variable in the thread. If the accumulator exceeds
+// some fixed quantum (10ms, say), the profiler records the current call
+// stack and sends it to the profiler goroutine, along with the number
+// of quanta, which are subtracted. For example, if the accumulator
+// holds 3ms and then a completed span adds 25ms to it, its value is 28ms,
+// which exceeds 10ms. The profiler records a stack with the value 20ms
+// (2 quanta), and the accumulator is left with 8ms.
+//
+// The profiler goroutine converts the stacks into the pprof format and
+// emits a gzip-compressed protocol message to the designated output
+// file. We use a hand-written streaming proto encoder to avoid
+// dependencies on pprof and proto, and to avoid the need to
+// materialize the profile data structure in memory.
+//
+// A limitation of this profiler is that it measures wall time, which
+// does not necessarily correspond to CPU time. A CPU profiler requires
+// that only running (not runnable) threads are sampled; this is
+// commonly achieved by having the kernel deliver a (PROF) signal to an
+// arbitrary running thread, through setitimer(2). The CPU profiler in the
+// Go runtime uses this mechanism, but it is not possible for a Go
+// application to register a SIGPROF handler, nor is it possible for a
+// Go handler for some other signal to read the stack pointer of
+// the interrupted thread.
+//
+// Two caveats:
+// (1) it is tempting to send the leaf Frame directly to the profiler
+// goroutine instead of making a copy of the stack, since a Frame is a
+// spaghetti stack--a linked list. However, as soon as execution
+// resumes, the stack's Frame.pc values may be mutated, so Frames are
+// not safe to share with the asynchronous profiler goroutine.
+// (2) it is tempting to use Callables as keys in a map when tabulating
+// the pprof protocol's Function entities. However, we cannot assume
+// that Callables are valid map keys, and furthermore we must not
+// pin function values in memory indefinitely as this may cause lambda
+// values to keep their free variables live much longer than necessary.
+
+// TODO(adonovan):
+// - make Start/Stop fully thread-safe.
+// - fix the pc hack.
+// - experiment with other values of quantum.
+
+import (
+ "bufio"
+ "bytes"
+ "compress/gzip"
+ "encoding/binary"
+ "fmt"
+ "io"
+ "log"
+ "reflect"
+ "sync/atomic"
+ "time"
+ "unsafe"
+
+ "go.starlark.net/syntax"
+)
+
+// StartProfile enables time profiling of all Starlark threads,
+// and writes a profile in pprof format to w.
+// It must be followed by a call to StopProfile to stop
+// the profiler and finalize the profile.
+//
+// StartProfile returns an error if profiling was already enabled.
+//
+// StartProfile must not be called concurrently with Starlark execution.
+func StartProfile(w io.Writer) error {
+ // Atomically claim the "on" flag so that only one caller can start the profiler.
+ if !atomic.CompareAndSwapUint32(&profiler.on, 0, 1) {
+ return fmt.Errorf("profiler already running")
+ }
+
+ // TODO(adonovan): make the API fully concurrency-safe.
+ // The main challenge is racy reads/writes of profiler.events,
+ // and of send/close races on the channel it refers to.
+ // It's easy to solve them with a mutex but harder to do
+ // it efficiently.
+
+ profiler.events = make(chan *profEvent, 1)
+ profiler.done = make(chan error)
+
+ // The profiler goroutine runs until the events channel is closed.
+ go profile(w)
+
+ return nil
+}
+
+// StopProfile stops the profiler started by a prior call to
+// StartProfile and finalizes the profile. It returns an error if the
+// profile could not be completed.
+//
+// StopProfile must only be called while the profiler is running:
+// otherwise profiler.events is nil, and closing a nil channel panics.
+//
+// StopProfile must not be called concurrently with Starlark execution.
+func StopProfile() error {
+ // Terminate the profiler goroutine and get its result.
+ close(profiler.events)
+ err := <-profiler.done
+
+ // Reset the shared state; clearing the flag last lets StartProfile succeed again.
+ profiler.done = nil
+ profiler.events = nil
+ atomic.StoreUint32(&profiler.on, 0)
+
+ return err
+}
+
+// profiler holds the package-level profiler state shared by
+// StartProfile, StopProfile, the profiler goroutine, and the
+// interpreter threads.
+var profiler struct {
+ on uint32 // nonzero => profiler running
+ events chan *profEvent // profile events from interpreter threads
+ done chan error // carries the profiler goroutine's final error once it has finished
+}
+
+// beginProfSpan marks the start of a profiling span by storing the
+// current nanotime reading in the thread's frame 0.
+// It does nothing unless profiling is enabled.
+func (thread *Thread) beginProfSpan() {
+	if profiler.events != nil {
+		thread.frameAt(0).spanStart = nanotime()
+	}
+}
+
+// quantum is the granularity of the profile: endProfSpan reports
+// accumulated time in whole multiples of it, and the profile header
+// records it as the sampling period.
+//
+// TODO(adonovan): experiment with smaller values,
+// which trade space and time for greater precision.
+const quantum = 10 * time.Millisecond
+
+// endProfSpan marks the end of a profiling span. It adds the time
+// elapsed since the matching beginProfSpan to the thread's accumulator
+// and, once at least one quantum has accumulated, sends the profiler
+// goroutine a copy of the call stack weighted by the number of whole
+// quanta, leaving the remainder in the accumulator.
+// It does nothing unless profiling is enabled.
+func (thread *Thread) endProfSpan() {
+	if profiler.events == nil {
+		return // profiling not enabled
+	}
+
+	// Add the span to the thread's accumulator.
+	elapsed := nanotime() - thread.frameAt(0).spanStart
+	thread.proftime += time.Duration(elapsed)
+	if thread.proftime < quantum {
+		return
+	}
+
+	// Only record complete quanta.
+	recorded := (thread.proftime / quantum) * quantum
+	thread.proftime -= recorded
+
+	// Copy the stack.
+	// (We can't save thread.frame because its pc will change.)
+	ev := &profEvent{thread: thread, time: recorded}
+	ev.stack = ev.stackSpace[:0] // start in the event's inline storage
+	for i := range thread.stack {
+		fr := thread.frameAt(i)
+		frame := profFrame{
+			pos: fr.Position(),
+			fn:  fr.Callable(),
+			pc:  fr.pc,
+		}
+		ev.stack = append(ev.stack, frame)
+	}
+
+	profiler.events <- ev
+}
+
+// A profEvent is one sample sent from an interpreter thread to the
+// profiler goroutine: a copy of the thread's call stack together with
+// the amount of time attributed to it.
+type profEvent struct {
+ thread *Thread // currently unused
+ time time.Duration // time attributed to this stack; a whole number of quanta
+ stack []profFrame // copied call stack; initially backed by stackSpace
+ stackSpace [8]profFrame // initial space for stack
+}
+
+// A profFrame is the profiler's copy of the parts of a call stack
+// frame that it needs after the interpreter has resumed execution.
+type profFrame struct {
+ fn Callable // don't hold this live for too long (prevents GC of lambdas)
+ pc uint32 // program counter (Starlark frames only)
+ pos syntax.Position // position of pc within this frame
+}
+
+// profile is the profiler goroutine.
+// It runs until StopProfile is called.
+//
+// It streams a gzip-compressed pprof protocol message to w: first the
+// header fields, then one Sample per event received on profiler.events,
+// emitting each string, Function and Location the first time a sample
+// refers to it. When the events channel is closed it writes the
+// profile duration, closes the compressor, flushes the buffer, and
+// sends the resulting error (nil on success) on profiler.done.
+func profile(w io.Writer) {
+ // Field numbers from pprof protocol.
+ // See https://github.com/google/pprof/blob/master/proto/profile.proto
+ const (
+ Profile_sample_type = 1 // repeated ValueType
+ Profile_sample = 2 // repeated Sample
+ Profile_mapping = 3 // repeated Mapping
+ Profile_location = 4 // repeated Location
+ Profile_function = 5 // repeated Function
+ Profile_string_table = 6 // repeated string
+ Profile_time_nanos = 9 // int64
+ Profile_duration_nanos = 10 // int64
+ Profile_period_type = 11 // ValueType
+ Profile_period = 12 // int64
+
+ ValueType_type = 1 // int64
+ ValueType_unit = 2 // int64
+
+ Sample_location_id = 1 // repeated uint64
+ Sample_value = 2 // repeated int64
+ Sample_label = 3 // repeated Label
+
+ Label_key = 1 // int64
+ Label_str = 2 // int64
+ Label_num = 3 // int64
+ Label_num_unit = 4 // int64
+
+ Location_id = 1 // uint64
+ Location_mapping_id = 2 // uint64
+ Location_address = 3 // uint64
+ Location_line = 4 // repeated Line
+
+ Line_function_id = 1 // uint64
+ Line_line = 2 // int64
+
+ Function_id = 1 // uint64
+ Function_name = 2 // int64
+ Function_system_name = 3 // int64
+ Function_filename = 4 // int64
+ Function_start_line = 5 // int64
+ )
+
+ bufw := bufio.NewWriter(w) // write file in 4KB (not 240B flate-sized) chunks
+ gz := gzip.NewWriter(bufw)
+ enc := protoEncoder{w: gz}
+
+ // strings
+ //
+ // str returns the string table index of s, appending s to the
+ // table in the output the first time it is seen.
+ stringIndex := make(map[string]int64)
+ str := func(s string) int64 {
+ i, ok := stringIndex[s]
+ if !ok {
+ i = int64(len(stringIndex))
+ enc.string(Profile_string_table, s)
+ stringIndex[s] = i
+ }
+ return i
+ }
+ str("") // entry 0
+
+ // functions
+ //
+ // function returns the ID of a Callable for use in Line.FunctionId.
+ // The ID is the same as the function's logical address,
+ // which is supplied by the caller to avoid the need to recompute it.
+ // The Function message is emitted the first time an address is seen.
+ functionId := make(map[uintptr]uint64)
+ function := func(fn Callable, addr uintptr) uint64 {
+ id, ok := functionId[addr]
+ if !ok {
+ id = uint64(addr)
+
+ var pos syntax.Position
+ if fn, ok := fn.(callableWithPosition); ok {
+ pos = fn.Position()
+ }
+
+ // A function named "<toplevel>" is reported under its file name instead.
+ name := fn.Name()
+ if name == "<toplevel>" {
+ name = pos.Filename()
+ }
+
+ nameIndex := str(name)
+
+ fun := new(bytes.Buffer)
+ funenc := protoEncoder{w: fun}
+ funenc.uint(Function_id, id)
+ funenc.int(Function_name, nameIndex)
+ funenc.int(Function_system_name, nameIndex)
+ funenc.int(Function_filename, str(pos.Filename()))
+ funenc.int(Function_start_line, int64(pos.Line))
+ enc.bytes(Profile_function, fun.Bytes())
+
+ functionId[addr] = id
+ }
+ return id
+ }
+
+ // locations
+ //
+ // location returns the ID of the location denoted by fr.
+ // For Starlark frames, this is the Frame pc.
+ // The Location message is emitted the first time an ID is seen.
+ locationId := make(map[uintptr]uint64)
+ location := func(fr profFrame) uint64 {
+ fnAddr := profFuncAddr(fr.fn)
+
+ // For Starlark functions, the frame position
+ // represents the current PC value.
+ // Mix it into the low bits of the address.
+ // This is super hacky and may result in collisions
+ // in large functions or if functions are numerous.
+ // TODO(adonovan): fix: try making this cleaner by treating
+ // each bytecode segment as a Profile.Mapping.
+ pcAddr := fnAddr
+ if _, ok := fr.fn.(*Function); ok {
+ pcAddr = (pcAddr << 16) ^ uintptr(fr.pc)
+ }
+
+ id, ok := locationId[pcAddr]
+ if !ok {
+ id = uint64(pcAddr)
+
+ line := new(bytes.Buffer)
+ lineenc := protoEncoder{w: line}
+ lineenc.uint(Line_function_id, function(fr.fn, fnAddr))
+ lineenc.int(Line_line, int64(fr.pos.Line))
+ loc := new(bytes.Buffer)
+ locenc := protoEncoder{w: loc}
+ locenc.uint(Location_id, id)
+ locenc.uint(Location_address, uint64(pcAddr))
+ locenc.bytes(Location_line, line.Bytes())
+ enc.bytes(Profile_location, loc.Bytes())
+
+ locationId[pcAddr] = id
+ }
+ return id
+ }
+
+ // The ValueType {"wall", "nanoseconds"} is used both as the sample
+ // type and as the period type.
+ wallNanos := new(bytes.Buffer)
+ wnenc := protoEncoder{w: wallNanos}
+ wnenc.int(ValueType_type, str("wall"))
+ wnenc.int(ValueType_unit, str("nanoseconds"))
+
+ // informational fields of Profile
+ enc.bytes(Profile_sample_type, wallNanos.Bytes())
+ enc.int(Profile_period, quantum.Nanoseconds()) // magnitude of sampling period
+ enc.bytes(Profile_period_type, wallNanos.Bytes()) // dimension and unit of period
+ enc.int(Profile_time_nanos, time.Now().UnixNano()) // start (real) time of profile
+
+ startNano := nanotime()
+
+ // Read profile events from the channel
+ // until it is closed by StopProfile.
+ for e := range profiler.events {
+ sample := new(bytes.Buffer)
+ sampleenc := protoEncoder{w: sample}
+ sampleenc.int(Sample_value, e.time.Nanoseconds()) // wall nanoseconds
+ for _, fr := range e.stack {
+ sampleenc.uint(Sample_location_id, location(fr))
+ }
+ enc.bytes(Profile_sample, sample.Bytes())
+ }
+
+ endNano := nanotime()
+ enc.int(Profile_duration_nanos, endNano-startNano)
+
+ // Report the first failure of the two; the flush is attempted
+ // even if closing the compressor failed.
+ err := gz.Close() // Close reports any prior write error
+ if flushErr := bufw.Flush(); err == nil {
+ err = flushErr
+ }
+ profiler.done <- err
+}
+
+// nanotime returns the current reading of the monotonic clock, in nanoseconds.
+// NOTE(review): the origin of that clock is presumably unspecified (not the
+// Unix epoch), so readings are meaningful only as differences -- confirm.
+// It is implemented by runtime.nanotime using the linkname hack;
+// runtime.nanotime is defined for all OSs/ARCHS and uses the
+// monotonic system clock, which there is no portable way to access.
+// Should that function ever go away, these alternatives exist:
+//
+// // POSIX only. REALTIME not MONOTONIC. 17ns.
+// var tv syscall.Timeval
+// syscall.Gettimeofday(&tv) // can't fail
+// return tv.Nano()
+//
+// // Portable. REALTIME not MONOTONIC. 46ns.
+// return time.Now().UnixNano()
+//
+// // POSIX only. Adds a dependency.
+// import "golang.org/x/sys/unix"
+// var ts unix.Timespec
+// unix.ClockGettime(unix.CLOCK_MONOTONIC, &ts) // can't fail
+// return unix.TimespecToNsec(ts)
+//
+//go:linkname nanotime runtime.nanotime
+func nanotime() int64
+
+// profFuncAddr returns the canonical "address"
+// of a Callable for use by the profiler.
+//
+// The value is derived from the dynamic type of fn:
+// - *Builtin: reflect's Pointer() of fn.fn (presumably the code pointer
+// of the Go function that implements the builtin);
+// - *Function: the pointer held in fn.funcode;
+// - any other Callable whose dynamic type is a pointer: that pointer;
+// - anything else: the placeholder 1, after logging a diagnostic.
+//
+// The result is used by the profile writer only as a map key and as a
+// pprof function/location ID.
+func profFuncAddr(fn Callable) uintptr {
+ switch fn := fn.(type) {
+ case *Builtin:
+ return reflect.ValueOf(fn.fn).Pointer()
+ case *Function:
+ return uintptr(unsafe.Pointer(fn.funcode))
+ }
+
+ // User-defined callable types are typically
+ // of kind pointer-to-struct. Handle them specially.
+ if v := reflect.ValueOf(fn); v.Type().Kind() == reflect.Ptr {
+ return v.Pointer()
+ }
+
+ // Address zero is reserved by the protocol.
+ // Use 1 for callables we don't recognize.
+ // All such callables share this one address, so the profile
+ // cannot tell them apart.
+ log.Printf("Starlark profiler: no address for Callable %T", fn)
+ return 1
+}
+
+// We encode the protocol message by hand to avoid making
+// the interpreter depend on both github.com/google/pprof
+// and github.com/golang/protobuf.
+//
+// This also avoids the need to materialize a protocol message object
+// tree of unbounded size and serialize it all at the end.
+// The pprof format appears to have been designed to
+// permit streaming implementations such as this one.
+//
+// A protoEncoder appends encoded fields to w. Its methods return nothing:
+// any error reported by w is discarded at the point of the write.
+//
+// See https://developers.google.com/protocol-buffers/docs/encoding.
+type protoEncoder struct {
+ w io.Writer // *bytes.Buffer or *gzip.Writer
+ tmp [binary.MaxVarintLen64]byte // scratch space for one encoded varint; see uvarint
+}
+
+// uvarint writes x to e.w in varint form, using e.tmp as scratch space.
+// The result of the Write call is discarded.
+func (e *protoEncoder) uvarint(x uint64) {
+ n := binary.PutUvarint(e.tmp[:], x)
+ e.w.Write(e.tmp[:n])
+}
+
+// tag writes the key that introduces a field: the field number shifted
+// left by three bits, with the wire type in the low bits, as a varint.
+// The other encoder methods call it before writing the field's value.
+func (e *protoEncoder) tag(field, wire uint) {
+ e.uvarint(uint64(field<<3 | wire))
+}
+
+// string writes field as a length-delimited value (wire type 2):
+// the key, the length of s in bytes, then the bytes of s.
+// The result of the final write is discarded.
+func (e *protoEncoder) string(field uint, s string) {
+ e.tag(field, 2) // length-delimited
+ e.uvarint(uint64(len(s)))
+ io.WriteString(e.w, s)
+}
+
+// bytes writes field as a length-delimited value (wire type 2):
+// the key, len(b), then b itself. The profile writer uses it to embed
+// a separately encoded sub-message held in a bytes.Buffer.
+// The result of the final write is discarded.
+func (e *protoEncoder) bytes(field uint, b []byte) {
+ e.tag(field, 2) // length-delimited
+ e.uvarint(uint64(len(b)))
+ e.w.Write(b)
+}
+
+// uint writes field as a varint value (wire type 0): the key, then x.
+func (e *protoEncoder) uint(field uint, x uint64) {
+ e.tag(field, 0) // varint
+ e.uvarint(x)
+}
+
+// int writes field as a varint value (wire type 0): the key, then x.
+// x is converted to uint64 as-is (no zigzag transformation), so a
+// negative value is written as the maximum-length varint.
+func (e *protoEncoder) int(field uint, x int64) {
+ e.tag(field, 0) // varint
+ e.uvarint(uint64(x))
+}
diff --git a/starlark/profile_test.go b/starlark/profile_test.go
new file mode 100644
index 0000000..2781833
--- /dev/null
+++ b/starlark/profile_test.go
@@ -0,0 +1,83 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark_test
+
+import (
+ "bytes"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "strings"
+ "testing"
+
+ "go.starlark.net/starlark"
+)
+
+// TestProfile is a simple integration test that the profiler
+// emits minimally plausible pprof-compatible output.
+//
+// It profiles a small Starlark program into a temporary file, runs
+// "go tool pprof -top" on that file, and checks the report for a few
+// expected substrings. The go command must therefore be on PATH.
+func TestProfile(t *testing.T) {
+ prof, err := ioutil.TempFile("", "profile_test")
+ if err != nil {
+ t.Fatal(err)
+ }
+ // NOTE(review): deferred calls run last-in first-out, so os.Remove
+ // runs before prof.Close; its error is ignored. On platforms that
+ // refuse to remove an open file the temp file may be left behind --
+ // confirm whether the order should be swapped.
+ defer prof.Close()
+ defer os.Remove(prof.Name())
+ if err := starlark.StartProfile(prof); err != nil {
+ t.Fatal(err)
+ }
+
+ // The program to profile. The assertions below expect the report
+ // to mention both the function name and the file name used here.
+ const src = `
+def fibonacci(n):
+ res = list(range(n))
+ for i in res[2:]:
+ res[i] = res[i-2] + res[i-1]
+ return res
+
+fibonacci(100000)
+`
+
+ thread := new(starlark.Thread)
+ if _, err := starlark.ExecFile(thread, "foo.star", src, nil); err != nil {
+ // Stop the profiler before failing; its own error is
+ // deliberately ignored in favor of the execution error.
+ _ = starlark.StopProfile()
+ t.Fatal(err)
+ }
+ if err := starlark.StopProfile(); err != nil {
+ t.Fatal(err)
+ }
+ // Sync before the external pprof process opens the file by name.
+ prof.Sync()
+ cmd := exec.Command("go", "tool", "pprof", "-top", prof.Name())
+ cmd.Stderr = new(bytes.Buffer)
+ cmd.Stdout = new(bytes.Buffer)
+ if err := cmd.Run(); err != nil {
+ t.Fatalf("pprof failed: %v; output=<<%s>>", err, cmd.Stderr)
+ }
+
+ // Typical output (may vary by go release):
+ //
+ // Type: wall
+ // Time: Apr 4, 2019 at 11:10am (EDT)
+ // Duration: 251.62ms, Total samples = 250ms (99.36%)
+ // Showing nodes accounting for 250ms, 100% of 250ms total
+ // flat flat% sum% cum cum%
+ // 320ms 100% 100% 320ms 100% fibonacci
+ // 0 0% 100% 320ms 100% foo.star
+ //
+ // We'll assert a few key substrings are present.
+ got := fmt.Sprint(cmd.Stdout)
+ for _, want := range []string{
+ "flat%",
+ "fibonacci",
+ "foo.star",
+ } {
+ if !strings.Contains(got, want) {
+ t.Errorf("output did not contain %q", want)
+ }
+ }
+ // On failure, dump both streams of the pprof run to aid debugging.
+ if t.Failed() {
+ t.Logf("stderr=%v", cmd.Stderr)
+ t.Logf("stdout=%v", cmd.Stdout)
+ }
+}
diff --git a/starlark/testdata/assign.star b/starlark/testdata/assign.star
new file mode 100644
index 0000000..7f579f0
--- /dev/null
+++ b/starlark/testdata/assign.star
@@ -0,0 +1,354 @@
+# Tests of Starlark assignment.
+
+# This is a "chunked" file: each "---" effectively starts a new file.
+
+# tuple assignment
+load("assert.star", "assert")
+
+() = () # empty ok
+
+a, b, c = 1, 2, 3
+assert.eq(a, 1)
+assert.eq(b, 2)
+assert.eq(c, 3)
+
+(d, e, f,) = (1, 2, 3) # trailing comma ok
+---
+(a, b, c) = 1 ### "got int in sequence assignment"
+---
+(a, b) = () ### "too few values to unpack"
+---
+(a, b) = (1,) ### "too few values to unpack"
+---
+(a, b, c) = (1, 2) ### "too few values to unpack"
+---
+(a, b) = (1, 2, 3) ### "too many values to unpack"
+---
+() = 1 ### "got int in sequence assignment"
+---
+() = (1,) ### "too many values to unpack"
+---
+() = (1, 2) ### "too many values to unpack"
+---
+# list assignment
+load("assert.star", "assert")
+
+[] = [] # empty ok
+
+[a, b, c] = [1, 2, 3]
+assert.eq(a, 1)
+assert.eq(b, 2)
+assert.eq(c, 3)
+
+[d, e, f,] = [1, 2, 3] # trailing comma ok
+---
+[a, b, c] = 1 ### "got int in sequence assignment"
+---
+[a, b] = [] ### "too few values to unpack"
+---
+[a, b] = [1] ### "too few values to unpack"
+---
+[a, b, c] = [1, 2] ### "too few values to unpack"
+---
+[a, b] = [1, 2, 3] ### "too many values to unpack"
+---
+[] = 1 ### "got int in sequence assignment"
+---
+[] = [1] ### "too many values to unpack"
+---
+[] = [1, 2] ### "too many values to unpack"
+---
+# list-tuple assignment
+load("assert.star", "assert")
+
+# empty ok
+[] = ()
+() = []
+
+[a, b, c] = (1, 2, 3)
+assert.eq(a, 1)
+assert.eq(b, 2)
+assert.eq(c, 3)
+
+[a2, b2, c2] = 1, 2, 3 # bare tuple ok
+
+(d, e, f) = [1, 2, 3]
+assert.eq(d, 1)
+assert.eq(e, 2)
+assert.eq(f, 3)
+
+[g, h, (i, j)] = (1, 2, [3, 4])
+assert.eq(g, 1)
+assert.eq(h, 2)
+assert.eq(i, 3)
+assert.eq(j, 4)
+
+(k, l, [m, n]) = [1, 2, (3, 4)]
+assert.eq(k, 1)
+assert.eq(l, 2)
+assert.eq(m, 3)
+assert.eq(n, 4)
+
+---
+# misc assignment
+load("assert.star", "assert")
+
+def assignment():
+ a = [1, 2, 3]
+ a[1] = 5
+ assert.eq(a, [1, 5, 3])
+ a[-2] = 2
+ assert.eq(a, [1, 2, 3])
+ assert.eq("%d %d" % (5, 7), "5 7")
+ x={}
+ x[1] = 2
+ x[1] += 3
+ assert.eq(x[1], 5)
+ def f12(): x[(1, "abc", {})] = 1
+ assert.fails(f12, "unhashable type: dict")
+
+assignment()
+
+---
+# augmented assignment
+
+load("assert.star", "assert")
+
+def f():
+ x = 1
+ x += 1
+ assert.eq(x, 2)
+ x *= 3
+ assert.eq(x, 6)
+f()
+
+---
+# effects of evaluating LHS occur only once
+
+load("assert.star", "assert")
+
+count = [0] # count[0] is the number of calls to f
+
+def f():
+ count[0] += 1
+ return count[0]
+
+x = [1, 2, 3]
+x[f()] += 1
+
+assert.eq(x, [1, 3, 3]) # sole call to f returned 1
+assert.eq(count[0], 1) # f was called only once
+
+---
+# Order of evaluation.
+
+load("assert.star", "assert")
+
+calls = []
+
+def f(name, result):
+ calls.append(name)
+ return result
+
+# The right side is evaluated before the left in an ordinary assignment.
+calls.clear()
+f("array", [0])[f("index", 0)] = f("rhs", 0)
+assert.eq(calls, ["rhs", "array", "index"])
+
+calls.clear()
+f("lhs1", [0])[0], f("lhs2", [0])[0] = f("rhs1", 0), f("rhs2", 0)
+assert.eq(calls, ["rhs1", "rhs2", "lhs1", "lhs2"])
+
+# Left side is evaluated first (and only once) in an augmented assignment.
+calls.clear()
+f("array", [0])[f("index", 0)] += f("addend", 1)
+assert.eq(calls, ["array", "index", "addend"])
+
+---
+# global referenced before assignment
+
+def f():
+ return g ### "global variable g referenced before assignment"
+
+f()
+
+g = 1
+
+---
+# Free variables are captured by reference, so this is ok.
+load("assert.star", "assert")
+
+def f():
+ def g():
+ return outer
+ outer = 1
+ return g()
+
+assert.eq(f(), 1)
+
+---
+load("assert.star", "assert")
+
+printok = [False]
+
+# This program should resolve successfully but fail dynamically.
+# However, the Java implementation currently reports the dynamic
+# error at the x=1 statement (b/33975425). I think we need to simplify
+# the resolver algorithm to what we have implemented.
+def use_before_def():
+ print(x) # dynamic error: local var referenced before assignment
+ printok[0] = True
+ x = 1 # makes 'x' local
+
+assert.fails(use_before_def, 'local variable x referenced before assignment')
+assert.true(not printok[0]) # execution of print statement failed
+
+---
+x = [1]
+x.extend([2]) # ok
+
+def f():
+ x += [4] ### "local variable x referenced before assignment"
+
+f()
+
+---
+
+z += 3 ### "global variable z referenced before assignment"
+
+---
+load("assert.star", "assert")
+
+# It's ok to define a global that shadows a built-in...
+list = []
+assert.eq(type(list), "list")
+
+# ...but then all uses refer to the global,
+# even if they occur before the binding use.
+# See github.com/google/skylark/issues/116.
+assert.fails(lambda: tuple, "global variable tuple referenced before assignment")
+tuple = ()
+
+---
+# option:set
+# Same as above, but set is dialect-specific;
+# we shouldn't notice any difference.
+load("assert.star", "assert")
+
+set = [1, 2, 3]
+assert.eq(type(set), "list")
+
+# As in Python 2 and Python 3,
+# all 'in x' expressions in a comprehension are evaluated
+# in the comprehension's lexical block, except the first,
+# which is resolved in the outer block.
+x = [[1, 2]]
+assert.eq([x for x in x for y in x],
+ [[1, 2], [1, 2]])
+
+---
+# A comprehension establishes a single new lexical block,
+# not one per 'for' clause.
+x = [1, 2]
+_ = [x for _ in [3] for x in x] ### "local variable x referenced before assignment"
+
+---
+load("assert.star", "assert")
+
+# assign singleton sequence to 1-tuple
+(x,) = (1,)
+assert.eq(x, 1)
+(y,) = [1]
+assert.eq(y, 1)
+
+# assign 1-tuple to variable
+z = (1,)
+assert.eq(type(z), "tuple")
+assert.eq(len(z), 1)
+assert.eq(z[0], 1)
+
+# assign value to parenthesized variable
+(a) = 1
+assert.eq(a, 1)
+
+---
+# assignment to/from fields.
+load("assert.star", "assert", "freeze")
+
+hf = hasfields()
+hf.x = 1
+assert.eq(hf.x, 1)
+hf.x = [1, 2]
+hf.x += [3, 4]
+assert.eq(hf.x, [1, 2, 3, 4])
+freeze(hf)
+def setX(hf):
+ hf.x = 2
+def setY(hf):
+ hf.y = 3
+assert.fails(lambda: setX(hf), "cannot set field on a frozen hasfields")
+assert.fails(lambda: setY(hf), "cannot set field on a frozen hasfields")
+
+---
+# destructuring assignment in a for loop.
+load("assert.star", "assert")
+
+def f():
+ res = []
+ for (x, y), z in [(["a", "b"], 3), (["c", "d"], 4)]:
+ res.append((x, y, z))
+ return res
+assert.eq(f(), [("a", "b", 3), ("c", "d", 4)])
+
+def g():
+ a = {}
+ for i, a[i] in [("one", 1), ("two", 2)]:
+ pass
+ return a
+assert.eq(g(), {"one": 1, "two": 2})
+
+---
+# parenthesized LHS in augmented assignment (success)
+# option:globalreassign
+load("assert.star", "assert")
+
+a = 5
+(a) += 3
+assert.eq(a, 8)
+
+---
+# parenthesized LHS in augmented assignment (error)
+
+(a) += 5 ### "global variable a referenced before assignment"
+
+---
+# option:globalreassign
+load("assert.star", "assert")
+assert = 1
+load("assert.star", "assert")
+
+---
+# option:globalreassign option:loadbindsglobally
+load("assert.star", "assert")
+assert = 1
+load("assert.star", "assert")
+
+---
+# option:loadbindsglobally
+_ = assert ### "global variable assert referenced before assignment"
+load("assert.star", "assert")
+
+---
+_ = assert ### "local variable assert referenced before assignment"
+load("assert.star", "assert")
+
+---
+def f(): assert.eq(1, 1) # forward ref OK
+load("assert.star", "assert")
+f()
+
+---
+# option:loadbindsglobally
+def f(): assert.eq(1, 1) # forward ref OK
+load("assert.star", "assert")
+f()
diff --git a/starlark/testdata/benchmark.star b/starlark/testdata/benchmark.star
new file mode 100644
index 0000000..b02868d
--- /dev/null
+++ b/starlark/testdata/benchmark.star
@@ -0,0 +1,62 @@
+# Benchmarks of Starlark execution
+
+def bench_range_construction(b):
+ for _ in range(b.n):
+ range(200)
+
+def bench_range_iteration(b):
+ for _ in range(b.n):
+ for x in range(200):
+ pass
+
+# Make a 2-level call tree of 100 * 100 calls.
+def bench_calling(b):
+ list = range(100)
+
+ def g():
+ for x in list:
+ pass
+
+ def f():
+ for x in list:
+ g()
+
+ for _ in range(b.n):
+ f()
+
+# Measure overhead of calling a trivial built-in method.
+emptydict = {}
+range1000 = range(1000)
+
+def bench_builtin_method(b):
+ for _ in range(b.n):
+ for _ in range1000:
+ emptydict.get(None)
+
+def bench_int(b):
+ for _ in range(b.n):
+ a = 0
+ for _ in range1000:
+ a += 1
+
+def bench_bigint(b):
+ for _ in range(b.n):
+ a = 1 << 31 # maxint32 + 1
+ for _ in range1000:
+ a += 1
+
+def bench_gauss(b):
+ # Sum of arithmetic series. All results fit in int32.
+ for _ in range(b.n):
+ acc = 0
+ for x in range(92000):
+ acc += x
+
+def bench_mix(b):
+ "Benchmark of a simple mix of computation (for, if, arithmetic, comprehension)."
+ for _ in range(b.n):
+ x = 0
+ for i in range(50):
+ if i:
+ x += 1
+ a = [x for x in range(i)]
diff --git a/starlark/testdata/bool.star b/starlark/testdata/bool.star
new file mode 100644
index 0000000..6c084a3
--- /dev/null
+++ b/starlark/testdata/bool.star
@@ -0,0 +1,62 @@
+# Tests of Starlark 'bool'
+
+load("assert.star", "assert")
+
+# truth
+assert.true(True)
+assert.true(not False)
+assert.true(not not True)
+assert.true(not not 1 >= 1)
+
+# precedence of not
+assert.true(not not 2 > 1)
+# assert.true(not (not 2) > 1) # TODO(adonovan): fix: gives error for False > 1.
+# assert.true(not ((not 2) > 1)) # TODO(adonovan): fix
+# assert.true(not ((not (not 2)) > 1)) # TODO(adonovan): fix
+# assert.true(not not not (2 > 1))
+
+# bool conversion
+assert.eq(
+ [bool(), bool(1), bool(0), bool("hello"), bool("")],
+ [False, True, False, True, False],
+)
+
+# comparison
+assert.true(None == None)
+assert.true(None != False)
+assert.true(None != True)
+assert.eq(1 == 1, True)
+assert.eq(1 == 2, False)
+assert.true(False == False)
+assert.true(True == True)
+
+# ordered comparison
+assert.true(False < True)
+assert.true(False <= True)
+assert.true(False <= False)
+assert.true(True > False)
+assert.true(True >= False)
+assert.true(True >= True)
+
+# conditional expression
+assert.eq(1 if 3 > 2 else 0, 1)
+assert.eq(1 if "foo" else 0, 1)
+assert.eq(1 if "" else 0, 0)
+
+# short-circuit evaluation of 'and' and 'or':
+# 'or' yields the first true operand, or the last if all are false.
+assert.eq(0 or "" or [] or 0, 0)
+assert.eq(0 or "" or [] or 123 or 1 // 0, 123)
+assert.fails(lambda : 0 or "" or [] or 0 or 1 // 0, "division by zero")
+
+# 'and' yields the first false operand, or the last if all are true.
+assert.eq(1 and "a" and [1] and 123, 123)
+assert.eq(1 and "a" and [1] and 0 and 1 // 0, 0)
+assert.fails(lambda : 1 and "a" and [1] and 123 and 1 // 0, "division by zero")
+
+# Built-ins that want a bool want an actual bool, not a truth value.
+# See github.com/bazelbuild/starlark/issues/30
+assert.eq(''.splitlines(True), [])
+assert.fails(lambda: ''.splitlines(1), 'got int, want bool')
+assert.fails(lambda: ''.splitlines("hello"), 'got string, want bool')
+assert.fails(lambda: ''.splitlines(0.0), 'got float, want bool')
diff --git a/starlark/testdata/builtins.star b/starlark/testdata/builtins.star
new file mode 100644
index 0000000..c6591b8
--- /dev/null
+++ b/starlark/testdata/builtins.star
@@ -0,0 +1,225 @@
+# Tests of Starlark built-in functions
+# option:set
+
+load("assert.star", "assert")
+
+# len
+assert.eq(len([1, 2, 3]), 3)
+assert.eq(len((1, 2, 3)), 3)
+assert.eq(len({1: 2}), 1)
+assert.fails(lambda: len(1), "int.*has no len")
+
+# and, or
+assert.eq(123 or "foo", 123)
+assert.eq(0 or "foo", "foo")
+assert.eq(123 and "foo", "foo")
+assert.eq(0 and "foo", 0)
+none = None
+_1 = none and none[0] # rhs is not evaluated
+_2 = (not none) or none[0] # rhs is not evaluated
+
+# any, all
+assert.true(all([]))
+assert.true(all([1, True, "foo"]))
+assert.true(not all([1, True, ""]))
+assert.true(not any([]))
+assert.true(any([0, False, "foo"]))
+assert.true(not any([0, False, ""]))
+
+# in
+assert.true(3 in [1, 2, 3])
+assert.true(4 not in [1, 2, 3])
+assert.true(3 in (1, 2, 3))
+assert.true(4 not in (1, 2, 3))
+assert.fails(lambda: 3 in "foo", "in.*requires string as left operand")
+assert.true(123 in {123: ""})
+assert.true(456 not in {123:""})
+assert.true([] not in {123: ""})
+
+# sorted
+assert.eq(sorted([42, 123, 3]), [3, 42, 123])
+assert.eq(sorted([42, 123, 3], reverse=True), [123, 42, 3])
+assert.eq(sorted(["wiz", "foo", "bar"]), ["bar", "foo", "wiz"])
+assert.eq(sorted(["wiz", "foo", "bar"], reverse=True), ["wiz", "foo", "bar"])
+assert.fails(lambda: sorted([1, 2, None, 3]), "int < NoneType not implemented")
+assert.fails(lambda: sorted([1, "one"]), "string < int not implemented")
+# custom key function
+assert.eq(sorted(["two", "three", "four"], key=len),
+ ["two", "four", "three"])
+assert.eq(sorted(["two", "three", "four"], key=len, reverse=True),
+ ["three", "four", "two"])
+assert.fails(lambda: sorted([1, 2, 3], key=None), "got NoneType, want callable")
+# sort is stable
+pairs = [(4, 0), (3, 1), (4, 2), (2, 3), (3, 4), (1, 5), (2, 6), (3, 7)]
+assert.eq(sorted(pairs, key=lambda x: x[0]),
+ [(1, 5),
+ (2, 3), (2, 6),
+ (3, 1), (3, 4), (3, 7),
+ (4, 0), (4, 2)])
+assert.fails(lambda: sorted(1), 'sorted: for parameter iterable: got int, want iterable')
+
+# reversed
+assert.eq(reversed([1, 144, 81, 16]), [16, 81, 144, 1])
+
+# set
+assert.contains(set([1, 2, 3]), 1)
+assert.true(4 not in set([1, 2, 3]))
+assert.eq(len(set([1, 2, 3])), 3)
+assert.eq(sorted([x for x in set([1, 2, 3])]), [1, 2, 3])
+
+# dict
+assert.eq(dict([(1, 2), (3, 4)]), {1: 2, 3: 4})
+assert.eq(dict([(1, 2), (3, 4)], foo="bar"), {1: 2, 3: 4, "foo": "bar"})
+assert.eq(dict({1:2, 3:4}), {1: 2, 3: 4})
+assert.eq(dict({1:2, 3:4}.items()), {1: 2, 3: 4})
+
+# range
+assert.eq("range", type(range(10)))
+assert.eq("range(10)", str(range(0, 10, 1)))
+assert.eq("range(1, 10)", str(range(1, 10)))
+assert.eq(range(0, 5, 10), range(0, 5, 11))
+assert.eq("range(0, 10, -1)", str(range(0, 10, -1)))
+assert.fails(lambda: {range(10): 10}, "unhashable: range")
+assert.true(bool(range(1, 2)))
+assert.true(not(range(2, 1))) # an empty range is false
+assert.eq([x*x for x in range(5)], [0, 1, 4, 9, 16])
+assert.eq(list(range(5)), [0, 1, 2, 3, 4])
+assert.eq(list(range(-5)), [])
+assert.eq(list(range(2, 5)), [2, 3, 4])
+assert.eq(list(range(5, 2)), [])
+assert.eq(list(range(-2, -5)), [])
+assert.eq(list(range(-5, -2)), [-5, -4, -3])
+assert.eq(list(range(2, 10, 3)), [2, 5, 8])
+assert.eq(list(range(10, 2, -3)), [10, 7, 4])
+assert.eq(list(range(-2, -10, -3)), [-2, -5, -8])
+assert.eq(list(range(-10, -2, 3)), [-10, -7, -4])
+assert.eq(list(range(10, 2, -1)), [10, 9, 8, 7, 6, 5, 4, 3])
+assert.eq(list(range(5)[1:]), [1, 2, 3, 4])
+assert.eq(len(range(5)[1:]), 4)
+assert.eq(list(range(5)[:2]), [0, 1])
+assert.eq(list(range(10)[1:]), [1, 2, 3, 4, 5, 6, 7, 8, 9])
+assert.eq(list(range(10)[1:9:2]), [1, 3, 5, 7])
+assert.eq(list(range(10)[1:10:2]), [1, 3, 5, 7, 9])
+assert.eq(list(range(10)[1:11:2]), [1, 3, 5, 7, 9])
+assert.eq(list(range(10)[::-2]), [9, 7, 5, 3, 1])
+assert.eq(list(range(0, 10, 2)[::2]), [0, 4, 8])
+assert.eq(list(range(0, 10, 2)[::-2]), [8, 4, 0])
+# range() is limited by the width of the Go int type (int32 or int64).
+assert.fails(lambda: range(1<<64), "... out of range .want value in signed ..-bit range")
+assert.eq(len(range(0x7fffffff)), 0x7fffffff) # O(1)
+# Two ranges compare equal if they denote the same sequence:
+assert.eq(range(0), range(2, 1, 3)) # []
+assert.eq(range(0, 3, 2), range(0, 4, 2)) # [0, 2]
+assert.ne(range(1, 10), range(2, 10))
+assert.fails(lambda: range(0) < range(0), "range < range not implemented")
+# <number> in <range>
+assert.contains(range(3), 1)
+assert.contains(range(3), 2.0) # acts like 2
+assert.fails(lambda: True in range(3), "requires integer.*not bool") # bools aren't numbers
+assert.fails(lambda: "one" in range(10), "requires integer.*not string")
+assert.true(4 not in range(4))
+assert.true(1e15 not in range(4)) # too big for int32
+assert.true(1e100 not in range(4)) # too big for int64
+# https://github.com/google/starlark-go/issues/116
+assert.fails(lambda: range(0, 0, 2)[:][0], "index 0 out of range: empty range")
+
+# list
+assert.eq(list("abc".elems()), ["a", "b", "c"])
+assert.eq(sorted(list({"a": 1, "b": 2})), ['a', 'b'])
+
+# min, max
+assert.eq(min(5, -2, 1, 7, 3), -2)
+assert.eq(max(5, -2, 1, 7, 3), 7)
+assert.eq(min([5, -2, 1, 7, 3]), -2)
+assert.eq(min("one", "two", "three", "four"), "four")
+assert.eq(max("one", "two", "three", "four"), "two")
+assert.fails(min, "min requires at least one positional argument")
+assert.fails(lambda: min(1), "not iterable")
+assert.fails(lambda: min([]), "empty")
+assert.eq(min(5, -2, 1, 7, 3, key=lambda x: x*x), 1) # min absolute value
+assert.eq(min(5, -2, 1, 7, 3, key=lambda x: -x), 7) # min negated value
+
+# enumerate
+assert.eq(enumerate("abc".elems()), [(0, "a"), (1, "b"), (2, "c")])
+assert.eq(enumerate([False, True, None], 42), [(42, False), (43, True), (44, None)])
+
+# zip
+assert.eq(zip(), [])
+assert.eq(zip([]), [])
+assert.eq(zip([1, 2, 3]), [(1,), (2,), (3,)])
+assert.eq(zip("".elems()), [])
+assert.eq(zip("abc".elems(),
+ list("def".elems()),
+ "hijk".elems()),
+ [("a", "d", "h"), ("b", "e", "i"), ("c", "f", "j")])
+z1 = [1]
+assert.eq(zip(z1), [(1,)])
+z1.append(2)
+assert.eq(zip(z1), [(1,), (2,)])
+assert.fails(lambda: zip(z1, 1), "zip: argument #2 is not iterable: int")
+z1.append(3)
+
+# dir for builtin_function_or_method
+assert.eq(dir(None), [])
+assert.eq(dir({})[:3], ["clear", "get", "items"]) # etc
+assert.eq(dir(1), [])
+assert.eq(dir([])[:3], ["append", "clear", "extend"]) # etc
+
+# hasattr, getattr, dir
+# hasfields is an application-defined type defined in eval_test.go.
+hf = hasfields()
+assert.eq(dir(hf), [])
+assert.true(not hasattr(hf, "x"))
+assert.fails(lambda: getattr(hf, "x"), "no .x field or method")
+assert.eq(getattr(hf, "x", 42), 42)
+hf.x = 1
+assert.true(hasattr(hf, "x"))
+assert.eq(getattr(hf, "x"), 1)
+assert.eq(hf.x, 1)
+hf.x = 2
+assert.eq(getattr(hf, "x"), 2)
+assert.eq(hf.x, 2)
+# built-in types can have attributes (methods) too.
+myset = set([])
+assert.eq(dir(myset), ["union"])
+assert.true(hasattr(myset, "union"))
+assert.true(not hasattr(myset, "onion"))
+assert.eq(str(getattr(myset, "union")), "<built-in method union of set value>")
+assert.fails(lambda: getattr(myset, "onion"), "no .onion field or method")
+assert.eq(getattr(myset, "onion", 42), 42)
+
+# dir returns a new, sorted, mutable list
+assert.eq(sorted(dir("")), dir("")) # sorted
+dir("").append("!") # mutable
+assert.true("!" not in dir("")) # new
+
+# error messages should suggest spelling corrections
+hf.one = 1
+hf.two = 2
+hf.three = 3
+hf.forty_five = 45
+assert.fails(lambda: hf.One, 'no .One field.*did you mean .one')
+assert.fails(lambda: hf.oone, 'no .oone field.*did you mean .one')
+assert.fails(lambda: hf.FortyFive, 'no .FortyFive field.*did you mean .forty_five')
+assert.fails(lambda: hf.trhee, 'no .trhee field.*did you mean .three')
+assert.fails(lambda: hf.thirty, 'no .thirty field or method$') # no suggestion
+
+# spell check in setfield too
+def setfield(): hf.noForty_Five = 46 # "no" prefix => SetField returns NoSuchField
+assert.fails(setfield, 'no .noForty_Five field.*did you mean .forty_five')
+
+# repr
+assert.eq(repr(1), "1")
+assert.eq(repr("x"), '"x"')
+assert.eq(repr(["x", 1]), '["x", 1]')
+
+# fail
+---
+fail() ### `fail: $`
+x = 1//0 # unreachable
+---
+fail(1) ### `fail: 1`
+---
+fail(1, 2, 3) ### `fail: 1 2 3`
+---
+fail(1, 2, 3, sep="/") ### `fail: 1/2/3`
diff --git a/starlark/testdata/bytes.star b/starlark/testdata/bytes.star
new file mode 100644
index 0000000..d500403
--- /dev/null
+++ b/starlark/testdata/bytes.star
@@ -0,0 +1,159 @@
+# Tests of 'bytes' (immutable byte strings).
+
+load("assert.star", "assert")
+
+# bytes(string) -- UTF-k to UTF-8 transcoding with U+FFFD replacement
+hello = bytes("hello, 世界")
+goodbye = bytes("goodbye")
+empty = bytes("")
+nonprinting = bytes("\t\n\x7F\u200D") # TAB, NEWLINE, DEL, ZERO_WIDTH_JOINER
+assert.eq(bytes("hello, 世界"[:-1]), b"hello, 世��")
+
+# bytes(iterable of int) -- construct from numeric byte values
+assert.eq(bytes([65, 66, 67]), b"ABC")
+assert.eq(bytes((65, 66, 67)), b"ABC")
+assert.eq(bytes([0xf0, 0x9f, 0x98, 0xbf]), b"😿")
+assert.fails(lambda: bytes([300]),
+ "at index 0, 300 out of range .want value in unsigned 8-bit range")
+assert.fails(lambda: bytes([b"a"]),
+ "at index 0, got bytes, want int")
+assert.fails(lambda: bytes(1), "want string, bytes, or iterable of ints")
+
+# literals
+assert.eq(b"hello, 世界", hello)
+assert.eq(b"goodbye", goodbye)
+assert.eq(b"", empty)
+assert.eq(b"\t\n\x7F\u200D", nonprinting)
+assert.ne("abc", b"abc")
+assert.eq(b"\012\xff\u0400\U0001F63F", b"\n\xffЀ😿") # see scanner tests for more
+assert.eq(rb"\r\n\t", b"\\r\\n\\t") # raw
+
+# type
+assert.eq(type(hello), "bytes")
+
+# len
+assert.eq(len(hello), 13)
+assert.eq(len(goodbye), 7)
+assert.eq(len(empty), 0)
+assert.eq(len(b"A"), 1)
+assert.eq(len(b"Ѐ"), 2)
+assert.eq(len(b"世"), 3)
+assert.eq(len(b"😿"), 4)
+
+# truth
+assert.true(hello)
+assert.true(goodbye)
+assert.true(not empty)
+
+# str(bytes) does UTF-8 to UTF-k transcoding.
+# TODO(adonovan): specify.
+assert.eq(str(hello), "hello, 世界")
+assert.eq(str(hello[:-1]), "hello, 世��") # incomplete UTF-8 encoding => U+FFFD
+assert.eq(str(goodbye), "goodbye")
+assert.eq(str(empty), "")
+assert.eq(str(nonprinting), "\t\n\x7f\u200d")
+assert.eq(str(b"\xED\xB0\x80"), "���") # UTF-8 encoding of unpaired surrogate => U+FFFD x 3
+
+# repr
+assert.eq(repr(hello), r'b"hello, 世界"')
+assert.eq(repr(hello[:-1]), r'b"hello, 世\xe7\x95"') # (incomplete UTF-8 encoding )
+assert.eq(repr(goodbye), 'b"goodbye"')
+assert.eq(repr(empty), 'b""')
+assert.eq(repr(nonprinting), 'b"\\t\\n\\x7f\\u200d"')
+
+# equality
+assert.eq(hello, hello)
+assert.ne(hello, goodbye)
+assert.eq(b"goodbye", goodbye)
+
+# ordered comparison
+assert.lt(b"abc", b"abd")
+assert.lt(b"abc", b"abcd")
+assert.lt(b"\x7f", b"\x80") # bytes compare as uint8, not int8
+
+# bytes are dict-hashable
+dict = {hello: 1, goodbye: 2}
+dict[b"goodbye"] = 3
+assert.eq(len(dict), 2)
+assert.eq(dict[goodbye], 3)
+
+# hash(bytes) is 32-bit FNV-1a.
+assert.eq(hash(b""), 0x811c9dc5)
+assert.eq(hash(b"a"), 0xe40c292c)
+assert.eq(hash(b"ab"), 0x4d2505ca)
+assert.eq(hash(b"abc"), 0x1a47e90b)
+
+# indexing
+assert.eq(goodbye[0], b"g")
+assert.eq(goodbye[-1], b"e")
+assert.fails(lambda: goodbye[100], "out of range")
+
+# slicing
+assert.eq(goodbye[:4], b"good")
+assert.eq(goodbye[4:], b"bye")
+assert.eq(goodbye[::2], b"gobe")
+assert.eq(goodbye[3:4], b"d") # special case: len=1
+assert.eq(goodbye[4:4], b"") # special case: len=0
+
+# bytes in bytes
+assert.eq(b"bc" in b"abcd", True)
+assert.eq(b"bc" in b"dcab", False)
+assert.fails(lambda: "bc" in b"dcab", "requires bytes or int as left operand, not string")
+
+# int in bytes
+assert.eq(97 in b"abc", True) # 97='a'
+assert.eq(100 in b"abc", False) # 100='d'
+assert.fails(lambda: 256 in b"abc", "int in bytes: 256 out of range")
+assert.fails(lambda: -1 in b"abc", "int in bytes: -1 out of range")
+
+# ord TODO(adonovan): specify
+assert.eq(ord(b"a"), 97)
+assert.fails(lambda: ord(b"ab"), "ord: bytes has length 2, want 1")
+assert.fails(lambda: ord(b""), "ord: bytes has length 0, want 1")
+
+# repeat (bytes * int)
+assert.eq(goodbye * 3, b"goodbyegoodbyegoodbye")
+assert.eq(3 * goodbye, b"goodbyegoodbyegoodbye")
+
+# elems() returns an iterable value over 1-byte substrings.
+assert.eq(type(hello.elems()), "bytes.elems")
+assert.eq(str(hello.elems()), "b\"hello, 世界\".elems()")
+assert.eq(list(hello.elems()), [104, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140])
+assert.eq(bytes([104, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140]), hello)
+assert.eq(list(goodbye.elems()), [103, 111, 111, 100, 98, 121, 101])
+assert.eq(list(empty.elems()), [])
+assert.eq(bytes(hello.elems()), hello) # bytes(iterable) is dual to bytes.elems()
+
+# x[i] = ...
+def f():
+ b"abc"[1] = b"B"
+
+assert.fails(f, "bytes.*does not support.*assignment")
+
+# TODO(adonovan): the specification is not finalized in many areas:
+# - chr, ord functions
+# - encoding/decoding bytes to string.
+# - methods: find, index, split, etc.
+#
+# Summary of string operations (put this in spec).
+#
+# string to number:
+# - bytes[i] returns numeric value of ith byte.
+# - ord(string) returns numeric value of sole code point in string.
+# - ord(string[i]) is not a useful operation: fails on non-ASCII; see below.
+# Q. Perhaps ord should return the first (not sole) code point? Then it becomes a UTF-8 decoder.
+# Perhaps ord(string, index=int) should apply the index and relax the len=1 check.
+# - string.codepoint() iterates over 1-codepoint substrings.
+# - string.codepoint_ords() iterates over numeric values of code points in string.
+# - string.elems() iterates over 1-element (UTF-k code) substrings.
+# - string.elem_ords() iterates over numeric UTF-k code values.
+# - string.elem_ords()[i] returns numeric value of ith element (UTF-k code).
+# - string.elems()[i] returns substring of a single element (UTF-k code).
+# - int(string) parses string as decimal (or other) numeric literal.
+#
+# number to string:
+# - chr(int) returns string, UTF-k encoding of Unicode code point (like Python).
+# Redundant with '%c' % int (which Python2 calls 'unichr'.)
+# - bytes(chr(int)) returns byte string containing UTF-8 encoding of one code point.
+# - bytes([int]) returns 1-byte string (with regrettable list allocation).
+# - str(int) - format number as decimal.
diff --git a/starlark/testdata/control.star b/starlark/testdata/control.star
new file mode 100644
index 0000000..554ab25
--- /dev/null
+++ b/starlark/testdata/control.star
@@ -0,0 +1,64 @@
+# Tests of Starlark control flow
+
+load("assert.star", "assert")
+
+def controlflow():
+ # elif
+ x = 0
+ if True:
+ x=1
+ elif False:
+ assert.fail("else of true")
+ else:
+ assert.fail("else of else of true")
+ assert.true(x)
+
+ x = 0
+ if False:
+ assert.fail("then of false")
+ elif True:
+ x = 1
+ else:
+ assert.fail("else of true")
+ assert.true(x)
+
+ x = 0
+ if False:
+ assert.fail("then of false")
+ elif False:
+ assert.fail("then of false")
+ else:
+ x = 1
+ assert.true(x)
+controlflow()
+
+def loops():
+ y = ""
+ for x in [1, 2, 3, 4, 5]:
+ if x == 2:
+ continue
+ if x == 4:
+ break
+ y = y + str(x)
+ return y
+assert.eq(loops(), "13")
+
+# return
+g = 123
+def f(x):
+ for g in (1, 2, 3):
+ if g == x:
+ return g
+assert.eq(f(2), 2)
+assert.eq(f(4), None) # falling off end => return None
+assert.eq(g, 123) # unchanged by local use of g in function
+
+# infinite sequences
+def fib(n):
+ seq = []
+ for x in fibonacci: # fibonacci is an infinite iterable defined in eval_test.go
+ if len(seq) == n:
+ break
+ seq.append(x)
+ return seq
+assert.eq(fib(10), [0, 1, 1, 2, 3, 5, 8, 13, 21, 34])
diff --git a/starlark/testdata/dict.star b/starlark/testdata/dict.star
new file mode 100644
index 0000000..1aeb1e7
--- /dev/null
+++ b/starlark/testdata/dict.star
@@ -0,0 +1,248 @@
+# Tests of Starlark 'dict'
+
+load("assert.star", "assert", "freeze")
+
+# literals
+assert.eq({}, {})
+assert.eq({"a": 1}, {"a": 1})
+assert.eq({"a": 1,}, {"a": 1})
+
+# truth
+assert.true({False: False})
+assert.true(not {})
+
+# dict + dict is no longer supported.
+assert.fails(lambda: {"a": 1} + {"b": 2}, 'unknown binary op: dict \\+ dict')
+
+# dict comprehension
+assert.eq({x: x*x for x in range(3)}, {0: 0, 1: 1, 2: 4})
+
+# dict.pop
+x6 = {"a": 1, "b": 2}
+assert.eq(x6.pop("a"), 1)
+assert.eq(str(x6), '{"b": 2}')
+assert.fails(lambda: x6.pop("c"), "pop: missing key")
+assert.eq(x6.pop("c", 3), 3)
+assert.eq(x6.pop("c", None), None) # default=None tests an edge case of UnpackArgs
+assert.eq(x6.pop("b"), 2)
+assert.eq(len(x6), 0)
+
+# dict.popitem
+x7 = {"a": 1, "b": 2}
+assert.eq([x7.popitem(), x7.popitem()], [("a", 1), ("b", 2)])
+assert.fails(x7.popitem, "empty dict")
+assert.eq(len(x7), 0)
+
+# dict.keys, dict.values
+x8 = {"a": 1, "b": 2}
+assert.eq(x8.keys(), ["a", "b"])
+assert.eq(x8.values(), [1, 2])
+
+# equality
+assert.eq({"a": 1, "b": 2}, {"a": 1, "b": 2})
+assert.eq({"a": 1, "b": 2,}, {"a": 1, "b": 2})
+assert.eq({"a": 1, "b": 2}, {"b": 2, "a": 1})
+
+# insertion order is preserved
+assert.eq(dict([("a", 0), ("b", 1), ("c", 2), ("b", 3)]).keys(), ["a", "b", "c"])
+assert.eq(dict([("b", 0), ("a", 1), ("b", 2), ("c", 3)]).keys(), ["b", "a", "c"])
+assert.eq(dict([("b", 0), ("a", 1), ("b", 2), ("c", 3)])["b"], 2)
+# ...even after rehashing (which currently occurs after key 'i'):
+small = dict([("a", 0), ("b", 1), ("c", 2)])
+small.update([("d", 4), ("e", 5), ("f", 6), ("g", 7), ("h", 8), ("i", 9), ("j", 10), ("k", 11)])
+assert.eq(small.keys(), ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"])
+
+# Duplicate keys are not permitted in dictionary expressions (see b/35698444).
+# (Nor in keyword args to function calls---checked by resolver.)
+assert.fails(lambda: {"aa": 1, "bb": 2, "cc": 3, "bb": 4}, 'duplicate key: "bb"')
+
+# Check that even with many positional args, keyword collisions are detected.
+assert.fails(lambda: dict({'b': 3}, a=4, **dict(a=5)), 'dict: duplicate keyword arg: "a"')
+assert.fails(lambda: dict({'a': 2, 'b': 3}, a=4, **dict(a=5)), 'dict: duplicate keyword arg: "a"')
+# positional/keyword arg key collisions are ok
+assert.eq(dict((['a', 2], ), a=4), {'a': 4})
+assert.eq(dict((['a', 2], ['a', 3]), a=4), {'a': 4})
+
+# index
+def setIndex(d, k, v):
+ d[k] = v
+
+x9 = {}
+assert.fails(lambda: x9["a"], 'key "a" not in dict')
+x9["a"] = 1
+assert.eq(x9["a"], 1)
+assert.eq(x9, {"a": 1})
+assert.fails(lambda: setIndex(x9, [], 2), 'unhashable type: list')
+freeze(x9)
+assert.fails(lambda: setIndex(x9, "a", 3), 'cannot insert into frozen hash table')
+
+x9a = {}
+x9a[1, 2] = 3 # unparenthesized tuple is allowed here
+assert.eq(x9a.keys()[0], (1, 2))
+
+# dict.get
+x10 = {"a": 1}
+assert.eq(x10.get("a"), 1)
+assert.eq(x10.get("b"), None)
+assert.eq(x10.get("a", 2), 1)
+assert.eq(x10.get("b", 2), 2)
+
+# dict.clear
+x11 = {"a": 1}
+assert.contains(x11, "a")
+assert.eq(x11["a"], 1)
+x11.clear()
+assert.fails(lambda: x11["a"], 'key "a" not in dict')
+assert.true("a" not in x11)
+freeze(x11)
+assert.fails(x11.clear, "cannot clear frozen hash table")
+
+# dict.setdefault
+x12 = {"a": 1}
+assert.eq(x12.setdefault("a"), 1)
+assert.eq(x12["a"], 1)
+assert.eq(x12.setdefault("b"), None)
+assert.eq(x12["b"], None)
+assert.eq(x12.setdefault("c", 2), 2)
+assert.eq(x12["c"], 2)
+assert.eq(x12.setdefault("c", 3), 2)
+assert.eq(x12["c"], 2)
+freeze(x12)
+assert.eq(x12.setdefault("a", 1), 1) # no change, no error
+assert.fails(lambda: x12.setdefault("d", 1), "cannot insert into frozen hash table")
+
+# dict.update
+x13 = {"a": 1}
+x13.update(a=2, b=3)
+assert.eq(x13, {"a": 2, "b": 3})
+x13.update([("b", 4), ("c", 5)])
+assert.eq(x13, {"a": 2, "b": 4, "c": 5})
+x13.update({"c": 6, "d": 7})
+assert.eq(x13, {"a": 2, "b": 4, "c": 6, "d": 7})
+freeze(x13)
+assert.fails(lambda: x13.update({"a": 8}), "cannot insert into frozen hash table")
+
+# dict as a sequence
+#
+# for loop
+x14 = {1:2, 3:4}
+def keys(dict):
+ keys = []
+ for k in dict: keys.append(k)
+ return keys
+assert.eq(keys(x14), [1, 3])
+#
+# comprehension
+assert.eq([x for x in x14], [1, 3])
+#
+# varargs
+def varargs(*args): return args
+x15 = {"one": 1}
+assert.eq(varargs(*x15), ("one",))
+
+# kwargs parameter does not alias the **kwargs dict
+def kwargs(**kwargs): return kwargs
+x16 = kwargs(**x15)
+assert.eq(x16, x15)
+x15["two"] = 2 # mutate
+assert.ne(x16, x15)
+
+# iterator invalidation
+def iterator1():
+ dict = {1:1, 2:1}
+ for k in dict:
+ dict[2*k] = dict[k]
+assert.fails(iterator1, "insert.*during iteration")
+
+def iterator2():
+ dict = {1:1, 2:1}
+ for k in dict:
+ dict.pop(k)
+assert.fails(iterator2, "delete.*during iteration")
+
+def iterator3():
+ def f(d):
+ d[3] = 3
+ dict = {1:1, 2:1}
+ _ = [f(dict) for x in dict]
+assert.fails(iterator3, "insert.*during iteration")
+
+# This assignment is not a modification-during-iteration:
+# the sequence x should be completely iterated before
+# the assignment occurs.
+def f():
+ x = {1:2, 2:4}
+ a, x[0] = x
+ assert.eq(a, 1)
+ assert.eq(x, {1: 2, 2: 4, 0: 2})
+f()
+
+# Regression test for a bug in hashtable.delete
+def test_delete():
+ d = {}
+
+ # delete tail first
+ d["one"] = 1
+ d["two"] = 2
+ assert.eq(str(d), '{"one": 1, "two": 2}')
+ d.pop("two")
+ assert.eq(str(d), '{"one": 1}')
+ d.pop("one")
+ assert.eq(str(d), '{}')
+
+ # delete head first
+ d["one"] = 1
+ d["two"] = 2
+ assert.eq(str(d), '{"one": 1, "two": 2}')
+ d.pop("one")
+ assert.eq(str(d), '{"two": 2}')
+ d.pop("two")
+ assert.eq(str(d), '{}')
+
+ # delete middle
+ d["one"] = 1
+ d["two"] = 2
+ d["three"] = 3
+ assert.eq(str(d), '{"one": 1, "two": 2, "three": 3}')
+ d.pop("two")
+ assert.eq(str(d), '{"one": 1, "three": 3}')
+ d.pop("three")
+ assert.eq(str(d), '{"one": 1}')
+ d.pop("one")
+ assert.eq(str(d), '{}')
+
+test_delete()
+
+# Regression test for github.com/google/starlark-go/issues/128.
+assert.fails(lambda: dict(None), 'got NoneType, want iterable')
+assert.fails(lambda: {}.update(None), 'got NoneType, want iterable')
+
+---
+# Verify position of an "unhashable key" error in a dict literal.
+
+_ = {
+ "one": 1,
+ ["two"]: 2, ### "unhashable type: list"
+ "three": 3,
+}
+
+---
+# Verify position of a "duplicate key" error in a dict literal.
+
+_ = {
+ "one": 1,
+ "one": 1, ### `duplicate key: "one"`
+ "three": 3,
+}
+
+---
+# Verify position of an "unhashable key" error in a dict comprehension.
+
+_ = {
+ k: v ### "unhashable type: list"
+ for k, v in [
+ ("one", 1),
+ (["two"], 2),
+ ("three", 3),
+ ]
+}
diff --git a/starlark/testdata/float.star b/starlark/testdata/float.star
new file mode 100644
index 0000000..b4df38d
--- /dev/null
+++ b/starlark/testdata/float.star
@@ -0,0 +1,504 @@
+# Tests of Starlark 'float'
+# option:set
+
+load("assert.star", "assert")
+
+# TODO(adonovan): more tests:
+# - precision
+# - limits
+
+# type
+assert.eq(type(0.0), "float")
+
+# truth
+assert.true(123.0)
+assert.true(-1.0)
+assert.true(not 0.0)
+assert.true(-1.0e-45)
+assert.true(float("NaN"))
+
+# not iterable
+assert.fails(lambda: len(0.0), 'has no len')
+assert.fails(lambda: [x for x in 0.0], 'float value is not iterable')
+
+# literals
+assert.eq(type(1.234), "float")
+assert.eq(type(1e10), "float")
+assert.eq(type(1e+10), "float")
+assert.eq(type(1e-10), "float")
+assert.eq(type(1.234e10), "float")
+assert.eq(type(1.234e+10), "float")
+assert.eq(type(1.234e-10), "float")
+
+# int/float equality
+assert.eq(0.0, 0)
+assert.eq(0, 0.0)
+assert.eq(1.0, 1)
+assert.eq(1, 1.0)
+assert.true(1.23e45 != 1229999999999999973814869011019624571608236031)
+assert.true(1.23e45 == 1229999999999999973814869011019624571608236032)
+assert.true(1.23e45 != 1229999999999999973814869011019624571608236033)
+assert.true(1229999999999999973814869011019624571608236031 != 1.23e45)
+assert.true(1229999999999999973814869011019624571608236032 == 1.23e45)
+assert.true(1229999999999999973814869011019624571608236033 != 1.23e45)
+
+# loss of precision
+p53 = 1<<53
+assert.eq(float(p53-1), p53-1)
+assert.eq(float(p53+0), p53+0)
+assert.eq(float(p53+1), p53+0) #
+assert.eq(float(p53+2), p53+2)
+assert.eq(float(p53+3), p53+4) #
+assert.eq(float(p53+4), p53+4)
+assert.eq(float(p53+5), p53+4) #
+assert.eq(float(p53+6), p53+6)
+assert.eq(float(p53+7), p53+8) #
+assert.eq(float(p53+8), p53+8)
+
+assert.true(float(p53+1) != p53+1) # comparisons are exact
+assert.eq(float(p53+1) - (p53+1), 0) # arithmetic entails rounding
+
+assert.fails(lambda: {123.0: "f", 123: "i"}, "duplicate key: 123")
+
+# equal int/float values have same hash
+d = {123.0: "x"}
+d[123] = "y"
+assert.eq(len(d), 1)
+assert.eq(d[123.0], "y")
+
+# literals (mostly covered by scanner tests)
+assert.eq(str(0.), "0.0")
+assert.eq(str(.0), "0.0")
+assert.true(5.0 != 4.999999999999999)
+assert.eq(5.0, 4.9999999999999999) # both literals denote 5.0
+assert.eq(1.23e45, 1.23 * 1000000000000000000000000000000000000000000000)
+assert.eq(str(1.23e-45 - (1.23 / 1000000000000000000000000000000000000000000000)), "-1.5557538194652854e-61")
+
+nan = float("NaN")
+inf = float("+Inf")
+neginf = float("-Inf")
+negzero = (-1e-323 / 10)
+
+# -- arithmetic --
+
+# +float, -float
+assert.eq(+(123.0), 123.0)
+assert.eq(-(123.0), -123.0)
+assert.eq(-(-(123.0)), 123.0)
+assert.eq(+(inf), inf)
+assert.eq(-(inf), neginf)
+assert.eq(-(neginf), inf)
+assert.eq(str(-(nan)), "nan")
+# +
+assert.eq(1.2e3 + 5.6e7, 5.60012e+07)
+assert.eq(1.2e3 + 1, 1201)
+assert.eq(1 + 1.2e3, 1201)
+assert.eq(str(1.2e3 + nan), "nan")
+assert.eq(inf + 0, inf)
+assert.eq(inf + 1, inf)
+assert.eq(inf + inf, inf)
+assert.eq(str(inf + neginf), "nan")
+# -
+assert.eq(1.2e3 - 5.6e7, -5.59988e+07)
+assert.eq(1.2e3 - 1, 1199)
+assert.eq(1 - 1.2e3, -1199)
+assert.eq(str(1.2e3 - nan), "nan")
+assert.eq(inf - 0, inf)
+assert.eq(inf - 1, inf)
+assert.eq(str(inf - inf), "nan")
+assert.eq(inf - neginf, inf)
+# *
+assert.eq(1.5e6 * 2.2e3, 3.3e9)
+assert.eq(1.5e6 * 123, 1.845e+08)
+assert.eq(123 * 1.5e6, 1.845e+08)
+assert.eq(str(1.2e3 * nan), "nan")
+assert.eq(str(inf * 0), "nan")
+assert.eq(inf * 1, inf)
+assert.eq(inf * inf, inf)
+assert.eq(inf * neginf, neginf)
+# %
+assert.eq(100.0 % 7.0, 2)
+assert.eq(100.0 % -7.0, -5) # NB: different from Go / Java
+assert.eq(-100.0 % 7.0, 5) # NB: different from Go / Java
+assert.eq(-100.0 % -7.0, -2)
+assert.eq(-100.0 % 7, 5)
+assert.eq(100 % 7.0, 2)
+assert.eq(str(1.2e3 % nan), "nan")
+assert.eq(str(inf % 1), "nan")
+assert.eq(str(inf % inf), "nan")
+assert.eq(str(inf % neginf), "nan")
+# /
+assert.eq(str(100.0 / 7.0), "14.285714285714286")
+assert.eq(str(100 / 7.0), "14.285714285714286")
+assert.eq(str(100.0 / 7), "14.285714285714286")
+assert.eq(str(100.0 / nan), "nan")
+# //
+assert.eq(100.0 // 7.0, 14)
+assert.eq(100 // 7.0, 14)
+assert.eq(100.0 // 7, 14)
+assert.eq(100.0 // -7.0, -15)
+assert.eq(100 // -7.0, -15)
+assert.eq(100.0 // -7, -15)
+assert.eq(str(1 // neginf), "-0.0")
+assert.eq(str(100.0 // nan), "nan")
+
+# addition
+assert.eq(0.0 + 1.0, 1.0)
+assert.eq(1.0 + 1.0, 2.0)
+assert.eq(1.25 + 2.75, 4.0)
+assert.eq(5.0 + 7.0, 12.0)
+assert.eq(5.1 + 7, 12.1) # float + int
+assert.eq(7 + 5.1, 12.1) # int + float
+
+# subtraction
+assert.eq(5.0 - 7.0, -2.0)
+assert.eq(5.1 - 7.1, -2.0)
+assert.eq(5.5 - 7, -1.5)
+assert.eq(5 - 7.5, -2.5)
+assert.eq(0.0 - 1.0, -1.0)
+
+# multiplication
+assert.eq(5.0 * 7.0, 35.0)
+assert.eq(5.5 * 2.5, 13.75)
+assert.eq(5.5 * 7, 38.5)
+assert.eq(5 * 7.1, 35.5)
+
+# real division (like Python 3)
+# The / operator is available only when the 'fp' dialect option is enabled.
+assert.eq(100.0 / 8.0, 12.5)
+assert.eq(100.0 / -8.0, -12.5)
+assert.eq(-100.0 / 8.0, -12.5)
+assert.eq(-100.0 / -8.0, 12.5)
+assert.eq(98.0 / 8.0, 12.25)
+assert.eq(98.0 / -8.0, -12.25)
+assert.eq(-98.0 / 8.0, -12.25)
+assert.eq(-98.0 / -8.0, 12.25)
+assert.eq(2.5 / 2.0, 1.25)
+assert.eq(2.5 / 2, 1.25)
+assert.eq(5 / 4.0, 1.25)
+assert.eq(5 / 4, 1.25)
+assert.fails(lambda: 1.0 / 0, "floating-point division by zero")
+assert.fails(lambda: 1.0 / 0.0, "floating-point division by zero")
+assert.fails(lambda: 1 / 0.0, "floating-point division by zero")
+
+# floored division
+assert.eq(100.0 // 8.0, 12.0)
+assert.eq(100.0 // -8.0, -13.0)
+assert.eq(-100.0 // 8.0, -13.0)
+assert.eq(-100.0 // -8.0, 12.0)
+assert.eq(98.0 // 8.0, 12.0)
+assert.eq(98.0 // -8.0, -13.0)
+assert.eq(-98.0 // 8.0, -13.0)
+assert.eq(-98.0 // -8.0, 12.0)
+assert.eq(2.5 // 2.0, 1.0)
+assert.eq(2.5 // 2, 1.0)
+assert.eq(5 // 4.0, 1.0)
+assert.eq(5 // 4, 1)
+assert.eq(type(5 // 4), "int")
+assert.fails(lambda: 1.0 // 0, "floored division by zero")
+assert.fails(lambda: 1.0 // 0.0, "floored division by zero")
+assert.fails(lambda: 1 // 0.0, "floored division by zero")
+
+# remainder
+assert.eq(100.0 % 8.0, 4.0)
+assert.eq(100.0 % -8.0, -4.0)
+assert.eq(-100.0 % 8.0, 4.0)
+assert.eq(-100.0 % -8.0, -4.0)
+assert.eq(98.0 % 8.0, 2.0)
+assert.eq(98.0 % -8.0, -6.0)
+assert.eq(-98.0 % 8.0, 6.0)
+assert.eq(-98.0 % -8.0, -2.0)
+assert.eq(2.5 % 2.0, 0.5)
+assert.eq(2.5 % 2, 0.5)
+assert.eq(5 % 4.0, 1.0)
+assert.fails(lambda: 1.0 % 0, "floating-point modulo by zero")
+assert.fails(lambda: 1.0 % 0.0, "floating-point modulo by zero")
+assert.fails(lambda: 1 % 0.0, "floating-point modulo by zero")
+
+# floats cannot be used as indices, even if integral
+assert.fails(lambda: "abc"[1.0], "want int")
+assert.fails(lambda: ["A", "B", "C"].insert(1.0, "D"), "want int")
+assert.fails(lambda: range(3)[1.0], "got float, want int")
+
+# -- comparisons --
+# NaN
+assert.true(nan == nan) # \
+assert.true(nan >= nan) # unlike Python
+assert.true(nan <= nan) # /
+assert.true(not (nan > nan))
+assert.true(not (nan < nan))
+assert.true(not (nan != nan)) # unlike Python
+# Sort is stable: 0.0 and -0.0 are equal, but they are not permuted.
+# Similarly 1 and 1.0.
+assert.eq(
+ str(sorted([inf, neginf, nan, 1e300, -1e300, 1.0, -1.0, 1, -1, 1e-300, -1e-300, 0, 0.0, negzero, 1e-300, -1e-300])),
+ "[-inf, -1e+300, -1.0, -1, -1e-300, -1e-300, 0, 0.0, -0.0, 1e-300, 1e-300, 1.0, 1, 1e+300, +inf, nan]")
+
+# Sort is stable, and its result contains no adjacent x, y such that y > x.
+# Note: Python's reverse sort is unstable; see https://bugs.python.org/issue36095.
+assert.eq(str(sorted([7, 3, nan, 1, 9])), "[1, 3, 7, 9, nan]")
+assert.eq(str(sorted([7, 3, nan, 1, 9], reverse=True)), "[nan, 9, 7, 3, 1]")
+
+# All NaN values compare equal. (Identical objects compare equal.)
+nandict = {nan: 1}
+nandict[nan] = 2
+assert.eq(len(nandict), 1) # (same as Python)
+assert.eq(nandict[nan], 2) # (same as Python)
+assert.fails(lambda: {nan: 1, nan: 2}, "duplicate key: nan")
+
+nandict[float('nan')] = 3 # a distinct NaN object
+assert.eq(str(nandict), "{nan: 3}") # (Python: {nan: 2, nan: 3})
+
+assert.eq(str({inf: 1, neginf: 2}), "{+inf: 1, -inf: 2}")
+
+# zero
+assert.eq(0.0, negzero)
+
+# inf
+assert.eq(+inf / +inf, nan)
+assert.eq(+inf / -inf, nan)
+assert.eq(-inf / +inf, nan)
+assert.eq(0.0 / +inf, 0.0)
+assert.eq(0.0 / -inf, 0.0)
+assert.true(inf > -inf)
+assert.eq(inf, -neginf)
+# TODO(adonovan): assert inf > any finite number, etc.
+
+# negative zero
+negz = -0
+assert.eq(negz, 0)
+
+# min/max ordering with NaN (the greatest float value)
+assert.eq(max([1, nan, 3]), nan)
+assert.eq(max([nan, 2, 3]), nan)
+assert.eq(min([1, nan, 3]), 1)
+assert.eq(min([nan, 2, 3]), 2)
+
+# float/float comparisons
+fltmax = 1.7976931348623157e+308 # approx
+fltmin = 4.9406564584124654e-324 # approx
+assert.lt(-inf, -fltmax)
+assert.lt(-fltmax, -1.0)
+assert.lt(-1.0, -fltmin)
+assert.lt(-fltmin, 0.0)
+assert.lt(0, fltmin)
+assert.lt(fltmin, 1.0)
+assert.lt(1.0, fltmax)
+assert.lt(fltmax, inf)
+
+# int/float comparisons
+assert.eq(0, 0.0)
+assert.eq(1, 1.0)
+assert.eq(-1, -1.0)
+assert.ne(-1, -1.0 + 1e-7)
+assert.lt(-2, -2 + 1e-15)
+
+# int conversion (rounds towards zero)
+assert.eq(int(100.1), 100)
+assert.eq(int(100.0), 100)
+assert.eq(int(99.9), 99)
+assert.eq(int(-99.9), -99)
+assert.eq(int(-100.0), -100)
+assert.eq(int(-100.1), -100)
+assert.eq(int(1e100), int("10000000000000000159028911097599180468360808563945281389781327557747838772170381060813469985856815104"))
+assert.fails(lambda: int(inf), "cannot convert.*infinity")
+assert.fails(lambda: int(nan), "cannot convert.*NaN")
+
+# -- float() function --
+assert.eq(float(), 0.0)
+# float(bool)
+assert.eq(float(False), 0.0)
+assert.eq(float(True), 1.0)
+# float(int)
+assert.eq(float(0), 0.0)
+assert.eq(float(1), 1.0)
+assert.eq(float(123), 123.0)
+assert.eq(float(123 * 1000000 * 1000000 * 1000000 * 1000000 * 1000000), 1.23e+32)
+# float(float)
+assert.eq(float(1.1), 1.1)
+assert.fails(lambda: float(None), "want number or string")
+assert.ne(False, 0.0) # differs from Python
+assert.ne(True, 1.0)
+# float(string)
+assert.eq(float("1.1"), 1.1)
+assert.fails(lambda: float("1.1abc"), "invalid float literal")
+assert.fails(lambda: float("1e100.0"), "invalid float literal")
+assert.fails(lambda: float("1e1000"), "floating-point number too large")
+assert.eq(float("-1.1"), -1.1)
+assert.eq(float("+1.1"), +1.1)
+assert.eq(float("+Inf"), inf)
+assert.eq(float("-Inf"), neginf)
+assert.eq(float("NaN"), nan)
+assert.eq(float("NaN"), nan)
+assert.eq(float("+NAN"), nan)
+assert.eq(float("-nan"), nan)
+assert.eq(str(float("Inf")), "+inf")
+assert.eq(str(float("+INF")), "+inf")
+assert.eq(str(float("-inf")), "-inf")
+assert.eq(str(float("+InFiniTy")), "+inf")
+assert.eq(str(float("-iNFiniTy")), "-inf")
+assert.fails(lambda: float("one point two"), "invalid float literal: one point two")
+assert.fails(lambda: float("1.2.3"), "invalid float literal: 1.2.3")
+assert.fails(lambda: float(123 << 500 << 500 << 50), "int too large to convert to float")
+assert.fails(lambda: float(-123 << 500 << 500 << 50), "int too large to convert to float")
+assert.fails(lambda: float(str(-123 << 500 << 500 << 50)), "floating-point number too large")
+
+# -- implicit float(int) conversions --
+assert.fails(lambda: (1<<500<<500<<500) + 0.0, "int too large to convert to float")
+assert.fails(lambda: 0.0 + (1<<500<<500<<500), "int too large to convert to float")
+assert.fails(lambda: (1<<500<<500<<500) - 0.0, "int too large to convert to float")
+assert.fails(lambda: 0.0 - (1<<500<<500<<500), "int too large to convert to float")
+assert.fails(lambda: (1<<500<<500<<500) * 1.0, "int too large to convert to float")
+assert.fails(lambda: 1.0 * (1<<500<<500<<500), "int too large to convert to float")
+assert.fails(lambda: (1<<500<<500<<500) / 1.0, "int too large to convert to float")
+assert.fails(lambda: 1.0 / (1<<500<<500<<500), "int too large to convert to float")
+assert.fails(lambda: (1<<500<<500<<500) // 1.0, "int too large to convert to float")
+assert.fails(lambda: 1.0 // (1<<500<<500<<500), "int too large to convert to float")
+assert.fails(lambda: (1<<500<<500<<500) % 1.0, "int too large to convert to float")
+assert.fails(lambda: 1.0 % (1<<500<<500<<500), "int too large to convert to float")
+
+
+# -- int function --
+assert.eq(int(0.0), 0)
+assert.eq(int(1.0), 1)
+assert.eq(int(1.1), 1)
+assert.eq(int(0.9), 0)
+assert.eq(int(-1.1), -1.0)
+assert.eq(int(-1.0), -1.0)
+assert.eq(int(-0.9), 0.0)
+assert.eq(int(1.23e+32), 123000000000000004979083645550592)
+assert.eq(int(-1.23e-32), 0)
+assert.eq(int(1.23e-32), 0)
+assert.fails(lambda: int(float("+Inf")), "cannot convert float infinity to integer")
+assert.fails(lambda: int(float("-Inf")), "cannot convert float infinity to integer")
+assert.fails(lambda: int(float("NaN")), "cannot convert float NaN to integer")
+
+
+# hash
+# Check that equal float and int values have the same internal hash.
+def checkhash():
+    for a in [1.23e100, 1.23e10, 1.23e1, 1.23,
+              1, 4294967295, 8589934591, 9223372036854775807]:
+        for b in [a, -a, 1/a, -1/a]:
+            f = float(b)
+            i = int(b)
+            if f == i:  # only values of b that are exactly integral reach the dict comparison
+                fh = {f: None}
+                ih = {i: None}
+                if fh != ih:  # one-entry dicts keyed by equal values must compare equal
+                    assert.fail("{%r: None} != {%r: None}: hashes vary" % (f, i))
+checkhash()
+
+# string formatting
+
+# %d
+assert.eq("%d" % 0, "0")
+assert.eq("%d" % 0.0, "0")
+assert.eq("%d" % 123, "123")
+assert.eq("%d" % 123.0, "123")
+assert.eq("%d" % 1.23e45, "1229999999999999973814869011019624571608236032")
+# (see below for '%d' % NaN/Inf)
+assert.eq("%d" % negzero, "0")
+assert.fails(lambda: "%d" % float("NaN"), "cannot convert float NaN to integer")
+assert.fails(lambda: "%d" % float("+Inf"), "cannot convert float infinity to integer")
+assert.fails(lambda: "%d" % float("-Inf"), "cannot convert float infinity to integer")
+
+# %e
+assert.eq("%e" % 0, "0.000000e+00")
+assert.eq("%e" % 0.0, "0.000000e+00")
+assert.eq("%e" % 123, "1.230000e+02")
+assert.eq("%e" % 123.0, "1.230000e+02")
+assert.eq("%e" % 1.23e45, "1.230000e+45")
+assert.eq("%e" % -1.23e-45, "-1.230000e-45")
+assert.eq("%e" % nan, "nan")
+assert.eq("%e" % inf, "+inf")
+assert.eq("%e" % neginf, "-inf")
+assert.eq("%e" % negzero, "-0.000000e+00")
+assert.fails(lambda: "%e" % "123", "requires float, not str")
+# %f
+assert.eq("%f" % 0, "0.000000")
+assert.eq("%f" % 0.0, "0.000000")
+assert.eq("%f" % 123, "123.000000")
+assert.eq("%f" % 123.0, "123.000000")
+# Note: Starlark/Java emits 1230000000000000000000000000000000000000000000.000000. Why?
+assert.eq("%f" % 1.23e45, "1229999999999999973814869011019624571608236032.000000")
+assert.eq("%f" % -1.23e-45, "-0.000000")
+assert.eq("%f" % nan, "nan")
+assert.eq("%f" % inf, "+inf")
+assert.eq("%f" % neginf, "-inf")
+assert.eq("%f" % negzero, "-0.000000")
+assert.fails(lambda: "%f" % "123", "requires float, not str")
+# %g
+assert.eq("%g" % 0, "0.0")
+assert.eq("%g" % 0.0, "0.0")
+assert.eq("%g" % 123, "123.0")
+assert.eq("%g" % 123.0, "123.0")
+assert.eq("%g" % 1.110, "1.11")
+assert.eq("%g" % 1e5, "100000.0")
+assert.eq("%g" % 1e6, "1e+06") # Note: threshold of scientific notation is 1e17 in Starlark/Java
+assert.eq("%g" % 1.23e45, "1.23e+45")
+assert.eq("%g" % -1.23e-45, "-1.23e-45")
+assert.eq("%g" % nan, "nan")
+assert.eq("%g" % inf, "+inf")
+assert.eq("%g" % neginf, "-inf")
+assert.eq("%g" % negzero, "-0.0")
+# str uses %g
+assert.eq(str(0.0), "0.0")
+assert.eq(str(123.0), "123.0")
+assert.eq(str(1.23e45), "1.23e+45")
+assert.eq(str(-1.23e-45), "-1.23e-45")
+assert.eq(str(nan), "nan")
+assert.eq(str(inf), "+inf")
+assert.eq(str(neginf), "-inf")
+assert.eq(str(negzero), "-0.0")
+assert.fails(lambda: "%g" % "123", "requires float, not str")
+
+i0 = 1
+f0 = 1.0
+assert.eq(type(i0), "int")
+assert.eq(type(f0), "float")
+
+ops = {
+ '+': lambda x, y: x + y,
+ '-': lambda x, y: x - y,
+ '*': lambda x, y: x * y,
+ '/': lambda x, y: x / y,
+ '//': lambda x, y: x // y,
+ '%': lambda x, y: x % y,
+}
+
+# Check that if either argument is a float, so too is the result.
+def checktypes():
+    want = set("""
+int + int = int
+int + float = float
+float + int = float
+float + float = float
+int - int = int
+int - float = float
+float - int = float
+float - float = float
+int * int = int
+int * float = float
+float * int = float
+float * float = float
+int / int = float
+int / float = float
+float / int = float
+float / float = float
+int // int = int
+int // float = float
+float // int = float
+float // float = float
+int % int = int
+int % float = float
+float % int = float
+float % float = float
+"""[1:].splitlines())  # [1:] drops the newline that follows the opening quotes
+    for opname in ("+", "-", "*", "/", "//", "%"):  # every operator defined in ops
+        op = ops[opname]
+        for x in [i0, f0]:
+            for y in [i0, f0]:
+                got = "%s %s %s = %s" % (type(x), opname, type(y), type(op(x, y)))
+                assert.contains(want, got)
+checktypes()
diff --git a/starlark/testdata/function.star b/starlark/testdata/function.star
new file mode 100644
index 0000000..737df26
--- /dev/null
+++ b/starlark/testdata/function.star
@@ -0,0 +1,323 @@
+# Tests of Starlark 'function'
+# option:set
+
+# TODO(adonovan):
+# - add some introspection functions for looking at function values
+# and test that functions have correct position, free vars, names of locals, etc.
+# - move the hard-coded tests of parameter passing from eval_test.go to here.
+
+load("assert.star", "assert", "freeze")
+
+# Test lexical scope and closures:
+def outer(x):
+ def inner(y):
+ return x + x + y # multiple occurrences of x should create only 1 freevar
+ return inner
+
+z = outer(3)
+assert.eq(z(5), 11)
+assert.eq(z(7), 13)
+z2 = outer(4)
+assert.eq(z2(5), 13)
+assert.eq(z2(7), 15)
+assert.eq(z(5), 11)
+assert.eq(z(7), 13)
+
+# Function name
+assert.eq(str(outer), '<function outer>')
+assert.eq(str(z), '<function inner>')
+assert.eq(str(str), '<built-in function str>')
+assert.eq(str("".startswith), '<built-in method startswith of string value>')
+
+# Stateful closure
+def squares():
+ x = [0]
+ def f():
+ x[0] += 1
+ return x[0] * x[0]
+ return f
+
+sq = squares()
+assert.eq(sq(), 1)
+assert.eq(sq(), 4)
+assert.eq(sq(), 9)
+assert.eq(sq(), 16)
+
+# Freezing a closure
+sq2 = freeze(sq)
+assert.fails(sq2, "frozen list")
+
+# recursion detection, simple
+def fib(x):
+ if x < 2:
+ return x
+ return fib(x-2) + fib(x-1)
+assert.fails(lambda: fib(10), "function fib called recursively")
+
+# recursion detection, advanced
+#
+# A simplistic recursion check that looks for repeated calls to the
+# same function value will not detect recursion using the Y
+# combinator, which creates a new closure at each step of the
+# recursion. To truly prohibit recursion, the dynamic check must look
+# for repeated calls of the same syntactic function body.
+Y = lambda f: (lambda x: x(x))(lambda y: f(lambda *args: y(y)(*args)))
+fibgen = lambda fib: lambda x: (x if x<2 else fib(x-1)+fib(x-2))
+fib2 = Y(fibgen)
+assert.fails(lambda: [fib2(x) for x in range(10)], "function lambda called recursively")
+
+# However, this stricter check outlaws many useful programs
+# that are still bounded, and creates a hazard because
+# helper functions such as map below cannot be used to
+# call functions that themselves use map:
+def map(f, seq): return [f(x) for x in seq]
+def double(x): return x+x
+assert.eq(map(double, [1, 2, 3]), [2, 4, 6])
+assert.eq(map(double, ["a", "b", "c"]), ["aa", "bb", "cc"])
+def mapdouble(x): return map(double, x)
+assert.fails(lambda: map(mapdouble, ([1, 2, 3], ["a", "b", "c"])),
+ 'function map called recursively')
+# With the -recursion option it would yield [[2, 4, 6], ["aa", "bb", "cc"]].
+
+# call of function not through its name
+# (regression test for parsing suffixes of primary expressions)
+hf = hasfields()
+hf.x = [len]
+assert.eq(hf.x[0]("abc"), 3)
+def f():
+ return lambda: 1
+assert.eq(f()(), 1)
+assert.eq(["abc"][0][0].upper(), "A")
+
+# functions may be recursively defined,
+# so long as they don't dynamically recur.
+calls = []
+def yin(x):
+ calls.append("yin")
+ if x:
+ yang(False)
+
+def yang(x):
+ calls.append("yang")
+ if x:
+ yin(False)
+
+yin(True)
+assert.eq(calls, ["yin", "yang"])
+
+calls.clear()
+yang(True)
+assert.eq(calls, ["yang", "yin"])
+
+
+# builtin_function_or_method values use identity equivalence.
+closures = set(["".count for _ in range(10)])
+assert.eq(len(closures), 10)
+
+---
+# Default values of function parameters are mutable.
+load("assert.star", "assert", "freeze")
+
+def f(x=[0]):
+ return x
+
+assert.eq(f(), [0])
+
+f().append(1)
+assert.eq(f(), [0, 1])
+
+# Freezing a function value freezes its parameter defaults.
+freeze(f)
+assert.fails(lambda: f().append(2), "cannot append to frozen list")
+
+---
+# This is a well known corner case of parsing in Python.
+load("assert.star", "assert")
+
+f = lambda x: 1 if x else 0
+assert.eq(f(True), 1)
+assert.eq(f(False), 0)
+
+x = True
+f2 = (lambda x: 1) if x else 0
+assert.eq(f2(123), 1)
+
+tf = lambda: True, lambda: False
+assert.true(tf[0]())
+assert.true(not tf[1]())
+
+---
+# Missing parameters are correctly reported
+# in functions of more than 64 parameters.
+# (This tests a corner case of the implementation:
+# we avoid a map allocation for <64 parameters)
+
+load("assert.star", "assert")
+
+def f(a, b, c, d, e, f, g, h,
+ i, j, k, l, m, n, o, p,
+ q, r, s, t, u, v, w, x,
+ y, z, A, B, C, D, E, F,
+ G, H, I, J, K, L, M, N,
+ O, P, Q, R, S, T, U, V,
+ W, X, Y, Z, aa, bb, cc, dd,
+ ee, ff, gg, hh, ii, jj, kk, ll,
+ mm):
+ pass
+
+assert.fails(lambda: f(
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64), "missing 1 argument \\(mm\\)")
+
+assert.fails(lambda: f(
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64, 65,
+ mm = 100), 'multiple values for parameter "mm"')
+
+---
+# Regression test for github.com/google/starlark-go/issues/21,
+# which concerns dynamic checks.
+# Related: https://github.com/bazelbuild/starlark/issues/21,
+# which concerns static checks.
+
+load("assert.star", "assert")
+
+def f(*args, **kwargs):
+ return args, kwargs
+
+assert.eq(f(x=1, y=2), ((), {"x": 1, "y": 2}))
+assert.fails(lambda: f(x=1, **dict(x=2)), 'multiple values for parameter "x"')
+
+def g(x, y):
+ return x, y
+
+assert.eq(g(1, y=2), (1, 2))
+assert.fails(lambda: g(1, y=2, **{'y': 3}), 'multiple values for parameter "y"')
+
+---
+# Regression test for a bug in CALL_VAR_KW.
+
+load("assert.star", "assert")
+
+def f(a, b, x, y):
+ return a+b+x+y
+
+assert.eq(f(*("a", "b"), **dict(y="y", x="x")) + ".", 'abxy.')
+---
+# Order of evaluation of function arguments.
+# Regression test for github.com/google/skylark/issues/135.
+load("assert.star", "assert")
+
+r = []
+
+def id(x):
+ r.append(x)
+ return x
+
+def f(*args, **kwargs):
+ return (args, kwargs)
+
+y = f(id(1), id(2), x=id(3), *[id(4)], **dict(z=id(5)))
+assert.eq(y, ((1, 2, 4), dict(x=3, z=5)))
+
+# This matches Python2 and Starlark-in-Java, but not Python3 [1 2 4 3 5].
+# *args and **kwargs are evaluated last.
+# (Python[23] also allows keyword arguments after *args.)
+# See github.com/bazelbuild/starlark#13 for spec change.
+assert.eq(r, [1, 2, 3, 4, 5])
+
+---
+# option:recursion
+# See github.com/bazelbuild/starlark#170
+load("assert.star", "assert")
+
+def a():
+ list = []
+ def b(n):
+ list.append(n)
+ if n > 0:
+ b(n - 1) # recursive reference to b
+
+ b(3)
+ return list
+
+assert.eq(a(), [3, 2, 1, 0])
+
+def c():
+ list = []
+ x = 1
+ def d():
+ list.append(x) # this use of x observes both assignments
+ d()
+ x = 2
+ d()
+ return list
+
+assert.eq(c(), [1, 2])
+
+def e():
+ def f():
+ return x # forward reference ok: x is a closure cell
+ x = 1
+ return f()
+
+assert.eq(e(), 1)
+
+---
+load("assert.star", "assert")
+
+def e():
+ x = 1
+ def f():
+ print(x) # this reference to x fails
+ x = 3 # because this assignment makes x local to f
+ f()
+
+assert.fails(e, "local variable x referenced before assignment")
+
+def f():
+ def inner():
+ return x
+ if False:
+ x = 0
+ return x # fails (x is an uninitialized cell of this function)
+
+assert.fails(f, "local variable x referenced before assignment")
+
+def g():
+ def inner():
+ return x # fails (x is an uninitialized cell of the enclosing function)
+ if False:
+ x = 0
+ return inner()
+
+assert.fails(g, "local variable x referenced before assignment")
+
+---
+# A trailing comma is allowed in any function definition or call.
+# This reduces the need to edit neighboring lines when editing defs
+# or calls splayed across multiple lines.
+
+def a(x,): pass
+def b(x, y=None, ): pass
+def c(x, y=None, *args, ): pass
+def d(x, y=None, *args, z=None, ): pass
+def e(x, y=None, *args, z=None, **kwargs, ): pass
+
+a(1,)
+b(1, y=2, )
+#c(1, *[], )
+#d(1, *[], z=None, )
+#e(1, *[], z=None, **{}, )
diff --git a/starlark/testdata/int.star b/starlark/testdata/int.star
new file mode 100644
index 0000000..46c0ad0
--- /dev/null
+++ b/starlark/testdata/int.star
@@ -0,0 +1,260 @@
+# Tests of Starlark 'int'
+
+load("assert.star", "assert")
+
+# basic arithmetic
+assert.eq(0 - 1, -1)
+assert.eq(0 + 1, +1)
+assert.eq(1 + 1, 2)
+assert.eq(5 + 7, 12)
+assert.eq(5 * 7, 35)
+assert.eq(5 - 7, -2)
+
+# int boundaries
+maxint64 = (1 << 63) - 1
+minint64 = -1 << 63
+maxint32 = (1 << 31) - 1
+minint32 = -1 << 31
+assert.eq(maxint64, 9223372036854775807)
+assert.eq(minint64, -9223372036854775808)
+assert.eq(maxint32, 2147483647)
+assert.eq(minint32, -2147483648)
+
+# truth
+def truth():
+ assert.true(not 0)
+ for m in [1, maxint32]: # Test small/big ranges
+ assert.true(123 * m)
+ assert.true(-1 * m)
+
+truth()
+
+# floored division
+# (For real division, see float.star.)
+def division():
+ for m in [1, maxint32]: # Test small/big ranges
+ assert.eq((100 * m) // (7 * m), 14)
+ assert.eq((100 * m) // (-7 * m), -15) # NB: different from Go/Java (floors; they truncate to -14)
+ assert.eq((-100 * m) // (7 * m), -15) # NB: different from Go/Java (floors; they truncate to -14)
+ assert.eq((-100 * m) // (-7 * m), 14) # same as Go/Java: the exact quotient is positive
+ assert.eq((98 * m) // (7 * m), 14)
+ assert.eq((98 * m) // (-7 * m), -14)
+ assert.eq((-98 * m) // (7 * m), -14)
+ assert.eq((-98 * m) // (-7 * m), 14)
+
+division()
+
+# remainder
+def remainder():
+ for m in [1, maxint32]: # Test small/big ranges
+ assert.eq((100 * m) % (7 * m), 2 * m)
+ assert.eq((100 * m) % (-7 * m), -5 * m) # NB: different from Go/Java
+ assert.eq((-100 * m) % (7 * m), 5 * m) # NB: different from Go/Java
+ assert.eq((-100 * m) % (-7 * m), -2 * m)
+ assert.eq((98 * m) % (7 * m), 0)
+ assert.eq((98 * m) % (-7 * m), 0)
+ assert.eq((-98 * m) % (7 * m), 0)
+ assert.eq((-98 * m) % (-7 * m), 0)
+
+remainder()
+
+# compound assignment
+def compound():
+ x = 1
+ x += 1
+ assert.eq(x, 2)
+ x -= 3
+ assert.eq(x, -1)
+ x *= 39
+ assert.eq(x, -39)
+ x //= 4
+ assert.eq(x, -10)
+ x /= -2
+ assert.eq(x, 5)
+ x %= 3
+ assert.eq(x, 2)
+
+ # use resolve.AllowBitwise to enable the ops:
+ x = 2
+ x &= 1
+ assert.eq(x, 0)
+ x |= 2
+ assert.eq(x, 2)
+ x ^= 3
+ assert.eq(x, 1)
+ x <<= 2
+ assert.eq(x, 4)
+ x >>= 2
+ assert.eq(x, 1)
+
+compound()
+
+# int conversion
+# See float.star for float-to-int conversions.
+# We follow Python 3 here, but I can't see the method in its madness.
+# int from bool/int/float
+assert.fails(int, "missing argument") # int()
+assert.eq(int(False), 0)
+assert.eq(int(True), 1)
+assert.eq(int(3), 3)
+assert.eq(int(3.1), 3)
+assert.fails(lambda: int(3, base = 10), "non-string with explicit base")
+assert.fails(lambda: int(True, 10), "non-string with explicit base")
+
+# int from string, base implicitly 10
+assert.eq(int("100000000000000000000"), 10000000000 * 10000000000)
+assert.eq(int("-100000000000000000000"), -10000000000 * 10000000000)
+assert.eq(int("123"), 123)
+assert.eq(int("-123"), -123)
+assert.eq(int("0123"), 123) # not octal
+assert.eq(int("-0123"), -123)
+assert.fails(lambda: int("0x12"), "invalid literal with base 10")
+assert.fails(lambda: int("-0x12"), "invalid literal with base 10")
+assert.fails(lambda: int("0o123"), "invalid literal.*base 10")
+assert.fails(lambda: int("-0o123"), "invalid literal.*base 10")
+
+# int from string, explicit base
+assert.eq(int("0"), 0)
+assert.eq(int("00"), 0)
+assert.eq(int("0", base = 10), 0)
+assert.eq(int("00", base = 10), 0)
+assert.eq(int("0", base = 8), 0)
+assert.eq(int("00", base = 8), 0)
+assert.eq(int("-0"), 0)
+assert.eq(int("-00"), 0)
+assert.eq(int("-0", base = 10), 0)
+assert.eq(int("-00", base = 10), 0)
+assert.eq(int("-0", base = 8), 0)
+assert.eq(int("-00", base = 8), 0)
+assert.eq(int("+0"), 0)
+assert.eq(int("+00"), 0)
+assert.eq(int("+0", base = 10), 0)
+assert.eq(int("+00", base = 10), 0)
+assert.eq(int("+0", base = 8), 0)
+assert.eq(int("+00", base = 8), 0)
+assert.eq(int("11", base = 9), 10)
+assert.eq(int("-11", base = 9), -10)
+assert.eq(int("10011", base = 2), 19)
+assert.eq(int("-10011", base = 2), -19)
+assert.eq(int("123", 8), 83)
+assert.eq(int("-123", 8), -83)
+assert.eq(int("0123", 8), 83) # redundant zeros permitted
+assert.eq(int("-0123", 8), -83)
+assert.eq(int("00123", 8), 83)
+assert.eq(int("-00123", 8), -83)
+assert.eq(int("0o123", 8), 83)
+assert.eq(int("-0o123", 8), -83)
+assert.eq(int("123", 7), 66) # 1*7*7 + 2*7 + 3
+assert.eq(int("-123", 7), -66)
+assert.eq(int("12", 16), 18)
+assert.eq(int("-12", 16), -18)
+assert.eq(int("0x12", 16), 18)
+assert.eq(int("-0x12", 16), -18)
+assert.eq(0x1000000000000001 * 0x1000000000000001, 0x1000000000000002000000000000001)
+assert.eq(int("1010", 2), 10)
+assert.eq(int("111111101", 2), 509)
+assert.eq(int("0b0101", 0), 5)
+assert.eq(int("0b0101", 2), 5) # prefix is redundant with explicit base
+assert.eq(int("0b00000", 0), 0)
+assert.eq(1111111111111111 * 1111111111111111, 1234567901234567654320987654321)
+assert.fails(lambda: int("0x123", 8), "invalid literal.*base 8")
+assert.fails(lambda: int("-0x123", 8), "invalid literal.*base 8")
+assert.fails(lambda: int("0o123", 16), "invalid literal.*base 16")
+assert.fails(lambda: int("-0o123", 16), "invalid literal.*base 16")
+assert.fails(lambda: int("0x110", 2), "invalid literal.*base 2")
+
+# Base prefix is honored only if base=0, or if the prefix matches the explicit base.
+# See https://github.com/google/starlark-go/issues/337
+assert.fails(lambda: int("0b0"), "invalid literal.*base 10")
+assert.eq(int("0b0", 0), 0)
+assert.eq(int("0b0", 2), 0)
+assert.eq(int("0b0", 16), 0xb0)
+assert.eq(int("0x0b0", 16), 0xb0)
+assert.eq(int("0x0b0", 0), 0xb0)
+assert.eq(int("0x0b0101", 16), 0x0b0101)
+
+# int from string, auto detect base
+assert.eq(int("123", 0), 123)
+assert.eq(int("+123", 0), +123)
+assert.eq(int("-123", 0), -123)
+assert.eq(int("0x12", 0), 18)
+assert.eq(int("+0x12", 0), +18)
+assert.eq(int("-0x12", 0), -18)
+assert.eq(int("0o123", 0), 83)
+assert.eq(int("+0o123", 0), +83)
+assert.eq(int("-0o123", 0), -83)
+assert.fails(lambda: int("0123", 0), "invalid literal.*base 0") # valid in Python 2.7
+assert.fails(lambda: int("-0123", 0), "invalid literal.*base 0")
+
+# github.com/google/starlark-go/issues/108
+assert.fails(lambda: int("0Oxa", 8), "invalid literal with base 8: 0Oxa")
+
+# follow-on bugs to issue 108
+assert.fails(lambda: int("--4"), "invalid literal with base 10: --4")
+assert.fails(lambda: int("++4"), "invalid literal with base 10: \\+\\+4")
+assert.fails(lambda: int("+-4"), "invalid literal with base 10: \\+-4")
+assert.fails(lambda: int("0x-4", 16), "invalid literal with base 16: 0x-4")
+
+# bitwise union (int|int), intersection (int&int), XOR (int^int), unary not (~int),
+# left shift (int<<int), and right shift (int>>int).
+# use resolve.AllowBitwise to enable the ops.
+# TODO(adonovan): this is not yet in the Starlark spec,
+# but there is consensus that it should be.
+assert.eq(1 | 2, 3)
+assert.eq(3 | 6, 7)
+assert.eq((1 | 2) & (2 | 4), 2)
+assert.eq(1 ^ 2, 3)
+assert.eq(2 ^ 2, 0)
+assert.eq(1 | 0 ^ 1, 1) # check | and ^ operators precedence
+assert.eq(~1, -2)
+assert.eq(~(-2), 1)
+assert.eq(~0, -1)
+assert.eq(1 << 2, 4)
+assert.eq(2 >> 1, 1)
+assert.fails(lambda: 2 << -1, "negative shift count")
+assert.fails(lambda: 1 << 512, "shift count too large")
+
+# comparisons
+# TODO(adonovan): test: < > == != etc
+def comparisons():
+    for m in [1, maxint32 // 2, maxint32]:  # Test small/big ranges; // keeps every m an int (/ would yield a float)
+        assert.lt(-2 * m, -1 * m)
+        assert.lt(-1 * m, 0 * m)
+        assert.lt(0 * m, 1 * m)
+        assert.lt(1 * m, 2 * m)
+        assert.true(2 * m >= 2 * m)
+        assert.true(2 * m > 1 * m)
+        assert.true(1 * m >= 1 * m)
+        assert.true(1 * m > 0 * m)
+        assert.true(0 * m >= 0 * m)
+        assert.true(0 * m > -1 * m)
+        assert.true(-1 * m >= -1 * m)
+        assert.true(-1 * m > -2 * m)
+
+comparisons()
+
+# precision
+assert.eq(str(maxint64), "9223372036854775807")
+assert.eq(str(maxint64 + 1), "9223372036854775808")
+assert.eq(str(minint64), "-9223372036854775808")
+assert.eq(str(minint64 - 1), "-9223372036854775809")
+assert.eq(str(minint64 * minint64), "85070591730234615865843651857942052864")
+assert.eq(str(maxint32 + 1), "2147483648")
+assert.eq(str(minint32 - 1), "-2147483649")
+assert.eq(str(minint32 * minint32), "4611686018427387904")
+assert.eq(str(minint32 | maxint32), "-1")
+assert.eq(str(minint32 & minint32), "-2147483648")
+assert.eq(str(minint32 ^ maxint32), "-1")
+assert.eq(str(minint32 // -1), "2147483648")
+
+# string formatting
+assert.eq("%o %x %d" % (0o755, 0xDEADBEEF, 42), "755 deadbeef 42")
+nums = [-95, -1, 0, +1, +95]
+assert.eq(" ".join(["%o" % x for x in nums]), "-137 -1 0 1 137")
+assert.eq(" ".join(["%d" % x for x in nums]), "-95 -1 0 1 95")
+assert.eq(" ".join(["%i" % x for x in nums]), "-95 -1 0 1 95")
+assert.eq(" ".join(["%x" % x for x in nums]), "-5f -1 0 1 5f")
+assert.eq(" ".join(["%X" % x for x in nums]), "-5F -1 0 1 5F")
+assert.eq("%o %x %d" % (123, 123, 123), "173 7b 123")
+assert.eq("%o %x %d" % (123.1, 123.1, 123.1), "173 7b 123") # non-int operands are acceptable
+assert.fails(lambda: "%d" % True, "cannot convert bool to int")
diff --git a/starlark/testdata/json.star b/starlark/testdata/json.star
new file mode 100644
index 0000000..7c7b316
--- /dev/null
+++ b/starlark/testdata/json.star
@@ -0,0 +1,147 @@
+# Tests of json module.
+
+load("assert.star", "assert")
+load("json.star", "json")
+
+assert.eq(dir(json), ["decode", "encode", "indent"])
+
+# Some of these cases were inspired by github.com/nst/JSONTestSuite.
+
+## json.encode
+
+assert.eq(json.encode(None), "null")
+assert.eq(json.encode(True), "true")
+assert.eq(json.encode(False), "false")
+assert.eq(json.encode(-123), "-123")
+assert.eq(json.encode(12345*12345*12345*12345*12345*12345), "3539537889086624823140625")
+assert.eq(json.encode(float(12345*12345*12345*12345*12345*12345)), "3.539537889086625e+24")
+assert.eq(json.encode(12.345e67), "1.2345e+68")
+assert.eq(json.encode("hello"), '"hello"')
+assert.eq(json.encode([1, 2, 3]), "[1,2,3]")
+assert.eq(json.encode((1, 2, 3)), "[1,2,3]")
+assert.eq(json.encode(range(3)), "[0,1,2]") # a built-in iterable
+assert.eq(json.encode(dict(x = 1, y = "two")), '{"x":1,"y":"two"}')
+assert.eq(json.encode(dict(y = "two", x = 1)), '{"x":1,"y":"two"}') # key, not insertion, order
+assert.eq(json.encode(struct(x = 1, y = "two")), '{"x":1,"y":"two"}') # a user-defined HasAttrs
+assert.eq(json.encode("😹"[:1]), '"\\ufffd"') # invalid UTF-8 -> replacement char
+
+def encode_error(expr, error):
+ assert.fails(lambda: json.encode(expr), error)
+
+encode_error(float("NaN"), "json.encode: cannot encode non-finite float nan")
+encode_error({1: "two"}, "dict has int key, want string")
+encode_error(len, "cannot encode builtin_function_or_method as JSON")
+encode_error(struct(x=[1, {"x": len}]), # nested failure
+ 'in field .x: at list index 1: in dict key "x": cannot encode...')
+encode_error(struct(x=[1, {"x": len}]), # nested failure
+ 'in field .x: at list index 1: in dict key "x": cannot encode...')
+encode_error({1: 2}, 'dict has int key, want string')
+
+## json.decode
+
+assert.eq(json.decode("null"), None)
+assert.eq(json.decode("true"), True)
+assert.eq(json.decode("false"), False)
+assert.eq(json.decode("-123"), -123)
+assert.eq(json.decode("-0"), -0)
+assert.eq(json.decode("3539537889086624823140625"), 3539537889086624823140625)
+assert.eq(json.decode("3539537889086624823140625.0"), float(3539537889086624823140625))
+assert.eq(json.decode("3.539537889086625e+24"), 3.539537889086625e+24)
+assert.eq(json.decode("0e+1"), 0)
+assert.eq(json.decode("-0.0"), -0.0)
+assert.eq(json.decode(
+ "-0.000000000000000000000000000000000000000000000000000000000000000000000000000001"),
+ -0.000000000000000000000000000000000000000000000000000000000000000000000000000001)
+assert.eq(json.decode('[]'), [])
+assert.eq(json.decode('[1]'), [1])
+assert.eq(json.decode('[1,2,3]'), [1, 2, 3])
+assert.eq(json.decode('{"one": 1, "two": 2}'), dict(one=1, two=2))
+assert.eq(json.decode('{"foo\\u0000bar": 42}'), {"foo\x00bar": 42})
+assert.eq(json.decode('"\\ud83d\\ude39\\ud83d\\udc8d"'), "😹💍")
+assert.eq(json.decode('"\\u0123"'), 'ģ')
+assert.eq(json.decode('"\x7f"'), "\x7f")
+
+def decode_error(expr, error):
+ assert.fails(lambda: json.decode(expr), error)
+
+decode_error('truefalse',
+ "json.decode: at offset 4, unexpected character 'f' after value")
+
+decode_error('"abc', "unclosed string literal")
+decode_error('"ab\\gc"', "invalid character 'g' in string escape code")
+decode_error("'abc'", "unexpected character '\\\\''")
+
+decode_error("1.2.3", "invalid number: 1.2.3")
+decode_error("+1", "unexpected character '\\+'")
+decode_error("-abc", "invalid number: -")
+decode_error("-", "invalid number: -")
+decode_error("-00", "invalid number: -00")
+decode_error("00", "invalid number: 00")
+decode_error("--1", "invalid number: --1")
+decode_error("-+1", "invalid number: -\\+1")
+decode_error("1e1e1", "invalid number: 1e1e1")
+decode_error("0123", "invalid number: 0123")
+decode_error("000.123", "invalid number: 000.123")
+decode_error("-0123", "invalid number: -0123")
+decode_error("-000.123", "invalid number: -000.123")
+decode_error("0x123", "unexpected character 'x' after value")
+
+decode_error('[1, 2 ', "unexpected end of file")
+decode_error('[1, 2, ', "unexpected end of file")
+decode_error('[1, 2, ]', "unexpected character ']'")
+decode_error('[1, 2, }', "unexpected character '}'")
+decode_error('[1, 2}', "got '}', want ',' or ']'")
+
+decode_error('{"one": 1', "unexpected end of file")
+decode_error('{"one" 1', "after object key, got '1', want ':'")
+decode_error('{"one": 1 "two": 2', "in object, got '\"', want ',' or '}'")
+decode_error('{"one": 1,', "unexpected end of file")
+decode_error('{"one": 1, }', "unexpected character '}'")
+decode_error('{"one": 1]', "in object, got ']', want ',' or '}'")
+
+def codec(x):
+ return json.decode(json.encode(x))
+
+# string round-tripping
+strings = [
+ "😿", # U+1F63F CRYING_CAT_FACE
+ "🐱‍👤", # CAT FACE + ZERO WIDTH JOINER + BUST IN SILHOUETTE
+]
+assert.eq(codec(strings), strings)
+
+# codepoints is a string with every 16-bit code point.
+codepoints = ''.join(['%c' % c for c in range(65536)])
+assert.eq(codec(codepoints), codepoints)
+
+# number round-tripping
+numbers = [
+ 0, 1, -1, +1, 1.23e45, -1.23e-45,
+ 3539537889086624823140625,
+ float(3539537889086624823140625),
+]
+assert.eq(codec(numbers), numbers)
+
+## json.indent
+
+s = json.encode(dict(x = 1, y = ["one", "two"]))
+
+assert.eq(json.indent(s), '''{
+ "x": 1,
+ "y": [
+ "one",
+ "two"
+ ]
+}''')
+
+assert.eq(json.decode(json.indent(s)), {"x": 1, "y": ["one", "two"]})
+
+assert.eq(json.indent(s, prefix='¶', indent='–––'), '''{
+¶–––"x": 1,
+¶–––"y": [
+¶––––––"one",
+¶––––––"two"
+¶–––]
+¶}''')
+
+assert.fails(lambda: json.indent("!@#$%^& this is not json"), 'invalid character')
+---
diff --git a/starlark/testdata/list.star b/starlark/testdata/list.star
new file mode 100644
index 0000000..526a962
--- /dev/null
+++ b/starlark/testdata/list.star
@@ -0,0 +1,276 @@
+# Tests of Starlark 'list'
+
+load("assert.star", "assert", "freeze")
+
+# literals
+assert.eq([], [])
+assert.eq([1], [1])
+assert.eq([1], [1])
+assert.eq([1, 2], [1, 2])
+assert.ne([1, 2, 3], [1, 2, 4])
+
+# truth
+assert.true([0])
+assert.true(not [])
+
+# indexing, x[i]
+abc = list("abc".elems())
+assert.fails(lambda: abc[-4], "list index -4 out of range \\[-3:2]")
+assert.eq(abc[-3], "a")
+assert.eq(abc[-2], "b")
+assert.eq(abc[-1], "c")
+assert.eq(abc[0], "a")
+assert.eq(abc[1], "b")
+assert.eq(abc[2], "c")
+assert.fails(lambda: abc[3], "list index 3 out of range \\[-3:2]")
+
+# x[i] = ...
+x3 = [0, 1, 2]
+x3[1] = 2
+x3[2] += 3
+assert.eq(x3, [0, 2, 5])
+
+def f2():
+ x3[3] = 4
+
+assert.fails(f2, "out of range")
+freeze(x3)
+
+def f3():
+ x3[0] = 0
+
+assert.fails(f3, "cannot assign to element of frozen list")
+assert.fails(x3.clear, "cannot clear frozen list")
+
+# list + list
+assert.eq([1, 2, 3] + [3, 4, 5], [1, 2, 3, 3, 4, 5])
+assert.fails(lambda: [1, 2] + (3, 4), "unknown.*list \\+ tuple")
+assert.fails(lambda: (1, 2) + [3, 4], "unknown.*tuple \\+ list")
+
+# list * int, int * list
+assert.eq(abc * 0, [])
+assert.eq(abc * -1, [])
+assert.eq(abc * 1, abc)
+assert.eq(abc * 3, ["a", "b", "c", "a", "b", "c", "a", "b", "c"])
+assert.eq(0 * abc, [])
+assert.eq(-1 * abc, [])
+assert.eq(1 * abc, abc)
+assert.eq(3 * abc, ["a", "b", "c", "a", "b", "c", "a", "b", "c"])
+
+# list comprehensions
+assert.eq([2 * x for x in [1, 2, 3]], [2, 4, 6])
+assert.eq([2 * x for x in [1, 2, 3] if x > 1], [4, 6])
+assert.eq(
+ [(x, y) for x in [1, 2] for y in [3, 4]],
+ [(1, 3), (1, 4), (2, 3), (2, 4)],
+)
+assert.eq([(x, y) for x in [1, 2] if x == 2 for y in [3, 4]], [(2, 3), (2, 4)])
+assert.eq([2 * x for x in (1, 2, 3)], [2, 4, 6])
+assert.eq([x for x in "abc".elems()], ["a", "b", "c"])
+assert.eq([x for x in {"a": 1, "b": 2}], ["a", "b"])
+assert.eq([(y, x) for x, y in {1: 2, 3: 4}.items()], [(2, 1), (4, 3)])
+
+# corner cases of parsing:
+assert.eq([x for x in range(12) if x % 2 == 0 if x % 3 == 0], [0, 6])
+assert.eq([x for x in [1, 2] if lambda: None], [1, 2])
+assert.eq([x for x in [1, 2] if (lambda: 3 if True else 4)], [1, 2])
+
+# list function
+assert.eq(list(), [])
+assert.eq(list("ab".elems()), ["a", "b"])
+
+# A list comprehension defines a separate lexical block,
+# whether at top-level...
+a = [1, 2]
+b = [a for a in [3, 4]]
+assert.eq(a, [1, 2])
+assert.eq(b, [3, 4])
+
+# ...or local to a function.
+def listcompblock():
+ c = [1, 2]
+ d = [c for c in [3, 4]]
+ assert.eq(c, [1, 2])
+ assert.eq(d, [3, 4])
+
+listcompblock()
+
+# list.pop
+x4 = [1, 2, 3, 4, 5]
+assert.fails(lambda: x4.pop(-6), "index -6 out of range \\[-5:4]")
+assert.fails(lambda: x4.pop(6), "index 6 out of range \\[-5:4]")
+assert.eq(x4.pop(), 5)
+assert.eq(x4, [1, 2, 3, 4])
+assert.eq(x4.pop(1), 2)
+assert.eq(x4, [1, 3, 4])
+assert.eq(x4.pop(0), 1)
+assert.eq(x4, [3, 4])
+assert.eq(x4.pop(-2), 3)
+assert.eq(x4, [4])
+assert.eq(x4.pop(-1), 4)
+assert.eq(x4, [])
+
+# TODO(adonovan): test uses of list as sequence
+# (for loop, comprehension, library functions).
+
+# x += y for lists is equivalent to x.extend(y).
+# y may be a sequence.
+# TODO: Test that side-effects of 'x' occur only once.
+def list_extend():
+ a = [1, 2, 3]
+ b = a
+ a = a + [4] # creates a new list
+ assert.eq(a, [1, 2, 3, 4])
+ assert.eq(b, [1, 2, 3]) # b is unchanged
+
+ a = [1, 2, 3]
+ b = a
+ a += [4] # updates a (and thus b) in place
+ assert.eq(a, [1, 2, 3, 4])
+ assert.eq(b, [1, 2, 3, 4]) # alias observes the change
+
+ a = [1, 2, 3]
+ b = a
+ a.extend([4]) # updates existing list
+ assert.eq(a, [1, 2, 3, 4])
+ assert.eq(b, [1, 2, 3, 4]) # alias observes the change
+
+list_extend()
+
+# Unlike list.extend(iterable), list += iterable makes its LHS name local.
+a_list = []
+
+def f4():
+ a_list += [1] # binding use => a_list is a local var
+
+assert.fails(f4, "local variable a_list referenced before assignment")
+
+# list += <not iterable>
+def f5():
+ x = []
+ x += 1
+
+assert.fails(f5, "unknown binary op: list \\+ int")
+
+# frozen list += iterable
+def f6():
+ x = []
+ freeze(x)
+ x += [1]
+
+assert.fails(f6, "cannot apply \\+= to frozen list")
+
+# list += hasfields (hasfields is not iterable but defines list+hasfields)
+def f7():
+ x = []
+ x += hasfields()
+ return x
+
+assert.eq(f7(), 42) # weird, but exercises a corner case in list+=x.
+
+# append
+x5 = [1, 2, 3]
+x5.append(4)
+x5.append("abc")
+assert.eq(x5, [1, 2, 3, 4, "abc"])
+
+# extend
+x5a = [1, 2, 3]
+x5a.extend("abc".elems()) # string
+x5a.extend((True, False)) # tuple
+assert.eq(x5a, [1, 2, 3, "a", "b", "c", True, False])
+
+# list.insert
+def insert_at(index):
+ x = list(range(3))
+ x.insert(index, 42)
+ return x
+
+assert.eq(insert_at(-99), [42, 0, 1, 2])
+assert.eq(insert_at(-2), [0, 42, 1, 2])
+assert.eq(insert_at(-1), [0, 1, 42, 2])
+assert.eq(insert_at(0), [42, 0, 1, 2])
+assert.eq(insert_at(1), [0, 42, 1, 2])
+assert.eq(insert_at(2), [0, 1, 42, 2])
+assert.eq(insert_at(3), [0, 1, 2, 42])
+assert.eq(insert_at(4), [0, 1, 2, 42])
+
+# list.remove
+def remove(v):
+ x = [3, 1, 4, 1]
+ x.remove(v)
+ return x
+
+assert.eq(remove(3), [1, 4, 1])
+assert.eq(remove(1), [3, 4, 1])
+assert.eq(remove(4), [3, 1, 1])
+assert.fails(lambda: [3, 1, 4, 1].remove(42), "remove: element not found")
+
+# list.index
+bananas = list("bananas".elems())
+assert.eq(bananas.index("a"), 1) # bAnanas
+assert.fails(lambda: bananas.index("d"), "value not in list")
+
+# start
+assert.eq(bananas.index("a", -1000), 1) # bAnanas
+assert.eq(bananas.index("a", 0), 1) # bAnanas
+assert.eq(bananas.index("a", 1), 1) # bAnanas
+assert.eq(bananas.index("a", 2), 3) # banAnas
+assert.eq(bananas.index("a", 3), 3) # banAnas
+assert.eq(bananas.index("b", 0), 0) # Bananas
+assert.eq(bananas.index("n", -3), 4) # banaNas
+assert.fails(lambda: bananas.index("n", -2), "value not in list")
+assert.eq(bananas.index("s", -2), 6) # bananaS
+assert.fails(lambda: bananas.index("b", 1), "value not in list")
+
+# start, end
+assert.eq(bananas.index("s", -1000, 7), 6) # bananaS
+assert.fails(lambda: bananas.index("s", -1000, 6), "value not in list")
+assert.fails(lambda: bananas.index("d", -1000, 1000), "value not in list")
+
+# slicing, x[i:j:k]
+assert.eq(bananas[6::-2], list("snnb".elems()))
+assert.eq(bananas[5::-2], list("aaa".elems()))
+assert.eq(bananas[4::-2], list("nnb".elems()))
+assert.eq(bananas[99::-2], list("snnb".elems()))
+assert.eq(bananas[100::-2], list("snnb".elems()))
+# TODO(adonovan): many more tests
+
+# iterator invalidation
+def iterator1():
+ list = [0, 1, 2]
+ for x in list:
+ list[x] = 2 * x
+ return list
+
+assert.fails(iterator1, "assign to element.* during iteration")
+
+def iterator2():
+ list = [0, 1, 2]
+ for x in list:
+ list.remove(x)
+
+assert.fails(iterator2, "remove.*during iteration")
+
+def iterator3():
+ list = [0, 1, 2]
+ for x in list:
+ list.append(3)
+
+assert.fails(iterator3, "append.*during iteration")
+
+def iterator4():
+ list = [0, 1, 2]
+ for x in list:
+ list.extend([3, 4])
+
+assert.fails(iterator4, "extend.*during iteration")
+
+def iterator5():
+ def f(x):
+ x.append(4)
+
+ list = [1, 2, 3]
+ _ = [f(list) for x in list]
+
+assert.fails(iterator5, "append.*during iteration")
diff --git a/starlark/testdata/misc.star b/starlark/testdata/misc.star
new file mode 100644
index 0000000..e7e0c06
--- /dev/null
+++ b/starlark/testdata/misc.star
@@ -0,0 +1,139 @@
+# Miscellaneous tests of Starlark evaluation.
+# This is a "chunked" file: each "---" effectively starts a new file.
+
+# TODO(adonovan): move these tests into more appropriate files.
+# TODO(adonovan): test coverage:
+# - stmts: pass; if cond fail; += and failures;
+# for x fail; for x not iterable; for can't assign; for
+# error in loop body
+# - subassign fail
+# - x[i]=x fail in both operands; frozen x; list index not int; boundscheck
+# - x.f = ...
+# - failure in list expr [...]; tuple expr; dict expr (bad key)
+# - cond expr semantics; failures
+# - x[i] failures in both args; dict and iterator key and range checks;
+# unhandled operand types
+# - +: list/list, int/int, string/string, tuple+tuple, dict/dict;
+# - * and ** calls: various errors
+# - call of non-function
+# - slice x[ijk]
+# - comprehension: unhashable dict key;
+# scope of vars (local and toplevel); noniterable for clause
+# - unknown unary op
+# - ordering of values
+# - freeze, transitivity of its effect.
+# - add an application-defined type to the environment so we can test it.
+# - even more:
+#
+# eval
+# pass statement
+# assign to tuple l-value -- illegal
+# assign to list l-value -- illegal
+# assign to field
+# tuple + tuple
+# call with *args, **kwargs
+# slice with step
+# tuple slice
+# interpolate with %c, %%
+
+load("assert.star", "assert")
+
+# Ordered comparisons require values of the same type.
+assert.fails(lambda: None < None, "not impl")
+assert.fails(lambda: None < False, "not impl")
+assert.fails(lambda: False < list, "not impl")
+assert.fails(lambda: list < {}, "not impl")
+assert.fails(lambda: {} < (lambda: None), "not impl")
+assert.fails(lambda: (lambda: None) < 0, "not impl")
+assert.fails(lambda: 0 < [], "not impl")
+assert.fails(lambda: [] < "", "not impl")
+assert.fails(lambda: "" < (), "not impl")
+# Except int < float:
+assert.lt(1, 2.0)
+assert.lt(2.0, 3)
+
+---
+# cyclic data structures
+load("assert.star", "assert")
+
+cyclic = [1, 2, 3] # list cycle
+cyclic[1] = cyclic
+assert.eq(str(cyclic), "[1, [...], 3]")
+assert.fails(lambda: cyclic < cyclic, "maximum recursion")
+assert.fails(lambda: cyclic == cyclic, "maximum recursion")
+cyclic2 = [1, 2, 3]
+cyclic2[1] = cyclic2
+assert.fails(lambda: cyclic2 == cyclic, "maximum recursion")
+
+cyclic3 = [1, [2, 3]] # list-list cycle
+cyclic3[1][0] = cyclic3
+assert.eq(str(cyclic3), "[1, [[...], 3]]")
+cyclic4 = {"x": 1}
+cyclic4["x"] = cyclic4
+assert.eq(str(cyclic4), "{\"x\": {...}}")
+cyclic5 = [0, {"x": 1}] # list-dict cycle
+cyclic5[1]["x"] = cyclic5
+assert.eq(str(cyclic5), "[0, {\"x\": [...]}]")
+assert.eq(str(cyclic5), "[0, {\"x\": [...]}]")
+assert.fails(lambda: cyclic5 == cyclic5 ,"maximum recursion")
+cyclic6 = [0, {"x": 1}]
+cyclic6[1]["x"] = cyclic6
+assert.fails(lambda: cyclic5 == cyclic6, "maximum recursion")
+
+---
+# regression
+load("assert.star", "assert")
+
+# was a parse error:
+assert.eq(("ababab"[2:]).replace("b", "c"), "acac")
+assert.eq("ababab"[2:].replace("b", "c"), "acac")
+
+# test parsing of line continuation, at toplevel and in expression.
+three = 1 + \
+ 2
+assert.eq(1 + \
+ 2, three)
+
+---
+# A regression test for error position information.
+
+_ = {}.get(1, default=2) ### "get: unexpected keyword arguments"
+
+---
+# Load exposes explicitly declared globals from other modules.
+load('assert.star', 'assert', 'freeze')
+assert.eq(str(freeze), '<built-in function freeze>')
+
+---
+# Load does not expose pre-declared globals from other modules.
+# See github.com/google/skylark/issues/75.
+load('assert.star', 'assert', 'matches') ### "matches not found in module"
+
+---
+# Load does not expose universals accessible in other modules.
+load('assert.star', 'len') ### "len not found in module"
+
+
+---
+# Test plus folding optimization.
+load('assert.star', 'assert')
+
+s = "s"
+l = [4]
+t = (4,)
+
+assert.eq("a" + "b" + "c", "abc")
+assert.eq("a" + "b" + s + "c", "absc")
+assert.eq(() + (1,) + (2, 3), (1, 2, 3))
+assert.eq(() + (1,) + t + (2, 3), (1, 4, 2, 3))
+assert.eq([] + [1] + [2, 3], [1, 2, 3])
+assert.eq([] + [1] + l + [2, 3], [1, 4, 2, 3])
+
+assert.fails(lambda: "a" + "b" + 1 + "c", "unknown binary op: string \\+ int")
+assert.fails(lambda: () + () + 1 + (), "unknown binary op: tuple \\+ int")
+assert.fails(lambda: [] + [] + 1 + [], "unknown binary op: list \\+ int")
+
+
+
+---
+load('assert.star', 'froze') ### `name froze not found .*did you mean freeze`
diff --git a/starlark/testdata/module.star b/starlark/testdata/module.star
new file mode 100644
index 0000000..6aac2e2
--- /dev/null
+++ b/starlark/testdata/module.star
@@ -0,0 +1,17 @@
+# Tests of Module.
+
+load("assert.star", "assert")
+
+assert.eq(type(assert), "module")
+assert.eq(str(assert), '<module "assert">')
+assert.eq(dir(assert), ["contains", "eq", "fail", "fails", "lt", "ne", "true"])
+assert.fails(lambda : {assert: None}, "unhashable: module")
+
+def assignfield():
+ assert.foo = None
+
+assert.fails(assignfield, "can't assign to .foo field of module")
+
+# no such field
+assert.fails(lambda : assert.nonesuch, "module has no .nonesuch field or method$")
+assert.fails(lambda : assert.falls, "module has no .falls field or method .did you mean .fails\\?")
diff --git a/starlark/testdata/paths.star b/starlark/testdata/paths.star
new file mode 100644
index 0000000..cf8a3c4
--- /dev/null
+++ b/starlark/testdata/paths.star
@@ -0,0 +1,250 @@
+# Copyright 2017 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Skylib module containing file path manipulation functions.
+
+NOTE: The functions in this module currently only support paths with Unix-style
+path separators (forward slash, "/"); they do not handle Windows-style paths
+with backslash separators or drive letters.
+"""
+
+# This file is in the Bazel build language dialect of Starlark,
+# so declarations of 'fail' and 'struct' are required to make
+# it compile in the core language.
+def fail(msg):
+ print(msg)
+
+struct = dict
+
+def _basename(p):
+ """Returns the basename (i.e., the file portion) of a path.
+
+ Note that if `p` ends with a slash, this function returns an empty string.
+ This matches the behavior of Python's `os.path.basename`, but differs from
+ the Unix `basename` command (which would return the path segment preceding
+ the final slash).
+
+ Args:
+ p: The path whose basename should be returned.
+
+ Returns:
+ The basename of the path, which includes the extension.
+ """
+ return p.rpartition("/")[-1]
+
+def _dirname(p):
+ """Returns the dirname of a path.
+
+ The dirname is the portion of `p` up to but not including the file portion
+ (i.e., the basename). Any slashes immediately preceding the basename are not
+ included, unless omitting them would make the dirname empty.
+
+ Args:
+ p: The path whose dirname should be returned.
+
+ Returns:
+ The dirname of the path.
+ """
+ prefix, sep, _ = p.rpartition("/")
+ if not prefix:
+ return sep
+ else:
+ # If there are multiple consecutive slashes, strip them all out as Python's
+ # os.path.dirname does.
+ return prefix.rstrip("/")
+
+def _is_absolute(path):
+ """Returns `True` if `path` is an absolute path.
+
+ Args:
+ path: A path (which is a string).
+
+ Returns:
+ `True` if `path` is an absolute path.
+ """
+ return path.startswith("/") or (len(path) > 2 and path[1] == ":") # NOTE(review): 2nd clause accepts any 3+ char string with ":" at index 1 (presumably a drive letter) - confirm
+
+def _join(path, *others):
+ """Joins one or more path components intelligently.
+
+ This function mimics the behavior of Python's `os.path.join` function on POSIX
+ platforms. It returns the concatenation of `path` and any members of `others`,
+ inserting directory separators before each component except the first. The
+ separator is not inserted if the path up until that point is either empty or
+ already ends in a separator.
+
+ If any component is an absolute path, all previous components are discarded.
+
+ Args:
+ path: A path segment.
+ *others: Additional path segments.
+
+ Returns:
+ A string containing the joined paths.
+ """
+ result = path
+
+ for p in others:
+ if _is_absolute(p):
+ result = p
+ elif not result or result.endswith("/"):
+ result += p
+ else:
+ result += "/" + p
+
+ return result
+
+def _normalize(path):
+ """Normalizes a path, eliminating double slashes and other redundant segments.
+
+ This function mimics the behavior of Python's `os.path.normpath` function on
+ POSIX platforms; specifically:
+
+ - If the entire path is empty, "." is returned.
+ - All "." segments are removed, unless the path consists solely of a single
+ "." segment.
+ - Trailing slashes are removed, unless the path consists solely of slashes.
+ - ".." segments are removed as long as there are corresponding segments
+ earlier in the path to remove; otherwise, they are retained as leading ".."
+ segments.
+ - Single and double leading slashes are preserved, but three or more leading
+ slashes are collapsed into a single leading slash.
+ - Multiple adjacent internal slashes are collapsed into a single slash.
+
+ Args:
+ path: A path.
+
+ Returns:
+ The normalized path.
+ """
+ if not path:
+ return "."
+
+ if path.startswith("//") and not path.startswith("///"):
+ initial_slashes = 2
+ elif path.startswith("/"):
+ initial_slashes = 1
+ else:
+ initial_slashes = 0
+ is_relative = (initial_slashes == 0)
+
+ components = path.split("/")
+ new_components = []
+
+ for component in components:
+ if component in ("", "."):
+ continue
+ if component == "..":
+ if new_components and new_components[-1] != "..":
+ # Only pop the last segment if it isn't another "..".
+ new_components.pop()
+ elif is_relative:
+ # Preserve leading ".." segments for relative paths.
+ new_components.append(component)
+ else:
+ new_components.append(component)
+
+ path = "/".join(new_components)
+ if not is_relative:
+ path = ("/" * initial_slashes) + path
+
+ return path or "."
+
+def _relativize(path, start):
+    """Returns the portion of `path` that is relative to `start`.
+
+    Because we do not have access to the underlying file system, this
+    implementation differs slightly from Python's `os.path.relpath` in that it
+    will fail if `path` is not beneath `start` (rather than use parent segments to
+    walk up to the common file system root).
+
+    Relativizing paths that start with parent directory references only works if
+    both paths start with the same initial parent references.
+
+    Args:
+      path: The path to relativize.
+      start: The ancestor path against which to relativize.
+
+    Returns:
+      The portion of `path` that is relative to `start`.
+    """
+    segments = _normalize(path).split("/")
+    start_segments = _normalize(start).split("/")
+    if start_segments == ["."]:
+        start_segments = []
+    start_length = len(start_segments)
+
+    if (path.startswith("/") != start.startswith("/") or
+        len(segments) < start_length):
+        fail("Path '%s' is not beneath '%s'" % (path, start))
+
+    for ancestor_segment, segment in zip(start_segments, segments):
+        if ancestor_segment != segment:
+            fail("Path '%s' is not beneath '%s'" % (path, start))
+
+    # Drop the leading `start` segments. Slicing from the front (rather than
+    # segments[-length:]) yields [] when `path` equals `start`.
+    return "/".join(segments[start_length:])
+
+def _replace_extension(p, new_extension):
+ """Replaces the extension of the file at the end of a path.
+
+ If the path has no extension, the new extension is added to it.
+
+ Args:
+ p: The path whose extension should be replaced.
+ new_extension: The new extension for the file. The new extension should
+ begin with a dot if you want the new filename to have one.
+
+ Returns:
+ The path with the extension replaced (or added, if it did not have one).
+ """
+ return _split_extension(p)[0] + new_extension
+
+def _split_extension(p):
+ """Splits the path `p` into a tuple containing the root and extension.
+
+ Leading periods on the basename are ignored, so
+ `path.split_extension(".bashrc")` returns `(".bashrc", "")`.
+
+ Args:
+ p: The path whose root and extension should be split.
+
+ Returns:
+ A tuple `(root, ext)` such that the root is the path without the file
+ extension, and `ext` is the file extension (which, if non-empty, contains
+ the leading dot). The returned tuple always satisfies the relationship
+ `root + ext == p`.
+ """
+ b = _basename(p)
+ last_dot_in_basename = b.rfind(".")
+
+ # If there is no dot or the only dot in the basename is at the front, then
+ # there is no extension.
+ if last_dot_in_basename <= 0:
+ return (p, "")
+
+ dot_distance_from_end = len(b) - last_dot_in_basename
+ return (p[:-dot_distance_from_end], p[-dot_distance_from_end:])
+
+paths = struct(
+ basename = _basename,
+ dirname = _dirname,
+ is_absolute = _is_absolute,
+ join = _join,
+ normalize = _normalize,
+ relativize = _relativize,
+ replace_extension = _replace_extension,
+ split_extension = _split_extension,
+)
diff --git a/starlark/testdata/recursion.star b/starlark/testdata/recursion.star
new file mode 100644
index 0000000..3368614
--- /dev/null
+++ b/starlark/testdata/recursion.star
@@ -0,0 +1,43 @@
# Tests of Starlark recursion and while statement.

# This is a "chunked" file: each "---" effectively starts a new file.

# option:recursion

load("assert.star", "assert")

# sum adds n + (n-1) + ... + 1 with a while loop; it returns 0 when n <= 0.
def sum(n):
    r = 0
    while n > 0:
        r += n
        n -= 1
    return r

# fib is a doubly recursive Fibonacci in which fib(0) == fib(1) == 1.
def fib(n):
    if n <= 1:
        return 1
    return fib(n-1) + fib(n-2)

# while_break adds n, n-1, ... while counting down and leaves the loop via
# break as soon as n reaches 5, so 5 and everything below it are excluded.
def while_break(n):
    r = 0
    while n > 0:
        if n == 5:
            break
        r += n
        n -= 1
    return r

# while_continue adds only the odd values among n, n-1, ..., 1; even values
# are skipped with continue (after decrementing, so the loop still ends).
def while_continue(n):
    r = 0
    while n > 0:
        if n % 2 == 0:
            n -= 1
            continue
        r += n
        n -= 1
    return r

assert.eq(fib(5), 8)
assert.eq(sum(5), 5+4+3+2+1)
assert.eq(while_break(10), 40)  # 10+9+8+7+6
assert.eq(while_continue(10), 25)  # 9+7+5+3+1
diff --git a/starlark/testdata/set.star b/starlark/testdata/set.star
new file mode 100644
index 0000000..bca4144
--- /dev/null
+++ b/starlark/testdata/set.star
@@ -0,0 +1,118 @@
+# Tests of Starlark 'set'
+# option:set
+
+# Sets are not a standard part of Starlark, so the features
+# tested in this file must be enabled in the application by setting
+# resolve.AllowSet. (All sets are created by calls to the 'set'
+# built-in or derived from operations on existing sets.)
+# The semantics are subject to change as the spec evolves.
+
+# TODO(adonovan): support set mutation:
+# - del set[k]
+# - set.remove
+# - set.update
+# - set.clear
+# - set += iterable, perhaps?
+# Test iterator invalidation.
+
+load("assert.star", "assert")
+
+# literals
+# Parser does not currently support {1, 2, 3}.
+# TODO(adonovan): add test to syntax/testdata/errors.star.
+
+# set comprehensions
+# Parser does not currently support {x for x in y}.
+# See syntax/testdata/errors.star.
+
+# set constructor
+assert.eq(type(set()), "set")
+assert.eq(list(set()), [])
+assert.eq(type(set([1, 3, 2, 3])), "set")
+assert.eq(list(set([1, 3, 2, 3])), [1, 3, 2])
+assert.eq(type(set("hello".elems())), "set")
+assert.eq(list(set("hello".elems())), ["h", "e", "l", "o"])
+assert.eq(list(set(range(3))), [0, 1, 2])
+assert.fails(lambda : set(1), "got int, want iterable")
+assert.fails(lambda : set(1, 2, 3), "got 3 arguments")
+assert.fails(lambda : set([1, 2, {}]), "unhashable type: dict")
+
+# truth
+assert.true(not set())
+assert.true(set([False]))
+assert.true(set([1, 2, 3]))
+
+x = set([1, 2, 3])
+y = set([3, 4, 5])
+
+# set + any is not defined
+assert.fails(lambda : x + y, "unknown.*: set \\+ set")
+
# set | set (use resolve.AllowBitwise to enable it)
assert.eq(list(set("a".elems()) | set("b".elems())), ["a", "b"])
assert.eq(list(set("ab".elems()) | set("bc".elems())), ["a", "b", "c"])

# The expected message is a regular expression, so the literal '|' has to be
# escaped; left bare it is an alternation that would also accept any error
# merely containing " list".
assert.fails(lambda : set() | [], "unknown binary op: set \\| list")
assert.eq(type(x | y), "set")
assert.eq(list(x | y), [1, 2, 3, 4, 5])
assert.eq(list(x | set([5, 1])), [1, 2, 3, 5])
assert.eq(list(x | set((6, 5, 4))), [1, 2, 3, 6, 5, 4])
+
+# set.union (allows any iterable for right operand)
+assert.eq(list(set("a".elems()).union("b".elems())), ["a", "b"])
+assert.eq(list(set("ab".elems()).union("bc".elems())), ["a", "b", "c"])
+assert.eq(set().union([]), set())
+assert.eq(type(x.union(y)), "set")
+assert.eq(list(x.union(y)), [1, 2, 3, 4, 5])
+assert.eq(list(x.union([5, 1])), [1, 2, 3, 5])
+assert.eq(list(x.union((6, 5, 4))), [1, 2, 3, 6, 5, 4])
+assert.fails(lambda : x.union([1, 2, {}]), "unhashable type: dict")
+
+# intersection, set & set (use resolve.AllowBitwise to enable it)
+assert.eq(list(set("a".elems()) & set("b".elems())), [])
+assert.eq(list(set("ab".elems()) & set("bc".elems())), ["b"])
+
+# symmetric difference, set ^ set (use resolve.AllowBitwise to enable it)
+assert.eq(set([1, 2, 3]) ^ set([4, 5, 3]), set([1, 2, 4, 5]))
+
+def test_set_augmented_assign():
+ x = set([1, 2, 3])
+ x &= set([2, 3])
+ assert.eq(x, set([2, 3]))
+ x |= set([1])
+ assert.eq(x, set([1, 2, 3]))
+ x ^= set([4, 5, 3])
+ assert.eq(x, set([1, 2, 4, 5]))
+
+test_set_augmented_assign()
+
+# len
+assert.eq(len(x), 3)
+assert.eq(len(y), 3)
+assert.eq(len(x | y), 5)
+
+# str
+assert.eq(str(set([1])), "set([1])")
+assert.eq(str(set([2, 3])), "set([2, 3])")
+assert.eq(str(set([3, 2])), "set([3, 2])")
+
+# comparison
+assert.eq(x, x)
+assert.eq(y, y)
+assert.true(x != y)
+assert.eq(set([1, 2, 3]), set([3, 2, 1]))
+assert.fails(lambda : x < y, "set < set not implemented")
+
# iteration
# Use assert.eq here: assert.true only checks that its first argument is
# truthy and treats the second one as the failure message, so it would not
# compare the two values at all.
assert.eq(type([elem for elem in x]), "list")
assert.eq(list([elem for elem in x]), [1, 2, 3])
+
+def iter():
+ list = []
+ for elem in x:
+ list.append(elem)
+ return list
+
+assert.eq(iter(), [1, 2, 3])
+
+# sets are not indexable
+assert.fails(lambda : x[0], "unhandled.*operation")
diff --git a/starlark/testdata/string.star b/starlark/testdata/string.star
new file mode 100644
index 0000000..b317d1a
--- /dev/null
+++ b/starlark/testdata/string.star
@@ -0,0 +1,472 @@
+# Tests of Starlark 'string'
+# option:set
+
+load("assert.star", "assert")
+
+# raw string literals:
+assert.eq(r"a\bc", "a\\bc")
+
+# truth
+assert.true("abc")
+assert.true(chr(0))
+assert.true(not "")
+
+# str + str
+assert.eq("a" + "b" + "c", "abc")
+
+# str * int, int * str
+assert.eq("abc" * 0, "")
+assert.eq("abc" * -1, "")
+assert.eq("abc" * 1, "abc")
+assert.eq("abc" * 5, "abcabcabcabcabc")
+assert.eq(0 * "abc", "")
+assert.eq(-1 * "abc", "")
+assert.eq(1 * "abc", "abc")
+assert.eq(5 * "abc", "abcabcabcabcabc")
+assert.fails(lambda: 1.0 * "abc", "unknown.*float \\* str")
+assert.fails(lambda: "abc" * (1000000 * 1000000), "repeat count 1000000000000 too large")
+assert.fails(lambda: "abc" * 1000000 * 1000000, "excessive repeat \\(3000000 \\* 1000000 elements")
+
+# len
+assert.eq(len("Hello, 世界!"), 14)
+assert.eq(len("𐐷"), 4) # U+10437 has a 4-byte UTF-8 encoding (and a 2-code UTF-16 encoding)
+
+# chr & ord
+assert.eq(chr(65), "A") # 1-byte UTF-8 encoding
+assert.eq(chr(1049), "Й") # 2-byte UTF-8 encoding
+assert.eq(chr(0x1F63F), "😿") # 4-byte UTF-8 encoding
+assert.fails(lambda: chr(-1), "Unicode code point -1 out of range \\(<0\\)")
+assert.fails(lambda: chr(0x110000), "Unicode code point U\\+110000 out of range \\(>0x10FFFF\\)")
+assert.eq(ord("A"), 0x41)
+assert.eq(ord("Й"), 0x419)
+assert.eq(ord("世"), 0x4e16)
+assert.eq(ord("😿"), 0x1F63F)
+assert.eq(ord("Й"[1:]), 0xFFFD) # = Unicode replacement character
+assert.fails(lambda: ord("abc"), "string encodes 3 Unicode code points, want 1")
+assert.fails(lambda: ord(""), "string encodes 0 Unicode code points, want 1")
+assert.fails(lambda: ord("😿"[1:]), "string encodes 3 Unicode code points, want 1") # 3 x 0xFFFD
+
+# string.codepoint_ords
+assert.eq(type("abcЙ😿".codepoint_ords()), "string.codepoints")
+assert.eq(str("abcЙ😿".codepoint_ords()), '"abcЙ😿".codepoint_ords()')
+assert.eq(list("abcЙ😿".codepoint_ords()), [97, 98, 99, 1049, 128575])
+assert.eq(list(("A" + "😿Z"[1:]).codepoint_ords()), [ord("A"), 0xFFFD, 0xFFFD, 0xFFFD, ord("Z")])
+assert.eq(list("".codepoint_ords()), [])
+assert.fails(lambda: "abcЙ😿".codepoint_ords()[2], "unhandled index") # not indexable
+assert.fails(lambda: len("abcЙ😿".codepoint_ords()), "no len") # unknown length
+
+# string.codepoints
+assert.eq(type("abcЙ😿".codepoints()), "string.codepoints")
+assert.eq(str("abcЙ😿".codepoints()), '"abcЙ😿".codepoints()')
+assert.eq(list("abcЙ😿".codepoints()), ["a", "b", "c", "Й", "😿"])
+assert.eq(list(("A" + "😿Z"[1:]).codepoints()), ["A", "�", "�", "�", "Z"])
+assert.eq(list("".codepoints()), [])
+assert.fails(lambda: "abcЙ😿".codepoints()[2], "unhandled index") # not indexable
+assert.fails(lambda: len("abcЙ😿".codepoints()), "no len") # unknown length
+
+# string.elem_ords
+assert.eq(type("abcЙ😿".elem_ords()), "string.elems")
+assert.eq(str("abcЙ😿".elem_ords()), '"abcЙ😿".elem_ords()')
+assert.eq(list("abcЙ😿".elem_ords()), [97, 98, 99, 208, 153, 240, 159, 152, 191])
+assert.eq(list(("A" + "😿Z"[1:]).elem_ords()), [65, 159, 152, 191, 90])
+assert.eq(list("".elem_ords()), [])
+assert.eq("abcЙ😿".elem_ords()[2], 99) # indexable
+assert.eq(len("abcЙ😿".elem_ords()), 9) # known length
+
+# string.elems (1-byte substrings, which are invalid text)
+assert.eq(type("abcЙ😿".elems()), "string.elems")
+assert.eq(str("abcЙ😿".elems()), '"abcЙ😿".elems()')
+assert.eq(
+ repr(list("abcЙ😿".elems())),
+ r'["a", "b", "c", "\xd0", "\x99", "\xf0", "\x9f", "\x98", "\xbf"]',
+)
+assert.eq(
+ repr(list(("A" + "😿Z"[1:]).elems())),
+ r'["A", "\x9f", "\x98", "\xbf", "Z"]',
+)
+assert.eq(list("".elems()), [])
+assert.eq("abcЙ😿".elems()[2], "c") # indexable
+assert.eq(len("abcЙ😿".elems()), 9) # known length
+
+# indexing, x[i]
+assert.eq("Hello, 世界!"[0], "H")
+assert.eq(repr("Hello, 世界!"[7]), r'"\xe4"') # (invalid text)
+assert.eq("Hello, 世界!"[13], "!")
+assert.fails(lambda: "abc"[-4], "out of range")
+assert.eq("abc"[-3], "a")
+assert.eq("abc"[-2], "b")
+assert.eq("abc"[-1], "c")
+assert.eq("abc"[0], "a")
+assert.eq("abc"[1], "b")
+assert.eq("abc"[2], "c")
+assert.fails(lambda: "abc"[4], "out of range")
+
+# x[i] = ...
+def f():
+ "abc"[1] = "B"
+
+assert.fails(f, "string.*does not support.*assignment")
+
+# slicing, x[i:j]
+assert.eq("abc"[:], "abc")
+assert.eq("abc"[-4:], "abc")
+assert.eq("abc"[-3:], "abc")
+assert.eq("abc"[-2:], "bc")
+assert.eq("abc"[-1:], "c")
+assert.eq("abc"[0:], "abc")
+assert.eq("abc"[1:], "bc")
+assert.eq("abc"[2:], "c")
+assert.eq("abc"[3:], "")
+assert.eq("abc"[4:], "")
+assert.eq("abc"[:-4], "")
+assert.eq("abc"[:-3], "")
+assert.eq("abc"[:-2], "a")
+assert.eq("abc"[:-1], "ab")
+assert.eq("abc"[:0], "")
+assert.eq("abc"[:1], "a")
+assert.eq("abc"[:2], "ab")
+assert.eq("abc"[:3], "abc")
+assert.eq("abc"[:4], "abc")
+assert.eq("abc"[1:2], "b")
+assert.eq("abc"[2:1], "")
+assert.eq(repr("😿"[:1]), r'"\xf0"') # (invalid text)
+
+# non-unit strides
+assert.eq("abcd"[0:4:1], "abcd")
+assert.eq("abcd"[::2], "ac")
+assert.eq("abcd"[1::2], "bd")
+assert.eq("abcd"[4:0:-1], "dcb")
+assert.eq("banana"[7::-2], "aaa")
+assert.eq("banana"[6::-2], "aaa")
+assert.eq("banana"[5::-2], "aaa")
+assert.eq("banana"[4::-2], "nnb")
+assert.eq("banana"[::-1], "ananab")
+assert.eq("banana"[None:None:-2], "aaa")
+assert.fails(lambda: "banana"[1.0::], "invalid start index: got float, want int")
+assert.fails(lambda: "banana"[:"":], "invalid end index: got string, want int")
+assert.fails(lambda: "banana"[:"":True], "invalid slice step: got bool, want int")
+
+# in, not in
+assert.true("oo" in "food")
+assert.true("ox" not in "food")
+assert.true("" in "food")
+assert.true("" in "")
+assert.fails(lambda: 1 in "", "requires string as left operand")
+assert.fails(lambda: "" in 1, "unknown binary op: string in int")
+
+# ==, !=
+assert.eq("hello", "he" + "llo")
+assert.ne("hello", "Hello")
+
+# hash must follow java.lang.String.hashCode.
+wanthash = {
+ "": 0,
+ "\0" * 100: 0,
+ "hello": 99162322,
+ "world": 113318802,
+ "Hello, 世界!": 417292677,
+}
+gothash = {s: hash(s) for s in wanthash}
+assert.eq(gothash, wanthash)
+
+# TODO(adonovan): ordered comparisons
+
+# string % tuple formatting
+assert.eq("A %d %x Z" % (123, 456), "A 123 1c8 Z")
+assert.eq("A %(foo)d %(bar)s Z" % {"foo": 123, "bar": "hi"}, "A 123 hi Z")
+assert.eq("%s %r" % ("hi", "hi"), 'hi "hi"') # TODO(adonovan): use ''-quotation
+assert.eq("%%d %d" % 1, "%d 1")
+assert.fails(lambda: "%d %d" % 1, "not enough arguments for format string")
+assert.fails(lambda: "%d %d" % (1, 2, 3), "too many arguments for format string")
+assert.fails(lambda: "" % 1, "too many arguments for format string")
+
+# %c
+assert.eq("%c" % 65, "A")
+assert.eq("%c" % 0x3b1, "α")
+assert.eq("%c" % "A", "A")
+assert.eq("%c" % "α", "α")
+assert.fails(lambda: "%c" % "abc", "requires a single-character string")
+assert.fails(lambda: "%c" % "", "requires a single-character string")
+assert.fails(lambda: "%c" % 65.0, "requires int or single-character string")
+assert.fails(lambda: "%c" % 10000000, "requires a valid Unicode code point")
+assert.fails(lambda: "%c" % -1, "requires a valid Unicode code point")
+# TODO(adonovan): more tests
+
+# str.format
+assert.eq("a{}b".format(123), "a123b")
+assert.eq("a{}b{}c{}d{}".format(1, 2, 3, 4), "a1b2c3d4")
+assert.eq("a{{b".format(), "a{b")
+assert.eq("a}}b".format(), "a}b")
+assert.eq("a{{b}}c".format(), "a{b}c")
+assert.eq("a{x}b{y}c{}".format(1, x = 2, y = 3), "a2b3c1")
+assert.fails(lambda: "a{z}b".format(x = 1), "keyword z not found")
+assert.fails(lambda: "{-1}".format(1), "keyword -1 not found")
+assert.fails(lambda: "{-0}".format(1), "keyword -0 not found")
+assert.fails(lambda: "{+0}".format(1), "keyword \\+0 not found")
+assert.fails(lambda: "{+1}".format(1), "keyword \\+1 not found") # starlark-go/issues/114
+assert.eq("{0000000000001}".format(0, 1), "1")
+assert.eq("{012}".format(*range(100)), "12") # decimal, despite leading zeros
+assert.fails(lambda: "{0,1} and {1}".format(1, 2), "keyword 0,1 not found")
+assert.fails(lambda: "a{123}b".format(), "tuple index out of range")
+assert.fails(lambda: "a{}b{}c".format(1), "tuple index out of range")
+assert.eq("a{010}b".format(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), "a10b") # index is decimal
+assert.fails(lambda: "a{}b{1}c".format(1, 2), "cannot switch from automatic field numbering to manual")
+assert.eq("a{!s}c".format("b"), "abc")
+assert.eq("a{!r}c".format("b"), r'a"b"c')
+assert.eq("a{x!r}c".format(x = "b"), r'a"b"c')
+assert.fails(lambda: "{x!}".format(x = 1), "unknown conversion")
+assert.fails(lambda: "{x!:}".format(x = 1), "unknown conversion")
+assert.fails(lambda: "{a.b}".format(1), "syntax x.y is not supported")
+assert.fails(lambda: "{a[0]}".format(1), "syntax a\\[i\\] is not supported")
+assert.fails(lambda: "{ {} }".format(1), "nested replacement fields not supported")
+assert.fails(lambda: "{{}".format(1), "single '}' in format")
+assert.fails(lambda: "{}}".format(1), "single '}' in format")
+assert.fails(lambda: "}}{".format(1), "unmatched '{' in format")
+assert.fails(lambda: "}{{".format(1), "single '}' in format")
+
+# str.split, str.rsplit
+assert.eq("a.b.c.d".split("."), ["a", "b", "c", "d"])
+assert.eq("a.b.c.d".rsplit("."), ["a", "b", "c", "d"])
+assert.eq("a.b.c.d".split(".", -1), ["a", "b", "c", "d"])
+assert.eq("a.b.c.d".rsplit(".", -1), ["a", "b", "c", "d"])
+assert.eq("a.b.c.d".split(".", 0), ["a.b.c.d"])
+assert.eq("a.b.c.d".rsplit(".", 0), ["a.b.c.d"])
+assert.eq("a.b.c.d".split(".", 1), ["a", "b.c.d"])
+assert.eq("a.b.c.d".rsplit(".", 1), ["a.b.c", "d"])
+assert.eq("a.b.c.d".split(".", 2), ["a", "b", "c.d"])
+assert.eq("a.b.c.d".rsplit(".", 2), ["a.b", "c", "d"])
+assert.eq(" ".split("."), [" "])
+assert.eq(" ".rsplit("."), [" "])
+
+# {,r}split on white space:
+assert.eq(" a bc\n def \t ghi".split(), ["a", "bc", "def", "ghi"])
+assert.eq(" a bc\n def \t ghi".split(None), ["a", "bc", "def", "ghi"])
+assert.eq(" a bc\n def \t ghi".split(None, 0), ["a bc\n def \t ghi"])
+assert.eq(" a bc\n def \t ghi".rsplit(None, 0), [" a bc\n def \t ghi"])
+assert.eq(" a bc\n def \t ghi".split(None, 1), ["a", "bc\n def \t ghi"])
+assert.eq(" a bc\n def \t ghi".rsplit(None, 1), [" a bc\n def", "ghi"])
+assert.eq(" a bc\n def \t ghi".split(None, 2), ["a", "bc", "def \t ghi"])
+assert.eq(" a bc\n def \t ghi".rsplit(None, 2), [" a bc", "def", "ghi"])
+assert.eq(" a bc\n def \t ghi".split(None, 3), ["a", "bc", "def", "ghi"])
+assert.eq(" a bc\n def \t ghi".rsplit(None, 3), [" a", "bc", "def", "ghi"])
+assert.eq(" a bc\n def \t ghi".split(None, 4), ["a", "bc", "def", "ghi"])
+assert.eq(" a bc\n def \t ghi".rsplit(None, 4), ["a", "bc", "def", "ghi"])
+assert.eq(" a bc\n def \t ghi".rsplit(None, 5), ["a", "bc", "def", "ghi"])
+
+assert.eq(" a bc\n def \t ghi ".split(None, 0), ["a bc\n def \t ghi "])
+assert.eq(" a bc\n def \t ghi ".rsplit(None, 0), [" a bc\n def \t ghi"])
+assert.eq(" a bc\n def \t ghi ".split(None, 1), ["a", "bc\n def \t ghi "])
+assert.eq(" a bc\n def \t ghi ".rsplit(None, 1), [" a bc\n def", "ghi"])
+
+# Observe the algorithmic difference when splitting on spaces versus other delimiters.
+assert.eq("--aa--bb--cc--".split("-", 0), ["--aa--bb--cc--"]) # contrast this
+assert.eq(" aa bb cc ".split(None, 0), ["aa bb cc "]) # with this
+assert.eq("--aa--bb--cc--".rsplit("-", 0), ["--aa--bb--cc--"]) # ditto this
+assert.eq(" aa bb cc ".rsplit(None, 0), [" aa bb cc"]) # and this
+
+#
+assert.eq("--aa--bb--cc--".split("-", 1), ["", "-aa--bb--cc--"])
+assert.eq("--aa--bb--cc--".rsplit("-", 1), ["--aa--bb--cc-", ""])
+assert.eq(" aa bb cc ".split(None, 1), ["aa", "bb cc "])
+assert.eq(" aa bb cc ".rsplit(None, 1), [" aa bb", "cc"])
+
+#
+assert.eq("--aa--bb--cc--".split("-", -1), ["", "", "aa", "", "bb", "", "cc", "", ""])
+assert.eq("--aa--bb--cc--".rsplit("-", -1), ["", "", "aa", "", "bb", "", "cc", "", ""])
+assert.eq(" aa bb cc ".split(None, -1), ["aa", "bb", "cc"])
+assert.eq(" aa bb cc ".rsplit(None, -1), ["aa", "bb", "cc"])
+assert.eq(" ".split(None), [])
+assert.eq(" ".rsplit(None), [])
+
+assert.eq("localhost:80".rsplit(":", 1)[-1], "80")
+
+# str.splitlines
+assert.eq("\nabc\ndef".splitlines(), ["", "abc", "def"])
+assert.eq("\nabc\ndef".splitlines(True), ["\n", "abc\n", "def"])
+assert.eq("\nabc\ndef\n".splitlines(), ["", "abc", "def"])
+assert.eq("\nabc\ndef\n".splitlines(True), ["\n", "abc\n", "def\n"])
+assert.eq("".splitlines(), []) #
+assert.eq("".splitlines(True), []) #
+assert.eq("a".splitlines(), ["a"])
+assert.eq("a".splitlines(True), ["a"])
+assert.eq("\n".splitlines(), [""])
+assert.eq("\n".splitlines(True), ["\n"])
+assert.eq("a\n".splitlines(), ["a"])
+assert.eq("a\n".splitlines(True), ["a\n"])
+assert.eq("a\n\nb".splitlines(), ["a", "", "b"])
+assert.eq("a\n\nb".splitlines(True), ["a\n", "\n", "b"])
+assert.eq("a\nb\nc".splitlines(), ["a", "b", "c"])
+assert.eq("a\nb\nc".splitlines(True), ["a\n", "b\n", "c"])
+assert.eq("a\nb\nc\n".splitlines(), ["a", "b", "c"])
+assert.eq("a\nb\nc\n".splitlines(True), ["a\n", "b\n", "c\n"])
+
+# str.{,l,r}strip
+assert.eq(" \tfoo\n ".strip(), "foo")
+assert.eq(" \tfoo\n ".lstrip(), "foo\n ")
+assert.eq(" \tfoo\n ".rstrip(), " \tfoo")
+assert.eq(" \tfoo\n ".strip(""), "foo")
+assert.eq(" \tfoo\n ".lstrip(""), "foo\n ")
+assert.eq(" \tfoo\n ".rstrip(""), " \tfoo")
+assert.eq("blah.h".strip("b.h"), "la")
+assert.eq("blah.h".lstrip("b.h"), "lah.h")
+assert.eq("blah.h".rstrip("b.h"), "bla")
+
+# str.count
+assert.eq("banana".count("a"), 3)
+assert.eq("banana".count("a", 2), 2)
+assert.eq("banana".count("a", -4, -2), 1)
+assert.eq("banana".count("a", 1, 4), 2)
+assert.eq("banana".count("a", 0, -100), 0)
+
+# str.{starts,ends}with
+assert.true("foo".endswith("oo"))
+assert.true(not "foo".endswith("x"))
+assert.true("foo".startswith("fo"))
+assert.true(not "foo".startswith("x"))
+assert.fails(lambda: "foo".startswith(1), "got int.*want string")
+
+#
+assert.true("abc".startswith(("a", "A")))
+assert.true("ABC".startswith(("a", "A")))
+assert.true(not "ABC".startswith(("b", "B")))
+assert.fails(lambda: "123".startswith((1, 2)), "got int, for element 0")
+assert.fails(lambda: "123".startswith(["3"]), "got list")
+
+#
+assert.true("abc".endswith(("c", "C")))
+assert.true("ABC".endswith(("c", "C")))
+assert.true(not "ABC".endswith(("b", "B")))
+assert.fails(lambda: "123".endswith((1, 2)), "got int, for element 0")
+assert.fails(lambda: "123".endswith(["3"]), "got list")
+
+# start/end
+assert.true("abc".startswith("bc", 1))
+assert.true(not "abc".startswith("b", 999))
+assert.true("abc".endswith("ab", None, -1))
+assert.true(not "abc".endswith("b", None, -999))
+
+# str.replace
+assert.eq("banana".replace("a", "o", 1), "bonana")
+assert.eq("banana".replace("a", "o"), "bonono")
+# TODO(adonovan): more tests
+
+# str.{,r}find
+assert.eq("foofoo".find("oo"), 1)
+assert.eq("foofoo".find("ox"), -1)
+assert.eq("foofoo".find("oo", 2), 4)
+assert.eq("foofoo".rfind("oo"), 4)
+assert.eq("foofoo".rfind("ox"), -1)
+assert.eq("foofoo".rfind("oo", 1, 4), 1)
+assert.eq("foofoo".find(""), 0)
+assert.eq("foofoo".rfind(""), 6)
+
+# str.{,r}partition
+assert.eq("foo/bar/wiz".partition("/"), ("foo", "/", "bar/wiz"))
+assert.eq("foo/bar/wiz".rpartition("/"), ("foo/bar", "/", "wiz"))
+assert.eq("foo/bar/wiz".partition("."), ("foo/bar/wiz", "", ""))
+assert.eq("foo/bar/wiz".rpartition("."), ("", "", "foo/bar/wiz"))
+assert.fails(lambda: "foo/bar/wiz".partition(""), "empty separator")
+assert.fails(lambda: "foo/bar/wiz".rpartition(""), "empty separator")
+
+assert.eq("?".join(["foo", "a/b/c.go".rpartition("/")[0]]), "foo?a/b")
+
+# str.is{alpha,...}
+def test_predicates():
+ predicates = ["alnum", "alpha", "digit", "lower", "space", "title", "upper"]
+ table = {
+ "Hello, World!": "title",
+ "hello, world!": "lower",
+ "base64": "alnum lower",
+ "HAL-9000": "upper",
+ "Catch-22": "title",
+ "": "",
+ "\n\t\r": "space",
+ "abc": "alnum alpha lower",
+ "ABC": "alnum alpha upper",
+ "123": "alnum digit",
+ "DŽLJ": "alnum alpha upper",
+ "DžLj": "alnum alpha",
+ "Dž Lj": "title",
+ "džlj": "alnum alpha lower",
+ }
+ for str, want in table.items():
+ got = " ".join([name for name in predicates if getattr(str, "is" + name)()])
+ if got != want:
+ assert.fail("%r matched [%s], want [%s]" % (str, got, want))
+
+test_predicates()
+
+# Strings are not iterable.
+# ok
+assert.eq(len("abc"), 3) # len
+assert.true("a" in "abc") # str in str
+assert.eq("abc"[1], "b") # indexing
+
+# not ok
+def for_string():
+ for x in "abc":
+ pass
+
+def args(*args):
+ return args
+
# Each operation below must reject a string operand. The second argument of
# assert.fails is a regular expression matched against the error message, so
# regex metacharacters that are meant literally (such as '|') are escaped.
assert.fails(lambda: args(*"abc"), "must be iterable, not string")  # varargs
assert.fails(lambda: list("abc"), "got string, want iterable")  # list(str)
assert.fails(lambda: tuple("abc"), "got string, want iterable")  # tuple(str)
assert.fails(lambda: set("abc"), "got string, want iterable")  # set(str)
assert.fails(lambda: set() | "abc", "unknown binary op: set \\| string")  # set union
assert.fails(lambda: enumerate("ab"), "got string, want iterable")  # enumerate
assert.fails(lambda: sorted("abc"), "got string, want iterable")  # sorted
assert.fails(lambda: [].extend("bc"), "got string, want iterable")  # list.extend
assert.fails(lambda: ",".join("abc"), "got string, want iterable")  # string.join
assert.fails(lambda: dict(["ab"]), "not iterable .*string")  # dict
assert.fails(for_string, "string value is not iterable")  # for loop
assert.fails(lambda: [x for x in "abc"], "string value is not iterable")  # comprehension
assert.fails(lambda: all("abc"), "got string, want iterable")  # all
assert.fails(lambda: any("abc"), "got string, want iterable")  # any
assert.fails(lambda: reversed("abc"), "got string, want iterable")  # reversed
assert.fails(lambda: zip("ab", "cd"), "not iterable: string")  # zip
+
+# str.join
+assert.eq(",".join([]), "")
+assert.eq(",".join(["a"]), "a")
+assert.eq(",".join(["a", "b"]), "a,b")
+assert.eq(",".join(["a", "b", "c"]), "a,b,c")
+assert.eq(",".join(("a", "b", "c")), "a,b,c")
+assert.eq("".join(("a", "b", "c")), "abc")
+assert.fails(lambda: "".join(None), "got NoneType, want iterable")
+assert.fails(lambda: "".join(["one", 2]), "join: in list, want string, got int")
+
+# TODO(adonovan): tests for: {,r}index
+
+# str.capitalize
+assert.eq("hElLo, WoRlD!".capitalize(), "Hello, world!")
+assert.eq("por qué".capitalize(), "Por qué")
+assert.eq("¿Por qué?".capitalize(), "¿por qué?")
+
+# str.lower
+assert.eq("hElLo, WoRlD!".lower(), "hello, world!")
+assert.eq("por qué".lower(), "por qué")
+assert.eq("¿Por qué?".lower(), "¿por qué?")
+assert.eq("LJUBOVIĆ".lower(), "ljubović")
+assert.true("dženan ljubović".islower())
+
+# str.upper
+assert.eq("hElLo, WoRlD!".upper(), "HELLO, WORLD!")
+assert.eq("por qué".upper(), "POR QUÉ")
+assert.eq("¿Por qué?".upper(), "¿POR QUÉ?")
+assert.eq("ljubović".upper(), "LJUBOVIĆ")
+assert.true("DŽENAN LJUBOVIĆ".isupper())
+
+# str.title
+assert.eq("hElLo, WoRlD!".title(), "Hello, World!")
+assert.eq("por qué".title(), "Por Qué")
+assert.eq("¿Por qué?".title(), "¿Por Qué?")
+assert.eq("ljubović".title(), "Ljubović")
+assert.true("Dženan Ljubović".istitle())
+assert.true(not "DŽenan LJubović".istitle())
+
+# method spell check
+assert.fails(lambda: "".starts_with, "no .starts_with field.*did you mean .startswith")
+assert.fails(lambda: "".StartsWith, "no .StartsWith field.*did you mean .startswith")
+assert.fails(lambda: "".fin, "no .fin field.*.did you mean .find")
diff --git a/starlark/testdata/tuple.star b/starlark/testdata/tuple.star
new file mode 100644
index 0000000..f306133
--- /dev/null
+++ b/starlark/testdata/tuple.star
@@ -0,0 +1,55 @@
+# Tests of Starlark 'tuple'
+
+load("assert.star", "assert")
+
+# literal
+assert.eq((), ())
+assert.eq((1), 1)
+assert.eq((1,), (1,))
+assert.ne((1), (1,))
+assert.eq((1, 2), (1, 2))
+assert.eq((1, 2, 3, 4, 5), (1, 2, 3, 4, 5))
+assert.ne((1, 2, 3), (1, 2, 4))
+
+# truth
+assert.true((False,))
+assert.true((False, False))
+assert.true(not ())
+
+# indexing, x[i]
+assert.eq(("a", "b")[0], "a")
+assert.eq(("a", "b")[1], "b")
+
# slicing, x[i:j] and x[i:j:k]
# Slice actual tuples: slicing string literals here would only repeat the
# string tests and leave tuple slicing with positive strides unexercised.
abcd = tuple("abcd".elems())
assert.eq(abcd[0:4:1], ("a", "b", "c", "d"))
assert.eq(abcd[::2], ("a", "c"))
assert.eq(abcd[1::2], ("b", "d"))
assert.eq(abcd[4:0:-1], ("d", "c", "b"))
banana = tuple("banana".elems())
assert.eq(banana[7::-2], tuple("aaa".elems()))
assert.eq(banana[6::-2], tuple("aaa".elems()))
assert.eq(banana[5::-2], tuple("aaa".elems()))
assert.eq(banana[4::-2], tuple("nnb".elems()))
+
+# tuple
+assert.eq(tuple(), ())
+assert.eq(tuple("abc".elems()), ("a", "b", "c"))
+assert.eq(tuple(["a", "b", "c"]), ("a", "b", "c"))
+assert.eq(tuple([1]), (1,))
+assert.fails(lambda: tuple(1), "got int, want iterable")
+
+# tuple * int, int * tuple
+abc = tuple("abc".elems())
+assert.eq(abc * 0, ())
+assert.eq(abc * -1, ())
+assert.eq(abc * 1, abc)
+assert.eq(abc * 3, ("a", "b", "c", "a", "b", "c", "a", "b", "c"))
+assert.eq(0 * abc, ())
+assert.eq(-1 * abc, ())
+assert.eq(1 * abc, abc)
+assert.eq(3 * abc, ("a", "b", "c", "a", "b", "c", "a", "b", "c"))
+assert.fails(lambda: abc * (1000000 * 1000000), "repeat count 1000000000000 too large")
+assert.fails(lambda: abc * 1000000 * 1000000, "excessive repeat \\(3000000 \\* 1000000 elements")
+
+# TODO(adonovan): test use of tuple as sequence
+# (for loop, comprehension, library functions).
diff --git a/starlark/unpack.go b/starlark/unpack.go
new file mode 100644
index 0000000..1493c85
--- /dev/null
+++ b/starlark/unpack.go
@@ -0,0 +1,319 @@
+package starlark
+
+// This file defines the Unpack helper functions used by
+// built-in functions to interpret their call arguments.
+
+import (
+ "fmt"
+ "log"
+ "reflect"
+ "strings"
+)
+
// An Unpacker defines custom argument unpacking behavior.
// When the destination variable passed to UnpackArgs or
// UnpackPositionalArgs implements Unpacker, its Unpack method is called
// with the argument value, and a non-nil error it returns is reported as
// the error for that parameter.
// See UnpackArgs.
type Unpacker interface {
	Unpack(v Value) error
}
+
+// UnpackArgs unpacks the positional and keyword arguments into the
+// supplied parameter variables. pairs is an alternating list of names
+// and pointers to variables.
+//
+// If the variable is a bool, integer, string, *List, *Dict, Callable,
+// Iterable, or user-defined implementation of Value,
+// UnpackArgs performs the appropriate type check.
+// Predeclared Go integer types uses the AsInt check.
+// If the parameter name ends with "?",
+// it and all following parameters are optional.
+//
+// If the variable implements Unpacker, its Unpack argument
+// is called with the argument value, allowing an application
+// to define its own argument validation and conversion.
+//
+// If the variable implements Value, UnpackArgs may call
+// its Type() method while constructing the error message.
+//
+// Examples:
+//
+// var (
+// a Value
+// b = MakeInt(42)
+// c Value = starlark.None
+// )
+//
+// // 1. mixed parameters, like def f(a, b=42, c=None).
+// err := UnpackArgs("f", args, kwargs, "a", &a, "b?", &b, "c?", &c)
+//
+// // 2. keyword parameters only, like def f(*, a, b, c=None).
+// if len(args) > 0 {
+// return fmt.Errorf("f: unexpected positional arguments")
+// }
+// err := UnpackArgs("f", args, kwargs, "a", &a, "b?", &b, "c?", &c)
+//
+// // 3. positional parameters only, like def f(a, b=42, c=None, /) in Python 3.8.
+// err := UnpackPositionalArgs("f", args, kwargs, 1, &a, &b, &c)
+//
+// More complex forms such as def f(a, b=42, *args, c, d=123, **kwargs)
+// require additional logic, but their need in built-ins is exceedingly rare.
+//
+// In the examples above, the declaration of b with type Int causes UnpackArgs
+// to require that b's argument value, if provided, is also an int.
+// To allow arguments of any type, while retaining the default value of 42,
+// declare b as a Value:
+//
+// var b Value = MakeInt(42)
+//
+// The zero value of a variable of type Value, such as 'a' in the
+// examples above, is not a valid Starlark value, so if the parameter is
+// optional, the caller must explicitly handle the default case by
+// interpreting nil as None or some computed default. The same is true
+// for the zero values of variables of type *List, *Dict, Callable, or
+// Iterable. For example:
+//
+// // def myfunc(d=None, e=[], f={})
+// var (
+// d Value
+// e *List
+// f *Dict
+// )
+// err := UnpackArgs("myfunc", args, kwargs, "d?", &d, "e?", &e, "f?", &f)
+// if d == nil { d = None; }
+// if e == nil { e = new(List); }
+// if f == nil { f = new(Dict); }
+//
+func UnpackArgs(fnname string, args Tuple, kwargs []Tuple, pairs ...interface{}) error {
+ nparams := len(pairs) / 2
+ var defined intset
+ defined.init(nparams)
+
+ paramName := func(x interface{}) string { // (no free variables)
+ name := x.(string)
+ if name[len(name)-1] == '?' {
+ name = name[:len(name)-1]
+ }
+ return name
+ }
+
+ // positional arguments
+ if len(args) > nparams {
+ return fmt.Errorf("%s: got %d arguments, want at most %d",
+ fnname, len(args), nparams)
+ }
+ for i, arg := range args {
+ defined.set(i)
+ if err := unpackOneArg(arg, pairs[2*i+1]); err != nil {
+ name := paramName(pairs[2*i])
+ return fmt.Errorf("%s: for parameter %s: %s", fnname, name, err)
+ }
+ }
+
+ // keyword arguments
+kwloop:
+ for _, item := range kwargs {
+ name, arg := item[0].(String), item[1]
+ for i := 0; i < nparams; i++ {
+ if paramName(pairs[2*i]) == string(name) {
+ // found it
+ if defined.set(i) {
+ return fmt.Errorf("%s: got multiple values for keyword argument %s",
+ fnname, name)
+ }
+ ptr := pairs[2*i+1]
+ if err := unpackOneArg(arg, ptr); err != nil {
+ return fmt.Errorf("%s: for parameter %s: %s", fnname, name, err)
+ }
+ continue kwloop
+ }
+ }
+ return fmt.Errorf("%s: unexpected keyword argument %s", fnname, name)
+ }
+
+ // Check that all non-optional parameters are defined.
+ // (We needn't check the first len(args).)
+ for i := len(args); i < nparams; i++ {
+ name := pairs[2*i].(string)
+ if strings.HasSuffix(name, "?") {
+ break // optional
+ }
+ if !defined.get(i) {
+ return fmt.Errorf("%s: missing argument for %s", fnname, name)
+ }
+ }
+
+ return nil
+}
+
+// UnpackPositionalArgs unpacks the positional arguments into
+// corresponding variables. Each element of vars is a pointer; see
+// UnpackArgs for allowed types and conversions.
+//
+// UnpackPositionalArgs reports an error if the number of arguments is
+// less than min or greater than len(vars), if kwargs is nonempty, or if
+// any conversion fails.
+//
+// See UnpackArgs for general comments.
+func UnpackPositionalArgs(fnname string, args Tuple, kwargs []Tuple, min int, vars ...interface{}) error {
+ if len(kwargs) > 0 {
+ return fmt.Errorf("%s: unexpected keyword arguments", fnname)
+ }
+ max := len(vars)
+ if len(args) < min {
+ var atleast string
+ if min < max {
+ atleast = "at least "
+ }
+ return fmt.Errorf("%s: got %d arguments, want %s%d", fnname, len(args), atleast, min)
+ }
+ if len(args) > max {
+ var atmost string
+ if max > min {
+ atmost = "at most "
+ }
+ return fmt.Errorf("%s: got %d arguments, want %s%d", fnname, len(args), atmost, max)
+ }
+ for i, arg := range args {
+ if err := unpackOneArg(arg, vars[i]); err != nil {
+ return fmt.Errorf("%s: for parameter %d: %s", fnname, i+1, err)
+ }
+ }
+ return nil
+}
+
+func unpackOneArg(v Value, ptr interface{}) error {
+ // On failure, don't clobber *ptr.
+ switch ptr := ptr.(type) {
+ case Unpacker:
+ return ptr.Unpack(v)
+ case *Value:
+ *ptr = v
+ case *string:
+ s, ok := AsString(v)
+ if !ok {
+ return fmt.Errorf("got %s, want string", v.Type())
+ }
+ *ptr = s
+ case *bool:
+ b, ok := v.(Bool)
+ if !ok {
+ return fmt.Errorf("got %s, want bool", v.Type())
+ }
+ *ptr = bool(b)
+ case *int, *int8, *int16, *int32, *int64,
+ *uint, *uint8, *uint16, *uint32, *uint64, *uintptr:
+ return AsInt(v, ptr)
+ case *float64:
+ f, ok := v.(Float)
+ if !ok {
+ return fmt.Errorf("got %s, want float", v.Type())
+ }
+ *ptr = float64(f)
+ case **List:
+ list, ok := v.(*List)
+ if !ok {
+ return fmt.Errorf("got %s, want list", v.Type())
+ }
+ *ptr = list
+ case **Dict:
+ dict, ok := v.(*Dict)
+ if !ok {
+ return fmt.Errorf("got %s, want dict", v.Type())
+ }
+ *ptr = dict
+ case *Callable:
+ f, ok := v.(Callable)
+ if !ok {
+ return fmt.Errorf("got %s, want callable", v.Type())
+ }
+ *ptr = f
+ case *Iterable:
+ it, ok := v.(Iterable)
+ if !ok {
+ return fmt.Errorf("got %s, want iterable", v.Type())
+ }
+ *ptr = it
+ default:
+ // v must have type *V, where V is some subtype of starlark.Value.
+ ptrv := reflect.ValueOf(ptr)
+ if ptrv.Kind() != reflect.Ptr {
+ log.Panicf("internal error: not a pointer: %T", ptr)
+ }
+ paramVar := ptrv.Elem()
+ if !reflect.TypeOf(v).AssignableTo(paramVar.Type()) {
+ // The value is not assignable to the variable.
+
+ // Detect a possible bug in the Go program that called Unpack:
+ // If the variable *ptr is not a subtype of Value,
+ // no value of v can possibly work.
+ if !paramVar.Type().AssignableTo(reflect.TypeOf(new(Value)).Elem()) {
+ log.Panicf("pointer element type does not implement Value: %T", ptr)
+ }
+
+ // Report Starlark dynamic type error.
+ //
+ // We prefer the Starlark Value.Type name over
+ // its Go reflect.Type name, but calling the
+ // Value.Type method on the variable is not safe
+ // in general. If the variable is an interface,
+ // the call will fail. Even if the variable has
+ // a concrete type, it might not be safe to call
+ // Type() on a zero instance. Thus we must use
+ // recover.
+
+ // Default to Go reflect.Type name
+ paramType := paramVar.Type().String()
+
+ // Attempt to call Value.Type method.
+ func() {
+ defer func() { recover() }()
+ paramType = paramVar.MethodByName("Type").Call(nil)[0].String()
+ }()
+ return fmt.Errorf("got %s, want %s", v.Type(), paramType)
+ }
+ paramVar.Set(reflect.ValueOf(v))
+ }
+ return nil
+}
+
// An intset is a set of ints with two representations: a 64-bit bitset,
// and a map that init selects when told the capacity n is 64 or more.
type intset struct {
	small uint64       // bitset, used if n < 64
	large map[int]bool // set, used if n >= 64
}

// init selects the representation for a set of capacity n.
// The zero intset is already a valid empty bitset, so only the map
// needs allocating.
func (is *intset) init(n int) {
	if n < 64 {
		return
	}
	is.large = make(map[int]bool)
}

// set adds i to the set and reports whether i was already a member.
func (is *intset) set(i int) (prev bool) {
	if is.large != nil {
		prev = is.large[i]
		is.large[i] = true
		return prev
	}
	mask := uint64(1) << uint(i)
	prev = is.small&mask != 0
	is.small |= mask
	return prev
}

// get reports whether i is a member of the set.
func (is *intset) get(i int) bool {
	if is.large != nil {
		return is.large[i]
	}
	mask := uint64(1) << uint(i)
	return is.small&mask != 0
}

// len returns the number of members of the set.
func (is *intset) len() int {
	if is.large != nil {
		return len(is.large)
	}
	// Suboptimal, but used only for error reporting.
	count := 0
	for bits := is.small; bits != 0; bits >>= 1 {
		count += int(bits & 1)
	}
	return count
}
diff --git a/starlark/value.go b/starlark/value.go
new file mode 100644
index 0000000..81e29ed
--- /dev/null
+++ b/starlark/value.go
@@ -0,0 +1,1431 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package starlark provides a Starlark interpreter.
+//
+// Starlark values are represented by the Value interface.
+// The following built-in Value types are known to the evaluator:
+//
+// NoneType -- NoneType
+// Bool -- bool
+// Int -- int
+// Float -- float
+// String -- string
+// *List -- list
+// Tuple -- tuple
+// *Dict -- dict
+// *Set -- set
+// *Function -- function (implemented in Starlark)
+// *Builtin -- builtin_function_or_method (function or method implemented in Go)
+//
+// Client applications may define new data types that satisfy at least
+// the Value interface. Such types may provide additional operations by
+// implementing any of these optional interfaces:
+//
+// Callable -- value is callable like a function
+// Comparable -- value defines its own comparison operations
+// Iterable -- value is iterable using 'for' loops
+// Sequence -- value is iterable sequence of known length
+// Indexable -- value is sequence with efficient random access
+// Mapping -- value maps from keys to values, like a dictionary
+// HasBinary -- value defines binary operations such as * and +
+// HasAttrs -- value has readable fields or methods x.f
+// HasSetField -- value has settable fields x.f
+// HasSetIndex -- value supports element update using x[i]=y
+// HasSetKey -- value supports map update using x[k]=v
+// HasUnary -- value defines unary operations such as + and -
+//
+// Client applications may also define domain-specific functions in Go
+// and make them available to Starlark programs. Use NewBuiltin to
+// construct a built-in value that wraps a Go function. The
+// implementation of the Go function may use UnpackArgs to make sense of
+// the positional and keyword arguments provided by the caller.
+//
+// Starlark's None value is not equal to Go's nil. Go's nil is not a legal
+// Starlark value, but the compiler will not stop you from converting nil
+// to Value. Be careful to avoid allowing Go nil values to leak into
+// Starlark data structures.
+//
+// The Compare operation requires two arguments of the same
+// type, but this constraint cannot be expressed in Go's type system.
+// (This is the classic "binary method problem".)
+// So, each Value type's CompareSameType method is a partial function
+// that compares a value only against others of the same type.
+// Use the package's standalone Compare (or Equal) function to compare
+// an arbitrary pair of values.
+//
+// To parse and evaluate a Starlark source file, use ExecFile. The Eval
+// function evaluates a single expression. All evaluator functions
+// require a Thread parameter which defines the "thread-local storage"
+// of a Starlark thread and may be used to plumb application state
+// through Starlark code and into callbacks. When evaluation fails it
+// returns an EvalError from which the application may obtain a
+// backtrace of active Starlark calls.
+//
+package starlark // import "go.starlark.net/starlark"
+
+// This file defines the data types of Starlark and their basic operations.
+
+import (
+ "fmt"
+ "math"
+ "math/big"
+ "reflect"
+ "strconv"
+ "strings"
+ "unicode/utf8"
+
+ "go.starlark.net/internal/compile"
+ "go.starlark.net/syntax"
+)
+
+ // Value is a value in the Starlark interpreter.
+ type Value interface {
+ // String returns the string representation of the value.
+ // Starlark string values are quoted as if by Python's repr.
+ String() string
+
+ // Type returns a short string describing the value's type.
+ Type() string
+
+ // Freeze causes the value, and all values transitively
+ // reachable from it through collections and closures, to be
+ // marked as frozen. All subsequent mutations to the data
+ // structure through this API will fail dynamically, making the
+ // data structure immutable and safe for publishing to other
+ // Starlark interpreters running concurrently.
+ Freeze()
+
+ // Truth returns the truth value of an object.
+ Truth() Bool
+
+ // Hash returns a function of x such that Equal(x, y) => Hash(x) == Hash(y).
+ // Hash may fail if the value's type is not hashable, or if the value
+ // contains a non-hashable value. The hash is used only by dictionaries and
+ // is not exposed to the Starlark program.
+ Hash() (uint32, error)
+ }
+
+ // A Comparable is a value that defines its own equivalence relation and
+ // perhaps ordered comparisons.
+ type Comparable interface {
+ Value
+ // CompareSameType compares one value to another of the same Type().
+ // The comparison operation must be one of EQL, NEQ, LT, LE, GT, or GE.
+ // CompareSameType returns an error if an ordered comparison was
+ // requested for a type that does not support it.
+ //
+ // Implementations that recursively compare subcomponents of
+ // the value should use the CompareDepth function, not Compare, to
+ // avoid infinite recursion on cyclic structures.
+ //
+ // The depth parameter is used to bound comparisons of cyclic
+ // data structures. Implementations should decrement depth
+ // before calling CompareDepth and should return an error if depth
+ // < 1.
+ //
+ // Client code should not call this method. Instead, use the
+ // standalone Compare or Equal functions, which are defined for
+ // all pairs of operands.
+ CompareSameType(op syntax.Token, y Value, depth int) (bool, error)
+ }
+
+ // Compile-time assertions that these built-in types implement Comparable.
+ var (
+ _ Comparable = Int{}
+ _ Comparable = False
+ _ Comparable = Float(0)
+ _ Comparable = String("")
+ _ Comparable = (*Dict)(nil)
+ _ Comparable = (*List)(nil)
+ _ Comparable = Tuple(nil)
+ _ Comparable = (*Set)(nil)
+ )
+
+ // A Callable value f may be the operand of a function call, f(x).
+ //
+ // Clients should use the Call function, never the CallInternal method.
+ type Callable interface {
+ Value
+ Name() string
+ CallInternal(thread *Thread, args Tuple, kwargs []Tuple) (Value, error)
+ }
+
+ // A callableWithPosition is a Callable that can also report a source position.
+ type callableWithPosition interface {
+ Callable
+ Position() syntax.Position
+ }
+
+ // Compile-time assertions that *Builtin and *Function are Callable,
+ // and that *Function additionally reports a position.
+ var (
+ _ Callable = (*Builtin)(nil)
+ _ Callable = (*Function)(nil)
+ _ callableWithPosition = (*Function)(nil)
+ )
+
+ // An Iterable abstracts a sequence of values.
+ // An iterable value may be iterated over by a 'for' loop or used where
+ // any other Starlark iterable is allowed. Unlike a Sequence, the length
+ // of an Iterable is not necessarily known in advance of iteration.
+ type Iterable interface {
+ Value
+ Iterate() Iterator // must be followed by call to Iterator.Done
+ }
+
+ // A Sequence is a sequence of values of known length.
+ type Sequence interface {
+ Iterable
+ Len() int
+ }
+
+ // Compile-time assertions that *Dict and *Set implement Sequence.
+ var (
+ _ Sequence = (*Dict)(nil)
+ _ Sequence = (*Set)(nil)
+ )
+
+ // An Indexable is a sequence of known length that supports efficient random access.
+ // It is not necessarily iterable.
+ type Indexable interface {
+ Value
+ Index(i int) Value // requires 0 <= i < Len()
+ Len() int
+ }
+
+ // A Sliceable is a sequence that can be cut into pieces with the slice operator (x[i:j:step]).
+ //
+ // All native indexable objects are sliceable.
+ // This is a separate interface for backwards-compatibility.
+ type Sliceable interface {
+ Indexable
+ // For positive strides (step > 0), 0 <= start <= end <= n.
+ // For negative strides (step < 0), -1 <= end <= start < n.
+ // The caller must ensure that the start and end indices are valid
+ // and that step is non-zero.
+ Slice(start, end, step int) Value
+ }
+
+ // A HasSetIndex is an Indexable value whose elements may be assigned (x[i] = y).
+ //
+ // The implementation should not add Len to a negative index as the
+ // evaluator does this before the call.
+ type HasSetIndex interface {
+ Indexable
+ SetIndex(index int, v Value) error
+ }
+
+ // Compile-time assertions for the built-in indexable types:
+ // String, Tuple and *List are Sliceable, and *List also supports SetIndex.
+ var (
+ _ HasSetIndex = (*List)(nil)
+ _ Indexable = Tuple(nil)
+ _ Indexable = String("")
+ _ Sliceable = Tuple(nil)
+ _ Sliceable = String("")
+ _ Sliceable = (*List)(nil)
+ )
+
+ // An Iterator provides a sequence of values to the caller.
+ //
+ // The caller must call Done when the iterator is no longer needed.
+ // Operations that modify a sequence will fail if it has active iterators.
+ //
+ // Example usage:
+ //
+ // iter := iterable.Iterate()
+ // defer iter.Done()
+ // var x Value
+ // for iter.Next(&x) {
+ // ...
+ // }
+ //
+ type Iterator interface {
+ // If the iterator is exhausted, Next returns false.
+ // Otherwise it sets *p to the current element of the sequence,
+ // advances the iterator, and returns true.
+ Next(p *Value) bool
+ // Done releases the iterator; it must be called exactly once
+ // when the caller has finished with it.
+ Done()
+ }
+
+ // A Mapping is a mapping from keys to values, such as a dictionary.
+ //
+ // If a type satisfies both Mapping and Iterable, the iterator yields
+ // the keys of the mapping.
+ type Mapping interface {
+ Value
+ // Get returns the value corresponding to the specified key,
+ // or !found if the mapping does not contain the key.
+ //
+ // Get also defines the behavior of "v in mapping".
+ // The 'in' operator reports the 'found' component, ignoring errors.
+ Get(Value) (v Value, found bool, err error)
+ }
+
+ // An IterableMapping is a mapping that supports key enumeration.
+ type IterableMapping interface {
+ Mapping
+ Iterate() Iterator // see Iterable interface
+ Items() []Tuple // a new slice containing all key/value pairs
+ }
+
+ // Compile-time assertion that *Dict implements IterableMapping.
+ var _ IterableMapping = (*Dict)(nil)
+
+ // A HasSetKey supports map update using x[k]=v syntax, like a dictionary.
+ type HasSetKey interface {
+ Mapping
+ SetKey(k, v Value) error
+ }
+
+ // Compile-time assertion that *Dict implements HasSetKey.
+ var _ HasSetKey = (*Dict)(nil)
+
+ // A HasBinary value may be used as either operand of these binary operators:
+ // + - * / // % in not in | & ^ << >>
+ //
+ // The Side argument indicates whether the receiver is the left or right operand.
+ //
+ // An implementation may decline to handle an operation by returning (nil, nil).
+ // For this reason, clients should always call the standalone Binary(op, x, y)
+ // function rather than calling the method directly.
+ type HasBinary interface {
+ Value
+ Binary(op syntax.Token, y Value, side Side) (Value, error)
+ }
+
+ // A Side tells a HasBinary.Binary implementation which operand
+ // of the binary operation the receiver is.
+ type Side bool
+
+ // The two possible values of Side.
+ const (
+ Left Side = false
+ Right Side = true
+ )
+
+ // A HasUnary value may be used as the operand of these unary operators:
+ // + - ~
+ //
+ // An implementation may decline to handle an operation by returning (nil, nil).
+ // For this reason, clients should always call the standalone Unary(op, x)
+ // function rather than calling the method directly.
+ type HasUnary interface {
+ Value
+ Unary(op syntax.Token) (Value, error)
+ }
+
+ // A HasAttrs value has fields or methods that may be read by a dot expression (y = x.f).
+ // Attribute names may be listed using the built-in 'dir' function.
+ //
+ // For implementation convenience, a result of (nil, nil) from Attr is
+ // interpreted as a "no such field or method" error. Implementations are
+ // free to return a more precise error.
+ type HasAttrs interface {
+ Value
+ Attr(name string) (Value, error) // returns (nil, nil) if attribute not present
+ AttrNames() []string // callers must not modify the result.
+ }
+
+ // Compile-time assertions that String, *List, *Dict and *Set implement HasAttrs.
+ var (
+ _ HasAttrs = String("")
+ _ HasAttrs = new(List)
+ _ HasAttrs = new(Dict)
+ _ HasAttrs = new(Set)
+ )
+
+ // A HasSetField value has fields that may be written by a dot expression (x.f = y).
+ //
+ // An implementation of SetField may return a NoSuchAttrError,
+ // in which case the runtime may augment the error message to
+ // warn of possible misspelling.
+ type HasSetField interface {
+ HasAttrs
+ SetField(name string, val Value) error
+ }
+
+ // A NoSuchAttrError may be returned by an implementation of
+ // HasAttrs.Attr or HasSetField.SetField to indicate that no such field
+ // exists. In that case the runtime may augment the error message to
+ // warn of possible misspelling.
+ type NoSuchAttrError string
+
+ // Error returns the error message, which is the string value itself.
+ func (e NoSuchAttrError) Error() string { return string(e) }
+
+ // NoneType is the type of None. Its only legal value is None.
+ // (We represent it as a number, not struct{}, so that None may be constant.)
+ type NoneType byte
+
+ // None is the sole value of NoneType.
+ const None = NoneType(0)
+
+ // Value interface methods: None prints as "None", is false, and hashes to 0.
+ func (NoneType) String() string { return "None" }
+ func (NoneType) Type() string { return "NoneType" }
+ func (NoneType) Freeze() {} // immutable
+ func (NoneType) Truth() Bool { return False }
+ func (NoneType) Hash() (uint32, error) { return 0, nil }
+
+ // Bool is the type of a Starlark bool.
+ type Bool bool
+
+ // The two Bool values.
+ const (
+ False Bool = false
+ True Bool = true
+ )
+
+ // String returns the Starlark spelling of the value: "True" or "False".
+ func (b Bool) String() string {
+ if !b {
+ return "False"
+ }
+ return "True"
+ }
+
+ // Type returns the Starlark type name.
+ func (b Bool) Type() string { return "bool" }
+
+ // Freeze does nothing: a Bool is immutable.
+ func (b Bool) Freeze() {}
+
+ // Truth returns b itself.
+ func (b Bool) Truth() Bool { return b }
+ // Hash returns the integer form of the bool as produced by b2i.
+ func (b Bool) Hash() (uint32, error) { return uint32(b2i(bool(b))), nil }
+
+ // CompareSameType compares two Bools by the difference of their integer forms.
+ // It panics if y_ is not a Bool.
+ func (x Bool) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+ y := y_.(Bool)
+ return threeway(op, b2i(bool(x))-b2i(bool(y))), nil
+ }
+
+ // Float is the type of a Starlark float.
+ type Float float64
+
+ // String formats f using the default 'g' conversion.
+ func (f Float) String() string {
+ sb := new(strings.Builder)
+ f.format(sb, 'g')
+ return sb.String()
+ }
+
+ // format appends the textual form of f to buf using the conversion
+ // conv, which is one of the bytes e, E, f, F, g or G.
+ func (f Float) format(buf *strings.Builder, conv byte) {
+ ff := float64(f)
+
+ // Non-finite values have fixed spellings regardless of conv.
+ if !isFinite(ff) {
+ text := "nan"
+ switch {
+ case math.IsInf(ff, +1):
+ text = "+inf"
+ case math.IsInf(ff, -1):
+ text = "-inf"
+ }
+ buf.WriteString(text)
+ return
+ }
+
+ if conv != 'g' && conv != 'G' {
+ // %[eEfF] use 6-digit precision
+ buf.WriteString(strconv.FormatFloat(ff, conv, 6, 64))
+ return
+ }
+
+ // %g is the default format used by str.
+ // It uses the minimum precision to avoid ambiguity,
+ // and always includes a '.' or an 'e' so that the value
+ // is self-evidently a float, not an int.
+ s := strconv.FormatFloat(ff, conv, -1, 64)
+ buf.WriteString(s)
+
+ // Ensure result always has a decimal point if no exponent.
+ // "123" -> "123.0"
+ expChar := conv - 'g' + 'e' // 'e' for %g, 'E' for %G
+ hasExp := strings.IndexByte(s, expChar) >= 0
+ hasDot := strings.IndexByte(s, '.') >= 0
+ if !hasExp && !hasDot {
+ buf.WriteString(".0")
+ }
+ }
+
+ func (f Float) Type() string { return "float" }
+ func (f Float) Freeze() {} // immutable
+ func (f Float) Truth() Bool { return f != 0.0 } // only zero is false
+
+ // Hash returns a hash of f that agrees with the hash of an equal Int.
+ func (f Float) Hash() (uint32, error) {
+ // Equal float and int values must yield the same hash.
+ // TODO(adonovan): opt: if f is non-integral, and thus not equal
+ // to any Int, we can avoid the Int conversion and use a cheaper hash.
+ if isFinite(float64(f)) {
+ return finiteFloatToInt(f).Hash()
+ }
+ return 1618033, nil // NaN, +/-Inf
+ }
+
+ // floor returns the greatest integral Float not exceeding f.
+ func floor(f Float) Float { return Float(math.Floor(float64(f))) }
+
+ // isFinite reports whether f represents a finite rational value.
+ // It is equivalent to !math.IsNaN(f) && !math.IsInf(f, 0).
+ func isFinite(f float64) bool {
+ return math.Abs(f) <= math.MaxFloat64
+ }
+
+ // CompareSameType applies the comparison op to x and y_, using the
+ // ordering defined by floatCmp. It panics if y_ is not a Float.
+ func (x Float) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+ order := floatCmp(x, y_.(Float))
+ return threeway(op, order), nil
+ }
+
+ // floatCmp performs a three-valued comparison on floats,
+ // which are totally ordered with NaN > +Inf.
+ func floatCmp(x, y Float) int {
+ switch {
+ case x > y:
+ return +1
+ case x < y:
+ return -1
+ case x == y:
+ return 0
+ }
+
+ // At least one operand is NaN; a NaN is the only value unequal to itself.
+ xIsNaN, yIsNaN := x != x, y != y
+ switch {
+ case !xIsNaN:
+ return -1 // y is NaN
+ case !yIsNaN:
+ return +1 // x is NaN
+ }
+ return 0 // both NaN
+ }
+
+ // rational returns f as a newly allocated big.Rat.
+ func (f Float) rational() *big.Rat {
+ r := new(big.Rat)
+ return r.SetFloat64(float64(f))
+ }
+
+ // AsFloat returns the float64 value closest to x.
+ // The f result is undefined if x is not a float or Int.
+ // The result may be infinite if x is a very large Int.
+ func AsFloat(x Value) (f float64, ok bool) {
+ if fl, isFloat := x.(Float); isFloat {
+ return float64(fl), true
+ }
+ if i, isInt := x.(Int); isInt {
+ return float64(i.Float()), true
+ }
+ return 0, false
+ }
+
+ // Mod returns the remainder of x divided by y. A non-zero remainder
+ // is shifted by y when the operands have different signs.
+ func (x Float) Mod(y Float) Float {
+ rem := Float(math.Mod(float64(x), float64(y)))
+ signsDiffer := (x < 0) != (y < 0)
+ if signsDiffer && rem != 0 {
+ return rem + y
+ }
+ return rem
+ }
+
+ // Unary implements the operations +float and -float.
+ // It returns (nil, nil) for any other operator.
+ func (f Float) Unary(op syntax.Token) (Value, error) {
+ if op == syntax.MINUS {
+ return -f, nil
+ }
+ if op == syntax.PLUS {
+ return +f, nil
+ }
+ return nil, nil
+ }
+
+ // String is the type of a Starlark text string.
+ //
+ // A String encapsulates an immutable sequence of bytes,
+ // but strings are not directly iterable. Instead, iterate
+ // over the result of calling one of these four methods:
+ // codepoints, codepoint_ords, elems, elem_ords.
+ //
+ // Strings typically contain text; use Bytes for binary strings.
+ // The Starlark spec defines text strings as sequences of UTF-k
+ // codes that encode Unicode code points. In this Go implementation,
+ // k=8, whereas in a Java implementation, k=16. For portability,
+ // operations on strings should aim to avoid assumptions about
+ // the value of k.
+ //
+ // Warning: the contract of the Value interface's String method is that
+ // it returns the value printed in Starlark notation,
+ // so s.String() or fmt.Sprintf("%s", s) returns a quoted string.
+ // Use string(s) or s.GoString() or fmt.Sprintf("%#v", s) to obtain the raw contents
+ // of a Starlark string as a Go string.
+ type String string
+
+ func (s String) String() string { return syntax.Quote(string(s), false) } // quoted form
+ func (s String) GoString() string { return string(s) } // raw contents
+ func (s String) Type() string { return "string" }
+ func (s String) Freeze() {} // immutable
+ func (s String) Truth() Bool { return len(s) > 0 } // only the empty string is false
+ func (s String) Hash() (uint32, error) { return hashString(string(s)), nil }
+ func (s String) Len() int { return len(s) } // bytes
+ func (s String) Index(i int) Value { return s[i : i+1] } // 1-byte substring at offset i
+
+ // Slice returns the bytes of s selected by the slice (start, end, step)
+ // as a new String. The caller must supply valid indices and a non-zero step.
+ func (s String) Slice(start, end, step int) Value {
+ // A unit stride is a plain substring.
+ if step == 1 {
+ return s[start:end]
+ }
+
+ // Otherwise gather bytes one at a time while i has not yet
+ // reached end in the direction of travel.
+ dir := signum(step)
+ var picked []byte
+ i := start
+ for signum(end-i) == dir {
+ picked = append(picked, s[i])
+ i += step
+ }
+ return String(picked)
+ }
+
+ // Attr and AttrNames expose the methods listed in stringMethods.
+ func (s String) Attr(name string) (Value, error) { return builtinAttr(s, name, stringMethods) }
+ func (s String) AttrNames() []string { return builtinAttrNames(stringMethods) }
+
+ // CompareSameType compares two Strings using strings.Compare.
+ // It panics if y_ is not a String.
+ func (x String) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+ y := y_.(String)
+ return threeway(op, strings.Compare(string(x), string(y))), nil
+ }
+
+ // AsString returns the raw contents of x and true if x is a String;
+ // otherwise it returns "" and false.
+ func AsString(x Value) (string, bool) { v, ok := x.(String); return string(v), ok }
+
+ // A stringElems is an iterable view of the elements (bytes) of a string.
+ // When ords is set, each element is yielded as an integer; otherwise
+ // as a 1-byte substring. It is an indexable sequence.
+ type stringElems struct {
+ s String
+ ords bool
+ }
+
+ var (
+ _ Iterable = (*stringElems)(nil)
+ _ Indexable = (*stringElems)(nil)
+ )
+
+ // String returns the Starlark expression that produces this view.
+ func (si stringElems) String() string {
+ suffix := ".elems()"
+ if si.ords {
+ suffix = ".elem_ords()"
+ }
+ return si.s.String() + suffix
+ }
+ func (si stringElems) Type() string { return "string.elems" }
+ func (si stringElems) Freeze() {} // immutable
+ func (si stringElems) Truth() Bool { return True }
+ func (si stringElems) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", si.Type()) }
+ func (si stringElems) Iterate() Iterator { return &stringElemsIterator{si, 0} }
+ func (si stringElems) Len() int { return len(si.s) }
+
+ // Index returns element i, as an Int if ords is set and otherwise
+ // as a 1-byte String.
+ func (si stringElems) Index(i int) Value {
+ if !si.ords {
+ // TODO(adonovan): opt: preallocate canonical 1-byte strings
+ // to avoid interface allocation.
+ return si.s[i : i+1]
+ }
+ return MakeInt(int(si.s[i]))
+ }
+
+ // A stringElemsIterator walks the elements of a stringElems in order.
+ type stringElemsIterator struct {
+ si stringElems
+ i int
+ }
+
+ // Next stores the element at the current offset in *p and advances,
+ // or returns false once every byte has been visited.
+ func (it *stringElemsIterator) Next(p *Value) bool {
+ pos := it.i
+ if pos == len(it.si.s) {
+ return false
+ }
+ *p = it.si.Index(pos)
+ it.i = pos + 1
+ return true
+ }
+
+ // Done does nothing: the iterator holds no resources.
+ func (*stringElemsIterator) Done() {}
+
+ // A stringCodepoints is an iterable view of the Unicode code points of
+ // a string. When ords is set, each code point is yielded as an integer;
+ // otherwise as a substring. It is not indexable.
+ type stringCodepoints struct {
+ s String
+ ords bool
+ }
+
+ var _ Iterable = (*stringCodepoints)(nil)
+
+ // String returns the Starlark expression that produces this view.
+ func (si stringCodepoints) String() string {
+ suffix := ".codepoints()"
+ if si.ords {
+ suffix = ".codepoint_ords()"
+ }
+ return si.s.String() + suffix
+ }
+ func (si stringCodepoints) Type() string { return "string.codepoints" }
+ func (si stringCodepoints) Freeze() {} // immutable
+ func (si stringCodepoints) Truth() Bool { return True }
+ func (si stringCodepoints) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", si.Type()) }
+ func (si stringCodepoints) Iterate() Iterator { return &stringCodepointsIterator{si, 0} }
+
+ // A stringCodepointsIterator walks the code points of a stringCodepoints;
+ // i is the byte offset of the next code point to decode.
+ type stringCodepointsIterator struct {
+ si stringCodepoints
+ i int
+ }
+
+ // Next decodes the code point at the current offset, stores it in *p
+ // and advances past it, or returns false at the end of the string.
+ func (it *stringCodepointsIterator) Next(p *Value) bool {
+ rest := it.si.s[it.i:]
+ if len(rest) == 0 {
+ return false
+ }
+ r, size := utf8.DecodeRuneInString(string(rest))
+ switch {
+ case it.si.ords:
+ *p = MakeInt(int(r))
+ case r == utf8.RuneError:
+ // Yield the replacement character itself rather than the raw bytes.
+ *p = String(r)
+ default:
+ *p = rest[:size]
+ }
+ it.i += size
+ return true
+ }
+
+ // Done does nothing: the iterator holds no resources.
+ func (*stringCodepointsIterator) Done() {}
+
+ // A Function is a function defined by a Starlark def statement or lambda expression.
+ // The initialization behavior of a Starlark module is also represented by a Function.
+ type Function struct {
+ funcode *compile.Funcode // compiled code; source of the name, doc, position and parameter info
+ module *module // module whose globals are reported by Globals
+ defaults Tuple // frozen by Freeze; presumably the parameter default values (confirm at construction site)
+ freevars Tuple // frozen by Freeze; presumably the captured free variables (confirm at construction site)
+ }
+
+ // A module is the dynamic counterpart to a Program.
+ // All functions in the same program share a module.
+ type module struct {
+ program *compile.Program // compiled program; program.Globals names the entries of globals
+ predeclared StringDict
+ globals []Value // indexed in parallel with program.Globals; nil means not yet defined
+ constants []Value
+ }
+
+ // makeGlobalDict returns a new, unfrozen StringDict containing all global
+ // variables so far defined in the module. Globals whose slot is still
+ // nil are omitted.
+ func (m *module) makeGlobalDict() StringDict {
+ names := m.program.Globals
+ dict := make(StringDict, len(names))
+ for idx := range names {
+ val := m.globals[idx]
+ if val == nil {
+ continue
+ }
+ dict[names[idx].Name] = val
+ }
+ return dict
+ }
+
+ func (fn *Function) Name() string { return fn.funcode.Name } // "lambda" for anonymous functions
+ func (fn *Function) Doc() string { return fn.funcode.Doc }
+ func (fn *Function) Hash() (uint32, error) { return hashString(fn.funcode.Name), nil } // hash of the name only
+ func (fn *Function) Freeze() { fn.defaults.Freeze(); fn.freevars.Freeze() }
+ func (fn *Function) String() string { return toString(fn) }
+ func (fn *Function) Type() string { return "function" }
+ func (fn *Function) Truth() Bool { return true }
+
+ // Globals returns a new, unfrozen StringDict containing all global
+ // variables so far defined in the function's module.
+ func (fn *Function) Globals() StringDict { return fn.module.makeGlobalDict() }
+
+ // Position, NumParams and NumKwonlyParams report the corresponding
+ // fields of the function's compiled code.
+ func (fn *Function) Position() syntax.Position { return fn.funcode.Pos }
+ func (fn *Function) NumParams() int { return fn.funcode.NumParams }
+ func (fn *Function) NumKwonlyParams() int { return fn.funcode.NumKwonlyParams }
+
+ // Param returns the name and position of the ith parameter,
+ // where 0 <= i < NumParams().
+ // The *args and **kwargs parameters are at the end
+ // even if there were optional parameters after *args.
+ // It panics with the value i if i >= NumParams().
+ func (fn *Function) Param(i int) (string, syntax.Position) {
+ if n := fn.NumParams(); i >= n {
+ panic(i)
+ }
+ // The parameters occupy the leading entries of the locals.
+ param := fn.funcode.Locals[i]
+ return param.Name, param.Pos
+ }
+
+ // HasVarargs reports the HasVarargs flag of the compiled code.
+ func (fn *Function) HasVarargs() bool { return fn.funcode.HasVarargs }
+
+ // HasKwargs reports the HasKwargs flag of the compiled code.
+ func (fn *Function) HasKwargs() bool { return fn.funcode.HasKwargs }
+
+ // A Builtin is a function implemented in Go.
+ type Builtin struct {
+ name string
+ fn func(thread *Thread, fn *Builtin, args Tuple, kwargs []Tuple) (Value, error)
+ recv Value // for bound methods (e.g. "".startswith)
+ }
+
+ // Name returns the name given when the Builtin was created.
+ func (b *Builtin) Name() string { return b.name }
+
+ // Freeze freezes the bound receiver, if there is one.
+ func (b *Builtin) Freeze() {
+ if b.recv == nil {
+ return
+ }
+ b.recv.Freeze()
+ }
+
+ // Hash returns a hash of the name, perturbed when a receiver is bound.
+ func (b *Builtin) Hash() (uint32, error) {
+ h := hashString(b.name)
+ if b.recv == nil {
+ return h, nil
+ }
+ return h ^ 5521, nil
+ }
+
+ // Receiver returns the bound receiver, or nil for a plain function.
+ func (b *Builtin) Receiver() Value { return b.recv }
+ func (b *Builtin) String() string { return toString(b) }
+ func (b *Builtin) Type() string { return "builtin_function_or_method" }
+
+ // CallInternal invokes the Go implementation, passing b itself as
+ // the *Builtin argument.
+ func (b *Builtin) CallInternal(thread *Thread, args Tuple, kwargs []Tuple) (Value, error) {
+ impl := b.fn
+ return impl(thread, b, args, kwargs)
+ }
+ func (b *Builtin) Truth() Bool { return true }
+
+ // NewBuiltin returns a new 'builtin_function_or_method' value with the specified name
+ // and implementation. It compares unequal with all other values.
+ // The result has no bound receiver.
+ func NewBuiltin(name string, fn func(thread *Thread, fn *Builtin, args Tuple, kwargs []Tuple) (Value, error)) *Builtin {
+ return &Builtin{name: name, fn: fn}
+ }
+
+ // BindReceiver returns a new Builtin value representing a method
+ // closure, that is, a built-in function bound to a receiver value.
+ // The new value shares b's name and implementation; b is not modified.
+ //
+ // In the example below, the value of f is the string.index
+ // built-in method bound to the receiver value "abc":
+ //
+ // f = "abc".index; f("a"); f("b")
+ //
+ // In the common case, the receiver is bound only during the call,
+ // but this still results in the creation of a temporary method closure:
+ //
+ // "abc".index("a")
+ //
+ func (b *Builtin) BindReceiver(recv Value) *Builtin {
+ return &Builtin{name: b.name, fn: b.fn, recv: recv}
+ }
+
+ // A *Dict represents a Starlark dictionary.
+ // The zero value of Dict is a valid empty dictionary.
+ // If you know the exact final number of entries,
+ // it is more efficient to call NewDict.
+ type Dict struct {
+ ht hashtable
+ }
+
+ // NewDict returns a dictionary with initial space for
+ // at least size insertions before rehashing.
+ func NewDict(size int) *Dict {
+ dict := new(Dict)
+ dict.ht.init(size)
+ return dict
+ }
+
+ // The methods below delegate to the underlying hashtable.
+ func (d *Dict) Clear() error { return d.ht.clear() }
+ func (d *Dict) Delete(k Value) (v Value, found bool, err error) { return d.ht.delete(k) }
+ func (d *Dict) Get(k Value) (v Value, found bool, err error) { return d.ht.lookup(k) }
+ func (d *Dict) Items() []Tuple { return d.ht.items() }
+ func (d *Dict) Keys() []Value { return d.ht.keys() }
+ func (d *Dict) Len() int { return int(d.ht.len) }
+ func (d *Dict) Iterate() Iterator { return d.ht.iterate() }
+ func (d *Dict) SetKey(k, v Value) error { return d.ht.insert(k, v) }
+ func (d *Dict) String() string { return toString(d) }
+ func (d *Dict) Type() string { return "dict" }
+ func (d *Dict) Freeze() { d.ht.freeze() }
+ func (d *Dict) Truth() Bool { return d.Len() > 0 } // only the empty dict is false
+ func (d *Dict) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable type: dict") }
+
+ // Attr and AttrNames expose the methods listed in dictMethods.
+ func (d *Dict) Attr(name string) (Value, error) { return builtinAttr(d, name, dictMethods) }
+ func (d *Dict) AttrNames() []string { return builtinAttrNames(dictMethods) }
+
+ // CompareSameType implements == and != for dictionaries; every other
+ // operator yields a "not implemented" error. It panics if y_ is not a *Dict.
+ func (x *Dict) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+ y := y_.(*Dict)
+ if op != syntax.EQL && op != syntax.NEQ {
+ return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type())
+ }
+ equal, err := dictsEqual(x, y, depth)
+ if op == syntax.NEQ {
+ return !equal, err
+ }
+ return equal, err
+ }
+
+ // dictsEqual reports whether x and y have the same number of entries
+ // and every key of x maps to an equal value in y. Values are compared
+ // with EqualDepth at depth-1. A lookup error in y is treated like a
+ // missing key only when the lookup also reports !found.
+ func dictsEqual(x, y *Dict, depth int) (bool, error) {
+ if x.Len() != y.Len() {
+ return false, nil
+ }
+ for _, entry := range x.Items() {
+ k, xv := entry[0], entry[1]
+
+ yv, found, _ := y.Get(k)
+ if !found {
+ return false, nil
+ }
+ same, err := EqualDepth(xv, yv, depth-1)
+ if err != nil {
+ return false, err
+ }
+ if !same {
+ return false, nil
+ }
+ }
+ return true, nil
+ }
+
+ // A *List represents a Starlark list value.
+ type List struct {
+ elems []Value
+ frozen bool // set by Freeze; mutations are rejected once true
+ itercount uint32 // number of active iterators (ignored if frozen)
+ }
+
+ // NewList returns a list containing the specified elements.
+ // Callers should not subsequently modify elems.
+ // The list uses the elems slice directly; it is not copied.
+ func NewList(elems []Value) *List { return &List{elems: elems} }
+
+ // Freeze marks the list as frozen and then freezes each element.
+ // The flag is set first, so a list reachable from itself is visited once.
+ func (l *List) Freeze() {
+ if l.frozen {
+ return
+ }
+ l.frozen = true
+ for _, v := range l.elems {
+ v.Freeze()
+ }
+ }
+
+ // checkMutable reports an error if the list should not be mutated.
+ // verb+" list" should describe the operation.
+ func (l *List) checkMutable(verb string) error {
+ switch {
+ case l.frozen:
+ return fmt.Errorf("cannot %s frozen list", verb)
+ case l.itercount > 0:
+ return fmt.Errorf("cannot %s list during iteration", verb)
+ }
+ return nil
+ }
+
+ // Value and Indexable methods of *List.
+ func (l *List) String() string { return toString(l) }
+ func (l *List) Type() string { return "list" }
+ func (l *List) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable type: list") }
+ func (l *List) Truth() Bool { return l.Len() > 0 } // only the empty list is false
+ func (l *List) Len() int { return len(l.elems) }
+ func (l *List) Index(i int) Value { return l.elems[i] }
+
+ // Slice returns a new list holding the elements of l selected by the
+ // slice (start, end, step). The result never shares storage with l.
+ // The caller must supply valid indices and a non-zero step.
+ func (l *List) Slice(start, end, step int) Value {
+ // A unit stride is a copy of a contiguous range.
+ if step == 1 {
+ return NewList(append([]Value{}, l.elems[start:end]...))
+ }
+
+ // Otherwise gather elements one at a time while i has not yet
+ // reached end in the direction of travel.
+ dir := signum(step)
+ var picked []Value
+ i := start
+ for signum(end-i) == dir {
+ picked = append(picked, l.elems[i])
+ i += step
+ }
+ return NewList(picked)
+ }
+
+ // Attr looks up a method of the list by name in listMethods.
+ func (l *List) Attr(name string) (Value, error) { return builtinAttr(l, name, listMethods) }
+
+ // AttrNames returns the names of the methods in listMethods.
+ func (l *List) AttrNames() []string { return builtinAttrNames(listMethods) }
+
+ // Iterate returns an iterator over the list. For an unfrozen list it
+ // also counts the iterator as active until its Done method is called.
+ func (l *List) Iterate() Iterator {
+ it := &listIterator{l: l}
+ if l.frozen {
+ return it
+ }
+ l.itercount++
+ return it
+ }
+
+ // CompareSameType compares two lists element by element.
+ // It panics if y_ is not a *List.
+ func (x *List) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+ // It's tempting to check x == y as an optimization here,
+ // but wrong because a list containing NaN is not equal to itself.
+ other := y_.(*List)
+ return sliceCompare(op, x.elems, other.elems, depth)
+ }
+
+ // sliceCompare applies the comparison op to the sequences x and y.
+ // The first pair of unequal elements decides the result; if there is
+ // none, the lengths decide it.
+ func sliceCompare(op syntax.Token, x, y []Value, depth int) (bool, error) {
+ isEq, isNeq := op == syntax.EQL, op == syntax.NEQ
+
+ // Fast path: sequences of different length are never equal.
+ if (isEq || isNeq) && len(x) != len(y) {
+ return isNeq, nil
+ }
+
+ // Find first element that is not equal in both lists.
+ n := len(x)
+ if len(y) < n {
+ n = len(y)
+ }
+ for i := 0; i < n; i++ {
+ same, err := EqualDepth(x[i], y[i], depth-1)
+ if err != nil {
+ return false, err
+ }
+ if same {
+ continue
+ }
+ switch {
+ case isEq:
+ return false, nil
+ case isNeq:
+ return true, nil
+ }
+ return CompareDepth(op, x[i], y[i], depth-1)
+ }
+
+ return threeway(op, len(x)-len(y)), nil
+ }
+
+ // A listIterator walks the elements of a list by index.
+ type listIterator struct {
+ l *List
+ i int
+ }
+
+ // Next stores the element at the current index in *p and advances,
+ // or returns false once the index reaches the list's length.
+ func (it *listIterator) Next(p *Value) bool {
+ if it.i >= it.l.Len() {
+ return false
+ }
+ *p = it.l.elems[it.i]
+ it.i++
+ return true
+ }
+
+ // Done removes this iterator from the count of active iterators
+ // of an unfrozen list.
+ func (it *listIterator) Done() {
+ if it.l.frozen {
+ return
+ }
+ it.l.itercount--
+ }
+
+ // SetIndex replaces element i with v.
+ // It fails if the list is frozen or being iterated over.
+ func (l *List) SetIndex(i int, v Value) error {
+ err := l.checkMutable("assign to element of")
+ if err != nil {
+ return err
+ }
+ l.elems[i] = v
+ return nil
+ }
+
+ // Append adds v to the end of the list.
+ // It fails if the list is frozen or being iterated over.
+ func (l *List) Append(v Value) error {
+ err := l.checkMutable("append to")
+ if err != nil {
+ return err
+ }
+ l.elems = append(l.elems, v)
+ return nil
+ }
+
+ // Clear removes all elements but keeps the backing storage.
+ // It fails if the list is frozen or being iterated over.
+ func (l *List) Clear() error {
+ err := l.checkMutable("clear")
+ if err != nil {
+ return err
+ }
+ for idx := range l.elems {
+ l.elems[idx] = nil // aid GC
+ }
+ l.elems = l.elems[:0]
+ return nil
+ }
+
+ // A Tuple represents a Starlark tuple value.
+ type Tuple []Value
+
+ // Len returns the number of elements.
+ func (t Tuple) Len() int { return len(t) }
+
+ // Index returns element i.
+ func (t Tuple) Index(i int) Value { return t[i] }
+
+ // Slice returns the elements of t selected by the slice (start, end, step).
+ // With a unit stride the result shares storage with t.
+ // The caller must supply valid indices and a non-zero step.
+ func (t Tuple) Slice(start, end, step int) Value {
+ if step == 1 {
+ return t[start:end]
+ }
+
+ // Gather elements one at a time while i has not yet reached end
+ // in the direction of travel.
+ dir := signum(step)
+ var picked Tuple
+ i := start
+ for signum(end-i) == dir {
+ picked = append(picked, t[i])
+ i += step
+ }
+ return picked
+ }
+
+ // Iterate returns an iterator over the elements of t.
+ func (t Tuple) Iterate() Iterator { return &tupleIterator{elems: t} }
+
+ // Freeze freezes every element; the tuple itself has no mutable state.
+ func (t Tuple) Freeze() {
+ for i := range t {
+ t[i].Freeze()
+ }
+ }
+ func (t Tuple) String() string { return toString(t) }
+ func (t Tuple) Type() string { return "tuple" }
+
+ // Truth reports whether the tuple is non-empty.
+ func (t Tuple) Truth() Bool { return len(t) > 0 }
+
+ // CompareSameType compares two tuples element by element.
+ // It panics if y_ is not a Tuple.
+ func (x Tuple) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+ return sliceCompare(op, x, y_.(Tuple), depth)
+ }
+
+ // Hash combines the hashes of the elements.
+ // It fails if any element is unhashable.
+ func (t Tuple) Hash() (uint32, error) {
+ // Use same algorithm as Python.
+ var h, mult uint32 = 0x345678, 1000003
+ // The multiplier grows by the same amount after every element.
+ step := 82520 + uint32(len(t)+len(t))
+ for _, elem := range t {
+ eh, err := elem.Hash()
+ if err != nil {
+ return 0, err
+ }
+ h ^= eh * mult
+ mult += step
+ }
+ return h, nil
+ }
+
+ // A tupleIterator yields the elements of a tuple by repeatedly
+ // taking the head of the remaining slice.
+ type tupleIterator struct{ elems Tuple }
+
+ // Next stores the next element in *p and drops it from the remainder,
+ // or returns false when nothing is left.
+ func (it *tupleIterator) Next(p *Value) bool {
+ if len(it.elems) == 0 {
+ return false
+ }
+ *p, it.elems = it.elems[0], it.elems[1:]
+ return true
+ }
+
+ // Done does nothing: the iterator holds no resources.
+ func (it *tupleIterator) Done() {}
+
+// A Set represents a Starlark set value.
+// The zero value of Set is a valid empty set.
+// If you know the exact final number of elements,
+// it is more efficient to call NewSet.
+type Set struct {
+ ht hashtable // values are all None
+}
+
+// NewSet returns a set with initial space for
+// at least size insertions before rehashing.
+func NewSet(size int) *Set {
+	s := &Set{}
+	s.ht.init(size)
+	return s
+}
+
+// The methods below delegate to the underlying hashtable, whose keys
+// are the elements of the set; Insert stores each key with the value None.
+
+// Delete removes k from the set and reports whether it was present.
+func (s *Set) Delete(k Value) (found bool, err error) { _, found, err = s.ht.delete(k); return }
+func (s *Set) Clear() error { return s.ht.clear() }
+// Has reports whether k is an element of the set.
+func (s *Set) Has(k Value) (found bool, err error) { _, found, err = s.ht.lookup(k); return }
+func (s *Set) Insert(k Value) error { return s.ht.insert(k, None) }
+func (s *Set) Len() int { return int(s.ht.len) }
+func (s *Set) Iterate() Iterator { return s.ht.iterate() }
+func (s *Set) String() string { return toString(s) }
+func (s *Set) Type() string { return "set" }
+func (s *Set) elems() []Value { return s.ht.keys() }
+func (s *Set) Freeze() { s.ht.freeze() }
+// Hash always fails: sets are unhashable.
+func (s *Set) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable type: set") }
+// Truth reports whether the set is non-empty.
+func (s *Set) Truth() Bool { return s.Len() > 0 }
+
+// Attr and AttrNames expose the built-in methods listed in setMethods.
+func (s *Set) Attr(name string) (Value, error) { return builtinAttr(s, name, setMethods) }
+func (s *Set) AttrNames() []string { return builtinAttrNames(setMethods) }
+
+// CompareSameType compares set x with y_, which must be a *Set.
+// Only the == and != operators are supported; any other operator
+// yields a "not implemented" error.
+func (x *Set) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+	other := y_.(*Set)
+	if op != syntax.EQL && op != syntax.NEQ {
+		return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, other.Type())
+	}
+	equal, err := setsEqual(x, other, depth)
+	if op == syntax.NEQ {
+		return !equal, err
+	}
+	return equal, err
+}
+
+// setsEqual reports whether x and y have the same number of elements
+// and every element of x is also found in y. An error from a
+// membership test is treated as "not found". The depth parameter
+// is not used.
+func setsEqual(x, y *Set, depth int) (bool, error) {
+	if x.Len() == y.Len() {
+		for _, member := range x.elems() {
+			present, _ := y.Has(member)
+			if !present {
+				return false, nil
+			}
+		}
+		return true, nil
+	}
+	return false, nil
+}
+
+// Union returns a new set containing the elements of s followed by
+// the elements produced by iter. It returns an error if an element
+// of iter cannot be inserted. It does not call iter.Done.
+func (s *Set) Union(iter Iterator) (Value, error) {
+	union := &Set{}
+	for _, own := range s.elems() {
+		union.Insert(own) // can't fail
+	}
+	var v Value
+	for iter.Next(&v) {
+		err := union.Insert(v)
+		if err != nil {
+			return nil, err
+		}
+	}
+	return union, nil
+}
+
+// toString returns the string form of value v.
+// It may be more efficient than v.String() for larger values.
+func toString(v Value) string {
+ buf := new(strings.Builder)
+ writeValue(buf, v, nil)
+ return buf.String()
+}
+
+// writeValue writes x to out.
+//
+// path is used to detect cycles.
+// It contains the list of *List and *Dict values we're currently printing.
+// (These are the only potentially cyclic structures.)
+// Callers should generally pass nil for path.
+// It is safe to re-use the same path slice for multiple calls.
+//
+// Values with no dedicated case below are written using their String method.
+func writeValue(out *strings.Builder, x Value, path []Value) {
+ switch x := x.(type) {
+ case nil:
+ out.WriteString("<nil>") // indicates a bug
+
+ // These four cases are duplicates of T.String(), for efficiency.
+ case NoneType:
+ out.WriteString("None")
+
+ case Int:
+ out.WriteString(x.String())
+
+ case Bool:
+ if x {
+ out.WriteString("True")
+ } else {
+ out.WriteString("False")
+ }
+
+ case String:
+ out.WriteString(syntax.Quote(string(x), false))
+
+ case *List:
+ out.WriteByte('[')
+ if pathContains(path, x) {
+ out.WriteString("...") // list contains itself
+ } else {
+ for i, elem := range x.elems {
+ if i > 0 {
+ out.WriteString(", ")
+ }
+ // Extend the path with x so that a nested occurrence
+ // of this same list is printed as [...].
+ writeValue(out, elem, append(path, x))
+ }
+ }
+ out.WriteByte(']')
+
+ case Tuple:
+ out.WriteByte('(')
+ for i, elem := range x {
+ if i > 0 {
+ out.WriteString(", ")
+ }
+ writeValue(out, elem, path)
+ }
+ // A 1-tuple is written with a trailing comma: (x,).
+ if len(x) == 1 {
+ out.WriteByte(',')
+ }
+ out.WriteByte(')')
+
+ case *Function:
+ fmt.Fprintf(out, "<function %s>", x.Name())
+
+ case *Builtin:
+ // A built-in with a receiver is printed as a method of that value's type.
+ if x.recv != nil {
+ fmt.Fprintf(out, "<built-in method %s of %s value>", x.Name(), x.recv.Type())
+ } else {
+ fmt.Fprintf(out, "<built-in function %s>", x.Name())
+ }
+
+ case *Dict:
+ out.WriteByte('{')
+ if pathContains(path, x) {
+ out.WriteString("...") // dict contains itself
+ } else {
+ sep := ""
+ for _, item := range x.Items() {
+ k, v := item[0], item[1]
+ out.WriteString(sep)
+ // Keys are written with the unextended path;
+ // only values are checked against this dict.
+ writeValue(out, k, path)
+ out.WriteString(": ")
+ writeValue(out, v, append(path, x)) // cycle check
+ sep = ", "
+ }
+ }
+ out.WriteByte('}')
+
+ case *Set:
+ out.WriteString("set([")
+ for i, elem := range x.elems() {
+ if i > 0 {
+ out.WriteString(", ")
+ }
+ writeValue(out, elem, path)
+ }
+ out.WriteString("])")
+
+ default:
+ out.WriteString(x.String())
+ }
+}
+
+// pathContains reports whether x is identical (==) to some
+// element of path.
+func pathContains(path []Value, x Value) bool {
+	for i := 0; i < len(path); i++ {
+		if path[i] == x {
+			return true
+		}
+	}
+	return false
+}
+
+// maxdepth is the recursion depth limit that Equal and Compare
+// pass to EqualDepth and CompareDepth.
+const maxdepth = 10
+
+// Equal reports whether two Starlark values are equal.
+// When x is a String the answer is computed directly;
+// all other cases are delegated to EqualDepth with depth maxdepth.
+func Equal(x, y Value) (bool, error) {
+	// fast path for an important special case
+	if s, isString := x.(String); isString {
+		return s == y, nil
+	}
+	return EqualDepth(x, y, maxdepth)
+}
+
+// EqualDepth reports whether two Starlark values are equal,
+// by calling CompareDepth with the == operator.
+//
+// Recursive comparisons by implementations of Value.CompareSameType
+// should use EqualDepth to prevent infinite recursion.
+func EqualDepth(x, y Value, depth int) (bool, error) {
+	equal, err := CompareDepth(syntax.EQL, x, y, depth)
+	return equal, err
+}
+
+// Compare compares two Starlark values using CompareDepth
+// with depth maxdepth.
+// The comparison operation must be one of EQL, NEQ, LT, LE, GT, or GE.
+// Compare returns an error if an ordered comparison was
+// requested for a type that does not support it.
+//
+// Recursive comparisons by implementations of Value.CompareSameType
+// should use CompareDepth to prevent infinite recursion.
+func Compare(op syntax.Token, x, y Value) (bool, error) {
+	result, err := CompareDepth(op, x, y, maxdepth)
+	return result, err
+}
+
+// CompareDepth compares two Starlark values.
+// The comparison operation must be one of EQL, NEQ, LT, LE, GT, or GE.
+// CompareDepth returns an error if an ordered comparison was
+// requested for a pair of values that do not support it.
+//
+// The depth parameter limits the maximum depth of recursion
+// in cyclic data structures.
+// A depth less than 1 causes an immediate error.
+func CompareDepth(op syntax.Token, x, y Value, depth int) (bool, error) {
+ if depth < 1 {
+ return false, fmt.Errorf("comparison exceeded maximum recursion depth")
+ }
+ if sameType(x, y) {
+ // Values that implement Comparable define their own comparison.
+ if xcomp, ok := x.(Comparable); ok {
+ return xcomp.CompareSameType(op, y, depth)
+ }
+
+ // use identity comparison
+ switch op {
+ case syntax.EQL:
+ return x == y, nil
+ case syntax.NEQ:
+ return x != y, nil
+ }
+ return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type())
+ }
+
+ // different types
+
+ // int/float ordered comparisons
+ // cmp is the three-way result (-1, 0, +1) of comparing x with y.
+ // As coded below, a NaN operand is ordered above every Int.
+ switch x := x.(type) {
+ case Int:
+ if y, ok := y.(Float); ok {
+ var cmp int
+ if y != y {
+ cmp = -1 // y is NaN
+ } else if !math.IsInf(float64(y), 0) {
+ cmp = x.rational().Cmp(y.rational()) // y is finite
+ } else if y > 0 {
+ cmp = -1 // y is +Inf
+ } else {
+ cmp = +1 // y is -Inf
+ }
+ return threeway(op, cmp), nil
+ }
+ case Float:
+ if y, ok := y.(Int); ok {
+ var cmp int
+ if x != x {
+ cmp = +1 // x is NaN
+ } else if !math.IsInf(float64(x), 0) {
+ cmp = x.rational().Cmp(y.rational()) // x is finite
+ } else if x > 0 {
+ cmp = +1 // x is +Inf
+ } else {
+ cmp = -1 // x is -Inf
+ }
+ return threeway(op, cmp), nil
+ }
+ }
+
+ // All other values of different types compare unequal.
+ switch op {
+ case syntax.EQL:
+ return false, nil
+ case syntax.NEQ:
+ return true, nil
+ }
+ return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type())
+}
+
+// sameType reports whether x and y have the same Go dynamic type
+// or, failing that, the same Starlark type name.
+func sameType(x, y Value) bool {
+	if reflect.TypeOf(x) == reflect.TypeOf(y) {
+		return true
+	}
+	return x.Type() == y.Type()
+}
+
+// threeway converts the result cmp (-1, 0, +1) of a three-way
+// comparison of x with y into the boolean value of "x op y"
+// (e.g. x < y). It panics if op is not a comparison operator.
+func threeway(op syntax.Token, cmp int) bool {
+	switch op {
+	case syntax.LT:
+		return cmp < 0
+	case syntax.LE:
+		return cmp <= 0
+	case syntax.GT:
+		return cmp > 0
+	case syntax.GE:
+		return cmp >= 0
+	case syntax.EQL:
+		return cmp == 0
+	case syntax.NEQ:
+		return cmp != 0
+	default:
+		panic(op)
+	}
+}
+
+// b2i converts a boolean to an integer: 1 for true, 0 for false.
+func b2i(b bool) int {
+	n := 0
+	if b {
+		n = 1
+	}
+	return n
+}
+
+// Len returns the length of a string or sequence value,
+// and -1 for all others.
+//
+// Warning: Len(x) >= 0 does not imply Iterate(x) != nil.
+// A string has a known length but is not directly iterable.
+func Len(x Value) int {
+ switch x := x.(type) {
+ case String:
+ return x.Len()
+ case Indexable:
+ return x.Len()
+ case Sequence:
+ return x.Len()
+ }
+ return -1
+}
+
+// Iterate returns a new iterator for the value if it implements
+// Iterable, and nil otherwise.
+// If the result is non-nil, the caller must call Done when finished with it.
+//
+// Warning: Iterate(x) != nil does not imply Len(x) >= 0.
+// Some iterables may have unknown length.
+func Iterate(x Value) Iterator {
+	iterable, ok := x.(Iterable)
+	if !ok {
+		return nil
+	}
+	return iterable.Iterate()
+}
+
+// Bytes is the type of a Starlark binary string.
+//
+// A Bytes encapsulates an immutable sequence of bytes.
+// It is comparable, indexable, and sliceable, but not directly iterable;
+// use bytes.elems() for an iterable view.
+//
+// In this Go implementation, the elements of 'string' and 'bytes' are
+// both bytes, but in other implementations, notably Java, the elements
+// of a 'string' are UTF-16 codes (Java chars). The spec abstracts text
+// strings as sequences of UTF-k codes that encode Unicode code points,
+// and operations that convert from text to binary incur UTF-k-to-UTF-8
+// transcoding; conversely, conversion from binary to text incurs
+// UTF-8-to-UTF-k transcoding. Because k=8 for Go, these operations
+// are the identity function, at least for valid encodings of text.
+type Bytes string
+
+// Compile-time checks that Bytes implements these interfaces.
+var (
+ _ Comparable = Bytes("")
+ _ Sliceable = Bytes("")
+ _ Indexable = Bytes("")
+)
+
+// String returns the quoted form of b, as produced by syntax.Quote
+// with its second argument set to true.
+func (b Bytes) String() string { return syntax.Quote(string(b), true) }
+func (b Bytes) Type() string { return "bytes" }
+func (b Bytes) Freeze() {} // immutable
+func (b Bytes) Truth() Bool { return len(b) > 0 }
+// Hash returns the hash of the String with the same contents.
+func (b Bytes) Hash() (uint32, error) { return String(b).Hash() }
+func (b Bytes) Len() int { return len(b) }
+// Index returns the i'th byte as a Bytes value of length one.
+func (b Bytes) Index(i int) Value { return b[i : i+1] }
+
+// Attr and AttrNames expose the built-in methods listed in bytesMethods.
+func (b Bytes) Attr(name string) (Value, error) { return builtinAttr(b, name, bytesMethods) }
+func (b Bytes) AttrNames() []string { return builtinAttrNames(bytesMethods) }
+
+// Slice returns the bytes of b selected by start, end and step.
+// A step of 1 yields the substring b[start:end]; any other step
+// collects the bytes at indices start, start+step, and so on for as
+// long as the index has not reached end in the direction of step.
+func (b Bytes) Slice(start, end, step int) Value {
+	if step != 1 {
+		var picked []byte
+		for dir, i := signum(step), start; signum(end-i) == dir; i += step {
+			picked = append(picked, b[i])
+		}
+		return Bytes(picked)
+	}
+	return b[start:end]
+}
+
+// CompareSameType compares x with y_, which must be a Bytes,
+// using the byte-wise ordering of strings.Compare.
+func (x Bytes) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+	other := string(y_.(Bytes))
+	order := strings.Compare(string(x), other)
+	return threeway(op, order), nil
+}
diff --git a/starlark/value_test.go b/starlark/value_test.go
new file mode 100644
index 0000000..6420a95
--- /dev/null
+++ b/starlark/value_test.go
@@ -0,0 +1,46 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlark_test
+
+// This file defines tests of the Value API.
+
+import (
+ "fmt"
+ "testing"
+
+ "go.starlark.net/starlark"
+)
+
+// TestStringMethod checks that String() and the %s/%v verbs produce the
+// quoted form of a starlark.String, while GoString() and %#v produce
+// the unquoted form.
+func TestStringMethod(t *testing.T) {
+	s := starlark.String("hello")
+	cases := []struct{ got, want string }{
+		// quoted string:
+		{got: s.String(), want: `"hello"`},
+		{got: fmt.Sprintf("%s", s), want: `"hello"`},
+		{got: fmt.Sprintf("%+s", s), want: `"hello"`},
+		{got: fmt.Sprintf("%v", s), want: `"hello"`},
+		{got: fmt.Sprintf("%+v", s), want: `"hello"`},
+		// unquoted:
+		{got: s.GoString(), want: `hello`},
+		{got: fmt.Sprintf("%#v", s), want: `hello`},
+	}
+	for i, c := range cases {
+		if c.got != c.want {
+			t.Errorf("#%d: got <<%s>>, want <<%s>>", i, c.got, c.want)
+		}
+	}
+}
+
+// TestListAppend checks that a value appended to an empty list
+// can be read back unchanged through Index.
+func TestListAppend(t *testing.T) {
+	l := starlark.NewList(nil)
+	// Append reports an error when the list may not be mutated;
+	// stop here rather than indexing into a list that is still empty.
+	if err := l.Append(starlark.String("hello")); err != nil {
+		t.Fatalf("list.Append() failed: %v", err)
+	}
+	res, ok := starlark.AsString(l.Index(0))
+	if !ok {
+		t.Errorf("failed list.Append() got: %s, want: starlark.String", l.Index(0).Type())
+	}
+	if res != "hello" {
+		t.Errorf("failed list.Append() got: %+v, want: hello", res)
+	}
+}
diff --git a/starlarkjson/json.go b/starlarkjson/json.go
new file mode 100644
index 0000000..fc5d53f
--- /dev/null
+++ b/starlarkjson/json.go
@@ -0,0 +1,478 @@
+// Copyright 2020 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package starlarkjson defines utilities for converting Starlark values
+// to/from JSON strings. The most recent IETF standard for JSON is
+// https://www.ietf.org/rfc/rfc7159.txt.
+package starlarkjson // import "go.starlark.net/starlarkjson"
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "log"
+ "math"
+ "math/big"
+ "sort"
+ "strconv"
+ "strings"
+ "unicode/utf8"
+
+ "go.starlark.net/starlark"
+ "go.starlark.net/starlarkstruct"
+)
+
+// Module json is a Starlark module of JSON-related functions.
+//
+// json = module(
+// encode,
+// decode,
+// indent,
+// )
+//
+// def encode(x):
+//
+// The encode function accepts one required positional argument,
+// which it converts to JSON by cases:
+// - A Starlark value that implements Go's standard json.Marshaler
+// interface defines its own JSON encoding.
+// - None, True, and False are converted to null, true, and false, respectively.
+// - Starlark int values, no matter how large, are encoded as decimal integers.
+// Some decoders may not be able to decode very large integers.
+// - Starlark float values are encoded using decimal point notation,
+// even if the value is an integer.
+// It is an error to encode a non-finite floating-point value.
+// - Starlark strings are encoded as JSON strings, using UTF-16 escapes.
+// - a Starlark IterableMapping (e.g. dict) is encoded as a JSON object.
+// It is an error if any key is not a string.
+// - any other Starlark Iterable (e.g. list, tuple) is encoded as a JSON array.
+// - a Starlark HasAttrs (e.g. struct) is encoded as a JSON object.
+// If an application-defined type matches more than one of the cases described above
+// (e.g. it implements both Iterable and HasAttrs), the first case takes precedence.
+// Encoding any other value yields an error.
+//
+// def decode(x):
+//
+// The decode function accepts one positional parameter, a JSON string.
+// It returns the Starlark value that the string denotes.
+// - Numbers are parsed as int or float, depending on whether they
+// contain a decimal point.
+// - JSON objects are parsed as new unfrozen Starlark dicts.
+// - JSON arrays are parsed as new unfrozen Starlark lists.
+// Decoding fails if x is not a valid JSON string.
+//
+// def indent(str, *, prefix="", indent="\t"):
+//
+// The indent function pretty-prints a valid JSON encoding,
+// and returns a string containing the indented form.
+// It accepts one required positional parameter, the JSON string,
+// and two optional keyword-only string parameters, prefix and indent,
+// that specify a prefix of each new line, and the unit of indentation.
+//
+var Module = &starlarkstruct.Module{
+ Name: "json",
+ // Each member is a built-in whose name carries the "json." prefix.
+ Members: starlark.StringDict{
+ "encode": starlark.NewBuiltin("json.encode", encode),
+ "decode": starlark.NewBuiltin("json.decode", decode),
+ "indent": starlark.NewBuiltin("json.indent", indent),
+ },
+}
+
+// encode implements json.encode(x). It accepts exactly one positional
+// argument and returns its JSON encoding as a Starlark string.
+// An error from encoding is prefixed with the built-in's name.
+func encode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ var x starlark.Value
+ if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x); err != nil {
+ return nil, err
+ }
+
+ buf := new(bytes.Buffer)
+
+ // quote appends the JSON string literal for s to buf.
+ // quoteSpace is scratch space reused by the fast path.
+ var quoteSpace [128]byte
+ quote := func(s string) {
+ // Non-trivial escaping is handled by Go's encoding/json.
+ if isPrintableASCII(s) {
+ buf.Write(strconv.AppendQuote(quoteSpace[:0], s))
+ } else {
+ // TODO(adonovan): opt: RFC 8259 mandates UTF-8 for JSON.
+ // Can we avoid this call?
+ data, _ := json.Marshal(s)
+ buf.Write(data)
+ }
+ }
+
+ // emit appends the JSON encoding of x to buf, recursively.
+ // The order of the cases determines which encoding is used for
+ // a value that satisfies more than one of the interfaces.
+ var emit func(x starlark.Value) error
+ emit = func(x starlark.Value) error {
+ switch x := x.(type) {
+ case json.Marshaler:
+ // Application-defined starlark.Value types
+ // may define their own JSON encoding.
+ data, err := x.MarshalJSON()
+ if err != nil {
+ return err
+ }
+ buf.Write(data)
+
+ case starlark.NoneType:
+ buf.WriteString("null")
+
+ case starlark.Bool:
+ if x {
+ buf.WriteString("true")
+ } else {
+ buf.WriteString("false")
+ }
+
+ case starlark.Int:
+ fmt.Fprint(buf, x)
+
+ case starlark.Float:
+ if !isFinite(float64(x)) {
+ return fmt.Errorf("cannot encode non-finite float %v", x)
+ }
+ fmt.Fprintf(buf, "%g", x) // always contains a decimal point
+
+ case starlark.String:
+ quote(string(x))
+
+ case starlark.IterableMapping:
+ // e.g. dict (must have string keys)
+ buf.WriteByte('{')
+ items := x.Items()
+ // Check every key before sorting: the sort's comparison
+ // function type-asserts each key to starlark.String.
+ for _, item := range items {
+ if _, ok := item[0].(starlark.String); !ok {
+ return fmt.Errorf("%s has %s key, want string", x.Type(), item[0].Type())
+ }
+ }
+ // Emit entries in sorted key order.
+ sort.Slice(items, func(i, j int) bool {
+ return items[i][0].(starlark.String) < items[j][0].(starlark.String)
+ })
+ for i, item := range items {
+ if i > 0 {
+ buf.WriteByte(',')
+ }
+ k, _ := starlark.AsString(item[0])
+ quote(k)
+ buf.WriteByte(':')
+ if err := emit(item[1]); err != nil {
+ return fmt.Errorf("in %s key %s: %v", x.Type(), item[0], err)
+ }
+ }
+ buf.WriteByte('}')
+
+ case starlark.Iterable:
+ // e.g. tuple, list
+ buf.WriteByte('[')
+ iter := x.Iterate()
+ // Runs when this invocation of emit returns.
+ defer iter.Done()
+ var elem starlark.Value
+ for i := 0; iter.Next(&elem); i++ {
+ if i > 0 {
+ buf.WriteByte(',')
+ }
+ if err := emit(elem); err != nil {
+ return fmt.Errorf("at %s index %d: %v", x.Type(), i, err)
+ }
+ }
+ buf.WriteByte(']')
+
+ case starlark.HasAttrs:
+ // e.g. struct
+ buf.WriteByte('{')
+ // Copy the names before sorting so the slice returned
+ // by AttrNames is not modified.
+ var names []string
+ names = append(names, x.AttrNames()...)
+ sort.Strings(names)
+ for i, name := range names {
+ v, err := x.Attr(name)
+ if err != nil || v == nil {
+ // NOTE(review): log.Fatalf terminates the whole process;
+ // consider returning an error instead.
+ log.Fatalf("internal error: dir(%s) includes %q but value has no .%s field", x.Type(), name, name)
+ }
+ if i > 0 {
+ buf.WriteByte(',')
+ }
+ quote(name)
+ buf.WriteByte(':')
+ if err := emit(v); err != nil {
+ return fmt.Errorf("in field .%s: %v", name, err)
+ }
+ }
+ buf.WriteByte('}')
+
+ default:
+ return fmt.Errorf("cannot encode %s as JSON", x.Type())
+ }
+ return nil
+ }
+
+ if err := emit(x); err != nil {
+ return nil, fmt.Errorf("%s: %v", b.Name(), err)
+ }
+ return starlark.String(buf.String()), nil
+}
+
+// isPrintableASCII reports whether s consists solely of printable
+// ASCII bytes, that is, bytes in the range [0x20, 0x7e].
+//
+// The encoder uses the result to decide whether strconv.AppendQuote
+// is a safe fast path for quoting a JSON string. DEL (0x7f) must be
+// rejected along with the other control characters: strconv escapes
+// it as \x7f, an escape sequence that JSON does not permit.
+func isPrintableASCII(s string) bool {
+	for i := 0; i < len(s); i++ {
+		if b := s[i]; b < 0x20 || b >= 0x7f {
+			return false
+		}
+	}
+	return true
+}
+
+// isFinite reports whether f represents a finite rational value.
+// It is equivalent to !math.IsNaN(f) && !math.IsInf(f, 0).
+func isFinite(f float64) bool {
+	magnitude := math.Abs(f)
+	return magnitude <= math.MaxFloat64
+}
+
+// indent implements json.indent(str, *, prefix="", indent="\t").
+// It re-formats the JSON text str using json.Indent and returns the
+// result as a Starlark string. prefix and indent may be given only by
+// keyword; str only by position.
+func indent(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	// keyword-only
+	var (
+		prefix = ""
+		unit   = "\t"
+	)
+	err := starlark.UnpackArgs(b.Name(), nil, kwargs,
+		"prefix?", &prefix,
+		"indent?", &unit,
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	// positional-only
+	var str string
+	err = starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &str)
+	if err != nil {
+		return nil, err
+	}
+
+	var out bytes.Buffer
+	if err := json.Indent(&out, []byte(str), prefix, unit); err != nil {
+		return nil, fmt.Errorf("%s: %v", b.Name(), err)
+	}
+	return starlark.String(out.String()), nil
+}
+
+// decode implements json.decode(x). It accepts exactly one positional
+// argument, a JSON string, and returns the Starlark value it denotes.
+//
+// The parser is a set of closures over the input s and the cursor i.
+// Parse errors are raised by panicking with a value of the local type
+// failure; the deferred function near the end converts such a panic
+// into the named result err.
+func decode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (_ starlark.Value, err error) {
+ var s string
+ if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &s); err != nil {
+ return nil, err
+ }
+
+ // The decoder necessarily makes certain representation choices
+ // such as list vs tuple, struct vs dict, int vs float.
+ // In principle, we could parameterize it to allow the caller to
+ // control the returned types, but there's no compelling need yet.
+
+ // Use panic/recover with a distinguished type (failure) for error handling.
+ type failure string
+ fail := func(format string, args ...interface{}) {
+ panic(failure(fmt.Sprintf(format, args...)))
+ }
+
+ // i is the offset in s of the next unread byte.
+ i := 0
+
+ // skipSpace consumes leading spaces, and reports whether there is more input.
+ skipSpace := func() bool {
+ for ; i < len(s); i++ {
+ b := s[i]
+ if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
+ return true
+ }
+ }
+ return false
+ }
+
+ // next consumes leading spaces and returns the first non-space.
+ // It panics if at EOF.
+ // The returned byte is not consumed.
+ next := func() byte {
+ if skipSpace() {
+ return s[i]
+ }
+ fail("unexpected end of file")
+ panic("unreachable")
+ }
+
+ // parse returns the next JSON value from the input.
+ // It consumes leading but not trailing whitespace.
+ // It panics on error.
+ var parse func() starlark.Value
+ parse = func() starlark.Value {
+ b := next()
+ switch b {
+ case '"':
+ // string
+
+ // Find end of quotation.
+ // Also, record whether trivial unquoting is safe.
+ // Non-trivial unquoting is handled by Go's encoding/json.
+ safe := true
+ closed := false
+ j := i + 1
+ for ; j < len(s); j++ {
+ b := s[j]
+ if b == '\\' {
+ safe = false
+ j++ // skip x in \x
+ } else if b == '"' {
+ closed = true
+ j++ // skip '"'
+ break
+ } else if b >= utf8.RuneSelf {
+ safe = false
+ }
+ }
+ if !closed {
+ fail("unclosed string literal")
+ }
+
+ // r is the literal including both quotation marks.
+ r := s[i:j]
+ i = j
+
+ // unquote
+ if safe {
+ r = r[1 : len(r)-1]
+ } else if err := json.Unmarshal([]byte(r), &r); err != nil {
+ fail("%s", err)
+ }
+ return starlark.String(r)
+
+ // For the three keyword cases, a mismatch falls out of the
+ // switch to the "unexpected character" failure below.
+ case 'n':
+ if strings.HasPrefix(s[i:], "null") {
+ i += len("null")
+ return starlark.None
+ }
+
+ case 't':
+ if strings.HasPrefix(s[i:], "true") {
+ i += len("true")
+ return starlark.True
+ }
+
+ case 'f':
+ if strings.HasPrefix(s[i:], "false") {
+ i += len("false")
+ return starlark.False
+ }
+
+ case '[':
+ // array
+ var elems []starlark.Value
+
+ i++ // '['
+ b = next()
+ if b != ']' {
+ for {
+ elem := parse()
+ elems = append(elems, elem)
+ b = next()
+ if b != ',' {
+ if b != ']' {
+ fail("got %q, want ',' or ']'", b)
+ }
+ break
+ }
+ i++ // ','
+ }
+ }
+ i++ // ']'
+ return starlark.NewList(elems)
+
+ case '{':
+ // object
+ dict := new(starlark.Dict)
+
+ i++ // '{'
+ b = next()
+ if b != '}' {
+ for {
+ key := parse()
+ if _, ok := key.(starlark.String); !ok {
+ fail("got %s for object key, want string", key.Type())
+ }
+ b = next()
+ if b != ':' {
+ fail("after object key, got %q, want ':' ", b)
+ }
+ i++ // ':'
+ value := parse()
+ dict.SetKey(key, value) // can't fail
+ b = next()
+ if b != ',' {
+ if b != '}' {
+ fail("in object, got %q, want ',' or '}'", b)
+ }
+ break
+ }
+ i++ // ','
+ }
+ }
+ i++ // '}'
+ return dict
+
+ default:
+ // number?
+ if isdigit(b) || b == '-' {
+ // scan literal. Allow [0-9+-eE.] for now.
+ // Any non-digit character in that set marks the literal as a float.
+ float := false
+ var j int
+ for j = i + 1; j < len(s); j++ {
+ b = s[j]
+ if isdigit(b) {
+ // ok
+ } else if b == '.' ||
+ b == 'e' ||
+ b == 'E' ||
+ b == '+' ||
+ b == '-' {
+ float = true
+ } else {
+ break
+ }
+ }
+ num := s[i:j]
+ i = j
+
+ // Unlike most C-like languages,
+ // JSON disallows a leading zero before a digit.
+ digits := num
+ if num[0] == '-' {
+ digits = num[1:]
+ }
+ if digits == "" || digits[0] == '0' && len(digits) > 1 && isdigit(digits[1]) {
+ fail("invalid number: %s", num)
+ }
+
+ // parse literal
+ if float {
+ x, err := strconv.ParseFloat(num, 64)
+ if err != nil {
+ fail("invalid number: %s", num)
+ }
+ return starlark.Float(x)
+ } else {
+ // Integers are parsed with math/big, so the literal may be of any size.
+ x, ok := new(big.Int).SetString(num, 10)
+ if !ok {
+ fail("invalid number: %s", num)
+ }
+ return starlark.MakeBigInt(x)
+ }
+ }
+ }
+ fail("unexpected character %q", b)
+ panic("unreachable")
+ }
+ // Convert a failure panic into the error result, reporting the
+ // offset of the cursor; any other panic is propagated.
+ defer func() {
+ x := recover()
+ switch x := x.(type) {
+ case failure:
+ err = fmt.Errorf("json.decode: at offset %d, %s", i, x)
+ case nil:
+ // nop
+ default:
+ panic(x) // unexpected panic
+ }
+ }()
+ x := parse()
+ // Only whitespace may follow the value.
+ if skipSpace() {
+ fail("unexpected character %q after value", s[i])
+ }
+ return x, nil
+}
+
+// isdigit reports whether b is an ASCII decimal digit.
+func isdigit(b byte) bool {
+	return '0' <= b && b <= '9'
+}
diff --git a/starlarkstruct/module.go b/starlarkstruct/module.go
new file mode 100644
index 0000000..735c98a
--- /dev/null
+++ b/starlarkstruct/module.go
@@ -0,0 +1,43 @@
+package starlarkstruct
+
+import (
+ "fmt"
+
+ "go.starlark.net/starlark"
+)
+
+// A Module is a named collection of values,
+// typically a suite of functions imported by a load statement.
+//
+// It differs from Struct primarily in that its string representation
+// does not enumerate its fields.
+type Module struct {
+ Name string // appears in the module's printed form
+ Members starlark.StringDict // the module's attributes, by name
+}
+
+// Compile-time check that *Module implements starlark.HasAttrs.
+var _ starlark.HasAttrs = (*Module)(nil)
+
+// Attr returns the member with the given name.
+// For a name that is not a member it returns (nil, nil).
+func (m *Module) Attr(name string) (starlark.Value, error) { return m.Members[name], nil }
+func (m *Module) AttrNames() []string { return m.Members.Keys() }
+func (m *Module) Freeze() { m.Members.Freeze() }
+// Hash always fails: modules are unhashable.
+func (m *Module) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", m.Type()) }
+// String returns the module's name in the form <module "name">.
+func (m *Module) String() string { return fmt.Sprintf("<module %q>", m.Name) }
+func (m *Module) Truth() starlark.Bool { return true }
+func (m *Module) Type() string { return "module" }
+
+// MakeModule may be used as the implementation of a Starlark built-in
+// function, module(name, **kwargs). It returns a new module whose name
+// is the single positional argument and whose members are the keyword
+// arguments.
+func MakeModule(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var name string
+	err := starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &name)
+	if err != nil {
+		return nil, err
+	}
+	members := make(starlark.StringDict, len(kwargs))
+	for i := range kwargs {
+		pair := kwargs[i]
+		members[string(pair[0].(starlark.String))] = pair[1]
+	}
+	return &Module{Name: name, Members: members}, nil
+}
diff --git a/starlarkstruct/struct.go b/starlarkstruct/struct.go
new file mode 100644
index 0000000..1982cc0
--- /dev/null
+++ b/starlarkstruct/struct.go
@@ -0,0 +1,281 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package starlarkstruct defines the Starlark types 'struct' and
+// 'module', both optional language extensions.
+//
+package starlarkstruct // import "go.starlark.net/starlarkstruct"
+
+// It is tempting to introduce a variant of Struct that is a wrapper
+// around a Go struct value, for stronger typing guarantees and more
+// efficient and convenient field lookup. However:
+// 1) all fields of Starlark structs are optional, so we cannot represent
+// them using more specific types such as String, Int, *Depset, and
+// *File, as such types give no way to represent missing fields.
+// 2) the efficiency gain of direct struct field access is rather
+// marginal: finding the index of a field by binary searching on the
+// sorted list of field names is quite fast compared to the other
+// overheads.
+// 3) the gains in compactness and spatial locality are also rather
+// marginal: the array behind the []entry slice is (due to field name
+// strings) only a factor of 2 larger than the corresponding Go struct
+// would be, and, like the Go struct, requires only a single allocation.
+
+import (
+ "fmt"
+ "sort"
+ "strings"
+
+ "go.starlark.net/starlark"
+ "go.starlark.net/syntax"
+)
+
+// Make is the implementation of a built-in function that instantiates
+// an immutable struct from the specified keyword arguments.
+// Positional arguments are rejected with an error.
+//
+// An application can add 'struct' to the Starlark environment like so:
+//
+// globals := starlark.StringDict{
+// "struct": starlark.NewBuiltin("struct", starlarkstruct.Make),
+// }
+//
+func Make(_ *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	if len(args) == 0 {
+		return FromKeywords(Default, kwargs), nil
+	}
+	return nil, fmt.Errorf("struct: unexpected positional arguments")
+}
+
+// FromKeywords returns a new struct instance whose fields are specified by the
+// key/value pairs in kwargs. (Each kwargs[i][0] must be a starlark.String.)
+// It panics if constructor is nil.
+func FromKeywords(constructor starlark.Value, kwargs []starlark.Tuple) *Struct {
+	if constructor == nil {
+		panic("nil constructor")
+	}
+	fields := make(entries, 0, len(kwargs))
+	for _, pair := range kwargs {
+		fields = append(fields, entry{
+			name:  string(pair[0].(starlark.String)),
+			value: pair[1],
+		})
+	}
+	// Attr relies on the entries being sorted by name.
+	sort.Sort(fields)
+	return &Struct{constructor: constructor, entries: fields}
+}
+
+// FromStringDict returns a new struct instance whose fields are the entries of d.
+// The constructor parameter specifies the constructor; use Default for an ordinary struct.
+// It panics if constructor is nil.
+func FromStringDict(constructor starlark.Value, d starlark.StringDict) *Struct {
+	if constructor == nil {
+		panic("nil constructor")
+	}
+	fields := make(entries, 0, len(d))
+	for name, value := range d {
+		fields = append(fields, entry{name: name, value: value})
+	}
+	// Map iteration order is unspecified; Attr relies on sorted entries.
+	sort.Sort(fields)
+	return &Struct{constructor: constructor, entries: fields}
+}
+
+// Struct is an immutable Starlark type that maps field names to values.
+// It is not iterable and does not support len.
+//
+// A struct has a constructor, a distinct value that identifies a class
+// of structs, and which appears in the struct's string representation.
+//
+// Operations such as x+y fail if the constructors of the two operands
+// are not equal.
+//
+// The default constructor, Default, is the string "struct", but
+// clients may wish to 'brand' structs for their own purposes.
+// The constructor value appears in the printed form of the value,
+// and is accessible using the Constructor method.
+//
+// Use Attr to access its fields and AttrNames to enumerate them.
+type Struct struct {
+ // constructor is non-nil for structs built by FromKeywords
+ // or FromStringDict, both of which panic on a nil constructor.
+ constructor starlark.Value
+ entries entries // sorted by name
+}
+
+// Default is the default constructor for structs.
+// It is merely the string "struct".
+const Default = starlark.String("struct")
+
+// entries is a list of struct fields.
+// Its Len, Less and Swap methods allow it to be passed to sort.Sort,
+// which orders the fields by name.
+type entries []entry
+
+func (a entries) Len() int { return len(a) }
+func (a entries) Less(i, j int) bool { return a[i].name < a[j].name }
+func (a entries) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+
+// An entry is a single field of a struct: its name and its value.
+type entry struct {
+ name string
+ value starlark.Value
+}
+
+// Compile-time checks that *Struct implements these interfaces.
+var (
+ _ starlark.HasAttrs = (*Struct)(nil)
+ _ starlark.HasBinary = (*Struct)(nil)
+)
+
+// ToStringDict adds a name/value entry to d for each field of the struct,
+// overwriting any existing entry of the same name.
+func (s *Struct) ToStringDict(d starlark.StringDict) {
+	for i := range s.entries {
+		field := &s.entries[i]
+		d[field.name] = field.value
+	}
+}
+
+func (s *Struct) String() string {
+ buf := new(strings.Builder)
+ if s.constructor == Default {
+ // NB: The Java implementation always prints struct
+ // even for Bazel provider instances.
+ buf.WriteString("struct") // avoid String()'s quotation
+ } else {
+ buf.WriteString(s.constructor.String())
+ }
+ buf.WriteByte('(')
+ for i, e := range s.entries {
+ if i > 0 {
+ buf.WriteString(", ")
+ }
+ buf.WriteString(e.name)
+ buf.WriteString(" = ")
+ buf.WriteString(e.value.String())
+ }
+ buf.WriteByte(')')
+ return buf.String()
+}
+
+// Constructor returns the constructor used to create this struct.
+func (s *Struct) Constructor() starlark.Value { return s.constructor }
+
+// Type returns "struct", whatever the constructor.
+func (s *Struct) Type() string { return "struct" }
+
+// Truth returns true for every struct.
+func (s *Struct) Truth() starlark.Bool { return true } // even when empty
+// Hash returns a hash of the struct computed from the names and the
+// hashes of the values of its fields. It returns the first error
+// reported by a field value's Hash method. The constructor does not
+// contribute to the hash.
+func (s *Struct) Hash() (uint32, error) {
+	// Same algorithm as Tuple.hash, but with different primes.
+	const step = 7349
+	h, mult := uint32(8731), uint32(9839)
+	for _, field := range s.entries {
+		nameHash, _ := starlark.String(field.name).Hash()
+		h ^= 3 * nameHash
+		valueHash, err := field.value.Hash()
+		if err != nil {
+			return 0, err
+		}
+		h ^= valueHash * mult
+		mult += step
+	}
+	return h, nil
+}
+// Freeze freezes the value of every field of the struct.
+func (s *Struct) Freeze() {
+	for i := range s.entries {
+		s.entries[i].value.Freeze()
+	}
+}
+
+// Binary implements the + operator for two structs. The result is a
+// new struct holding the fields of both operands; where a field name
+// occurs in both, the value from the right-hand operand wins.
+// It is an error if the operands' constructors are not equal.
+// For any other operator or operand type it returns (nil, nil),
+// meaning the operation is not handled here.
+func (x *Struct) Binary(op syntax.Token, y starlark.Value, side starlark.Side) (starlark.Value, error) {
+	other, ok := y.(*Struct)
+	if !ok || op != syntax.PLUS {
+		return nil, nil // unhandled
+	}
+
+	// Arrange the operands in source order.
+	lhs, rhs := x, other
+	if side == starlark.Right {
+		lhs, rhs = other, x
+	}
+
+	eq, err := starlark.Equal(lhs.constructor, rhs.constructor)
+	if err != nil {
+		return nil, fmt.Errorf("in %s + %s: error comparing constructors: %v",
+			lhs.constructor, rhs.constructor, err)
+	}
+	if !eq {
+		return nil, fmt.Errorf("cannot add structs of different constructors: %s + %s",
+			lhs.constructor, rhs.constructor)
+	}
+
+	merged := make(starlark.StringDict, lhs.len()+rhs.len())
+	for _, field := range lhs.entries {
+		merged[field.name] = field.value
+	}
+	for _, field := range rhs.entries {
+		merged[field.name] = field.value
+	}
+	return FromStringDict(lhs.constructor, merged), nil
+}
+
+// Attr returns the value of the specified field.
+func (s *Struct) Attr(name string) (starlark.Value, error) {
+ // Binary search the entries.
+ // This implementation is a specialization of
+ // sort.Search that avoids dynamic dispatch.
+ n := len(s.entries)
+ i, j := 0, n
+ for i < j {
+ h := int(uint(i+j) >> 1)
+ if s.entries[h].name < name {
+ i = h + 1
+ } else {
+ j = h
+ }
+ }
+ if i < n && s.entries[i].name == name {
+ return s.entries[i].value, nil
+ }
+
+ var ctor string
+ if s.constructor != Default {
+ ctor = s.constructor.String() + " "
+ }
+ return nil, starlark.NoSuchAttrError(
+ fmt.Sprintf("%sstruct has no .%s attribute", ctor, name))
+}
+
+// len returns the number of fields of the struct.
+func (s *Struct) len() int { return len(s.entries) }
+
+// AttrNames returns a new sorted list of the struct fields.
+func (s *Struct) AttrNames() []string {
+	names := make([]string, 0, len(s.entries))
+	for _, field := range s.entries {
+		names = append(names, field.name)
+	}
+	return names
+}
+
+// CompareSameType compares struct x with y_, which must be a *Struct.
+// Only the == and != operators are supported; any other operator
+// yields a "not implemented" error.
+func (x *Struct) CompareSameType(op syntax.Token, y_ starlark.Value, depth int) (bool, error) {
+	other := y_.(*Struct)
+	if op == syntax.EQL {
+		return structsEqual(x, other, depth)
+	}
+	if op == syntax.NEQ {
+		equal, err := structsEqual(x, other, depth)
+		return !equal, err
+	}
+	return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, other.Type())
+}
+
+// structsEqual reports whether x and y have equal constructors, the
+// same field names, and pairwise equal field values. Field values are
+// compared using EqualDepth with depth-1.
+func structsEqual(x, y *Struct, depth int) (bool, error) {
+	n := x.len()
+	if n != y.len() {
+		return false, nil
+	}
+
+	sameCtor, err := starlark.Equal(x.constructor, y.constructor)
+	if err != nil {
+		return false, fmt.Errorf("error comparing struct constructors %v and %v: %v",
+			x.constructor, y.constructor, err)
+	}
+	if !sameCtor {
+		return false, nil
+	}
+
+	// Both entry lists are sorted by name, so equal structs
+	// have matching names at every index.
+	for i := 0; i < n; i++ {
+		xe, ye := x.entries[i], y.entries[i]
+		if xe.name != ye.name {
+			return false, nil
+		}
+		equal, err := starlark.EqualDepth(xe.value, ye.value, depth-1)
+		if err != nil {
+			return false, err
+		}
+		if !equal {
+			return false, nil
+		}
+	}
+	return true, nil
+}
diff --git a/starlarkstruct/struct_test.go b/starlarkstruct/struct_test.go
new file mode 100644
index 0000000..4f103bd
--- /dev/null
+++ b/starlarkstruct/struct_test.go
@@ -0,0 +1,69 @@
+// Copyright 2018 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package starlarkstruct_test
+
+import (
+ "fmt"
+ "path/filepath"
+ "testing"
+
+ "go.starlark.net/starlark"
+ "go.starlark.net/starlarkstruct"
+ "go.starlark.net/starlarktest"
+)
+
+func Test(t *testing.T) {
+ testdata := starlarktest.DataFile("starlarkstruct", ".")
+ thread := &starlark.Thread{Load: load}
+ starlarktest.SetReporter(thread, t)
+ filename := filepath.Join(testdata, "testdata/struct.star")
+ predeclared := starlark.StringDict{
+ "struct": starlark.NewBuiltin("struct", starlarkstruct.Make),
+ "gensym": starlark.NewBuiltin("gensym", gensym),
+ }
+ if _, err := starlark.ExecFile(thread, filename, nil, predeclared); err != nil {
+ if err, ok := err.(*starlark.EvalError); ok {
+ t.Fatal(err.Backtrace())
+ }
+ t.Fatal(err)
+ }
+}
+
+// load implements the 'load' operation as used in the evaluator tests.
+func load(thread *starlark.Thread, module string) (starlark.StringDict, error) {
+ if module == "assert.star" {
+ return starlarktest.LoadAssertModule()
+ }
+ return nil, fmt.Errorf("load not implemented")
+}
+
+// gensym is a built-in function that generates a unique symbol.
+func gensym(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ var name string
+ if err := starlark.UnpackArgs("gensym", args, kwargs, "name", &name); err != nil {
+ return nil, err
+ }
+ return &symbol{name: name}, nil
+}
+
+// A symbol is a distinct value that acts as a constructor of "branded"
+// struct instances, like a class symbol in Python or a "provider" in Bazel.
+type symbol struct{ name string }
+
+var _ starlark.Callable = (*symbol)(nil)
+
+func (sym *symbol) Name() string { return sym.name }
+func (sym *symbol) String() string { return sym.name }
+func (sym *symbol) Type() string { return "symbol" }
+func (sym *symbol) Freeze() {} // immutable
+func (sym *symbol) Truth() starlark.Bool { return starlark.True }
+func (sym *symbol) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", sym.Type()) }
+
+func (sym *symbol) CallInternal(thread *starlark.Thread, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ if len(args) > 0 {
+ return nil, fmt.Errorf("%s: unexpected positional arguments", sym)
+ }
+ return starlarkstruct.FromKeywords(sym, kwargs), nil
+}
diff --git a/starlarkstruct/testdata/struct.star b/starlarkstruct/testdata/struct.star
new file mode 100644
index 0000000..e54fe04
--- /dev/null
+++ b/starlarkstruct/testdata/struct.star
@@ -0,0 +1,63 @@
+# Tests of Starlark 'struct' extension.
+# This is not a standard feature and the Go and Starlark APIs may yet change.
+
+load("assert.star", "assert")
+
+assert.eq(str(struct), "<built-in function struct>")
+
+# struct is a constructor for "unbranded" structs.
+s = struct(host = "localhost", port = 80)
+assert.eq(s, s)
+assert.eq(s, struct(host = "localhost", port = 80))
+assert.ne(s, struct(host = "localhost", port = 81))
+assert.eq(type(s), "struct")
+assert.eq(str(s), 'struct(host = "localhost", port = 80)')
+assert.eq(s.host, "localhost")
+assert.eq(s.port, 80)
+assert.fails(lambda : s.protocol, "struct has no .protocol attribute")
+assert.eq(dir(s), ["host", "port"])
+
+# Use gensym to create "branded" struct types.
+hostport = gensym(name = "hostport")
+assert.eq(type(hostport), "symbol")
+assert.eq(str(hostport), "hostport")
+
+# Call the symbol to instantiate a new type.
+http = hostport(host = "localhost", port = 80)
+assert.eq(type(http), "struct")
+assert.eq(str(http), 'hostport(host = "localhost", port = 80)') # includes name of constructor
+assert.eq(http, http)
+assert.eq(http, hostport(host = "localhost", port = 80))
+assert.ne(http, hostport(host = "localhost", port = 443))
+assert.eq(http.host, "localhost")
+assert.eq(http.port, 80)
+assert.fails(lambda : http.protocol, "hostport struct has no .protocol attribute")
+
+person = gensym(name = "person")
+bob = person(name = "bob", age = 50)
+alice = person(name = "alice", city = "NYC")
+assert.ne(http, bob) # different constructor symbols
+assert.ne(bob, alice) # different fields
+
+hostport2 = gensym(name = "hostport")
+assert.eq(hostport, hostport)
+assert.ne(hostport, hostport2) # same name, different symbol
+assert.ne(http, hostport2(host = "localhost", port = 80)) # equal fields but different ctor symbols
+
+# dir
+assert.eq(dir(alice), ["city", "name"])
+assert.eq(dir(bob), ["age", "name"])
+assert.eq(dir(http), ["host", "port"])
+
+# hasattr, getattr
+assert.true(hasattr(alice, "city"))
+assert.eq(hasattr(alice, "ageaa"), False)
+assert.eq(getattr(alice, "city"), "NYC")
+
+# +
+assert.eq(bob + bob, bob)
+assert.eq(bob + alice, person(age = 50, city = "NYC", name = "alice"))
+assert.eq(alice + bob, person(age = 50, city = "NYC", name = "bob")) # not commutative! a misfeature
+assert.fails(lambda : alice + 1, "struct \\+ int")
+assert.eq(http + http, http)
+assert.fails(lambda : http + bob, "different constructors: hostport \\+ person")
diff --git a/starlarktest/assert.star b/starlarktest/assert.star
new file mode 100644
index 0000000..c6e480f
--- /dev/null
+++ b/starlarktest/assert.star
@@ -0,0 +1,51 @@
+# Predeclared built-ins for this module:
+#
+# error(msg): report an error in Go's test framework without halting execution.
+# This is distinct from the built-in fail function, which halts execution.
+# catch(f): evaluate f() and return its evaluation error message, or None if it succeeded.
+# matches(pattern, str): report whether str matches regular expression pattern.
+# module(**kwargs): a constructor for a module.
+# _freeze(x): freeze the value x and everything reachable from it.
+#
+# Clients may use these functions to define their own testing abstractions.
+
+def _eq(x, y):
+ if x != y:
+ error("%r != %r" % (x, y))
+
+def _ne(x, y):
+ if x == y:
+ error("%r == %r" % (x, y))
+
+def _true(cond, msg = "assertion failed"):
+ if not cond:
+ error(msg)
+
+def _lt(x, y):
+ if not (x < y):
+ error("%s is not less than %s" % (x, y))
+
+def _contains(x, y):
+ if y not in x:
+ error("%s does not contain %s" % (x, y))
+
+def _fails(f, pattern):
+ "assert_fails asserts that evaluation of f() fails with the specified error."
+ msg = catch(f)
+ if msg == None:
+ error("evaluation succeeded unexpectedly (want error matching %r)" % pattern)
+ elif not matches(pattern, msg):
+ error("regular expression (%s) did not match error (%s)" % (pattern, msg))
+
+freeze = _freeze # an exported global whose value is the built-in freeze function
+
+assert = module(
+ "assert",
+ fail = error,
+ eq = _eq,
+ ne = _ne,
+ true = _true,
+ lt = _lt,
+ contains = _contains,
+ fails = _fails,
+)
diff --git a/starlarktest/starlarktest.go b/starlarktest/starlarktest.go
new file mode 100644
index 0000000..e449436
--- /dev/null
+++ b/starlarktest/starlarktest.go
@@ -0,0 +1,147 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package starlarktest defines utilities for testing Starlark programs.
+//
+// Clients can call LoadAssertModule to load a module that defines
+// several functions useful for testing. See assert.star for its
+// definition.
+//
+// The assert functions report failures to the current Go testing.T (via the
+// error built-in), so clients must call SetReporter(thread, t) before use.
+package starlarktest // import "go.starlark.net/starlarktest"
+
+import (
+ "fmt"
+ "go/build"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strings"
+ "sync"
+
+ "go.starlark.net/starlark"
+ "go.starlark.net/starlarkstruct"
+)
+
+const localKey = "Reporter"
+
+// A Reporter is a value to which errors may be reported.
+// It is satisfied by *testing.T.
+type Reporter interface {
+ Error(args ...interface{})
+}
+
+// SetReporter associates an error reporter (such as a testing.T in
+// a Go test) with the Starlark thread so that Starlark programs may
+// report errors to it.
+func SetReporter(thread *starlark.Thread, r Reporter) {
+ thread.SetLocal(localKey, r)
+}
+
+// GetReporter returns the Starlark thread's error reporter.
+// It must be preceded by a call to SetReporter.
+func GetReporter(thread *starlark.Thread) Reporter {
+ r, ok := thread.Local(localKey).(Reporter)
+ if !ok {
+ panic("internal error: starlarktest.SetReporter was not called")
+ }
+ return r
+}
+
+var (
+ once sync.Once
+ assert starlark.StringDict
+ assertErr error
+)
+
+// LoadAssertModule loads the assert module.
+// It is concurrency-safe and idempotent.
+func LoadAssertModule() (starlark.StringDict, error) {
+ once.Do(func() {
+ predeclared := starlark.StringDict{
+ "error": starlark.NewBuiltin("error", error_),
+ "catch": starlark.NewBuiltin("catch", catch),
+ "matches": starlark.NewBuiltin("matches", matches),
+ "module": starlark.NewBuiltin("module", starlarkstruct.MakeModule),
+ "_freeze": starlark.NewBuiltin("freeze", freeze),
+ }
+ filename := DataFile("starlarktest", "assert.star")
+ thread := new(starlark.Thread)
+ assert, assertErr = starlark.ExecFile(thread, filename, nil, predeclared)
+ })
+ return assert, assertErr
+}
+
+// catch(f) evaluates f() and returns its evaluation error message
+// if it failed or None if it succeeded.
+func catch(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ var fn starlark.Callable
+ if err := starlark.UnpackArgs("catch", args, kwargs, "fn", &fn); err != nil {
+ return nil, err
+ }
+ if _, err := starlark.Call(thread, fn, nil, nil); err != nil {
+ return starlark.String(err.Error()), nil
+ }
+ return starlark.None, nil
+}
+
+// matches(pattern, str) reports whether string str matches the regular expression pattern.
+func matches(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ var pattern, str string
+ if err := starlark.UnpackArgs("matches", args, kwargs, "pattern", &pattern, "str", &str); err != nil {
+ return nil, err
+ }
+ ok, err := regexp.MatchString(pattern, str)
+ if err != nil {
+ return nil, fmt.Errorf("matches: %s", err)
+ }
+ return starlark.Bool(ok), nil
+}
+
+// error(x) reports an error to the Go test framework.
+func error_(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ if len(args) != 1 {
+ return nil, fmt.Errorf("error: got %d arguments, want 1", len(args))
+ }
+ buf := new(strings.Builder)
+ stk := thread.CallStack()
+ stk.Pop()
+ fmt.Fprintf(buf, "%sError: ", stk)
+ if s, ok := starlark.AsString(args[0]); ok {
+ buf.WriteString(s)
+ } else {
+ buf.WriteString(args[0].String())
+ }
+ GetReporter(thread).Error(buf.String())
+ return starlark.None, nil
+}
+
+// freeze(x) freezes its operand.
+func freeze(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+ if len(kwargs) > 0 {
+ return nil, fmt.Errorf("freeze does not accept keyword arguments")
+ }
+ if len(args) != 1 {
+ return nil, fmt.Errorf("freeze got %d arguments, wants 1", len(args))
+ }
+ args[0].Freeze()
+ return args[0], nil
+}
+
+// DataFile returns the effective filename of the specified
+// test data resource. The function abstracts differences between
+// 'go build', under which a test runs in its package directory,
+// and Blaze, under which a test runs in the root of the tree.
+var DataFile = func(pkgdir, filename string) string {
+ // Check if we're being run by Bazel and change directories if so.
+ // TEST_SRCDIR and TEST_WORKSPACE are set by the Bazel test runner, so that makes a decent check
+ testSrcdir := os.Getenv("TEST_SRCDIR")
+ testWorkspace := os.Getenv("TEST_WORKSPACE")
+ if testSrcdir != "" && testWorkspace != "" {
+ return filepath.Join(testSrcdir, "net_starlark_go", pkgdir, filename)
+ }
+
+ return filepath.Join(build.Default.GOPATH, "src/go.starlark.net", pkgdir, filename)
+}
diff --git a/syntax/grammar.txt b/syntax/grammar.txt
new file mode 100644
index 0000000..7f5dfc8
--- /dev/null
+++ b/syntax/grammar.txt
@@ -0,0 +1,129 @@
+
+Grammar of Starlark
+==================
+
+File = {Statement | newline} eof .
+
+Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt .
+
+DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite .
+
+Parameters = Parameter {',' Parameter}.
+
+Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier .
+
+IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] .
+
+ForStmt = 'for' LoopVariables 'in' Expression ':' Suite .
+
+WhileStmt = 'while' Test ':' Suite .
+
+Suite = newline indent Statement {Statement} outdent | SimpleStmt .
+
+SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' .
+# NOTE: '\n' optional at EOF
+
+SmallStmt = ReturnStmt
+ | BreakStmt | ContinueStmt | PassStmt
+ | AssignStmt
+ | ExprStmt
+ | LoadStmt
+ .
+
+ReturnStmt = 'return' [Expression] .
+BreakStmt = 'break' .
+ContinueStmt = 'continue' .
+PassStmt = 'pass' .
+AssignStmt = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression .
+ExprStmt = Expression .
+
+LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' .
+
+Test = LambdaExpr
+ | IfExpr
+ | PrimaryExpr
+ | UnaryExpr
+ | BinaryExpr
+ .
+
+LambdaExpr = 'lambda' [Parameters] ':' Test .
+
+IfExpr = Test 'if' Test 'else' Test .
+
+PrimaryExpr = Operand
+ | PrimaryExpr DotSuffix
+ | PrimaryExpr CallSuffix
+ | PrimaryExpr SliceSuffix
+ .
+
+Operand = identifier
+ | int | float | string
+ | ListExpr | ListComp
+ | DictExpr | DictComp
+ | '(' [Expression [',']] ')'
+ | ('-' | '+') PrimaryExpr
+ .
+
+DotSuffix = '.' identifier .
+CallSuffix = '(' [Arguments [',']] ')' .
+SliceSuffix = '[' [Expression] ':' [Test] [':' [Test]] ']' | '[' Expression ']' .
+
+Arguments = Argument {',' Argument} .
+Argument = Test | identifier '=' Test | '*' Test | '**' Test .
+
+ListExpr = '[' [Expression [',']] ']' .
+ListComp = '[' Test {CompClause} ']'.
+
+DictExpr = '{' [Entries [',']] '}' .
+DictComp = '{' Entry {CompClause} '}' .
+Entries = Entry {',' Entry} .
+Entry = Test ':' Test .
+
+CompClause = 'for' LoopVariables 'in' Test | 'if' Test .
+
+UnaryExpr = 'not' Test .
+
+BinaryExpr = Test {Binop Test} .
+
+Binop = 'or'
+ | 'and'
+ | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in'
+ | '|'
+ | '^'
+ | '&'
+ | '-' | '+'
+ | '*' | '%' | '/' | '//'
+ .
+
+Expression = Test {',' Test} .
+# NOTE: trailing comma permitted only when within [...] or (...).
+
+LoopVariables = PrimaryExpr {',' PrimaryExpr} .
+
+
+# Notation (similar to Go spec):
+- lowercase and 'quoted' items are lexical tokens.
+- Capitalized names denote grammar productions.
+- (...) implies grouping
+- x | y means either x or y.
+- [x] means x is optional
+- {x} means x is repeated zero or more times
+- The end of each declaration is marked with a period.
+
+# Tokens
+- spaces: newline, eof, indent, outdent.
+- identifier.
+- literals: string, int, float.
+- plus all quoted tokens such as '+=', 'return'.
+
+# Notes:
+- Ambiguity is resolved using operator precedence.
+- The grammar does not enforce the legal order of params and args,
+ nor that the first compclause must be a 'for'.
+
+TODO:
+- explain how the lexer generates indent, outdent, and newline tokens.
+- why is unary NOT separated from unary - and +?
+- the grammar is (mostly) in LL(1) style so, for example,
+ dot expressions are formed from suffixes, not complete expressions,
+ which makes the spec harder to read. Reorganize into non-LL(1) form?
diff --git a/syntax/parse.go b/syntax/parse.go
new file mode 100644
index 0000000..f4c8fff
--- /dev/null
+++ b/syntax/parse.go
@@ -0,0 +1,1028 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+// This file defines a recursive-descent parser for Starlark.
+// The LL(1) grammar of Starlark and the names of many productions follow Python 2.7.
+//
+// TODO(adonovan): use syntax.Error more systematically throughout the
+// package. Verify that error positions are correct using the
+// chunkedfile mechanism.
+
+import "log"
+
+// Enable this flag to print the token stream and log.Fatal on the first error.
+const debug = false
+
+// A Mode value is a set of flags (or 0) that controls optional parser functionality.
+type Mode uint
+
+const (
+ RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments
+)
+
+// Parse parses the input data and returns the corresponding parse tree.
+//
+// If src != nil, Parse parses the source from src and the filename
+// is only used when recording position information.
+// The type of the argument for the src parameter must be string,
+// []byte, io.Reader, or FilePortion.
+// If src == nil, Parse parses the file specified by filename.
+func Parse(filename string, src interface{}, mode Mode) (f *File, err error) {
+ in, err := newScanner(filename, src, mode&RetainComments != 0)
+ if err != nil {
+ return nil, err
+ }
+ p := parser{in: in}
+ defer p.in.recover(&err)
+
+ p.nextToken() // read first lookahead token
+ f = p.parseFile()
+ if f != nil {
+ f.Path = filename
+ }
+ p.assignComments(f)
+ return f, nil
+}
+
+// ParseCompoundStmt parses a single compound statement:
+// a blank line, a def, for, while, or if statement, or a
+// semicolon-separated list of simple statements followed
+// by a newline. These are the units on which the REPL operates.
+// ParseCompoundStmt does not consume any following input.
+// The parser calls the readline function each
+// time it needs a new line of input.
+func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) {
+ in, err := newScanner(filename, readline, false)
+ if err != nil {
+ return nil, err
+ }
+
+ p := parser{in: in}
+ defer p.in.recover(&err)
+
+ p.nextToken() // read first lookahead token
+
+ var stmts []Stmt
+ switch p.tok {
+ case DEF, IF, FOR, WHILE:
+ stmts = p.parseStmt(stmts)
+ case NEWLINE:
+ // blank line
+ default:
+ stmts = p.parseSimpleStmt(stmts, false)
+ // Require but don't consume newline, to avoid blocking again.
+ if p.tok != NEWLINE {
+ p.in.errorf(p.in.pos, "invalid syntax")
+ }
+ }
+
+ return &File{Path: filename, Stmts: stmts}, nil
+}
+
+// ParseExpr parses a Starlark expression.
+// A comma-separated list of expressions is parsed as a tuple.
+// See Parse for explanation of parameters.
+func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) {
+ in, err := newScanner(filename, src, mode&RetainComments != 0)
+ if err != nil {
+ return nil, err
+ }
+ p := parser{in: in}
+ defer p.in.recover(&err)
+
+ p.nextToken() // read first lookahead token
+
+ // Use parseExpr, not parseTest, to permit an unparenthesized tuple.
+ expr = p.parseExpr(false)
+
+ // A following newline (e.g. "f()\n") appears outside any brackets,
+ // on a non-blank line, and thus results in a NEWLINE token.
+ if p.tok == NEWLINE {
+ p.nextToken()
+ }
+
+ if p.tok != EOF {
+ p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok)
+ }
+ p.assignComments(expr)
+ return expr, nil
+}
+
+type parser struct {
+ in *scanner
+ tok Token
+ tokval tokenValue
+}
+
+// nextToken advances the scanner and returns the position of the
+// previous token.
+func (p *parser) nextToken() Position {
+ oldpos := p.tokval.pos
+ p.tok = p.in.nextToken(&p.tokval)
+ // enable to see the token stream
+ if debug {
+ log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos)
+ }
+ return oldpos
+}
+
+// file_input = (NEWLINE | stmt)* EOF
+func (p *parser) parseFile() *File {
+ var stmts []Stmt
+ for p.tok != EOF {
+ if p.tok == NEWLINE {
+ p.nextToken()
+ continue
+ }
+ stmts = p.parseStmt(stmts)
+ }
+ return &File{Stmts: stmts}
+}
+
+func (p *parser) parseStmt(stmts []Stmt) []Stmt {
+ if p.tok == DEF {
+ return append(stmts, p.parseDefStmt())
+ } else if p.tok == IF {
+ return append(stmts, p.parseIfStmt())
+ } else if p.tok == FOR {
+ return append(stmts, p.parseForStmt())
+ } else if p.tok == WHILE {
+ return append(stmts, p.parseWhileStmt())
+ }
+ return p.parseSimpleStmt(stmts, true)
+}
+
+func (p *parser) parseDefStmt() Stmt {
+ defpos := p.nextToken() // consume DEF
+ id := p.parseIdent()
+ p.consume(LPAREN)
+ params := p.parseParams()
+ p.consume(RPAREN)
+ p.consume(COLON)
+ body := p.parseSuite()
+ return &DefStmt{
+ Def: defpos,
+ Name: id,
+ Params: params,
+ Body: body,
+ }
+}
+
+func (p *parser) parseIfStmt() Stmt {
+ ifpos := p.nextToken() // consume IF
+ cond := p.parseTest()
+ p.consume(COLON)
+ body := p.parseSuite()
+ ifStmt := &IfStmt{
+ If: ifpos,
+ Cond: cond,
+ True: body,
+ }
+ tail := ifStmt
+ for p.tok == ELIF {
+ elifpos := p.nextToken() // consume ELIF
+ cond := p.parseTest()
+ p.consume(COLON)
+ body := p.parseSuite()
+ elif := &IfStmt{
+ If: elifpos,
+ Cond: cond,
+ True: body,
+ }
+ tail.ElsePos = elifpos
+ tail.False = []Stmt{elif}
+ tail = elif
+ }
+ if p.tok == ELSE {
+ tail.ElsePos = p.nextToken() // consume ELSE
+ p.consume(COLON)
+ tail.False = p.parseSuite()
+ }
+ return ifStmt
+}
+
+func (p *parser) parseForStmt() Stmt {
+ forpos := p.nextToken() // consume FOR
+ vars := p.parseForLoopVariables()
+ p.consume(IN)
+ x := p.parseExpr(false)
+ p.consume(COLON)
+ body := p.parseSuite()
+ return &ForStmt{
+ For: forpos,
+ Vars: vars,
+ X: x,
+ Body: body,
+ }
+}
+
+func (p *parser) parseWhileStmt() Stmt {
+ whilepos := p.nextToken() // consume WHILE
+ cond := p.parseTest()
+ p.consume(COLON)
+ body := p.parseSuite()
+ return &WhileStmt{
+ While: whilepos,
+ Cond: cond,
+ Body: body,
+ }
+}
+
+// Equivalent to 'exprlist' production in Python grammar.
+//
+// loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA?
+func (p *parser) parseForLoopVariables() Expr {
+ // Avoid parseExpr because it would consume the IN token
+ // following x in "for x in y: ...".
+ v := p.parsePrimaryWithSuffix()
+ if p.tok != COMMA {
+ return v
+ }
+
+ list := []Expr{v}
+ for p.tok == COMMA {
+ p.nextToken()
+ if terminatesExprList(p.tok) {
+ break
+ }
+ list = append(list, p.parsePrimaryWithSuffix())
+ }
+ return &TupleExpr{List: list}
+}
+
+// simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE
+// In REPL mode, it does not consume the NEWLINE.
+func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt {
+ for {
+ stmts = append(stmts, p.parseSmallStmt())
+ if p.tok != SEMI {
+ break
+ }
+ p.nextToken() // consume SEMI
+ if p.tok == NEWLINE || p.tok == EOF {
+ break
+ }
+ }
+ // EOF without NEWLINE occurs in `if x: pass`, for example.
+ if p.tok != EOF && consumeNL {
+ p.consume(NEWLINE)
+ }
+
+ return stmts
+}
+
+// small_stmt = RETURN expr?
+// | PASS | BREAK | CONTINUE
+// | LOAD ...
+// | expr ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign
+// | expr
+func (p *parser) parseSmallStmt() Stmt {
+ switch p.tok {
+ case RETURN:
+ pos := p.nextToken() // consume RETURN
+ var result Expr
+ if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI {
+ result = p.parseExpr(false)
+ }
+ return &ReturnStmt{Return: pos, Result: result}
+
+ case BREAK, CONTINUE, PASS:
+ tok := p.tok
+ pos := p.nextToken() // consume it
+ return &BranchStmt{Token: tok, TokenPos: pos}
+
+ case LOAD:
+ return p.parseLoadStmt()
+ }
+
+ // Assignment
+ x := p.parseExpr(false)
+ switch p.tok {
+ case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ:
+ op := p.tok
+ pos := p.nextToken() // consume op
+ rhs := p.parseExpr(false)
+ return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs}
+ }
+
+ // Expression statement (e.g. function call, doc string).
+ return &ExprStmt{X: x}
+}
+
+// stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')'
+func (p *parser) parseLoadStmt() *LoadStmt {
+ loadPos := p.nextToken() // consume LOAD
+ lparen := p.consume(LPAREN)
+
+ if p.tok != STRING {
+ p.in.errorf(p.in.pos, "first operand of load statement must be a string literal")
+ }
+ module := p.parsePrimary().(*Literal)
+
+ var from, to []*Ident
+ for p.tok != RPAREN && p.tok != EOF {
+ p.consume(COMMA)
+ if p.tok == RPAREN {
+ break // allow trailing comma
+ }
+ switch p.tok {
+ case STRING:
+ // load("module", "id")
+ // The local ("to") name is the same as the original ("from") name.
+ lit := p.parsePrimary().(*Literal)
+ id := &Ident{
+ NamePos: lit.TokenPos.add(`"`),
+ Name: lit.Value.(string),
+ }
+ to = append(to, id)
+ from = append(from, id)
+
+ case IDENT:
+ // load("module", to="from")
+ id := p.parseIdent()
+ to = append(to, id)
+ if p.tok != EQ {
+ p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name)
+ }
+ p.consume(EQ)
+ if p.tok != STRING {
+ p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name)
+ }
+ lit := p.parsePrimary().(*Literal)
+ from = append(from, &Ident{
+ NamePos: lit.TokenPos.add(`"`),
+ Name: lit.Value.(string),
+ })
+
+ case RPAREN:
+ p.in.errorf(p.in.pos, "trailing comma in load statement")
+
+ default:
+ p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok)
+ }
+ }
+ rparen := p.consume(RPAREN)
+
+ if len(to) == 0 {
+ p.in.errorf(lparen, "load statement must import at least 1 symbol")
+ }
+ return &LoadStmt{
+ Load: loadPos,
+ Module: module,
+ To: to,
+ From: from,
+ Rparen: rparen,
+ }
+}
+
+// suite is typically what follows a COLON (e.g. after DEF or FOR).
+// suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT
+func (p *parser) parseSuite() []Stmt {
+ if p.tok == NEWLINE {
+ p.nextToken() // consume NEWLINE
+ p.consume(INDENT)
+ var stmts []Stmt
+ for p.tok != OUTDENT && p.tok != EOF {
+ stmts = p.parseStmt(stmts)
+ }
+ p.consume(OUTDENT)
+ return stmts
+ }
+
+ return p.parseSimpleStmt(nil, true)
+}
+
+func (p *parser) parseIdent() *Ident {
+ if p.tok != IDENT {
+ p.in.error(p.in.pos, "not an identifier")
+ }
+ id := &Ident{
+ NamePos: p.tokval.pos,
+ Name: p.tokval.raw,
+ }
+ p.nextToken()
+ return id
+}
+
+func (p *parser) consume(t Token) Position {
+ if p.tok != t {
+ p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t)
+ }
+ return p.nextToken()
+}
+
+// params = (param COMMA)* param COMMA?
+// |
+//
+// param = IDENT
+// | IDENT EQ test
+// | STAR
+// | STAR IDENT
+// | STARSTAR IDENT
+//
+// parseParams parses a parameter list. The resulting expressions are of the form:
+//
+// *Ident x
+// *Binary{Op: EQ, X: *Ident, Y: Expr} x=y
+// *Unary{Op: STAR} *
+// *Unary{Op: STAR, X: *Ident} *args
+// *Unary{Op: STARSTAR, X: *Ident} **kwargs
+func (p *parser) parseParams() []Expr {
+ var params []Expr
+ for p.tok != RPAREN && p.tok != COLON && p.tok != EOF {
+ if len(params) > 0 {
+ p.consume(COMMA)
+ }
+ if p.tok == RPAREN {
+ break
+ }
+
+ // * or *args or **kwargs
+ if p.tok == STAR || p.tok == STARSTAR {
+ op := p.tok
+ pos := p.nextToken()
+ var x Expr
+ if op == STARSTAR || p.tok == IDENT {
+ x = p.parseIdent()
+ }
+ params = append(params, &UnaryExpr{
+ OpPos: pos,
+ Op: op,
+ X: x,
+ })
+ continue
+ }
+
+ // IDENT
+ // IDENT = test
+ id := p.parseIdent()
+ if p.tok == EQ { // default value
+ eq := p.nextToken()
+ dflt := p.parseTest()
+ params = append(params, &BinaryExpr{
+ X: id,
+ OpPos: eq,
+ Op: EQ,
+ Y: dflt,
+ })
+ continue
+ }
+
+ params = append(params, id)
+ }
+ return params
+}
+
+// parseExpr parses an expression, possibly consisting of a
+// comma-separated list of 'test' expressions.
+//
+// In many cases we must use parseTest to avoid ambiguity such as
+// f(x, y) vs. f((x, y)).
+func (p *parser) parseExpr(inParens bool) Expr {
+ x := p.parseTest()
+ if p.tok != COMMA {
+ return x
+ }
+
+ // tuple
+ exprs := p.parseExprs([]Expr{x}, inParens)
+ return &TupleExpr{List: exprs}
+}
+
+// parseExprs parses a comma-separated list of expressions, starting with the comma.
+// It is used to parse tuples and list elements.
+// expr_list = (',' expr)* ','?
+func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr {
+ for p.tok == COMMA {
+ pos := p.nextToken()
+ if terminatesExprList(p.tok) {
+ if !allowTrailingComma {
+ p.in.error(pos, "unparenthesized tuple with trailing comma")
+ }
+ break
+ }
+ exprs = append(exprs, p.parseTest())
+ }
+ return exprs
+}
+
+// parseTest parses a 'test', a single-component expression.
+func (p *parser) parseTest() Expr {
+ if p.tok == LAMBDA {
+ return p.parseLambda(true)
+ }
+
+ x := p.parseTestPrec(0)
+
+ // conditional expression (t IF cond ELSE f)
+ if p.tok == IF {
+ ifpos := p.nextToken()
+ cond := p.parseTestPrec(0)
+ if p.tok != ELSE {
+ p.in.error(ifpos, "conditional expression without else clause")
+ }
+ elsepos := p.nextToken()
+ else_ := p.parseTest()
+ return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_}
+ }
+
+ return x
+}
+
+// parseTestNoCond parses a single-component expression without
+// consuming a trailing 'if expr else expr'.
+func (p *parser) parseTestNoCond() Expr {
+ if p.tok == LAMBDA {
+ return p.parseLambda(false)
+ }
+ return p.parseTestPrec(0)
+}
+
+// parseLambda parses a lambda expression.
+// The allowCond flag allows the body to be an 'a if b else c' conditional.
+func (p *parser) parseLambda(allowCond bool) Expr {
+ lambda := p.nextToken()
+ var params []Expr
+ if p.tok != COLON {
+ params = p.parseParams()
+ }
+ p.consume(COLON)
+
+ var body Expr
+ if allowCond {
+ body = p.parseTest()
+ } else {
+ body = p.parseTestNoCond()
+ }
+
+ return &LambdaExpr{
+ Lambda: lambda,
+ Params: params,
+ Body: body,
+ }
+}
+
+func (p *parser) parseTestPrec(prec int) Expr {
+ if prec >= len(preclevels) {
+ return p.parsePrimaryWithSuffix()
+ }
+
+ // expr = NOT expr
+ if p.tok == NOT && prec == int(precedence[NOT]) {
+ pos := p.nextToken()
+ x := p.parseTestPrec(prec)
+ return &UnaryExpr{
+ OpPos: pos,
+ Op: NOT,
+ X: x,
+ }
+ }
+
+ return p.parseBinopExpr(prec)
+}
+
+// expr = test (OP test)*
+// Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing.
+func (p *parser) parseBinopExpr(prec int) Expr {
+ x := p.parseTestPrec(prec + 1)
+ for first := true; ; first = false {
+ if p.tok == NOT {
+ p.nextToken() // consume NOT
+ // In this context, NOT must be followed by IN.
+ // Replace NOT IN by a single NOT_IN token.
+ if p.tok != IN {
+ p.in.errorf(p.in.pos, "got %#v, want in", p.tok)
+ }
+ p.tok = NOT_IN
+ }
+
+ // Binary operator of specified precedence?
+ opprec := int(precedence[p.tok])
+ if opprec < prec {
+ return x
+ }
+
+ // Comparisons are non-associative.
+ if !first && opprec == int(precedence[EQL]) {
+ p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)",
+ x.(*BinaryExpr).Op, p.tok)
+ }
+
+ op := p.tok
+ pos := p.nextToken()
+ y := p.parseTestPrec(opprec + 1)
+ x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
+ }
+}
+
+// precedence maps each operator to its precedence (0-9), or -1 for other tokens.
+var precedence [maxToken]int8
+
+// preclevels groups operators of equal precedence.
+// Comparisons are nonassociative; other binary operators associate to the left.
+// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
+// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators
+var preclevels = [...][]Token{
+ {OR}, // or
+ {AND}, // and
+ {NOT}, // not (unary)
+ {EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
+ {PIPE}, // |
+ {CIRCUMFLEX}, // ^
+ {AMP}, // &
+ {LTLT, GTGT}, // << >>
+ {MINUS, PLUS}, // - +
+ {STAR, PERCENT, SLASH, SLASHSLASH}, // * % / //
+}
+
+func init() {
+ // populate precedence table
+ for i := range precedence {
+ precedence[i] = -1
+ }
+ for level, tokens := range preclevels {
+ for _, tok := range tokens {
+ precedence[tok] = int8(level)
+ }
+ }
+}
+
+// parsePrimaryWithSuffix parses a primary expression followed by any
+// number of attribute, index/slice, or call suffixes.
+//
+// primary_with_suffix = primary
+//                     | primary '.' IDENT
+//                     | primary slice_suffix
+//                     | primary call_suffix
+func (p *parser) parsePrimaryWithSuffix() Expr {
+	expr := p.parsePrimary()
+	for {
+		if p.tok == DOT {
+			dotPos := p.nextToken()
+			name := p.parseIdent()
+			expr = &DotExpr{Dot: dotPos, X: expr, Name: name}
+		} else if p.tok == LBRACK {
+			expr = p.parseSliceSuffix(expr)
+		} else if p.tok == LPAREN {
+			expr = p.parseCallSuffix(expr)
+		} else {
+			// No further suffix: the expression is complete.
+			return expr
+		}
+	}
+}
+
+// parseSliceSuffix parses the bracketed suffix that follows x, producing
+// either an IndexExpr (x[y]) or a SliceExpr (x[lo:hi:step]).
+//
+// slice_suffix = '[' expr? ':' expr? ':' expr? ']'
+func (p *parser) parseSliceSuffix(x Expr) Expr {
+	open := p.nextToken()
+	var lo, hi, step Expr
+
+	if p.tok != COLON {
+		first := p.parseExpr(false)
+		if p.tok == RBRACK {
+			// A lone expression followed by ']' is an index, x[y].
+			closePos := p.nextToken()
+			return &IndexExpr{X: x, Lbrack: open, Y: first, Rbrack: closePos}
+		}
+		lo = first
+	}
+
+	// Otherwise this is a slice or substring, x[lo:hi:step].
+	if p.tok == COLON {
+		p.nextToken()
+		if !(p.tok == COLON || p.tok == RBRACK) {
+			hi = p.parseTest()
+		}
+	}
+	if p.tok == COLON {
+		p.nextToken()
+		if p.tok != RBRACK {
+			step = p.parseTest()
+		}
+	}
+
+	closePos := p.consume(RBRACK)
+	return &SliceExpr{X: x, Lbrack: open, Lo: lo, Hi: hi, Step: step, Rbrack: closePos}
+}
+
+// call_suffix = '(' arg_list? ')'
+func (p *parser) parseCallSuffix(fn Expr) Expr {
+ lparen := p.consume(LPAREN)
+ var rparen Position
+ var args []Expr
+ if p.tok == RPAREN {
+ rparen = p.nextToken()
+ } else {
+ args = p.parseArgs()
+ rparen = p.consume(RPAREN)
+ }
+ return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen}
+}
+
+// parseArgs parses the actual parameters (arguments) of a call, stopping
+// at the closing parenthesis or end of input without consuming it.
+// Its structure mirrors that of parseParams.
+//
+// arg_list = ((arg COMMA)* arg COMMA?)?
+func (p *parser) parseArgs() []Expr {
+	var args []Expr
+	for p.tok != RPAREN && p.tok != EOF {
+		// Arguments after the first are separated by a comma,
+		// and a trailing comma before ')' is permitted.
+		if len(args) > 0 {
+			p.consume(COMMA)
+		}
+		if p.tok == RPAREN {
+			break
+		}
+
+		var arg Expr
+		if p.tok == STAR || p.tok == STARSTAR {
+			// *args or **kwargs
+			star := p.tok
+			starPos := p.nextToken()
+			operand := p.parseTest()
+			arg = &UnaryExpr{OpPos: starPos, Op: star, X: operand}
+		} else {
+			// We use a different strategy from Bazel here to stay within LL(1).
+			// Instead of looking ahead two tokens (IDENT, EQ) we parse
+			// 'test = test' then check that the first was an IDENT.
+			arg = p.parseTest()
+			if p.tok == EQ {
+				// name = value
+				if _, isIdent := arg.(*Ident); !isIdent {
+					p.in.errorf(p.in.pos, "keyword argument must have form name=expr")
+				}
+				eqPos := p.nextToken()
+				value := p.parseTest()
+				arg = &BinaryExpr{X: arg, OpPos: eqPos, Op: EQ, Y: value}
+			}
+		}
+		args = append(args, arg)
+	}
+	return args
+}
+
+// parsePrimary parses a primary expression, dispatching on the current token. primary = IDENT
+// | INT | FLOAT | STRING | BYTES
+// | '[' ... // list literal or comprehension
+// | '{' ... // dict literal or comprehension
+// | '(' ... // tuple or parenthesized expression
+// | ('-'|'+'|'~') primary_with_suffix
+func (p *parser) parsePrimary() Expr {
+ switch p.tok {
+ case IDENT:
+ return p.parseIdent()
+
+ case INT, FLOAT, STRING, BYTES:
+ var val interface{}
+ tok := p.tok
+ switch tok {
+ case INT:
+ if p.tokval.bigInt != nil { // prefer the big-integer value when tokval carries one
+ val = p.tokval.bigInt
+ } else {
+ val = p.tokval.int
+ }
+ case FLOAT:
+ val = p.tokval.float
+ case STRING, BYTES:
+ val = p.tokval.string // bytes literals also store their value as a Go string
+ }
+ raw := p.tokval.raw // read before nextToken moves on (presumably it overwrites tokval; confirm)
+ pos := p.nextToken()
+ return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val}
+
+ case LBRACK:
+ return p.parseList()
+
+ case LBRACE:
+ return p.parseDict()
+
+ case LPAREN:
+ lparen := p.nextToken()
+ if p.tok == RPAREN {
+ // empty tuple
+ rparen := p.nextToken()
+ return &TupleExpr{Lparen: lparen, Rparen: rparen}
+ }
+ e := p.parseExpr(true) // allow trailing comma
+ rparen := p.consume(RPAREN)
+ return &ParenExpr{
+ Lparen: lparen,
+ X: e,
+ Rparen: rparen,
+ }
+
+ case MINUS, PLUS, TILDE: // unary
+ tok := p.tok
+ pos := p.nextToken()
+ x := p.parsePrimaryWithSuffix() // operand includes any '.', '[' or '(' suffixes, so -x[i] applies - to x[i]
+ return &UnaryExpr{
+ OpPos: pos,
+ Op: tok,
+ X: x,
+ }
+ }
+ p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok) // no case matched: not the start of a primary
+ panic("unreachable") // NOTE(review): implies errorf does not return normally; confirm against its definition
+}
+
+// parseList parses a list literal or a list comprehension; the current
+// token is the opening bracket.
+//
+// list = '[' ']'
+//      | '[' expr ']'
+//      | '[' expr expr_list ']'
+//      | '[' expr (FOR loop_variables IN expr)+ ']'
+func (p *parser) parseList() Expr {
+	open := p.nextToken()
+	if p.tok == RBRACK {
+		// empty list
+		closePos := p.nextToken()
+		return &ListExpr{Lbrack: open, Rbrack: closePos}
+	}
+
+	first := p.parseTest()
+
+	elems := []Expr{first}
+	switch p.tok {
+	case FOR:
+		// list comprehension
+		return p.parseComprehensionSuffix(open, first, RBRACK)
+	case COMMA:
+		// multi-item list literal; a trailing comma is allowed
+		elems = p.parseExprs(elems, true)
+	}
+
+	closePos := p.consume(RBRACK)
+	return &ListExpr{Lbrack: open, List: elems, Rbrack: closePos}
+}
+
+// parseDict parses a dict literal or a dict comprehension; the current
+// token is the opening brace.
+//
+// dict = '{' '}'
+//      | '{' dict_entry_list '}'
+//      | '{' dict_entry FOR loop_variables IN expr '}'
+func (p *parser) parseDict() Expr {
+	open := p.nextToken()
+	if p.tok == RBRACE {
+		// empty dict
+		closePos := p.nextToken()
+		return &DictExpr{Lbrace: open, Rbrace: closePos}
+	}
+
+	first := p.parseDictEntry()
+	if p.tok == FOR {
+		// dict comprehension
+		return p.parseComprehensionSuffix(open, first, RBRACE)
+	}
+
+	items := []Expr{first}
+	for {
+		if p.tok != COMMA {
+			break
+		}
+		p.nextToken()
+		if p.tok == RBRACE {
+			// The comma just consumed was a trailing one.
+			break
+		}
+		items = append(items, p.parseDictEntry())
+	}
+
+	closePos := p.consume(RBRACE)
+	return &DictExpr{Lbrace: open, List: items, Rbrace: closePos}
+}
+
+// parseDictEntry parses a single key: value pair of a dict.
+//
+// dict_entry = test ':' test
+func (p *parser) parseDictEntry() *DictEntry {
+	entry := new(DictEntry)
+	entry.Key = p.parseTest()
+	entry.Colon = p.consume(COLON)
+	entry.Value = p.parseTest()
+	return entry
+}
+
+// comp_suffix = FOR loopvars IN expr comp_suffix
+// | IF expr comp_suffix
+// | ']' or '}' (end)
+//
+// There can be multiple FOR/IF clauses; the first is always a FOR.
+func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr {
+ var clauses []Node
+ for p.tok != endBrace { // collect clauses until the closing bracket or brace
+ if p.tok == FOR {
+ pos := p.nextToken()
+ vars := p.parseForLoopVariables()
+ in := p.consume(IN)
+ // Following Python 3, the operand of IN cannot be:
+ // - a conditional expression ('x if y else z'),
+ // due to conflicts in Python grammar
+ // ('if' is used by the comprehension);
+ // - a lambda expression
+ // - an unparenthesized tuple.
+ x := p.parseTestPrec(0)
+ clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x})
+ } else if p.tok == IF {
+ pos := p.nextToken()
+ cond := p.parseTestNoCond()
+ clauses = append(clauses, &IfClause{If: pos, Cond: cond})
+ } else {
+ p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace)
+ }
+ }
+ rbrace := p.nextToken() // the loop exited, so p.tok == endBrace here
+
+ return &Comprehension{
+ Curly: endBrace == RBRACE, // true for dict comprehensions, which parseDict ends with RBRACE
+ Lbrack: lbrace,
+ Body: body,
+ Clauses: clauses,
+ Rbrack: rbrace,
+ }
+}
+
+// terminatesExprList reports whether tok is one of the tokens that can
+// end an expression list: end of input or line, '=', ';', or a closing
+// brace, bracket, or parenthesis.
+func terminatesExprList(tok Token) bool {
+	return tok == EOF ||
+		tok == NEWLINE ||
+		tok == EQ ||
+		tok == RBRACE ||
+		tok == RBRACK ||
+		tok == RPAREN ||
+		tok == SEMI
+}
+
+// Comment assignment.
+// We build two lists of all subnodes, preorder and postorder.
+// The preorder list is ordered by start location, with outer nodes first.
+// The postorder list is ordered by end location, with outer nodes last.
+// We use the preorder list to assign each whole-line comment to the syntax
+// immediately following it, and we use the postorder list to assign each
+// end-of-line comment to the syntax immediately preceding it.
+
+// flattenAST returns the list of AST nodes, both in prefix order and in postfix
+// order.
+func flattenAST(root Node) (pre, post []Node) {
+ stack := []Node{}
+ Walk(root, func(n Node) bool {
+ if n != nil {
+ pre = append(pre, n)
+ stack = append(stack, n)
+ } else {
+ post = append(post, stack[len(stack)-1])
+ stack = stack[:len(stack)-1]
+ }
+ return true
+ })
+ return pre, post
+}
+
+// assignComments attaches the comments collected in p.in (lineComments and suffixComments) to nearby nodes of the tree rooted at n.
+func (p *parser) assignComments(n Node) {
+ // Leave early if there are no comments
+ if len(p.in.lineComments)+len(p.in.suffixComments) == 0 {
+ return
+ }
+
+ pre, post := flattenAST(n)
+
+ // Assign line comments to syntax immediately following.
+ line := p.in.lineComments
+ for _, x := range pre {
+ start, _ := x.Span()
+
+ switch x.(type) {
+ case *File:
+ continue // whole-line comments are never attached to the File node here
+ }
+
+ for len(line) > 0 && !start.isBefore(line[0].Start) { // take every pending comment that starts at or before x
+ x.AllocComments()
+ x.Comments().Before = append(x.Comments().Before, line[0])
+ line = line[1:]
+ }
+ }
+
+ // Remaining line comments go at end of file.
+ if len(line) > 0 {
+ n.AllocComments()
+ n.Comments().After = append(n.Comments().After, line...)
+ }
+
+ // Assign suffix comments to syntax immediately before.
+ suffix := p.in.suffixComments
+ for i := len(post) - 1; i >= 0; i-- { // visit nodes in reverse postfix order
+ x := post[i]
+
+ // Do not assign suffix comments to file
+ switch x.(type) {
+ case *File:
+ continue
+ }
+
+ _, end := x.Span()
+ if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) { // x ends before the last unassigned suffix comment; at most one is taken per node
+ x.AllocComments()
+ x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1])
+ suffix = suffix[:len(suffix)-1]
+ }
+ }
+}
diff --git a/syntax/parse_test.go b/syntax/parse_test.go
new file mode 100644
index 0000000..fedbb3e
--- /dev/null
+++ b/syntax/parse_test.go
@@ -0,0 +1,487 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax_test
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "go/build"
+ "io/ioutil"
+ "path/filepath"
+ "reflect"
+ "strings"
+ "testing"
+
+ "go.starlark.net/internal/chunkedfile"
+ "go.starlark.net/starlarktest"
+ "go.starlark.net/syntax"
+)
+
+func TestExprParseTrees(t *testing.T) { // table test: syntax.ParseExpr output, rendered by treeString, must equal want
+ for _, test := range []struct {
+ input, want string // want is either the expected tree or the expected error message without its position
+ }{
+ {`print(1)`,
+ `(CallExpr Fn=print Args=(1))`},
+ {"print(1)\n",
+ `(CallExpr Fn=print Args=(1))`},
+ {`x + 1`,
+ `(BinaryExpr X=x Op=+ Y=1)`},
+ {`[x for x in y]`,
+ `(Comprehension Body=x Clauses=((ForClause Vars=x X=y)))`},
+ {`[x for x in (a if b else c)]`,
+ `(Comprehension Body=x Clauses=((ForClause Vars=x X=(ParenExpr X=(CondExpr Cond=b True=a False=c)))))`},
+ {`x[i].f(42)`,
+ `(CallExpr Fn=(DotExpr X=(IndexExpr X=x Y=i) Name=f) Args=(42))`},
+ {`x.f()`,
+ `(CallExpr Fn=(DotExpr X=x Name=f))`},
+ {`x+y*z`,
+ `(BinaryExpr X=x Op=+ Y=(BinaryExpr X=y Op=* Y=z))`},
+ {`x%y-z`,
+ `(BinaryExpr X=(BinaryExpr X=x Op=% Y=y) Op=- Y=z)`},
+ {`a + b not in c`,
+ `(BinaryExpr X=(BinaryExpr X=a Op=+ Y=b) Op=not in Y=c)`},
+ {`lambda x, *args, **kwargs: None`,
+ `(LambdaExpr Params=(x (UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)) Body=None)`},
+ {`{"one": 1}`,
+ `(DictExpr List=((DictEntry Key="one" Value=1)))`},
+ {`a[i]`,
+ `(IndexExpr X=a Y=i)`},
+ {`a[i:]`,
+ `(SliceExpr X=a Lo=i)`},
+ {`a[:j]`,
+ `(SliceExpr X=a Hi=j)`},
+ {`a[::]`,
+ `(SliceExpr X=a)`},
+ {`a[::k]`,
+ `(SliceExpr X=a Step=k)`},
+ {`[]`,
+ `(ListExpr)`},
+ {`[1]`,
+ `(ListExpr List=(1))`},
+ {`[1,]`,
+ `(ListExpr List=(1))`},
+ {`[1, 2]`,
+ `(ListExpr List=(1 2))`},
+ {`()`,
+ `(TupleExpr)`},
+ {`(4,)`,
+ `(ParenExpr X=(TupleExpr List=(4)))`},
+ {`(4)`,
+ `(ParenExpr X=4)`},
+ {`(4, 5)`,
+ `(ParenExpr X=(TupleExpr List=(4 5)))`},
+ {`1, 2, 3`,
+ `(TupleExpr List=(1 2 3))`},
+ {`1, 2,`,
+ `unparenthesized tuple with trailing comma`}, // an expected error message rather than a tree
+ {`{}`,
+ `(DictExpr)`},
+ {`{"a": 1}`,
+ `(DictExpr List=((DictEntry Key="a" Value=1)))`},
+ {`{"a": 1,}`,
+ `(DictExpr List=((DictEntry Key="a" Value=1)))`},
+ {`{"a": 1, "b": 2}`,
+ `(DictExpr List=((DictEntry Key="a" Value=1) (DictEntry Key="b" Value=2)))`},
+ {`{x: y for (x, y) in z}`,
+ `(Comprehension Curly Body=(DictEntry Key=x Value=y) Clauses=((ForClause Vars=(ParenExpr X=(TupleExpr List=(x y))) X=z)))`},
+ {`{x: y for a in b if c}`,
+ `(Comprehension Curly Body=(DictEntry Key=x Value=y) Clauses=((ForClause Vars=a X=b) (IfClause Cond=c)))`},
+ {`-1 + +2`,
+ `(BinaryExpr X=(UnaryExpr Op=- X=1) Op=+ Y=(UnaryExpr Op=+ X=2))`},
+ {`"foo" + "bar"`,
+ `(BinaryExpr X="foo" Op=+ Y="bar")`},
+ {`-1 * 2`, // prec(unary -) > prec(binary *)
+ `(BinaryExpr X=(UnaryExpr Op=- X=1) Op=* Y=2)`},
+ {`-x[i]`, // prec(unary -) < prec(x[i])
+ `(UnaryExpr Op=- X=(IndexExpr X=x Y=i))`},
+ {`a | b & c | d`, // prec(|) < prec(&)
+ `(BinaryExpr X=(BinaryExpr X=a Op=| Y=(BinaryExpr X=b Op=& Y=c)) Op=| Y=d)`},
+ {`a or b and c or d`,
+ `(BinaryExpr X=(BinaryExpr X=a Op=or Y=(BinaryExpr X=b Op=and Y=c)) Op=or Y=d)`},
+ {`a and b or c and d`,
+ `(BinaryExpr X=(BinaryExpr X=a Op=and Y=b) Op=or Y=(BinaryExpr X=c Op=and Y=d))`},
+ {`f(1, x=y)`,
+ `(CallExpr Fn=f Args=(1 (BinaryExpr X=x Op== Y=y)))`},
+ {`f(*args, **kwargs)`,
+ `(CallExpr Fn=f Args=((UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)))`},
+ {`lambda *args, *, x=1, **kwargs: 0`,
+ `(LambdaExpr Params=((UnaryExpr Op=* X=args) (UnaryExpr Op=*) (BinaryExpr X=x Op== Y=1) (UnaryExpr Op=** X=kwargs)) Body=0)`},
+ {`lambda *, a, *b: 0`,
+ `(LambdaExpr Params=((UnaryExpr Op=*) a (UnaryExpr Op=* X=b)) Body=0)`},
+ {`a if b else c`,
+ `(CondExpr Cond=b True=a False=c)`},
+ {`a and not b`,
+ `(BinaryExpr X=a Op=and Y=(UnaryExpr Op=not X=b))`},
+ {`[e for x in y if cond1 if cond2]`,
+ `(Comprehension Body=e Clauses=((ForClause Vars=x X=y) (IfClause Cond=cond1) (IfClause Cond=cond2)))`}, // github.com/google/skylark/issues/53
+ } {
+ e, err := syntax.ParseExpr("foo.star", test.input, 0)
+ var got string
+ if err != nil {
+ got = stripPos(err) // compare error text without the file:line:col prefix
+ } else {
+ got = treeString(e)
+ }
+ if test.want != got {
+ t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
+ }
+ }
+}
+
+func TestStmtParseTrees(t *testing.T) { // table test: the first statement of each parsed input must render as want
+ for _, test := range []struct {
+ input, want string
+ }{
+ {`print(1)`,
+ `(ExprStmt X=(CallExpr Fn=print Args=(1)))`},
+ {`return 1, 2`,
+ `(ReturnStmt Result=(TupleExpr List=(1 2)))`},
+ {`return`,
+ `(ReturnStmt)`},
+ {`for i in "abc": break`,
+ `(ForStmt Vars=i X="abc" Body=((BranchStmt Token=break)))`},
+ {`for i in "abc": continue`,
+ `(ForStmt Vars=i X="abc" Body=((BranchStmt Token=continue)))`},
+ {`for x, y in z: pass`,
+ `(ForStmt Vars=(TupleExpr List=(x y)) X=z Body=((BranchStmt Token=pass)))`},
+ {`if True: pass`,
+ `(IfStmt Cond=True True=((BranchStmt Token=pass)))`},
+ {`if True: break`,
+ `(IfStmt Cond=True True=((BranchStmt Token=break)))`},
+ {`if True: continue`,
+ `(IfStmt Cond=True True=((BranchStmt Token=continue)))`},
+ {`if True: pass
+else:
+ pass`,
+ `(IfStmt Cond=True True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))`},
+ {"if a: pass\nelif b: pass\nelse: pass",
+ `(IfStmt Cond=a True=((BranchStmt Token=pass)) False=((IfStmt Cond=b True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))))`},
+ {`x, y = 1, 2`,
+ `(AssignStmt Op== LHS=(TupleExpr List=(x y)) RHS=(TupleExpr List=(1 2)))`},
+ {`x[i] = 1`,
+ `(AssignStmt Op== LHS=(IndexExpr X=x Y=i) RHS=1)`},
+ {`x.f = 1`,
+ `(AssignStmt Op== LHS=(DotExpr X=x Name=f) RHS=1)`},
+ {`(x, y) = 1`,
+ `(AssignStmt Op== LHS=(ParenExpr X=(TupleExpr List=(x y))) RHS=1)`},
+ {`load("", "a", b="c")`,
+ `(LoadStmt Module="" From=(a c) To=(a b))`},
+ {`if True: load("", "a", b="c")`, // load needn't be at toplevel
+ `(IfStmt Cond=True True=((LoadStmt Module="" From=(a c) To=(a b))))`},
+ {`def f(x, *args, **kwargs):
+ pass`,
+ `(DefStmt Name=f Params=(x (UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)) Body=((BranchStmt Token=pass)))`},
+ {`def f(**kwargs, *args): pass`,
+ `(DefStmt Name=f Params=((UnaryExpr Op=** X=kwargs) (UnaryExpr Op=* X=args)) Body=((BranchStmt Token=pass)))`},
+ {`def f(a, b, c=d): pass`,
+ `(DefStmt Name=f Params=(a b (BinaryExpr X=c Op== Y=d)) Body=((BranchStmt Token=pass)))`},
+ {`def f(a, b=c, d): pass`,
+ `(DefStmt Name=f Params=(a (BinaryExpr X=b Op== Y=c) d) Body=((BranchStmt Token=pass)))`}, // TODO(adonovan): fix this
+ {`def f():
+ def g():
+ pass
+ pass
+def h():
+ pass`,
+ `(DefStmt Name=f Body=((DefStmt Name=g Body=((BranchStmt Token=pass))) (BranchStmt Token=pass)))`},
+ {"f();g()",
+ `(ExprStmt X=(CallExpr Fn=f))`},
+ {"f();",
+ `(ExprStmt X=(CallExpr Fn=f))`},
+ {"f();g()\n",
+ `(ExprStmt X=(CallExpr Fn=f))`},
+ {"f();\n",
+ `(ExprStmt X=(CallExpr Fn=f))`},
+ } {
+ f, err := syntax.Parse("foo.star", test.input, 0)
+ if err != nil {
+ t.Errorf("parse `%s` failed: %v", test.input, stripPos(err))
+ continue
+ }
+ if got := treeString(f.Stmts[0]); test.want != got { // only the first statement is compared
+ t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
+ }
+ }
+}
+
+// TestFileParseTrees tests sequences of statements, and particularly
+// handling of indentation, newlines, line continuations, and blank lines.
+func TestFileParseTrees(t *testing.T) {
+ for _, test := range []struct {
+ input, want string // want holds one rendered statement per line
+ }{
+ {`x = 1
+print(x)`,
+ `(AssignStmt Op== LHS=x RHS=1)
+(ExprStmt X=(CallExpr Fn=print Args=(x)))`},
+ {"if cond:\n\tpass",
+ `(IfStmt Cond=cond True=((BranchStmt Token=pass)))`},
+ {"if cond:\n\tpass\nelse:\n\tpass",
+ `(IfStmt Cond=cond True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))`},
+ {`def f():
+ pass
+pass
+
+pass`,
+ `(DefStmt Name=f Body=((BranchStmt Token=pass)))
+(BranchStmt Token=pass)
+(BranchStmt Token=pass)`},
+ {`pass; pass`,
+ `(BranchStmt Token=pass)
+(BranchStmt Token=pass)`},
+ {"pass\npass",
+ `(BranchStmt Token=pass)
+(BranchStmt Token=pass)`},
+ {"pass\n\npass",
+ `(BranchStmt Token=pass)
+(BranchStmt Token=pass)`},
+ {`x = (1 +
+2)`,
+ `(AssignStmt Op== LHS=x RHS=(ParenExpr X=(BinaryExpr X=1 Op=+ Y=2)))`},
+ {`x = 1 \
++ 2`,
+ `(AssignStmt Op== LHS=x RHS=(BinaryExpr X=1 Op=+ Y=2))`},
+ } {
+ f, err := syntax.Parse("foo.star", test.input, 0)
+ if err != nil {
+ t.Errorf("parse `%s` failed: %v", test.input, stripPos(err))
+ continue
+ }
+ var buf bytes.Buffer
+ for i, stmt := range f.Stmts { // render every statement, separated by newlines
+ if i > 0 {
+ buf.WriteByte('\n')
+ }
+ writeTree(&buf, reflect.ValueOf(stmt))
+ }
+ if got := buf.String(); test.want != got {
+ t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
+ }
+ }
+}
+
+// TestCompoundStmt tests handling of REPL-style compound statements.
+func TestCompoundStmt(t *testing.T) {
+ for _, test := range []struct {
+ input, want string // want is the concatenated statement trees, or the expected error message
+ }{
+ // blank lines
+ {"\n",
+ ``},
+ {" \n",
+ ``},
+ {"# comment\n",
+ ``},
+ // simple statement
+ {"1\n",
+ `(ExprStmt X=1)`},
+ {"print(1)\n",
+ `(ExprStmt X=(CallExpr Fn=print Args=(1)))`},
+ {"1;2;3;\n",
+ `(ExprStmt X=1)(ExprStmt X=2)(ExprStmt X=3)`},
+ {"f();g()\n",
+ `(ExprStmt X=(CallExpr Fn=f))(ExprStmt X=(CallExpr Fn=g))`},
+ {"f();\n",
+ `(ExprStmt X=(CallExpr Fn=f))`},
+ {"f(\n\n\n\n\n\n\n)\n",
+ `(ExprStmt X=(CallExpr Fn=f))`},
+ // complex statements
+ {"def f():\n pass\n\n",
+ `(DefStmt Name=f Body=((BranchStmt Token=pass)))`},
+ {"if cond:\n pass\n\n",
+ `(IfStmt Cond=cond True=((BranchStmt Token=pass)))`},
+ // Even as a 1-liner, the following blank line is required.
+ {"if cond: pass\n\n",
+ `(IfStmt Cond=cond True=((BranchStmt Token=pass)))`},
+ // github.com/google/starlark-go/issues/121
+ {"a; b; c\n",
+ `(ExprStmt X=a)(ExprStmt X=b)(ExprStmt X=c)`},
+ {"a; b c\n",
+ `invalid syntax`},
+ } {
+
+ // Fake readline input from string.
+ // The ! suffix, which would cause a parse error,
+ // tests that the parser doesn't read more than necessary.
+ sc := bufio.NewScanner(strings.NewReader(test.input + "!"))
+ readline := func() ([]byte, error) {
+ if sc.Scan() {
+ return []byte(sc.Text() + "\n"), nil // the scanner strips the line terminator, so restore it
+ }
+ return nil, sc.Err()
+ }
+
+ var got string
+ f, err := syntax.ParseCompoundStmt("foo.star", readline)
+ if err != nil {
+ got = stripPos(err)
+ } else {
+ for _, stmt := range f.Stmts {
+ got += treeString(stmt) // trees are concatenated with no separator
+ }
+ }
+ if test.want != got {
+ t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
+ }
+ }
+}
+
+// stripPos returns err's message with everything up to and including the
+// first ": " removed, which drops a leading file:line:col position.
+// A message that contains no ": " is returned unchanged.
+func stripPos(err error) string {
+	msg := err.Error()
+	if parts := strings.SplitN(msg, ": ", 2); len(parts) == 2 {
+		return parts[1]
+	}
+	return msg
+}
+
+// treeString prints a syntax node as a parenthesized tree.
+// Idents are printed as foo and Literals as "foo" or 42.
+// Structs are printed as (type name=value ...).
+// Only non-empty fields are shown.
+func treeString(n syntax.Node) string {
+ var buf bytes.Buffer
+ writeTree(&buf, reflect.ValueOf(n))
+ return buf.String()
+}
+
+func writeTree(out *bytes.Buffer, x reflect.Value) { // writeTree appends the tree rendering of x to out, recursing via reflection
+ switch x.Kind() {
+ case reflect.String, reflect.Int, reflect.Bool:
+ fmt.Fprintf(out, "%v", x.Interface())
+ case reflect.Ptr, reflect.Interface:
+ if elem := x.Elem(); elem.Kind() == 0 { // Kind 0 is reflect.Invalid: a nil pointer or interface
+ out.WriteString("nil")
+ } else {
+ writeTree(out, elem)
+ }
+ case reflect.Struct:
+ switch v := x.Interface().(type) {
+ case syntax.Literal:
+ switch v.Token { // literal tokens not listed here (e.g. FLOAT) print nothing
+ case syntax.STRING:
+ fmt.Fprintf(out, "%q", v.Value)
+ case syntax.BYTES:
+ fmt.Fprintf(out, "b%q", v.Value)
+ case syntax.INT:
+ fmt.Fprintf(out, "%d", v.Value)
+ }
+ return
+ case syntax.Ident:
+ out.WriteString(v.Name)
+ return
+ }
+ fmt.Fprintf(out, "(%s", strings.TrimPrefix(x.Type().String(), "syntax."))
+ for i, n := 0, x.NumField(); i < n; i++ {
+ f := x.Field(i)
+ if f.Type() == reflect.TypeOf(syntax.Position{}) {
+ continue // skip positions
+ }
+ name := x.Type().Field(i).Name
+ if name == "commentsRef" {
+ continue // skip comments fields
+ }
+ if f.Type() == reflect.TypeOf(syntax.Token(0)) {
+ fmt.Fprintf(out, " %s=%s", name, f.Interface()) // token fields are always printed
+ continue
+ }
+
+ switch f.Kind() {
+ case reflect.Slice:
+ if n := f.Len(); n > 0 { // empty slices are omitted entirely
+ fmt.Fprintf(out, " %s=(", name)
+ for i := 0; i < n; i++ {
+ if i > 0 {
+ out.WriteByte(' ')
+ }
+ writeTree(out, f.Index(i))
+ }
+ out.WriteByte(')')
+ }
+ continue
+ case reflect.Ptr, reflect.Interface:
+ if f.IsNil() {
+ continue // nil fields are omitted; non-nil ones fall through to the generic print below
+ }
+ case reflect.Int:
+ if f.Int() != 0 {
+ fmt.Fprintf(out, " %s=%d", name, f.Int())
+ }
+ continue
+ case reflect.Bool:
+ if f.Bool() {
+ fmt.Fprintf(out, " %s", name) // true booleans print as the bare field name
+ }
+ continue
+ }
+ fmt.Fprintf(out, " %s=", name)
+ writeTree(out, f)
+ }
+ fmt.Fprintf(out, ")")
+ default:
+ fmt.Fprintf(out, "%T", x.Interface()) // any other kind: print only its Go type
+ }
+}
+
+func TestParseErrors(t *testing.T) {
+ filename := starlarktest.DataFile("syntax", "testdata/errors.star")
+ for _, chunk := range chunkedfile.Read(filename, t) {
+ _, err := syntax.Parse(filename, chunk.Source, 0)
+ switch err := err.(type) {
+ case nil:
+ // ok
+ case syntax.Error:
+ chunk.GotError(int(err.Pos.Line), err.Msg)
+ default:
+ t.Error(err)
+ }
+ chunk.Done()
+ }
+}
+
+func TestFilePortion(t *testing.T) {
+ // Imagine that the Starlark file or expression print(x.f) is extracted
+ // from the middle of a file in some hypothetical template language;
+ // see https://github.com/google/starlark-go/issues/346. For example:
+ // --
+ // {{loop x seq}}
+ // {{print(x.f)}}
+ // {{end}}
+ // --
+ fp := syntax.FilePortion{Content: []byte("print(x.f)"), FirstLine: 2, FirstCol: 4}
+ file, err := syntax.Parse("foo.template", fp, 0)
+ if err != nil {
+ t.Fatal(err)
+ }
+ span := fmt.Sprint(file.Stmts[0].Span())
+ want := "foo.template:2:4 foo.template:2:14"
+ if span != want {
+ t.Errorf("wrong span: got %q, want %q", span, want)
+ }
+}
+
+// dataFile returns the path of filename within package directory pkgdir
+// of the go.starlark.net source tree under the default GOPATH.
+// It is the same as starlarktest.DataFile;
+// we make a copy to avoid a dependency cycle.
+var dataFile = func(pkgdir, filename string) string {
+	root := filepath.Join(build.Default.GOPATH, "src/go.starlark.net")
+	return filepath.Join(root, pkgdir, filename)
+}
+
+// BenchmarkParse measures syntax.Parse on testdata/scan.star.
+// Reading the file happens while the timer is stopped.
+func BenchmarkParse(b *testing.B) {
+	filename := dataFile("syntax", "testdata/scan.star")
+
+	b.StopTimer()
+	src, readErr := ioutil.ReadFile(filename)
+	if readErr != nil {
+		b.Fatal(readErr)
+	}
+	b.StartTimer()
+
+	for iter := 0; iter < b.N; iter++ {
+		if _, err := syntax.Parse(filename, src, 0); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
diff --git a/syntax/quote.go b/syntax/quote.go
new file mode 100644
index 0000000..741e106
--- /dev/null
+++ b/syntax/quote.go
@@ -0,0 +1,309 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+// Starlark quoted string utilities.
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+// unesc maps single-letter chars following \ to their actual values; unquote indexes it for one-char escapes.
+var unesc = [256]byte{ // indexed by the escape character; entries not listed are zero
+ 'a': '\a',
+ 'b': '\b',
+ 'f': '\f',
+ 'n': '\n',
+ 'r': '\r',
+ 't': '\t',
+ 'v': '\v',
+ '\\': '\\',
+ '\'': '\'',
+ '"': '"',
+}
+
+// esc maps escape-worthy bytes to the char that should follow \ (the inverse of unesc).
+var esc = [256]byte{ // NOTE(review): not referenced by unquote, indexByte or Quote; check whether it is still used elsewhere
+ '\a': 'a',
+ '\b': 'b',
+ '\f': 'f',
+ '\n': 'n',
+ '\r': 'r',
+ '\t': 't',
+ '\v': 'v',
+ '\\': '\\',
+ '\'': '\'',
+ '"': '"',
+}
+
+// unquote decodes the Starlark string or bytes literal in quoted.
+//
+// It returns the decoded value s, whether the literal was triple-quoted,
+// whether it carried the b (bytes) prefix, and a non-nil error describing
+// malformed quoting or an invalid escape sequence.
+//
+// An r prefix disables backslash processing; if both prefixes are present
+// the r must come first. Carriage returns (and CR LF pairs) inside the
+// literal are always replaced by a single newline, even in raw mode.
+func unquote(quoted string) (s string, triple, isByte bool, err error) {
+	// Check for raw prefix: means don't interpret the inner \.
+	raw := false
+	if strings.HasPrefix(quoted, "r") {
+		raw = true
+		quoted = quoted[1:]
+	}
+	// Check for bytes prefix.
+	if strings.HasPrefix(quoted, "b") {
+		isByte = true
+		quoted = quoted[1:]
+	}
+
+	if len(quoted) < 2 {
+		err = fmt.Errorf("string literal too short")
+		return
+	}
+
+	// The literal must start and end with the same quote character.
+	if quoted[0] != '"' && quoted[0] != '\'' || quoted[0] != quoted[len(quoted)-1] {
+		err = fmt.Errorf("string literal has invalid quotes")
+		return
+	}
+
+	// Check for triple quoted string.
+	quote := quoted[0]
+	if len(quoted) >= 6 && quoted[1] == quote && quoted[2] == quote && quoted[:3] == quoted[len(quoted)-3:] {
+		triple = true
+		quoted = quoted[3 : len(quoted)-3]
+	} else {
+		quoted = quoted[1 : len(quoted)-1]
+	}
+
+	// Now quoted is the quoted data, but no quotes.
+	// If we're in raw mode or there are no escapes or
+	// carriage returns, we're done.
+	var unquoteChars string
+	if raw {
+		unquoteChars = "\r"
+	} else {
+		unquoteChars = "\\\r"
+	}
+	if !strings.ContainsAny(quoted, unquoteChars) {
+		s = quoted
+		return
+	}
+
+	// Otherwise process quoted string.
+	// Each iteration processes one escape sequence along with the
+	// plain text leading up to it.
+	var buf strings.Builder
+	for {
+		// Remove prefix before escape sequence.
+		i := strings.IndexAny(quoted, unquoteChars)
+		if i < 0 {
+			i = len(quoted)
+		}
+		buf.WriteString(quoted[:i])
+		quoted = quoted[i:]
+
+		if len(quoted) == 0 {
+			break
+		}
+
+		// Process carriage return: CR and CR LF both become LF.
+		if quoted[0] == '\r' {
+			buf.WriteByte('\n')
+			if len(quoted) > 1 && quoted[1] == '\n' {
+				quoted = quoted[2:]
+			} else {
+				quoted = quoted[1:]
+			}
+			continue
+		}
+
+		// Process escape sequence.
+		if len(quoted) == 1 {
+			err = fmt.Errorf(`truncated escape sequence \`)
+			return
+		}
+
+		switch quoted[1] {
+		default:
+			// In Starlark, like Go, a backslash must escape something.
+			// (Python still treats unnecessary backslashes literally,
+			// but since 3.6 has emitted a deprecation warning.)
+			err = fmt.Errorf("invalid escape sequence \\%c", quoted[1])
+			return
+
+		case '\n':
+			// Ignore the escape and the line break.
+			quoted = quoted[2:]
+
+		case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
+			// One-char escape.
+			// Escapes are allowed for both kinds of quotation
+			// mark, not just the kind in use.
+			buf.WriteByte(unesc[quoted[1]])
+			quoted = quoted[2:]
+
+		case '0', '1', '2', '3', '4', '5', '6', '7':
+			// Octal escape, up to 3 digits, \OOO.
+			n := int(quoted[1] - '0')
+			quoted = quoted[2:]
+			for i := 1; i < 3; i++ {
+				if len(quoted) == 0 || quoted[0] < '0' || '7' < quoted[0] {
+					break
+				}
+				n = n*8 + int(quoted[0]-'0')
+				quoted = quoted[1:]
+			}
+			// Text strings accept only ASCII values here.
+			if !isByte && n > 127 {
+				err = fmt.Errorf(`non-ASCII octal escape \%o (use \u%04X for the UTF-8 encoding of U+%04X)`, n, n, n)
+				return
+			}
+			if n >= 256 {
+				// NOTE: Python silently discards the high bit,
+				// so that '\541' == '\141' == 'a'.
+				// Let's see if we can avoid doing that in BUILD files.
+				err = fmt.Errorf(`invalid escape sequence \%03o`, n)
+				return
+			}
+			buf.WriteByte(byte(n))
+
+		case 'x':
+			// Hexadecimal escape, exactly 2 digits, \xXX.
+			// Text strings accept only [0-127]; bytes accept any value.
+			if len(quoted) < 4 {
+				err = fmt.Errorf(`truncated escape sequence %s`, quoted)
+				return
+			}
+			n, err1 := strconv.ParseUint(quoted[2:4], 16, 0)
+			if err1 != nil {
+				err = fmt.Errorf(`invalid escape sequence %s`, quoted[:4])
+				return
+			}
+			if !isByte && n > 127 {
+				err = fmt.Errorf(`non-ASCII hex escape %s (use \u%04X for the UTF-8 encoding of U+%04X)`,
+					quoted[:4], n, n)
+				return
+			}
+			buf.WriteByte(byte(n))
+			quoted = quoted[4:]
+
+		case 'u', 'U':
+			// Unicode code point, 4 (\uXXXX) or 8 (\UXXXXXXXX) hex digits.
+			sz := 6
+			if quoted[1] == 'U' {
+				sz = 10
+			}
+			if len(quoted) < sz {
+				err = fmt.Errorf(`truncated escape sequence %s`, quoted)
+				return
+			}
+			n, err1 := strconv.ParseUint(quoted[2:sz], 16, 0)
+			if err1 != nil {
+				err = fmt.Errorf(`invalid escape sequence %s`, quoted[:sz])
+				return
+			}
+			if n > unicode.MaxRune {
+				// Report the actual upper bound rather than
+				// repeating the offending value as the "max".
+				err = fmt.Errorf(`code point out of range: %s (max \U%08x)`,
+					quoted[:sz], unicode.MaxRune)
+				return
+			}
+			// As in Go, surrogates are disallowed.
+			if 0xD800 <= n && n < 0xE000 {
+				err = fmt.Errorf(`invalid Unicode code point U+%04X`, n)
+				return
+			}
+			buf.WriteRune(rune(n))
+			quoted = quoted[sz:]
+		}
+	}
+
+	s = buf.String()
+	return
+}
+
+// indexByte returns the index of the first instance of b in s, or else -1.
+// It is a thin wrapper around strings.IndexByte, kept so that existing
+// callers in this package continue to compile.
+func indexByte(s string, b byte) int {
+	return strings.IndexByte(s, b)
+}
+
+// Quote returns a double-quoted Starlark literal that denotes s.
+// If b, it returns a bytes literal (the same text with a leading b).
+func Quote(s string, b bool) string {
+ const hex = "0123456789abcdef"
+ var runeTmp [utf8.UTFMax]byte // scratch space for encoding one printable rune
+
+ buf := make([]byte, 0, 3*len(s)/2) // initial capacity guess; append grows it as needed
+ if b {
+ buf = append(buf, 'b')
+ }
+ buf = append(buf, '"')
+ for width := 0; len(s) > 0; s = s[width:] { // each iteration consumes one rune (or one invalid byte) of s
+ r := rune(s[0])
+ width = 1
+ if r >= utf8.RuneSelf {
+ r, width = utf8.DecodeRuneInString(s)
+ }
+ if width == 1 && r == utf8.RuneError {
+ // String (!b) literals accept \xXX escapes only for ASCII,
+ // but we must use them here to represent invalid bytes.
+ // The result is not a legal literal.
+ buf = append(buf, `\x`...)
+ buf = append(buf, hex[s[0]>>4])
+ buf = append(buf, hex[s[0]&0xF])
+ continue
+ }
+ if r == '"' || r == '\\' { // always backslashed
+ buf = append(buf, '\\')
+ buf = append(buf, byte(r))
+ continue
+ }
+ if strconv.IsPrint(r) { // printable runes, including single quotes, are emitted as-is in UTF-8
+ n := utf8.EncodeRune(runeTmp[:], r)
+ buf = append(buf, runeTmp[:n]...)
+ continue
+ }
+ switch r { // non-printable runes: prefer the short named escapes
+ case '\a':
+ buf = append(buf, `\a`...)
+ case '\b':
+ buf = append(buf, `\b`...)
+ case '\f':
+ buf = append(buf, `\f`...)
+ case '\n':
+ buf = append(buf, `\n`...)
+ case '\r':
+ buf = append(buf, `\r`...)
+ case '\t':
+ buf = append(buf, `\t`...)
+ case '\v':
+ buf = append(buf, `\v`...)
+ default:
+ switch {
+ case r < ' ' || r == 0x7f: // remaining ASCII control characters use \xXX
+ buf = append(buf, `\x`...)
+ buf = append(buf, hex[byte(r)>>4])
+ buf = append(buf, hex[byte(r)&0xF])
+ case r > utf8.MaxRune:
+ r = 0xFFFD // substitute the Unicode replacement character
+ fallthrough
+ case r < 0x10000:
+ buf = append(buf, `\u`...)
+ for s := 12; s >= 0; s -= 4 { // four hex digits, most significant first; s shadows the string here
+ buf = append(buf, hex[r>>uint(s)&0xF])
+ }
+ default:
+ buf = append(buf, `\U`...)
+ for s := 28; s >= 0; s -= 4 { // eight hex digits, most significant first
+ buf = append(buf, hex[r>>uint(s)&0xF])
+ }
+ }
+ }
+ }
+ buf = append(buf, '"')
+ return string(buf)
+}
diff --git a/syntax/quote_test.go b/syntax/quote_test.go
new file mode 100644
index 0000000..be7498b
--- /dev/null
+++ b/syntax/quote_test.go
@@ -0,0 +1,65 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+import (
+ "strings"
+ "testing"
+)
+
+// quoteTests is the table shared by TestQuote and TestUnquote.
+// Every entry is checked by TestUnquote (q must unquote to s);
+// only entries with std set are checked by TestQuote (s must quote to q).
+var quoteTests = []struct {
+ q string // quoted
+ s string // unquoted (actual string)
+ std bool // q is standard form for s
+}{
+ {`""`, "", true},
+ {`''`, "", false},
+ {`"hello"`, `hello`, true},
+ {`'hello'`, `hello`, false},
+ {`"quote\"here"`, `quote"here`, true},
+ {`'quote"here'`, `quote"here`, false},
+ {`"quote'here"`, `quote'here`, true},
+ {`'quote\'here'`, `quote'here`, false},
+
+ {`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", true},
+ {`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", false},
+ {`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", false},
+ {`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", true},
+ {`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", false},
+ {`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", false},
+ {`"\a\b\f\n\r\t\v\x00\x7f\"\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"\\\x03", false},
+ {
+ `"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ \x27\\1\x27,/g' >> $@; "`,
+ "cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ '\\1',/g' >> $@; ",
+ false,
+ },
+ {
+ `"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ '\\1',/g' >> $@; "`,
+ "cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ '\\1',/g' >> $@; ",
+ true,
+ },
+}
+
+// TestQuote checks that Quote produces the standard quoted form
+// for every table entry that is marked as standard.
+func TestQuote(t *testing.T) {
+ for _, tc := range quoteTests {
+ if tc.std {
+ got := Quote(tc.s, false)
+ if got != tc.q {
+ t.Errorf("quote(%#q) = %s, want %s", tc.s, got, tc.q)
+ }
+ }
+ }
+}
+
+// TestUnquote checks that unquote decodes every quoted form in the
+// table to its expected string and reports triple-quoting correctly.
+func TestUnquote(t *testing.T) {
+ for _, tc := range quoteTests {
+ wantTriple := false
+ for _, delim := range []string{`"""`, `'''`} {
+ if strings.HasPrefix(tc.q, delim) {
+ wantTriple = true
+ }
+ }
+ got, gotTriple, _, err := unquote(tc.q)
+ if err == nil && got == tc.s && gotTriple == wantTriple {
+ continue
+ }
+ t.Errorf("unquote(%s) = %#q, %v, %v want %#q, %v, nil", tc.q, got, gotTriple, err, tc.s, wantTriple)
+ }
+}
diff --git a/syntax/scan.go b/syntax/scan.go
new file mode 100644
index 0000000..bb4165e
--- /dev/null
+++ b/syntax/scan.go
@@ -0,0 +1,1123 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+// A lexical scanner for Starlark.
+
+import (
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "math/big"
+ "os"
+ "strconv"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+// A Token represents a Starlark lexical token.
+//
+// The constants below are numbered by iota, so their order matters:
+// tokenNames is indexed by Token, and GoString relies on the
+// punctuation tokens forming the contiguous range PLUS..STARSTAR.
+type Token int8
+
+const (
+ ILLEGAL Token = iota
+ EOF
+
+ NEWLINE
+ INDENT
+ OUTDENT
+
+ // Tokens with values
+ IDENT // x
+ INT // 123
+ FLOAT // 1.23e45
+ STRING // "foo" or 'foo' or '''foo''' or r'foo' or r"foo"
+ BYTES // b"foo", etc
+
+ // Punctuation
+ PLUS // +
+ MINUS // -
+ STAR // *
+ SLASH // /
+ SLASHSLASH // //
+ PERCENT // %
+ AMP // &
+ PIPE // |
+ CIRCUMFLEX // ^
+ LTLT // <<
+ GTGT // >>
+ TILDE // ~
+ DOT // .
+ COMMA // ,
+ EQ // =
+ SEMI // ;
+ COLON // :
+ LPAREN // (
+ RPAREN // )
+ LBRACK // [
+ RBRACK // ]
+ LBRACE // {
+ RBRACE // }
+ LT // <
+ GT // >
+ GE // >=
+ LE // <=
+ EQL // ==
+ NEQ // !=
+ PLUS_EQ // += (keep order consistent with PLUS..GTGT)
+ MINUS_EQ // -=
+ STAR_EQ // *=
+ SLASH_EQ // /=
+ SLASHSLASH_EQ // //=
+ PERCENT_EQ // %=
+ AMP_EQ // &=
+ PIPE_EQ // |=
+ CIRCUMFLEX_EQ // ^=
+ LTLT_EQ // <<=
+ GTGT_EQ // >>=
+ STARSTAR // **
+
+ // Keywords
+ AND
+ BREAK
+ CONTINUE
+ DEF
+ ELIF
+ ELSE
+ FOR
+ IF
+ IN
+ LAMBDA
+ LOAD
+ NOT
+ NOT_IN // synthesized by parser from NOT IN
+ OR
+ PASS
+ RETURN
+ WHILE
+
+ maxToken // one greater than the last token value
+)
+
+// String returns the name recorded for tok in tokenNames.
+func (tok Token) String() string {
+ name := tokenNames[tok]
+ return name
+}
+
+// GoString returns the same name as String, except that punctuation
+// tokens (PLUS through STARSTAR) are wrapped in single quotes.
+// Use Sprintf("%#v", tok) when constructing error messages.
+func (tok Token) GoString() string {
+ name := tokenNames[tok]
+ if tok < PLUS || tok > STARSTAR {
+ return name
+ }
+ return "'" + name + "'"
+}
+
+// tokenNames maps each Token to the text used for it by String and
+// GoString. Every token that the scanner can return must have an
+// entry here; a missing entry prints as the empty string.
+var tokenNames = [...]string{
+ ILLEGAL: "illegal token",
+ EOF: "end of file",
+ NEWLINE: "newline",
+ INDENT: "indent",
+ OUTDENT: "outdent",
+ IDENT: "identifier",
+ INT: "int literal",
+ FLOAT: "float literal",
+ STRING: "string literal",
+ BYTES: "bytes literal",
+ PLUS: "+",
+ MINUS: "-",
+ STAR: "*",
+ SLASH: "/",
+ SLASHSLASH: "//",
+ PERCENT: "%",
+ AMP: "&",
+ PIPE: "|",
+ CIRCUMFLEX: "^",
+ LTLT: "<<",
+ GTGT: ">>",
+ TILDE: "~",
+ DOT: ".",
+ COMMA: ",",
+ EQ: "=",
+ SEMI: ";",
+ COLON: ":",
+ LPAREN: "(",
+ RPAREN: ")",
+ LBRACK: "[",
+ RBRACK: "]",
+ LBRACE: "{",
+ RBRACE: "}",
+ LT: "<",
+ GT: ">",
+ GE: ">=",
+ LE: "<=",
+ EQL: "==",
+ NEQ: "!=",
+ PLUS_EQ: "+=",
+ MINUS_EQ: "-=",
+ STAR_EQ: "*=",
+ SLASH_EQ: "/=",
+ SLASHSLASH_EQ: "//=",
+ PERCENT_EQ: "%=",
+ AMP_EQ: "&=",
+ PIPE_EQ: "|=",
+ CIRCUMFLEX_EQ: "^=",
+ LTLT_EQ: "<<=",
+ GTGT_EQ: ">>=",
+ STARSTAR: "**",
+ AND: "and",
+ BREAK: "break",
+ CONTINUE: "continue",
+ DEF: "def",
+ ELIF: "elif",
+ ELSE: "else",
+ FOR: "for",
+ IF: "if",
+ IN: "in",
+ LAMBDA: "lambda",
+ LOAD: "load",
+ NOT: "not",
+ NOT_IN: "not in",
+ OR: "or",
+ PASS: "pass",
+ RETURN: "return",
+ WHILE: "while",
+}
+
+// A FilePortion describes the content of a portion of a file.
+// Callers may provide a FilePortion for the src argument of Parse
+// when the desired initial line and column numbers are not (1, 1),
+// such as when an expression is parsed from within a larger file.
+type FilePortion struct {
+ Content []byte // source text of the portion
+ FirstLine, FirstCol int32 // position assigned to the first rune of Content
+}
+
+// A Position describes the location of a rune of input.
+// A Position whose file is nil is not valid (see IsValid).
+type Position struct {
+ file *string // filename (indirect for compactness)
+ Line int32 // 1-based line number; 0 if line unknown
+ Col int32 // 1-based column (rune) number; 0 if column unknown
+}
+
+// IsValid reports whether p refers to a file, that is,
+// whether its file pointer is non-nil.
+func (p Position) IsValid() bool {
+ hasFile := p.file != nil
+ return hasFile
+}
+
+// Filename returns the name of the file containing this position,
+// or "<invalid>" if the position has no file.
+func (p Position) Filename() string {
+ if p.file == nil {
+ return "<invalid>"
+ }
+ return *p.file
+}
+
+// MakePosition returns a Position built from the given file name
+// pointer, line number and column number.
+func MakePosition(file *string, line, col int32) Position {
+ return Position{
+ file: file,
+ Line: line,
+ Col: col,
+ }
+}
+
+// add returns the position at the end of s, assuming it starts at p.
+// Each newline in s advances the line; the column is counted in runes
+// from the start of the last line of s.
+func (p Position) add(s string) Position {
+ if last := strings.LastIndex(s, "\n"); last >= 0 {
+ p.Line += int32(strings.Count(s, "\n"))
+ p.Col = 1
+ s = s[last+1:]
+ }
+ p.Col += int32(utf8.RuneCountInString(s))
+ return p
+}
+
+// String formats p as "file:line:col", omitting the column when it is
+// unknown and both line and column when the line is unknown.
+func (p Position) String() string {
+ name := p.Filename()
+ switch {
+ case p.Line <= 0:
+ return name
+ case p.Col <= 0:
+ return fmt.Sprintf("%s:%d", name, p.Line)
+ default:
+ return fmt.Sprintf("%s:%d:%d", name, p.Line, p.Col)
+ }
+}
+
+// isBefore reports whether p precedes q, comparing line numbers first
+// and column numbers only when the lines are equal.
+func (p Position) isBefore(q Position) bool {
+ if p.Line == q.Line {
+ return p.Col < q.Col
+ }
+ return p.Line < q.Line
+}
+
+// A scanner represents a single input file being parsed.
+type scanner struct {
+ rest []byte // rest of input (in REPL, a line of input)
+ token []byte // token being scanned
+ pos Position // current input position
+ depth int // nesting of [ ] { } ( )
+ indentstk []int // stack of indentation levels
+ dents int // number of saved INDENT (>0) or OUTDENT (<0) tokens to return
+ lineStart bool // after NEWLINE; convert spaces to indentation tokens
+ keepComments bool // accumulate comments in slice
+ lineComments []Comment // list of full line comments (if keepComments)
+ suffixComments []Comment // list of suffix comments (if keepComments)
+
+ readline func() ([]byte, error) // read next line of input (REPL only)
+}
+
+func newScanner(filename string, src interface{}, keepComments bool) (*scanner, error) {
+ var firstLine, firstCol int32 = 1, 1
+ if portion, ok := src.(FilePortion); ok {
+ firstLine, firstCol = portion.FirstLine, portion.FirstCol
+ }
+ sc := &scanner{
+ pos: MakePosition(&filename, firstLine, firstCol),
+ indentstk: make([]int, 1, 10), // []int{0} + spare capacity
+ lineStart: true,
+ keepComments: keepComments,
+ }
+ sc.readline, _ = src.(func() ([]byte, error)) // ParseCompoundStmt (REPL) only
+ if sc.readline == nil {
+ data, err := readSource(filename, src)
+ if err != nil {
+ return nil, err
+ }
+ sc.rest = data
+ }
+ return sc, nil
+}
+
+// readSource returns the complete input denoted by src:
+// a string or []byte is used directly, an io.Reader is read to the
+// end, a FilePortion contributes its Content, and nil means "read the
+// file named filename". Any other type is an error.
+// A failed read from an io.Reader is reported as an *os.PathError.
+func readSource(filename string, src interface{}) ([]byte, error) {
+ switch v := src.(type) {
+ case nil:
+ return ioutil.ReadFile(filename)
+ case string:
+ return []byte(v), nil
+ case []byte:
+ return v, nil
+ case io.Reader:
+ content, err := ioutil.ReadAll(v)
+ if err == nil {
+ return content, nil
+ }
+ return nil, &os.PathError{Op: "read", Path: filename, Err: err}
+ case FilePortion:
+ return v.Content, nil
+ default:
+ return nil, fmt.Errorf("invalid source: %T", v)
+ }
+}
+
+// An Error describes the nature and position of a scanner or parser error.
+// The scanner raises Errors by panicking (see scanner.error);
+// scanner.recover converts the panic back into an ordinary error value.
+type Error struct {
+ Pos Position // where the error was detected
+ Msg string // description of the error, without the position
+}
+
+// Error returns the message prefixed by the position, as "pos: msg".
+func (e Error) Error() string {
+ where := e.Pos.String()
+ return where + ": " + e.Msg
+}
+
+// error is called to report an error at pos with message s.
+// error does not return: it panics with an Error value,
+// which scanner.recover turns back into an error result.
+func (sc *scanner) error(pos Position, s string) {
+ panic(Error{pos, s})
+}
+
+// errorf is like error but builds the message with fmt.Sprintf.
+// It does not return: it panics.
+func (sc *scanner) errorf(pos Position, format string, args ...interface{}) {
+ msg := fmt.Sprintf(format, args...)
+ sc.error(pos, msg)
+}
+
+// recover is meant to be invoked with defer. If the surrounding
+// function is panicking, it stops the panic and stores a
+// corresponding error in *err; otherwise it leaves *err alone.
+func (sc *scanner) recover(err *error) {
+ // The scanner and parser panic both for routine errors like
+ // syntax errors and for programmer bugs like array index
+ // errors. Turn both into error returns. Catching bug panics
+ // is especially important when processing many files.
+ switch e := recover().(type) {
+ case nil:
+ // no panic
+ case Error:
+ // Routine error raised by sc.error or sc.errorf.
+ *err = e
+ default:
+ // Any other panic value is reported as an internal error
+ // at the scanner's current position.
+ *err = Error{sc.pos, fmt.Sprintf("internal error: %v", e)}
+ if debug { // debug is declared elsewhere in the package
+ log.Fatal(*err)
+ }
+ }
+}
+
+// eof reports whether the input has reached end of file.
+// When the buffered input is exhausted it first tries to read
+// another line, so it may refill sc.rest.
+func (sc *scanner) eof() bool {
+ if len(sc.rest) > 0 {
+ return false
+ }
+ return !sc.readLine()
+}
+
+// readLine attempts to read another line of input.
+// Precondition: len(sc.rest)==0.
+func (sc *scanner) readLine() bool {
+ if sc.readline != nil {
+ var err error
+ sc.rest, err = sc.readline()
+ if err != nil {
+ sc.errorf(sc.pos, "%v", err) // EOF or ErrInterrupt
+ }
+ return len(sc.rest) > 0
+ }
+ return false
+}
+
+// peekRune returns the next rune in the input without consuming it,
+// or 0 at end of file.
+// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
+func (sc *scanner) peekRune() rune {
+ // TODO(adonovan): opt: measure and perhaps inline eof.
+ if sc.eof() {
+ return 0
+ }
+
+ first := sc.rest[0]
+ switch {
+ case first == '\r':
+ // A carriage return always reads as a newline.
+ return '\n'
+ case first < utf8.RuneSelf:
+ // fast path: ASCII
+ return rune(first)
+ }
+
+ decoded, _ := utf8.DecodeRune(sc.rest)
+ return decoded
+}
+
+// readRune consumes and returns the next rune in the input,
+// advancing sc.pos past it.
+// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
+// Calling readRune at end of file is reported as an internal error.
+func (sc *scanner) readRune() rune {
+ // eof() has been inlined here, both to avoid a call
+ // and to establish len(rest)>0 to avoid a bounds check.
+ if len(sc.rest) == 0 {
+ if !sc.readLine() {
+ sc.error(sc.pos, "internal scanner error: readRune at EOF")
+ }
+ // Redundant, but eliminates the bounds-check below.
+ if len(sc.rest) == 0 {
+ return 0
+ }
+ }
+
+ // fast path: ASCII
+ if b := sc.rest[0]; b < utf8.RuneSelf {
+ r := rune(b)
+ sc.rest = sc.rest[1:]
+ if r == '\r' {
+ // "\r\n" (DOS) and a lone "\r" (Mac) both read as a single '\n'.
+ if len(sc.rest) > 0 && sc.rest[0] == '\n' {
+ sc.rest = sc.rest[1:]
+ }
+ r = '\n'
+ }
+ if r == '\n' {
+ sc.pos.Line++
+ sc.pos.Col = 1
+ } else {
+ sc.pos.Col++
+ }
+ return r
+ }
+
+ // Multi-byte rune: columns count runes, not bytes.
+ r, size := utf8.DecodeRune(sc.rest)
+ sc.rest = sc.rest[size:]
+ sc.pos.Col++
+ return r
+}
+
+// tokenValue records the position and value associated with each token.
+// Which of the decoded fields is meaningful depends on the token
+// returned by nextToken.
+type tokenValue struct {
+ raw string // raw text of token
+ int int64 // decoded int
+ bigInt *big.Int // decoded integers > int64
+ float float64 // decoded float
+ string string // decoded string or bytes
+ pos Position // start position of token
+}
+
+// startToken marks the beginning of the next input token:
+// it records the current position in val, clears val.raw, and
+// remembers the unread input so that endToken can later extract
+// the token text. It must be followed by a call to endToken once
+// the token has been consumed using readRune.
+func (sc *scanner) startToken(val *tokenValue) {
+ val.pos = sc.pos
+ val.raw = ""
+ sc.token = sc.rest
+}
+
+// endToken marks the end of an input token.
+// Unless the caller has already set val.raw, it stores there the
+// text consumed since the matching startToken call.
+func (sc *scanner) endToken(val *tokenValue) {
+ if val.raw != "" {
+ return
+ }
+ consumed := len(sc.token) - len(sc.rest)
+ val.raw = string(sc.token[:consumed])
+}
+
+// nextToken is called by the parser to obtain the next input token.
+// It returns the token value and sets val to the data associated with
+// the token.
+//
+// For all our input tokens, the associated data is val.pos (the
+// position where the token begins), val.raw (the input string
+// corresponding to the token). For string and int tokens, the string
+// and int fields additionally contain the token's interpreted value.
+//
+// Lexical errors are reported by panicking through sc.error/sc.errorf.
+// An identifier found in keywordToken is returned as the mapped token,
+// which is ILLEGAL for reserved words.
+func (sc *scanner) nextToken(val *tokenValue) Token {
+
+ // The following distribution of tokens guides case ordering:
+ //
+ // COMMA 27 %
+ // STRING 23 %
+ // IDENT 15 %
+ // EQL 11 %
+ // LBRACK 5.5 %
+ // RBRACK 5.5 %
+ // NEWLINE 3 %
+ // LPAREN 2.9 %
+ // RPAREN 2.9 %
+ // INT 2 %
+ // others < 1 %
+ //
+ // Although NEWLINE tokens are infrequent, and lineStart is
+ // usually (~97%) false on entry, skipped newlines account for
+ // about 50% of all iterations of the 'start' loop.
+
+start:
+ var c rune
+
+ // Deal with leading spaces and indentation.
+ blank := false
+ savedLineStart := sc.lineStart
+ if sc.lineStart {
+ sc.lineStart = false
+ col := 0
+ for {
+ c = sc.peekRune()
+ if c == ' ' {
+ col++
+ sc.readRune()
+ } else if c == '\t' {
+ // A tab advances to the next multiple of 8 columns.
+ const tab = 8
+ col += int(tab - (sc.pos.Col-1)%tab)
+ sc.readRune()
+ } else {
+ break
+ }
+ }
+
+ // The third clause matches EOF.
+ if c == '#' || c == '\n' || c == 0 {
+ blank = true
+ }
+
+ // Compute indentation level for non-blank lines not
+ // inside an expression. This is not the common case.
+ if !blank && sc.depth == 0 {
+ cur := sc.indentstk[len(sc.indentstk)-1]
+ if col > cur {
+ // indent
+ sc.dents++
+ sc.indentstk = append(sc.indentstk, col)
+ } else if col < cur {
+ // outdent(s)
+ for len(sc.indentstk) > 0 && col < sc.indentstk[len(sc.indentstk)-1] {
+ sc.dents--
+ sc.indentstk = sc.indentstk[:len(sc.indentstk)-1] // pop
+ }
+ if col != sc.indentstk[len(sc.indentstk)-1] {
+ sc.error(sc.pos, "unindent does not match any outer indentation level")
+ }
+ }
+ }
+ }
+
+ // Return saved indentation tokens.
+ // These are zero-width tokens: nothing is consumed between
+ // startToken and endToken.
+ if sc.dents != 0 {
+ sc.startToken(val)
+ sc.endToken(val)
+ if sc.dents < 0 {
+ sc.dents++
+ return OUTDENT
+ } else {
+ sc.dents--
+ return INDENT
+ }
+ }
+
+ // start of line proper
+ c = sc.peekRune()
+
+ // Skip spaces.
+ for c == ' ' || c == '\t' {
+ sc.readRune()
+ c = sc.peekRune()
+ }
+
+ // comment
+ if c == '#' {
+ if sc.keepComments {
+ sc.startToken(val)
+ }
+ // Consume up to newline (included).
+ for c != 0 && c != '\n' {
+ sc.readRune()
+ c = sc.peekRune()
+ }
+ if sc.keepComments {
+ sc.endToken(val)
+ // A comment on an otherwise blank line is a line comment;
+ // one that follows other tokens is a suffix comment.
+ if blank {
+ sc.lineComments = append(sc.lineComments, Comment{val.pos, val.raw})
+ } else {
+ sc.suffixComments = append(sc.suffixComments, Comment{val.pos, val.raw})
+ }
+ }
+ }
+
+ // newline
+ if c == '\n' {
+ sc.lineStart = true
+
+ // Ignore newlines within expressions (common case).
+ if sc.depth > 0 {
+ sc.readRune()
+ goto start
+ }
+
+ // Ignore blank lines, except in the REPL,
+ // where they emit OUTDENTs and NEWLINE.
+ if blank {
+ if sc.readline == nil {
+ sc.readRune()
+ goto start
+ } else if len(sc.indentstk) > 1 {
+ sc.dents = 1 - len(sc.indentstk)
+ sc.indentstk = sc.indentstk[:1]
+ goto start
+ }
+ }
+
+ // At top-level (not in an expression).
+ sc.startToken(val)
+ sc.readRune()
+ val.raw = "\n"
+ return NEWLINE
+ }
+
+ // end of file
+ if c == 0 {
+ // Emit OUTDENTs for unfinished indentation,
+ // preceded by a NEWLINE if we haven't just emitted one.
+ if len(sc.indentstk) > 1 {
+ if savedLineStart {
+ sc.dents = 1 - len(sc.indentstk)
+ sc.indentstk = sc.indentstk[:1]
+ goto start
+ } else {
+ sc.lineStart = true
+ sc.startToken(val)
+ val.raw = "\n"
+ return NEWLINE
+ }
+ }
+
+ sc.startToken(val)
+ sc.endToken(val)
+ return EOF
+ }
+
+ // line continuation
+ if c == '\\' {
+ sc.readRune()
+ if sc.peekRune() != '\n' {
+ sc.errorf(sc.pos, "stray backslash in program")
+ }
+ sc.readRune()
+ goto start
+ }
+
+ // start of the next token
+ sc.startToken(val)
+
+ // comma (common case)
+ if c == ',' {
+ sc.readRune()
+ sc.endToken(val)
+ return COMMA
+ }
+
+ // string literal
+ if c == '"' || c == '\'' {
+ return sc.scanString(val, c)
+ }
+
+ // identifier or keyword
+ if isIdentStart(c) {
+ // An 'r', 'b' or "rb" immediately followed by a quote is a
+ // string prefix, not an identifier: consume the prefix and
+ // let scanString handle the rest.
+ if (c == 'r' || c == 'b') && len(sc.rest) > 1 && (sc.rest[1] == '"' || sc.rest[1] == '\'') {
+ // r"..."
+ // b"..."
+ sc.readRune()
+ c = sc.peekRune()
+ return sc.scanString(val, c)
+ } else if c == 'r' && len(sc.rest) > 2 && sc.rest[1] == 'b' && (sc.rest[2] == '"' || sc.rest[2] == '\'') {
+ // rb"..."
+ sc.readRune()
+ sc.readRune()
+ c = sc.peekRune()
+ return sc.scanString(val, c)
+ }
+
+ for isIdent(c) {
+ sc.readRune()
+ c = sc.peekRune()
+ }
+ sc.endToken(val)
+ if k, ok := keywordToken[val.raw]; ok {
+ return k
+ }
+
+ return IDENT
+ }
+
+ // brackets
+ // sc.depth tracks nesting so that newlines and indentation
+ // inside brackets are ignored.
+ switch c {
+ case '[', '(', '{':
+ sc.depth++
+ sc.readRune()
+ sc.endToken(val)
+ switch c {
+ case '[':
+ return LBRACK
+ case '(':
+ return LPAREN
+ case '{':
+ return LBRACE
+ }
+ panic("unreachable")
+
+ case ']', ')', '}':
+ if sc.depth == 0 {
+ sc.errorf(sc.pos, "unexpected %q", c)
+ } else {
+ sc.depth--
+ }
+ sc.readRune()
+ sc.endToken(val)
+ switch c {
+ case ']':
+ return RBRACK
+ case ')':
+ return RPAREN
+ case '}':
+ return RBRACE
+ }
+ panic("unreachable")
+ }
+
+ // int or float literal, or period
+ if isdigit(c) || c == '.' {
+ return sc.scanNumber(val, c)
+ }
+
+ // other punctuation
+ // endToken is deferred so that every return below records val.raw.
+ defer sc.endToken(val)
+ switch c {
+ case '=', '<', '>', '!', '+', '-', '%', '/', '&', '|', '^': // possibly followed by '='
+ start := sc.pos
+ sc.readRune()
+ if sc.peekRune() == '=' {
+ sc.readRune()
+ switch c {
+ case '<':
+ return LE
+ case '>':
+ return GE
+ case '=':
+ return EQL
+ case '!':
+ return NEQ
+ case '+':
+ return PLUS_EQ
+ case '-':
+ return MINUS_EQ
+ case '/':
+ return SLASH_EQ
+ case '%':
+ return PERCENT_EQ
+ case '&':
+ return AMP_EQ
+ case '|':
+ return PIPE_EQ
+ case '^':
+ return CIRCUMFLEX_EQ
+ }
+ }
+ switch c {
+ case '=':
+ return EQ
+ case '<':
+ if sc.peekRune() == '<' {
+ sc.readRune()
+ if sc.peekRune() == '=' {
+ sc.readRune()
+ return LTLT_EQ
+ } else {
+ return LTLT
+ }
+ }
+ return LT
+ case '>':
+ if sc.peekRune() == '>' {
+ sc.readRune()
+ if sc.peekRune() == '=' {
+ sc.readRune()
+ return GTGT_EQ
+ } else {
+ return GTGT
+ }
+ }
+ return GT
+ case '!':
+ // '!' is valid only as part of "!=".
+ sc.error(start, "unexpected input character '!'")
+ case '+':
+ return PLUS
+ case '-':
+ return MINUS
+ case '/':
+ if sc.peekRune() == '/' {
+ sc.readRune()
+ if sc.peekRune() == '=' {
+ sc.readRune()
+ return SLASHSLASH_EQ
+ } else {
+ return SLASHSLASH
+ }
+ }
+ return SLASH
+ case '%':
+ return PERCENT
+ case '&':
+ return AMP
+ case '|':
+ return PIPE
+ case '^':
+ return CIRCUMFLEX
+ }
+ panic("unreachable")
+
+ case ':', ';', '~': // single-char tokens (except comma)
+ sc.readRune()
+ switch c {
+ case ':':
+ return COLON
+ case ';':
+ return SEMI
+ case '~':
+ return TILDE
+ }
+ panic("unreachable")
+
+ case '*': // possibly followed by '*' or '='
+ sc.readRune()
+ switch sc.peekRune() {
+ case '*':
+ sc.readRune()
+ return STARSTAR
+ case '=':
+ sc.readRune()
+ return STAR_EQ
+ }
+ return STAR
+ }
+
+ sc.errorf(sc.pos, "unexpected input character %#q", c)
+ panic("unreachable")
+}
+
+// scanString scans a string or bytes literal whose opening quote rune
+// (quote) is the next rune of input; any r/b prefix has already been
+// consumed by nextToken. It stores the literal's source text in
+// val.raw and its decoded value in val.string, and returns BYTES or
+// STRING according to unquote.
+//
+// An unterminated literal is reported at val.pos (the start of the
+// token); an error from unquote is reported at the position of the
+// opening quote.
+func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
+ start := sc.pos
+ triple := len(sc.rest) >= 3 && sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote)
+ sc.readRune()
+
+ // String literals may contain escaped or unescaped newlines,
+ // causing them to span multiple lines (gulps) of REPL input;
+ // they are the only such token. Thus we cannot call endToken,
+ // as it assumes sc.rest is unchanged since startToken.
+ // Instead, buffer the token here.
+ // TODO(adonovan): opt: buffer only if we encounter a newline.
+ raw := new(strings.Builder)
+
+ // Copy the prefix, e.g. r' or " (see startToken).
+ raw.Write(sc.token[:len(sc.token)-len(sc.rest)])
+
+ if !triple {
+ // single-quoted string literal
+ for {
+ if sc.eof() {
+ sc.error(val.pos, "unexpected EOF in string")
+ }
+ c := sc.readRune()
+ raw.WriteRune(c)
+ if c == quote {
+ break
+ }
+ if c == '\n' {
+ sc.error(val.pos, "unexpected newline in string")
+ }
+ if c == '\\' {
+ // Copy the escaped rune verbatim so that an escaped
+ // quote or newline does not end the literal.
+ if sc.eof() {
+ sc.error(val.pos, "unexpected EOF in string")
+ }
+ c = sc.readRune()
+ raw.WriteRune(c)
+ }
+ }
+ } else {
+ // triple-quoted string literal
+ sc.readRune()
+ raw.WriteRune(quote)
+ sc.readRune()
+ raw.WriteRune(quote)
+
+ // The literal ends at the first run of three unescaped quotes.
+ quoteCount := 0
+ for {
+ if sc.eof() {
+ sc.error(val.pos, "unexpected EOF in string")
+ }
+ c := sc.readRune()
+ raw.WriteRune(c)
+ if c == quote {
+ quoteCount++
+ if quoteCount == 3 {
+ break
+ }
+ } else {
+ quoteCount = 0
+ }
+ if c == '\\' {
+ // The escaped rune is copied but never counted as a quote.
+ if sc.eof() {
+ sc.error(val.pos, "unexpected EOF in string")
+ }
+ c = sc.readRune()
+ raw.WriteRune(c)
+ }
+ }
+ }
+ val.raw = raw.String()
+
+ s, _, isByte, err := unquote(val.raw)
+ if err != nil {
+ sc.error(start, err.Error())
+ }
+ val.string = s
+ if isByte {
+ return BYTES
+ } else {
+ return STRING
+ }
+}
+
+// scanNumber scans an int or float literal, or a lone period.
+// c is the first rune of the token, which has been peeked but not
+// yet consumed. It returns DOT, INT or FLOAT.
+//
+// For FLOAT, the value is stored in val.float. For INT, the value is
+// stored in val.int when it fits in an int64; otherwise val.bigInt is
+// set to the arbitrary-precision value. This holds for decimal, hex,
+// octal (0o) and binary (0b) literals alike. val.bigInt is nil for
+// every INT that fits in an int64.
+//
+// Malformed literals are reported by panicking through sc.error.
+func (sc *scanner) scanNumber(val *tokenValue, c rune) Token {
+ // https://github.com/google/starlark-go/blob/master/doc/spec.md#lexical-elements
+ //
+ // Python features not supported:
+ // - 123L or 123l long suffix
+ // - traditional octal: 0755
+ // https://docs.python.org/2/reference/lexical_analysis.html#integer-and-long-integer-literals
+
+ start := sc.pos
+ fraction, exponent := false, false
+
+ if c == '.' {
+ // dot or start of fraction
+ sc.readRune()
+ c = sc.peekRune()
+ if !isdigit(c) {
+ sc.endToken(val)
+ return DOT
+ }
+ fraction = true
+ } else if c == '0' {
+ // hex, octal, binary or float
+ sc.readRune()
+ c = sc.peekRune()
+
+ if c == '.' {
+ fraction = true
+ } else if c == 'x' || c == 'X' {
+ // hex
+ sc.readRune()
+ c = sc.peekRune()
+ if !isxdigit(c) {
+ sc.error(start, "invalid hex literal")
+ }
+ for isxdigit(c) {
+ sc.readRune()
+ c = sc.peekRune()
+ }
+ } else if c == 'o' || c == 'O' {
+ // octal
+ sc.readRune()
+ c = sc.peekRune()
+ if !isodigit(c) {
+ sc.error(sc.pos, "invalid octal literal")
+ }
+ for isodigit(c) {
+ sc.readRune()
+ c = sc.peekRune()
+ }
+ } else if c == 'b' || c == 'B' {
+ // binary
+ sc.readRune()
+ c = sc.peekRune()
+ if !isbdigit(c) {
+ sc.error(sc.pos, "invalid binary literal")
+ }
+ for isbdigit(c) {
+ sc.readRune()
+ c = sc.peekRune()
+ }
+ } else {
+ // float (or obsolete octal "0755")
+ allzeros, octal := true, true
+ for isdigit(c) {
+ if c != '0' {
+ allzeros = false
+ }
+ if c > '7' {
+ octal = false
+ }
+ sc.readRune()
+ c = sc.peekRune()
+ }
+ if c == '.' {
+ fraction = true
+ } else if c == 'e' || c == 'E' {
+ exponent = true
+ } else if octal && !allzeros {
+ sc.endToken(val)
+ sc.errorf(sc.pos, "obsolete form of octal literal; use 0o%s", val.raw[1:])
+ }
+ }
+ } else {
+ // decimal
+ for isdigit(c) {
+ sc.readRune()
+ c = sc.peekRune()
+ }
+
+ if c == '.' {
+ fraction = true
+ } else if c == 'e' || c == 'E' {
+ exponent = true
+ }
+ }
+
+ if fraction {
+ sc.readRune() // consume '.'
+ c = sc.peekRune()
+ for isdigit(c) {
+ sc.readRune()
+ c = sc.peekRune()
+ }
+
+ if c == 'e' || c == 'E' {
+ exponent = true
+ }
+ }
+
+ if exponent {
+ sc.readRune() // consume [eE]
+ c = sc.peekRune()
+ if c == '+' || c == '-' {
+ sc.readRune()
+ c = sc.peekRune()
+ if !isdigit(c) {
+ sc.error(sc.pos, "invalid float literal")
+ }
+ }
+ for isdigit(c) {
+ sc.readRune()
+ c = sc.peekRune()
+ }
+ }
+
+ sc.endToken(val)
+ if fraction || exponent {
+ var err error
+ val.float, err = strconv.ParseFloat(val.raw, 64)
+ if err != nil {
+ sc.error(sc.pos, "invalid float literal")
+ }
+ return FLOAT
+ }
+
+ // Integer literal. Octal and binary literals are parsed from the
+ // digits after their 0o/0b prefix with an explicit base; everything
+ // else (decimal, hex) lets the parser infer the base from the text.
+ s := val.raw
+ val.bigInt = nil
+ base, digits := 0, s
+ if len(s) > 2 && s[0] == '0' {
+ switch s[1] {
+ case 'o', 'O':
+ base, digits = 8, s[2:]
+ case 'b', 'B':
+ base, digits = 2, s[2:]
+ }
+ }
+ var err error
+ val.int, err = strconv.ParseInt(digits, base, 64)
+ if err != nil {
+ // The literal does not fit in an int64 (or is malformed):
+ // retry with arbitrary precision, whatever the base.
+ if num, ok := new(big.Int).SetString(digits, base); ok {
+ val.bigInt = num
+ err = nil
+ }
+ }
+ if err != nil {
+ sc.error(start, "invalid int literal")
+ }
+ return INT
+}
+
+// isIdent reports whether c may appear in an identifier:
+// anything that may start one, or a decimal digit.
+func isIdent(c rune) bool {
+ if isIdentStart(c) {
+ return true
+ }
+ return isdigit(c)
+}
+
+// isIdentStart reports whether c may begin an identifier:
+// an ASCII letter, an underscore, or any Unicode letter.
+func isIdentStart(c rune) bool {
+ switch {
+ case c == '_':
+ return true
+ case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
+ return true
+ }
+ return unicode.IsLetter(c)
+}
+
+// isdigit reports whether c is an ASCII decimal digit.
+func isdigit(c rune) bool { return c >= '0' && c <= '9' }
+
+// isodigit reports whether c is an octal digit.
+func isodigit(c rune) bool { return c >= '0' && c <= '7' }
+
+// isxdigit reports whether c is a hexadecimal digit of either case.
+func isxdigit(c rune) bool {
+ switch {
+ case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
+ return true
+ }
+ return false
+}
+
+// isbdigit reports whether c is a binary digit.
+func isbdigit(c rune) bool { return c == '0' || c == '1' }
+
+// keywordToken records the special tokens for
+// strings that should not be treated as ordinary identifiers.
+// nextToken consults it after scanning an identifier: keywords map
+// to their own token, and reserved words map to ILLEGAL.
+var keywordToken = map[string]Token{
+ "and": AND,
+ "break": BREAK,
+ "continue": CONTINUE,
+ "def": DEF,
+ "elif": ELIF,
+ "else": ELSE,
+ "for": FOR,
+ "if": IF,
+ "in": IN,
+ "lambda": LAMBDA,
+ "load": LOAD,
+ "not": NOT,
+ "or": OR,
+ "pass": PASS,
+ "return": RETURN,
+ "while": WHILE,
+
+ // reserved words:
+ "as": ILLEGAL,
+ // "assert": ILLEGAL, // heavily used by our tests
+ "class": ILLEGAL,
+ "del": ILLEGAL,
+ "except": ILLEGAL,
+ "finally": ILLEGAL,
+ "from": ILLEGAL,
+ "global": ILLEGAL,
+ "import": ILLEGAL,
+ "is": ILLEGAL,
+ "nonlocal": ILLEGAL,
+ "raise": ILLEGAL,
+ "try": ILLEGAL,
+ "with": ILLEGAL,
+ "yield": ILLEGAL,
+}
diff --git a/syntax/scan_test.go b/syntax/scan_test.go
new file mode 100644
index 0000000..9582bd7
--- /dev/null
+++ b/syntax/scan_test.go
@@ -0,0 +1,310 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+import (
+ "bytes"
+ "fmt"
+ "go/build"
+ "io/ioutil"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+// scan tokenizes src and returns a space-separated rendering of the
+// tokens up to and including EOF. Identifiers are rendered as their
+// raw text, numbers as their decoded value, strings and bytes via
+// Quote, and all other tokens by name. A scanner panic is converted
+// into the returned error by the deferred sc.recover.
+func scan(src interface{}) (tokens string, err error) {
+ sc, err := newScanner("foo.star", src, false)
+ if err != nil {
+ return "", err
+ }
+
+ defer sc.recover(&err)
+
+ var (
+ out bytes.Buffer
+ val tokenValue
+ )
+ for {
+ tok := sc.nextToken(&val)
+
+ if out.Len() != 0 {
+ out.WriteByte(' ')
+ }
+ switch tok {
+ case EOF:
+ // EOF is always the last token rendered.
+ out.WriteString("EOF")
+ return out.String(), nil
+ case IDENT:
+ out.WriteString(val.raw)
+ case INT:
+ if val.bigInt == nil {
+ fmt.Fprintf(&out, "%d", val.int)
+ } else {
+ fmt.Fprintf(&out, "%d", val.bigInt)
+ }
+ case FLOAT:
+ fmt.Fprintf(&out, "%e", val.float)
+ case STRING, BYTES:
+ out.WriteString(Quote(val.string, tok == BYTES))
+ default:
+ out.WriteString(tok.String())
+ }
+ }
+}
+
+func TestScanner(t *testing.T) {
+ for _, test := range []struct {
+ input, want string
+ }{
+ {``, "EOF"},
+ {`123`, "123 EOF"},
+ {`x.y`, "x . y EOF"},
+ {`chocolate.éclair`, `chocolate . éclair EOF`},
+ {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`},
+ {`print(x)`, "print ( x ) EOF"},
+ {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"},
+ {"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token
+ {`/ // /= //= ///=`, "/ // /= //= // /= EOF"},
+ {`# hello
+print(x)`, "print ( x ) EOF"},
+ {`# hello
+print(1)
+cc_binary(name="foo")
+def f(x):
+ return x+1
+print(1)
+`,
+ `print ( 1 ) newline ` +
+ `cc_binary ( name = "foo" ) newline ` +
+ `def f ( x ) : newline ` +
+ `indent return x + 1 newline ` +
+ `outdent print ( 1 ) newline ` +
+ `EOF`},
+ // EOF should act like an implicit newline.
+ {`def f(): pass`,
+ "def f ( ) : pass EOF"},
+ {`def f():
+ pass`,
+ "def f ( ) : newline indent pass newline outdent EOF"},
+ {`def f():
+ pass
+# oops`,
+ "def f ( ) : newline indent pass newline outdent EOF"},
+ {`def f():
+ pass \
+`,
+ "def f ( ) : newline indent pass newline outdent EOF"},
+ {`def f():
+ pass
+`,
+ "def f ( ) : newline indent pass newline outdent EOF"},
+ {`pass
+
+
+pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
+ {`def f():
+ pass
+ `, "def f ( ) : newline indent pass newline outdent EOF"},
+ {`def f():
+ pass
+ ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"},
+ {"pass", "pass EOF"},
+ {"pass\n", "pass newline EOF"},
+ {"pass\n ", "pass newline EOF"},
+ {"pass\n \n", "pass newline EOF"},
+ {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"},
+ {`x = 1 + \
+2`, `x = 1 + 2 EOF`},
+ {`x = 'a\nb'`, `x = "a\nb" EOF`},
+ {`x = r'a\nb'`, `x = "a\\nb" EOF`},
+ {"x = 'a\\\nb'", `x = "ab" EOF`},
+ {`x = '\''`, `x = "'" EOF`},
+ {`x = "\""`, `x = "\"" EOF`},
+ {`x = r'\''`, `x = "\\'" EOF`},
+ {`x = '''\''''`, `x = "'" EOF`},
+ {`x = r'''\''''`, `x = "\\'" EOF`},
+ {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`},
+ {"x = '''a\nb'''", `x = "a\nb" EOF`},
+ {"x = '''a\rb'''", `x = "a\nb" EOF`},
+ {"x = '''a\r\nb'''", `x = "a\nb" EOF`},
+ {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`},
+ {"x = r'a\\\nb'", `x = "a\\\nb" EOF`},
+ {"x = r'a\\\rb'", `x = "a\\\nb" EOF`},
+ {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`},
+ {"a\rb", `a newline b EOF`},
+ {"a\nb", `a newline b EOF`},
+ {"a\r\nb", `a newline b EOF`},
+ {"a\n\nb", `a newline b EOF`},
+ // numbers
+ {"0", `0 EOF`},
+ {"00", `0 EOF`},
+ {"0.", `0.000000e+00 EOF`},
+ {"0.e1", `0.000000e+00 EOF`},
+ {".0", `0.000000e+00 EOF`},
+ {"0.0", `0.000000e+00 EOF`},
+ {".e1", `. e1 EOF`},
+ {"1", `1 EOF`},
+ {"1.", `1.000000e+00 EOF`},
+ {".1", `1.000000e-01 EOF`},
+ {".1e1", `1.000000e+00 EOF`},
+ {".1e+1", `1.000000e+00 EOF`},
+ {".1e-1", `1.000000e-02 EOF`},
+ {"1e1", `1.000000e+01 EOF`},
+ {"1e+1", `1.000000e+01 EOF`},
+ {"1e-1", `1.000000e-01 EOF`},
+ {"123", `123 EOF`},
+ {"123e45", `1.230000e+47 EOF`},
+ {"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`},
+ {"12345678901234567890", `12345678901234567890 EOF`},
+ // hex
+ {"0xA", `10 EOF`},
+ {"0xAAG", `170 G EOF`},
+ {"0xG", `foo.star:1:1: invalid hex literal`},
+ {"0XA", `10 EOF`},
+ {"0XG", `foo.star:1:1: invalid hex literal`},
+ {"0xA.", `10 . EOF`},
+ {"0xA.e1", `10 . e1 EOF`},
+ {"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`},
+ // binary
+ {"0b1010", `10 EOF`},
+ {"0B111101", `61 EOF`},
+ {"0b3", `foo.star:1:3: invalid binary literal`},
+ {"0b1010201", `10 201 EOF`},
+ {"0b1010.01", `10 1.000000e-02 EOF`},
+ {"0b0000", `0 EOF`},
+ // octal
+ {"0o123", `83 EOF`},
+ {"0o12834", `10 834 EOF`},
+ {"0o12934", `10 934 EOF`},
+ {"0o12934.", `10 9.340000e+02 EOF`},
+ {"0o12934.1", `10 9.341000e+02 EOF`},
+ {"0o12934e1", `10 9.340000e+03 EOF`},
+ {"0o123.", `83 . EOF`},
+ {"0o123.1", `83 1.000000e-01 EOF`},
+ {"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`},
+ {"012834", `foo.star:1:1: invalid int literal`},
+ {"012934", `foo.star:1:1: invalid int literal`},
+ {"i = 012934", `foo.star:1:5: invalid int literal`},
+ // octal escapes in string literals
+ {`"\037"`, `"\x1f" EOF`},
+ {`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`},
+ {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8'
+ {`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3
+ // hex escapes
+ {`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable
+ {`"\x80"`, `foo.star:1:1: non-ASCII hex escape`},
+ {`"\xff"`, `foo.star:1:1: non-ASCII hex escape`},
+ {`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`},
+ {`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`},
+ {`"\x"`, `foo.star:1:1: truncated escape sequence \x`},
+ {`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`},
+ // Unicode escapes
+ // \uXXXX
+ {`"\u0400"`, `"Ѐ" EOF`},
+ {`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`},
+ {`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
+ {`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`},
+ {`"\u4E16"`, `"世" EOF`},
+ {`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
+ // \UXXXXXXXX
+ {`"\U00000400"`, `"Ѐ" EOF`},
+ {`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`},
+ {`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
+ {`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`},
+ {`"\U0010FFFF"`, `"\U0010ffff" EOF`},
+ {`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`},
+ {`"\U0001F63F"`, `"😿" EOF`},
+ {`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
+
+ // backslash escapes
+ // As in Go, a backslash must escape something.
+ // (Python started issuing a deprecation warning in 3.6.)
+ {`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`},
+ {`"\+"`, `foo.star:1:1: invalid escape sequence \+`},
+ {`"\w"`, `foo.star:1:1: invalid escape sequence \w`},
+ {`"\""`, `"\"" EOF`},
+ {`"\'"`, `"'" EOF`},
+ {`'\w'`, `foo.star:1:1: invalid escape sequence \w`},
+ {`'\''`, `"'" EOF`},
+ {`'\"'`, `"\"" EOF`},
+ {`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`},
+ {`"""\""""`, `"\"" EOF`},
+ {`"""\'"""`, `"'" EOF`},
+ {`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`},
+ {`'''\''''`, `"'" EOF`},
+ {`'''\"'''`, `"\"" EOF`},
+ {`r"\w"`, `"\\w" EOF`},
+ {`r"\""`, `"\\\"" EOF`},
+ {`r"\'"`, `"\\'" EOF`},
+ {`r'\w'`, `"\\w" EOF`},
+ {`r'\''`, `"\\'" EOF`},
+ {`r'\"'`, `"\\\"" EOF`},
+ {`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`},
+ {`"\o123"`, `foo.star:1:1: invalid escape sequence \o`},
+ // bytes literals (where they differ from text strings)
+ {`b"AЀ世😿"`, `b"AЀ世😿`}, // 1-4 byte encodings, literal
+ {`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世😿"`}, // same, as escapes
+ {`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII
+ {`b"\400"`, `foo.star:1:2: invalid escape sequence \400`},
+ {`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string)
+ // floats starting with octal digits
+ {"012934.", `1.293400e+04 EOF`},
+ {"012934.1", `1.293410e+04 EOF`},
+ {"012934e1", `1.293400e+05 EOF`},
+ {"0123.", `1.230000e+02 EOF`},
+ {"0123.1", `1.231000e+02 EOF`},
+ // github.com/google/skylark/issues/16
+ {"x ! 0", "foo.star:1:3: unexpected input character '!'"},
+ // github.com/google/starlark-go/issues/80
+ {"([{<>}])", "( [ { < > } ] ) EOF"},
+ {"f();", "f ( ) ; EOF"},
+ // github.com/google/starlark-go/issues/104
+ {"def f():\n if x:\n pass\n ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`},
+ {`while cond: pass`, "while cond : pass EOF"},
+ // github.com/google/starlark-go/issues/107
+ {"~= ~= 5", "~ = ~ = 5 EOF"},
+ {"0in", "0 in EOF"},
+ {"0or", "foo.star:1:3: invalid octal literal"},
+ {"6in", "6 in EOF"},
+ {"6or", "6 or EOF"},
+ } {
+ got, err := scan(test.input)
+ if err != nil {
+ got = err.(Error).Error()
+ }
+ // Prefix match allows us to truncate errors in expectations.
+ // Success cases all end in EOF.
+ if !strings.HasPrefix(got, test.want) {
+ t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want)
+ }
+ }
+}
+
+// dataFile is the same as starlarktest.DataFile.
+// We make a copy to avoid a dependency cycle.
+var dataFile = func(pkgdir, filename string) string {
+ return filepath.Join(build.Default.GOPATH, "src/go.starlark.net", pkgdir, filename)
+}
+
+func BenchmarkScan(b *testing.B) {
+ filename := dataFile("syntax", "testdata/scan.star")
+ b.StopTimer()
+ data, err := ioutil.ReadFile(filename)
+ if err != nil {
+ b.Fatal(err)
+ }
+ b.StartTimer()
+
+ for i := 0; i < b.N; i++ {
+ sc, err := newScanner(filename, data, false)
+ if err != nil {
+ b.Fatal(err)
+ }
+ var val tokenValue
+ for sc.nextToken(&val) != EOF {
+ }
+ }
+}
diff --git a/syntax/syntax.go b/syntax/syntax.go
new file mode 100644
index 0000000..20b28bb
--- /dev/null
+++ b/syntax/syntax.go
@@ -0,0 +1,529 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package syntax provides a Starlark parser and abstract syntax tree.
+package syntax // import "go.starlark.net/syntax"
+
+// A Node is a node in a Starlark syntax tree.
+type Node interface {
+ // Span returns the start and end position of the expression.
+ Span() (start, end Position)
+
+ // Comments returns the comments associated with this node.
+ // It returns nil if RetainComments was not specified during parsing,
+ // or if AllocComments was not called.
+ Comments() *Comments
+
+ // AllocComments allocates a new Comments node if there was none.
+ // This makes it possible to add new comments using the Comments() method.
+ AllocComments()
+}
+
+// A Comment represents a single # comment.
+type Comment struct {
+ Start Position
+ Text string // without trailing newline
+}
+
+// Comments collects the comments associated with an expression.
+type Comments struct {
+ Before []Comment // whole-line comments before this expression
+ Suffix []Comment // end-of-line comments after this expression (up to 1)
+
+ // For top-level expressions only, After lists whole-line
+ // comments following the expression.
+ After []Comment
+}
+
+// A commentsRef is a possibly-nil reference to a set of comments.
+// A commentsRef is embedded in each type of syntax node,
+// and provides its Comments and AllocComments methods.
+type commentsRef struct{ ref *Comments }
+
+// Comments returns the comments associated with a syntax node,
+// or nil if AllocComments has not yet been called.
+func (cr commentsRef) Comments() *Comments { return cr.ref }
+
+// AllocComments enables comments to be associated with a syntax node.
+func (cr *commentsRef) AllocComments() {
+ if cr.ref == nil {
+ cr.ref = new(Comments)
+ }
+}
+
+// Start returns the start position of the expression.
+func Start(n Node) Position {
+ start, _ := n.Span()
+ return start
+}
+
+// End returns the end position of the expression.
+func End(n Node) Position {
+ _, end := n.Span()
+ return end
+}
+
+// A File represents a Starlark file.
+type File struct {
+ commentsRef
+ Path string
+ Stmts []Stmt
+
+ Module interface{} // a *resolve.Module, set by resolver
+}
+
+func (x *File) Span() (start, end Position) {
+ if len(x.Stmts) == 0 {
+ return
+ }
+ start, _ = x.Stmts[0].Span()
+ _, end = x.Stmts[len(x.Stmts)-1].Span()
+ return start, end
+}
+
+// A Stmt is a Starlark statement.
+type Stmt interface {
+ Node
+ stmt()
+}
+
+func (*AssignStmt) stmt() {}
+func (*BranchStmt) stmt() {}
+func (*DefStmt) stmt() {}
+func (*ExprStmt) stmt() {}
+func (*ForStmt) stmt() {}
+func (*WhileStmt) stmt() {}
+func (*IfStmt) stmt() {}
+func (*LoadStmt) stmt() {}
+func (*ReturnStmt) stmt() {}
+
+// An AssignStmt represents an assignment:
+// x = 0
+// x, y = y, x
+// x += 1
+type AssignStmt struct {
+ commentsRef
+ OpPos Position
+ Op Token // = EQ | {PLUS,MINUS,STAR,PERCENT}_EQ
+ LHS Expr
+ RHS Expr
+}
+
+func (x *AssignStmt) Span() (start, end Position) {
+ start, _ = x.LHS.Span()
+ _, end = x.RHS.Span()
+ return
+}
+
+// A DefStmt represents a function definition.
+type DefStmt struct {
+ commentsRef
+ Def Position
+ Name *Ident
+ Params []Expr // param = ident | ident=expr | * | *ident | **ident
+ Body []Stmt
+
+ Function interface{} // a *resolve.Function, set by resolver
+}
+
+func (x *DefStmt) Span() (start, end Position) {
+ _, end = x.Body[len(x.Body)-1].Span()
+ return x.Def, end
+}
+
+// An ExprStmt is an expression evaluated for side effects.
+type ExprStmt struct {
+ commentsRef
+ X Expr
+}
+
+func (x *ExprStmt) Span() (start, end Position) {
+ return x.X.Span()
+}
+
+// An IfStmt is a conditional: If Cond: True; else: False.
+// 'elif' is desugared into a chain of IfStmts.
+type IfStmt struct {
+ commentsRef
+ If Position // IF or ELIF
+ Cond Expr
+ True []Stmt
+ ElsePos Position // ELSE or ELIF
+ False []Stmt // optional
+}
+
+func (x *IfStmt) Span() (start, end Position) {
+ body := x.False
+ if body == nil {
+ body = x.True
+ }
+ _, end = body[len(body)-1].Span()
+ return x.If, end
+}
+
+// A LoadStmt loads another module and binds names from it:
+// load(Module, "x", y="foo").
+//
+// The AST is slightly unfaithful to the concrete syntax here because
+// Starlark's load statement, so that it can be implemented in Python,
+// binds some names (like y above) with an identifier and some (like x)
+// without. For consistency we create fake identifiers for all the
+// strings.
+type LoadStmt struct {
+ commentsRef
+ Load Position
+ Module *Literal // a string
+ From []*Ident // name defined in loading module
+ To []*Ident // name in loaded module
+ Rparen Position
+}
+
+func (x *LoadStmt) Span() (start, end Position) {
+ return x.Load, x.Rparen
+}
+
+// ModuleName returns the name of the module loaded by this statement.
+func (x *LoadStmt) ModuleName() string { return x.Module.Value.(string) }
+
+// A BranchStmt changes the flow of control: break, continue, pass.
+type BranchStmt struct {
+ commentsRef
+ Token Token // = BREAK | CONTINUE | PASS
+ TokenPos Position
+}
+
+func (x *BranchStmt) Span() (start, end Position) {
+ return x.TokenPos, x.TokenPos.add(x.Token.String())
+}
+
+// A ReturnStmt returns from a function.
+type ReturnStmt struct {
+ commentsRef
+ Return Position
+ Result Expr // may be nil
+}
+
+func (x *ReturnStmt) Span() (start, end Position) {
+ if x.Result == nil {
+ return x.Return, x.Return.add("return")
+ }
+ _, end = x.Result.Span()
+ return x.Return, end
+}
+
+// An Expr is a Starlark expression.
+type Expr interface {
+ Node
+ expr()
+}
+
+func (*BinaryExpr) expr() {}
+func (*CallExpr) expr() {}
+func (*Comprehension) expr() {}
+func (*CondExpr) expr() {}
+func (*DictEntry) expr() {}
+func (*DictExpr) expr() {}
+func (*DotExpr) expr() {}
+func (*Ident) expr() {}
+func (*IndexExpr) expr() {}
+func (*LambdaExpr) expr() {}
+func (*ListExpr) expr() {}
+func (*Literal) expr() {}
+func (*ParenExpr) expr() {}
+func (*SliceExpr) expr() {}
+func (*TupleExpr) expr() {}
+func (*UnaryExpr) expr() {}
+
+// An Ident represents an identifier.
+type Ident struct {
+ commentsRef
+ NamePos Position
+ Name string
+
+ Binding interface{} // a *resolve.Binding, set by resolver
+}
+
+func (x *Ident) Span() (start, end Position) {
+ return x.NamePos, x.NamePos.add(x.Name)
+}
+
+// A Literal represents a literal string or number.
+type Literal struct {
+ commentsRef
+ Token Token // = STRING | BYTES | INT | FLOAT
+ TokenPos Position
+ Raw string // uninterpreted text
+ Value interface{} // = string | int64 | *big.Int | float64
+}
+
+func (x *Literal) Span() (start, end Position) {
+ return x.TokenPos, x.TokenPos.add(x.Raw)
+}
+
+// A ParenExpr represents a parenthesized expression: (X).
+type ParenExpr struct {
+ commentsRef
+ Lparen Position
+ X Expr
+ Rparen Position
+}
+
+func (x *ParenExpr) Span() (start, end Position) {
+ return x.Lparen, x.Rparen.add(")")
+}
+
+// A CallExpr represents a function call expression: Fn(Args).
+type CallExpr struct {
+ commentsRef
+ Fn Expr
+ Lparen Position
+ Args []Expr // arg = expr | ident=expr | *expr | **expr
+ Rparen Position
+}
+
+func (x *CallExpr) Span() (start, end Position) {
+ start, _ = x.Fn.Span()
+ return start, x.Rparen.add(")")
+}
+
+// A DotExpr represents a field or method selector: X.Name.
+type DotExpr struct {
+ commentsRef
+ X Expr
+ Dot Position
+ NamePos Position
+ Name *Ident
+}
+
+func (x *DotExpr) Span() (start, end Position) {
+ start, _ = x.X.Span()
+ _, end = x.Name.Span()
+ return
+}
+
+// A Comprehension represents a list or dict comprehension:
+// [Body for ... if ...] or {Body for ... if ...}
+type Comprehension struct {
+ commentsRef
+ Curly bool // {x:y for ...} or {x for ...}, not [x for ...]
+ Lbrack Position
+ Body Expr
+ Clauses []Node // = *ForClause | *IfClause
+ Rbrack Position
+}
+
+func (x *Comprehension) Span() (start, end Position) {
+ return x.Lbrack, x.Rbrack.add("]")
+}
+
+// A ForStmt represents a loop: for Vars in X: Body.
+type ForStmt struct {
+ commentsRef
+ For Position
+ Vars Expr // name, or tuple of names
+ X Expr
+ Body []Stmt
+}
+
+func (x *ForStmt) Span() (start, end Position) {
+ _, end = x.Body[len(x.Body)-1].Span()
+ return x.For, end
+}
+
+// A WhileStmt represents a while loop: while X: Body.
+type WhileStmt struct {
+ commentsRef
+ While Position
+ Cond Expr
+ Body []Stmt
+}
+
+func (x *WhileStmt) Span() (start, end Position) {
+ _, end = x.Body[len(x.Body)-1].Span()
+ return x.While, end
+}
+
+// A ForClause represents a for clause in a list comprehension: for Vars in X.
+type ForClause struct {
+ commentsRef
+ For Position
+ Vars Expr // name, or tuple of names
+ In Position
+ X Expr
+}
+
+func (x *ForClause) Span() (start, end Position) {
+ _, end = x.X.Span()
+ return x.For, end
+}
+
+// An IfClause represents an if clause in a list comprehension: if Cond.
+type IfClause struct {
+ commentsRef
+ If Position
+ Cond Expr
+}
+
+func (x *IfClause) Span() (start, end Position) {
+ _, end = x.Cond.Span()
+ return x.If, end
+}
+
+// A DictExpr represents a dictionary literal: { List }.
+type DictExpr struct {
+ commentsRef
+ Lbrace Position
+ List []Expr // all *DictEntrys
+ Rbrace Position
+}
+
+func (x *DictExpr) Span() (start, end Position) {
+ return x.Lbrace, x.Rbrace.add("}")
+}
+
+// A DictEntry represents a dictionary entry: Key: Value.
+// Used only within a DictExpr.
+type DictEntry struct {
+ commentsRef
+ Key Expr
+ Colon Position
+ Value Expr
+}
+
+func (x *DictEntry) Span() (start, end Position) {
+ start, _ = x.Key.Span()
+ _, end = x.Value.Span()
+ return start, end
+}
+
+// A LambdaExpr represents an inline function abstraction.
+//
+// Although they may be added in future, lambda expressions are not
+// currently part of the Starlark spec, so their use is controlled by the
+// resolve.AllowLambda flag.
+type LambdaExpr struct {
+ commentsRef
+ Lambda Position
+ Params []Expr // param = ident | ident=expr | * | *ident | **ident
+ Body Expr
+
+ Function interface{} // a *resolve.Function, set by resolver
+}
+
+func (x *LambdaExpr) Span() (start, end Position) {
+ _, end = x.Body.Span()
+ return x.Lambda, end
+}
+
+// A ListExpr represents a list literal: [ List ].
+type ListExpr struct {
+ commentsRef
+ Lbrack Position
+ List []Expr
+ Rbrack Position
+}
+
+func (x *ListExpr) Span() (start, end Position) {
+ return x.Lbrack, x.Rbrack.add("]")
+}
+
+// CondExpr represents the conditional: X if COND else ELSE.
+type CondExpr struct {
+ commentsRef
+ If Position
+ Cond Expr
+ True Expr
+ ElsePos Position
+ False Expr
+}
+
+func (x *CondExpr) Span() (start, end Position) {
+ start, _ = x.True.Span()
+ _, end = x.False.Span()
+ return start, end
+}
+
+// A TupleExpr represents a tuple literal: (List).
+type TupleExpr struct {
+ commentsRef
+ Lparen Position // optional (e.g. in x, y = 0, 1), but required if List is empty
+ List []Expr
+ Rparen Position
+}
+
+func (x *TupleExpr) Span() (start, end Position) {
+ if x.Lparen.IsValid() {
+ return x.Lparen, x.Rparen
+ } else {
+ return Start(x.List[0]), End(x.List[len(x.List)-1])
+ }
+}
+
+// A UnaryExpr represents a unary expression: Op X.
+//
+// As a special case, UnaryExpr{Op:STAR} may also represent
+// the star parameter in def f(*args) or def f(*, x).
+type UnaryExpr struct {
+ commentsRef
+ OpPos Position
+ Op Token
+ X Expr // may be nil if Op==STAR
+}
+
+func (x *UnaryExpr) Span() (start, end Position) {
+ if x.X != nil {
+ _, end = x.X.Span()
+ } else {
+ end = x.OpPos.add("*")
+ }
+ return x.OpPos, end
+}
+
+// A BinaryExpr represents a binary expression: X Op Y.
+//
+// As a special case, BinaryExpr{Op:EQ} may also
+// represent a named argument in a call f(k=v)
+// or a named parameter in a function declaration
+// def f(param=default).
+type BinaryExpr struct {
+ commentsRef
+ X Expr
+ OpPos Position
+ Op Token
+ Y Expr
+}
+
+func (x *BinaryExpr) Span() (start, end Position) {
+ start, _ = x.X.Span()
+ _, end = x.Y.Span()
+ return start, end
+}
+
+// A SliceExpr represents a slice or substring expression: X[Lo:Hi:Step].
+type SliceExpr struct {
+ commentsRef
+ X Expr
+ Lbrack Position
+ Lo, Hi, Step Expr // all optional
+ Rbrack Position
+}
+
+func (x *SliceExpr) Span() (start, end Position) {
+ start, _ = x.X.Span()
+ return start, x.Rbrack
+}
+
+// An IndexExpr represents an index expression: X[Y].
+type IndexExpr struct {
+ commentsRef
+ X Expr
+ Lbrack Position
+ Y Expr
+ Rbrack Position
+}
+
+func (x *IndexExpr) Span() (start, end Position) {
+ start, _ = x.X.Span()
+ return start, x.Rbrack
+}
diff --git a/syntax/testdata/errors.star b/syntax/testdata/errors.star
new file mode 100644
index 0000000..cee1fc9
--- /dev/null
+++ b/syntax/testdata/errors.star
@@ -0,0 +1,212 @@
+# Tests of parse errors.
+# This is a "chunked" file; each "---" line demarcates a new parser input.
+#
+# TODO(adonovan): lots more tests.
+
+x = 1 +
+2 ### "got newline, want primary expression"
+
+---
+
+_ = *x ### `got '\*', want primary`
+
+---
+# trailing comma is ok
+
+def f(a, ): pass
+def f(*args, ): pass
+def f(**kwargs, ): pass
+
+---
+
+# Parameters are validated later.
+def f(**kwargs, *args, *, b=1, a, **kwargs, *args, *, b=1, a):
+ pass
+
+---
+
+def f(a, *-b, c): # ### `got '-', want ','`
+ pass
+
+---
+
+def f(**kwargs, *args, b=1, a, **kwargs, *args, b=1, a):
+ pass
+
+---
+
+def pass(): ### "not an identifier"
+ pass
+
+---
+
+def f : ### `got ':', want '\('`
+
+---
+# trailing comma is ok
+
+f(a, )
+f(*args, )
+f(**kwargs, )
+
+---
+
+f(a=1, *, b=2) ### `got ',', want primary`
+
+---
+
+_ = {x:y for y in z} # ok
+_ = {x for y in z} ### `got for, want ':'`
+
+---
+
+def f():
+ pass
+ pass ### `unindent does not match any outer indentation level`
+
+---
+def f(): pass
+---
+# Blank line after pass => outdent.
+def f():
+ pass
+
+---
+# No blank line after pass; EOF acts like a newline.
+def f():
+ pass
+---
+# This is a well known parsing ambiguity in Python.
+# Python 2.7 accepts it but Python3 and Starlark reject it.
+_ = [x for x in lambda: True, lambda: False if x()] ### "got lambda, want primary"
+
+_ = [x for x in (lambda: True, lambda: False) if x()] # ok in all dialects
+
+---
+# Starlark, following Python 3, allows an unparenthesized
+# tuple after 'in' only in a for statement but not in a comprehension.
+# (Python 2.7 allows both.)
+for x in 1, 2, 3:
+ print(x)
+
+_ = [x for x in 1, 2, 3] ### `got ',', want ']', for, or if`
+---
+# Unparenthesized tuple is not allowed as operand of 'if' in comprehension.
+_ = [a for b in c if 1, 2] ### `got ',', want ']', for, or if`
+
+---
+# Lambda is ok though.
+_ = [a for b in c if lambda: d] # ok
+
+# But the body of such a lambda may not be a conditional:
+_ = [a for b in c if (lambda: d if e else f)] # ok
+_ = [a for b in c if lambda: d if e else f] ### "got else, want ']'"
+
+---
+# A lambda is not allowed as the operand of a 'for' clause.
+_ = [a for b in lambda: c] ### `got lambda, want primary`
+
+---
+# Comparison operations are not associative.
+
+_ = (0 == 1) == 2 # ok
+_ = 0 == (1 == 2) # ok
+_ = 0 == 1 == 2 ### "== does not associate with =="
+
+---
+
+_ = (0 <= i) < n # ok
+_ = 0 <= (i < n) # ok
+_ = 0 <= i < n ### "<= does not associate with <"
+
+---
+
+_ = (a in b) not in c # ok
+_ = a in (b not in c) # ok
+_ = a in b not in c ### "in does not associate with not in"
+
+---
+# shift/reduce ambiguity is reduced
+_ = [x for x in a if b else c] ### `got else, want ']', for, or if`
+---
+[a for b in c else d] ### `got else, want ']', for, or if`
+---
+_ = a + b not c ### "got identifier, want in"
+---
+f(1+2 = 3) ### "keyword argument must have form name=expr"
+---
+print(1, 2, 3
+### `got end of file, want '\)'`
+---
+_ = a if b ### "conditional expression without else clause"
+---
+load("") ### "load statement must import at least 1 symbol"
+---
+load("", 1) ### `load operand must be "name" or localname="name" \(got int literal\)`
+---
+load("a", "x") # ok
+---
+load(1, 2) ### "first operand of load statement must be a string literal"
+---
+load("a", x) ### `load operand must be "x" or x="originalname"`
+---
+load("a", x2=x) ### `original name of loaded symbol must be quoted: x2="originalname"`
+---
+# All of these parse.
+load("a", "x")
+load("a", "x", y2="y")
+load("a", x2="x", "y") # => positional-before-named arg check happens later (!)
+---
+# 'load' is not an identifier
+load = 1 ### `got '=', want '\('`
+---
+# 'load' is not an identifier
+f(load()) ### `got load, want primary`
+---
+# 'load' is not an identifier
+def load(): ### `not an identifier`
+ pass
+---
+# 'load' is not an identifier
+def f(load): ### `not an identifier`
+ pass
+---
+# A load statement allows a trailing comma.
+load("module", "x",)
+---
+x = 1 +
+2 ### "got newline, want primary expression"
+---
+def f():
+ pass
+# this used to cause a spurious indentation error
+---
+print 1 2 ### `got int literal, want newline`
+
+---
+# newlines are not allowed in raw string literals
+raw = r'a ### `unexpected newline in string`
+b'
+
+---
+# The parser permits an unparenthesized tuple expression for the first index.
+x[1, 2:] # ok
+---
+# But not if it has a trailing comma.
+x[1, 2,:] ### `got ':', want primary`
+---
+# Trailing tuple commas are permitted only within parens; see b/28867036.
+(a, b,) = 1, 2 # ok
+c, d = 1, 2 # ok
+---
+a, b, = 1, 2 ### `unparenthesized tuple with trailing comma`
+---
+a, b = 1, 2, ### `unparenthesized tuple with trailing comma`
+
+---
+# See github.com/google/starlark-go/issues/48
+a = max(range(10))) ### `unexpected '\)'`
+
+---
+# github.com/google/starlark-go/issues/85
+s = "\x-0" ### `invalid escape sequence`
diff --git a/syntax/testdata/scan.star b/syntax/testdata/scan.star
new file mode 100644
index 0000000..4f62ba9
--- /dev/null
+++ b/syntax/testdata/scan.star
@@ -0,0 +1,1324 @@
+# Copyright 2014 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# (From https://github.com/bazelbuild/rules_go/blob/master/go/def.bzl@a6f9d0c)
+
+load("//go/private:repositories.bzl", "go_repositories")
+load("//go/private:go_repository.bzl", "go_repository", "new_go_repository")
+load("//go/private:go_prefix.bzl", "go_prefix")
+load("//go/private:json.bzl", "json_marshal")
+
+"""These are bare-bones Go rules.
+
+In order of priority:
+
+- BUILD file must be written by hand.
+
+- No support for SWIG
+
+- No test sharding or test XML.
+
+"""
+
+_DEFAULT_LIB = "go_default_library"
+
+_VENDOR_PREFIX = "/vendor/"
+
+go_filetype = FileType([
+ ".go",
+ ".s",
+ ".S",
+ ".h", # may be included by .s
+])
+
+# be consistent to cc_library.
+hdr_exts = [
+ ".h",
+ ".hh",
+ ".hpp",
+ ".hxx",
+ ".inc",
+]
+
+cc_hdr_filetype = FileType(hdr_exts)
+
+# Extensions of files we can build with the Go compiler or with cc_library.
+# This is a subset of the extensions recognized by go/build.
+cgo_filetype = FileType([
+ ".go",
+ ".c",
+ ".cc",
+ ".cxx",
+ ".cpp",
+ ".s",
+ ".S",
+ ".h",
+ ".hh",
+ ".hpp",
+ ".hxx",
+])
+
+################
+
+def go_environment_vars(ctx):
+ """Return a map of environment variables for use with actions, based on
+ the arguments. Uses the ctx.fragments.cpp.cpu attribute, if present,
+ and picks a default of target_os="linux" and target_arch="amd64"
+ otherwise.
+
+ Args:
+ The starlark Context.
+
+ Returns:
+ A dict of environment variables for running Go tool commands that build for
+ the target OS and architecture.
+ """
+ default_toolchain = {"GOOS": "linux", "GOARCH": "amd64"}
+ bazel_to_go_toolchain = {
+ "k8": {"GOOS": "linux", "GOARCH": "amd64"},
+ "piii": {"GOOS": "linux", "GOARCH": "386"},
+ "darwin": {"GOOS": "darwin", "GOARCH": "amd64"},
+ "darwin_x86_64": {"GOOS": "darwin", "GOARCH": "amd64"},
+ "freebsd": {"GOOS": "freebsd", "GOARCH": "amd64"},
+ "armeabi-v7a": {"GOOS": "linux", "GOARCH": "arm"},
+ "arm": {"GOOS": "linux", "GOARCH": "arm"},
+ }
+ env = {}
+ if hasattr(ctx.file, "go_tool"):
+ env["GOROOT"] = ctx.file.go_tool.dirname + "/.."
+ env.update(bazel_to_go_toolchain.get(ctx.fragments.cpp.cpu, default_toolchain))
+ return env
+
+def _is_darwin_cpu(ctx):
+ cpu = ctx.fragments.cpp.cpu
+ return cpu == "darwin" or cpu == "darwin_x86_64"
+
+def _emit_generate_params_action(cmds, ctx, fn):
+ cmds_all = [
+ # Use bash explicitly. /bin/sh is default, and it may be linked to a
+ # different shell, e.g., /bin/dash on Ubuntu.
+ "#!/bin/bash",
+ "set -e",
+ ]
+ cmds_all += cmds
+ cmds_all_str = "\n".join(cmds_all) + "\n"
+ f = ctx.new_file(ctx.configuration.bin_dir, fn)
+ ctx.file_action(
+ output = f,
+ content = cmds_all_str,
+ executable = True,
+ )
+ return f
+
+def _emit_go_asm_action(ctx, source, hdrs, out_obj):
+ """Construct the command line for compiling Go Assembly code.
+ Constructs a symlink tree to accomodate for workspace name.
+ Args:
+ ctx: The starlark Context.
+ source: a source code artifact
+ hdrs: list of .h files that may be included
+ out_obj: the artifact (configured target?) that should be produced
+ """
+ params = {
+ "go_tool": ctx.file.go_tool.path,
+ "includes": [f.dirname for f in hdrs] + [ctx.file.go_include.path],
+ "source": source.path,
+ "out": out_obj.path,
+ }
+
+ inputs = hdrs + ctx.files.toolchain + [source]
+ ctx.action(
+ inputs = inputs,
+ outputs = [out_obj],
+ mnemonic = "GoAsmCompile",
+ executable = ctx.executable._asm,
+ arguments = [json_marshal(params)],
+ )
+
+def _go_importpath(ctx):
+ """Returns the expected importpath of the go_library being built.
+
+ Args:
+ ctx: The starlark Context
+
+ Returns:
+ Go importpath of the library
+ """
+ path = ctx.attr.importpath
+ if path != "":
+ return path
+ path = ctx.attr.go_prefix.go_prefix
+ if path.endswith("/"):
+ path = path[:-1]
+ if ctx.label.package:
+ path += "/" + ctx.label.package
+ if ctx.label.name != _DEFAULT_LIB:
+ path += "/" + ctx.label.name
+ if path.rfind(_VENDOR_PREFIX) != -1:
+ path = path[len(_VENDOR_PREFIX) + path.rfind(_VENDOR_PREFIX):]
+ if path[0] == "/":
+ path = path[1:]
+ return path
+
+def _emit_go_compile_action(ctx, sources, deps, libpaths, out_object, gc_goopts):
+ """Construct the command line for compiling Go code.
+
+ Args:
+ ctx: The starlark Context.
+ sources: an iterable of source code artifacts (or CTs? or labels?)
+ deps: an iterable of dependencies. Each dependency d should have an
+ artifact in d.transitive_go_libraries representing all imported libraries.
+ libpaths: the set of paths to search for imported libraries.
+ out_object: the object file that should be produced
+ gc_goopts: additional flags to pass to the compiler.
+ """
+ if ctx.coverage_instrumented():
+ sources = _emit_go_cover_action(ctx, sources)
+
+ # Compile filtered files.
+ args = [
+ "-cgo",
+ ctx.file.go_tool.path,
+ "tool",
+ "compile",
+ "-o",
+ out_object.path,
+ "-trimpath",
+ "-abs-.",
+ "-I",
+ "-abs-.",
+ ]
+ inputs = depset(sources + ctx.files.toolchain)
+ for dep in deps:
+ inputs += dep.transitive_go_libraries
+ for path in libpaths:
+ args += ["-I", path]
+ args += gc_goopts + [("" if i.basename.startswith("_cgo") else "-filter-") + i.path for i in sources]
+ ctx.action(
+ inputs = list(inputs),
+ outputs = [out_object],
+ mnemonic = "GoCompile",
+ executable = ctx.executable._filter_exec,
+ arguments = args,
+ env = go_environment_vars(ctx),
+ )
+
+ return sources
+
+def _emit_go_pack_action(ctx, out_lib, objects):
+ """Construct the command line for packing objects together.
+
+ Args:
+ ctx: The starlark Context.
+ out_lib: the archive that should be produced
+ objects: an iterable of object files to be added to the output archive file.
+ """
+ ctx.action(
+ inputs = objects + ctx.files.toolchain,
+ outputs = [out_lib],
+ mnemonic = "GoPack",
+ executable = ctx.file.go_tool,
+ arguments = ["tool", "pack", "c", out_lib.path] + [a.path for a in objects],
+ env = go_environment_vars(ctx),
+ )
+
+def _emit_go_cover_action(ctx, sources):
+ """Construct the command line for test coverage instrument.
+
+ Args:
+ ctx: The starlark Context.
+ sources: an iterable of Go source files.
+
+ Returns:
+ A list of Go source code files which might be coverage instrumented.
+ """
+ outputs = []
+
+ # TODO(linuxerwang): make the mode configurable.
+ count = 0
+
+ for src in sources:
+ if not src.path.endswith(".go") or src.path.endswith("_test.go"):
+ outputs += [src]
+ continue
+
+ cover_var = "GoCover_%d" % count
+ out = ctx.new_file(src, src.basename[:-3] + "_" + cover_var + ".cover.go")
+ outputs += [out]
+ ctx.action(
+ inputs = [src] + ctx.files.toolchain,
+ outputs = [out],
+ mnemonic = "GoCover",
+ executable = ctx.file.go_tool,
+ arguments = ["tool", "cover", "--mode=set", "-var=%s" % cover_var, "-o", out.path, src.path],
+ env = go_environment_vars(ctx),
+ )
+ count += 1
+
+ return outputs
+
+def go_library_impl(ctx):
+ """Implements the go_library() rule."""
+
+ sources = depset(ctx.files.srcs)
+ go_srcs = depset([s for s in sources if s.basename.endswith(".go")])
+ asm_srcs = [s for s in sources if s.basename.endswith(".s") or s.basename.endswith(".S")]
+ asm_hdrs = [s for s in sources if s.basename.endswith(".h")]
+ deps = ctx.attr.deps
+ dep_runfiles = [d.data_runfiles for d in deps]
+
+ cgo_object = None
+ if hasattr(ctx.attr, "cgo_object"):
+ cgo_object = ctx.attr.cgo_object
+
+ if ctx.attr.library:
+ go_srcs += ctx.attr.library.go_sources
+ asm_srcs += ctx.attr.library.asm_sources
+ asm_hdrs += ctx.attr.library.asm_headers
+ deps += ctx.attr.library.direct_deps
+ dep_runfiles += [ctx.attr.library.data_runfiles]
+ if ctx.attr.library.cgo_object:
+ if cgo_object:
+ fail("go_library %s cannot have cgo_object because the package " +
+ "already has cgo_object in %s" % (
+ ctx.label.name,
+ ctx.attr.library.name,
+ ))
+ cgo_object = ctx.attr.library.cgo_object
+ if not go_srcs:
+ fail("may not be empty", "srcs")
+
+ transitive_cgo_deps = depset([], order = "topological")
+ if cgo_object:
+ dep_runfiles += [cgo_object.data_runfiles]
+ transitive_cgo_deps += cgo_object.cgo_deps
+
+ extra_objects = [cgo_object.cgo_obj] if cgo_object else []
+ for src in asm_srcs:
+ obj = ctx.new_file(src, "%s.dir/%s.o" % (ctx.label.name, src.basename[:-2]))
+ _emit_go_asm_action(ctx, src, asm_hdrs, obj)
+ extra_objects += [obj]
+
+ lib_name = _go_importpath(ctx) + ".a"
+ out_lib = ctx.new_file(lib_name)
+ out_object = ctx.new_file(ctx.label.name + ".o")
+ search_path = out_lib.path[:-len(lib_name)]
+ gc_goopts = _gc_goopts(ctx)
+ transitive_go_libraries = depset([out_lib])
+ transitive_go_library_paths = depset([search_path])
+ for dep in deps:
+ transitive_go_libraries += dep.transitive_go_libraries
+ transitive_cgo_deps += dep.transitive_cgo_deps
+ transitive_go_library_paths += dep.transitive_go_library_paths
+
+ go_srcs = _emit_go_compile_action(
+ ctx,
+ sources = go_srcs,
+ deps = deps,
+ libpaths = transitive_go_library_paths,
+ out_object = out_object,
+ gc_goopts = gc_goopts,
+ )
+ _emit_go_pack_action(ctx, out_lib, [out_object] + extra_objects)
+
+ dylibs = []
+ if cgo_object:
+ dylibs += [d for d in cgo_object.cgo_deps if d.path.endswith(".so")]
+
+ runfiles = ctx.runfiles(files = dylibs, collect_data = True)
+ for d in dep_runfiles:
+ runfiles = runfiles.merge(d)
+
+ return struct(
+ label = ctx.label,
+ files = depset([out_lib]),
+ runfiles = runfiles,
+ go_sources = go_srcs,
+ asm_sources = asm_srcs,
+ asm_headers = asm_hdrs,
+ cgo_object = cgo_object,
+ direct_deps = ctx.attr.deps,
+ transitive_cgo_deps = transitive_cgo_deps,
+ transitive_go_libraries = transitive_go_libraries,
+ transitive_go_library_paths = transitive_go_library_paths,
+ gc_goopts = gc_goopts,
+ )
+
+def _c_linker_options(ctx, blocklist = []):
+ """Extracts flags to pass to $(CC) on link from the current context
+
+ Args:
+ ctx: the current context
+ blocklist: Any flags starts with any of these prefixes are filtered out from
+ the return value.
+
+ Returns:
+ A list of command line flags
+ """
+ cpp = ctx.fragments.cpp
+ features = ctx.features
+ # Concatenate, in order: compiler options, unfiltered compiler options,
+ # link options and the mostly-static link options.
+ options = cpp.compiler_options(features)
+ options += cpp.unfiltered_compiler_options(features)
+ options += cpp.link_options
+ options += cpp.mostly_static_link_options(ctx.features, False)
+ # Drop every option that begins with one of the blocklisted prefixes.
+ filtered = []
+ for opt in options:
+ if any([opt.startswith(prefix) for prefix in blocklist]):
+ continue
+ filtered.append(opt)
+ return filtered
+
+# Returns the rule's gc_goopts attribute with make variables expanded.
+# When the `library` attribute is set, that library's gc_goopts are appended
+# after the rule's own options.
+def _gc_goopts(ctx):
+ gc_goopts = [
+ ctx.expand_make_variables("gc_goopts", f, {})
+ for f in ctx.attr.gc_goopts
+ ]
+ if ctx.attr.library:
+ gc_goopts += ctx.attr.library.gc_goopts
+ return gc_goopts
+
+# Returns the rule's gc_linkopts attribute with make variables expanded,
+# followed by a "-X", "key='value'" pair for every entry of the x_defs
+# attribute.
+def _gc_linkopts(ctx):
+ gc_linkopts = [
+ ctx.expand_make_variables("gc_linkopts", f, {})
+ for f in ctx.attr.gc_linkopts
+ ]
+ for k, v in ctx.attr.x_defs.items():
+ gc_linkopts += ["-X", "%s='%s'" % (k, v)]
+ return gc_linkopts
+
+def _extract_extldflags(gc_linkopts, extldflags):
+ """Extracts -extldflags from gc_linkopts and combines them into a single list.
+
+ The value following each "-extldflags" option is moved out of gc_linkopts
+ and appended to the external linker flags. Neither argument is modified.
+
+ Args:
+ gc_linkopts: a list of flags passed in through the gc_linkopts attributes.
+ ctx.expand_make_variables should have already been applied.
+ extldflags: a list of flags to be passed to the external linker.
+
+ Returns:
+ A tuple containing the filtered gc_linkopts with external flags removed,
+ and a combined list of external flags.
+ """
+ filtered_gc_linkopts = []
+ # Work on a copy: "+=" extends a list in place, which would otherwise
+ # silently modify the list owned by the caller.
+ combined_extldflags = list(extldflags)
+ # True while the next option is the value of a preceding "-extldflags".
+ is_extldflags = False
+ for opt in gc_linkopts:
+ if is_extldflags:
+ is_extldflags = False
+ combined_extldflags += [opt]
+ elif opt == "-extldflags":
+ is_extldflags = True
+ else:
+ filtered_gc_linkopts += [opt]
+ return filtered_gc_linkopts, combined_extldflags
+
+def _emit_go_link_action(
+ ctx,
+ transitive_go_library_paths,
+ transitive_go_libraries,
+ cgo_deps,
+ libs,
+ executable,
+ gc_linkopts):
+ """Emits the GoLink action that links Go archives into an executable.
+
+ Args:
+ ctx: the current rule context.
+ transitive_go_library_paths: directories passed to the linker with -L.
+ transitive_go_libraries: library files added to the action inputs.
+ cgo_deps: cgo dependency files; each ".so" among them gets an rpath
+ entry, and all of them are added to the action inputs.
+ libs: the archive(s) to link. The last one names the params file and is
+ added to the action inputs.
+ executable: the output file produced by the link.
+ gc_linkopts: Go linker options; the values of any "-extldflags" options
+ are moved into the flags handed to the external linker.
+ """
+ # Bind the last library explicitly. A comprehension variable is not
+ # visible outside its comprehension, so `lib` must not be taken from the
+ # `[... for lib in libs]` expression below.
+ lib_files = list(libs)
+ lib = lib_files[-1]
+
+ # Number of directory levels between the executable and the bin dir root;
+ # used to build $ORIGIN-relative rpath entries.
+ config_strip = len(ctx.configuration.bin_dir.path) + 1
+ pkg_depth = executable.dirname[config_strip:].count("/") + 1
+
+ ld = "%s" % ctx.fragments.cpp.compiler_executable
+ extldflags = _c_linker_options(ctx) + [
+ "-Wl,-rpath,$ORIGIN/" + ("../" * pkg_depth),
+ ]
+ for d in cgo_deps:
+ if d.basename.endswith(".so"):
+ short_dir = d.dirname[len(d.root.path):]
+ extldflags += ["-Wl,-rpath,$ORIGIN/" + ("../" * pkg_depth) + short_dir]
+ gc_linkopts, extldflags = _extract_extldflags(gc_linkopts, extldflags)
+
+ link_cmd = [
+ ctx.file.go_tool.path,
+ "tool",
+ "link",
+ "-L",
+ ".",
+ ]
+ for path in transitive_go_library_paths:
+ link_cmd += ["-L", path]
+ link_cmd += [
+ "-o",
+ executable.path,
+ ] + gc_linkopts + ['"${STAMP_XDEFS[@]}"']
+
+ # workaround for a bug in ld(1) on Mac OS X.
+ # http://lists.apple.com/archives/Darwin-dev/2006/Sep/msg00084.html
+ # TODO(yugui) Remove this workaround once rules_go stops supporting XCode 7.2
+ # or earlier.
+ if not _is_darwin_cpu(ctx):
+ link_cmd += ["-s"]
+
+ link_cmd += [
+ "-extld",
+ ld,
+ "-extldflags",
+ "'%s'" % " ".join(extldflags),
+ ] + [l.path for l in lib_files]
+
+ # Avoided -s on OSX but it requires dsymutil to be on $PATH.
+ # TODO(yugui) Remove this workaround once rules_go stops supporting XCode 7.2
+ # or earlier.
+ cmds = ["export PATH=$PATH:/usr/bin"]
+
+ cmds += [
+ "STAMP_XDEFS=()",
+ ]
+
+ stamp_inputs = []
+ if ctx.attr.linkstamp:
+ # read workspace status files, converting "KEY value" lines
+ # to "-X $linkstamp.KEY=value" arguments to the go linker.
+ stamp_inputs = [ctx.info_file, ctx.version_file]
+ for f in stamp_inputs:
+ cmds += [
+ "while read -r key value || [[ -n $key ]]; do",
+ " STAMP_XDEFS+=(-X \"%s.$key=$value\")" % ctx.attr.linkstamp,
+ "done < " + f.path,
+ ]
+
+ cmds += [" ".join(link_cmd)]
+
+ # The whole command sequence is written to a params file that the action
+ # then runs as its command.
+ f = _emit_generate_params_action(cmds, ctx, lib.basename + ".GoLinkFile.params")
+
+ ctx.action(
+ inputs = [f] + (list(transitive_go_libraries) + [lib] + list(cgo_deps) +
+ ctx.files.toolchain + ctx.files._crosstool) + stamp_inputs,
+ outputs = [executable],
+ command = f.path,
+ mnemonic = "GoLink",
+ env = go_environment_vars(ctx),
+ )
+
+def go_binary_impl(ctx):
+ """go_binary_impl emits actions for compiling and linking a go executable."""
+ # Run the library implementation first, then link its outputs.
+ lib_result = go_library_impl(ctx)
+ _emit_go_link_action(
+ ctx,
+ transitive_go_libraries = lib_result.transitive_go_libraries,
+ transitive_go_library_paths = lib_result.transitive_go_library_paths,
+ cgo_deps = lib_result.transitive_cgo_deps,
+ libs = lib_result.files,
+ executable = ctx.outputs.executable,
+ gc_linkopts = _gc_linkopts(ctx),
+ )
+
+ # The executable is the only file exposed; runfiles and cgo_object are
+ # passed through unchanged from the library result.
+ return struct(
+ files = depset([ctx.outputs.executable]),
+ runfiles = lib_result.runfiles,
+ cgo_object = lib_result.cgo_object,
+ )
+
+def go_test_impl(ctx):
+ """go_test_impl implements go testing.
+
+ It emits an action to run the test generator, and then compiles the
+ test into a binary."""
+
+ lib_result = go_library_impl(ctx)
+ # Declare the generated test main source and its object and archive.
+ main_go = ctx.new_file(ctx.label.name + "_main_test.go")
+ main_object = ctx.new_file(ctx.label.name + "_main_test.o")
+ main_lib = ctx.new_file(ctx.label.name + "_main_test.a")
+ go_import = _go_importpath(ctx)
+
+ # Shell script: pass the library's Go sources through the filter_tags tool,
+ # keep the non-empty lines it prints, and hand those to the test generator.
+ cmds = [
+ "UNFILTERED_TEST_FILES=(%s)" %
+ " ".join(["'%s'" % f.path for f in lib_result.go_sources]),
+ "FILTERED_TEST_FILES=()",
+ "while read -r line; do",
+ ' if [ -n "$line" ]; then',
+ ' FILTERED_TEST_FILES+=("$line")',
+ " fi",
+ 'done < <(\'%s\' -cgo "${UNFILTERED_TEST_FILES[@]}")' %
+ ctx.executable._filter_tags.path,
+ " ".join([
+ "'%s'" % ctx.executable.test_generator.path,
+ "--package",
+ go_import,
+ "--output",
+ "'%s'" % main_go.path,
+ '"${FILTERED_TEST_FILES[@]}"',
+ ]),
+ ]
+ f = _emit_generate_params_action(
+ cmds,
+ ctx,
+ ctx.label.name + ".GoTestGenTest.params",
+ )
+ inputs = (list(lib_result.go_sources) + list(ctx.files.toolchain) +
+ [f, ctx.executable._filter_tags, ctx.executable.test_generator])
+ # The generator action runs with RUNDIR set to the package of this label.
+ ctx.action(
+ inputs = inputs,
+ outputs = [main_go],
+ command = f.path,
+ mnemonic = "GoTestGenTest",
+ env = dict(go_environment_vars(ctx), RUNDIR = ctx.label.package),
+ )
+
+ # Compile the generated main against the deps plus the library under test,
+ # pack it into an archive, and link that archive into the test executable.
+ _emit_go_compile_action(
+ ctx,
+ sources = depset([main_go]),
+ deps = ctx.attr.deps + [lib_result],
+ libpaths = lib_result.transitive_go_library_paths,
+ out_object = main_object,
+ gc_goopts = _gc_goopts(ctx),
+ )
+ _emit_go_pack_action(ctx, main_lib, [main_object])
+ _emit_go_link_action(
+ ctx,
+ transitive_go_library_paths = lib_result.transitive_go_library_paths,
+ transitive_go_libraries = lib_result.transitive_go_libraries,
+ cgo_deps = lib_result.transitive_cgo_deps,
+ libs = [main_lib],
+ executable = ctx.outputs.executable,
+ gc_linkopts = _gc_linkopts(ctx),
+ )
+
+ # TODO(bazel-team): the Go tests should do a chdir to the directory
+ # holding the data files, so open-source go tests continue to work
+ # without code changes.
+ runfiles = ctx.runfiles(files = [ctx.outputs.executable])
+ runfiles = runfiles.merge(lib_result.runfiles)
+ return struct(
+ files = depset([ctx.outputs.executable]),
+ runfiles = runfiles,
+ )
+
+# Attributes that locate the Go toolchain and helper tools. This dict is
+# added into the attribute sets of the library, codegen and import rules
+# declared in this file. Every entry is configured for the host.
+go_env_attrs = {
+ "toolchain": attr.label(
+ default = Label("//go/toolchain:toolchain"),
+ allow_files = True,
+ cfg = "host",
+ ),
+ "go_tool": attr.label(
+ default = Label("//go/toolchain:go_tool"),
+ single_file = True,
+ allow_files = True,
+ cfg = "host",
+ ),
+ "go_prefix": attr.label(
+ providers = ["go_prefix"],
+ default = Label(
+ "//:go_prefix",
+ relative_to_caller_repository = True,
+ ),
+ allow_files = False,
+ cfg = "host",
+ ),
+ "go_src": attr.label(
+ default = Label("//go/toolchain:go_src"),
+ allow_files = True,
+ cfg = "host",
+ ),
+ "go_include": attr.label(
+ default = Label("//go/toolchain:go_include"),
+ single_file = True,
+ allow_files = True,
+ cfg = "host",
+ ),
+ "go_root": attr.label(
+ providers = ["go_root"],
+ default = Label(
+ "//go/toolchain:go_root",
+ ),
+ allow_files = False,
+ cfg = "host",
+ ),
+ # Private helper executables.
+ "_filter_tags": attr.label(
+ default = Label("//go/tools/filter_tags"),
+ cfg = "host",
+ executable = True,
+ single_file = True,
+ ),
+ "_filter_exec": attr.label(
+ default = Label("//go/tools/filter_exec"),
+ cfg = "host",
+ executable = True,
+ single_file = True,
+ ),
+ "_asm": attr.label(
+ default = Label("//go/tools/builders:asm"),
+ cfg = "host",
+ executable = True,
+ single_file = True,
+ ),
+}
+
+# Attributes of a Go library: the toolchain attributes plus sources, data,
+# dependencies, import path, an optional embedded `library` and compiler
+# options.
+go_library_attrs = go_env_attrs + {
+ "data": attr.label_list(
+ allow_files = True,
+ cfg = "data",
+ ),
+ "srcs": attr.label_list(allow_files = go_filetype),
+ "deps": attr.label_list(
+ providers = [
+ "transitive_go_library_paths",
+ "transitive_go_libraries",
+ "transitive_cgo_deps",
+ ],
+ ),
+ "importpath": attr.string(),
+ "library": attr.label(
+ providers = [
+ "direct_deps",
+ "go_sources",
+ "asm_sources",
+ "cgo_object",
+ "gc_goopts",
+ ],
+ ),
+ "gc_goopts": attr.string_list(),
+}
+
+# Private attribute giving rules access to the C/C++ crosstool files.
+_crosstool_attrs = {
+ "_crosstool": attr.label(
+ default = Label("//tools/defaults:crosstool"),
+ ),
+}
+
+# Attributes of rules that link an executable: the library and crosstool
+# attributes plus linker options, link stamping and -X definitions.
+go_link_attrs = go_library_attrs + _crosstool_attrs + {
+ "gc_linkopts": attr.string_list(),
+ "linkstamp": attr.string(),
+ "x_defs": attr.string_dict(),
+}
+
+# go_library: implemented by go_library_impl; accepts an optional cgo_object
+# in addition to the common library attributes.
+go_library = rule(
+ go_library_impl,
+ attrs = go_library_attrs + {
+ "cgo_object": attr.label(
+ providers = [
+ "cgo_obj",
+ "cgo_deps",
+ ],
+ ),
+ },
+ fragments = ["cpp"],
+)
+
+# go_binary: executable rule implemented by go_binary_impl.
+# NOTE(review): go_link_attrs is already built from go_library_attrs and
+# _crosstool_attrs, so the first two operands below look redundant.
+go_binary = rule(
+ go_binary_impl,
+ attrs = go_library_attrs + _crosstool_attrs + go_link_attrs,
+ executable = True,
+ fragments = ["cpp"],
+)
+
+# go_test: test rule implemented by go_test_impl; adds the test_generator
+# tool used to produce the test main source.
+go_test = rule(
+ go_test_impl,
+ attrs = go_library_attrs + _crosstool_attrs + go_link_attrs + {
+ "test_generator": attr.label(
+ executable = True,
+ default = Label(
+ "//go/tools:generate_test_main",
+ ),
+ cfg = "host",
+ ),
+ },
+ executable = True,
+ fragments = ["cpp"],
+ test = True,
+)
+
+# Joins workspace_root and package_name with "/" when both are non-empty,
+# returns whichever one is non-empty otherwise, and "." when both are empty.
+def _pkg_dir(workspace_root, package_name):
+ if workspace_root and package_name:
+ return workspace_root + "/" + package_name
+ if workspace_root:
+ return workspace_root
+ if package_name:
+ return package_name
+ return "."
+
+# Returns path unchanged when it starts with "/"; otherwise prefixes it with
+# the literal "${execroot}/", a shell variable reference left for the
+# generated script to expand.
+def _exec_path(path):
+ if path.startswith("/"):
+ return path
+ return "${execroot}/" + path
+
+# Implementation of _cgo_filter_srcs. For every source file it declares a
+# "<stem>.filtered.<ext>" output: a copy of the source when the filter_tags
+# tool exits successfully for it, and an empty file otherwise.
+def _cgo_filter_srcs_impl(ctx):
+ srcs = ctx.files.srcs
+ dsts = []
+ cmds = []
+ for src in srcs:
+ # Split the path at its last "." into stem and extension.
+ stem, _, ext = src.path.rpartition(".")
+ dst_basename = "%s.filtered.%s" % (stem, ext)
+ dst = ctx.new_file(src, dst_basename)
+ cmds += [
+ "if '%s' -cgo -quiet '%s'; then" %
+ (ctx.executable._filter_tags.path, src.path),
+ " cp '%s' '%s'" % (src.path, dst.path),
+ "else",
+ " echo -n >'%s'" % dst.path,
+ "fi",
+ ]
+ dsts.append(dst)
+
+ # Name the params script after the label, including the package when the
+ # label is not in the root package.
+ if ctx.label.package == "":
+ script_name = ctx.label.name + ".CGoFilterSrcs.params"
+ else:
+ script_name = ctx.label.package + "/" + ctx.label.name + ".CGoFilterSrcs.params"
+ f = _emit_generate_params_action(cmds, ctx, script_name)
+ ctx.action(
+ inputs = [f, ctx.executable._filter_tags] + srcs,
+ outputs = dsts,
+ command = f.path,
+ mnemonic = "CgoFilterSrcs",
+ )
+ return struct(
+ files = depset(dsts),
+ )
+
+# Rule wrapping _cgo_filter_srcs_impl: takes cgo source files and the
+# filter_tags tool.
+_cgo_filter_srcs = rule(
+ implementation = _cgo_filter_srcs_impl,
+ attrs = {
+ "srcs": attr.label_list(
+ allow_files = cgo_filetype,
+ ),
+ "_filter_tags": attr.label(
+ default = Label("//go/tools/filter_tags"),
+ cfg = "host",
+ executable = True,
+ single_file = True,
+ ),
+ },
+ fragments = ["cpp"],
+)
+
+def _cgo_codegen_impl(ctx):
+ """Emits the CGoCodeGen action that runs `go tool cgo` on the Go sources.
+
+ Compiler and linker options are gathered from the rule's own attributes
+ and from the `cc` provider of every dep. The generated script filters the
+ Go sources with the filter_tags tool, runs cgo on those that pass, and
+ renames cgo's outputs to "<stem>.go" / "<stem>.c" in the object directory.
+
+ Returns:
+ A struct with the rule label, the declared outputs as `files`, and the
+ libraries of all deps as `cgo_deps`.
+ """
+ go_srcs = ctx.files.srcs
+ srcs = go_srcs + ctx.files.c_hdrs
+ linkopts = ctx.attr.linkopts
+ copts = ctx.fragments.cpp.c_options + ctx.attr.copts
+ deps = depset([], order = "topological")
+ for d in ctx.attr.deps:
+ srcs += list(d.cc.transitive_headers)
+ deps += d.cc.libs
+ copts += ["-D" + define for define in d.cc.defines]
+ for inc in d.cc.include_directories:
+ copts += ["-I", _exec_path(inc)]
+ for hdr in ctx.files.c_hdrs:
+ copts += ["-iquote", hdr.dirname]
+ for inc in d.cc.quote_include_directories:
+ copts += ["-iquote", _exec_path(inc)]
+ for inc in d.cc.system_include_directories:
+ copts += ["-isystem", _exec_path(inc)]
+ for lib in d.cc.libs:
+ # "libfoo.so" becomes "-L <dir> -l foo"; anything else is passed by path.
+ if lib.basename.startswith("lib") and lib.basename.endswith(".so"):
+ linkopts += ["-L", lib.dirname, "-l", lib.basename[3:-3]]
+ else:
+ linkopts += [lib.path]
+ linkopts += d.cc.link_flags
+
+ p = _pkg_dir(ctx.label.workspace_root, ctx.label.package) + "/"
+ if p == "./":
+ p = "" # workaround when cgo_library in repository root
+ out_dir = (ctx.configuration.genfiles_dir.path + "/" +
+ p + ctx.attr.outdir)
+ cc = ctx.fragments.cpp.compiler_executable
+ cmds = [
+ # We cannot use env for CC because $(CC) on OSX is relative
+ # and '../' does not work fine due to symlinks.
+ "export CC=$(cd $(dirname {cc}); pwd)/$(basename {cc})".format(cc = cc),
+ "export CXX=$CC",
+ 'objdir="%s/gen"' % out_dir,
+ "execroot=$(pwd)",
+ 'mkdir -p "$objdir"',
+ "unfiltered_go_files=(%s)" % " ".join(["'%s'" % f.path for f in go_srcs]),
+ "filtered_go_files=()",
+ # Files rejected by filter_tags still get outputs: a .go file holding
+ # only the package clause, and an empty .c file.
+ 'for file in "${unfiltered_go_files[@]}"; do',
+ ' stem=$(basename "$file" .go)',
+ ' if %s -cgo -quiet "$file"; then' % ctx.executable._filter_tags.path,
+ ' filtered_go_files+=("$file")',
+ " else",
+ ' grep --max-count 1 "^package " "$file" >"$objdir/$stem.go"',
+ ' echo -n >"$objdir/$stem.c"',
+ " fi",
+ "done",
+ "if [ ${#filtered_go_files[@]} -eq 0 ]; then",
+ # Report the failure on stderr (the original ">&1" was a no-op).
+ " echo no buildable Go source files in %s >&2" % str(ctx.label),
+ " exit 1",
+ "fi",
+ '"$GOROOT/bin/go" tool cgo -objdir "$objdir" -- %s "${filtered_go_files[@]}"' %
+ " ".join(['"%s"' % copt for copt in copts]),
+ # Rename the outputs using glob so we don't have to understand cgo's mangling
+ # TODO(#350): might be fixed by this?.
+ 'for file in "${filtered_go_files[@]}"; do',
+ ' stem=$(basename "$file" .go)',
+ ' mv "$objdir/"*"$stem.cgo1.go" "$objdir/$stem.go"',
+ ' mv "$objdir/"*"$stem.cgo2.c" "$objdir/$stem.c"',
+ "done",
+ "rm -f $objdir/_cgo_.o $objdir/_cgo_flags",
+ ]
+
+ f = _emit_generate_params_action(cmds, ctx, out_dir + ".CGoCodeGenFile.params")
+
+ inputs = (srcs + ctx.files.toolchain + ctx.files._crosstool +
+ [f, ctx.executable._filter_tags])
+ ctx.action(
+ inputs = inputs,
+ outputs = ctx.outputs.outs,
+ mnemonic = "CGoCodeGen",
+ progress_message = "CGoCodeGen %s" % ctx.label,
+ command = f.path,
+ env = go_environment_vars(ctx) + {
+ "CGO_LDFLAGS": " ".join(linkopts),
+ },
+ )
+ return struct(
+ label = ctx.label,
+ files = depset(ctx.outputs.outs),
+ cgo_deps = deps,
+ )
+
+# Rule wrapping _cgo_codegen_impl. `srcs` and `outs` must be non-empty and
+# `outdir` is mandatory; outputs are placed in the genfiles tree.
+_cgo_codegen_rule = rule(
+ _cgo_codegen_impl,
+ attrs = go_env_attrs + _crosstool_attrs + {
+ "srcs": attr.label_list(
+ allow_files = go_filetype,
+ non_empty = True,
+ ),
+ "c_hdrs": attr.label_list(
+ allow_files = cc_hdr_filetype,
+ ),
+ "deps": attr.label_list(
+ allow_files = False,
+ providers = ["cc"],
+ ),
+ "copts": attr.string_list(),
+ "linkopts": attr.string_list(),
+ "outdir": attr.string(mandatory = True),
+ "outs": attr.output_list(
+ mandatory = True,
+ non_empty = True,
+ ),
+ },
+ fragments = ["cpp"],
+ output_to_genfiles = True,
+)
+
+# Macro that instantiates _cgo_codegen_rule and returns a struct naming the
+# files it will generate. Besides the arguments documented in the docstring,
+# copts, go_tool and toolchain are forwarded to the rule unchanged.
+def _cgo_codegen(
+ name,
+ srcs,
+ c_hdrs = [],
+ deps = [],
+ copts = [],
+ linkopts = [],
+ go_tool = None,
+ toolchain = None):
+ """Generates glue codes for interop between C and Go
+
+ Args:
+ name: A unique name of the rule
+ srcs: list of Go source files.
+ Each of them must contain `import "C"`.
+ c_hdrs: C/C++ header files necessary to determine kinds of
+ C/C++ identifiers in srcs.
+ deps: A list of cc_library rules.
+ The generated codes are expected to be linked with these deps.
+ linkopts: A list of linker options,
+ These flags are passed to the linker when the generated codes
+ are linked into the target binary.
+ """
+ outdir = name + ".dir"
+ outgen = outdir + "/gen"
+
+ # Each "<dir>/<base>.go" source yields a "<base>.go" and a "<base>.c"
+ # thunk under outgen.
+ go_thunks = []
+ c_thunks = []
+ for s in srcs:
+ if not s.endswith(".go"):
+ fail("not a .go file: %s" % s)
+ basename = s[:-3]
+ if basename.rfind("/") >= 0:
+ basename = basename[basename.rfind("/") + 1:]
+ go_thunks.append(outgen + "/" + basename + ".go")
+ c_thunks.append(outgen + "/" + basename + ".c")
+
+ # Note: the returned `outdir` field is the "gen" subdirectory (outgen),
+ # whereas the rule below receives the parent directory as its outdir.
+ outs = struct(
+ name = name,
+ outdir = outgen,
+ go_thunks = go_thunks,
+ c_thunks = c_thunks,
+ c_exports = [
+ outgen + "/_cgo_export.c",
+ outgen + "/_cgo_export.h",
+ ],
+ c_dummy = outgen + "/_cgo_main.c",
+ gotypes = outgen + "/_cgo_gotypes.go",
+ )
+
+ _cgo_codegen_rule(
+ name = name,
+ srcs = srcs,
+ c_hdrs = c_hdrs,
+ deps = deps,
+ copts = copts,
+ linkopts = linkopts,
+ go_tool = go_tool,
+ toolchain = toolchain,
+ outdir = outdir,
+ outs = outs.go_thunks + outs.c_thunks + outs.c_exports + [
+ outs.c_dummy,
+ outs.gotypes,
+ ],
+ visibility = ["//visibility:private"],
+ )
+ return outs
+
+# Implementation of _cgo_import: emits a CGoImportGen action that runs
+# `go tool cgo -dynimport` on cgo_o and writes the result to `out`. The
+# -dynpackage value is the output of the extract_package tool run on
+# sample_go_src.
+def _cgo_import_impl(ctx):
+ cmds = [
+ (ctx.file.go_tool.path + " tool cgo" +
+ " -dynout " + ctx.outputs.out.path +
+ " -dynimport " + ctx.file.cgo_o.path +
+ " -dynpackage $(%s %s)" % (
+ ctx.executable._extract_package.path,
+ ctx.file.sample_go_src.path,
+ )),
+ ]
+ f = _emit_generate_params_action(cmds, ctx, ctx.outputs.out.path + ".CGoImportGenFile.params")
+ ctx.action(
+ inputs = (ctx.files.toolchain +
+ [
+ f,
+ ctx.file.go_tool,
+ ctx.executable._extract_package,
+ ctx.file.cgo_o,
+ ctx.file.sample_go_src,
+ ]),
+ outputs = [ctx.outputs.out],
+ command = f.path,
+ mnemonic = "CGoImportGen",
+ env = go_environment_vars(ctx),
+ )
+ return struct(
+ files = depset([ctx.outputs.out]),
+ )
+
+# Rule wrapping _cgo_import_impl: reads cgo_o and sample_go_src, writes out.
+_cgo_import = rule(
+ _cgo_import_impl,
+ attrs = go_env_attrs + {
+ "cgo_o": attr.label(
+ allow_files = True,
+ single_file = True,
+ ),
+ "sample_go_src": attr.label(
+ allow_files = True,
+ single_file = True,
+ ),
+ "out": attr.output(
+ mandatory = True,
+ ),
+ "_extract_package": attr.label(
+ default = Label("//go/tools/extract_package"),
+ executable = True,
+ cfg = "host",
+ ),
+ },
+ fragments = ["cpp"],
+)
+
+# Implementation of _cgo_genrule: emits no actions. It only returns a struct
+# exposing the rule's srcs, cgo_object and deps, with empty assembly sources,
+# assembly headers and gc_goopts.
+def _cgo_genrule_impl(ctx):
+ return struct(
+ label = ctx.label,
+ go_sources = ctx.files.srcs,
+ asm_sources = [],
+ asm_headers = [],
+ cgo_object = ctx.attr.cgo_object,
+ direct_deps = ctx.attr.deps,
+ gc_goopts = [],
+ )
+
+# Rule wrapping _cgo_genrule_impl: .go sources, a cgo_object and Go deps.
+_cgo_genrule = rule(
+ _cgo_genrule_impl,
+ attrs = {
+ "srcs": attr.label_list(allow_files = FileType([".go"])),
+ "cgo_object": attr.label(
+ providers = [
+ "cgo_obj",
+ "cgo_deps",
+ ],
+ ),
+ "deps": attr.label_list(
+ providers = [
+ "direct_deps",
+ "transitive_go_library_paths",
+ "transitive_go_libraries",
+ "transitive_cgo_deps",
+ ],
+ ),
+ },
+ fragments = ["cpp"],
+)
+
+"""Documentation for the _cgo_import rule: generates symbol-import directives for cgo.
+
+Args:
+ cgo_o: The loadable object to extract dynamic symbols from.
+ sample_go_src: A Go source file which is compiled together with the generated file.
+ The generated file will have the same Go package name as this file.
+ out: Destination of the generated code.
+"""
+
+# Implementation of _cgo_object: emits a CGoObject action that runs the C
+# compiler driver on the last file of `src`, producing `out`.
+def _cgo_object_impl(ctx):
+ arguments = _c_linker_options(ctx, blocklist = [
+ # never link any dependency libraries
+ "-l",
+ "-L",
+ # manage flags to ld(1) by ourselves
+ "-Wl,",
+ ])
+ arguments += [
+ "-o",
+ ctx.outputs.out.path,
+ "-nostdlib",
+ "-Wl,-r",
+ ]
+ # The flag that pulls in the whole archive differs between Darwin and
+ # other platforms.
+ if _is_darwin_cpu(ctx):
+ arguments += ["-shared", "-Wl,-all_load"]
+ else:
+ arguments += ["-Wl,-whole-archive"]
+
+ # Only the last file provided by `src` is linked.
+ lo = ctx.files.src[-1]
+ arguments += [lo.path]
+
+ ctx.action(
+ inputs = [lo] + ctx.files._crosstool,
+ outputs = [ctx.outputs.out],
+ mnemonic = "CGoObject",
+ progress_message = "Linking %s" % ctx.outputs.out.short_path,
+ executable = ctx.fragments.cpp.compiler_executable,
+ arguments = arguments,
+ )
+ runfiles = ctx.runfiles(collect_data = True)
+ runfiles = runfiles.merge(ctx.attr.src.data_runfiles)
+ # cgo_deps is forwarded unchanged from the cgogen target.
+ return struct(
+ files = depset([ctx.outputs.out]),
+ cgo_obj = ctx.outputs.out,
+ cgo_deps = ctx.attr.cgogen.cgo_deps,
+ runfiles = runfiles,
+ )
+
+# Rule wrapping _cgo_object_impl; src, cgogen and out are all mandatory.
+_cgo_object = rule(
+ _cgo_object_impl,
+ attrs = _crosstool_attrs + {
+ "src": attr.label(
+ mandatory = True,
+ providers = ["cc"],
+ ),
+ "cgogen": attr.label(
+ mandatory = True,
+ providers = ["cgo_deps"],
+ ),
+ "out": attr.output(
+ mandatory = True,
+ ),
+ },
+ fragments = ["cpp"],
+)
+
+"""Documentation for the _cgo_object rule: generates _all.o to be archived together with Go objects.
+
+Args:
+ src: source static library which contains objects
+ cgogen: _cgo_codegen rule which knows the dependency cc_library() rules
+ to be linked together with src when we generate the final go binary.
+"""
+
+# Declares the targets shared by cgo_genrule and cgo_library: the cgo code
+# generator, an optional filter for the C sources, a cc_library bundling the
+# generated C code, the _cgo_.o binary, the _cgo_import.go generator and the
+# _all.o object. Returns the struct produced by _cgo_codegen.
+def _setup_cgo_library(name, srcs, cdeps, copts, clinkopts, go_tool, toolchain):
+ # Partition srcs into Go files, C headers and everything else.
+ go_srcs = [s for s in srcs if s.endswith(".go")]
+ c_hdrs = [s for s in srcs if any([s.endswith(ext) for ext in hdr_exts])]
+ c_srcs = [s for s in srcs if not s in (go_srcs + c_hdrs)]
+
+ # Split cgo files into .go parts and .c parts (plus some other files).
+ cgogen = _cgo_codegen(
+ name = name + ".cgo",
+ srcs = go_srcs,
+ c_hdrs = c_hdrs,
+ deps = cdeps,
+ copts = copts,
+ linkopts = clinkopts,
+ go_tool = go_tool,
+ toolchain = toolchain,
+ )
+
+ # Filter c_srcs with build constraints.
+ c_filtered_srcs = []
+ if len(c_srcs) > 0:
+ c_filtered_srcs_name = name + "_filter_cgo_srcs"
+ _cgo_filter_srcs(
+ name = c_filtered_srcs_name,
+ srcs = c_srcs,
+ )
+ c_filtered_srcs.append(":" + c_filtered_srcs_name)
+
+ # Package directory; prefixed with "external/<repo>" when REPOSITORY_NAME
+ # is longer than one character.
+ pkg_dir = _pkg_dir(
+ "external/" + REPOSITORY_NAME[1:] if len(REPOSITORY_NAME) > 1 else "",
+ PACKAGE_NAME,
+ )
+
+ # Platform-specific settings
+ native.config_setting(
+ name = name + "_windows_setting",
+ values = {
+ "cpu": "x64_windows_msvc",
+ },
+ )
+ platform_copts = select({
+ ":" + name + "_windows_setting": ["-mthreads"],
+ "//conditions:default": ["-pthread"],
+ })
+ platform_linkopts = select({
+ ":" + name + "_windows_setting": ["-mthreads"],
+ "//conditions:default": ["-pthread"],
+ })
+
+ # Bundles objects into an archive so that _cgo_.o and _all.o can share them.
+ native.cc_library(
+ name = cgogen.outdir + "/_cgo_lib",
+ srcs = cgogen.c_thunks + cgogen.c_exports + c_filtered_srcs + c_hdrs,
+ deps = cdeps,
+ copts = copts + platform_copts + [
+ "-I",
+ pkg_dir,
+ "-I",
+ "$(GENDIR)/" + pkg_dir + "/" + cgogen.outdir,
+ # The generated thunks often contain unused variables.
+ "-Wno-unused-variable",
+ ],
+ linkopts = clinkopts + platform_linkopts,
+ linkstatic = 1,
+ # _cgo_.o and _all.o keep all objects in this archive.
+ # But it should not be very annoying in the final binary target
+ # because _cgo_object rule does not propagate alwayslink=1
+ alwayslink = 1,
+ visibility = ["//visibility:private"],
+ )
+
+ # Loadable object which cgo reads when it generates _cgo_import.go
+ native.cc_binary(
+ name = cgogen.outdir + "/_cgo_.o",
+ srcs = [cgogen.c_dummy],
+ deps = cdeps + [cgogen.outdir + "/_cgo_lib"],
+ copts = copts,
+ linkopts = clinkopts,
+ visibility = ["//visibility:private"],
+ )
+ # The first Go source supplies the package name for the generated import
+ # file, so srcs must contain at least one .go file.
+ _cgo_import(
+ name = "%s.cgo.importgen" % name,
+ cgo_o = cgogen.outdir + "/_cgo_.o",
+ out = cgogen.outdir + "/_cgo_import.go",
+ sample_go_src = go_srcs[0],
+ go_tool = go_tool,
+ toolchain = toolchain,
+ visibility = ["//visibility:private"],
+ )
+
+ _cgo_object(
+ name = cgogen.outdir + "/_cgo_object",
+ src = cgogen.outdir + "/_cgo_lib",
+ out = cgogen.outdir + "/_all.o",
+ cgogen = cgogen.name,
+ visibility = ["//visibility:private"],
+ )
+ return cgogen
+
+# Macro: sets up the shared cgo targets with the default toolchain and
+# go_tool, then declares a _cgo_genrule named `name` over the generated Go
+# files. Extra keyword arguments are forwarded to _cgo_genrule.
+def cgo_genrule(
+ name,
+ srcs,
+ copts = [],
+ clinkopts = [],
+ cdeps = [],
+ **kwargs):
+ cgogen = _setup_cgo_library(
+ name = name,
+ srcs = srcs,
+ cdeps = cdeps,
+ copts = copts,
+ clinkopts = clinkopts,
+ toolchain = None,
+ go_tool = None,
+ )
+ _cgo_genrule(
+ name = name,
+ srcs = cgogen.go_thunks + [
+ cgogen.gotypes,
+ cgogen.outdir + "/_cgo_import.go",
+ ],
+ cgo_object = cgogen.outdir + "/_cgo_object",
+ **kwargs
+ )
+
+def cgo_library(
+ name,
+ srcs,
+ toolchain = None,
+ go_tool = None,
+ copts = [],
+ clinkopts = [],
+ cdeps = [],
+ **kwargs):
+ """Builds a cgo-enabled go library.
+
+ Args:
+ name: A unique name for this rule.
+ srcs: List of Go, C and C++ files that are processed to build a Go library.
+ Those Go files must contain `import "C"`.
+ C and C++ files can be anything allowed in `srcs` attribute of
+ `cc_library`.
+ copts: Add these flags to the C++ compiler.
+ clinkopts: Add these flags to the C++ linker.
+ cdeps: List of C/C++ libraries to be linked into the binary target.
+ They must be `cc_library` rules.
+ deps: List of other libraries to be linked to this library target.
+ data: List of files needed by this rule at runtime.
+
+ NOTE:
+ `srcs` cannot contain pure-Go files, which do not have `import "C"`.
+ So you need to define another `go_library` when you build a go package with
+ both cgo-enabled and pure-Go sources.
+
+ ```
+ cgo_library(
+ name = "cgo_enabled",
+ srcs = ["cgo-enabled.go", "foo.cc", "bar.S", "baz.a"],
+ )
+
+ go_library(
+ name = "go_default_library",
+ srcs = ["pure-go.go"],
+ library = ":cgo_enabled",
+ )
+ ```
+ """
+ # Declare the code generator and the C-side targets.
+ cgogen = _setup_cgo_library(
+ name = name,
+ srcs = srcs,
+ cdeps = cdeps,
+ copts = copts,
+ clinkopts = clinkopts,
+ go_tool = go_tool,
+ toolchain = toolchain,
+ )
+
+ # The go_library is built from the generated Go files and embeds the cgo
+ # object; deps, data and any other keyword arguments arrive via kwargs.
+ go_library(
+ name = name,
+ srcs = cgogen.go_thunks + [
+ cgogen.gotypes,
+ cgogen.outdir + "/_cgo_import.go",
+ ],
+ cgo_object = cgogen.outdir + "/_cgo_object",
+ go_tool = go_tool,
+ toolchain = toolchain,
+ **kwargs
+ )
diff --git a/syntax/walk.go b/syntax/walk.go
new file mode 100644
index 0000000..1491149
--- /dev/null
+++ b/syntax/walk.go
@@ -0,0 +1,163 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+// Walk traverses a syntax tree in depth-first order.
+// It starts by calling f(n); n must not be nil.
+// If f returns true, Walk calls itself
+// recursively for each non-nil child of n,
+// and then calls f(nil).
+// If f returns false, Walk returns immediately:
+// the children of n are skipped and f(nil) is not called for n.
+//
+// Walk panics if n is nil or is of a node type it does not know.
+func Walk(n Node, f func(Node) bool) {
+ if n == nil {
+ panic("nil")
+ }
+ if !f(n) {
+ return
+ }
+
+ // TODO(adonovan): opt: order cases using profile data.
+ switch n := n.(type) {
+ case *File:
+ walkStmts(n.Stmts, f)
+
+ case *ExprStmt:
+ Walk(n.X, f)
+
+ case *BranchStmt:
+ // no-op
+
+ case *IfStmt:
+ Walk(n.Cond, f)
+ walkStmts(n.True, f)
+ walkStmts(n.False, f)
+
+ case *AssignStmt:
+ Walk(n.LHS, f)
+ Walk(n.RHS, f)
+
+ case *DefStmt:
+ Walk(n.Name, f)
+ for _, param := range n.Params {
+ Walk(param, f)
+ }
+ walkStmts(n.Body, f)
+
+ case *ForStmt:
+ Walk(n.Vars, f)
+ Walk(n.X, f)
+ walkStmts(n.Body, f)
+
+ case *ReturnStmt:
+ if n.Result != nil {
+ Walk(n.Result, f)
+ }
+
+ case *LoadStmt:
+ Walk(n.Module, f)
+ for _, from := range n.From {
+ Walk(from, f)
+ }
+ for _, to := range n.To {
+ Walk(to, f)
+ }
+
+ case *Ident, *Literal:
+ // no-op
+
+ case *ListExpr:
+ for _, x := range n.List {
+ Walk(x, f)
+ }
+
+ case *ParenExpr:
+ Walk(n.X, f)
+
+ case *CondExpr:
+ Walk(n.Cond, f)
+ Walk(n.True, f)
+ Walk(n.False, f)
+
+ case *IndexExpr:
+ Walk(n.X, f)
+ Walk(n.Y, f)
+
+ case *DictEntry:
+ Walk(n.Key, f)
+ Walk(n.Value, f)
+
+ case *SliceExpr:
+ Walk(n.X, f)
+ if n.Lo != nil {
+ Walk(n.Lo, f)
+ }
+ if n.Hi != nil {
+ Walk(n.Hi, f)
+ }
+ if n.Step != nil {
+ Walk(n.Step, f)
+ }
+
+ case *Comprehension:
+ Walk(n.Body, f)
+ for _, clause := range n.Clauses {
+ Walk(clause, f)
+ }
+
+ case *IfClause:
+ Walk(n.Cond, f)
+
+ case *ForClause:
+ Walk(n.Vars, f)
+ Walk(n.X, f)
+
+ case *TupleExpr:
+ for _, x := range n.List {
+ Walk(x, f)
+ }
+
+ case *DictExpr:
+ // Visit each entry node itself, not just its key and value, so
+ // that f is called on every child of the DictExpr. The recursive
+ // call dispatches to the *DictEntry case above.
+ for _, entry := range n.List {
+ Walk(entry, f)
+ }
+
+ case *UnaryExpr:
+ // The operand may be absent.
+ if n.X != nil {
+ Walk(n.X, f)
+ }
+
+ case *BinaryExpr:
+ Walk(n.X, f)
+ Walk(n.Y, f)
+
+ case *DotExpr:
+ Walk(n.X, f)
+ Walk(n.Name, f)
+
+ case *CallExpr:
+ Walk(n.Fn, f)
+ for _, arg := range n.Args {
+ Walk(arg, f)
+ }
+
+ case *LambdaExpr:
+ for _, param := range n.Params {
+ Walk(param, f)
+ }
+ Walk(n.Body, f)
+
+ default:
+ panic(n)
+ }
+
+ // Signal that all children of n have been visited.
+ f(nil)
+}
+
+// walkStmts applies Walk with the visitor f to each statement of stmts,
+// in order.
+func walkStmts(stmts []Stmt, f func(Node) bool) {
+ for i := range stmts {
+ Walk(stmts[i], f)
+ }
+}
diff --git a/syntax/walk_test.go b/syntax/walk_test.go
new file mode 100644
index 0000000..00d9784
--- /dev/null
+++ b/syntax/walk_test.go
@@ -0,0 +1,103 @@
+package syntax_test
+
+import (
+ "bytes"
+ "fmt"
+ "log"
+ "reflect"
+ "strings"
+ "testing"
+
+ "go.starlark.net/syntax"
+)
+
+// TestWalk parses a small program, records the type name of every node that
+// Walk visits (indented by depth), and compares the listing with the
+// expected tree.
+func TestWalk(t *testing.T) {
+ const src = `
+for x in y:
+ if x:
+ pass
+ else:
+ f([2*x for x in "abc"])
+`
+ // TODO(adonovan): test that it finds all syntax.Nodes
+ // (compare against a reflect-based implementation).
+ // TODO(adonovan): test that the result of f is used to prune
+ // the descent.
+ file, err := syntax.Parse("hello.go", src, 0)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ var (
+ out bytes.Buffer
+ level int
+ )
+ // A nil node marks the end of a subtree, so step back out one level;
+ // any other node is printed at the current level before descending.
+ visit := func(n syntax.Node) bool {
+ if n != nil {
+ name := strings.TrimPrefix(reflect.TypeOf(n).String(), "*syntax.")
+ fmt.Fprintf(&out, "%s%s\n", strings.Repeat(" ", level), name)
+ level++
+ } else {
+ level--
+ }
+ return true
+ }
+ syntax.Walk(file, visit)
+
+ const expected = `
+File
+ ForStmt
+ Ident
+ Ident
+ IfStmt
+ Ident
+ BranchStmt
+ ExprStmt
+ CallExpr
+ Ident
+ Comprehension
+ BinaryExpr
+ Literal
+ Ident
+ ForClause
+ Ident
+ Literal`
+ got := strings.TrimSpace(out.String())
+ want := strings.TrimSpace(expected)
+ if got != want {
+ t.Errorf("got %s, want %s", got, want)
+ }
+}
+
+// ExampleWalk shows how to use Walk to list, in traversal order,
+// every identifier in a Starlark source file. The program itself
+// is nonsense chosen to exercise a variety of grammar.
+func ExampleWalk() {
+ const src = `
+load("library", "a")
+
+def b(c, *, d=e):
+ f += {g: h}
+ i = -(j)
+ return k.l[m + n]
+
+for o in [p for q, r in s if t]:
+ u(lambda: v, w[x:y:z])
+`
+ tree, err := syntax.Parse("hello.star", src, 0)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ // Collect the name of each *syntax.Ident; always continue the descent.
+ var names []string
+ collect := func(n syntax.Node) bool {
+ id, isIdent := n.(*syntax.Ident)
+ if isIdent {
+ names = append(names, id.Name)
+ }
+ return true
+ }
+ syntax.Walk(tree, collect)
+ fmt.Println(strings.Join(names, " "))
+
+ // The identifier 'a' appears in both LoadStmt.From[0] and LoadStmt.To[0].
+
+ // Output:
+ // a a b c d e f g h i j k l m n o p q r s t u v w x y z
+}