aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Keith Zantow <kzantow@gmail.com> 2022-10-07 15:39:20 -0400
committer Keith Zantow <kzantow@gmail.com> 2022-10-07 15:39:20 -0400
commit ab3e717feaf76784e3939592d98d788d19a4c022 (patch)
tree bcc104b7bbbfd95b471fe6125d22956d510ddf6c
parent b57c493f0652477bf3764be7273eab6ff11d1eb1 (diff)
download spdx-tools-ab3e717feaf76784e3939592d98d788d19a4c022.tar.gz
chore: add idsearcher functions for 2.3
Signed-off-by: Keith Zantow <kzantow@gmail.com>
-rw-r--r-- idsearcher/idsearcher.go 125
-rw-r--r-- idsearcher/idsearcher_test.go 289
2 files changed, 414 insertions, 0 deletions
diff --git a/idsearcher/idsearcher.go b/idsearcher/idsearcher.go
index 29b0faa..a5176ca 100644
--- a/idsearcher/idsearcher.go
+++ b/idsearcher/idsearcher.go
@@ -7,6 +7,7 @@ package idsearcher
import (
"bufio"
"fmt"
+ "github.com/spdx/tools-golang/spdx/v2_3"
"os"
"path/filepath"
"regexp"
@@ -267,6 +268,130 @@ func BuildIDsDocument2_2(packageName string, dirRoot string, idconfig *Config2_2
return doc, nil
}
+// ===== 2.3 Searcher functions =====
+
+// Config2_3 is a collection of configuration settings for idsearcher
+// (for version 2.3 SPDX Documents). A few mandatory fields are set here
+// so that they can be reused in multiple calls to BuildIDsDocument2_3.
+type Config2_3 struct {
+ // NamespacePrefix should be a URI representing a prefix for the
+ // namespace with which the SPDX Document will be associated.
+ // It will be used in the DocumentNamespace field in the CreationInfo
+ // section, followed by the per-Document package name and a random UUID.
+ NamespacePrefix string
+
+ // BuilderPathsIgnored lists certain paths to be omitted from the built
+ // document. Each string should be a path, relative to the package's
+ // dirRoot, to a specific file or (for all files in a directory) ending
+ // in a slash. Prefix the string with "**" to omit all instances of that
+ // file / directory, regardless of where it is in the file tree.
+ BuilderPathsIgnored []string
+
+ // SearcherPathsIgnored lists certain paths that should not be searched
+ // by idsearcher, even if those paths have Files present. It uses the
+ // same format as BuilderPathsIgnored.
+ SearcherPathsIgnored []string
+}
+
+// BuildIDsDocument2_3 creates an SPDX Document (version 2.3) and searches for
+// short-form IDs in each file, filling in license fields as appropriate. It
+// returns that document or error if any is encountered. Arguments:
+// - packageName: name of package / directory
+// - dirRoot: path to directory to be analyzed
+// - idconfig: Config2_3 settings; supplies the NamespacePrefix URI and
+// the paths that the builder and the searcher should ignore
+func BuildIDsDocument2_3(packageName string, dirRoot string, idconfig *Config2_3) (*v2_3.Document, error) {
+ // first, build the Document using builder
+ bconfig := &builder.Config2_3{
+ NamespacePrefix: idconfig.NamespacePrefix,
+ CreatorType: "Tool",
+ Creator: "github.com/spdx/tools-golang/idsearcher",
+ PathsIgnored: idconfig.BuilderPathsIgnored,
+ }
+ doc, err := builder.Build2_3(packageName, dirRoot, bconfig)
+ if err != nil {
+ return nil, err
+ }
+ if doc == nil {
+ return nil, fmt.Errorf("builder returned nil Document")
+ }
+ if doc.Packages == nil {
+ return nil, fmt.Errorf("builder returned nil Packages map")
+ }
+ if len(doc.Packages) != 1 {
+ return nil, fmt.Errorf("builder returned %d Packages", len(doc.Packages))
+ }
+
+ // now, walk through each file and find its licenses (if any)
+ pkg := doc.Packages[0]
+ if pkg == nil {
+ return nil, fmt.Errorf("builder returned nil Package")
+ }
+ if pkg.Files == nil {
+ return nil, fmt.Errorf("builder returned nil Files in Package")
+ }
+ licsForPackage := map[string]int{}
+ for _, f := range pkg.Files {
+ // start by initializing / clearing values
+ f.LicenseInfoInFiles = []string{"NOASSERTION"}
+ f.LicenseConcluded = "NOASSERTION"
+
+ // check whether the searcher should ignore this file
+ if utils.ShouldIgnore(f.FileName, idconfig.SearcherPathsIgnored) {
+ continue
+ }
+
+ fPath := filepath.Join(dirRoot, f.FileName)
+ // FIXME this is not preferable -- ignoring error
+ ids, _ := searchFileIDs(fPath)
+ // FIXME for now, proceed onwards with whatever IDs we obtained.
+ // FIXME instead of ignoring the error, should probably either log it,
+ // FIXME and/or enable the caller to configure what should happen.
+
+ // separate out for this file's licenses
+ licsForFile := map[string]int{}
+ licsParens := []string{}
+ for _, lid := range ids {
+ // get individual elements and add for file and package
+ licElements := getIndividualLicenses(lid)
+ for _, elt := range licElements {
+ licsForFile[elt] = 1
+ licsForPackage[elt] = 1
+ }
+ // parenthesize if needed and add to slice for joining
+ licsParens = append(licsParens, makeElement(lid))
+ }
+
+ // OK -- now we can fill in the file's details, or NOASSERTION if none
+ if len(licsForFile) > 0 {
+ f.LicenseInfoInFiles = []string{}
+ for lic := range licsForFile {
+ f.LicenseInfoInFiles = append(f.LicenseInfoInFiles, lic)
+ }
+ sort.Strings(f.LicenseInfoInFiles)
+ // avoid adding parens and joining for single-ID items
+ if len(licsParens) == 1 {
+ f.LicenseConcluded = ids[0]
+ } else {
+ f.LicenseConcluded = strings.Join(licsParens, " AND ")
+ }
+ }
+ }
+
+ // and finally, we can fill in the package's details
+ if len(licsForPackage) == 0 {
+ pkg.PackageLicenseInfoFromFiles = []string{"NOASSERTION"}
+ } else {
+ pkg.PackageLicenseInfoFromFiles = []string{}
+ for lic := range licsForPackage {
+ pkg.PackageLicenseInfoFromFiles = append(pkg.PackageLicenseInfoFromFiles, lic)
+ }
+ sort.Strings(pkg.PackageLicenseInfoFromFiles)
+ }
+
+ return doc, nil
+}
+
// ===== Utility functions (not version-specific) =====
func searchFileIDs(filePath string) ([]string, error) {
idsMap := map[string]int{}
diff --git a/idsearcher/idsearcher_test.go b/idsearcher/idsearcher_test.go
index 00e5206..7d7a5bb 100644
--- a/idsearcher/idsearcher_test.go
+++ b/idsearcher/idsearcher_test.go
@@ -584,6 +584,295 @@ func Test2_2SearcherFailsWithInvalidPath(t *testing.T) {
}
}
+// ===== 2.3 Searcher top-level function tests =====
+func Test2_3SearcherCanFillInIDs(t *testing.T) {
+ packageName := "project2"
+ dirRoot := "../testdata/project2/"
+ config := &Config2_3{
+ NamespacePrefix: "https://github.com/swinslow/spdx-docs/spdx-go/testdata-",
+ }
+
+ doc, err := BuildIDsDocument2_3(packageName, dirRoot, config)
+ if err != nil {
+ t.Fatalf("expected nil error, got %v", err)
+ }
+ if doc == nil {
+ t.Fatalf("expected non-nil Document, got nil")
+ }
+
+ // not checking all contents of doc, see builder tests for those
+
+ // get the package and its files, checking size of each
+ if doc.Packages == nil {
+ t.Fatalf("expected non-nil Packages, got nil")
+ }
+ if len(doc.Packages) != 1 {
+ t.Fatalf("expected Packages len to be 1, got %d", len(doc.Packages))
+ }
+ pkg := doc.Packages[0]
+ if pkg == nil {
+ t.Fatalf("expected non-nil pkg, got nil")
+ }
+
+ if pkg.Files == nil {
+ t.Fatalf("expected non-nil Files, got nil")
+ }
+ if len(pkg.Files) != 6 {
+ t.Fatalf("expected Files len to be 6, got %d", len(pkg.Files))
+ }
+
+ fileInFolder := pkg.Files[0]
+ if fileInFolder.LicenseInfoInFiles == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFiles, got nil")
+ }
+ if len(fileInFolder.LicenseInfoInFiles) != 1 {
+ t.Fatalf("expected LicenseInfoInFiles len to be 1, got %d", len(fileInFolder.LicenseInfoInFiles))
+ }
+ if fileInFolder.LicenseInfoInFiles[0] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", fileInFolder.LicenseInfoInFiles[0])
+ }
+ if fileInFolder.LicenseConcluded != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", fileInFolder.LicenseConcluded)
+ }
+
+ fileTrailingComment := pkg.Files[1]
+ if fileTrailingComment.LicenseInfoInFiles == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFiles, got nil")
+ }
+ if len(fileTrailingComment.LicenseInfoInFiles) != 1 {
+ t.Fatalf("expected LicenseInfoInFiles len to be 1, got %d", len(fileTrailingComment.LicenseInfoInFiles))
+ }
+ if fileTrailingComment.LicenseInfoInFiles[0] != "GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-or-later", fileTrailingComment.LicenseInfoInFiles[0])
+ }
+ if fileTrailingComment.LicenseConcluded != "GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-or-later", fileTrailingComment.LicenseConcluded)
+ }
+
+ fileHasDuplicateID := pkg.Files[2]
+ if fileHasDuplicateID.LicenseInfoInFiles == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFiles, got nil")
+ }
+ if len(fileHasDuplicateID.LicenseInfoInFiles) != 1 {
+ t.Fatalf("expected LicenseInfoInFiles len to be 1, got %d", len(fileHasDuplicateID.LicenseInfoInFiles))
+ }
+ if fileHasDuplicateID.LicenseInfoInFiles[0] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", fileHasDuplicateID.LicenseInfoInFiles[0])
+ }
+ if fileHasDuplicateID.LicenseConcluded != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", fileHasDuplicateID.LicenseConcluded)
+ }
+
+ fileHasID := pkg.Files[3]
+ if fileHasID.LicenseInfoInFiles == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFiles, got nil")
+ }
+ if len(fileHasID.LicenseInfoInFiles) != 2 {
+ t.Fatalf("expected LicenseInfoInFiles len to be 2, got %d", len(fileHasID.LicenseInfoInFiles))
+ }
+ if fileHasID.LicenseInfoInFiles[0] != "Apache-2.0" {
+ t.Errorf("expected %v, got %v", "Apache-2.0", fileHasID.LicenseInfoInFiles[0])
+ }
+ if fileHasID.LicenseInfoInFiles[1] != "GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-or-later", fileHasID.LicenseInfoInFiles[1])
+ }
+ if fileHasID.LicenseConcluded != "Apache-2.0 OR GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "Apache-2.0 OR GPL-2.0-or-later", fileHasID.LicenseConcluded)
+ }
+
+ fileMultipleIDs := pkg.Files[4]
+ if fileMultipleIDs.LicenseInfoInFiles == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFiles, got nil")
+ }
+ if len(fileMultipleIDs.LicenseInfoInFiles) != 5 {
+ t.Fatalf("expected LicenseInfoInFiles len to be 5, got %d", len(fileMultipleIDs.LicenseInfoInFiles))
+ }
+ if fileMultipleIDs.LicenseInfoInFiles[0] != "BSD-2-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-2-Clause", fileMultipleIDs.LicenseInfoInFiles[0])
+ }
+ if fileMultipleIDs.LicenseInfoInFiles[1] != "BSD-3-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-3-Clause", fileMultipleIDs.LicenseInfoInFiles[1])
+ }
+ // here, DO NOT keep the +
+ if fileMultipleIDs.LicenseInfoInFiles[2] != "EPL-1.0" {
+ t.Errorf("expected %v, got %v", "EPL-1.0", fileMultipleIDs.LicenseInfoInFiles[2])
+ }
+ if fileMultipleIDs.LicenseInfoInFiles[3] != "ISC" {
+ t.Errorf("expected %v, got %v", "ISC", fileMultipleIDs.LicenseInfoInFiles[3])
+ }
+ if fileMultipleIDs.LicenseInfoInFiles[4] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", fileMultipleIDs.LicenseInfoInFiles[4])
+ }
+ if fileMultipleIDs.LicenseConcluded != "((MIT AND BSD-3-Clause) OR ISC) AND BSD-2-Clause AND EPL-1.0+" {
+ t.Errorf("expected %v, got %v", "((MIT AND BSD-3-Clause) OR ISC) AND BSD-2-Clause AND EPL-1.0+", fileMultipleIDs.LicenseConcluded)
+ }
+
+ fileNoID := pkg.Files[5]
+ if fileNoID.LicenseInfoInFiles == nil {
+ t.Fatalf("expected non-nil LicenseInfoInFiles, got nil")
+ }
+ if len(fileNoID.LicenseInfoInFiles) != 1 {
+ t.Fatalf("expected LicenseInfoInFiles len to be 1, got %d", len(fileNoID.LicenseInfoInFiles))
+ }
+ if fileNoID.LicenseInfoInFiles[0] != "NOASSERTION" {
+ t.Errorf("expected %v, got %v", "NOASSERTION", fileNoID.LicenseInfoInFiles[0])
+ }
+ if fileNoID.LicenseConcluded != "NOASSERTION" {
+ t.Errorf("expected %v, got %v", "NOASSERTION", fileNoID.LicenseConcluded)
+ }
+
+ // and finally, the package should have all of these licenses
+ if pkg.PackageLicenseInfoFromFiles == nil {
+ t.Fatalf("expected non-nil PackageLicenseInfoFromFiles, got nil")
+ }
+ if len(pkg.PackageLicenseInfoFromFiles) != 7 {
+ t.Fatalf("expected PackageLicenseInfoFromFiles len to be 7, got %d", len(pkg.PackageLicenseInfoFromFiles))
+ }
+ if pkg.PackageLicenseInfoFromFiles[0] != "Apache-2.0" {
+ t.Errorf("expected %v, got %v", "Apache-2.0", pkg.PackageLicenseInfoFromFiles[0])
+ }
+ if pkg.PackageLicenseInfoFromFiles[1] != "BSD-2-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-2-Clause", pkg.PackageLicenseInfoFromFiles[1])
+ }
+ if pkg.PackageLicenseInfoFromFiles[2] != "BSD-3-Clause" {
+ t.Errorf("expected %v, got %v", "BSD-3-Clause", pkg.PackageLicenseInfoFromFiles[2])
+ }
+ // here, DO NOT keep the +
+ if pkg.PackageLicenseInfoFromFiles[3] != "EPL-1.0" {
+ t.Errorf("expected %v, got %v", "EPL-1.0", pkg.PackageLicenseInfoFromFiles[3])
+ }
+ if pkg.PackageLicenseInfoFromFiles[4] != "GPL-2.0-or-later" {
+ t.Errorf("expected %v, got %v", "GPL-2.0-or-later", pkg.PackageLicenseInfoFromFiles[4])
+ }
+ if pkg.PackageLicenseInfoFromFiles[5] != "ISC" {
+ t.Errorf("expected %v, got %v", "ISC", pkg.PackageLicenseInfoFromFiles[5])
+ }
+ if pkg.PackageLicenseInfoFromFiles[6] != "MIT" {
+ t.Errorf("expected %v, got %v", "MIT", pkg.PackageLicenseInfoFromFiles[6])
+ }
+
+}
+
+func Test2_3SearcherCanFillInIDsAndIgnorePaths(t *testing.T) {
+ packageName := "project3"
+ dirRoot := "../testdata/project3/"
+ config := &Config2_3{
+ NamespacePrefix: "https://github.com/swinslow/spdx-docs/spdx-go/testdata-",
+ BuilderPathsIgnored: []string{
+ "**/ignoredir/",
+ "/excludedir/",
+ "**/ignorefile.txt",
+ "/alsoEXCLUDEthis.txt",
+ },
+ SearcherPathsIgnored: []string{
+ "**/dontscan.txt",
+ },
+ }
+
+ doc, err := BuildIDsDocument2_3(packageName, dirRoot, config)
+ if err != nil {
+ t.Fatalf("expected nil error, got %v", err)
+ }
+ if doc == nil {
+ t.Fatalf("expected non-nil Document, got nil")
+ }
+
+ // not checking all contents of doc, see builder tests for those
+
+ // get the package and its files, checking licenses for each, and
+ // confirming NOASSERTION for those that are skipped
+ pkg := doc.Packages[0]
+ if pkg == nil {
+ t.Fatalf("expected non-nil pkg, got nil")
+ }
+ if len(pkg.Files) != 5 {
+ t.Fatalf("expected len %d, got %d", 5, len(pkg.Files))
+ }
+
+ f := pkg.Files[0]
+ if f.FileName != "./dontscan.txt" {
+ t.Errorf("expected %v, got %v", "./dontscan.txt", f.FileName)
+ }
+ if len(f.LicenseInfoInFiles) != 1 {
+ t.Errorf("expected len to be %d, got %d", 1, len(f.LicenseInfoInFiles))
+ }
+ if f.LicenseInfoInFiles[0] != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseInfoInFiles[0])
+ }
+ if f.LicenseConcluded != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseConcluded)
+ }
+
+ f = pkg.Files[1]
+ if f.FileName != "./keep/keep.txt" {
+ t.Errorf("expected %v, got %v", "./keep/keep.txt", f.FileName)
+ }
+ if len(f.LicenseInfoInFiles) != 1 {
+ t.Errorf("expected len to be %d, got %d", 1, len(f.LicenseInfoInFiles))
+ }
+ if f.LicenseInfoInFiles[0] != "MIT" {
+ t.Errorf("expected %s, got %s", "MIT", f.LicenseInfoInFiles[0])
+ }
+ if f.LicenseConcluded != "MIT" {
+ t.Errorf("expected %s, got %s", "MIT", f.LicenseConcluded)
+ }
+
+ f = pkg.Files[2]
+ if f.FileName != "./keep.txt" {
+ t.Errorf("expected %v, got %v", "./keep.txt", f.FileName)
+ }
+ if len(f.LicenseInfoInFiles) != 1 {
+ t.Errorf("expected len to be %d, got %d", 1, len(f.LicenseInfoInFiles))
+ }
+ if f.LicenseInfoInFiles[0] != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseInfoInFiles[0])
+ }
+ if f.LicenseConcluded != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseConcluded)
+ }
+
+ f = pkg.Files[3]
+ if f.FileName != "./subdir/keep/dontscan.txt" {
+ t.Errorf("expected %v, got %v", "./subdir/keep/dontscan.txt", f.FileName)
+ }
+ if len(f.LicenseInfoInFiles) != 1 {
+ t.Errorf("expected len to be %d, got %d", 1, len(f.LicenseInfoInFiles))
+ }
+ if f.LicenseInfoInFiles[0] != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseInfoInFiles[0])
+ }
+ if f.LicenseConcluded != "NOASSERTION" {
+ t.Errorf("expected %s, got %s", "NOASSERTION", f.LicenseConcluded)
+ }
+
+ f = pkg.Files[4]
+ if f.FileName != "./subdir/keep/keep.txt" {
+ t.Errorf("expected %v, got %v", "./subdir/keep/keep.txt", f.FileName)
+ }
+ if len(f.LicenseInfoInFiles) != 1 {
+ t.Errorf("expected len to be %d, got %d", 1, len(f.LicenseInfoInFiles))
+ }
+ if f.LicenseInfoInFiles[0] != "MIT" {
+ t.Errorf("expected %s, got %s", "MIT", f.LicenseInfoInFiles[0])
+ }
+ if f.LicenseConcluded != "MIT" {
+ t.Errorf("expected %s, got %s", "MIT", f.LicenseConcluded)
+ }
+}
+
+func Test2_3SearcherFailsWithInvalidPath(t *testing.T) {
+ packageName := "project2"
+ dirRoot := "./oops/invalid"
+ config := &Config2_3{
+ NamespacePrefix: "whatever",
+ }
+
+ _, err := BuildIDsDocument2_3(packageName, dirRoot, config)
+ if err == nil {
+ t.Fatalf("expected non-nil error, got nil")
+ }
+}
+
// ===== Searcher utility tests =====
func TestCanFindShortFormIDWhenPresent(t *testing.T) {
filePath := "../testdata/project2/has-id.txt"