blob: 00deb41c11cfe87424288b5891c3fbcd174ab7d2 [file] [log] [blame]
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package compliance
import (
"bufio"
"crypto/md5"
"fmt"
"io"
"io/fs"
"path/filepath"
"regexp"
"sort"
"strings"
)
const (
noProjectName = "\u2205"
)
var (
nameRegexp = regexp.MustCompile(`^\s*name\s*:\s*"(.*)"\s*$`)
descRegexp = regexp.MustCompile(`^\s*description\s*:\s*"(.*)"\s*$`)
versionRegexp = regexp.MustCompile(`^\s*version\s*:\s*"(.*)"\s*$`)
licensesPathRegexp = regexp.MustCompile(`licen[cs]es?/`)
)
// NoticeIndex transforms license metadata into license text hashes, library
// names, and install paths indexing them for fast lookup/iteration.
type NoticeIndex struct {
// lg identifies the license graph to which the index applies.
lg *LicenseGraph
// rs identifies the set of resolutions upon which the index is based.
rs ResolutionSet
// shipped identifies the set of target nodes shipped directly or as derivative works.
shipped *TargetNodeSet
// rootFS locates the root of the file system from which to read the files.
rootFS fs.FS
// hash maps license text filenames to content hashes
hash map[string]hash
// text maps content hashes to content
text map[hash][]byte
// hashLibInstall maps hashes to libraries to install paths.
hashLibInstall map[hash]map[string]map[string]struct{}
// installHashLib maps install paths to libraries to hashes.
installHashLib map[string]map[hash]map[string]struct{}
// libHash maps libraries to hashes.
libHash map[string]map[hash]struct{}
// targetHash maps target nodes to hashes.
targetHashes map[*TargetNode]map[hash]struct{}
// projectName maps project directory names to project name text.
projectName map[string]string
// files lists all the files accessed during indexing
files []string
}
// IndexLicenseTexts creates a hashed index of license texts for `lg` and `rs`
// using the files rooted at `rootFS`.
func IndexLicenseTexts(rootFS fs.FS, lg *LicenseGraph, rs ResolutionSet) (*NoticeIndex, error) {
if rs == nil {
rs = ResolveNotices(lg)
}
ni := &NoticeIndex{
lg: lg,
rs: rs,
shipped: ShippedNodes(lg),
rootFS: rootFS,
hash: make(map[string]hash),
text: make(map[hash][]byte),
hashLibInstall: make(map[hash]map[string]map[string]struct{}),
installHashLib: make(map[string]map[hash]map[string]struct{}),
libHash: make(map[string]map[hash]struct{}),
targetHashes: make(map[*TargetNode]map[hash]struct{}),
projectName: make(map[string]string),
}
// index adds all license texts for `tn` to the index.
index := func(tn *TargetNode) (map[hash]struct{}, error) {
if hashes, ok := ni.targetHashes[tn]; ok {
return hashes, nil
}
hashes := make(map[hash]struct{})
for _, text := range tn.LicenseTexts() {
if _, ok := ni.hash[text]; !ok {
err := ni.addText(text)
if err != nil {
return nil, err
}
}
hash := ni.hash[text]
if _, ok := hashes[hash]; !ok {
hashes[hash] = struct{}{}
}
}
ni.targetHashes[tn] = hashes
return hashes, nil
}
link := func(libName string, hashes map[hash]struct{}, installPaths []string) {
if _, ok := ni.libHash[libName]; !ok {
ni.libHash[libName] = make(map[hash]struct{})
}
for h := range hashes {
if _, ok := ni.hashLibInstall[h]; !ok {
ni.hashLibInstall[h] = make(map[string]map[string]struct{})
}
if _, ok := ni.libHash[libName][h]; !ok {
ni.libHash[libName][h] = struct{}{}
}
for _, installPath := range installPaths {
if _, ok := ni.installHashLib[installPath]; !ok {
ni.installHashLib[installPath] = make(map[hash]map[string]struct{})
ni.installHashLib[installPath][h] = make(map[string]struct{})
ni.installHashLib[installPath][h][libName] = struct{}{}
} else if _, ok = ni.installHashLib[installPath][h]; !ok {
ni.installHashLib[installPath][h] = make(map[string]struct{})
ni.installHashLib[installPath][h][libName] = struct{}{}
} else if _, ok = ni.installHashLib[installPath][h][libName]; !ok {
ni.installHashLib[installPath][h][libName] = struct{}{}
}
if _, ok := ni.hashLibInstall[h]; !ok {
ni.hashLibInstall[h] = make(map[string]map[string]struct{})
ni.hashLibInstall[h][libName] = make(map[string]struct{})
ni.hashLibInstall[h][libName][installPath] = struct{}{}
} else if _, ok = ni.hashLibInstall[h][libName]; !ok {
ni.hashLibInstall[h][libName] = make(map[string]struct{})
ni.hashLibInstall[h][libName][installPath] = struct{}{}
} else if _, ok = ni.hashLibInstall[h][libName][installPath]; !ok {
ni.hashLibInstall[h][libName][installPath] = struct{}{}
}
}
}
}
// returns error from walk below.
var err error
WalkTopDown(NoEdgeContext{}, lg, func(lg *LicenseGraph, tn *TargetNode, path TargetEdgePath) bool {
if err != nil {
return false
}
if !ni.shipped.Contains(tn) {
return false
}
installPaths := getInstallPaths(tn, path)
var hashes map[hash]struct{}
hashes, err = index(tn)
if err != nil {
return false
}
link(ni.getLibName(tn), hashes, installPaths)
if tn.IsContainer() {
return true
}
for _, r := range rs.Resolutions(tn) {
hashes, err = index(r.actsOn)
if err != nil {
return false
}
link(ni.getLibName(r.actsOn), hashes, installPaths)
}
return false
})
if err != nil {
return nil, err
}
return ni, nil
}
// Hashes returns an ordered channel of the hashed license texts.
func (ni *NoticeIndex) Hashes() chan hash {
c := make(chan hash)
go func() {
libs := make([]string, 0, len(ni.libHash))
for libName := range ni.libHash {
libs = append(libs, libName)
}
sort.Strings(libs)
hashes := make(map[hash]struct{})
for _, libName := range libs {
hl := make([]hash, 0, len(ni.libHash[libName]))
for h := range ni.libHash[libName] {
if _, ok := hashes[h]; ok {
continue
}
hashes[h] = struct{}{}
hl = append(hl, h)
}
if len(hl) > 0 {
sort.Sort(hashList{ni, libName, "", &hl})
for _, h := range hl {
c <- h
}
}
}
close(c)
}()
return c
}
// InputNoticeFiles returns the list of files that were hashed during IndexLicenseTexts.
func (ni *NoticeIndex) InputNoticeFiles() []string {
files := append([]string(nil), ni.files...)
sort.Strings(files)
return files
}
// HashLibs returns the ordered array of library names using the license text
// hashed as `h`.
func (ni *NoticeIndex) HashLibs(h hash) []string {
libs := make([]string, 0, len(ni.hashLibInstall[h]))
for libName := range ni.hashLibInstall[h] {
libs = append(libs, libName)
}
sort.Strings(libs)
return libs
}
// HashLibInstalls returns the ordered array of install paths referencing
// library `libName` using the license text hashed as `h`.
func (ni *NoticeIndex) HashLibInstalls(h hash, libName string) []string {
installs := make([]string, 0, len(ni.hashLibInstall[h][libName]))
for installPath := range ni.hashLibInstall[h][libName] {
installs = append(installs, installPath)
}
sort.Strings(installs)
return installs
}
// InstallPaths returns the ordered channel of indexed install paths.
func (ni *NoticeIndex) InstallPaths() chan string {
c := make(chan string)
go func() {
paths := make([]string, 0, len(ni.installHashLib))
for path := range ni.installHashLib {
paths = append(paths, path)
}
sort.Strings(paths)
for _, installPath := range paths {
c <- installPath
}
close(c)
}()
return c
}
// InstallHashes returns the ordered array of hashes attached to `installPath`.
func (ni *NoticeIndex) InstallHashes(installPath string) []hash {
result := make([]hash, 0, len(ni.installHashLib[installPath]))
for h := range ni.installHashLib[installPath] {
result = append(result, h)
}
if len(result) > 0 {
sort.Sort(hashList{ni, "", installPath, &result})
}
return result
}
// InstallHashLibs returns the ordered array of library names attached to
// `installPath` as hash `h`.
func (ni *NoticeIndex) InstallHashLibs(installPath string, h hash) []string {
result := make([]string, 0, len(ni.installHashLib[installPath][h]))
for libName := range ni.installHashLib[installPath][h] {
result = append(result, libName)
}
sort.Strings(result)
return result
}
// Libraries returns the ordered channel of indexed library names.
func (ni *NoticeIndex) Libraries() chan string {
c := make(chan string)
go func() {
libs := make([]string, 0, len(ni.libHash))
for lib := range ni.libHash {
libs = append(libs, lib)
}
sort.Strings(libs)
for _, lib := range libs {
c <- lib
}
close(c)
}()
return c
}
// HashText returns the file content of the license text hashed as `h`.
func (ni *NoticeIndex) HashText(h hash) []byte {
return ni.text[h]
}
// getLibName returns the name of the library associated with `noticeFor`.
func (ni *NoticeIndex) getLibName(noticeFor *TargetNode) string {
// use name from METADATA if available
ln := ni.checkMetadata(noticeFor)
if len(ln) > 0 {
return ln
}
// use package_name: from license{} module if available
pn := noticeFor.PackageName()
if len(pn) > 0 {
return pn
}
for _, p := range noticeFor.Projects() {
if strings.HasPrefix(p, "prebuilts/") {
for _, licenseText := range noticeFor.LicenseTexts() {
if !strings.HasPrefix(licenseText, "prebuilts/") {
continue
}
for r, prefix := range SafePrebuiltPrefixes {
match := r.FindString(licenseText)
if len(match) == 0 {
continue
}
strip := SafePathPrefixes[prefix]
if strip {
// strip entire prefix
match = licenseText[len(match):]
} else {
// strip from prebuilts/ until safe prefix
match = licenseText[len(match)-len(prefix):]
}
// remove LICENSE or NOTICE or other filename
li := strings.LastIndex(match, "/")
if li > 0 {
match = match[:li]
}
// remove *licenses/ path segment and subdirectory if in path
if offsets := licensesPathRegexp.FindAllStringIndex(match, -1); offsets != nil && offsets[len(offsets)-1][0] > 0 {
match = match[:offsets[len(offsets)-1][0]]
li = strings.LastIndex(match, "/")
if li > 0 {
match = match[:li]
}
}
return match
}
break
}
}
for prefix, strip := range SafePathPrefixes {
if strings.HasPrefix(p, prefix) {
if strip {
return p[len(prefix):]
} else {
return p
}
}
}
}
// strip off [./]meta_lic from license metadata path and extract base name
n := noticeFor.name[:len(noticeFor.name)-9]
li := strings.LastIndex(n, "/")
if li > 0 {
n = n[li+1:]
}
return n
}
// checkMetadata tries to look up a library name from a METADATA file associated with `noticeFor`.
func (ni *NoticeIndex) checkMetadata(noticeFor *TargetNode) string {
for _, p := range noticeFor.Projects() {
if name, ok := ni.projectName[p]; ok {
if name == noProjectName {
continue
}
return name
}
f, err := ni.rootFS.Open(filepath.Join(p, "METADATA"))
if err != nil {
ni.projectName[p] = noProjectName
continue
}
name := ""
description := ""
version := ""
s := bufio.NewScanner(f)
for s.Scan() {
line := s.Text()
m := nameRegexp.FindStringSubmatch(line)
if m != nil {
if 1 < len(m) && m[1] != "" {
name = m[1]
}
if version != "" {
break
}
continue
}
m = versionRegexp.FindStringSubmatch(line)
if m != nil {
if 1 < len(m) && m[1] != "" {
version = m[1]
}
if name != "" {
break
}
continue
}
m = descRegexp.FindStringSubmatch(line)
if m != nil {
if 1 < len(m) && m[1] != "" {
description = m[1]
}
}
}
_ = s.Err()
_ = f.Close()
if name != "" {
if version != "" {
if version[0] == 'v' || version[0] == 'V' {
ni.projectName[p] = name + "_" + version
} else {
ni.projectName[p] = name + "_v_" + version
}
} else {
ni.projectName[p] = name
}
return ni.projectName[p]
}
if description != "" {
ni.projectName[p] = description
return ni.projectName[p]
}
ni.projectName[p] = noProjectName
}
return ""
}
// addText reads and indexes the content of a license text file.
func (ni *NoticeIndex) addText(file string) error {
f, err := ni.rootFS.Open(filepath.Clean(file))
if err != nil {
return fmt.Errorf("error opening license text file %q: %w", file, err)
}
// read the file
text, err := io.ReadAll(f)
if err != nil {
return fmt.Errorf("error reading license text file %q: %w", file, err)
}
hash := hash{fmt.Sprintf("%x", md5.Sum(text))}
ni.hash[file] = hash
if _, alreadyPresent := ni.text[hash]; !alreadyPresent {
ni.text[hash] = text
}
ni.files = append(ni.files, file)
return nil
}
// getInstallPaths returns the names of the used dependencies mapped to their
// installed locations.
func getInstallPaths(attachesTo *TargetNode, path TargetEdgePath) []string {
if len(path) == 0 {
installs := attachesTo.Installed()
if 0 == len(installs) {
installs = attachesTo.Built()
}
return installs
}
var getInstalls func(path TargetEdgePath) []string
getInstalls = func(path TargetEdgePath) []string {
// deps contains the output targets from the dependencies in the path
var deps []string
if len(path) > 1 {
// recursively get the targets from the sub-path skipping 1 path segment
deps = getInstalls(path[1:])
} else {
// stop recursion at 1 path segment
deps = path[0].Dependency().TargetFiles()
}
size := 0
prefixes := path[0].Target().TargetFiles()
installMap := path[0].Target().InstallMap()
sources := path[0].Target().Sources()
for _, dep := range deps {
found := false
for _, source := range sources {
if strings.HasPrefix(dep, source) {
found = true
break
}
}
if !found {
continue
}
for _, im := range installMap {
if strings.HasPrefix(dep, im.FromPath) {
size += len(prefixes)
break
}
}
}
installs := make([]string, 0, size)
for _, dep := range deps {
found := false
for _, source := range sources {
if strings.HasPrefix(dep, source) {
found = true
break
}
}
if !found {
continue
}
for _, im := range installMap {
if strings.HasPrefix(dep, im.FromPath) {
for _, prefix := range prefixes {
installs = append(installs, prefix+im.ContainerPath+dep[len(im.FromPath):])
}
break
}
}
}
return installs
}
allInstalls := getInstalls(path)
installs := path[0].Target().Installed()
if len(installs) == 0 {
return allInstalls
}
result := make([]string, 0, len(allInstalls))
for _, install := range allInstalls {
for _, prefix := range installs {
if strings.HasPrefix(install, prefix) {
result = append(result, install)
}
}
}
return result
}
// hash is an opaque string derived from md5sum.
type hash struct {
key string
}
// String returns the hexadecimal representation of the hash.
func (h hash) String() string {
return h.key
}
// hashList orders an array of hashes
type hashList struct {
ni *NoticeIndex
libName string
installPath string
hashes *[]hash
}
// Len returns the count of elements in the slice.
func (l hashList) Len() int { return len(*l.hashes) }
// Swap rearranges 2 elements of the slice so that each occupies the other's
// former position.
func (l hashList) Swap(i, j int) { (*l.hashes)[i], (*l.hashes)[j] = (*l.hashes)[j], (*l.hashes)[i] }
// Less returns true when the `i`th element is lexicographically less than
// the `j`th element.
func (l hashList) Less(i, j int) bool {
var insti, instj int
if len(l.libName) > 0 {
insti = len(l.ni.hashLibInstall[(*l.hashes)[i]][l.libName])
instj = len(l.ni.hashLibInstall[(*l.hashes)[j]][l.libName])
} else {
libsi := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[i])
libsj := l.ni.InstallHashLibs(l.installPath, (*l.hashes)[j])
libsis := strings.Join(libsi, " ")
libsjs := strings.Join(libsj, " ")
if libsis != libsjs {
return libsis < libsjs
}
}
if insti == instj {
leni := len(l.ni.text[(*l.hashes)[i]])
lenj := len(l.ni.text[(*l.hashes)[j]])
if leni == lenj {
// all else equal, just order by hash value
return (*l.hashes)[i].key < (*l.hashes)[j].key
}
// put shortest texts first within same # of installs
return leni < lenj
}
// reverse order of # installs so that most popular appears first
return instj < insti
}