blob: c809a96dd96b7fb6ad7b950dd1bbc8f10525a2cf [file] [log] [blame]
Bob Badoura99ac622021-10-25 16:21:00 -07001// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package compliance
16
17import (
18 "fmt"
19 "io"
20 "io/fs"
21 "strings"
22 "sync"
23
24 "android/soong/compliance/license_metadata_proto"
25
26 "google.golang.org/protobuf/encoding/prototext"
27)
28
// ConcurrentReaders is the size of the task pool that limits resource usage,
// e.g. the number of files open at the same time.
var ConcurrentReaders = 5
33
34// result describes the outcome of reading and parsing a single license metadata file.
35type result struct {
36 // file identifies the path to the license metadata file
37 file string
38
39 // target contains the parsed metadata or nil if an error
40 target *TargetNode
41
Bob Badoura99ac622021-10-25 16:21:00 -070042 // err is nil unless an error occurs
43 err error
44}
45
46// receiver coordinates the tasks for reading and parsing license metadata files.
47type receiver struct {
Bob Badour103eb0f2022-01-10 13:50:57 -080048 // lg accumulates the read metadata and becomes the final resulting LicenseGraph.
Bob Badoura99ac622021-10-25 16:21:00 -070049 lg *LicenseGraph
50
51 // rootFS locates the root of the file system from which to read the files.
52 rootFS fs.FS
53
54 // stderr identifies the error output writer.
55 stderr io.Writer
56
57 // task provides a fixed-size task pool to limit concurrent open files etc.
58 task chan bool
59
60 // results returns one license metadata file result at a time.
61 results chan *result
62
63 // wg detects when done
64 wg sync.WaitGroup
65}
66
67// ReadLicenseGraph reads and parses `files` and their dependencies into a LicenseGraph.
68//
69// `files` become the root files of the graph for top-down walks of the graph.
70func ReadLicenseGraph(rootFS fs.FS, stderr io.Writer, files []string) (*LicenseGraph, error) {
71 if len(files) == 0 {
72 return nil, fmt.Errorf("no license metadata to analyze")
73 }
74 if ConcurrentReaders < 1 {
75 return nil, fmt.Errorf("need at least one task in pool")
76 }
77
78 lg := newLicenseGraph()
79 for _, f := range files {
Bob Badour63a281c2022-01-10 17:59:14 -080080 if strings.HasSuffix(f, "meta_lic") {
Bob Badoura99ac622021-10-25 16:21:00 -070081 lg.rootFiles = append(lg.rootFiles, f)
82 } else {
83 lg.rootFiles = append(lg.rootFiles, f+".meta_lic")
84 }
85 }
86
87 recv := &receiver{
88 lg: lg,
89 rootFS: rootFS,
90 stderr: stderr,
91 task: make(chan bool, ConcurrentReaders),
92 results: make(chan *result, ConcurrentReaders),
93 wg: sync.WaitGroup{},
94 }
95 for i := 0; i < ConcurrentReaders; i++ {
96 recv.task <- true
97 }
98
99 readFiles := func() {
100 lg.mu.Lock()
101 // identify the metadata files to schedule reading tasks for
102 for _, f := range lg.rootFiles {
103 lg.targets[f] = nil
104 }
105 lg.mu.Unlock()
106
107 // schedule tasks to read the files
108 for _, f := range lg.rootFiles {
109 readFile(recv, f)
110 }
111
112 // schedule a task to wait until finished and close the channel.
113 go func() {
114 recv.wg.Wait()
115 close(recv.task)
116 close(recv.results)
117 }()
118 }
119 go readFiles()
120
121 // tasks to read license metadata files are scheduled; read and process results from channel
122 var err error
123 for recv.results != nil {
124 select {
125 case r, ok := <-recv.results:
126 if ok {
127 // handle errors by nil'ing ls, setting err, and clobbering results channel
128 if r.err != nil {
129 err = r.err
130 fmt.Fprintf(recv.stderr, "%s\n", err.Error())
131 lg = nil
132 recv.results = nil
133 continue
134 }
135
136 // record the parsed metadata (guarded by mutex)
137 recv.lg.mu.Lock()
Bob Badour103eb0f2022-01-10 13:50:57 -0800138 lg.targets[r.target.name] = r.target
Bob Badoura99ac622021-10-25 16:21:00 -0700139 recv.lg.mu.Unlock()
140 } else {
141 // finished -- nil the results channel
142 recv.results = nil
143 }
144 }
145 }
146
Bob Badour103eb0f2022-01-10 13:50:57 -0800147 if lg != nil {
148 esize := 0
149 for _, tn := range lg.targets {
150 esize += len(tn.proto.Deps)
151 }
152 lg.edges = make(TargetEdgeList, 0, esize)
153 for _, tn := range lg.targets {
154 tn.licenseConditions = LicenseConditionSetFromNames(tn, tn.proto.LicenseConditions...)
155 err = addDependencies(lg, tn)
156 if err != nil {
157 return nil, fmt.Errorf("error indexing dependencies for %q: %w", tn.name, err)
158 }
159 tn.proto.Deps = []*license_metadata_proto.AnnotatedDependency{}
160 }
161 }
Bob Badoura99ac622021-10-25 16:21:00 -0700162 return lg, err
163
164}
165
166// targetNode contains the license metadata for a node in the license graph.
167type targetNode struct {
168 proto license_metadata_proto.LicenseMetadata
169
Bob Badour103eb0f2022-01-10 13:50:57 -0800170 // name is the path to the metadata file.
Bob Badoura99ac622021-10-25 16:21:00 -0700171 name string
Bob Badoura99ac622021-10-25 16:21:00 -0700172
Bob Badour103eb0f2022-01-10 13:50:57 -0800173 // lg is the license graph the node belongs to.
174 lg *LicenseGraph
Bob Badoura99ac622021-10-25 16:21:00 -0700175
Bob Badour103eb0f2022-01-10 13:50:57 -0800176 // edges identifies the dependencies of the target.
177 edges TargetEdgeList
Bob Badoura99ac622021-10-25 16:21:00 -0700178
Bob Badour103eb0f2022-01-10 13:50:57 -0800179 // licenseConditions identifies the set of license conditions originating at the target node.
180 licenseConditions LicenseConditionSet
181
182 // resolution identifies the set of conditions resolved by acting on the target node.
183 resolution LicenseConditionSet
Bob Badoura99ac622021-10-25 16:21:00 -0700184}
185
186// addDependencies converts the proto AnnotatedDependencies into `edges`
Bob Badour103eb0f2022-01-10 13:50:57 -0800187func addDependencies(lg *LicenseGraph, tn *TargetNode) error {
188 tn.edges = make(TargetEdgeList, 0,len(tn.proto.Deps))
189 for _, ad := range tn.proto.Deps {
Bob Badoura99ac622021-10-25 16:21:00 -0700190 dependency := ad.GetFile()
191 if len(dependency) == 0 {
192 return fmt.Errorf("missing dependency name")
193 }
Bob Badour103eb0f2022-01-10 13:50:57 -0800194 dtn, ok := lg.targets[dependency]
195 if !ok {
196 return fmt.Errorf("unknown dependency name %q", dependency)
197 }
198 if dtn == nil {
199 return fmt.Errorf("nil dependency for name %q", dependency)
200 }
Bob Badoura99ac622021-10-25 16:21:00 -0700201 annotations := newEdgeAnnotations()
202 for _, a := range ad.Annotations {
Bob Badour67d8ae32022-01-10 18:32:54 -0800203 // look up a common constant annotation string from a small map
204 // instead of creating 1000's of copies of the same 3 strings.
205 if ann, ok := RecognizedAnnotations[a]; ok {
Bob Badour5446a6f2022-01-10 18:44:59 -0800206 annotations.annotations[ann] = struct{}{}
Bob Badoura99ac622021-10-25 16:21:00 -0700207 }
Bob Badoura99ac622021-10-25 16:21:00 -0700208 }
Bob Badour103eb0f2022-01-10 13:50:57 -0800209 edge := &TargetEdge{tn, dtn, annotations}
210 lg.edges = append(lg.edges, edge)
211 tn.edges = append(tn.edges, edge)
Bob Badoura99ac622021-10-25 16:21:00 -0700212 }
213 return nil
214}
215
216// readFile is a task to read and parse a single license metadata file, and to schedule
217// additional tasks for reading and parsing dependencies as necessary.
218func readFile(recv *receiver, file string) {
219 recv.wg.Add(1)
220 <-recv.task
221 go func() {
222 f, err := recv.rootFS.Open(file)
223 if err != nil {
Bob Badour103eb0f2022-01-10 13:50:57 -0800224 recv.results <- &result{file, nil, fmt.Errorf("error opening license metadata %q: %w", file, err)}
Bob Badoura99ac622021-10-25 16:21:00 -0700225 return
226 }
227
228 // read the file
229 data, err := io.ReadAll(f)
230 if err != nil {
Bob Badour103eb0f2022-01-10 13:50:57 -0800231 recv.results <- &result{file, nil, fmt.Errorf("error reading license metadata %q: %w", file, err)}
Bob Badoura99ac622021-10-25 16:21:00 -0700232 return
233 }
Bob Badour103eb0f2022-01-10 13:50:57 -0800234 f.Close()
Bob Badoura99ac622021-10-25 16:21:00 -0700235
Bob Badour103eb0f2022-01-10 13:50:57 -0800236 tn := &TargetNode{lg: recv.lg, name: file}
Bob Badoura99ac622021-10-25 16:21:00 -0700237
238 err = prototext.Unmarshal(data, &tn.proto)
239 if err != nil {
Bob Badour103eb0f2022-01-10 13:50:57 -0800240 recv.results <- &result{file, nil, fmt.Errorf("error license metadata %q: %w", file, err)}
Bob Badoura99ac622021-10-25 16:21:00 -0700241 return
242 }
243
Bob Badoura99ac622021-10-25 16:21:00 -0700244 // send result for this file and release task before scheduling dependencies,
245 // but do not signal done to WaitGroup until dependencies are scheduled.
Bob Badour103eb0f2022-01-10 13:50:57 -0800246 recv.results <- &result{file, tn, nil}
Bob Badoura99ac622021-10-25 16:21:00 -0700247 recv.task <- true
248
249 // schedule tasks as necessary to read dependencies
Bob Badour103eb0f2022-01-10 13:50:57 -0800250 for _, ad := range tn.proto.Deps {
251 dependency := ad.GetFile()
Bob Badoura99ac622021-10-25 16:21:00 -0700252 // decide, signal and record whether to schedule task in critical section
253 recv.lg.mu.Lock()
Bob Badour103eb0f2022-01-10 13:50:57 -0800254 _, alreadyScheduled := recv.lg.targets[dependency]
Bob Badoura99ac622021-10-25 16:21:00 -0700255 if !alreadyScheduled {
Bob Badour103eb0f2022-01-10 13:50:57 -0800256 recv.lg.targets[dependency] = nil
Bob Badoura99ac622021-10-25 16:21:00 -0700257 }
258 recv.lg.mu.Unlock()
259 // schedule task to read dependency file outside critical section
260 if !alreadyScheduled {
Bob Badour103eb0f2022-01-10 13:50:57 -0800261 readFile(recv, dependency)
Bob Badoura99ac622021-10-25 16:21:00 -0700262 }
263 }
264
265 // signal task done after scheduling dependencies
266 recv.wg.Done()
267 }()
268}