blob: 45fa1343ba963f08bd039671fca1536ca9616eb2 [file] [log] [blame]
Bob Badoura99ac622021-10-25 16:21:00 -07001// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package compliance
16
17import (
18 "fmt"
19 "io"
20 "io/fs"
21 "strings"
22 "sync"
23
24 "android/soong/compliance/license_metadata_proto"
25
26 "google.golang.org/protobuf/encoding/prototext"
27)
28
// ConcurrentReaders is the size of the task pool used to limit resource
// usage (e.g. the number of simultaneously open files) while reading
// license metadata.
var ConcurrentReaders = 5
33
34// result describes the outcome of reading and parsing a single license metadata file.
35type result struct {
36 // file identifies the path to the license metadata file
37 file string
38
39 // target contains the parsed metadata or nil if an error
40 target *TargetNode
41
42 // edges contains the parsed dependencies
43 edges []*dependencyEdge
44
45 // err is nil unless an error occurs
46 err error
47}
48
49// receiver coordinates the tasks for reading and parsing license metadata files.
50type receiver struct {
51 // lg accumulates the read metadata and becomes the final resulting LicensGraph.
52 lg *LicenseGraph
53
54 // rootFS locates the root of the file system from which to read the files.
55 rootFS fs.FS
56
57 // stderr identifies the error output writer.
58 stderr io.Writer
59
60 // task provides a fixed-size task pool to limit concurrent open files etc.
61 task chan bool
62
63 // results returns one license metadata file result at a time.
64 results chan *result
65
66 // wg detects when done
67 wg sync.WaitGroup
68}
69
70// ReadLicenseGraph reads and parses `files` and their dependencies into a LicenseGraph.
71//
72// `files` become the root files of the graph for top-down walks of the graph.
73func ReadLicenseGraph(rootFS fs.FS, stderr io.Writer, files []string) (*LicenseGraph, error) {
74 if len(files) == 0 {
75 return nil, fmt.Errorf("no license metadata to analyze")
76 }
77 if ConcurrentReaders < 1 {
78 return nil, fmt.Errorf("need at least one task in pool")
79 }
80
81 lg := newLicenseGraph()
82 for _, f := range files {
Bob Badour63a281c2022-01-10 17:59:14 -080083 if strings.HasSuffix(f, "meta_lic") {
Bob Badoura99ac622021-10-25 16:21:00 -070084 lg.rootFiles = append(lg.rootFiles, f)
85 } else {
86 lg.rootFiles = append(lg.rootFiles, f+".meta_lic")
87 }
88 }
89
90 recv := &receiver{
91 lg: lg,
92 rootFS: rootFS,
93 stderr: stderr,
94 task: make(chan bool, ConcurrentReaders),
95 results: make(chan *result, ConcurrentReaders),
96 wg: sync.WaitGroup{},
97 }
98 for i := 0; i < ConcurrentReaders; i++ {
99 recv.task <- true
100 }
101
102 readFiles := func() {
103 lg.mu.Lock()
104 // identify the metadata files to schedule reading tasks for
105 for _, f := range lg.rootFiles {
106 lg.targets[f] = nil
107 }
108 lg.mu.Unlock()
109
110 // schedule tasks to read the files
111 for _, f := range lg.rootFiles {
112 readFile(recv, f)
113 }
114
115 // schedule a task to wait until finished and close the channel.
116 go func() {
117 recv.wg.Wait()
118 close(recv.task)
119 close(recv.results)
120 }()
121 }
122 go readFiles()
123
124 // tasks to read license metadata files are scheduled; read and process results from channel
125 var err error
126 for recv.results != nil {
127 select {
128 case r, ok := <-recv.results:
129 if ok {
130 // handle errors by nil'ing ls, setting err, and clobbering results channel
131 if r.err != nil {
132 err = r.err
133 fmt.Fprintf(recv.stderr, "%s\n", err.Error())
134 lg = nil
135 recv.results = nil
136 continue
137 }
138
139 // record the parsed metadata (guarded by mutex)
140 recv.lg.mu.Lock()
141 recv.lg.targets[r.file] = r.target
142 if len(r.edges) > 0 {
143 recv.lg.edges = append(recv.lg.edges, r.edges...)
144 }
145 recv.lg.mu.Unlock()
146 } else {
147 // finished -- nil the results channel
148 recv.results = nil
149 }
150 }
151 }
152
153 return lg, err
154
155}
156
157// targetNode contains the license metadata for a node in the license graph.
158type targetNode struct {
159 proto license_metadata_proto.LicenseMetadata
160
161 // name is the path to the metadata file
162 name string
163}
164
165// dependencyEdge describes a single edge in the license graph.
166type dependencyEdge struct {
167 // target identifies the target node being built and/or installed.
168 target string
169
170 // dependency identifies the target node being depended on.
171 //
172 // i.e. `dependency` is necessary to build `target`.
173 dependency string
174
175 // annotations are a set of text attributes attached to the edge.
176 //
177 // Policy prescribes meaning to a limited set of annotations; others
178 // are preserved and ignored.
179 annotations TargetEdgeAnnotations
180}
181
182// addDependencies converts the proto AnnotatedDependencies into `edges`
183func addDependencies(edges *[]*dependencyEdge, target string, dependencies []*license_metadata_proto.AnnotatedDependency) error {
184 for _, ad := range dependencies {
185 dependency := ad.GetFile()
186 if len(dependency) == 0 {
187 return fmt.Errorf("missing dependency name")
188 }
189 annotations := newEdgeAnnotations()
190 for _, a := range ad.Annotations {
191 if len(a) == 0 {
192 continue
193 }
194 annotations.annotations[a] = true
195 }
196 *edges = append(*edges, &dependencyEdge{target, dependency, annotations})
197 }
198 return nil
199}
200
201// readFile is a task to read and parse a single license metadata file, and to schedule
202// additional tasks for reading and parsing dependencies as necessary.
203func readFile(recv *receiver, file string) {
204 recv.wg.Add(1)
205 <-recv.task
206 go func() {
207 f, err := recv.rootFS.Open(file)
208 if err != nil {
209 recv.results <- &result{file, nil, nil, fmt.Errorf("error opening license metadata %q: %w", file, err)}
210 return
211 }
212
213 // read the file
214 data, err := io.ReadAll(f)
215 if err != nil {
216 recv.results <- &result{file, nil, nil, fmt.Errorf("error reading license metadata %q: %w", file, err)}
217 return
218 }
219
220 tn := &TargetNode{name: file}
221
222 err = prototext.Unmarshal(data, &tn.proto)
223 if err != nil {
224 recv.results <- &result{file, nil, nil, fmt.Errorf("error license metadata %q: %w", file, err)}
225 return
226 }
227
228 edges := []*dependencyEdge{}
229 err = addDependencies(&edges, file, tn.proto.Deps)
230 if err != nil {
231 recv.results <- &result{file, nil, nil, fmt.Errorf("error license metadata dependency %q: %w", file, err)}
232 return
233 }
234 tn.proto.Deps = []*license_metadata_proto.AnnotatedDependency{}
235
236 // send result for this file and release task before scheduling dependencies,
237 // but do not signal done to WaitGroup until dependencies are scheduled.
238 recv.results <- &result{file, tn, edges, nil}
239 recv.task <- true
240
241 // schedule tasks as necessary to read dependencies
242 for _, e := range edges {
243 // decide, signal and record whether to schedule task in critical section
244 recv.lg.mu.Lock()
245 _, alreadyScheduled := recv.lg.targets[e.dependency]
246 if !alreadyScheduled {
247 recv.lg.targets[e.dependency] = nil
248 }
249 recv.lg.mu.Unlock()
250 // schedule task to read dependency file outside critical section
251 if !alreadyScheduled {
252 readFile(recv, e.dependency)
253 }
254 }
255
256 // signal task done after scheduling dependencies
257 recv.wg.Done()
258 }()
259}