#!/usr/bin/env python3
#
# Copyright (C) 2023 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16
"""
Generate the SBOM of the current target product in SPDX format.

Usage example:
  generate-sbom.py --output_file out/target/product/vsoc_x86_64/sbom.spdx \
                   --metadata out/target/product/vsoc_x86_64/sbom-metadata.csv \
                   --product_out_dir=out/target/product/vsoc_x86_64 \
                   --build_version $(cat out/target/product/vsoc_x86_64/build_fingerprint.txt) \
                   --product_mfr=Google
"""
26
import argparse
import csv
import datetime
import hashlib
import os

import google.protobuf.text_format as text_format

import metadata_file_pb2
import sbom_data
import sbom_writers
36
37
# Package types, used as the type component of generated package SPDX IDs.
PKG_SOURCE = 'SOURCE'
PKG_UPSTREAM = 'UPSTREAM'
PKG_PREBUILT = 'PREBUILT'

# Prefix of the security tags in METADATA files that carry a CPE identifier.
NVD_CPE23 = 'NVD-CPE2.3:'

# Section titles of the generation report; each one keys a list of entries.
ISSUE_NO_METADATA = 'No metadata generated in Make for installed files:'
ISSUE_NO_METADATA_FILE = 'No METADATA file found for installed file:'
ISSUE_METADATA_FILE_INCOMPLETE = 'METADATA file incomplete:'
ISSUE_UNKNOWN_SECURITY_TAG_TYPE = 'Unknown security tag type:'
ISSUE_INSTALLED_FILE_NOT_EXIST = 'Non-exist installed files:'
INFO_METADATA_FOUND_FOR_PACKAGE = 'METADATA file found for packages:'
53
54
def get_args(argv=None):
    """Parse the command line options of the SBOM generator.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from sys.argv[1:].

    Returns:
        The argparse.Namespace holding the parsed options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Print more information.')
    parser.add_argument('--output_file', required=True,
                        help='The generated SBOM file in SPDX format.')
    parser.add_argument('--metadata', required=True,
                        help='The SBOM metadata file path.')
    parser.add_argument('--product_out_dir', required=True,
                        help='The parent directory of all the installed files.')
    parser.add_argument('--build_version', required=True,
                        help='The build version.')
    parser.add_argument('--product_mfr', required=True,
                        help='The product manufacturer.')
    parser.add_argument('--json', action='store_true', default=False,
                        help='Generated SBOM file in SPDX JSON format')
    parser.add_argument('--unbundled', action='store_true', default=False,
                        help='Generate SBOM file for unbundled module')

    return parser.parse_args(argv)
67
68
def log(*info):
    """Print every positional argument on its own line in verbose mode.

    Reads the module-level ``args`` namespace; nothing is printed unless
    ``args.verbose`` is set.
    """
    if args.verbose:
        for item in info:
            print(item)
73
74
def encode_for_spdxid(s):
    """Encode a string for use in an SPDXID, whose charset is a-zA-Z0-9.-

    ASCII letters, ASCII digits, '.' and '-' are kept as they are. '_', '@'
    and '/' become '-'. Every other character, including non-ASCII letters
    and digits, is replaced by '0x' followed by the hex of its UTF-8 bytes.
    Leading '-' characters are stripped from the result.
    """
    encoded = []
    for c in s:
        # str.isalnum() alone is also true for non-ASCII letters and digits,
        # which are not part of the SPDXID charset, so require ASCII as well.
        if c in '.-' or (c.isascii() and c.isalnum()):
            encoded.append(c)
        elif c in '_@/':
            encoded.append('-')
        else:
            encoded.append('0x' + c.encode('utf-8').hex())

    return ''.join(encoded).lstrip('-')
87
88
def new_package_id(package_name, type):
    """Return the SPDX ID of a package: 'SPDXRef-<type>-<encoded package name>'."""
    return f'SPDXRef-{type}-{encode_for_spdxid(package_name)}'
91
92
def new_file_id(file_path):
    """Return the SPDX ID of a file: 'SPDXRef-<encoded file path>'."""
    return f'SPDXRef-{encode_for_spdxid(file_path)}'
95
96
def checksum(file_path):
    """Return 'SHA1: <hex digest>' for a file under the product out directory.

    Args:
        file_path: Path appended to ``args.product_out_dir`` (module-level
            ``args``) with a '/' in between.

    Returns:
        For a symbolic link, the SHA-1 of the UTF-8 encoded link target
        string; otherwise the SHA-1 of the file content.
    """
    file_path = args.product_out_dir + '/' + file_path
    h = hashlib.sha1()
    if os.path.islink(file_path):
        h.update(os.readlink(file_path).encode('utf-8'))
    else:
        with open(file_path, 'rb') as f:
            # Feed the hash in chunks so large files are not held in memory.
            while chunk := f.read(1024 * 1024):
                h.update(chunk)
    return f'SHA1: {h.hexdigest()}'
106
107
# Soong module types whose installed files are treated as prebuilts.
_SOONG_PREBUILT_MODULE_TYPES = frozenset([
    'android_app_import', 'android_library_import', 'cc_prebuilt_binary', 'cc_prebuilt_library',
    'cc_prebuilt_library_headers', 'cc_prebuilt_library_shared', 'cc_prebuilt_library_static',
    'cc_prebuilt_object', 'dex_import', 'java_import', 'java_sdk_library_import',
    'java_system_modules_import', 'libclang_rt_prebuilt_library_static',
    'libclang_rt_prebuilt_library_shared', 'llvm_prebuilt_library_static',
    'ndk_prebuilt_object', 'ndk_prebuilt_shared_stl', 'ndk_prebuilt_static_stl', 'prebuilt_apex',
    'prebuilt_bootclasspath_fragment', 'prebuilt_dsp', 'prebuilt_firmware',
    'prebuilt_kernel_modules', 'prebuilt_rfsa', 'prebuilt_root', 'rust_prebuilt_dylib',
    'rust_prebuilt_library', 'rust_prebuilt_rlib', 'vndk_prebuilt_shared',

    # Module types that are currently left out of the set:
    # 'android_test_import',
    # 'cc_prebuilt_test_library_shared',
    # 'java_import_host',
    # 'java_test_import',
    # 'llvm_host_prebuilt_library_shared',
    # 'prebuilt_apis',
    # 'prebuilt_build_tool',
    # 'prebuilt_defaults',
    # 'prebuilt_etc',
    # 'prebuilt_etc_host',
    # 'prebuilt_etc_xml',
    # 'prebuilt_font',
    # 'prebuilt_hidl_interfaces',
    # 'prebuilt_platform_compat_config',
    # 'prebuilt_stubs_sources',
    # 'prebuilt_usr_share',
    # 'prebuilt_usr_share_host',
    # 'soong_config_module_type_import',
])


def is_soong_prebuilt_module(file_metadata):
    """Return whether the file's 'soong_module_type' is a prebuilt module type.

    An empty module type is never a prebuilt module type.
    """
    return file_metadata['soong_module_type'] in _SOONG_PREBUILT_MODULE_TYPES
138
139
def is_source_package(file_metadata):
    """Return a truthy value when the file comes from a source package.

    A source package is a module whose 'module_path' starts with 'external/'
    and which is not a prebuilt package.
    """
    module_path = file_metadata['module_path']
    return module_path.startswith('external/') and not is_prebuilt_package(file_metadata)
143
144
def is_prebuilt_package(file_metadata):
    """Return a truthy value when the file comes from a prebuilt package.

    With a non-empty 'module_path', the file is prebuilt when the path starts
    with 'prebuilts/', when its Soong module type is a prebuilt type, or when
    'is_prebuilt_make_module' is truthy (that raw value is returned as is).

    Without a module path, the file is prebuilt when 'kernel_module_copy_files'
    is non-empty and does not start with 'ANDROID-GEN:'.
    """
    module_path = file_metadata['module_path']
    if module_path:
        return (module_path.startswith('prebuilts/') or
                is_soong_prebuilt_module(file_metadata) or
                file_metadata['is_prebuilt_make_module'])

    kernel_module_copy_files = file_metadata['kernel_module_copy_files']
    if kernel_module_copy_files and not kernel_module_copy_files.startswith('ANDROID-GEN:'):
        return True

    return False
157
158
def get_source_package_info(file_metadata, metadata_file_path):
    """Return the name and security references of a source package.

    The information comes from the package's METADATA file, looked up in the
    module-level ``metadata_file_protos`` cache.

    See go/android-spdx and go/android-sbom-gen for more details.

    Args:
        file_metadata: Metadata row of the installed file.
        metadata_file_path: Directory of the package's METADATA file, or a
            falsy value when the package has none.

    Returns:
        A (name, external_refs) tuple. Without a METADATA file the name is the
        module path and the list is empty. Otherwise the name is the METADATA
        'name' field, falling back to the last path component of
        metadata_file_path, and the list holds one PackageExternalRef per CPE
        2.3 / CPE 2.2 security tag.
    """
    if not metadata_file_path:
        return file_metadata['module_path'], []

    metadata_proto = metadata_file_protos[metadata_file_path]
    tag_prefix = NVD_CPE23.lower()
    external_refs = []
    for tag in metadata_proto.third_party.security.tag:
        lowered_tag = tag.lower()
        if lowered_tag.startswith(tag_prefix + 'cpe:2.3:'):
            ref_type = sbom_data.PackageExternalRefType.cpe23Type
        elif lowered_tag.startswith(tag_prefix + 'cpe:/'):
            ref_type = sbom_data.PackageExternalRefType.cpe22Type
        else:
            continue
        # The prefix was matched case-insensitively, so cut it off by length;
        # str.removeprefix() would leave a differently-cased prefix in place.
        external_refs.append(
            sbom_data.PackageExternalRef(category=sbom_data.PackageExternalRefCategory.SECURITY,
                                         type=ref_type,
                                         locator=tag[len(NVD_CPE23):]))

    if metadata_proto.name:
        return metadata_proto.name, external_refs
    # Fall back to the directory name only as the package name.
    return os.path.basename(metadata_file_path), external_refs
186
187
def get_prebuilt_package_name(file_metadata, metadata_file_path):
    """Return the name of a prebuilt package.

    The name is taken from the first available source, in this order: the
    'name' field of the METADATA file, the METADATA file path, the module
    path, or the directory of the kernel module's source path. A leading
    'prebuilts/' is then removed and every '/' is replaced by '-'.

    See go/android-spdx and go/android-sbom-gen for more details.

    Raises:
        AttributeError: If none of the sources above yields a name.
    """
    name = None
    if metadata_file_path:
        metadata_proto = metadata_file_protos[metadata_file_path]
        name = metadata_proto.name if metadata_proto.name else metadata_file_path
    elif file_metadata['module_path']:
        name = file_metadata['module_path']
    elif file_metadata['kernel_module_copy_files']:
        # The part before the first ':' is used as the source path.
        src_path = file_metadata['kernel_module_copy_files'].split(':')[0]
        name = os.path.dirname(src_path)

    return name.removeprefix('prebuilts/').replace('/', '-')
208
209
def get_metadata_file_path(file_metadata):
    """Search for the METADATA file of a package and return its directory.

    The search starts at the module path, or at the directory of the kernel
    module's source path when there is no module path, and walks up through
    the parent directories until one contains a file named METADATA.

    Returns:
        The directory containing METADATA, or '' when none is found.
    """
    metadata_path = ''
    if file_metadata['module_path']:
        metadata_path = file_metadata['module_path']
    elif file_metadata['kernel_module_copy_files']:
        metadata_path = os.path.dirname(file_metadata['kernel_module_copy_files'].split(':')[0])

    while metadata_path and not os.path.exists(metadata_path + '/METADATA'):
        parent = os.path.dirname(metadata_path)
        if parent == metadata_path:
            # Reached a root such as '/', whose dirname is itself; stop here
            # instead of looping forever.
            return ''
        metadata_path = parent

    return metadata_path
222
223
def get_package_version(metadata_file_path):
    """Return a package's version from its METADATA file.

    Returns None when metadata_file_path is falsy; otherwise the
    'third_party.version' field of the cached METADATA proto.
    """
    if not metadata_file_path:
        return None
    return metadata_file_protos[metadata_file_path].third_party.version
230
231
def get_package_homepage(metadata_file_path):
    """Return a package's homepage URL from its METADATA file.

    The 'third_party.homepage' field wins when set; otherwise the value of the
    first 'third_party.url' entry of type HOMEPAGE is used.

    Returns:
        The homepage URL, or None when metadata_file_path is falsy or the
        METADATA file declares no homepage.
    """
    if not metadata_file_path:
        return None

    third_party = metadata_file_protos[metadata_file_path].third_party
    if third_party.homepage:
        return third_party.homepage
    for url in third_party.url:
        if url.type == metadata_file_pb2.URL.Type.HOMEPAGE:
            return url.value

    return None
244
245
def get_package_download_location(metadata_file_path):
    """Return a package's code repository URL from its METADATA file.

    The 'third_party.url' entries are sorted by their type value. The first
    sorted entry is returned unless it is a HOMEPAGE entry, in which case the
    second sorted entry is returned when there is one.

    Returns:
        The selected URL value, or None when metadata_file_path is falsy, the
        METADATA file has no URLs, or its only URL is a homepage.
    """
    if not metadata_file_path:
        return None

    metadata_proto = metadata_file_protos[metadata_file_path]
    if metadata_proto.third_party.url:
        urls = sorted(metadata_proto.third_party.url, key=lambda url: url.type)
        if urls[0].type != metadata_file_pb2.URL.Type.HOMEPAGE:
            return urls[0].value
        if len(urls) > 1:
            return urls[1].value

    return None
259
260
def get_sbom_fragments(installed_file_metadata, metadata_file_path):
    """Return the SPDX fragment of a source or prebuilt package.

    A source package yields a SOURCE package, an UPSTREAM package and a
    VARIANT_OF relationship between them. A prebuilt package yields a PREBUILT
    package and, when its METADATA file defines a complete sbom_ref, an
    external SBOM document reference plus a VARIANT_OF relationship to the
    referenced upstream element.

    See go/android-spdx and go/android-sbom-gen for more details.

    Args:
        installed_file_metadata: Metadata row of the installed file.
        metadata_file_path: Directory of the package's METADATA file, or a
            falsy value when the package has none.

    Returns:
        An (external_doc_ref, packages, relationships) tuple. external_doc_ref
        is None unless a prebuilt package references an external SBOM; both
        lists are empty when the file is neither source nor prebuilt.
    """
    external_doc_ref = None
    packages = []
    relationships = []

    # Info from METADATA file
    homepage = get_package_homepage(metadata_file_path)
    version = get_package_version(metadata_file_path)
    download_location = get_package_download_location(metadata_file_path)

    if is_source_package(installed_file_metadata):
        # Source fork packages
        name, external_refs = get_source_package_info(installed_file_metadata, metadata_file_path)
        source_package_id = new_package_id(name, PKG_SOURCE)
        source_package = sbom_data.Package(id=source_package_id,
                                           name=name,
                                           version=args.build_version,
                                           supplier='Organization: ' + args.product_mfr,
                                           external_refs=external_refs)

        upstream_package_id = new_package_id(name, PKG_UPSTREAM)
        upstream_package = sbom_data.Package(
            id=upstream_package_id,
            name=name,
            version=version,
            supplier=('Organization: ' + homepage) if homepage else None,
            download_location=download_location)
        packages += [source_package, upstream_package]
        relationships.append(
            sbom_data.Relationship(id1=source_package_id,
                                   relationship=sbom_data.RelationshipType.VARIANT_OF,
                                   id2=upstream_package_id))
    elif is_prebuilt_package(installed_file_metadata):
        # Prebuilt fork packages
        name = get_prebuilt_package_name(installed_file_metadata, metadata_file_path)
        prebuilt_package_id = new_package_id(name, PKG_PREBUILT)
        prebuilt_package = sbom_data.Package(id=prebuilt_package_id,
                                             name=name,
                                             version=args.build_version,
                                             supplier='Organization: ' + args.product_mfr)
        packages.append(prebuilt_package)

        if metadata_file_path:
            metadata_proto = metadata_file_protos[metadata_file_path]
            if metadata_proto.third_party.WhichOneof('sbom') == 'sbom_ref':
                sbom_ref = metadata_proto.third_party.sbom_ref
                sbom_url = sbom_ref.url
                sbom_checksum = sbom_ref.checksum
                upstream_element_id = sbom_ref.element_id
                # Only a complete reference is turned into an external document ref.
                if sbom_url and sbom_checksum and upstream_element_id:
                    doc_ref_id = f'DocumentRef-{PKG_UPSTREAM}-{encode_for_spdxid(name)}'
                    external_doc_ref = sbom_data.DocumentExternalReference(id=doc_ref_id,
                                                                           uri=sbom_url,
                                                                           checksum=sbom_checksum)
                    relationships.append(
                        sbom_data.Relationship(id1=prebuilt_package_id,
                                               relationship=sbom_data.RelationshipType.VARIANT_OF,
                                               id2=doc_ref_id + ':' + upstream_element_id))

    return external_doc_ref, packages, relationships
320
321
def generate_package_verification_code(files):
    """Return the SPDX package verification code of the given files.

    The code is the SHA-1 hex digest of the files' SHA-1 hex values, sorted in
    ascending order and concatenated without separators.

    Args:
        files: Iterable of objects whose ``checksum`` attribute is a SHA-1 hex
            digest, optionally carrying the 'SHA1: ' label in front.

    Returns:
        The verification code as a hex string.
    """
    # Only the hex values take part in the code, so drop the 'SHA1: ' label
    # that the file checksums carry.
    checksums = sorted(file.checksum.removeprefix('SHA1: ') for file in files)
    h = hashlib.sha1()
    h.update(''.join(checksums).encode(encoding='utf-8'))
    return h.hexdigest()
328
329
def save_report(report):
    """Write the report next to the output file as '<prefix>-gen-report.txt'.

    The prefix is ``args.output_file`` without its extension. Each report
    section is written as its title line, one tab-indented line per entry,
    and a blank line.

    Args:
        report: Dict mapping a section title to the list of its entries.
    """
    prefix, _ = os.path.splitext(args.output_file)
    with open(prefix + '-gen-report.txt', 'w', encoding='utf-8') as report_file:
        for title, issues in report.items():
            report_file.write(title + '\n')
            for issue in issues:
                report_file.write('\t' + issue + '\n')
            report_file.write('\n')
338
339
def installed_file_has_metadata(installed_file_metadata, report):
    """Validate the metadata generated by Make for an installed file.

    A file has metadata when at least one of 'module_path',
    'product_copy_files', 'kernel_module_copy_files' or
    'is_platform_generated' is non-empty, or when its name ends with
    '.fsv_meta'.

    Args:
        installed_file_metadata: Metadata row of the installed file.
        report: Report dict; a file without metadata is appended to its
            ISSUE_NO_METADATA list.

    Returns:
        True when the file has metadata, False otherwise.
    """
    installed_file = installed_file_metadata['installed_file']
    module_path = installed_file_metadata['module_path']
    product_copy_files = installed_file_metadata['product_copy_files']
    kernel_module_copy_files = installed_file_metadata['kernel_module_copy_files']
    is_platform_generated = installed_file_metadata['is_platform_generated']

    if (not module_path and
            not product_copy_files and
            not kernel_module_copy_files and
            not is_platform_generated and
            not installed_file.endswith('.fsv_meta')):
        report[ISSUE_NO_METADATA].append(installed_file)
        return False

    return True
357
358
def report_metadata_file(metadata_file_path, installed_file_metadata, report):
    """Record the METADATA file of an installed file and cache its content.

    Without a METADATA file, the installed file is listed under
    ISSUE_NO_METADATA_FILE. Otherwise it is listed under
    INFO_METADATA_FOUND_FOR_PACKAGE and, the first time a METADATA path is
    seen, the file is parsed into the module-level ``metadata_file_protos``
    cache and checked for a missing name, a missing third_party.version and
    security tags that do not start with the NVD CPE prefix.

    Args:
        metadata_file_path: Directory containing the METADATA file, or a falsy
            value when there is none.
        installed_file_metadata: Metadata row of the installed file.
        report: Report dict mapping section titles to lists of entries.
    """
    if not metadata_file_path:
        report[ISSUE_NO_METADATA_FILE].append(
            "installed_file: {}, module_path: {}".format(
                installed_file_metadata['installed_file'], installed_file_metadata['module_path']))
        return

    report[INFO_METADATA_FOUND_FOR_PACKAGE].append(
        'installed_file: {}, module_path: {}, METADATA file: {}'.format(
            installed_file_metadata['installed_file'],
            installed_file_metadata['module_path'],
            metadata_file_path + '/METADATA'))

    if metadata_file_path in metadata_file_protos:
        # Already parsed and checked for an earlier installed file.
        return

    package_metadata = metadata_file_pb2.Metadata()
    with open(metadata_file_path + '/METADATA', 'rt', encoding='utf-8') as f:
        text_format.Parse(f.read(), package_metadata)
    metadata_file_protos[metadata_file_path] = package_metadata

    if not package_metadata.name:
        report[ISSUE_METADATA_FILE_INCOMPLETE].append(f'{metadata_file_path}/METADATA does not has "name"')

    if not package_metadata.third_party.version:
        report[ISSUE_METADATA_FILE_INCOMPLETE].append(
            f'{metadata_file_path}/METADATA does not has "third_party.version"')

    for tag in package_metadata.third_party.security.tag:
        if not tag.startswith(NVD_CPE23):
            report[ISSUE_UNKNOWN_SECURITY_TAG_TYPE].append(
                f'Unknown security tag type: {tag} in {metadata_file_path}/METADATA')
388
389
def generate_sbom_for_unbundled():
    """Generate the SBOM fragment of a single unbundled module.

    Scans the metadata CSV for the row whose installed file matches the output
    file, i.e. ``args.output_file`` equals
    ``args.product_out_dir + installed_file + '.spdx'``. For that row a
    PREBUILT package named after the module path, a file record and a
    GENERATED_FROM relationship between them are added to the document. The
    document is then written to ``args.output_file`` as a tag-value fragment,
    whether or not a row matched.
    """
    doc = sbom_data.Document(name=args.build_version,
                             namespace=f'https://www.google.com/sbom/spdx/android/{args.build_version}',
                             creators=['Organization: ' + args.product_mfr])

    with open(args.metadata, newline='') as sbom_metadata_file:
        for installed_file_metadata in csv.DictReader(sbom_metadata_file):
            installed_file = installed_file_metadata['installed_file']
            if args.output_file != args.product_out_dir + installed_file + ".spdx":
                continue

            module_path = installed_file_metadata['module_path']
            package_id = new_package_id(module_path, PKG_PREBUILT)
            package = sbom_data.Package(id=package_id,
                                        name=module_path,
                                        version=args.build_version,
                                        supplier='Organization: ' + args.product_mfr)
            file_id = new_file_id(installed_file)
            file_record = sbom_data.File(id=file_id, name=installed_file, checksum=checksum(installed_file))
            relationship = sbom_data.Relationship(id1=file_id,
                                                  relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                                  id2=package_id)
            doc.add_package(package)
            doc.files.append(file_record)
            doc.describes = file_id
            doc.add_relationship(relationship)
            doc.created = datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
            # Only the first matching row is used.
            break

    with open(args.output_file, 'w', encoding="utf-8") as output_file:
        sbom_writers.TagValueWriter.write(doc, output_file, fragment=True)
421
422
def _platform_relationship(file_id):
    """Return a GENERATED_FROM relationship from file_id to the PLATFORM package."""
    return sbom_data.Relationship(id1=file_id,
                                  relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                  id2=sbom_data.SPDXID_PLATFORM)


def main():
    """Generate the SBOM of the product described by the command line options.

    Sets the module-level ``args`` and, for a full product SBOM, the
    ``metadata_file_protos`` cache. With --unbundled the work is delegated to
    generate_sbom_for_unbundled(). Otherwise every row of the metadata CSV is
    turned into a file record that is linked either to its source/prebuilt
    fork package or to the PLATFORM package, and the document is written in
    tag-value format, plus JSON when --json is given.
    """
    global args
    args = get_args()
    log('Args:', vars(args))

    if args.unbundled:
        generate_sbom_for_unbundled()
        return

    global metadata_file_protos
    metadata_file_protos = {}

    doc = sbom_data.Document(name=args.build_version,
                             namespace=f'https://www.google.com/sbom/spdx/android/{args.build_version}',
                             creators=['Organization: ' + args.product_mfr])

    product_package = sbom_data.Package(id=sbom_data.SPDXID_PRODUCT,
                                        name=sbom_data.PACKAGE_NAME_PRODUCT,
                                        version=args.build_version,
                                        supplier='Organization: ' + args.product_mfr,
                                        files_analyzed=True)
    doc.packages.append(product_package)

    doc.packages.append(sbom_data.Package(id=sbom_data.SPDXID_PLATFORM,
                                          name=sbom_data.PACKAGE_NAME_PLATFORM,
                                          version=args.build_version,
                                          supplier='Organization: ' + args.product_mfr))

    # Report on some issues and information.
    # NOTE(review): the report is filled in below but save_report() is not
    # called from here - confirm whether that is intentional.
    report = {
        ISSUE_NO_METADATA: [],
        ISSUE_NO_METADATA_FILE: [],
        ISSUE_METADATA_FILE_INCOMPLETE: [],
        ISSUE_UNKNOWN_SECURITY_TAG_TYPE: [],
        ISSUE_INSTALLED_FILE_NOT_EXIST: [],
        INFO_METADATA_FOUND_FOR_PACKAGE: [],
    }

    # Scan the metadata in CSV file and create the corresponding package and file records in SPDX
    with open(args.metadata, newline='') as sbom_metadata_file:
        for installed_file_metadata in csv.DictReader(sbom_metadata_file):
            installed_file = installed_file_metadata['installed_file']
            module_path = installed_file_metadata['module_path']
            product_copy_files = installed_file_metadata['product_copy_files']
            kernel_module_copy_files = installed_file_metadata['kernel_module_copy_files']

            if not installed_file_has_metadata(installed_file_metadata, report):
                continue
            file_path = args.product_out_dir + '/' + installed_file
            if not (os.path.islink(file_path) or os.path.isfile(file_path)):
                report[ISSUE_INSTALLED_FILE_NOT_EXIST].append(installed_file)
                continue

            file_id = new_file_id(installed_file)
            doc.files.append(
                sbom_data.File(id=file_id, name=installed_file, checksum=checksum(installed_file)))
            product_package.file_ids.append(file_id)

            if is_source_package(installed_file_metadata) or is_prebuilt_package(installed_file_metadata):
                metadata_file_path = get_metadata_file_path(installed_file_metadata)
                report_metadata_file(metadata_file_path, installed_file_metadata, report)

                # File from source fork packages or prebuilt fork packages
                external_doc_ref, pkgs, rels = get_sbom_fragments(installed_file_metadata, metadata_file_path)
                if len(pkgs) > 0:
                    if external_doc_ref:
                        doc.add_external_ref(external_doc_ref)
                    for p in pkgs:
                        doc.add_package(p)
                    for rel in rels:
                        doc.add_relationship(rel)
                    fork_package_id = pkgs[0].id  # The first package should be the source/prebuilt fork package
                    doc.add_relationship(
                        sbom_data.Relationship(id1=file_id,
                                               relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                               id2=fork_package_id))
            elif (
                    # File from PLATFORM package
                    module_path or installed_file_metadata['is_platform_generated']
                    # Format of product_copy_files: <source path>:<dest path>
                    # So far product_copy_files are copied from directory system, kernel, hardware,
                    # frameworks and device, so process them as files from PLATFORM package
                    or product_copy_files
                    # See build/make/core/Makefile:2988
                    or installed_file.endswith('.fsv_meta')
                    # For the four files generated for _dlkm, _ramdisk partitions
                    # See build/make/core/Makefile:323
                    or kernel_module_copy_files.startswith('ANDROID-GEN')):
                doc.add_relationship(_platform_relationship(file_id))

    product_package.verification_code = generate_package_verification_code(doc.files)

    # Save SBOM records to output file
    doc.created = datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    with open(args.output_file, 'w', encoding="utf-8") as output_file:
        sbom_writers.TagValueWriter.write(doc, output_file)
    if args.json:
        with open(args.output_file+'.json', 'w', encoding="utf-8") as json_file:
            sbom_writers.JSONWriter.write(doc, json_file)
533
534
# Run the generator only when the file is executed as a script.
if __name__ == '__main__':
    main()