Wei Li | dec97b1 | 2023-04-07 16:45:17 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # |
| 3 | # Copyright (C) 2023 The Android Open Source Project |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | # you may not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
| 16 | |
| 17 | """ |
| 18 | Generate the SBOM of the current target product in SPDX format. |
| 19 | Usage example: |
| 20 | generate-sbom.py --output_file out/target/product/vsoc_x86_64/sbom.spdx \ |
| 21 | --metadata out/target/product/vsoc_x86_64/sbom-metadata.csv \ |
Wei Li | dec97b1 | 2023-04-07 16:45:17 -0700 | [diff] [blame] | 22 | --build_version $(cat out/target/product/vsoc_x86_64/build_fingerprint.txt) \ |
| 23 | --product_mfr=Google |
| 24 | """ |
| 25 | |
import argparse
import csv
import datetime
import hashlib
import os
import sys

import google.protobuf.text_format as text_format

import metadata_file_pb2
import sbom_data
import sbom_writers
| 35 | |
| 36 | |
# Package types, used as the middle component of generated package SPDX IDs.
PKG_SOURCE, PKG_UPSTREAM, PKG_PREBUILT = 'SOURCE', 'UPSTREAM', 'PREBUILT'

# Prefix of the security tags in METADATA files that carry an NVD CPE identifier.
NVD_CPE23 = 'NVD-CPE2.3:'

# Section headers of the generation report; each one keys a list of entries.
ISSUE_NO_METADATA = 'No metadata generated in Make for installed files:'
ISSUE_NO_METADATA_FILE = 'No METADATA file found for installed file:'
ISSUE_METADATA_FILE_INCOMPLETE = 'METADATA file incomplete:'
ISSUE_UNKNOWN_SECURITY_TAG_TYPE = 'Unknown security tag type:'
ISSUE_INSTALLED_FILE_NOT_EXIST = 'Non-exist installed files:'
INFO_METADATA_FOUND_FOR_PACKAGE = 'METADATA file found for packages:'
| 52 | |
# Soong module types whose outputs are treated as coming from prebuilt packages.
SOONG_PREBUILT_MODULE_TYPES = [
  'android_app_import',
  'android_library_import',
  'cc_prebuilt_binary',
  'cc_prebuilt_library',
  'cc_prebuilt_library_headers',
  'cc_prebuilt_library_shared',
  'cc_prebuilt_library_static',
  'cc_prebuilt_object',
  'dex_import',
  'java_import',
  'java_sdk_library_import',
  'java_system_modules_import',
  'libclang_rt_prebuilt_library_static',
  'libclang_rt_prebuilt_library_shared',
  'llvm_prebuilt_library_static',
  'ndk_prebuilt_object',
  'ndk_prebuilt_shared_stl',
  # Was misspelled 'nkd_prebuilt_static_stl', which can never match the real module type.
  'ndk_prebuilt_static_stl',
  'prebuilt_apex',
  'prebuilt_bootclasspath_fragment',
  'prebuilt_dsp',
  'prebuilt_firmware',
  'prebuilt_kernel_modules',
  'prebuilt_rfsa',
  'prebuilt_root',
  'rust_prebuilt_dylib',
  'rust_prebuilt_library',
  'rust_prebuilt_rlib',
  'vndk_prebuilt_shared',
]
| 84 | |
# Values accepted for third_party.identifier.type in a METADATA file.
THIRD_PARTY_IDENTIFIER_TYPES = [
  # Types defined in metadata_file.proto
  'Git', 'SVN', 'Hg', 'Darcs', 'VCS', 'Archive', 'PrebuiltByAlphabet', 'LocalSource', 'Other',
  # OSV ecosystems defined at https://ossf.github.io/osv-schema/#affectedpackage-field.
  'Go', 'npm', 'OSS-Fuzz', 'PyPI', 'RubyGems', 'crates.io', 'Hackage', 'GHC', 'Packagist',
  'Maven', 'NuGet', 'Linux', 'Debian', 'Alpine', 'Hex', 'Android', 'GitHub Actions', 'Pub',
  'ConanCenter', 'Rocky Linux', 'AlmaLinux', 'Bitnami', 'Photon OS', 'CRAN', 'Bioconductor',
  'SwiftURL',
]
| 124 | |
Wei Li | dec97b1 | 2023-04-07 16:45:17 -0700 | [diff] [blame] | 125 | |
def get_args():
  """Parse the command line of this script and return the resulting namespace."""
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Print more information.')

  # Options that must always be given.
  required_options = (
      ('--output_file', 'The generated SBOM file in SPDX format.'),
      ('--metadata', 'The SBOM metadata file path.'),
      ('--build_version', 'The build version.'),
      ('--product_mfr', 'The product manufacturer.'),
  )
  for option, description in required_options:
    arg_parser.add_argument(option, required=True, help=description)

  arg_parser.add_argument('--module_name',
                          help='The module name. If specified, the generated SBOM is for the module.')

  # Optional on/off switches, all off by default.
  switches = (
      ('--json', 'Generated SBOM file in SPDX JSON format'),
      ('--unbundled_apk', 'Generate SBOM for unbundled APKs'),
      ('--unbundled_apex', 'Generate SBOM for unbundled APEXs'),
  )
  for switch, description in switches:
    arg_parser.add_argument(switch, action='store_true', default=False, help=description)

  return arg_parser.parse_args()
| 139 | |
| 140 | |
def log(*info):
  """Print every argument on its own line, but only when --verbose was given."""
  if not args.verbose:
    return
  for item in info:
    print(item)
| 145 | |
| 146 | |
def new_package_id(package_name, type):
  """Return the SPDX ID of a package: 'SPDXRef-<type>-<encoded package name>'."""
  encoded_name = sbom_data.encode_for_spdxid(package_name)
  return f'SPDXRef-{type}-{encoded_name}'
Wei Li | dec97b1 | 2023-04-07 16:45:17 -0700 | [diff] [blame] | 149 | |
| 150 | |
def new_file_id(file_path):
  """Return the SPDX ID of a file: 'SPDXRef-<encoded file path>'."""
  encoded_path = sbom_data.encode_for_spdxid(file_path)
  return f'SPDXRef-{encoded_path}'
Wei Li | dec97b1 | 2023-04-07 16:45:17 -0700 | [diff] [blame] | 153 | |
| 154 | |
| 155 | def checksum(file_path): |
Wei Li | dec97b1 | 2023-04-07 16:45:17 -0700 | [diff] [blame] | 156 | h = hashlib.sha1() |
| 157 | if os.path.islink(file_path): |
| 158 | h.update(os.readlink(file_path).encode('utf-8')) |
| 159 | else: |
| 160 | with open(file_path, 'rb') as f: |
| 161 | h.update(f.read()) |
| 162 | return f'SHA1: {h.hexdigest()}' |
| 163 | |
| 164 | |
def is_soong_prebuilt_module(file_metadata):
  """Tell whether the file's Soong module type is one of the known prebuilt module types.

  The result is falsy (the empty module type itself) when no module type is recorded.
  """
  module_type = file_metadata['soong_module_type']
  return module_type and module_type in SOONG_PREBUILT_MODULE_TYPES
Wei Li | dec97b1 | 2023-04-07 16:45:17 -0700 | [diff] [blame] | 168 | |
| 169 | |
def is_source_package(file_metadata):
  """Tell whether the file is built from a source package: under external/ and not a prebuilt."""
  if not file_metadata['module_path'].startswith('external/'):
    return False
  return not is_prebuilt_package(file_metadata)
| 173 | |
| 174 | |
def is_prebuilt_package(file_metadata):
  """Tell whether the installed file comes from a prebuilt package.

  With a module path, the file is prebuilt when the path is under prebuilts/, the Soong module
  type is a prebuilt type, or Make flagged the module as prebuilt. Without a module path, it is
  prebuilt when it is a copied kernel module whose entry does not start with 'ANDROID-GEN:'.
  """
  module_path = file_metadata['module_path']
  if not module_path:
    copy_files = file_metadata['kernel_module_copy_files']
    return bool(copy_files) and not copy_files.startswith('ANDROID-GEN:')

  return (module_path.startswith('prebuilts/') or
          is_soong_prebuilt_module(file_metadata) or
          file_metadata['is_prebuilt_make_module'])
| 187 | |
| 188 | |
def get_source_package_info(file_metadata, metadata_file_path):
  """Return the name and the security external references of a source package.

  Without a METADATA file the module path is used as the name and there are no references.
  Otherwise the name is taken from the METADATA file, falling back to the last path component
  of the METADATA directory, and each security tag carrying a CPE 2.3 or CPE 2.2 identifier
  becomes one external reference. Other tags are ignored here.

  See go/android-spdx and go/android-sbom-gen for more details.

  Returns:
    A tuple (name, external_refs) where external_refs is a list of sbom_data.PackageExternalRef.
  """
  if not metadata_file_path:
    return file_metadata['module_path'], []

  metadata_proto = metadata_file_protos[metadata_file_path]
  cpe23_prefix = (NVD_CPE23 + 'cpe:2.3:').lower()
  cpe22_prefix = (NVD_CPE23 + 'cpe:/').lower()
  external_refs = []
  for tag in metadata_proto.third_party.security.tag:
    lowered_tag = tag.lower()
    if lowered_tag.startswith(cpe23_prefix):
      ref_type = sbom_data.PackageExternalRefType.cpe23Type
    elif lowered_tag.startswith(cpe22_prefix):
      ref_type = sbom_data.PackageExternalRefType.cpe22Type
    else:
      continue
    # The prefix was matched case-insensitively, so strip it by length: a case-sensitive
    # removeprefix() would leave e.g. 'nvd-cpe2.3:' in the locator.
    external_refs.append(
        sbom_data.PackageExternalRef(category=sbom_data.PackageExternalRefCategory.SECURITY,
                                     type=ref_type,
                                     locator=tag[len(NVD_CPE23):]))

  if metadata_proto.name:
    return metadata_proto.name, external_refs
  # No name in METADATA: use only the directory name as the package name.
  return os.path.basename(metadata_file_path), external_refs
| 216 | |
| 217 | |
def get_prebuilt_package_name(file_metadata, metadata_file_path):
  """Return the name of a prebuilt package.

  The name comes from the first available of: the name in the METADATA file, the METADATA file
  path, the module path, or the source directory of a copied kernel module. A leading
  'prebuilts/' is dropped and every '/' is replaced with '-'.

  See go/android-spdx and go/android-sbom-gen for more details.
  """
  raw_name = None
  if metadata_file_path:
    # An empty proto name falls back to the path of the METADATA directory.
    raw_name = metadata_file_protos[metadata_file_path].name or metadata_file_path
  elif file_metadata['module_path']:
    raw_name = file_metadata['module_path']
  elif file_metadata['kernel_module_copy_files']:
    # Format: <source path>:<dest path>; the package is the directory of the source.
    kernel_module_src = file_metadata['kernel_module_copy_files'].partition(':')[0]
    raw_name = os.path.dirname(kernel_module_src)

  without_prefix = raw_name.removeprefix('prebuilts/')
  return without_prefix.replace('/', '-')
| 238 | |
| 239 | |
def get_metadata_file_path(file_metadata):
  """Search for the METADATA file of a package and return the directory containing it.

  The search starts at the module path, or at the source directory of a copied kernel module
  when there is no module path, and walks up one directory at a time.

  Returns:
    The path of the first directory found that contains a METADATA file, or '' if there is no
    starting path or no METADATA file is found.
  """
  metadata_path = ''
  if file_metadata['module_path']:
    metadata_path = file_metadata['module_path']
  elif file_metadata['kernel_module_copy_files']:
    # Format: <source path>:<dest path>
    metadata_path = os.path.dirname(file_metadata['kernel_module_copy_files'].split(':')[0])

  while metadata_path and not os.path.exists(metadata_path + '/METADATA'):
    parent_path = os.path.dirname(metadata_path)
    if parent_path == metadata_path:
      # dirname() of a filesystem root is the root itself; without this check an absolute
      # path with no METADATA file above it would loop forever.
      return ''
    metadata_path = parent_path

  return metadata_path
| 252 | |
| 253 | |
def get_package_version(metadata_file_path):
  """Return the third_party.version recorded in a package's METADATA file, or None without one."""
  if metadata_file_path:
    return metadata_file_protos[metadata_file_path].third_party.version
  return None
| 260 | |
| 261 | |
def get_package_homepage(metadata_file_path):
  """Return the homepage URL recorded in a package's METADATA file.

  The explicit third_party.homepage field wins; otherwise the value of the first
  third_party.url entry of type HOMEPAGE is used. Returns None when neither exists
  or when there is no METADATA file.
  """
  if not metadata_file_path:
    return None

  third_party = metadata_file_protos[metadata_file_path].third_party
  if third_party.homepage:
    return third_party.homepage

  homepage_urls = (entry.value for entry in third_party.url
                   if entry.type == metadata_file_pb2.URL.Type.HOMEPAGE)
  return next(homepage_urls, None)
| 274 | |
| 275 | |
def get_package_download_location(metadata_file_path):
  """Return the download URL recorded in a package's METADATA file.

  The third_party.url entries are ordered by their type value. The first entry is returned
  unless it is of type HOMEPAGE, in which case the second entry (if any) is returned.
  Returns None when there is no METADATA file or no suitable URL.
  """
  if not metadata_file_path:
    return None

  third_party = metadata_file_protos[metadata_file_path].third_party
  if not third_party.url:
    return None

  ordered = sorted(third_party.url, key=lambda entry: entry.type)
  first = ordered[0]
  if first.type != metadata_file_pb2.URL.Type.HOMEPAGE:
    return first.value
  if len(ordered) > 1:
    return ordered[1].value
  return None
| 289 | |
| 290 | |
def get_sbom_fragments(installed_file_metadata, metadata_file_path):
  """Return the SPDX fragment of the source/prebuilt package an installed file comes from.

  The fragment usually contains a SOURCE or PREBUILT fork package, an UPSTREAM package the fork
  is a variant of, and an external SBOM document reference if sbom_ref is defined in the
  package's METADATA file.

  See go/android-spdx and go/android-sbom-gen for more details.

  Args:
    installed_file_metadata: the metadata row of the installed file.
    metadata_file_path: directory of the package's METADATA file, or a falsy value if none.

  Returns:
    A tuple (external_doc_ref, packages, relationships). packages is [fork, upstream] with the
    fork package first. For a file that is neither from a source nor a prebuilt package the
    result is (None, [], []).
  """
  external_doc_ref = None
  packages = []
  relationships = []

  # Info from METADATA file
  homepage = get_package_homepage(metadata_file_path)
  version = get_package_version(metadata_file_path)
  download_location = get_package_download_location(metadata_file_path)

  if is_source_package(installed_file_metadata):
    # Source fork packages
    name, external_refs = get_source_package_info(installed_file_metadata, metadata_file_path)
    fork_package_id = new_package_id(name, PKG_SOURCE)
    fork_package = sbom_data.Package(id=fork_package_id, name=name, version=args.build_version,
                                     download_location=sbom_data.VALUE_NONE,
                                     supplier='Organization: ' + args.product_mfr,
                                     external_refs=external_refs)
  elif is_prebuilt_package(installed_file_metadata):
    # Prebuilt fork packages
    name = get_prebuilt_package_name(installed_file_metadata, metadata_file_path)
    fork_package_id = new_package_id(name, PKG_PREBUILT)
    fork_package = sbom_data.Package(id=fork_package_id,
                                     name=name,
                                     download_location=sbom_data.VALUE_NONE,
                                     version=version if version else args.build_version,
                                     supplier='Organization: ' + args.product_mfr)
  else:
    # Neither kind of package: nothing to describe. Returning here also keeps the sbom_ref
    # handling below from touching a package name and upstream ID that were never set.
    return external_doc_ref, packages, relationships

  # The upstream package is described the same way for source and prebuilt forks.
  upstream_package_id = new_package_id(name, PKG_UPSTREAM)
  upstream_package = sbom_data.Package(id=upstream_package_id, name=name, version=version,
                                       supplier=('Organization: ' + homepage) if homepage else sbom_data.VALUE_NOASSERTION,
                                       download_location=download_location)
  packages += [fork_package, upstream_package]
  relationships.append(sbom_data.Relationship(id1=fork_package_id,
                                              relationship=sbom_data.RelationshipType.VARIANT_OF,
                                              id2=upstream_package_id))

  if metadata_file_path:
    metadata_proto = metadata_file_protos[metadata_file_path]
    if metadata_proto.third_party.WhichOneof('sbom') == 'sbom_ref':
      sbom_url = metadata_proto.third_party.sbom_ref.url
      sbom_checksum = metadata_proto.third_party.sbom_ref.checksum
      upstream_element_id = metadata_proto.third_party.sbom_ref.element_id
      # The external reference is only emitted when all three parts are present.
      if sbom_url and sbom_checksum and upstream_element_id:
        doc_ref_id = f'DocumentRef-{PKG_UPSTREAM}-{sbom_data.encode_for_spdxid(name)}'
        external_doc_ref = sbom_data.DocumentExternalReference(id=doc_ref_id,
                                                               uri=sbom_url,
                                                               checksum=sbom_checksum)
        relationships.append(
            sbom_data.Relationship(id1=upstream_package_id,
                                   relationship=sbom_data.RelationshipType.VARIANT_OF,
                                   id2=doc_ref_id + ':' + upstream_element_id))

  return external_doc_ref, packages, relationships
| 360 | |
| 361 | |
def save_report(report_file_path, report):
  """Write the report to a text file: each section header, its tab-indented entries, a blank line."""
  with open(report_file_path, 'w', encoding='utf-8') as out:
    for header, entries in report.items():
      out.write(header + '\n')
      for entry in entries:
        out.write('\t' + entry + '\n')
      out.write('\n')
| 369 | |
| 370 | |
def installed_file_has_metadata(installed_file_metadata, report):
  """Validate that Make generated some metadata for an installed file.

  A file is accepted when it has a module path, a product copy entry, a kernel module copy
  entry, is platform generated, or is an .fsv_meta file. Otherwise it is recorded in the
  report under ISSUE_NO_METADATA.

  Returns:
    True if the file has metadata, False if it was reported as having none.
  """
  installed_file = installed_file_metadata['installed_file']
  module_path = installed_file_metadata['module_path']
  product_copy_files = installed_file_metadata['product_copy_files']
  kernel_module_copy_files = installed_file_metadata['kernel_module_copy_files']
  is_platform_generated = installed_file_metadata['is_platform_generated']

  origin_known = (module_path or
                  product_copy_files or
                  kernel_module_copy_files or
                  is_platform_generated or
                  installed_file.endswith('.fsv_meta'))
  if origin_known:
    return True

  report[ISSUE_NO_METADATA].append(installed_file)
  return False
| 388 | |
| 389 | |
def validate_package_metadata(metadata_file_path, package_metadata):
  """Validate the identifiers in a package's METADATA and exit the script on a violation.

  1) Only known identifier types (THIRD_PARTY_IDENTIFIER_TYPES) are allowed.
  2) At most one identifier may have primary_source set to true.

  Args:
    metadata_file_path: directory of the METADATA file, used in the error messages.
    package_metadata: the parsed METADATA message.
  """
  primary_source_found = False
  for identifier in package_metadata.third_party.identifier:
    if identifier.type not in THIRD_PARTY_IDENTIFIER_TYPES:
      sys.exit(f'Unknown value of third_party.identifier.type in {metadata_file_path}/METADATA: {identifier.type}.')
    if identifier.primary_source:
      if primary_source_found:
        sys.exit(
            f'Field "primary_source" is set to true in multiple third_party.identifier in {metadata_file_path}/METADATA.')
      # Only ever set the flag, never clear it: a non-primary identifier between two primary
      # ones must not hide the duplicate.
      primary_source_found = True
| 402 | |
| 403 | |
def report_metadata_file(metadata_file_path, installed_file_metadata, report):
  """Load a package's METADATA file into the cache and record findings about it in the report.

  Without a METADATA file the installed file is recorded under ISSUE_NO_METADATA_FILE.
  Otherwise the file is recorded under INFO_METADATA_FOUND_FOR_PACKAGE and, the first time
  this METADATA file is seen, it is parsed, validated, stored in metadata_file_protos and
  checked for a missing name, a missing third_party.version and unknown security tag types.

  Args:
    metadata_file_path: directory of the METADATA file, or a falsy value if none was found.
    installed_file_metadata: the metadata row of the installed file.
    report: the report dictionary to append findings to.
  """
  if not metadata_file_path:
    report[ISSUE_NO_METADATA_FILE].append(
        "installed_file: {}, module_path: {}".format(
            installed_file_metadata['installed_file'], installed_file_metadata['module_path']))
    return

  report[INFO_METADATA_FOUND_FOR_PACKAGE].append(
      'installed_file: {}, module_path: {}, METADATA file: {}'.format(
          installed_file_metadata['installed_file'],
          installed_file_metadata['module_path'],
          metadata_file_path + '/METADATA'))

  if metadata_file_path in metadata_file_protos:
    # Already parsed, validated and reported for an earlier installed file of this package;
    # reading the same file again would only repeat that work.
    return

  package_metadata = metadata_file_pb2.Metadata()
  # Read as UTF-8 explicitly so parsing does not depend on the locale of the build machine.
  with open(metadata_file_path + '/METADATA', 'rt', encoding='utf-8') as f:
    text_format.Parse(f.read(), package_metadata)

  validate_package_metadata(metadata_file_path, package_metadata)

  metadata_file_protos[metadata_file_path] = package_metadata
  if not package_metadata.name:
    report[ISSUE_METADATA_FILE_INCOMPLETE].append(f'{metadata_file_path}/METADATA does not has "name"')

  if not package_metadata.third_party.version:
    report[ISSUE_METADATA_FILE_INCOMPLETE].append(
        f'{metadata_file_path}/METADATA does not has "third_party.version"')

  for tag in package_metadata.third_party.security.tag:
    if not tag.startswith(NVD_CPE23):
      report[ISSUE_UNKNOWN_SECURITY_TAG_TYPE].append(
          f'Unknown security tag type: {tag} in {metadata_file_path}/METADATA')
| 435 | |
| 436 | |
def generate_sbom_for_unbundled_apk():
  """Generate the SBOM of an unbundled APK.

  Looks up the first metadata row whose build output path plus '.spdx.json' equals the requested
  output file and describes it as a single file generated from a single prebuilt package. The
  document is written as SPDX JSON to the output file and as a tag-value fragment next to it.
  """
  organization = 'Organization: ' + args.product_mfr
  with open(args.metadata, newline='') as sbom_metadata_file:
    rows = csv.DictReader(sbom_metadata_file)
    doc = sbom_data.Document(name=args.build_version,
                             namespace=f'https://www.google.com/sbom/spdx/android/{args.build_version}',
                             creators=[organization])
    for row in rows:
      installed_file = row['installed_file']
      if args.output_file != row['build_output_path'] + '.spdx.json':
        continue

      module_path = row['module_path']
      package_id = new_package_id(module_path, PKG_PREBUILT)
      file_id = new_file_id(installed_file)
      doc.add_package(sbom_data.Package(id=package_id,
                                        name=module_path,
                                        version=args.build_version,
                                        supplier=organization))
      doc.files.append(sbom_data.File(id=file_id,
                                      name=installed_file,
                                      checksum=checksum(row['build_output_path'])))
      doc.describes = file_id
      doc.add_relationship(sbom_data.Relationship(id1=file_id,
                                                  relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                                  id2=package_id))
      doc.created = datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
      # Only the first matching row is used.
      break

  with open(args.output_file, 'w', encoding='utf-8') as json_out:
    sbom_writers.JSONWriter.write(doc, json_out)

  fragment_path = args.output_file.removesuffix('.spdx.json') + '-fragment.spdx'
  with open(fragment_path, 'w', encoding='utf-8') as fragment_out:
    sbom_writers.TagValueWriter.write(doc, fragment_out, fragment=True)
| 473 | |
| 474 | |
def main():
  """Generate the SBOM of the product, of a module, or of an unbundled APK/APEX.

  Reads the SBOM metadata CSV, creates one SPDX file record per installed file, relates each
  file to the package it was generated from (a source/prebuilt fork package or the PLATFORM
  package) and to the static libraries it links, then writes the document in tag-value format,
  optionally in JSON format, together with a text report of issues found on the way.
  """
  global args
  global metadata_file_protos

  args = get_args()
  log('Args:', vars(args))

  if args.unbundled_apk:
    generate_sbom_for_unbundled_apk()
    return

  # Cache of parsed METADATA files, keyed by the directory containing the file.
  metadata_file_protos = {}

  product_package_id = sbom_data.SPDXID_PRODUCT
  product_package_name = sbom_data.PACKAGE_NAME_PRODUCT
  if args.module_name:
    # Build SBOM of a module so use the module name instead.
    product_package_id = f'SPDXRef-{sbom_data.encode_for_spdxid(args.module_name)}'
    product_package_name = args.module_name
  product_package = sbom_data.Package(id=product_package_id,
                                      name=product_package_name,
                                      download_location=sbom_data.VALUE_NONE,
                                      version=args.build_version,
                                      supplier='Organization: ' + args.product_mfr,
                                      files_analyzed=True)
  doc_name = args.build_version
  if args.module_name:
    doc_name = f'{args.build_version}/{args.module_name}'
  doc = sbom_data.Document(name=doc_name,
                           namespace=f'https://www.google.com/sbom/spdx/android/{doc_name}',
                           creators=['Organization: ' + args.product_mfr],
                           describes=product_package_id)
  if not args.unbundled_apex:
    doc.packages.append(product_package)

  doc.packages.append(sbom_data.Package(id=sbom_data.SPDXID_PLATFORM,
                                        name=sbom_data.PACKAGE_NAME_PLATFORM,
                                        download_location=sbom_data.VALUE_NONE,
                                        version=args.build_version,
                                        supplier='Organization: ' + args.product_mfr))

  # Report on some issues and information
  report = {
      ISSUE_NO_METADATA: [],
      ISSUE_NO_METADATA_FILE: [],
      ISSUE_METADATA_FILE_INCOMPLETE: [],
      ISSUE_UNKNOWN_SECURITY_TAG_TYPE: [],
      ISSUE_INSTALLED_FILE_NOT_EXIST: [],
      INFO_METADATA_FOUND_FOR_PACKAGE: [],
  }

  # Scan the metadata in CSV file and create the corresponding package and file records in SPDX
  with open(args.metadata, newline='') as sbom_metadata_file:
    reader = csv.DictReader(sbom_metadata_file)
    for installed_file_metadata in reader:
      installed_file = installed_file_metadata['installed_file']
      module_path = installed_file_metadata['module_path']
      product_copy_files = installed_file_metadata['product_copy_files']
      kernel_module_copy_files = installed_file_metadata['kernel_module_copy_files']
      build_output_path = installed_file_metadata['build_output_path']
      is_static_lib = installed_file_metadata['is_static_lib']

      if not installed_file_has_metadata(installed_file_metadata, report):
        continue

      # Checked once and reused for both the existence report and the checksum below.
      output_exists = os.path.islink(build_output_path) or os.path.isfile(build_output_path)
      if not is_static_lib and not output_exists:
        # Ignore non-existing static library files for now since they are not shipped on devices.
        report[ISSUE_INSTALLED_FILE_NOT_EXIST].append(installed_file)
        continue

      file_id = new_file_id(installed_file)
      # TODO(b/285453664): Soong should report the information of statically linked libraries to Make.
      # This happens when a different sanitized version of static libraries is used in linking.
      # As a workaround, use the following SHA1 checksum for static libraries created by Soong, if .a files could not be
      # located correctly because Soong doesn't report the information to Make.
      sha1 = 'SHA1: da39a3ee5e6b4b0d3255bfef95601890afd80709'  # SHA1 of empty string
      if output_exists:
        sha1 = checksum(build_output_path)
      doc.files.append(sbom_data.File(id=file_id,
                                      name=installed_file,
                                      checksum=sha1))

      if not is_static_lib:
        if not args.unbundled_apex:
          product_package.file_ids.append(file_id)
        elif len(doc.files) > 1:
          # For an unbundled APEX every later file is recorded as contained in the first file.
          doc.add_relationship(sbom_data.Relationship(doc.files[0].id, sbom_data.RelationshipType.CONTAINS, file_id))

      if is_source_package(installed_file_metadata) or is_prebuilt_package(installed_file_metadata):
        metadata_file_path = get_metadata_file_path(installed_file_metadata)
        report_metadata_file(metadata_file_path, installed_file_metadata, report)

        # File from source fork packages or prebuilt fork packages
        external_doc_ref, pkgs, rels = get_sbom_fragments(installed_file_metadata, metadata_file_path)
        if len(pkgs) > 0:
          if external_doc_ref:
            doc.add_external_ref(external_doc_ref)
          for p in pkgs:
            doc.add_package(p)
          for rel in rels:
            doc.add_relationship(rel)
          fork_package_id = pkgs[0].id  # The first package should be the source/prebuilt fork package
          doc.add_relationship(sbom_data.Relationship(id1=file_id,
                                                      relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                                      id2=fork_package_id))
      elif (module_path or installed_file_metadata['is_platform_generated'] or
            # So far product_copy_files (<source path>:<dest path>) are copied from directory system, kernel,
            # hardware, frameworks and device, so process them as files from PLATFORM package.
            product_copy_files or
            # See build/make/core/Makefile:2988
            installed_file.endswith('.fsv_meta') or
            # For the four files generated for _dlkm, _ramdisk partitions
            # See build/make/core/Makefile:323
            kernel_module_copy_files.startswith('ANDROID-GEN')):
        # File from PLATFORM package
        doc.add_relationship(sbom_data.Relationship(id1=file_id,
                                                    relationship=sbom_data.RelationshipType.GENERATED_FROM,
                                                    id2=sbom_data.SPDXID_PLATFORM))

      # Process static libraries and whole static libraries the installed file links to
      static_libs = installed_file_metadata['static_libraries']
      whole_static_libs = installed_file_metadata['whole_static_libraries']
      # split() without a separator skips repeated spaces, so no empty library name is produced.
      for lib in (static_libs + ' ' + whole_static_libs).split():
        doc.add_relationship(sbom_data.Relationship(id1=file_id,
                                                    relationship=sbom_data.RelationshipType.STATIC_LINK,
                                                    id2=new_file_id(lib + '.a')))

  if args.unbundled_apex:
    # NOTE(review): presumably the first row of the metadata is the APEX file itself - confirm.
    # This raises IndexError if no file record was created.
    doc.describes = doc.files[0].id

  # Save SBOM records to output file
  doc.generate_packages_verification_code()
  doc.created = datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
  prefix = args.output_file
  if prefix.endswith('.spdx'):
    prefix = prefix.removesuffix('.spdx')
  elif prefix.endswith('.spdx.json'):
    prefix = prefix.removesuffix('.spdx.json')

  output_file = prefix + '.spdx'
  if args.unbundled_apex:
    output_file = prefix + '-fragment.spdx'
  with open(output_file, 'w', encoding="utf-8") as file:
    sbom_writers.TagValueWriter.write(doc, file, fragment=args.unbundled_apex)
  if args.json:
    with open(prefix + '.spdx.json', 'w', encoding="utf-8") as file:
      sbom_writers.JSONWriter.write(doc, file)

  save_report(prefix + '-gen-report.txt', report)
| 635 | |
Wei Li | dec97b1 | 2023-04-07 16:45:17 -0700 | [diff] [blame] | 636 | |
# Run the generator only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
  main()