diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b96f596ba..6f861e6b2 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -34,6 +34,10 @@ v33.2.0 (unreleased) - Remove "packageFileName" entry from SPDX output. https://github.com/nexB/scancode.io/issues/1076 +- Add an add-on pipeline for collecting DWARF debug symbol compilation + unit paths when available from ELF binaries. + https://github.com/nexB/purldb/issues/260 + v33.1.0 (2024-02-02) -------------------- diff --git a/docs/built-in-pipelines.rst b/docs/built-in-pipelines.rst index f2e8a9810..7cbe75458 100644 --- a/docs/built-in-pipelines.rst +++ b/docs/built-in-pipelines.rst @@ -56,6 +56,14 @@ Find Vulnerabilities (addon) :members: :member-order: bysource +.. _pipeline_inspect_elf: + +Inspect ELF Binaries +-------------------- +.. autoclass:: scanpipe.pipelines.inspect_elf_binaries.InspectELFBinaries() + :members: + :member-order: bysource + .. _pipeline_inspect_packages: Inspect Packages diff --git a/docs/faq.rst b/docs/faq.rst index 1b854b341..ac00f8b0b 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -48,6 +48,8 @@ Here are some general guidelines based on different input scenarios: :ref:`inspect_packages ` pipeline. - For scenarios involving both a **development and deployment codebase**, consider using the :ref:`map_deploy_to_develop ` pipeline. +- To collect the DWARF debug symbol compilation unit paths when available from an ELF binary, + use the :ref:`inspect_elf_binaries <pipeline_inspect_elf>` pipeline. These pipelines will automatically execute the necessary steps to scan and create the packages, dependencies, and resources for your project based on the input data provided.
diff --git a/scanpipe/models.py b/scanpipe/models.py index bcf7eadd2..d55f08d91 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -1946,6 +1946,20 @@ def has_directory_content_fingerprint(self): and ~Q(extra_data__directory_content__in=IGNORED_DIRECTORY_FINGERPRINTS) ) + def elfs(self): + """ + Return the file resources whose ``file_type`` starts with "ELF" and + mentions one of "executable", "relocatable", or "shared object". + Keep in sync with the content type implementation at ``typecode.contenttype``. + """ + supported_kinds = ( + Q(file_type__icontains="executable") + | Q(file_type__icontains="relocatable") + | Q(file_type__icontains="shared object") + ) + elf_files = self.files().filter(file_type__istartswith="ELF") + return elf_files.filter(supported_kinds) + class ScanFieldsModelMixin(models.Model): """Fields returned by the ScanCode-toolkit scans.""" diff --git a/scanpipe/pipelines/inspect_elf_binaries.py b/scanpipe/pipelines/inspect_elf_binaries.py new file mode 100644 index 000000000..1d109a612 --- /dev/null +++ b/scanpipe/pipelines/inspect_elf_binaries.py @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/nexB/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied.
No content created from +# ScanCode.io should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# +# ScanCode.io is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/nexB/scancode.io for support and download. + +from pathlib import Path + +from elf_inspector.dwarf import get_dwarf_paths + +from scanpipe.pipelines import Pipeline + + +class InspectELFBinaries(Pipeline): + """Inspect ELF binaries and collect DWARF paths.""" + + download_inputs = False + is_addon = True + + @classmethod + def steps(cls): + return (cls.collect_dwarf_source_path_references,) + + def collect_dwarf_source_path_references(self): + """ + Store the data returned by ``get_dwarf_paths`` in the ``extra_data`` + of each ELF resource of the project. + """ + for elf in self.project.codebaseresources.elfs(): + dwarf_paths = get_dwarf_paths(Path(self.project.codebase_path / elf.path)) + elf.update_extra_data(dwarf_paths) diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index 436f04de5..0ec953142 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -2110,6 +2110,52 @@ def test_scanpipe_codebase_resource_queryset_has_directory_content_fingerprint( results = self.project1.codebaseresources.has_directory_content_fingerprint() self.assertQuerySetEqual(expected, results, ordered=False) + def test_scanpipe_codebase_resource_queryset_elfs(self): + project = Project.objects.create(name="Test") + resource_starting_with_elf_and_executable_in_file_type = CodebaseResource( + file_type="""ELF 32-bit LSB executable, ARM, version 1 (ARM), statically + linked, with debug_info, not stripped""", + project=project, +
path="b", + type=CodebaseResource.Type.FILE, + ) + resource_with_executable_in_file_type.save() + resource_starting_with_elf_in_file_type = CodebaseResource( + file_type="""ELF 32-bit LSB resourcable, ARM, version 1 (ARM), statically + linked, with debug_info, not stripped""", + project=project, + path="c", + type=CodebaseResource.Type.FILE, + ) + resource_starting_with_elf_in_file_type.save() + resource = CodebaseResource( + file_type="""32-bit LSB relocatable, ARM, version 1 (ARM), statically + linked, with debug_info, not stripped""", + project=project, + path="d", + type=CodebaseResource.Type.FILE, + ) + resource.save() + resource_starting_with_elf_and_relocatable_in_file_type = CodebaseResource( + file_type="""ELF 32-bit LSB relocatable, ARM, version 1 (ARM), statically + linked, with debug_info, not stripped""", + project=project, + path="e", + type=CodebaseResource.Type.FILE, + ) + resource_starting_with_elf_and_relocatable_in_file_type.save() + paths = [str(resource.path) for resource in project.codebaseresources.elfs()] + # "b" and "d" lack the "ELF" prefix; "c" names no supported ELF kind. + self.assertEqual(["a", "e"], sorted(paths)) + class ScanPipeModelsTransactionTest(TransactionTestCase): """ diff --git a/setup.cfg b/setup.cfg index b2c041292..9cd03e843 100644 --- a/setup.cfg +++ b/setup.cfg @@ -77,6 +77,7 @@ install_requires = fetchcode-container==1.2.3.210512; sys_platform == "linux" # Inspectors python-inspector==0.11.0 + elf-inspector==0.0.1 aboutcode-toolkit==10.1.0 # Utilities XlsxWriter==3.1.9 @@ -126,6 +127,7 @@ scancodeio_pipelines = analyze_root_filesystem_or_vm_image = scanpipe.pipelines.root_filesystem:RootFS analyze_windows_docker_image = scanpipe.pipelines.docker_windows:DockerWindows find_vulnerabilities = scanpipe.pipelines.find_vulnerabilities:FindVulnerabilities + inspect_elf_binaries = scanpipe.pipelines.inspect_elf_binaries:InspectELFBinaries inspect_packages = scanpipe.pipelines.inspect_packages:InspectPackages load_inventory = scanpipe.pipelines.load_inventory:LoadInventory
load_sbom = scanpipe.pipelines.load_sbom:LoadSBOM