Skip to content

Commit 0b0fc4a

Browse files
authored
Add an add-on pipeline for collecting dwarfs from elfs (#1068)
Signed-off-by: Tushar Goel <[email protected]>
1 parent 27480f5 commit 0b0fc4a

File tree

7 files changed

+127
-0
lines changed

7 files changed

+127
-0
lines changed

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ v33.2.0 (unreleased)
3434
- Remove "packageFileName" entry from SPDX output.
3535
https://github.com/nexB/scancode.io/issues/1076
3636

37+
- Add an add-on pipeline for collecting DWARF debug symbol compilation
38+
unit paths when available from ELF binaries.
39+
https://github.com/nexB/purldb/issues/260
40+
3741
v33.1.0 (2024-02-02)
3842
--------------------
3943

docs/built-in-pipelines.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,14 @@ Find Vulnerabilities (addon)
5656
:members:
5757
:member-order: bysource
5858

59+
.. _pipeline_inspect_elf:
60+
61+
Inspect ELF Binaries
62+
--------------------
63+
.. autoclass:: scanpipe.pipelines.inspect_elf_binaries.InspectELFBinaries()
64+
:members:
65+
:member-order: bysource
66+
5967
.. _pipeline_inspect_packages:
6068

6169
Inspect Packages

docs/faq.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ Here are some general guidelines based on different input scenarios:
4848
:ref:`inspect_packages <pipeline_inspect_packages>` pipeline.
4949
- For scenarios involving both a **development and deployment codebase**, consider using
5050
the :ref:`map_deploy_to_develop <pipeline_map_deploy_to_develop>` pipeline.
51+
- For getting the DWARF debug symbol compilation unit paths when available from an ELF binary,
52+
use the :ref:`inspect_elf_binaries <pipeline_inspect_elf>` pipeline.
5153

5254
These pipelines will automatically execute the necessary steps to scan and create the
5355
packages, dependencies, and resources for your project based on the input data provided.

scanpipe/models.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1946,6 +1946,24 @@ def has_directory_content_fingerprint(self):
19461946
and ~Q(extra_data__directory_content__in=IGNORED_DIRECTORY_FINGERPRINTS)
19471947
)
19481948

1949+
def elfs(self):
    """
    Return the ``files`` resources whose ``file_type`` starts with "ELF" and
    also mentions one of "executable", "relocatable" or "shared object".
    Keep in sync with the content type implementation at
    ``typecode.contenttype``.
    """
    # Both lookups are case-insensitive, hence the ``i`` prefixed variants.
    starts_with_elf = Q(file_type__istartswith="ELF")
    is_supported_kind = (
        Q(file_type__icontains="executable")
        | Q(file_type__icontains="relocatable")
        | Q(file_type__icontains="shared object")
    )
    file_resources = self.files()
    return file_resources.filter(starts_with_elf).filter(is_supported_kind)
1966+
19491967

19501968
class ScanFieldsModelMixin(models.Model):
19511969
"""Fields returned by the ScanCode-toolkit scans."""
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/nexB/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/nexB/scancode.io for support and download.
22+
23+
from pathlib import Path
24+
25+
from elf_inspector.dwarf import get_dwarf_paths
26+
27+
from scanpipe.pipelines import Pipeline
28+
29+
30+
class InspectELFBinaries(Pipeline):
    """Inspect ELF binaries and collect DWARF paths."""

    # Class-level pipeline flags.
    download_inputs = False
    is_addon = True

    @classmethod
    def steps(cls):
        # This pipeline is made of a single step.
        return (cls.collect_dwarf_source_path_references,)

    def collect_dwarf_source_path_references(self):
        """
        Update ``extra_data`` of ELF files with
        dwarf data extracted from ELF files.
        """
        project = self.project
        for elf_resource in project.codebaseresources.elfs():
            # ``elf_resource.path`` is joined onto the project codebase
            # directory to get the location handed to ``get_dwarf_paths``.
            elf_location = Path(project.codebase_path / elf_resource.path)
            extracted_dwarf_data = get_dwarf_paths(elf_location)
            elf_resource.update_extra_data(extracted_dwarf_data)

scanpipe/tests/test_models.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2110,6 +2110,52 @@ def test_scanpipe_codebase_resource_queryset_has_directory_content_fingerprint(
21102110
results = self.project1.codebaseresources.has_directory_content_fingerprint()
21112111
self.assertQuerySetEqual(expected, results, ordered=False)
21122112

2113+
def test_scanpipe_codebase_resource_queryset_elfs(self):
    """
    Check that ``elfs()`` returns only the file resources whose ``file_type``
    starts with "ELF" and mentions a supported kind, and nothing else.
    """
    project = Project.objects.create(name="Test")

    # Map each resource path to the ``file_type`` value it is saved with.
    file_type_by_path = {
        # Starts with "ELF" and contains "executable": expected in results.
        "a": """ELF 32-bit LSB executable, ARM, version 1 (ARM), statically
linked, with debug_info, not stripped""",
        # Contains "executable" but does not start with "ELF": excluded.
        "b": """32-bit LSB executable, ARM, version 1 (ARM), statically
linked, with debug_info, not stripped""",
        # Starts with "ELF" but names no supported kind: excluded.
        "c": """ELF 32-bit LSB resourcable, ARM, version 1 (ARM), statically
linked, with debug_info, not stripped""",
        # Contains "relocatable" but does not start with "ELF": excluded.
        "d": """32-bit LSB relocatable, ARM, version 1 (ARM), statically
linked, with debug_info, not stripped""",
        # Starts with "ELF" and contains "relocatable": expected in results.
        "e": """ELF 32-bit LSB relocatable, ARM, version 1 (ARM), statically
linked, with debug_info, not stripped""",
    }
    for path, file_type in file_type_by_path.items():
        resource = CodebaseResource(
            file_type=file_type,
            project=project,
            path=path,
            type=CodebaseResource.Type.FILE,
        )
        resource.save()

    paths = [str(resource.path) for resource in project.codebaseresources.elfs()]
    # Compare the whole result set so that a wrongly included "b", "c" or "d"
    # resource fails the test, not only a missing "a" or "e".
    self.assertEqual(["a", "e"], sorted(paths))
2158+
21132159

21142160
class ScanPipeModelsTransactionTest(TransactionTestCase):
21152161
"""

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ install_requires =
7777
fetchcode-container==1.2.3.210512; sys_platform == "linux"
7878
# Inspectors
7979
python-inspector==0.11.0
80+
elf-inspector==0.0.1
8081
aboutcode-toolkit==10.1.0
8182
# Utilities
8283
XlsxWriter==3.1.9
@@ -126,6 +127,7 @@ scancodeio_pipelines =
126127
analyze_root_filesystem_or_vm_image = scanpipe.pipelines.root_filesystem:RootFS
127128
analyze_windows_docker_image = scanpipe.pipelines.docker_windows:DockerWindows
128129
find_vulnerabilities = scanpipe.pipelines.find_vulnerabilities:FindVulnerabilities
130+
inspect_elf_binaries = scanpipe.pipelines.inspect_elf_binaries:InspectELFBinaries
129131
inspect_packages = scanpipe.pipelines.inspect_packages:InspectPackages
130132
load_inventory = scanpipe.pipelines.load_inventory:LoadInventory
131133
load_sbom = scanpipe.pipelines.load_sbom:LoadSBOM

0 commit comments

Comments
 (0)