Skip to content

Commit 99db098

Browse files
Add ecosystem specific inclusions or exclusions
Also ignore specific file paths containing metadata in Ruby gems. Reference: #1438 Reference: #1476 Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent fa9ac3f commit 99db098

File tree

7 files changed

+219
-35
lines changed

7 files changed

+219
-35
lines changed

scanpipe/config.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
22+
23+
24+
class EcosystemConfig:
    """
    Common base for ecosystem specific configurations.

    One subclass is defined per ecosystem, overriding only the attributes
    it needs; every other attribute keeps the empty default set here.
    """

    # Name of the pipeline option which selects this ecosystem.
    # This should be defined by each ecosystem subclass.
    ecosystem_option = None

    # Extensions of the packages of this ecosystem which need to be
    # matched from purldb.
    purldb_package_extensions = []

    # Extensions of the resources of this ecosystem which need to be
    # matched from purldb.
    purldb_resource_extensions = []

    # Extensions of document files, which do not require review.
    doc_extensions = []

    # Paths in the deployed binaries/archives (on the to/ side) which do not
    # need review, even if they are not matched to the source side.
    deployed_resource_path_exclusions = []

    # Paths in the development/source archive (on the from/ side) which
    # should not be considered, even if unmapped to the deployed side, when
    # assessing what to review on the deployed side.
    devel_resource_path_exclusions = []

    # Symbols found in ecosystem specific standard libraries, which are
    # not so useful in mapping.
    standard_symbols_to_exclude = []
57+
58+
59+
class DefaultEcosystemConfig(EcosystemConfig):
    """Baseline configurations, common to several ecosystems."""

    ecosystem_option = "Default"

    # Generic archive extensions for packages to match from purldb.
    purldb_package_extensions = [".zip", ".tar.gz", ".tar.xz"]

    # Document files which do not require review.
    doc_extensions = [".pdf", ".doc", ".docx", ".ppt", ".pptx", ".tex", ".odt", ".odp"]

    # Tests on the from/ side are not considered when left unmapped.
    devel_resource_path_exclusions = ["*/tests/*"]
75+
76+
77+
class JavaEcosystemConfig(EcosystemConfig):
    """Java specific extensions for packages and resources to match from purldb."""

    ecosystem_option = "Java"
    purldb_resource_extensions = [".class"]
    purldb_package_extensions = [".jar", ".war"]
81+
82+
83+
class JavaScriptEcosystemConfig(EcosystemConfig):
    """JavaScript specific extensions for resources to match from purldb."""

    ecosystem_option = "JavaScript"
    purldb_resource_extensions = [
        # Source maps and scripts
        ".map",
        ".js",
        ".mjs",
        ".ts",
        ".d.ts",
        ".jsx",
        ".tsx",
        # Stylesheets
        ".css",
        ".scss",
        ".less",
        ".sass",
        # Others
        ".soy",
    ]
99+
100+
101+
class GoEcosystemConfig(EcosystemConfig):
    """Go specific extensions for resources to match from purldb."""

    ecosystem_option = "Go"
    purldb_resource_extensions = [".go"]
104+
105+
106+
class RustEcosystemConfig(EcosystemConfig):
    """Rust specific extensions for resources to match from purldb."""

    ecosystem_option = "Rust"
    purldb_resource_extensions = [".rs"]
109+
110+
111+
class RubyEcosystemConfig(EcosystemConfig):
    """Ruby specific extensions to match from purldb and deployed paths to ignore."""

    ecosystem_option = "Ruby"
    purldb_package_extensions = [".gem"]
    purldb_resource_extensions = [".rb"]

    # Deployed (to/ side) paths which do not need review even when unmapped.
    deployed_resource_path_exclusions = [
        "*checksums.yaml.gz*",
        "*metadata.gz*",
    ]

scanpipe/pipelines/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,10 @@ def flag_ignored_resources(self):
7878
ignored_patterns = ignored_patterns.splitlines()
7979
ignored_patterns.extend(flag.DEFAULT_IGNORED_PATTERNS)
8080

81-
flag.flag_ignored_patterns(self.project, patterns=ignored_patterns)
81+
flag.flag_ignored_patterns(
82+
codebaseresources=self.project.codebaseresources.no_status(),
83+
patterns=ignored_patterns,
84+
)
8285

8386
def extract_archive(self, location, target):
8487
"""Extract archive at `location` to `target`. Save errors as messages."""

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
from aboutcode.pipeline import optional_step
2424
from scanpipe import pipes
25+
from scanpipe.config import DefaultEcosystemConfig
2526
from scanpipe.pipelines import Pipeline
2627
from scanpipe.pipes import d2d
2728
from scanpipe.pipes import flag
@@ -31,7 +32,7 @@
3132
from scanpipe.pipes import scancode
3233

3334

34-
class DeployToDevelop(Pipeline):
35+
class DeployToDevelop(Pipeline, DefaultEcosystemConfig):
3536
"""
3637
Establish relationships between two code trees: deployment and development.
3738
@@ -91,33 +92,6 @@ def steps(cls):
9192
cls.create_local_files_packages,
9293
)
9394

94-
purldb_package_extensions = [".jar", ".war", ".zip"]
95-
purldb_resource_extensions = [
96-
".map",
97-
".js",
98-
".mjs",
99-
".ts",
100-
".d.ts",
101-
".jsx",
102-
".tsx",
103-
".css",
104-
".scss",
105-
".less",
106-
".sass",
107-
".soy",
108-
".class",
109-
]
110-
doc_extensions = [
111-
".pdf",
112-
".doc",
113-
".docx",
114-
".ppt",
115-
".pptx",
116-
".tex",
117-
".odt",
118-
".odp",
119-
]
120-
12195
def get_inputs(self):
12296
"""Locate the ``from`` and ``to`` input files."""
12397
self.from_files, self.to_files = d2d.get_inputs(self.project)
@@ -152,6 +126,15 @@ def flag_whitespace_files(self):
152126
"""Flag whitespace files with size less than or equal to 100 byte as ignored."""
153127
d2d.flag_whitespace_files(project=self.project)
154128

129+
def load_ecosystem_config(self):
    """Set up the d2d configurations of the ecosystems selected as options."""
    d2d.load_ecosystem_config(options=self.selected_groups, pipeline=self)
132+
133+
@optional_step("Ruby")
def load_ecosystem_config_ruby(self):
    """
    Load Ruby specific configurations for d2d steps.

    The body of this step is intentionally empty.
    NOTE(review): presumably it only exists so that "Ruby" can be selected
    as an option of the pipeline -- confirm.
    """
137+
155138
def map_about_files(self):
156139
"""Map ``from/`` .ABOUT files to their related ``to/`` resources."""
157140
d2d.map_about_files(project=self.project, logger=self.log)
@@ -268,6 +251,7 @@ def flag_mapped_resources_archives_and_ignored_directories(self):
268251
def perform_house_keeping_tasks(self):
269252
"""
270253
On deployed side
254+
- Ignore specific files based on ecosystem based configurations.
271255
- PurlDB match files with ``no-java-source`` and empty status,
272256
if no match is found update status to ``requires-review``.
273257
- Update status for uninteresting files.
@@ -278,6 +262,11 @@ def perform_house_keeping_tasks(self):
278262
"""
279263
d2d.match_resources_with_no_java_source(project=self.project, logger=self.log)
280264
d2d.handle_dangling_deployed_legal_files(project=self.project, logger=self.log)
265+
d2d.ignore_unmapped_resources_from_config(
266+
project=self.project,
267+
patterns_to_ignore=self.deployed_resource_path_exclusions,
268+
logger=self.log,
269+
)
281270
d2d.match_unmapped_resources(
282271
project=self.project,
283272
matched_extensions=self.purldb_resource_extensions,

scanpipe/pipes/d2d.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
from summarycode.classify import LEGAL_STARTS_ENDS
4848

4949
from aboutcode.pipeline import LoopProgress
50+
from scanpipe import config
5051
from scanpipe import pipes
5152
from scanpipe.models import CodebaseRelation
5253
from scanpipe.models import CodebaseResource
@@ -66,6 +67,16 @@
6667
TO = "to/"
6768

6869

70+
# All the ecosystem configurations available to the d2d pipes.
ECOSYSTEM_CONFIGS = [
    config.DefaultEcosystemConfig,
    config.JavaEcosystemConfig,
    config.JavaScriptEcosystemConfig,
    config.RubyEcosystemConfig,
    config.RustEcosystemConfig,
    config.GoEcosystemConfig,
]
78+
79+
6980
def get_inputs(project):
7081
"""
7182
Locate the ``from`` and ``to`` input files in project inputs/ directory.
@@ -114,6 +125,51 @@ def get_best_path_matches(to_resource, matches):
114125
return matches
115126

116127

128+
def load_ecosystem_config(pipeline, options):
    """
    Add ecosystem specific configurations for each ecosystem selected
    as `options` to the `pipeline`.

    The "Default" configuration is always added first. Entries of `options`
    which are not the option name of a known ecosystem configuration are
    skipped. An empty or ``None`` `options` adds the default configuration
    only.
    """
    configs_by_ecosystem = {
        ecosystem.ecosystem_option: ecosystem for ecosystem in ECOSYSTEM_CONFIGS
    }

    # Add default configurations which are common across ecosystems
    add_ecosystem_config(
        pipeline=pipeline,
        configs_by_ecosystem=configs_by_ecosystem,
        selected_option="Default",
    )

    # Add configurations for each selected ecosystem.
    # Tolerate `options` being None so that a pipeline with nothing selected
    # does not fail on the iteration.
    for selected_option in options or []:
        if selected_option not in configs_by_ecosystem:
            continue

        add_ecosystem_config(
            pipeline=pipeline,
            configs_by_ecosystem=configs_by_ecosystem,
            selected_option=selected_option,
        )
154+
155+
156+
def add_ecosystem_config(pipeline, configs_by_ecosystem, selected_option):
    """
    Merge the configuration of the `selected_option` ecosystem, looked up in
    `configs_by_ecosystem`, into the matching attributes of the `pipeline`.

    Each merged value is set as a new list on the `pipeline` object. The lists
    found on the pipeline and on the ecosystem configuration are never
    modified in place, as these can be class attributes shared with other
    objects. Values already present on the pipeline are not added twice.

    Nothing is done when `selected_option` is not a key of
    `configs_by_ecosystem`.
    """
    d2d_pipeline_configs = [
        "purldb_package_extensions",
        "purldb_resource_extensions",
        "deployed_resource_path_exclusions",
    ]

    ecosystem_config = configs_by_ecosystem.get(selected_option)
    if ecosystem_config is None:
        return

    for pipeline_config in d2d_pipeline_configs:
        config_value = getattr(ecosystem_config, pipeline_config, None)
        if not config_value:
            continue

        pipeline_config_value = getattr(pipeline, pipeline_config, None) or []
        # `list.extend()` returns None and mutates its list: build a new
        # merged list instead. `dict.fromkeys()` drops the duplicates while
        # keeping the order of first appearance.
        merged_value = list(dict.fromkeys([*pipeline_config_value, *config_value]))
        setattr(pipeline, pipeline_config, merged_value)
171+
172+
117173
def get_from_files_for_scanning(resources):
118174
"""
119175
Return resources in the "from/" side which has been mapped to the "to/"
@@ -1453,6 +1509,20 @@ def match_resources_with_no_java_source(project, logger=None):
14531509
)
14541510

14551511

1512+
def ignore_unmapped_resources_from_config(project, patterns_to_ignore, logger=None):
    """
    Ignore unmapped resources for a project using `patterns_to_ignore`.

    Only the to/ resources of the `project` without a status are flagged,
    using the ``flag.IGNORED_FROM_CONFIG`` status. The number of flagged
    resources is reported through `logger` when one is provided.
    """
    ignored_resources_count = flag.flag_ignored_patterns(
        codebaseresources=project.codebaseresources.to_codebase().no_status(),
        # A missing (None) configuration value means there is nothing to ignore.
        patterns=patterns_to_ignore or [],
        status=flag.IGNORED_FROM_CONFIG,
    )
    if logger:
        logger(
            f"Ignoring {ignored_resources_count:,d} to/ resources "
            "from ecosystem specific configurations."
        )
1524+
1525+
14561526
def match_unmapped_resources(project, matched_extensions=None, logger=None):
14571527
"""
14581528
Match resources with empty status to PurlDB, if unmatched

scanpipe/pipes/flag.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
IGNORED_DEFAULT_IGNORES = "ignored-default-ignores"
4444
IGNORED_DATA_FILE_NO_CLUES = "ignored-data-file-no-clues"
4545
IGNORED_DOC_FILE = "ignored-doc-file"
46+
IGNORED_FROM_CONFIG = "ignored-from-config"
4647

4748
COMPLIANCE_LICENSES = "compliance-licenses"
4849
COMPLIANCE_SOURCEMIRROR = "compliance-sourcemirror"
@@ -89,15 +90,15 @@ def flag_ignored_directories(project):
8990
return qs.update(status=IGNORED_DIRECTORY)
9091

9192

92-
def flag_ignored_patterns(project, patterns):
93+
def flag_ignored_patterns(codebaseresources, patterns, status=IGNORED_PATTERN):
    """
    Update the ``codebaseresources`` whose path matches one of the ``patterns``
    with the provided ``status`` and return the total count of updates.

    The ``patterns`` are provided as a list, or as a string with one pattern
    per line.
    """
    pattern_list = patterns.splitlines() if isinstance(patterns, str) else patterns

    return sum(
        codebaseresources.path_pattern(pattern).update(status=status)
        for pattern in pattern_list
    )
103104

scanpipe/tests/pipes/test_flag.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ def test_scanpipe_pipes_flag_flag_ignored_directories(self):
7070

7171
def test_scanpipe_pipes_flag_flag_ignored_patterns(self):
7272
patterns = ["*.ext", "dir/*"]
73-
updated = flag.flag_ignored_patterns(self.project1, patterns)
73+
updated = flag.flag_ignored_patterns(
74+
self.project1.codebaseresources.no_status(), patterns
75+
)
7476

7577
self.assertEqual(3, updated)
7678
self.resource1.refresh_from_db()
@@ -85,7 +87,8 @@ def test_scanpipe_pipes_flag_flag_ignored_patterns(self):
8587
make_resource_file(self.project1, "path/deeper/policies.yml")
8688
make_resource_file(self.project1, "path/other-policies.yml")
8789
updated = flag.flag_ignored_patterns(
88-
self.project1, flag.DEFAULT_IGNORED_PATTERNS
90+
self.project1.codebaseresources.no_status(),
91+
flag.DEFAULT_IGNORED_PATTERNS,
8992
)
9093
self.assertEqual(3, updated)
9194

scanpipe/tests/test_pipelines.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,10 @@ def test_scanpipe_pipeline_class_flag_ignored_resources(self):
423423
mock_flag.return_value = None
424424
pipeline.flag_ignored_resources()
425425
patterns_args = ["*.ext", *flag.DEFAULT_IGNORED_PATTERNS]
426-
mock_flag.assert_called_with(project1, patterns=patterns_args)
426+
mock_flag.assert_called_with(
427+
codebaseresources=project1.codebaseresources.no_status(),
428+
patterns=patterns_args,
429+
)
427430

428431
def test_scanpipe_pipeline_class_extract_archive(self):
429432
project1 = Project.objects.create(name="Analysis")

0 commit comments

Comments
 (0)