Skip to content

Commit fa9ac3f

Browse files
authored
Escape special characters in directory path regex (#1542)
Signed-off-by: Keshav Priyadarshi <[email protected]>
1 parent 72b73ea commit fa9ac3f

File tree

2 files changed

+42
-8
lines changed

2 files changed

+42
-8
lines changed

scanpipe/pipes/d2d.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222

23+
import re
2324
from collections import Counter
2425
from collections import defaultdict
2526
from contextlib import suppress
@@ -1623,21 +1624,20 @@ def match_purldb_resources_post_process(project, logger=None):
16231624
map_count = 0
16241625

16251626
for directory in progress.iter(resource_iterator):
1626-
map_count += _match_purldb_resources_post_process(
1627-
directory, to_extract_directories, to_resources
1628-
)
1627+
map_count += _match_purldb_resources_post_process(directory, to_resources)
16291628

16301629
logger(f"{map_count:,d} resource processed")
16311630

16321631

1633-
def _match_purldb_resources_post_process(
1634-
directory_path, to_extract_directories, to_resources
1635-
):
1632+
def _match_purldb_resources_post_process(directory, to_resources):
1633+
# Escape regex special characters in the directory path so it is matched literally.
1634+
escaped_directory_path = re.escape(directory.path)
1635+
16361636
# Exclude the content of nested archive.
16371637
interesting_codebase_resources = (
1638-
to_resources.filter(path__startswith=directory_path)
1638+
to_resources.filter(path__startswith=directory.path)
16391639
.filter(status=flag.MATCHED_TO_PURLDB_RESOURCE)
1640-
.exclude(path__regex=rf"^{directory_path}.*-extract\/.*$")
1640+
.exclude(path__regex=rf"^{escaped_directory_path}.*-extract\/.*$")
16411641
)
16421642

16431643
if not interesting_codebase_resources:

scanpipe/tests/pipes/test_d2d.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from unittest import mock
2929
from unittest import skipIf
3030

31+
from django.db.utils import DataError
3132
from django.test import TestCase
3233

3334
from scanpipe import pipes
@@ -1594,3 +1595,36 @@ def test_scanpipe_pipes_d2d_match_purldb_resource_no_package_data(
15941595

15951596
package_count = self.project1.discoveredpackages.count()
15961597
self.assertEqual(0, package_count)
1598+
1599+
def test_scanpipe_pipes_d2d_match_purldb_resources_post_process_with_special_char(
1600+
self,
1601+
):
# Regression test: match_purldb_resources_post_process must not raise a
# DataError when a directory path contains regex special characters
# (the "++" in "Matplot++" below).
1602+
to_map = self.data / "d2d-javascript" / "to" / "main.js.map"
1603+
1604+
# Target directory whose path contains "+" characters and ends in "-extract".
to_dir = self.project1.codebase_path / "to/lib/Matplot++/nodesoup.lib-extract"
1605+
to_dir.mkdir(parents=True)
1606+
copy_inputs([to_map], to_dir)
1607+
1608+
# NOTE(review): presumably creates the CodebaseResource rows for the file
# copied above; confirm against the pipes helper.
pipes.collect_and_create_codebase_resources(self.project1)
1609+
1610+
# Select the resource(s) under the special-character directory.
to_resources = self.project1.codebaseresources.filter(
1611+
path__startswith=("to/lib/Matplot++/nodesoup.lib-extract/main.js")
1612+
)
1613+
1614+
# Work on a copy of the shared package_data1 fixture with a fresh uuid.
dummy_package_data1 = package_data1.copy()
1615+
dummy_package_data1["uuid"] = uuid.uuid4()
1616+
# Passes the MATCHED_TO_PURLDB_RESOURCE flag, which is the status the
# post-process step filters on.
# NOTE(review): presumably this sets that status on to_resources; confirm.
d2d.create_package_from_purldb_data(
1617+
self.project1,
1618+
to_resources,
1619+
dummy_package_data1,
1620+
flag.MATCHED_TO_PURLDB_RESOURCE,
1621+
)
1622+
1623+
# Logger output is captured in a buffer; its content is not asserted.
buffer = io.StringIO()
1624+
# The test passes as long as the call completes without a DataError.
try:
1625+
d2d.match_purldb_resources_post_process(
1626+
self.project1,
1627+
logger=buffer.write,
1628+
)
1629+
except DataError:
1630+
self.fail("DataError was raised, but it should not occur.")

0 commit comments

Comments
 (0)