Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 17 additions & 31 deletions component_catalog/tests/test_scancodeio.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
from component_catalog.models import Package
from component_catalog.tests import make_package
from dejacode_toolkit.scancodeio import ScanCodeIO
from dejacode_toolkit.scancodeio import check_for_existing_scan_workaround
from dejacode_toolkit.scancodeio import get_hash_uid
from dejacode_toolkit.scancodeio import get_notice_text_from_key_files
from dejacode_toolkit.scancodeio import update_package_from_existing_scan_data
from dje.models import Dataspace
from dje.models import History
from dje.tasks import scancodeio_submit_scan
Expand Down Expand Up @@ -67,41 +67,38 @@ def test_scancodeio_submit_scan_task(self, mock_submit_scan, mock_request_head):

expected = [
mock.call("http://okurl.com", user_uuid, dataspace_uuid),
mock.call().__bool__(),
mock.call("https://okurl2.com", user_uuid, dataspace_uuid),
mock.call().__bool__(),
]
self.assertEqual(expected, mock_submit_scan.mock_calls)

@mock.patch("requests.sessions.Session.get")
def test_scancodeio_fetch_scan_list(self, mock_session_get):
scancodeio = ScanCodeIO(self.dataspace)
self.assertIsNone(scancodeio.fetch_scan_list())
self.assertFalse(mock_session_get.called)
dataspace_uid = get_hash_uid(self.dataspace.uuid)
user_uid = get_hash_uid(self.basic_user.uuid)

scancodeio.fetch_scan_list(user=self.basic_user)
scancodeio.fetch_scan_list()
params = mock_session_get.call_args.kwargs["params"]
expected = {"format": "json", "name__endswith": get_hash_uid(self.basic_user.uuid)}
expected = {
"format": "json",
"name__contains": dataspace_uid,
}
self.assertEqual(expected, params)

scancodeio.fetch_scan_list(dataspace=self.basic_user.dataspace)
scancodeio.fetch_scan_list(user=self.basic_user)
params = mock_session_get.call_args.kwargs["params"]
expected = {
"format": "json",
"name__contains": get_hash_uid(self.basic_user.dataspace.uuid),
"name__contains": dataspace_uid,
"label": user_uid,
}
self.assertEqual(expected, params)

scancodeio.fetch_scan_list(
user=self.basic_user,
dataspace=self.basic_user.dataspace,
extra_params="extra",
)
scancodeio.fetch_scan_list(extra_params="extra")
params = mock_session_get.call_args.kwargs["params"]
expected = {
"format": "json",
"name__contains": get_hash_uid(self.basic_user.dataspace.uuid),
"name__endswith": get_hash_uid(self.basic_user.uuid),
"name__contains": get_hash_uid(self.dataspace.uuid),
"extra_params": "extra",
}
self.assertEqual(expected, params)
Expand All @@ -115,16 +112,11 @@ def test_scancodeio_fetch_scan_info(self, mock_session_get):
params = mock_session_get.call_args.kwargs["params"]
expected = {
"name__startswith": get_hash_uid(uri),
"name__contains": get_hash_uid(self.basic_user.dataspace.uuid),
"name__contains": get_hash_uid(self.dataspace.uuid),
"format": "json",
}
self.assertEqual(expected, params)

scancodeio.fetch_scan_info(uri=uri, user=self.basic_user)
params = mock_session_get.call_args.kwargs["params"]
expected["name__endswith"] = get_hash_uid(self.basic_user.uuid)
self.assertEqual(expected, params)

@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.request_get")
def test_scancodeio_find_project(self, mock_request_get):
scancodeio = ScanCodeIO(self.dataspace)
Expand Down Expand Up @@ -340,19 +332,13 @@ def test_scancodeio_get_notice_text_from_key_files(self):
self.assertEqual("", notice_text)

@mock.patch("component_catalog.models.Package.update_from_scan")
def test_scancodeio_check_for_existing_scan_workaround(self, mock_update_from_scan):
def test_scancodeio_update_package_from_existing_scan_data(self, mock_update_from_scan):
mock_update_from_scan.return_value = ["updated_field"]
download_url = self.package1.download_url
user = self.basic_user

response_json = None
results = check_for_existing_scan_workaround(response_json, download_url, user)
self.assertIsNone(results)

response_json = {"success": True}
results = check_for_existing_scan_workaround(response_json, download_url, user)
results = update_package_from_existing_scan_data("unknown_url", user)
self.assertIsNone(results)

response_json = {"name": "project with this name already exists."}
results = check_for_existing_scan_workaround(response_json, download_url, user)
results = update_package_from_existing_scan_data(download_url, user)
self.assertEqual(["updated_field"], results)
1 change: 0 additions & 1 deletion component_catalog/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1742,7 +1742,6 @@ def get_queryset(self):
scancodeio = ScanCodeIO(dataspace)
self.list_data = (
scancodeio.fetch_scan_list(
dataspace=dataspace,
user=user if self.request.GET.get("created_by_me") else None,
**filters,
)
Expand Down
59 changes: 24 additions & 35 deletions dejacode_toolkit/scancodeio.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,21 @@ def get_project_info(self, download_url):
return scan_info.get("results")[0]

def submit_scan(self, uri, user_uuid, dataspace_uuid):
"""
Submit package scan request to ScanCode.io.
A unique ID for the user is set as a project label, available for filtering.
"""
webhook_url = get_webhook_url("notifications:send_scan_notification", user_uuid)

data = {
"name": get_project_name(uri, user_uuid, dataspace_uuid),
"name": get_project_name(uri, dataspace_uuid),
"input_urls": uri,
"pipeline": "scan_single_package",
"execute_now": True,
"webhook_url": webhook_url,
"labels": [get_hash_uid(user_uuid)],
}

webhook_url = get_webhook_url("notifications:send_scan_notification", user_uuid)
data["webhook_url"] = webhook_url

logger.debug(f'{self.label}: submit scan uri="{uri}" webhook_url="{webhook_url}"')
return self.request_post(url=self.project_api_url, json=data)

Expand Down Expand Up @@ -102,14 +107,11 @@ def start_pipeline(self, run_url):
start_pipeline_url = run_url + "start_pipeline/"
return self.request_post(url=start_pipeline_url)

def fetch_scan_list(self, user=None, dataspace=None, **extra_payload):
payload = {}

if dataspace:
payload["name__contains"] = get_hash_uid(dataspace.uuid)
def fetch_scan_list(self, user=None, **extra_payload):
payload = {"name__contains": get_hash_uid(self.dataspace.uuid)}

if user:
payload["name__endswith"] = get_hash_uid(user.uuid)
payload["label"] = get_hash_uid(user.uuid)

payload.update(extra_payload)
if not payload:
Expand All @@ -131,15 +133,11 @@ def find_project(self, **kwargs):
if response.get("count") == 1:
return response.get("results")[0]

def fetch_scan_info(self, uri, user=None):
def fetch_scan_info(self, uri):
payload = {
"name__startswith": get_hash_uid(uri),
"name__contains": get_hash_uid(self.dataspace.uuid),
}

if user:
payload["name__endswith"] = get_hash_uid(user.uuid)

logger.debug(f'{self.label}: fetch scan info uri="{uri}"')
return self.request_get(url=self.project_api_url, params=payload)

Expand Down Expand Up @@ -491,18 +489,16 @@ def get_hash_uid(value):
return md5(str(value).encode("utf-8"), usedforsecurity=False).hexdigest()[:10]


def get_project_name(uri, user_uuid, dataspace_uuid):
def get_project_name(uri, dataspace_uuid):
"""
Return a project name based on a hash of the provided `uri` combined with a hash
of the `user_uuid` and `dataspace_uuid`.
of the `dataspace_uuid`.

project_name = "uri_hash.dataspace_uuid_hash.user_uuid_hash"
project_name = "uri_hash.dataspace_uuid_hash"
"""
uri_hash = get_hash_uid(uri)
dataspace_hash = get_hash_uid(dataspace_uuid)
user_hash = get_hash_uid(user_uuid)

return f"{uri_hash}.{dataspace_hash}.{user_hash}"
return f"{uri_hash}.{dataspace_hash}"


def get_webhook_url(view_name, user_uuid):
Expand Down Expand Up @@ -560,22 +556,15 @@ def get_notice_text_from_key_files(scan_summary, separator="\n\n---\n\n"):
return notice_text


def check_for_existing_scan_workaround(response_json, uri, user):
def update_package_from_existing_scan_data(uri, user):
"""
Workaroud the case where the Scan already exisit on the ScanCode.io side before
Workaround the case where the Scan already exists on the ScanCode.io side before
the package is created on the DejaCode side.
This can happen if the package is deleted then re-created from the same user
providing the same download URL.
"""
if not response_json or not isinstance(response_json, dict):
return

already_exists_message = "project with this name already exists."
already_exists = already_exists_message in response_json.get("name", [])

if already_exists:
Package = apps.get_model("component_catalog", "package")
package = Package.objects.get_or_none(download_url=uri, dataspace=user.dataspace)
if package:
updated_fields = package.update_from_scan(user)
return updated_fields
Package = apps.get_model("component_catalog", "package")
package = Package.objects.get_or_none(download_url=uri, dataspace=user.dataspace)
if package:
updated_fields = package.update_from_scan(user)
return updated_fields
16 changes: 11 additions & 5 deletions dje/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from django_rq import job

from dejacode_toolkit.scancodeio import ScanCodeIO
from dejacode_toolkit.scancodeio import check_for_existing_scan_workaround
from dejacode_toolkit.scancodeio import update_package_from_existing_scan_data
from dje.utils import is_available

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -109,11 +109,17 @@ def scancodeio_submit_scan(uris, user_uuid, dataspace_uuid):

scancodeio = ScanCodeIO(user.dataspace)
for uri in uris:
if is_available(uri):
response_json = scancodeio.submit_scan(uri, user_uuid, dataspace_uuid)
check_for_existing_scan_workaround(response_json, uri, user)
else:
if not is_available(uri):
logger.info(f'uri="{uri}" is not reachable.')
continue

# Check if a Scan is already available in ScanCode.io for this URI.
existing_project = scancodeio.get_project_info(download_url=uri)
if existing_project:
logger.info(f'Update the local uri="{uri}" package from available Scan data.')
update_package_from_existing_scan_data(uri, user)
else:
scancodeio.submit_scan(uri, user_uuid, dataspace_uuid)


@job("default", timeout="3h")
Expand Down