Skip to content

Commit 8638e53

Browse files
authored
Merge pull request #261 from boschglobal/nnobelis/fix_fetching_packages_from_private_repositories
2 parents f602238 + ed9c3c6 commit 8638e53

12 files changed

Lines changed: 210 additions & 403 deletions

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
Changelog
22
=========
33

4+
v0.15.1
5+
-----------
6+
7+
- Fetch package metadata from private artifactory if specified https://github.com/aboutcode-org/python-inspector/pull/261
8+
49
v0.15.0
510
-----------
611

src/python_inspector/api.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,11 @@ def resolve_dependencies(
296296
async def gather_pypi_data():
297297
async def get_pypi_data(package):
298298
data = await get_pypi_data_from_purl(
299-
package, repos=repos, environment=environment, prefer_source=prefer_source
299+
package,
300+
repos=repos,
301+
environment=environment,
302+
prefer_source=prefer_source,
303+
index_urls=list(repos_by_url.keys()),
300304
)
301305

302306
if verbose:

src/python_inspector/package_data.py

Lines changed: 83 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
# See https://aboutcode.org for more information about nexB OSS projects.
1010
#
1111

12+
import os
13+
from urllib.parse import urlparse, urlunparse
14+
1215
from typing import Dict
1316
from typing import List
1417
from typing import Optional
@@ -27,7 +30,11 @@
2730

2831

2932
async def get_pypi_data_from_purl(
30-
purl: str, environment: Environment, repos: List[PypiSimpleRepository], prefer_source: bool
33+
purl: str,
34+
environment: Environment,
35+
repos: List[PypiSimpleRepository],
36+
prefer_source: bool,
37+
index_urls: List[str],
3138
) -> Optional[PackageData]:
3239
"""
3340
Generate `Package` object from the `purl` string of pypi type
@@ -43,7 +50,22 @@ async def get_pypi_data_from_purl(
4350
version = parsed_purl.version
4451
if not version:
4552
raise Exception("Version is not specified in the purl")
46-
base_path = "https://pypi.org/pypi"
53+
54+
# Todo: address the case where several index URLs are passed
55+
if index_urls:
56+
# Backward compatibility: If pypi.org is passed as index url, always resolve against it.
57+
# When multiple index URLs are supported and the todo above is fixed, then this hack can be removed.
58+
if "https://pypi.org/simple" in index_urls:
59+
index_url = None
60+
else:
61+
index_url = index_urls[0]
62+
else:
63+
index_url = None
64+
65+
base_path = (
66+
index_url.removesuffix("/simple") + "/pypi" if index_url else "https://pypi.org/pypi"
67+
)
68+
4769
api_url = f"{base_path}/{name}/{version}/json"
4870

4971
from python_inspector.utils import get_response_async
@@ -62,10 +84,32 @@ async def get_pypi_data_from_purl(
6284
sdist_url = await get_sdist_download_url(
6385
purl=parsed_purl, repos=repos, python_version=python_version
6486
)
87+
88+
def canonicalize_url(url: str) -> str:
    """
    Return `url` with the "." and ".." segments of its path resolved.

    Only the path component is rewritten; scheme, netloc, params, query and
    fragment are passed through unchanged. Redundant separators and a
    trailing slash in the path are removed as a side effect of the
    normalization. A URL whose path is empty is returned as-is.
    """
    # URL paths always use "/" as separator, whatever the host OS is, so
    # use posixpath directly instead of os.path (which is ntpath on Windows
    # and would require converting backslashes back afterwards).
    import posixpath

    parsed = urlparse(url)

    if not parsed.path:
        # normpath("") returns ".", which would turn "https://host" into
        # "https://host/." once the URL is rebuilt.
        return url

    canonical_path = posixpath.normpath(parsed.path)

    # Rebuild the URL, swapping only the path for its normalized form.
    return urlunparse(parsed._replace(path=canonical_path))
107+
65108
if sdist_url:
66109
valid_distribution_urls.append(sdist_url)
67110

68111
valid_distribution_urls = [url for url in valid_distribution_urls if url]
112+
valid_distribution_urls = list(map(canonicalize_url, valid_distribution_urls))
69113

70114
# if prefer_source is True then only source distribution is used
71115
# in case of no source distribution available then wheel is used
@@ -81,28 +125,60 @@ async def get_pypi_data_from_purl(
81125
]
82126
wheel_url = choose_single_wheel(wheel_urls)
83127
if wheel_url:
84-
valid_distribution_urls.insert(0, wheel_url)
128+
valid_distribution_urls.insert(0, canonicalize_url(wheel_url))
85129

86130
urls = {url.get("url"): url for url in response.get("urls") or []}
131+
132+
# Sanitize all URLs that are relative and canonicalize them
133+
urls_sanitized = {}
134+
for url in urls:
135+
value = urls.get(url)
136+
137+
# remove the URL anchor fragment
138+
url_parsed = urlparse(url)
139+
url = urlunparse(url_parsed._replace(fragment=""))
140+
141+
if url.startswith("https"):
142+
url_sanitized = canonicalize_url(url)
143+
else:
144+
url_sanitized = canonicalize_url(base_path + url)
145+
146+
urls_sanitized[url_sanitized] = value
147+
148+
def remove_credentials_from_url(url: str) -> str:
    """
    Return `url` with any "user:password@" part stripped from its netloc.

    The netloc is rebuilt from the parsed host name and port, so the host
    name comes back lower-cased (as reported by `urlparse`). All other URL
    components are left unchanged.
    """
    parsed = urlparse(url)

    # `hostname` is None when the URL has no host; fall back to an empty
    # netloc so that credentials are never carried over in that case.
    host = parsed.hostname or ""

    # `hostname` strips the square brackets of IPv6 literals; they must be
    # restored, otherwise the host and the port cannot be told apart.
    if ":" in host:
        host = f"[{host}]"

    # Compare against None so that an explicit port 0 is not dropped.
    port = parsed.port
    netloc = host if port is None else f"{host}:{port}"

    # Swap the original netloc for the credential-free one and rebuild.
    return urlunparse(parsed._replace(netloc=netloc))
162+
87163
# iterate over the valid distribution urls and return the first
88164
# one that is matching.
89165
for dist_url in valid_distribution_urls:
90-
if dist_url not in urls:
166+
if dist_url not in urls_sanitized:
91167
continue
92168

93-
url_data = urls.get(dist_url)
169+
url_data = urls_sanitized.get(dist_url)
94170
digests = url_data.get("digests") or {}
95171

96172
return PackageData(
97173
primary_language="Python",
98174
description=get_description(info),
99175
homepage_url=homepage_url,
100-
api_data_url=api_url,
176+
api_data_url=remove_credentials_from_url(api_url),
101177
bug_tracking_url=bug_tracking_url,
102178
code_view_url=code_view_url,
103179
license_expression=info.get("license_expression"),
104180
declared_license=get_declared_license(info),
105-
download_url=dist_url,
181+
download_url=remove_credentials_from_url(dist_url),
106182
size=url_data.get("size"),
107183
md5=digests.get("md5") or url_data.get("md5_digest"),
108184
sha256=digests.get("sha256"),

tests/data/azure-devops.req-310-expected.json

Lines changed: 22 additions & 23 deletions
Large diffs are not rendered by default.

tests/data/azure-devops.req-312-expected.json

Lines changed: 22 additions & 23 deletions
Large diffs are not rendered by default.

tests/data/azure-devops.req-313-expected.json

Lines changed: 22 additions & 23 deletions
Large diffs are not rendered by default.

tests/data/azure-devops.req-314-expected.json

Lines changed: 22 additions & 23 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)