|
26 | 26 | from typing import Union |
27 | 27 | from urllib.parse import quote_plus |
28 | 28 | from urllib.parse import unquote |
| 29 | +from urllib.parse import urljoin |
29 | 30 | from urllib.parse import urlparse |
30 | 31 | from urllib.parse import urlunparse |
31 | 32 |
|
@@ -1631,26 +1632,21 @@ async def fetch_links( |
1631 | 1632 |
|
def resolve_relative_url(package_url: str, url: str) -> str:
    """
    Return the resolved `url` URL string given a `package_url` base URL string
    of a package.

    Per PEP 503, links in the simple index may be relative. Resolution is
    delegated to the stdlib ``urljoin``, which handles multi-level '../'
    traversal, root-relative paths, and path normalization, and returns an
    already-absolute `url` unchanged.

    Note that the trailing slash of `package_url` is significant: without it,
    the last path segment is treated as a file name and is replaced rather
    than extended when resolving a relative `url`.

    For example:
    >>> resolve_relative_url("https://example.com/package/", "../path/file.txt")
    'https://example.com/path/file.txt'
    >>> resolve_relative_url("https://example.com/simple/pkg/", "../../packages/file.whl")
    'https://example.com/packages/file.whl'
    >>> resolve_relative_url("https://example.com/a/b/c/", "https://other.com/file.whl")
    'https://other.com/file.whl'
    >>> resolve_relative_url("https://example.com/simple/pkg/", "file.whl")
    'https://example.com/simple/pkg/file.whl'
    >>> resolve_relative_url("https://example.com/simple/pkg", "file.whl")
    'https://example.com/simple/file.whl'
    """
    # urljoin implements standard relative-reference resolution, so no manual
    # scheme sniffing or '..' stripping is needed here.
    return urljoin(package_url, url)
1654 | 1650 |
|
1655 | 1651 |
|
1656 | 1652 | ################################################################################ |
|
0 commit comments