From d905a94a9b892328fdf47cd0c1afc130b1e93d97 Mon Sep 17 00:00:00 2001 From: Ernesto de Gracia Herranz Date: Wed, 24 Jun 2026 22:19:57 +0200 Subject: [PATCH 1/4] spdx expresion on sbom --- conan/tools/sbom/cyclonedx.py | 70 ++++++++++++--- .../tools/sbom/test_spdx_expression.py | 86 +++++++++++++++++++ 2 files changed, 144 insertions(+), 12 deletions(-) create mode 100644 test/unittests/tools/sbom/test_spdx_expression.py diff --git a/conan/tools/sbom/cyclonedx.py b/conan/tools/sbom/cyclonedx.py index 06ddc649b04..dc02570f9fb 100644 --- a/conan/tools/sbom/cyclonedx.py +++ b/conan/tools/sbom/cyclonedx.py @@ -194,21 +194,67 @@ def cyclonedx_1_6(conanfile, name=None, add_build=False, add_tests=False, **kwar return sbom_cyclonedx_1_6 +def _is_valid_spdx_license(license_value): + from conan.tools.sbom.spdx_licenses import NORMALIZED_VALID_SPDX_LICENSES + + token = license_value.lower() + if token.endswith("+") and len(token) > 1: + return token[:-1] in NORMALIZED_VALID_SPDX_LICENSES + if token in NORMALIZED_VALID_SPDX_LICENSES: + return True + if token.startswith("licenseref-"): + value = token[len("licenseref-"):] + return bool(value) and all(c.isalnum() or c in ".-" for c in value) + if token.startswith("documentref-") and ":" in token: + document_ref, right = token.split(":", 1) + ref_id = document_ref[len("documentref-"):] + if not ref_id or not all(c.isalnum() or c in ".-" for c in ref_id): + return False + if right.startswith("licenseref-"): + value = right[len("licenseref-"):] + return bool(value) and all(c.isalnum() or c in ".-" for c in value) + if right.endswith("+") and len(right) > 1: + return right[:-1] in NORMALIZED_VALID_SPDX_LICENSES + return right in NORMALIZED_VALID_SPDX_LICENSES + return (bool(token) and all(c.isalnum() or c in ".-" for c in token) + and "exception" in token) + +def _is_valid_spdx_expression(license_value): + import re + _VALID_SPDX_OPERATORS = ["AND", "OR", "WITH"] + _normalized_expresion = "" + for w in (t for t in re.findall(r"\(|\)|[^\s()]+", license_value)): + if _is_valid_spdx_license(w): + _normalized_expresion += "L" + elif w in _VALID_SPDX_OPERATORS: + _normalized_expresion += "O" + elif w in ("(", ")"): + _normalized_expresion += w + else: + return False + if not _normalized_expresion: + return False + _SPDX_EXPR_TERM = r'(?:L|\((?:L|\([^()]*\))(?:O(?:L|\([^()]*\)))*\))' + return re.compile(rf'^{_SPDX_EXPR_TERM}(?:O{_SPDX_EXPR_TERM})*$').fullmatch(_normalized_expresion) is not None + + def _calculate_licenses(component): from conan.tools.sbom.spdx_licenses import NORMALIZED_VALID_SPDX_LICENSES - licenses = component.conanfile.license - if isinstance(licenses, str): # Just one license - field = "id" if licenses.lower() in NORMALIZED_VALID_SPDX_LICENSES else "name" - return [{"license": {field: licenses}}] - - return [ - # More than one license - {"license": { - "id" if lic.lower() in NORMALIZED_VALID_SPDX_LICENSES else "name": lic - }} - for lic in licenses - ] + licenses = component.conanfile.license + if isinstance(licenses, str): + licenses = [licenses] + + result = [] + for lic in licenses: + if lic.lower() in NORMALIZED_VALID_SPDX_LICENSES: + field = "id" + elif _is_valid_spdx_expression(lic): + field = "expression" + else: + field = "name" + result.append({"license": {field: lic}}) + return result def _calculate_bomref(component): diff --git a/test/unittests/tools/sbom/test_spdx_expression.py b/test/unittests/tools/sbom/test_spdx_expression.py new file mode 100644 index 00000000000..b3ca39e0f55 --- /dev/null +++ b/test/unittests/tools/sbom/test_spdx_expression.py @@ -0,0 +1,86 @@ +import pytest + +from conan.tools.sbom.cyclonedx import _calculate_licenses, _is_valid_spdx_expression, _is_valid_spdx_license +from conan.test.utils.mocks import ConanFileMock + + +@pytest.mark.parametrize( + "expression", + [ + "MIT OR Apache-2.0", + "MIT AND Apache-2.0", + "MIT WITH Apache-2.0", + "( MIT OR Apache-2.0 )", + "( MIT OR Apache-2.0 ) AND BSD-3-Clause", + "MIT OR ( Apache-2.0 )", + "( MIT OR ( MIT ) )", + "(MIT OR (MIT))", + ], +) +def test_valid_spdx_expressions(expression): + assert _is_valid_spdx_expression(expression) + + +@pytest.mark.parametrize( + "expression", + [ + "", + "custom license", + "MIT OR custom", + "MIT AND", + "OR MIT", + "MIT OR", + "( MIT OR Apache-2.0", + "MIT OR Apache-2.0 )", + ], +) +def test_invalid_spdx_expressions(expression): + assert not _is_valid_spdx_expression(expression) + + +@pytest.mark.parametrize( + "license_value", + [ + "MIT", + "apache-2.0", + "GPL-2.0-or-later+", + "LicenseRef-Proprietary", + "LLVM-exception", + "LicenseRef-23", + "LicenseRef-MIT-Style-1", + "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2", + ], +) +def test_valid_spdx_license(license_value): + assert _is_valid_spdx_license(license_value) + + +@pytest.mark.parametrize( + "license_value", + [ + "custom", + "LicenseRef-", + "OR", + ], +) +def test_invalid_spdx_license(license_value): + assert not _is_valid_spdx_license(license_value) + + +@pytest.mark.parametrize( + "license_value, expected", + [ + ("MIT", "id"), + ("mit", "id"), + ("MIT OR Apache-2.0", "expression"), + ("( MIT OR ( MIT ) )", "expression"), + ("(MIT OR (MIT))", "expression"), + ("custom license", "name"), + ], +) +def test_license_field(license_value, expected): + component = type("Component", (), {})() + component.conanfile = ConanFileMock() + component.conanfile.license = license_value + field = next(iter(_calculate_licenses(component)[0]["license"])) + assert field == expected From 9f1c56aba197c9f05bd960a366f3f67299fe83b7 Mon Sep 17 00:00:00 2001 From: Ernesto de Gracia Herranz Date: Thu, 25 Jun 2026 09:55:31 +0200 Subject: [PATCH 2/4] simplify regex --- conan/tools/sbom/cyclonedx.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/conan/tools/sbom/cyclonedx.py b/conan/tools/sbom/cyclonedx.py index dc02570f9fb..6a4e311b62c 100644 --- a/conan/tools/sbom/cyclonedx.py +++ b/conan/tools/sbom/cyclonedx.py @@ -228,14 +228,19 @@ def _is_valid_spdx_expression(license_value): _normalized_expresion += "L" elif w in _VALID_SPDX_OPERATORS: _normalized_expresion += "O" - elif w in ("(", ")"): - _normalized_expresion += w + elif w == "(": + _normalized_expresion += "<" + elif w == ")": + _normalized_expresion += ">" else: return False if not _normalized_expresion: return False - _SPDX_EXPR_TERM = r'(?:L|\((?:L|\([^()]*\))(?:O(?:L|\([^()]*\)))*\))' - return re.compile(rf'^{_SPDX_EXPR_TERM}(?:O{_SPDX_EXPR_TERM})*$').fullmatch(_normalized_expresion) is not None + # L = license, O = operator (AND/OR/WITH), <> = grouping from parentheses. + _operand = r'(?:L|<[^>]*>)' + _grouped_operand = rf'(?:L|<{_operand}(?:O{_operand})*>)' + _expression = rf'^{_grouped_operand}(?:O{_grouped_operand})*$' + return re.compile(_expression).fullmatch(_normalized_expresion) is not None def _calculate_licenses(component): From 218f551a5429730e288f1f8a3e7bd9a13803b24a Mon Sep 17 00:00:00 2001 From: Ernesto de Gracia Herranz Date: Thu, 25 Jun 2026 11:17:06 +0200 Subject: [PATCH 3/4] simplify check --- conan/tools/sbom/cyclonedx.py | 52 ++------------ .../tools/sbom/test_spdx_expression.py | 70 +------------------ 2 files changed, 6 insertions(+), 116 deletions(-) diff --git a/conan/tools/sbom/cyclonedx.py b/conan/tools/sbom/cyclonedx.py index 6a4e311b62c..8e2f4b17194 100644 --- a/conan/tools/sbom/cyclonedx.py +++ b/conan/tools/sbom/cyclonedx.py @@ -194,53 +194,9 @@ def cyclonedx_1_6(conanfile, name=None, add_build=False, add_tests=False, **kwar return sbom_cyclonedx_1_6 -def _is_valid_spdx_license(license_value): - from conan.tools.sbom.spdx_licenses import NORMALIZED_VALID_SPDX_LICENSES - - token = license_value.lower() - if token.endswith("+") and len(token) > 1: - return token[:-1] in NORMALIZED_VALID_SPDX_LICENSES - if token in NORMALIZED_VALID_SPDX_LICENSES: - return True - if token.startswith("licenseref-"): - value = token[len("licenseref-"):] - return bool(value) and all(c.isalnum() or c in ".-" for c in value) - if token.startswith("documentref-") and ":" in token: - document_ref, right = token.split(":", 1) - ref_id = document_ref[len("documentref-"):] - if not ref_id or not all(c.isalnum() or c in ".-" for c in ref_id): - return False - if right.startswith("licenseref-"): - value = right[len("licenseref-"):] - return bool(value) and all(c.isalnum() or c in ".-" for c in value) - if right.endswith("+") and len(right) > 1: - return right[:-1] in NORMALIZED_VALID_SPDX_LICENSES - return right in NORMALIZED_VALID_SPDX_LICENSES - return (bool(token) and all(c.isalnum() or c in ".-" for c in token) - and "exception" in token) - -def _is_valid_spdx_expression(license_value): - import re - _VALID_SPDX_OPERATORS = ["AND", "OR", "WITH"] - _normalized_expresion = "" - for w in (t for t in re.findall(r"\(|\)|[^\s()]+", license_value)): - if _is_valid_spdx_license(w): - _normalized_expresion += "L" - elif w in _VALID_SPDX_OPERATORS: - _normalized_expresion += "O" - elif w == "(": - _normalized_expresion += "<" - elif w == ")": - _normalized_expresion += ">" - else: - return False - if not _normalized_expresion: - return False - # L = license, O = operator (AND/OR/WITH), <> = grouping from parentheses. - _operand = r'(?:L|<[^>]*>)' - _grouped_operand = rf'(?:L|<{_operand}(?:O{_operand})*>)' - _expression = rf'^{_grouped_operand}(?:O{_grouped_operand})*$' - return re.compile(_expression).fullmatch(_normalized_expresion) is not None +def _is_expr(license_value): + v = license_value.upper() + return " AND " in v or " OR " in v or " WITH " in v def _calculate_licenses(component): @@ -254,7 +210,7 @@ def _calculate_licenses(component): for lic in licenses: if lic.lower() in NORMALIZED_VALID_SPDX_LICENSES: field = "id" - elif _is_valid_spdx_expression(lic): + elif _is_expr(lic): field = "expression" else: field = "name" diff --git a/test/unittests/tools/sbom/test_spdx_expression.py b/test/unittests/tools/sbom/test_spdx_expression.py index b3ca39e0f55..e64c6ee2a84 100644 --- a/test/unittests/tools/sbom/test_spdx_expression.py +++ b/test/unittests/tools/sbom/test_spdx_expression.py @@ -1,71 +1,5 @@ import pytest -from conan.tools.sbom.cyclonedx import _calculate_licenses, _is_valid_spdx_expression, _is_valid_spdx_license -from conan.test.utils.mocks import ConanFileMock - - -@pytest.mark.parametrize( - "expression", - [ - "MIT OR Apache-2.0", - "MIT AND Apache-2.0", - "MIT WITH Apache-2.0", - "( MIT OR Apache-2.0 )", - "( MIT OR Apache-2.0 ) AND BSD-3-Clause", - "MIT OR ( Apache-2.0 )", - "( MIT OR ( MIT ) )", - "(MIT OR (MIT))", - ], -) -def test_valid_spdx_expressions(expression): - assert _is_valid_spdx_expression(expression) - - -@pytest.mark.parametrize( - "expression", - [ - "", - "custom license", - "MIT OR custom", - "MIT AND", - "OR MIT", - "MIT OR", - "( MIT OR Apache-2.0", - "MIT OR Apache-2.0 )", - ], -) -def test_invalid_spdx_expressions(expression): - assert not _is_valid_spdx_expression(expression) - - -@pytest.mark.parametrize( - "license_value", - [ - "MIT", - "apache-2.0", - "GPL-2.0-or-later+", - "LicenseRef-Proprietary", - "LLVM-exception", - "LicenseRef-23", - "LicenseRef-MIT-Style-1", - "DocumentRef-spdx-tool-1.2:LicenseRef-MIT-Style-2", - ], -) -def test_valid_spdx_license(license_value): - assert _is_valid_spdx_license(license_value) - - -@pytest.mark.parametrize( - "license_value", - [ - "custom", - "LicenseRef-", - "OR", - ], -) -def test_invalid_spdx_license(license_value): - assert not _is_valid_spdx_license(license_value) - @pytest.mark.parametrize( "license_value, expected", @@ -73,8 +7,8 @@ def test_invalid_spdx_license(license_value): ("MIT", "id"), ("mit", "id"), ("MIT OR Apache-2.0", "expression"), - ("( MIT OR ( MIT ) )", "expression"), - ("(MIT OR (MIT))", "expression"), + ("( MIT AND ( MIT ) )", "expression"), + ("(MIT WITH (MIT))", "expression"), ("custom license", "name"), ], ) From 0817a6a44386f8f2f53982da3b3d4a8182f545b2 Mon Sep 17 00:00:00 2001 From: Ernesto de Gracia Herranz Date: Thu, 25 Jun 2026 11:25:51 +0200 Subject: [PATCH 4/4] fix --- test/unittests/tools/sbom/test_spdx_expression.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/unittests/tools/sbom/test_spdx_expression.py b/test/unittests/tools/sbom/test_spdx_expression.py index e64c6ee2a84..a22875c8637 100644 --- a/test/unittests/tools/sbom/test_spdx_expression.py +++ b/test/unittests/tools/sbom/test_spdx_expression.py @@ -1,5 +1,8 @@ import pytest +from conan.tools.sbom.cyclonedx import _calculate_licenses +from conan.test.utils.mocks import ConanFileMock + @pytest.mark.parametrize( "license_value, expected",