From 304b684190c22907e6d78c6c677722d498cc0d44 Mon Sep 17 00:00:00 2001 From: Vincent Gao Date: Fri, 19 Jun 2026 08:18:32 +0200 Subject: [PATCH 1/3] Detect acronym holder with parenthesized expansion Signed-off-by: Vincent Gao --- AUTHORS.rst | 1 + src/cluecode/copyrights.py | 3 +++ tests/cluecode/data/copyrights/kisa_seed_local.h | 1 + tests/cluecode/data/copyrights/kisa_seed_local.h.yml | 7 +++++++ 4 files changed, 12 insertions(+) create mode 100644 tests/cluecode/data/copyrights/kisa_seed_local.h create mode 100644 tests/cluecode/data/copyrights/kisa_seed_local.h.yml diff --git a/AUTHORS.rst b/AUTHORS.rst index 9a8224dc7d..4a24560be5 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -102,6 +102,7 @@ The following organizations or individuals have contributed to ScanCode: - Vibhu Agarwal @Vibhu-Agarwal - Viktor Tiulpin @tiulpin - Vinay Kumar Singh @Vinay0001 +- Vincent Gao @gaoflow - Virag Umathe @viragumathe5 - Yash D. Saraf @yashdsaraf - Yash Nisar @yash-nisar diff --git a/src/cluecode/copyrights.py b/src/cluecode/copyrights.py index 6d17467acf..f1d52dc7dd 100644 --- a/src/cluecode/copyrights.py +++ b/src/cluecode/copyrights.py @@ -1234,6 +1234,9 @@ def build_detection_from_node( (r"^[a-z].+\(s\)[\.,]?$", 'JUNK'), + # acronym with an opening parenthesized expansion, as in "KISA(Korea" + (r"^[A-Z]{2,}\([A-Z][A-Za-z]+$", 'NNP'), + # parens in the middle: for(var (r"^[a-zA-Z]+[\)\(]+,?[\)\(]?[a-zA-Z]+[\.,]?$", 'JUNK'), diff --git a/tests/cluecode/data/copyrights/kisa_seed_local.h b/tests/cluecode/data/copyrights/kisa_seed_local.h new file mode 100644 index 0000000000..3d871423ff --- /dev/null +++ b/tests/cluecode/data/copyrights/kisa_seed_local.h @@ -0,0 +1 @@ +Copyright (c) 2007 KISA(Korea Information Security Agency). diff --git a/tests/cluecode/data/copyrights/kisa_seed_local.h.yml b/tests/cluecode/data/copyrights/kisa_seed_local.h.yml new file mode 100644 index 0000000000..32e7904a29 --- /dev/null +++ b/tests/cluecode/data/copyrights/kisa_seed_local.h.yml @@ -0,0 +1,7 @@ +what: + - copyrights + - holders +copyrights: + - Copyright (c) 2007 KISA(Korea Information Security Agency) +holders: + - KISA(Korea Information Security Agency) From 5eea338bd17022c92938b6a07a0c191317390d40 Mon Sep 17 00:00:00 2001 From: Vincent Gao Date: Fri, 19 Jun 2026 16:45:30 +0200 Subject: [PATCH 2/3] Narrow KISA holder pattern Signed-off-by: Vincent Gao --- src/cluecode/copyrights.py | 4 ++-- tests/licensedcode/test_detect.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cluecode/copyrights.py b/src/cluecode/copyrights.py index f1d52dc7dd..dd1b420ae1 100644 --- a/src/cluecode/copyrights.py +++ b/src/cluecode/copyrights.py @@ -1234,8 +1234,8 @@ def build_detection_from_node( (r"^[a-z].+\(s\)[\.,]?$", 'JUNK'), - # acronym with an opening parenthesized expansion, as in "KISA(Korea" - (r"^[A-Z]{2,}\([A-Z][A-Za-z]+$", 'NNP'), + # KISA with an opening parenthesized expansion, as in "KISA(Korea" + (r"^KISA\(Korea$", 'NNP'), # parens in the middle: for(var (r"^[a-zA-Z]+[\)\(]+,?[\)\(]?[a-zA-Z]+[\.,]?$", 'JUNK'), diff --git a/tests/licensedcode/test_detect.py b/tests/licensedcode/test_detect.py index 5dbf7b369b..a1475c390f 100644 --- a/tests/licensedcode/test_detect.py +++ b/tests/licensedcode/test_detect.py @@ -1075,8 +1075,8 @@ def test_match_has_correct_line_positions_in_automake_perl_file(self): expected = [ # detected, match.lines(), match.qspan, ('gpl-2.0-plus', (12, 25), Span(51, 160)), - ('fsf-unlimited-no-warranty', (231, 238), Span(986, 1049)), - ('warranty-disclaimer', (306, 307), Span(1359, 1381)), + ('fsf-unlimited-no-warranty', (231, 238), Span(998, 1061)), + ('warranty-disclaimer', (306, 307), Span(1371, 1393)), ] self.check_position('positions/automake.pl', expected) From 5b07d794936d3b5916043b6cf97724ab460197c7 Mon Sep 17 00:00:00 2001 From: Vincent Gao Date: Sun, 21 Jun 2026 00:24:24 +0200 Subject: [PATCH 3/3] Fix full-root path expectations Signed-off-by: Vincent Gao --- tests/scancode/test_cli.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/scancode/test_cli.py b/tests/scancode/test_cli.py index 9d038f71e6..0caeefa5f4 100644 --- a/tests/scancode/test_cli.py +++ b/tests/scancode/test_cli.py @@ -167,8 +167,7 @@ def test_scan_info_returns_full_root(): result_data = json.loads(open(result_file).read()) file_paths = [f['path'] for f in result_data['files']] assert len(file_paths) == 12 - # note that we strip paths from leading and trailing slashes - root = fileutils.as_posixpath(test_dir).strip('/') + root = fileutils.as_posixpath(test_dir) assert all(p.startswith(root) for p in file_paths) @@ -182,9 +181,8 @@ def test_scan_info_returns_correct_full_root_with_single_file(): # we have a single file assert len(files) == 1 scanned_file = files[0] - # and we check that the path is the full path without repeating the file name - # note that the path never contain leading and trailing slashes - assert scanned_file['path'] == fileutils.as_posixpath(test_file).strip('/') + # and we check that the path is the full absolute path without repeating the file name + assert scanned_file['path'] == fileutils.as_posixpath(test_file) def test_scan_info_returns_does_not_strip_root_with_single_file():