From cbdb3fc73aaf2772ab6f4b4923b97c0e4b117c4f Mon Sep 17 00:00:00 2001 From: Ayan Sinha Mahapatra Date: Wed, 13 Aug 2025 18:14:02 +0530 Subject: [PATCH 1/2] Measure package assembly timing with CLI option Signed-off-by: Ayan Sinha Mahapatra --- src/packagedcode/plugin_package.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/packagedcode/plugin_package.py b/src/packagedcode/plugin_package.py index 3ced6dc8718..6ece36ac579 100644 --- a/src/packagedcode/plugin_package.py +++ b/src/packagedcode/plugin_package.py @@ -10,6 +10,7 @@ import functools import logging import os +from time import time import attr import click @@ -17,6 +18,7 @@ from commoncode.cliutils import PluggableCommandLineOption from commoncode.cliutils import DOC_GROUP from commoncode.cliutils import SCAN_GROUP +from commoncode.cliutils import CORE_GROUP from commoncode.resource import Resource from commoncode.resource import strip_first_path_segment from plugincode.scan import scan_impl @@ -133,6 +135,7 @@ class PackageScanner(ScanPlugin): codebase_attributes = dict( # a list of packages packages=attr.ib(default=attr.Factory(list), repr=False), + package_timing=attr.ib(default=0, repr=False), # a list of dependencies dependencies=attr.ib(default=attr.Factory(list), repr=False), ) @@ -192,6 +195,14 @@ class PackageScanner(ScanPlugin): help='Show the list of supported package manifest parsers and exit.', help_group=DOC_GROUP, ), + PluggableCommandLineOption( + ('--package-timing',), + is_flag=True, + default=False, + hidden=True, + help='Collect scan timing for package assembly.', + help_group=CORE_GROUP, + ), ] def is_enabled(self, package, system_package, package_only, **kwargs): @@ -210,7 +221,7 @@ def get_scanner(self, package=True, system_package=False, package_only=False, ** package_only=package_only, ) - def process_codebase(self, codebase, strip_root=False, package_only=False, **kwargs): + def process_codebase(self, codebase, strip_root=False, 
package_only=False, package_timing=False, **kwargs): """ Populate the ``codebase`` top level ``packages`` and ``dependencies`` with package and dependency instances, assembling parsed package data @@ -270,7 +281,7 @@ def process_codebase(self, codebase, strip_root=False, package_only=False, **kwa logger_debug(f'packagedcode: process_codebase: add_license_from_sibling_file: modified: {modified}') # Create codebase-level packages and dependencies - create_package_and_deps(codebase, strip_root=strip_root, **kwargs) + create_package_and_deps(codebase, strip_root=strip_root, package_timing=package_timing, **kwargs) if has_licenses: # This step is dependent on top level packages @@ -367,11 +378,14 @@ def get_installed_packages(root_dir, processes=2, **kwargs): yield from packages_by_uid.values() -def create_package_and_deps(codebase, package_adder=add_to_package, strip_root=False, **kwargs): +def create_package_and_deps(codebase, package_adder=add_to_package, strip_root=False, package_timing=False, **kwargs): """ Create and save top-level Package and Dependency from the parsed package data present in the codebase. 
""" + if package_timing: + package_assembly_start_time = time() + packages, dependencies = get_package_and_deps( codebase, package_adder=package_adder, @@ -381,6 +395,8 @@ def create_package_and_deps(codebase, package_adder=add_to_package, strip_root=F codebase.attributes.packages.extend(package.to_dict() for package in packages) codebase.attributes.dependencies.extend(dep.to_dict() for dep in dependencies) + if package_timing: + codebase.attributes.package_timing = time() - package_assembly_start_time def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=False, **kwargs): From 83a3a0a64c745fdd24af999cea2ef517ee4049a8 Mon Sep 17 00:00:00 2001 From: Ayan Sinha Mahapatra Date: Thu, 25 Jun 2026 18:48:45 +0530 Subject: [PATCH 2/2] Add timeout to process codebase and plugin steps Signed-off-by: Ayan Sinha Mahapatra --- src/scancode/cli.py | 31 +++++++++++++++++++++++++++++-- src/scancode/interrupt.py | 1 + 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/scancode/cli.py b/src/scancode/cli.py index 1376c6cfee9..de577c15c24 100644 --- a/src/scancode/cli.py +++ b/src/scancode/cli.py @@ -68,6 +68,7 @@ class WindowsError(Exception): from scancode.help import epilog_text from scancode.help import examples_text from scancode.interrupt import DEFAULT_TIMEOUT +from scancode.interrupt import DEFAULT_PLUGIN_TIMEOUT from scancode.interrupt import fake_interruptible from scancode.interrupt import interruptible from scancode.pool import ScanCodeTimeoutError @@ -253,6 +254,14 @@ def default_processes(): f'[default: {DEFAULT_TIMEOUT} seconds]', help_group=cliutils.CORE_GROUP, sort_order=10, cls=PluggableCommandLineOption) +@click.option('--timeout-plugins', + type=float, + default=DEFAULT_PLUGIN_TIMEOUT, + metavar='', + help='Stop an unfinished codebase processing or post-scan plugin after' + f' a timeout in seconds. 
[default: {DEFAULT_PLUGIN_TIMEOUT} seconds]', + help_group=cliutils.CORE_GROUP, sort_order=10, cls=PluggableCommandLineOption) + @click.option('-q', '--quiet', is_flag=True, default=False, @@ -399,6 +408,7 @@ def scancode( full_root, processes, timeout, + timeout_plugins, quiet, verbose, max_depth, @@ -510,6 +520,7 @@ def scancode( full_root=full_root, processes=processes, timeout=timeout, + timeout_plugins=timeout_plugins, quiet=quiet, verbose=verbose, max_depth=max_depth, @@ -551,7 +562,8 @@ def run_scan( full_root=False, max_in_memory=10000, processes=1, - timeout=120, + timeout=DEFAULT_TIMEOUT, + timeout_plugins=DEFAULT_PLUGIN_TIMEOUT, quiet=True, verbose=False, max_depth=0, @@ -659,6 +671,7 @@ def echo_func(*_args, **_kwargs): full_root=full_root, processes=processes, timeout=timeout, + timeout_plugins=timeout_plugins, quiet=quiet, verbose=verbose, from_json=from_json, @@ -947,6 +960,7 @@ def echo_func(*_args, **_kwargs): stage='pre-scan', plugins=pre_scan_plugins, codebase=codebase, + timeout=timeout_plugins, stage_msg='Run %(stage)ss...', plugin_msg=' Run %(stage)s: %(name)s...', quiet=quiet, @@ -966,6 +980,7 @@ def echo_func(*_args, **_kwargs): codebase=codebase, processes=processes, timeout=timeout, + timeout_plugins=timeout_plugins, timing=timeout, quiet=quiet, verbose=verbose, @@ -983,6 +998,7 @@ def echo_func(*_args, **_kwargs): stage='post-scan', plugins=post_scan_plugins, codebase=codebase, + timeout=timeout_plugins, stage_msg='Run %(stage)ss...', plugin_msg=' Run %(stage)s: %(name)s...', quiet=quiet, @@ -1001,6 +1017,7 @@ def echo_func(*_args, **_kwargs): stage='output-filter', plugins=output_filter_plugins, codebase=codebase, + timeout=timeout_plugins, stage_msg='Apply %(stage)ss...', plugin_msg=' Apply %(stage)s: %(name)s...', quiet=quiet, @@ -1035,6 +1052,7 @@ def echo_func(*_args, **_kwargs): stage='output', plugins=output_plugins, codebase=codebase, + timeout=timeout_plugins, stage_msg='Save scan results...', plugin_msg=' Save scan results as: 
%(name)s...', quiet=quiet, @@ -1095,6 +1113,7 @@ def run_codebase_plugins( stage, plugins, codebase, + timeout, stage_msg='', plugin_msg='', quiet=False, @@ -1119,6 +1138,7 @@ def run_codebase_plugins( # Sort plugins by run_order, from low to high sorted_plugins = sorted(plugins, key=lambda x: x.run_order) + scan_errors = [] success = True # TODO: add progress indicator for plugin in sorted_plugins: @@ -1135,7 +1155,11 @@ def run_codebase_plugins( logger_debug(pformat(sorted(kwargs.items()))) logger_debug() - plugin.process_codebase(codebase, **kwargs) + process_codebase_func = partial(plugin.process_codebase, codebase, **kwargs) + error, _value = interruptible(process_codebase_func, timeout=timeout) + if error: + msg = 'ERROR: for scanner: ' + plugin.name + ':\n' + error + codebase.errors.append(msg) except Exception as _e: msg = 'ERROR: failed to run %(stage)s plugin: %(name)s:' % locals() @@ -1158,6 +1182,7 @@ def run_scanners( codebase, processes, timeout, + timeout_plugins, timing, quiet=False, verbose=False, @@ -1208,8 +1233,10 @@ def run_scanners( # TODO: add progress indicator # run the process codebase of each scan plugin (most often a no-op) + use_threading = processes >= 0 scan_process_codebase_success = run_codebase_plugins( stage, plugins, codebase, + timeout=timeout_plugins, stage_msg='Filter %(stage)ss...', plugin_msg=' Filter %(stage)s: %(name)s...', quiet=quiet, verbose=verbose, kwargs=kwargs, diff --git a/src/scancode/interrupt.py b/src/scancode/interrupt.py index e26d090567d..89fd11edd42 100644 --- a/src/scancode/interrupt.py +++ b/src/scancode/interrupt.py @@ -45,6 +45,7 @@ class TimeoutError(Exception): # NOQA DEFAULT_TIMEOUT = 120 # seconds +DEFAULT_PLUGIN_TIMEOUT = 2400 # seconds TIMEOUT_MSG = 'ERROR: Processing interrupted: timeout after %(timeout)d seconds.' ERROR_MSG = 'ERROR: Unknown error:\n'