From 4a26255a053de693cea8f45cc867868e27d7467b Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Mon, 23 Feb 2026 22:45:36 +0100
Subject: [PATCH 01/15] address #86

---
 psiflow/data/dataset.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/psiflow/data/dataset.py b/psiflow/data/dataset.py
index a2e866e..e332167 100644
--- a/psiflow/data/dataset.py
+++ b/psiflow/data/dataset.py
@@ -9,7 +9,7 @@
 from parsl.app.app import join_app, python_app
 from parsl.app.python import PythonApp
 from parsl.data_provider.files import File
-from parsl.dataflow.futures import AppFuture
+from parsl.dataflow.futures import AppFuture, DataFuture
 
 import psiflow
 from psiflow.geometry import QUANTITIES, Geometry
@@ -127,7 +127,7 @@ def __getitem__(
             ).outputs[0]
             return Dataset(None, extxyz)
 
-    def save(self, path: Union[Path, str]) -> AppFuture:
+    def save(self, path: Union[Path, str]) -> DataFuture:
         """
         Save the dataset to a file.
 
@@ -135,13 +135,14 @@ def save(self, path: Union[Path, str]) -> AppFuture:
             path: Path to save the dataset.
 
         Returns:
-            AppFuture: Future representing the completion of the save operation.
+            DataFuture: Future representing the file to which the dataset is saved.
         """
         path = psiflow.resolve_and_check(Path(path))
-        _ = copy_data_future(
+        future = copy_data_future(
             inputs=[self.extxyz],
             outputs=[File(str(path))],
         )
+        return future.outputs[0]
 
     def geometries(self) -> AppFuture:
         """

From 32324d62307bbb63f049c23639b9eb7dc3938a0f Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Wed, 25 Feb 2026 01:21:03 +0100
Subject: [PATCH 02/15] Update execution.py

- working towards cleaner input configs
- started moving more functionality into ExecutionDefinition
- added lots of TODO things and questions
---
 psiflow/execution.py | 845 ++++++++++++++++++++++---------------------
 1 file changed, 435 insertions(+), 410 deletions(-)

diff --git a/psiflow/execution.py b/psiflow/execution.py
index c632935..1e4b758 100644
--- a/psiflow/execution.py
+++ b/psiflow/execution.py
@@ -1,10 +1,10 @@
-from __future__ import annotations  # necessary for type-guarding class methods
-
 import logging
-import math
 import re
 import shutil
 import sys
+import warnings
+import subprocess
+from datetime import datetime, timedelta
 from dataclasses import dataclass
 from pathlib import Path
 from threading import Lock
@@ -14,12 +14,10 @@
 
 import parsl
 import psutil
-import pytimeparse
-import typeguard
 import yaml
 from parsl.config import Config
 from parsl.data_provider.files import File
-from parsl.executors import (  # WorkQueueExecutor,
+from parsl.executors import (
     HighThroughputExecutor,
     ThreadPoolExecutor,
     WorkQueueExecutor,
@@ -36,364 +34,481 @@
 
 
 PSIFLOW_INTERNAL = "psiflow_internal"
-
-EXECUTION_KWARGS = (
-    "container_uri",
-    "container_engine",
-    "container_addopts",
-    "container_entrypoint",
-)
+DEFAULT_CONFIG = {  # TODO: remove
+    "ModelEvaluation": {"gpu": False, "use_threadpool": True},
+    "ModelTraining": {"gpu": True, "use_threadpool": True},
+}
 
 
 @dataclass
 class ContainerSpec:
+    """Controls container configuration"""
+
     uri: str
     engine: str = "apptainer"
     addopts: str = " --no-eval -e --no-mount home -W /tmp --writable-tmpfs"
-    entrypoint: str = "/opt/entry.sh"
+    gpu_flavour: str | None = None  # TODO: add yaml argument
 
     def __post_init__(self):
         assert self.engine in ("apptainer", "singularity")
         assert len(self.uri) > 0
+        assert self.gpu_flavour in ("cuda", "rocm", None)
 
-    def launch_command(self, gpu: bool = False) -> str:
-        # TODO: pretty sure some of this is overkill
+    def launch_command(self) -> str:
         pwd = Path.cwd().resolve()  # access to data / internal dir
         args = [self.engine, "exec", self.addopts, f"--bind {pwd}"]
-        if gpu:
-            if "rocm" in self.uri:
-                args.append("--rocm")
-            else:
-                args.append("--nv")
-        args += [self.uri, self.entrypoint]
+        if self.gpu_flavour == "cuda":
+            args.append("--nv")
+        elif self.gpu_flavour == "rocm":
+            args.append("--rocm")
         return " ".join(args)
 
     @staticmethod
-    def from_kwargs(kwargs: dict) -> Optional[ContainerSpec]:
+    def from_kwargs(kwargs: dict) -> Optional["ContainerSpec"]:
         if "container_uri" not in kwargs:
             return None
-        keys = (
-            "container_uri",
-            "container_engine",
-            "container_addopts",
-            "container_entrypoint",
-        )
+        keys = ("container_uri", "container_engine", "container_addopts")
         args = [kwargs[key] for key in keys if key in kwargs]
-        return ContainerSpec(*args)  # TODO: slightly hacky
+        return ContainerSpec(*args)
+
+
+class ReferenceSpec(Protocol):
+    """Defines default options for Reference implementations"""
+
+    name: ClassVar[str]
+    reference_args: ClassVar[tuple[str, ...]]
+    mpi_command: str
+    mpi_args: Iterable[str]
+    executable: str
+
+    def launch_command(self) -> str:
+        raise NotImplementedError
+
+    @classmethod
+    def from_kwargs(cls, **kwargs):
+        keys = ("mpi_command", "mpi_args", "executable")
+        return cls(**{k: kwargs[k] for k in keys if k in kwargs})
+
+
+@dataclass
+class CP2KReferenceSpec(ReferenceSpec):
+    name = "CP2K"
+    reference_args = ("cores_per_worker",)
+    mpi_command: str = "mpirun -np {cores_per_worker}"
+    mpi_args: tuple[str, ...] = (
+        "-ENV OMP_NUM_THREADS=1",
+        "--bind-to core",
+        "--map-by core",
+    )
+    executable: str = "cp2k.psmp -i cp2k.inp"
+
+    def launch_command(self):
+        return " ".join([self.mpi_command, *self.mpi_args, self.executable])
+
+
+@dataclass
+class GPAWReferenceSpec(ReferenceSpec):
+    name = "GPAW"
+    reference_args = ("cores_per_worker",)
+    mpi_command: str = "mpirun -np {cores_per_worker}"
+    mpi_args: tuple[str, ...] = (
+        "-x OMP_NUM_THREADS=1",
+        "--bind-to core",
+        "--map-by core",
+    )
+    executable: str = "gpaw python script_gpaw.py input.json"
+
+    def launch_command(self):
+        return " ".join([self.mpi_command, *self.mpi_args, self.executable])
+
+
+@dataclass
+class ORCAReferenceSpec(ReferenceSpec):
+    name = "ORCA"
+    reference_args = ()
+    mpi_command: str = ""
+    mpi_args: tuple[str, ...] = (
+        "-x OMP_NUM_THREADS=1",
+        "--bind-to core",
+        "--map-by core",
+    )
+    executable: str = "$(which orca) orca.inp"
+
+    def launch_command(self):
+        mpi_str = " ".join(self.mpi_args)
+        return f'{self.executable} "{mpi_str}"'
+
+
+REFERENCE_SPECS = {
+    "CP2K": CP2KReferenceSpec,
+    "GPAW": GPAWReferenceSpec,
+    "ORCA": ORCAReferenceSpec,
+}
+
+
+def str_to_timedelta(s: str) -> timedelta:  # parse an "HH:MM:SS" string into a timedelta
+    # TODO: move to utils. NOTE: "%H" only matches 00-23, so durations of 24h or more raise ValueError
+    t = datetime.strptime(s, "%H:%M:%S")
+    return timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
+
+
+def make_slurm_provider(kwargs: dict) -> tuple[SlurmProvider, dict]:
+    defaults = {"init_blocks": 0, "exclusive": False}
+    required = ("cores_per_node", "walltime", "gpus_per_node")
+    kwargs = defaults | kwargs
+    assert all(key in kwargs for key in required)
+    provider = SlurmProvider(**kwargs)  # does not configure Launcher
+    resources = {
+        "nodes": provider.nodes_per_block,
+        "cores": provider.cores_per_node,
+        "memory": provider.mem_per_node,
+        "gpus": provider.gpus_per_node,
+        "lifetime": str_to_timedelta(provider.walltime).seconds,
+    }
+    return provider, resources
+
+
+def make_local_provider(kwargs: dict) -> tuple[LocalProvider, dict]:  # returns (provider, resources)
+    resources = {
+        "nodes": 1,
+        "cores": kwargs.get("cores", psutil.cpu_count()),  # default: core count reported by psutil
+        "memory": kwargs.get(
+            "memory", psutil.virtual_memory().available / 1e9
+        ),  # default is bytes / 1e9, i.e. GB  TODO: available?
+        "lifetime": float("inf"),  # no walltime limit is recorded for local execution
+    }
+    if "gpus" in kwargs:
+        resources["gpus"] = kwargs["gpus"]
+    else:
+        out = ""  # stays empty (0 GPUs counted) when the probe below fails
+        try:
+            out = subprocess.check_output(
+                "nvidia-smi -L || amd-smi list",
+                shell=True,
+                text=True,
+                stderr=subprocess.DEVNULL,
+            )
+        except subprocess.CalledProcessError:
+            pass  # neither nvidia-smi nor amd-smi exited successfully  TODO: not properly tested
+        resources["gpus"] = out.count("\n")  # assumes one output line per GPU - TODO confirm for amd-smi
+    provider = LocalProvider(init_blocks=0)
+    return provider, resources
 
 
-@typeguard.typechecked
 class ExecutionDefinition:
+    # TODO: do not like defining some kwargs in class method and other kwargs in init...
     def __init__(
         self,
-        parsl_provider: ExecutionProvider,
-        gpu: bool,
-        cores_per_worker: int,
-        use_threadpool: bool,
+        provider: ExecutionProvider | None,
+        executor_type: str,
+        executor_kwargs: dict,
+        resources: dict,
         container: Optional[ContainerSpec],
-        max_workers: Optional[int] = None,
+        max_runtime: str | None = None,
+        env_vars: Optional[dict[str, str]] = None,
         **kwargs,
-    ) -> None:
-        self.parsl_provider = parsl_provider
-        self.gpu = gpu
-        self.cores_per_worker = cores_per_worker
-        self.use_threadpool = use_threadpool
+    ):
+        self.provider = provider
+        self.executor_type = executor_type
+        self.kwargs = executor_kwargs
+        self.resources = resources  # compute per node
         self.container = container
-        self._max_workers = max_workers
+        self.env_vars = env_vars or {}
+
+        if self.use_gpu:
+            msg = ""
+            if resources["gpus"] == 0:
+                msg = "GPU usage requested but no GPUs available"
+            elif container is not None and container.gpu_flavour is None:
+                msg = "Provide 'gpu_flavour' to choose between CUDA and ROCM"
+            if msg:
+                raise ValueError(msg)
+
+        # how long can individual tasks run (in seconds)
+        if max_runtime is None:
+            # allow some margin for task cleanup  TODO: pretty random
+            max_runtime = max(0.9 * self.lifetime, self.lifetime - 60)
+        else:
+            max_runtime = str_to_timedelta(max_runtime).seconds
+        if max_runtime != float("inf") and max_runtime >= self.lifetime:
+            warnings.warn(
+                "Allowed task runtime exceeds provider walltime. Tasks might get killed by the scheduler."
+            )
+        self.max_runtime = max_runtime
+
+        # TODO: check that WQ kwargs do not exceed resources?
+        # TODO: how to handle env variables?
+        pass
 
     @property
     def name(self) -> str:
         return self.__class__.__name__
 
     @property
-    def cores_available(self):
-        if type(self.parsl_provider) is LocalProvider:  # noqa: F405
-            cores_available = psutil.cpu_count(logical=False)
-        elif type(self.parsl_provider) is SlurmProvider:
-            cores_available = self.parsl_provider.cores_per_node
-        else:
-            cores_available = float("inf")
-        return cores_available
-
-    @property
-    def max_workers(self):
-        if self._max_workers is not None:
-            return self._max_workers
-        else:
-            return max(1, math.floor(self.cores_available / self.cores_per_worker))
+    def lifetime(self) -> float:
+        """How long will this manager survive (in seconds)"""
+        return self.resources["lifetime"]
 
     @property
-    def max_runtime(self):
-        if type(self.parsl_provider) is SlurmProvider:
-            walltime = pytimeparse.parse(self.parsl_provider.walltime)
-        else:
-            walltime = 1e9
-        return walltime
+    def use_gpu(self) -> bool:  # threadpool kwargs carry 'use_gpu'; workqueue kwargs carry 'gpus_per_task'
+        return bool(self.kwargs.get("use_gpu")) or self.kwargs.get("gpus_per_task", 0) > 0
+
+    def wrap_in_timeout(self, command: str) -> str:
+        if self.max_runtime == float("inf"):
+            return command  # noop
+
+        # send SIGTERM after max_runtime, follow with SIGKILL 30s later
+        return f"timeout -k 30s {self.max_runtime}s {command}"
+
+    def _create_threadpool(self, path: Path) -> ThreadPoolExecutor:
+        max_threads = self.kwargs["max_threads"]
+        return ThreadPoolExecutor(self.name, max_threads, working_dir=str(path))
+
+    def _create_workqueue(self, path: Path) -> WorkQueueExecutor:
+        """Build the WorkQueue executor; worker flags: https://cctools.readthedocs.io/en/latest/man_pages/work_queue_worker/#synopsis"""
+
+        # ensure proper scale in # TODO: why is this needed?
+        timeout = int(1e6) if self.resources["nodes"] > 1 else 20
+        cores = self.resources["cores"]
+
+        worker_options = [
+            "--parent-death",
+            f"--cores={cores}",
+            f"--timeout={timeout}",
+        ]
+        if (memory := self.resources["memory"]) is not None:
+            worker_options.append(f"--memory={int(memory * 1000)}")  # in whole MB
+        if (lifetime := self.lifetime) != float("inf"):
+            # allow some margin for WQ startup; pass whole seconds via the two-dash long option
+            walltime = int(max(0.95 * lifetime, lifetime - 30))
+            worker_options.append(f"--wall-time={walltime}")
+        if self.use_gpu:
+            gpus = self.resources["gpus"]
+            worker_options.append(f"--gpus={gpus}")
+
+        worker_executable = "work_queue_worker"
+        if not isinstance(self, ReferenceEvaluation) and self.container:
+            # ModelEvaluation / ModelTraining run in container themselves
+            # Reference instances launch tasks in container
+            prepend = self.container.launch_command()
+            worker_executable = f"{prepend} {worker_executable}"
+
+        # TODO: why the custom WQ?
+        executor = MyWorkQueueExecutor(
+            label=self.name,
+            working_dir=str(path / self.name),
+            provider=self.provider,
+            shared_fs=True,
+            # autocategory=False,
+            # port=0,
+            # max_retries=1,
+            # coprocess=False,
+            worker_options=" ".join(worker_options),
+            worker_executable=worker_executable,
+            scaling_cores_per_worker=cores,
+        )
+        return executor
 
     def create_executor(self, path: Path) -> ParslExecutor:
-        if self.use_threadpool:
-            executor = ThreadPoolExecutor(
-                max_threads=self.cores_per_worker,
-                working_dir=str(path),
-                label=self.name,
-            )
-        else:
-            cores = self.max_workers * self.cores_per_worker
-            worker_options = [
-                "--parent-death",
-                "--wall-time={}".format(self.max_runtime),
-                "--cores={}".format(cores),
-            ]
-            if self.gpu:
-                worker_options.append("--gpus={}".format(self.max_workers))
-
-            # ensure proper scale in
-            if getattr(self.parsl_provider, "nodes_per_block", 1) > 1:
-                worker_options.append("--idle-timeout={}".format(int(1e6)))
-            else:
-                worker_options.append("--idle-timeout={}".format(20))
+        if self.executor_type == "threadpool":
+            return self._create_threadpool(path)
+        return self._create_workqueue(path)
 
-            # only ModelEvaluation / ModelTraining / default_htex run in containers
-            if not isinstance(self, ReferenceEvaluation) and self.container:
-                prepend = self.container.launch_command(self.gpu)
-                worker_executable = f"{prepend} work_queue_worker"
-            else:
-                worker_executable = "work_queue_worker"
-
-            executor = MyWorkQueueExecutor(
-                label=self.name,
-                working_dir=str(path / self.name),
-                provider=self.parsl_provider,
-                shared_fs=True,
-                autocategory=False,
-                port=0,
-                max_retries=1,
-                coprocess=False,
-                worker_options=" ".join(worker_options),
-                worker_executable=worker_executable,
-                scaling_cores_per_worker=cores,
-            )
-        return executor
+    def wq_resources(self, *args, **kwargs) -> dict:
+        if self.executor_type == "threadpool":
+            return {}
+
+        # TODO: why recreate every call?
+        # TODO: priority
+        spec = {
+            "cores": self.kwargs["cores_per_task"],
+            "memory": int(self.kwargs["mem_per_task"] * 1000),  # in MB
+            "gpus": self.kwargs["gpus_per_task"],
+            "disk": 0,  # not implemented
+            "running_time_min": self.kwargs["min_runtime"],
+        }
+        return self._modify_wq_resources(spec, *args, **kwargs)
+
+    def _modify_wq_resources(self, spec: dict, *args, **kwargs) -> dict:
+        raise NotImplementedError
 
     @classmethod
     def from_config(
         cls,
-        gpu: bool = False,
-        cores_per_worker: int = 1,
-        use_threadpool: bool = False,
+        executor: str = "workqueue",
         container: Optional[ContainerSpec] = None,
         **kwargs,
     ):
-        # search for any section in the config which defines the Parsl ExecutionProvider
-        # if none are found, default to LocalProvider
-        # currently only checking for SLURM
-        if "slurm" in kwargs:
-            provider_cls = SlurmProvider
-            provider_kwargs = kwargs.pop("slurm")  # do not allow empty dict
-            provider_kwargs["init_blocks"] = 0
-            provider_kwargs.setdefault("exclusive", False)
+        if executor == "threadpool":
+            assert container is None, "Threadpool not compatible with containers"
+            assert (
+                "slurm" not in kwargs
+            ), "Threadpool not compatible with remote execution"
+            assert "max_threads" in kwargs, "Specify 'max_threads' for parallelism"
+            executor_kwargs = {
+                "max_threads": kwargs["max_threads"],
+                "use_gpu": kwargs.get("use_gpu", False),
+            }
+        elif executor == "workqueue":
+            executor_kwargs = {
+                "cores_per_task": kwargs.get("cores_per_task", 0),
+                "gpus_per_task": kwargs.get("gpus_per_task", 0),
+                "mem_per_task": kwargs.get("mem_per_task", 0),
+            }
+            assert any(v != 0 for v in executor_kwargs.values())
+            min_runtime = kwargs.get("min_runtime", "00:00:00")
+            executor_kwargs["min_runtime"] = str_to_timedelta(min_runtime).seconds
         else:
-            provider_cls = LocalProvider  # noqa: F405
-            provider_kwargs = kwargs.pop("local", {})
+            raise ValueError("Key 'executor' must be 'threadpool' or 'workqueue'")
 
-        # if multi-node blocks are requested, make sure we're using SlurmProvider
-        if provider_kwargs.get("nodes_per_block", 1) > 1:
-            launcher = SlurmLauncher()
+        # search for Parsl ExecutionProvider block, defaulting to "local"
+        if "slurm" in kwargs:
+            # use SlurmLauncher if multi-node blocks are requested TODO: what does this fix?
+            provider, resources = make_slurm_provider(kwargs["slurm"])
+            launcher = SlurmLauncher() if resources["nodes"] > 1 else SimpleLauncher()
+            provider.launcher = launcher
         else:
-            launcher = SimpleLauncher()
-
-        if container is not None:
-            # TODO: why not exactly?
-            assert not use_threadpool
+            provider, resources = make_local_provider(kwargs.get("local", {}))
+        if executor == "threadpool":
+            provider = None  # no provider needed
 
-        # initialize provider
-        parsl_provider = provider_cls(
-            launcher=launcher,
-            **provider_kwargs,
-        )
         return cls(
-            parsl_provider=parsl_provider,
-            gpu=gpu,
-            use_threadpool=use_threadpool,
+            provider=provider,
+            executor_type=executor,
+            executor_kwargs=executor_kwargs,
+            resources=resources,
             container=container,
-            cores_per_worker=cores_per_worker,
             **kwargs,
         )
 
 
-@typeguard.typechecked
 class ModelEvaluation(ExecutionDefinition):
     def __init__(
         self,
-        max_simulation_time: Optional[float] = None,
-        timeout: float = (10 / 60),  # 5 seconds
-        env_vars: Optional[dict[str, str]] = None,
+        timeout: float = 5,  # TODO: units?
         **kwargs,
-    ) -> None:
+    ):
         super().__init__(**kwargs)
-        if max_simulation_time is not None:
-            assert max_simulation_time * 60 < self.max_runtime
-        self.max_simulation_time = max_simulation_time
         self.timeout = timeout
 
-        default_env_vars = {
-            "OMP_NUM_THREADS": str(self.cores_per_worker),
-            "KMP_AFFINITY": "granularity=fine,compact,1,0",
-            "KMP_BLOCKTIME": "1",
-            "OMP_PROC_BIND": "false",
-            "PYTHONUNBUFFERED": "TRUE",
-        }
-        if env_vars is None:
-            env_vars = default_env_vars
-        else:
-            default_env_vars.update(env_vars)
-            env_vars = default_env_vars
-        self.env_vars = env_vars
-
-    def server_command(self):
-        command_list = ["psiflow-server"]
-        if self.max_simulation_time is not None:
-            max_time = 0.9 * (60 * self.max_simulation_time)
-            command_list = ["timeout -s 15 {}s".format(max_time), *command_list]
-        return " ".join(command_list)
-
-    def client_command(self):
-        command_list = ["psiflow-client"]
-        return " ".join(command_list)
-
-    # def get_client_args(
-    #     self,
-    #     hamiltonian_name: str,
-    #     nwalkers: int,
-    #     motion: str,
-    # ) -> list[str]:
-    #     # TODO: redo this
-    #     if "MACE" in hamiltonian_name:
-    #         if motion in ["minimize", "vibrations"]:
-    #             dtype = "float64"
-    #         else:
-    #             dtype = "float32"
-    #         nclients = min(nwalkers, self.max_workers)
-    #         if self.gpu:
-    #             template = "--dtype={} --device=cuda:{}"
-    #             args = [template.format(dtype, i) for i in range(nclients)]
-    #         else:
-    #             template = "--dtype={} --device=cpu"
-    #             args = [template.format(dtype) for i in range(nclients)]
-    #         return args
-    #     else:
-    #         return [""]
+        # TODO: temporary
+        self.cores_per_worker = self.kwargs.get("cores_per_task", 1)
+        self.gpu = False
+
+        # TODO: what with env vars?
+        # default_env_vars = {
+        #     "OMP_NUM_THREADS": str(self.cores_per_worker),
+        #     "KMP_AFFINITY": "granularity=fine,compact,1,0",
+        #     "KMP_BLOCKTIME": "1",
+        #     "OMP_PROC_BIND": "false",
+        #     "PYTHONUNBUFFERED": "TRUE",
+        # }
+
+    def server_command(self) -> str:
+        command = "psiflow-server"
+        return self.wrap_in_timeout(command)
 
     def get_driver_devices(self, nwalkers: int) -> list[dict]:
         # assumes driver is GPU capable
-        # TODO: what if only 1 gpu is available?
-        nclients = min(nwalkers, self.max_workers)
+        # TODO: what if only 1 gpu is available? Redo this
+        # nclients = min(nwalkers, self.max_workers)
+        nclients = min(nwalkers, 2)
         if self.gpu:
-            return [{'device': f'cuda:{i}'} for i in range(nclients)]
+            return [{"device": f"cuda:{i}"} for i in range(nclients)]
         else:
-            return [{'device': 'cpu'} for _ in range(nclients)]
-
-    def wq_resources(self, nwalkers):
-        if self.use_threadpool:
-            return {}
-        nclients = min(nwalkers, self.max_workers)
-        resource_specification = {}
-        resource_specification["cores"] = nclients * self.cores_per_worker
-        resource_specification["disk"] = 1000  # some random nontrivial amount?
-        memory = 2000 * self.cores_per_worker  # similarly rather random
-        resource_specification["memory"] = int(memory)
-        resource_specification["running_time_min"] = self.max_simulation_time
-        if self.gpu:
-            resource_specification["gpus"] = nclients
-        return resource_specification
+            return [{"device": "cpu"} for _ in range(nclients)]
+
+    def _modify_wq_resources(self, spec: dict, *args, **kwargs) -> dict:
+        return spec  # no adjustments yet; hand the spec back so wq_resources() returns a dict
+
+    # def wq_resources(self, nwalkers):
+    #     if self.use_threadpool:
+    #         return {}
+    #     nclients = min(nwalkers, self.max_workers)
+    #     resource_specification = {}
+    #     resource_specification["cores"] = nclients * self.cores_per_worker
+    #     resource_specification["disk"] = 1000  # some random nontrivial amount?
+    #     memory = 2000 * self.cores_per_worker  # similarly rather random
+    #     resource_specification["memory"] = int(memory)
+    #     resource_specification["running_time_min"] = self.max_simulation_time
+    #     if self.gpu:
+    #         resource_specification["gpus"] = nclients
+    #     return resource_specification
 
 
-@typeguard.typechecked
 class ModelTraining(ExecutionDefinition):
     def __init__(
         self,
-        gpu=True,
-        max_training_time: Optional[float] = None,
-        env_vars: Optional[dict[str, str]] = None,
-        multigpu: bool = False,
+        multigpu: bool = False,  # TODO: how to handle this?
         **kwargs,
     ) -> None:
-        super().__init__(gpu=gpu, **kwargs)
-        assert self.gpu
-        if max_training_time is not None:
-            assert max_training_time * 60 < self.max_runtime
-        self.max_training_time = max_training_time
+        super().__init__(**kwargs)
         self.multigpu = multigpu
         if self.multigpu:
+            # TODO: why? Think this might be a multinode thing - which I do not care about
             message = (
                 "the max_training_time keyword does not work "
                 "in combination with multi-gpu training. Adjust "
                 "the maximum number of epochs to control the "
                 "duration of training"
             )
-            assert self.max_training_time is None, message
-
-        default_env_vars = {
-            "OMP_NUM_THREADS": str(self.cores_per_worker),
-            "KMP_AFFINITY": "granularity=fine,compact,1,0",
-            "KMP_BLOCKTIME": "1",
-            "OMP_PROC_BIND": "spread",  # different from Model Eval
-            "PYTHONUNBUFFERED": "TRUE",
-        }
-        if env_vars is None:
-            env_vars = default_env_vars
-        else:
-            default_env_vars.update(env_vars)
-            env_vars = default_env_vars
-        self.env_vars = env_vars
+            assert self.max_runtime is None, message
+
+        # default_env_vars = {
+        #     "OMP_NUM_THREADS": str(self.cores_per_worker),
+        #     "KMP_AFFINITY": "granularity=fine,compact,1,0",
+        #     "KMP_BLOCKTIME": "1",
+        #     "OMP_PROC_BIND": "spread",  # different from Model Eval
+        #     "PYTHONUNBUFFERED": "TRUE",
+        # }
+        # if env_vars is None:
+        #     env_vars = default_env_vars
+        # else:
+        #     default_env_vars.update(env_vars)
+        #     env_vars = default_env_vars
 
     def train_command(self, initialize: bool = False):
-        # script = "$(python -c 'import psiflow.models.mace_utils; print(psiflow.models.mace_utils.__file__)')"
-        command_list = ["psiflow-mace-train"]
-        if (self.max_training_time is not None) and not initialize:
-            max_time = 0.9 * (60 * self.max_training_time)
-            command_list = ["timeout -s 15 {}s".format(max_time), *command_list]
-        return " ".join(command_list)
-
-    def wq_resources(self):
-        if self.use_threadpool:
-            return {}
-        resource_specification = {}
-
-        if self.multigpu:
-            nworkers = int(self.cores_available / self.cores_per_worker)
-        else:
-            nworkers = 1
-
-        resource_specification["gpus"] = nworkers  # one per GPU
-        resource_specification["cores"] = self.cores_available
-        resource_specification["disk"] = (
-            1000 * nworkers
-        )  # some random nontrivial amount?
-        memory = 1000 * self.cores_available  # similarly rather random
-        resource_specification["memory"] = int(memory)
-        resource_specification["running_time_min"] = self.max_training_time
-        return resource_specification
+        command = "psiflow-mace-train"
+        return self.wrap_in_timeout(command)
+
+    def _modify_wq_resources(self, spec: dict, *args, **kwargs) -> dict:
+        return spec  # no adjustments yet; hand the spec back so wq_resources() returns a dict
+
+    # def wq_resources(self):
+    #     if self.use_threadpool:
+    #         return {}
+    #     resource_specification = {}
+    #
+    #     if self.multigpu:
+    #         nworkers = int(self.cores_available / self.cores_per_worker)
+    #     else:
+    #         nworkers = 1
+    #
+    #     resource_specification["gpus"] = nworkers  # one per GPU
+    #     resource_specification["cores"] = self.cores_available
+    #     resource_specification["disk"] = (
+    #         1000 * nworkers
+    #     )  # some random nontrivial amount?
+    #     memory = 1000 * self.cores_available  # similarly rather random
+    #     resource_specification["memory"] = int(memory)
+    #     resource_specification["running_time_min"] = self.max_training_time
+    #     return resource_specification
 
 
-@typeguard.typechecked
 class ReferenceEvaluation(ExecutionDefinition):
     def __init__(
         self,
-        spec: ReferenceSpec,
-        max_evaluation_time: Optional[float] = None,
-        memory_limit: Optional[str] = None,
+        spec: "ReferenceSpec",
+        memory_limit: Optional[str] = None,  # TODO: how does this work?
         **kwargs,
     ) -> None:
         # TODO: how to know which code?
         super().__init__(**kwargs)
         self.spec = spec
-        self.max_evaluation_time = max_evaluation_time * 60  # seconds
-        if max_evaluation_time:
-            assert 0 < max_evaluation_time < self.max_runtime
         self.memory_limit = memory_limit
 
     def command(self):
+        # TODO: this does not work probably
         launch_command = self.spec.launch_command()
         kwargs = {k: getattr(self, k) for k in self.spec.reference_args}
         launch_command = launch_command.format(**kwargs)
@@ -401,10 +516,7 @@ def command(self):
         if self.container is not None:
             launch_command = f"{self.container.launch_command()} {launch_command}"
 
-        if (max_time := self.max_evaluation_time) is None:
-            # leave some slack for startup and cleanup
-            max_time = max(0.9 * self.max_runtime, self.max_runtime - 5)
-        launch_command = f"timeout -s 9 {max_time}s {launch_command}"
+        launch_command = self.wrap_in_timeout(launch_command)
 
         commands = []
         if self.memory_limit is not None:
@@ -423,85 +535,14 @@ def parse_size(size):  # TODO: to utils?
         # exit code 0 so parsl always thinks bash app succeeded
         return "\n".join([*commands, launch_command, "exit 0"])
 
-    def wq_resources(self):
-        if self.use_threadpool:
-            return {}
-        resource_specification = {}
-        resource_specification["cores"] = self.cores_per_worker
-        resource_specification["disk"] = 1000  # some random nontrivial amount?
-        memory = 2000 * self.cores_per_worker  # similarly rather random
-        resource_specification["memory"] = int(memory)
-        resource_specification["running_time_min"] = self.max_evaluation_time
-        return resource_specification
+    def _modify_wq_resources(self, spec: dict, *args, **kwargs) -> dict:
+        return spec
 
     @property
     def name(self) -> str:
         return self.spec.name
 
 
-class ReferenceSpec(Protocol):
-    name: ClassVar[str]
-    reference_args: ClassVar[tuple[str, ...]]
-    mpi_command: str
-    mpi_args: Iterable[str]
-    executable: str
-
-    def launch_command(self) -> str:
-        raise NotImplementedError
-
-
-@dataclass
-class CP2KReferenceSpec(ReferenceSpec):
-    name = "CP2K"
-    reference_args = ("cores_per_worker",)
-    mpi_command: str = "mpirun -np {cores_per_worker}"
-    mpi_args: tuple[str, ...] = (
-        "-ENV OMP_NUM_THREADS=1",
-        "--bind-to core",
-        "--map-by core",
-    )
-    executable: str = "cp2k.psmp -i cp2k.inp"
-
-    def launch_command(self):
-        # use nprocs = ncores, nthreads = 1
-        return " ".join([self.mpi_command, *self.mpi_args, self.executable])
-
-
-@dataclass
-class GPAWReferenceSpec(ReferenceSpec):
-    name = "GPAW"
-    reference_args = ("cores_per_worker",)
-    mpi_command: str = "mpirun -np {cores_per_worker}"
-    mpi_args: tuple[str, ...] = (
-        "-x OMP_NUM_THREADS=1",
-        "--bind-to core",
-        "--map-by core",
-    )
-    executable: str = "gpaw python script_gpaw.py input.json"
-
-    def launch_command(self):
-        # use nprocs = ncores, nthreads = 1
-        return " ".join([self.mpi_command, *self.mpi_args, self.executable])
-
-
-@dataclass
-class ORCAReferenceSpec(ReferenceSpec):
-    name = "ORCA"
-    reference_args = ()
-    mpi_command: str = ""
-    mpi_args: tuple[str, ...] = (
-        "-x OMP_NUM_THREADS=1",
-        "--bind-to core",
-        "--map-by core",
-    )
-    executable: str = "$(which orca) orca.inp"
-
-    def launch_command(self):
-        mpi_str = " ".join(self.mpi_args)
-        return f'{self.executable} "{mpi_str}"'
-
-
-@typeguard.typechecked
 class ExecutionContext:
     """
     Psiflow centralizes all execution-level configuration options using an ExecutionContext.
@@ -513,7 +554,6 @@ class ExecutionContext:
     and QM evaluation apps. As such, we ensure that execution-side details are strictly
     separated from the definition of the computational graph itself.
     For more information, check out the psiflow documentation regarding execution.
-
     """
 
     def __init__(
@@ -540,15 +580,15 @@ def __exit__(self, exc_type, exc_value, traceback):
         parsl.dfk().cleanup()
 
     def new_file(self, prefix: str, suffix: str) -> File:
+        assert prefix[-1] == "_"
+        assert suffix[0] == "."
+        padding = 6
         with self.lock:
-            assert prefix[-1] == "_"
-            assert suffix[0] == "."
             key = (prefix, suffix)
             if key not in self.file_index.keys():
                 self.file_index[key] = 0
-            padding = 6
             assert self.file_index[key] < (16**padding)
-            identifier = "{0:0{1}x}".format(self.file_index[key], padding)
+            identifier = f"{self.file_index[key]:0{padding}x}"
             self.file_index[key] += 1
             return File(str(self.path / (prefix + identifier + suffix)))
 
@@ -562,25 +602,22 @@ def from_config(
         max_idletime: float = 20,
         internal_tasks_max_threads: int = 10,
         default_threads: int = 4,
-        htex_address: str = "127.0.0.1",
+        # htex_address: str = "127.0.0.1",
         zip_staging: Optional[bool] = None,
         make_symlinks: bool = False,
         **kwargs,
-    ) -> ExecutionContext:
+    ) -> "ExecutionContext":
         path = Path.cwd().resolve() / PSIFLOW_INTERNAL
-        psiflow.resolve_and_check(path)
         if path.exists():
             shutil.rmtree(path)
-        path.mkdir(parents=True, exist_ok=True)
-        parsl.set_file_logger(
-            filename=str(path / "parsl.log"),
-            name="parsl",
-            level=getattr(logging, parsl_log_level),
-        )
+        path.mkdir(parents=True)
+
+        log_file = str(path / "parsl.log")
+        log_level = getattr(logging, parsl_log_level)
+        parsl.set_file_logger(filename=log_file, name="parsl", level=log_level)
 
         # create definitions
         base_container = ContainerSpec.from_kwargs(kwargs)
-        kwargs.pop("container_uri", None)
         model_evaluation = ModelEvaluation.from_config(
             container=base_container,
             **kwargs.pop("ModelEvaluation", {}),
@@ -589,12 +626,19 @@ def from_config(
             container=base_container,
             **kwargs.pop("ModelTraining", {"gpu": True}),  # avoid triggering assertion
         )
+
+        # TODO: remove this and check below
+        model_evaluation.wq_resources(0)
+        model_evaluation.server_command()
+        model_training.wq_resources()
+
         reference_evaluations = []  # reference evaluations might be class specific
         for key in list(kwargs.keys()):
             if key[:4] in REFERENCE_SPECS:  # allow for e.g., CP2K_small
                 config = kwargs.pop(key)
                 reference_evaluation = ReferenceEvaluation.from_config(
-                    spec=init_spec(REFERENCE_SPECS[key[:4]], config),
+                    # spec=init_spec(REFERENCE_SPECS[key[:4]], config),
+                    spec=REFERENCE_SPECS[key[:4]].from_kwargs(**config),
                     container=ContainerSpec.from_kwargs(kwargs | config),
                     **config,
                 )
@@ -605,13 +649,14 @@ def from_config(
         executors = [d.create_executor(path=path) for d in definitions]
 
         # create default executors
+        # TODO: extract this into function
         if base_container is not None:
             launcher = WrappedLauncher(prepend=base_container.launch_command())
         else:
             launcher = SimpleLauncher()
         htex = HighThroughputExecutor(
             label="default_htex",
-            address=htex_address,
+            # address=htex_address,
             working_dir=str(path / "default_htex"),
             cores_per_worker=1,
             max_workers_per_node=default_threads,
@@ -640,7 +685,7 @@ def from_config(
             executors=executors,
             run_dir=str(path),
             initialize_logging=False,
-            app_cache=False,
+            # app_cache=False,
             usage_tracking=usage_tracking,
             retries=retries,
             strategy=strategy,
@@ -650,16 +695,16 @@ def from_config(
         )
         context = ExecutionContext(config, definitions, path / "context_dir")
 
-        if make_symlinks:
-            src, dest = Path.cwd() / "psiflow_log", path / "parsl.log"
-            _create_symlink(src, dest)
-            src, dest = (
-                Path.cwd() / "psiflow_submit_scripts",
-                path / "000" / "submit_scripts",
-            )
-            _create_symlink(src, dest, is_dir=True)
-            src, dest = Path.cwd() / "psiflow_task_logs", path / "000" / "task_logs"
-            _create_symlink(src, dest, is_dir=True)
+        # if make_symlinks:
+        #     src, dest = Path.cwd() / "psiflow_log", path / "parsl.log"
+        #     _create_symlink(src, dest)
+        #     src, dest = (
+        #         Path.cwd() / "psiflow_submit_scripts",
+        #         path / "000" / "submit_scripts",
+        #     )
+        #     _create_symlink(src, dest, is_dir=True)
+        #     src, dest = Path.cwd() / "psiflow_task_logs", path / "000" / "task_logs"
+        #     _create_symlink(src, dest, is_dir=True)
 
         return context
 
@@ -670,33 +715,24 @@ class ExecutionContextLoader:
     @classmethod
     def load(
         cls,
-        psiflow_config: Optional[dict[str, Any]] = None,
+        config: Optional[dict[str, Any]] = None,
     ) -> ExecutionContext:
         if cls._context is not None:
             raise RuntimeError("ExecutionContext has already been loaded")
-        if psiflow_config is None:  # assume yaml is passed as argument
-            if len(sys.argv) == 1:  # no config passed, use threadpools:
-                psiflow_config = {
-                    "ModelEvaluation": {
-                        "gpu": False,
-                        "use_threadpool": True,
-                    },
-                    "ModelTraining": {
-                        "gpu": True,
-                        "use_threadpool": True,
-                    },
-                }
+        if config is None:
+            if len(sys.argv) == 1:  # no yaml config passed, use threadpools:
+                config = DEFAULT_CONFIG
             else:
                 assert len(sys.argv) == 2
                 path_config = psiflow.resolve_and_check(Path(sys.argv[1]))
                 assert path_config.exists()
                 assert path_config.suffix in [".yaml", ".yml"], (
-                    "the execution configuration needs to be specified"
-                    " as a YAML file, but got {}".format(path_config)
+                    f"the execution configuration needs to be specified"
+                    f" as a YAML file, but got {path_config}"
                 )
                 with open(path_config, "r") as f:
-                    psiflow_config = yaml.safe_load(f)
-        cls._context = ExecutionContext.from_config(**psiflow_config)
+                    config = yaml.safe_load(f)
+        cls._context = ExecutionContext.from_config(**config)
         return cls._context
 
     @classmethod
@@ -711,6 +747,7 @@ def wait(cls):
 
 
 class SlurmLauncher(Launcher):
+    # TODO: what does this do?
     def __init__(self, debug: bool = True, overrides: str = ""):
         super().__init__(debug=debug)
         self.overrides = overrides
@@ -746,29 +783,17 @@ def __call__(self, command: str, tasks_per_node: int, nodes_per_block: int) -> s
 
 
 class MyWorkQueueExecutor(WorkQueueExecutor):
+    # TODO: what does this do?
     def _get_launch_command(self, block_id):
         return self.worker_command
 
 
-def _create_symlink(src: Path, dest: Path, is_dir: bool = False) -> None:
-    """Create or replace symbolic link"""
-    if src.is_symlink():
-        src.unlink()
-    if is_dir:
-        dest.mkdir(parents=True, exist_ok=True)
-    else:
-        dest.touch(exist_ok=True)
-    src.symlink_to(dest, target_is_directory=is_dir)
-
-
-REFERENCE_SPECS = {
-    "CP2K": CP2KReferenceSpec,
-    "GPAW": GPAWReferenceSpec,
-    "ORCA": ORCAReferenceSpec,
-}
-
-
-def init_spec(spec_cls: type(ReferenceSpec), kwargs: dict) -> ReferenceSpec:
-    keys = ("mpi_command", "mpi_args", "executable")
-    cls_kwargs = {k: kwargs[k] for k in keys if k in kwargs}
-    return spec_cls(**cls_kwargs)
+# def _create_symlink(src: Path, dest: Path, is_dir: bool = False) -> None:
+#     """Create or replace symbolic link"""
+#     if src.is_symlink():
+#         src.unlink()
+#     if is_dir:
+#         dest.mkdir(parents=True, exist_ok=True)
+#     else:
+#         dest.touch(exist_ok=True)
+#     src.symlink_to(dest, target_is_directory=is_dir)

From ffc0c11862bd8aa0d14421bf97f3c381246d6641 Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Fri, 27 Feb 2026 16:39:45 +0100
Subject: [PATCH 03/15] implement very basic WQ priority handling

WQ priority can be controlled using the context manager
`with SetWQPriority(value):`, which sets the 'priority' entry of every registered WQ resource spec on entry and resets it to the default on exit. This is a very basic implementation, and we will need to be careful with how `wq_resources` is called and used.
---
 psiflow/execution.py | 167 +++++++++++++++++++++++++++++++------------
 1 file changed, 120 insertions(+), 47 deletions(-)

diff --git a/psiflow/execution.py b/psiflow/execution.py
index 1e4b758..4ec0c70 100644
--- a/psiflow/execution.py
+++ b/psiflow/execution.py
@@ -28,16 +28,11 @@
 from parsl.providers import LocalProvider, SlurmProvider
 from parsl.providers.base import ExecutionProvider
 
-import psiflow
 
 logger = logging.getLogger(__name__)  # logging per module
 
 
-PSIFLOW_INTERNAL = "psiflow_internal"
-DEFAULT_CONFIG = {  # TODO: remove
-    "ModelEvaluation": {"gpu": False, "use_threadpool": True},
-    "ModelTraining": {"gpu": True, "use_threadpool": True},
-}
+PSIFLOW_INTERNAL = "psiflow_internal"  # TODO: move configuration files somewhere
 
 
 @dataclass
@@ -220,8 +215,22 @@ def __init__(
             if resources["gpus"] == 0:
                 msg = "GPU usage requested but no GPUs available"
             elif container is not None and container.gpu_flavour is None:
-                msg = "Provide 'gpu_flavour' to choose between CUDA and ROCM"
+                msg = "Provide container 'gpu_flavour' to choose between CUDA and ROCM"
+            if msg:
+                raise ValueError(msg)
+
+        if self.executor_type == "workqueue":
+            # WQ-specific checks TODO: check that WQ kwargs do not exceed resources?
+            msg = ""
+            if self.kwargs["gpus_per_task"] > resources["gpus"]:
+                msg = "GPUs"
+            if self.kwargs["cores_per_task"] > resources["cores"]:
+                msg = "cores"
+            if self.kwargs["mem_per_task"] > (resources["memory"] or float("inf")):
+                # TODO: do we need memory=None anywhere? otherwise default to inf?
+                msg = "memory"
             if msg:
+                msg = f"Apps will request more {msg} than available per Parsl block"
                 raise ValueError(msg)
 
         # how long can individual tasks run (in seconds)
@@ -231,13 +240,24 @@ def __init__(
         else:
             max_runtime = str_to_timedelta(max_runtime).seconds
         if max_runtime != float("inf") and max_runtime >= self.lifetime:
-            warnings.warn(
-                "Allowed task runtime exceeds provider walltime. Tasks might get killed by the scheduler."
-            )
+            msg = "Allowed task runtime exceeds provider walltime. Tasks might get killed by the scheduler."
+            warnings.warn(msg)
         self.max_runtime = max_runtime
 
-        # TODO: check that WQ kwargs do not exceed resources?
+        # set default WQ resource specs  TODO: type_hint
+        self.spec = None
+        if self.executor_type == "workqueue":
+            self.spec = {
+                "cores": self.kwargs["cores_per_task"],
+                "memory": int(self.kwargs["mem_per_task"] * 1000),  # in MB
+                "gpus": self.kwargs["gpus_per_task"],
+                "disk": 0,  # not implemented
+                "running_time_min": self.kwargs["min_runtime"],
+            }
+        register_definition(definition=self)
+
         # TODO: how to handle env variables?
+
         pass
 
     @property
@@ -251,7 +271,7 @@ def lifetime(self) -> float:
 
     @property
     def use_gpu(self) -> bool:
-        return self.kwargs.get("use_gpu") or self.kwargs.get("gpus_per_task") > 0
+        return self.kwargs.get("use_gpu") or self.kwargs.get("gpus_per_task", 0) > 0
 
     def wrap_in_timeout(self, command: str) -> str:
         if self.max_runtime == float("inf"):
@@ -315,28 +335,13 @@ def create_executor(self, path: Path) -> ParslExecutor:
         return self._create_workqueue(path)
 
     def wq_resources(self, *args, **kwargs) -> dict:
-        if self.executor_type == "threadpool":
-            return {}
-
-        # TODO: why recreate every call?
-        # TODO: priority
-        spec = {
-            "cores": self.kwargs["cores_per_task"],
-            "memory": int(self.kwargs["mem_per_task"] * 1000),  # in MB
-            "gpus": self.kwargs["gpus_per_task"],
-            "disk": 0,  # not implemented
-            "running_time_min": self.kwargs["min_runtime"],
-        }
-        return self._modify_wq_resources(spec, *args, **kwargs)
-
-    def _modify_wq_resources(self, spec: dict, *args, **kwargs) -> dict:
         raise NotImplementedError
 
     @classmethod
     def from_config(
         cls,
-        executor: str = "workqueue",
-        container: Optional[ContainerSpec] = None,
+        executor: str,
+        container: Optional[ContainerSpec],
         **kwargs,
     ):
         if executor == "threadpool":
@@ -394,6 +399,8 @@ def __init__(
         # TODO: temporary
         self.cores_per_worker = self.kwargs.get("cores_per_task", 1)
         self.gpu = False
+        self.max_simulation_time = self.max_runtime
+        self.env_vars = {"OMP_NUM_THREADS": "1"}
 
         # TODO: what with env vars?
         # default_env_vars = {
@@ -418,8 +425,11 @@ def get_driver_devices(self, nwalkers: int) -> list[dict]:
         else:
             return [{"device": "cpu"} for _ in range(nclients)]
 
-    def _modify_wq_resources(self, spec: dict, *args, **kwargs) -> dict:
-        pass
+    def wq_resources(self, nwalkers: int) -> dict:
+        if self.spec is None:
+            return {}  # threadpool
+
+        return self.spec
 
     # def wq_resources(self, nwalkers):
     #     if self.use_threadpool:
@@ -471,8 +481,11 @@ def train_command(self, initialize: bool = False):
         command = "psiflow-mace-train"
         return self.wrap_in_timeout(command)
 
-    def _modify_wq_resources(self, spec: dict, *args, **kwargs) -> dict:
-        pass
+    def wq_resources(self, *args, **kwargs) -> dict:
+        if self.spec is None:
+            return {}  # threadpool
+
+        return self.spec
 
     # def wq_resources(self):
     #     if self.use_threadpool:
@@ -624,7 +637,9 @@ def from_config(
         )
         model_training = ModelTraining.from_config(
             container=base_container,
-            **kwargs.pop("ModelTraining", {"gpu": True}),  # avoid triggering assertion
+            **kwargs.pop(
+                "ModelTraining", {"gpu": True}
+            ),  # avoid triggering assertion  TODO: change into warning
         )
 
         # TODO: remove this and check below
@@ -719,19 +734,23 @@ def load(
     ) -> ExecutionContext:
         if cls._context is not None:
             raise RuntimeError("ExecutionContext has already been loaded")
-        if config is None:
-            if len(sys.argv) == 1:  # no yaml config passed, use threadpools:
-                config = DEFAULT_CONFIG
-            else:
-                assert len(sys.argv) == 2
-                path_config = psiflow.resolve_and_check(Path(sys.argv[1]))
-                assert path_config.exists()
-                assert path_config.suffix in [".yaml", ".yml"], (
-                    f"the execution configuration needs to be specified"
-                    f" as a YAML file, but got {path_config}"
-                )
-                with open(path_config, "r") as f:
-                    config = yaml.safe_load(f)
+        if config is not None:
+            pass
+        elif len(sys.argv) == 1:
+            config = {}
+        else:
+            assert len(sys.argv) <= 2  # only accept a single argument
+            path_config = Path(sys.argv[1])
+            assert path_config.exists()
+            assert path_config.suffix in [".yaml", ".yml"], (
+                f"the execution configuration needs to be specified"
+                f" as a YAML file, but got {path_config}"
+            )
+            with open(path_config, "r") as f:
+                config = yaml.safe_load(f)
+
+        # set the context so it can be retrieved later
+        config = yaml.safe_load(DEFAULT_CONFIG) | config
         cls._context = ExecutionContext.from_config(**config)
         return cls._context
 
@@ -797,3 +816,57 @@ def _get_launch_command(self, block_id):
 #     else:
 #         dest.touch(exist_ok=True)
 #     src.symlink_to(dest, target_is_directory=is_dir)
+
+
+# TODO: attempt at managing priority through global state
+WQ_RESOURCES_REGISTRY = {}
+
+
+def register_definition(definition: ExecutionDefinition) -> None:
+    """"""
+    if (spec := definition.spec) is None:
+        return  # threadpool does not have priority
+
+    WQ_RESOURCES_REGISTRY[definition.name] = spec
+    spec["priority"] = SetWQPriority.default
+
+
+class SetWQPriority:
+    """Manage the WQ priority tag as context manager"""
+
+    # TODO: this probably does not work in a nested way
+    # TODO: log to parsl.log?
+    default = 0
+
+    def __init__(self, value: int, verbose: bool = False) -> None:
+        self.value = value
+        self.verbose = verbose
+
+    def __enter__(self):
+        if self.verbose:
+            print(f'SetWQPriority setting priority:\t{self.value}')
+        for n, spec in WQ_RESOURCES_REGISTRY.items():
+            spec["priority"] = self.value
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.verbose:
+            print(f'SetWQPriority unsetting {self.value}')
+        for n, spec in WQ_RESOURCES_REGISTRY.items():
+            spec["priority"] = SetWQPriority.default
+
+
+# This is the default psiflow config which is always passed into the ExecutionContext
+# TODO: find a place for this
+DEFAULT_CONFIG = """
+parsl_log_level: WARNING
+usage_tracking: 3
+
+ModelEvaluation:
+  executor: threadpool
+  max_threads: 2
+
+ModelTraining:
+  executor: threadpool
+  max_threads: 2
+"""

From 4e896d7b3d4bb94da588f2a9015d480f7c595f79 Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Fri, 27 Feb 2026 17:14:27 +0100
Subject: [PATCH 04/15] cleanup psiflow_internal

Remove the annoying '000' nesting level (by replacing Parsl's `make_rundir` with a no-op) and place all executor output directly in psiflow_internal.
---
 psiflow/execution.py | 40 +++++++++++++---------------------------
 1 file changed, 13 insertions(+), 27 deletions(-)

diff --git a/psiflow/execution.py b/psiflow/execution.py
index 4ec0c70..79b3745 100644
--- a/psiflow/execution.py
+++ b/psiflow/execution.py
@@ -624,6 +624,7 @@ def from_config(
         if path.exists():
             shutil.rmtree(path)
         path.mkdir(parents=True)
+        patch_parsl_dirtree()
 
         log_file = str(path / "parsl.log")
         log_level = getattr(logging, parsl_log_level)
@@ -708,20 +709,7 @@ def from_config(
             internal_tasks_max_threads=internal_tasks_max_threads,
             # std_autopath=std_autopath,
         )
-        context = ExecutionContext(config, definitions, path / "context_dir")
-
-        # if make_symlinks:
-        #     src, dest = Path.cwd() / "psiflow_log", path / "parsl.log"
-        #     _create_symlink(src, dest)
-        #     src, dest = (
-        #         Path.cwd() / "psiflow_submit_scripts",
-        #         path / "000" / "submit_scripts",
-        #     )
-        #     _create_symlink(src, dest, is_dir=True)
-        #     src, dest = Path.cwd() / "psiflow_task_logs", path / "000" / "task_logs"
-        #     _create_symlink(src, dest, is_dir=True)
-
-        return context
+        return ExecutionContext(config, definitions, path / "context_dir")
 
 
 class ExecutionContextLoader:
@@ -807,17 +795,6 @@ def _get_launch_command(self, block_id):
         return self.worker_command
 
 
-# def _create_symlink(src: Path, dest: Path, is_dir: bool = False) -> None:
-#     """Create or replace symbolic link"""
-#     if src.is_symlink():
-#         src.unlink()
-#     if is_dir:
-#         dest.mkdir(parents=True, exist_ok=True)
-#     else:
-#         dest.touch(exist_ok=True)
-#     src.symlink_to(dest, target_is_directory=is_dir)
-
-
 # TODO: attempt at managing priority through global state
 WQ_RESOURCES_REGISTRY = {}
 
@@ -844,14 +821,14 @@ def __init__(self, value: int, verbose: bool = False) -> None:
 
     def __enter__(self):
         if self.verbose:
-            print(f'SetWQPriority setting priority:\t{self.value}')
+            print(f"SetWQPriority setting priority:\t{self.value}")
         for n, spec in WQ_RESOURCES_REGISTRY.items():
             spec["priority"] = self.value
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         if self.verbose:
-            print(f'SetWQPriority unsetting {self.value}')
+            print(f"SetWQPriority unsetting {self.value}")
         for n, spec in WQ_RESOURCES_REGISTRY.items():
             spec["priority"] = SetWQPriority.default
 
@@ -870,3 +847,12 @@ def __exit__(self, exc_type, exc_val, exc_tb):
   executor: threadpool
   max_threads: 2
 """
+
+
+def patch_parsl_dirtree() -> None:
+    """By default, Parsl will put Executor logs etc. under numbered directories.
+    We do not need this level of nesting, as psiflow_internal is refreshed every run"""
+    import parsl.dataflow.dflow
+
+    # replace with noop, which needs to happen after parsl.dataflow.dflow initialises
+    parsl.dataflow.dflow.make_rundir = lambda x: x

From 9c48ce0eae53ea2e3065fa341cd765b4c8d7a21a Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Mon, 2 Mar 2026 23:09:00 +0100
Subject: [PATCH 05/15] update execution part 2

- further clean up execution
- redo `ModelEvaluation` + threadpool (workqueue still needs work)
- add a bash app template that is shared by all bash apps. It is now possible to specify where tmpdirs are created through the `tmpdir_root` config option. You can also specify whether tmpdirs should be removed after the tasks finish (for debugging purposes).

At the moment, `ModelTraining` and `ReferenceEvaluation` are most likely moderately broken.
---
 psiflow/__init__.py            |   3 -
 psiflow/execution.py           | 250 ++++++++++++++++++---------------
 psiflow/functions.py           |   1 +
 psiflow/hamiltonians.py        |   4 +-
 psiflow/models/_mace.py        |   2 +-
 psiflow/order_parameters.py    |   2 -
 psiflow/reference/cp2k_.py     |   9 +-
 psiflow/reference/gpaw_.py     |   3 -
 psiflow/reference/orca_.py     |   8 +-
 psiflow/reference/reference.py |   5 +-
 psiflow/sampling/ase.py        |  19 +--
 psiflow/sampling/optimize.py   |  10 +-
 psiflow/sampling/sampling.py   |  14 +-
 psiflow/sampling/server.py     |   1 -
 psiflow/sampling/walker.py     |  10 +-
 psiflow/serialization.py       |   4 +-
 psiflow/utils/__init__.py      |   8 --
 17 files changed, 172 insertions(+), 181 deletions(-)
 delete mode 100644 psiflow/order_parameters.py

diff --git a/psiflow/__init__.py b/psiflow/__init__.py
index e8de3e3..73f2e3f 100644
--- a/psiflow/__init__.py
+++ b/psiflow/__init__.py
@@ -1,7 +1,5 @@
 from pathlib import Path
 
-import typeguard
-
 from .config import setup_slurm_config  # noqa: F401
 from .execution import ExecutionContextLoader
 from .serialization import (  # noqa: F401
@@ -12,7 +10,6 @@
 )
 
 
-@typeguard.typechecked
 def resolve_and_check(path: Path) -> Path:
     path = path.resolve()
     if Path.cwd() in path.parents:
diff --git a/psiflow/execution.py b/psiflow/execution.py
index 79b3745..2a3e0bc 100644
--- a/psiflow/execution.py
+++ b/psiflow/execution.py
@@ -4,13 +4,14 @@
 import sys
 import warnings
 import subprocess
+import textwrap
 from datetime import datetime, timedelta
 from dataclasses import dataclass
 from pathlib import Path
 from threading import Lock
 
 # see https://stackoverflow.com/questions/59904631/python-class-constants-in-dataclasses
-from typing import Any, Optional, Union, ClassVar, Protocol, Iterable
+from typing import Any, Optional, Union, ClassVar, Protocol, Iterable, Sequence
 
 import parsl
 import psutil
@@ -71,9 +72,9 @@ class ReferenceSpec(Protocol):
     """Defines default options for Reference implementations"""
 
     name: ClassVar[str]
-    reference_args: ClassVar[tuple[str, ...]]
+    reference_args: ClassVar[tuple[str, ...]]  # TODO: update 'cores_per_worker'
     mpi_command: str
-    mpi_args: Iterable[str]
+    mpi_args: Sequence[str]
     executable: str
 
     def launch_command(self) -> str:
@@ -90,7 +91,7 @@ class CP2KReferenceSpec(ReferenceSpec):
     name = "CP2K"
     reference_args = ("cores_per_worker",)
     mpi_command: str = "mpirun -np {cores_per_worker}"
-    mpi_args: tuple[str, ...] = (
+    mpi_args: Sequence[str] = (
         "-ENV OMP_NUM_THREADS=1",
         "--bind-to core",
         "--map-by core",
@@ -106,7 +107,7 @@ class GPAWReferenceSpec(ReferenceSpec):
     name = "GPAW"
     reference_args = ("cores_per_worker",)
     mpi_command: str = "mpirun -np {cores_per_worker}"
-    mpi_args: tuple[str, ...] = (
+    mpi_args: Sequence[str] = (
         "-x OMP_NUM_THREADS=1",
         "--bind-to core",
         "--map-by core",
@@ -122,7 +123,7 @@ class ORCAReferenceSpec(ReferenceSpec):
     name = "ORCA"
     reference_args = ()
     mpi_command: str = ""
-    mpi_args: tuple[str, ...] = (
+    mpi_args: Sequence[str] = (
         "-x OMP_NUM_THREADS=1",
         "--bind-to core",
         "--map-by core",
@@ -141,12 +142,6 @@ def launch_command(self):
 }
 
 
-def str_to_timedelta(s: str) -> timedelta:
-    # TODO: move to utils
-    t = datetime.strptime(s, "%H:%M:%S")
-    return timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
-
-
 def make_slurm_provider(kwargs: dict) -> tuple[SlurmProvider, dict]:
     defaults = {"init_blocks": 0, "exclusive": False}
     required = ("cores_per_node", "walltime", "gpus_per_node")
@@ -166,7 +161,7 @@ def make_slurm_provider(kwargs: dict) -> tuple[SlurmProvider, dict]:
 def make_local_provider(kwargs: dict) -> tuple[LocalProvider, dict]:
     resources = {
         "nodes": 1,
-        "cores": kwargs.get("cores", psutil.cpu_count()),
+        "cores": kwargs.get("cores", psutil.cpu_count(logical=False)),
         "memory": kwargs.get(
             "memory", psutil.virtual_memory().available / 1e9
         ),  # TODO: available?
@@ -190,8 +185,31 @@ def make_local_provider(kwargs: dict) -> tuple[LocalProvider, dict]:
     return provider, resources
 
 
+def make_default_executors(
+    max_workers: int, path: Path, container: ContainerSpec
+) -> tuple[HighThroughputExecutor, ThreadPoolExecutor]:
+    """Construct executors for internal app handling"""
+    launcher = SimpleLauncher()
+    if container is not None:
+        launcher = WrappedLauncher(prepend=container.launch_command())
+
+    htex = HighThroughputExecutor(
+        label="default_htex",
+        working_dir=str(path / "default_htex"),
+        cores_per_worker=1,
+        max_workers_per_node=max_workers,
+        cpu_affinity="none",
+        provider=LocalProvider(launcher=launcher, init_blocks=0),
+    )
+    threadpool = ThreadPoolExecutor(
+        label="default_threads",
+        max_threads=max_workers,
+        working_dir=str(path),
+    )
+    return htex, threadpool
+
+
 class ExecutionDefinition:
-    # TODO: do not like defining some kwargs in class method and other kwargs in init...
     def __init__(
         self,
         provider: ExecutionProvider | None,
@@ -257,6 +275,7 @@ def __init__(
         register_definition(definition=self)
 
         # TODO: how to handle env variables?
+        # TODO: check between min_runtime and max_runtime?
 
         pass
 
@@ -271,7 +290,16 @@ def lifetime(self) -> float:
 
     @property
     def use_gpu(self) -> bool:
-        return self.kwargs.get("use_gpu") or self.kwargs.get("gpus_per_task", 0) > 0
+        if self.executor_type == "threadpool":
+            return self.kwargs["use_gpu"]
+        return self.kwargs["gpus_per_task"] > 0
+
+    @property
+    def cores_per_task(self) -> int:
+        if self.executor_type == "workqueue":
+            return self.kwargs["cores_per_task"]
+        # assumes all threads are working
+        return int(self.resources["cores"] / self.kwargs["max_threads"])
 
     def wrap_in_timeout(self, command: str) -> str:
         if self.max_runtime == float("inf"):
@@ -313,8 +341,9 @@ def _create_workqueue(self, path: Path) -> WorkQueueExecutor:
             prepend = self.container.launch_command()
             worker_executable = f"{prepend} {worker_executable}"
 
-        # TODO: why the custom WQ?
-        executor = MyWorkQueueExecutor(
+        # TODO: why the custom WQ? -- does not seem necessary (anymore)
+        # executor = MyWorkQueueExecutor(
+        executor = WorkQueueExecutor(
             label=self.name,
             working_dir=str(path / self.name),
             provider=self.provider,
@@ -340,7 +369,7 @@ def wq_resources(self, *args, **kwargs) -> dict:
     @classmethod
     def from_config(
         cls,
-        executor: str,
+        executor: str,  # TODO: no default value?
         container: Optional[ContainerSpec],
         **kwargs,
     ):
@@ -390,26 +419,24 @@ def from_config(
 class ModelEvaluation(ExecutionDefinition):
     def __init__(
         self,
-        timeout: float = 5,  # TODO: units?
+        timeout: float = 5.0,
         **kwargs,
     ):
         super().__init__(**kwargs)
-        self.timeout = timeout
 
-        # TODO: temporary
-        self.cores_per_worker = self.kwargs.get("cores_per_task", 1)
-        self.gpu = False
-        self.max_simulation_time = self.max_runtime
-        self.env_vars = {"OMP_NUM_THREADS": "1"}
+        self.timeout = timeout  # i-Pi will kill client connections after no response for timeout seconds
 
-        # TODO: what with env vars?
-        # default_env_vars = {
-        #     "OMP_NUM_THREADS": str(self.cores_per_worker),
-        #     "KMP_AFFINITY": "granularity=fine,compact,1,0",
-        #     "KMP_BLOCKTIME": "1",
-        #     "OMP_PROC_BIND": "false",
-        #     "PYTHONUNBUFFERED": "TRUE",
-        # }
+        if self.executor_type == "threadpool":
+            # disable thread affinity and busy-idling
+            env_vars = {
+                "OMP_PROC_BIND": "FALSE",
+                "OMP_WAIT_POLICY": "PASSIVE",
+                "OMP_NUM_THREADS": f"{self.cores_per_task}",
+                # "OMP_DISPLAY_ENV": "VERBOSE",  # verbose OMP log
+            }
+        else:
+            assert False, "IMPLEMENT THIS"
+        self.env_vars = env_vars | self.env_vars
 
     def server_command(self) -> str:
         command = "psiflow-server"
@@ -420,7 +447,7 @@ def get_driver_devices(self, nwalkers: int) -> list[dict]:
         # TODO: what if only 1 gpu is available? Redo this
         # nclients = min(nwalkers, self.max_workers)
         nclients = min(nwalkers, 2)
-        if self.gpu:
+        if self.use_gpu:
             return [{"device": f"cuda:{i}"} for i in range(nclients)]
         else:
             return [{"device": "cpu"} for _ in range(nclients)]
@@ -428,7 +455,7 @@ def get_driver_devices(self, nwalkers: int) -> list[dict]:
     def wq_resources(self, nwalkers: int) -> dict:
         if self.spec is None:
             return {}  # threadpool
-
+        # TODO: reimplement this
         return self.spec
 
     # def wq_resources(self, nwalkers):
@@ -464,6 +491,11 @@ def __init__(
             )
             assert self.max_runtime is None, message
 
+        if not self.use_gpu:
+            warnings.warn(
+                "ModelTraining is configured for CPU operation. Is this what you want?"
+            )
+
         # default_env_vars = {
         #     "OMP_NUM_THREADS": str(self.cores_per_worker),
         #     "KMP_AFFINITY": "granularity=fine,compact,1,0",
@@ -484,7 +516,7 @@ def train_command(self, initialize: bool = False):
     def wq_resources(self, *args, **kwargs) -> dict:
         if self.spec is None:
             return {}  # threadpool
-
+        # TODO: reimplement this
         return self.spec
 
     # def wq_resources(self):
@@ -511,13 +543,14 @@ def wq_resources(self, *args, **kwargs) -> dict:
 class ReferenceEvaluation(ExecutionDefinition):
     def __init__(
         self,
-        spec: "ReferenceSpec",
+        reference: "ReferenceSpec",
         memory_limit: Optional[str] = None,  # TODO: how does this work?
         **kwargs,
     ) -> None:
         # TODO: how to know which code?
+        # before super().__init__ because 'name' attribute needed
+        self.reference = reference
         super().__init__(**kwargs)
-        self.spec = spec
         self.memory_limit = memory_limit
 
     def command(self):
@@ -548,12 +581,14 @@ def parse_size(size):  # TODO: to utils?
         # exit code 0 so parsl always thinks bash app succeeded
         return "\n".join([*commands, launch_command, "exit 0"])
 
-    def _modify_wq_resources(self, spec: dict, *args, **kwargs) -> dict:
-        return spec
+    def wq_resources(self, *args, **kwargs) -> dict:
+        if self.spec is None:
+            return {}  # threadpool
+        return self.spec
 
     @property
     def name(self) -> str:
-        return self.spec.name
+        return self.reference.name
 
 
 class ExecutionContext:
@@ -574,12 +609,21 @@ def __init__(
         config: Config,
         definitions: list[ExecutionDefinition],
         path: Union[Path, str],
+        tmpdir_root: str,
+        keep_tmpdirs: bool,
+        **kwargs,
     ) -> None:
         self.config = config
         self.path = Path(path).resolve()
         self.path.mkdir(parents=True, exist_ok=True)
+
         self.definitions = {d.name: d for d in definitions}
         assert len(self.definitions) == len(definitions)
+
+        # make sure task tmpdirs can be made
+        Path(tmpdir_root).mkdir(parents=True, exist_ok=True)
+        self.bash_template = create_bash_template(tmpdir_root, keep_tmpdirs)
+
         self.file_index = {}
         self.lock = Lock()
         parsl.load(config)
@@ -608,16 +652,8 @@ def new_file(self, prefix: str, suffix: str) -> File:
     @classmethod
     def from_config(
         cls,
-        parsl_log_level: str = "WARNING",
-        usage_tracking: int = 3,
-        retries: int = 2,
-        strategy: str = "simple",
-        max_idletime: float = 20,
-        internal_tasks_max_threads: int = 10,
-        default_threads: int = 4,
-        # htex_address: str = "127.0.0.1",
-        zip_staging: Optional[bool] = None,
-        make_symlinks: bool = False,
+        parsl_log_level: str,
+        default_threads: int,
         **kwargs,
     ) -> "ExecutionContext":
         path = Path.cwd().resolve() / PSIFLOW_INTERNAL
@@ -633,28 +669,18 @@ def from_config(
         # create definitions
         base_container = ContainerSpec.from_kwargs(kwargs)
         model_evaluation = ModelEvaluation.from_config(
-            container=base_container,
-            **kwargs.pop("ModelEvaluation", {}),
+            container=base_container, **kwargs["ModelEvaluation"]
         )
         model_training = ModelTraining.from_config(
-            container=base_container,
-            **kwargs.pop(
-                "ModelTraining", {"gpu": True}
-            ),  # avoid triggering assertion  TODO: change into warning
+            container=base_container, **kwargs["ModelTraining"]
         )
 
-        # TODO: remove this and check below
-        model_evaluation.wq_resources(0)
-        model_evaluation.server_command()
-        model_training.wq_resources()
-
         reference_evaluations = []  # reference evaluations might be class specific
         for key in list(kwargs.keys()):
             if key[:4] in REFERENCE_SPECS:  # allow for e.g., CP2K_small
-                config = kwargs.pop(key)
+                config = kwargs[key]
                 reference_evaluation = ReferenceEvaluation.from_config(
-                    # spec=init_spec(REFERENCE_SPECS[key[:4]], config),
-                    spec=REFERENCE_SPECS[key[:4]].from_kwargs(**config),
+                    reference=REFERENCE_SPECS[key[:4]].from_kwargs(**config),
                     container=ContainerSpec.from_kwargs(kwargs | config),
                     **config,
                 )
@@ -663,53 +689,13 @@ def from_config(
 
         # create main parsl executors
         executors = [d.create_executor(path=path) for d in definitions]
+        internal = make_default_executors(default_threads, path, base_container)
+        executors.extend(internal)
 
-        # create default executors
-        # TODO: extract this into function
-        if base_container is not None:
-            launcher = WrappedLauncher(prepend=base_container.launch_command())
-        else:
-            launcher = SimpleLauncher()
-        htex = HighThroughputExecutor(
-            label="default_htex",
-            # address=htex_address,
-            working_dir=str(path / "default_htex"),
-            cores_per_worker=1,
-            max_workers_per_node=default_threads,
-            cpu_affinity="none",
-            provider=LocalProvider(launcher=launcher, init_blocks=0),  # noqa: F405
-        )
-        threadpool = ThreadPoolExecutor(
-            label="default_threads",
-            max_threads=default_threads,
-            working_dir=str(path),
-        )
-        executors.extend([htex, threadpool])
-
-        # remove additional kwargs
-        # if zip_staging:
-
-        #    def zip_uri(base, task_record, err_or_out):
-        #        zip_path = base / "base.zip"
-        #        file = f"{task_record['func_name']}.{task_record['id']}.{task_record['try_id']}.{err_or_out}"
-        #        return File(f"zip:{zip_path}/{file}")
-
-        #    std_autopath = partial(zip_uri, path)
-        # else:
-        #    std_autopath = None
         config = Config(
-            executors=executors,
-            run_dir=str(path),
-            initialize_logging=False,
-            # app_cache=False,
-            usage_tracking=usage_tracking,
-            retries=retries,
-            strategy=strategy,
-            max_idletime=max_idletime,
-            internal_tasks_max_threads=internal_tasks_max_threads,
-            # std_autopath=std_autopath,
+            executors=executors, run_dir=str(path), initialize_logging=False
         )
-        return ExecutionContext(config, definitions, path / "context_dir")
+        return ExecutionContext(config, definitions, path / "context_dir", **kwargs)
 
 
 class ExecutionContextLoader:
@@ -789,12 +775,14 @@ def __call__(self, command: str, tasks_per_node: int, nodes_per_block: int) -> s
         return x
 
 
-class MyWorkQueueExecutor(WorkQueueExecutor):
-    # TODO: what does this do?
-    def _get_launch_command(self, block_id):
-        return self.worker_command
+# class MyWorkQueueExecutor(WorkQueueExecutor):
+#     # TODO: what does this do?
+#     def _get_launch_command(self, block_id):
+#         return self.worker_command
 
 
+# TODO: move everything below to appropriate files
+
 # TODO: attempt at managing priority through global state
 WQ_RESOURCES_REGISTRY = {}
 
@@ -838,6 +826,11 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 DEFAULT_CONFIG = """
 parsl_log_level: WARNING
 usage_tracking: 3
+default_threads: 4
+max_idletime: 20
+tmpdir_root: /tmp
+keep_tmpdirs: false
+gpu_flavour: nvidia
 
 ModelEvaluation:
   executor: threadpool
@@ -856,3 +849,34 @@ def patch_parsl_dirtree() -> None:
 
     # replace with noop, which needs to happen after parsl.dataflow.dflow initialises
     parsl.dataflow.dflow.make_rundir = lambda x: x
+
+
+# TODO: arguments that need documenting: retries, strategy?, timeout, garbage_collect (Config)
+
+
+def create_bash_template(tmpdir_root: str, keep_tmpdirs: bool) -> str:
+    """Create general wrapper for all bash apps. The exitcode ensures that every app completes successfully."""
+    template = f"""
+    # Create and move into new tmpdir for app execution
+    tmpdir=$(mktemp -d -p {tmpdir_root} "psiflow-tmp.XXXXXXXXXX")
+    cd $tmpdir; echo "tmpdir: $PWD"
+    export {{env}}
+    printenv
+
+    # Actual app definition goes here
+    {{commands}}
+
+    # Cleanup
+    {'cd ../.. && rm -r $tmpdir' if not keep_tmpdirs else ''}
+    exit 0
+    """
+    return textwrap.dedent(template)
+
+
+def format_env_vars(env_vars: dict) -> str:
+    return " ".join([f"{k}={v}" for k, v in env_vars.items()])
+
+
+def str_to_timedelta(s: str) -> timedelta:
+    t = datetime.strptime(s, "%H:%M:%S")
+    return timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
\ No newline at end of file
diff --git a/psiflow/functions.py b/psiflow/functions.py
index e12bec9..8689616 100644
--- a/psiflow/functions.py
+++ b/psiflow/functions.py
@@ -296,6 +296,7 @@ class DispersionFunction(EnergyFunction):
     def __post_init__(self):
         # OMP_NUM_THREADS for parallel evaluation does not work..
         # https://github.com/dftd3/simple-dftd3/issues/49
+        # TODO: check whether this is still the case
         os.environ["OMP_NUM_THREADS"] = str(self.num_threads * 10)
 
         from dftd3.ase import DFTD3
diff --git a/psiflow/hamiltonians.py b/psiflow/hamiltonians.py
index a065484..59ca43e 100644
--- a/psiflow/hamiltonians.py
+++ b/psiflow/hamiltonians.py
@@ -444,9 +444,9 @@ def parameters(self) -> dict:
         return {
             "model_path": model_path,
             "atomic_energies": self.atomic_energies,
-            "ncores": evaluation.cores_per_worker,
+            "ncores": evaluation.cores_per_task,
             "dtype": "float32",
-            "device": "gpu" if evaluation.gpu else "cpu",
+            "device": "gpu" if evaluation.use_gpu else "cpu",
             "env_vars": evaluation.env_vars,
         }
 
diff --git a/psiflow/models/_mace.py b/psiflow/models/_mace.py
index 5ed6772..46fc04d 100644
--- a/psiflow/models/_mace.py
+++ b/psiflow/models/_mace.py
@@ -227,7 +227,7 @@ def _create_apps(self):  # initialize apps
         app_initialize = bash_app(initialize, executors=[evaluation.name])
         resources_init = evaluation.wq_resources(1)
         # TODO: find a better way for model init
-        if not evaluation.use_threadpool:
+        if not evaluation.executor_type == "threadpool":
             resources_init["running_time_min"] = 30  # at least 30 mins for init?
         app_train = bash_app(train, executors=[training.name])
         resources_train = training.wq_resources()
diff --git a/psiflow/order_parameters.py b/psiflow/order_parameters.py
deleted file mode 100644
index 204de2e..0000000
--- a/psiflow/order_parameters.py
+++ /dev/null
@@ -1,2 +0,0 @@
-class OrderParameter:
-    pass
diff --git a/psiflow/reference/cp2k_.py b/psiflow/reference/cp2k_.py
index 6af6095..eb30e73 100644
--- a/psiflow/reference/cp2k_.py
+++ b/psiflow/reference/cp2k_.py
@@ -15,8 +15,6 @@
 from psiflow.geometry import Geometry
 from psiflow.reference.reference import Reference, Status, get_spin_multiplicities
 from psiflow.utils.parse import find_line, lines_to_array
-from psiflow.utils import TMP_COMMAND, CD_COMMAND
-
 
 # costly to initialise
 input_parser = CP2KInputParserSimplified(
@@ -141,12 +139,7 @@ def get_single_atom_references(self, element: str) -> dict[int, Reference]:
         return references
 
     def get_shell_command(self, inputs: list[File]) -> str:
-        command_list = [
-            TMP_COMMAND,
-            CD_COMMAND,
-            f"cp {inputs[0].filepath} cp2k.inp",
-            self.execute_command,
-        ]
+        command_list = [f"cp {inputs[0].filepath} cp2k.inp", self.execute_command]
         return "\n".join(command_list)
 
     def parse_output(self, stdout: str) -> dict:
diff --git a/psiflow/reference/gpaw_.py b/psiflow/reference/gpaw_.py
index 5c6d5bd..a119271 100644
--- a/psiflow/reference/gpaw_.py
+++ b/psiflow/reference/gpaw_.py
@@ -8,7 +8,6 @@
 import psiflow
 from psiflow.geometry import Geometry
 from psiflow.reference.reference import Reference, Status
-from psiflow.utils import TMP_COMMAND, CD_COMMAND
 from psiflow.utils.apps import copy_app_future
 from psiflow.utils.parse import find_line
 from psiflow.reference._gpaw import FILEPATH, DEFAULTS, STDOUT_KEY
@@ -58,8 +57,6 @@ def compute_atomic_energy(self, element, box_size=None) -> AppFuture:
 
     def get_shell_command(self, inputs: list[File]) -> str:
         command_list = [
-            TMP_COMMAND,
-            CD_COMMAND,
             f"cp {inputs[0].filepath} input.json",
             f"cp {self.script} script_gpaw.py",
             self.execute_command,
diff --git a/psiflow/reference/orca_.py b/psiflow/reference/orca_.py
index 05cd2d1..ca14115 100644
--- a/psiflow/reference/orca_.py
+++ b/psiflow/reference/orca_.py
@@ -12,7 +12,6 @@
 import psiflow
 from psiflow.geometry import Geometry
 from psiflow.reference.reference import Reference, Status, get_spin_multiplicities
-from psiflow.utils import TMP_COMMAND, CD_COMMAND
 from psiflow.utils.parse import find_line, lines_to_array, string_to_timedelta
 
 
@@ -162,12 +161,7 @@ def get_single_atom_references(self, element: str) -> dict[int, Reference]:
         return references
 
     def get_shell_command(self, inputs: list[File]) -> str:
-        command_list = [
-            TMP_COMMAND,
-            CD_COMMAND,
-            f"cp {inputs[0].filepath} orca.inp",
-            self.execute_command,
-        ]
+        command_list = [f"cp {inputs[0].filepath} orca.inp", self.execute_command]
         return "\n".join(command_list)
 
     def parse_output(self, stdout: str) -> dict:
diff --git a/psiflow/reference/reference.py b/psiflow/reference/reference.py
index 96ff842..6a8c008 100644
--- a/psiflow/reference/reference.py
+++ b/psiflow/reference/reference.py
@@ -98,7 +98,10 @@ def _execute(
     stderr: str = parsl.AUTO_LOGNAME,
     label: str = "singlepoint",
 ) -> str:
-    return reference.get_shell_command(inputs)
+    # TODO: we do not set env_vars here?
+    command = reference.get_shell_command(inputs)
+    template = psiflow.context().bash_template
+    return template.format(commands=command, env='>/dev/null')
 
 
 def _process_output(
diff --git a/psiflow/sampling/ase.py b/psiflow/sampling/ase.py
index 87434fe..cd341fb 100644
--- a/psiflow/sampling/ase.py
+++ b/psiflow/sampling/ase.py
@@ -11,12 +11,12 @@
 from psiflow.hamiltonians import Hamiltonian
 from psiflow.utils.apps import setup_logger
 from psiflow.utils.io import _dump_json
-from psiflow.utils import TMP_COMMAND, CD_COMMAND, export_env_command
 from psiflow.utils.parse import get_task_name_id
+from psiflow.execution import format_env_vars
 
 from ._ase import ALLOWED_MODES, __file__ as file_ase
 
-DEFAULT_EXECUTABLE = 'script.py'
+DEFAULT_EXECUTABLE = "script.py"
 logger = setup_logger(__name__)  # logging per module
 
 
@@ -63,14 +63,13 @@ def _execute_ase(
         command_opt_args.append(f"--output_traj={outputs[1].filepath}")
 
     command_list = [
-        TMP_COMMAND,
-        CD_COMMAND,
-        export_env_command(env_vars),
         f"cp {inputs[0].filepath} {DEFAULT_EXECUTABLE}",
         " ".join(command_opt_args),
         "exit 0",  # ignore timeout exitcode
     ]
-    return "\n".join(command_list)
+    template = psiflow.context().bash_template
+    commands, env = "\n".join(command_list), format_env_vars(env_vars)
+    return template.format(commands=commands, env=env)
 
 
 execute_ase = bash_app(_execute_ase, executors=["ModelEvaluation"])
@@ -93,12 +92,8 @@ def optimize(
 
     context = psiflow.context()
     definition = context.definitions["ModelEvaluation"]
-
-    command_list = [f"python -u {DEFAULT_EXECUTABLE}"]
-    if definition.max_simulation_time is not None:
-        max_time = 0.9 * (60 * definition.max_simulation_time)
-        command_list = ["timeout -s 15 {}s".format(max_time), *command_list]
-    command_launch = " ".join(command_list)
+    command = f"python -u {DEFAULT_EXECUTABLE}"
+    command_launch = definition.wrap_in_timeout(command)
 
     input_geometry = Dataset([state]).extxyz  # state can be future
     hamiltonian = 1.0 * hamiltonian  # convert to mixture
diff --git a/psiflow/sampling/optimize.py b/psiflow/sampling/optimize.py
index 74d7d54..b92f423 100644
--- a/psiflow/sampling/optimize.py
+++ b/psiflow/sampling/optimize.py
@@ -20,7 +20,7 @@
 )
 from psiflow.sampling.output import HamiltonianComponent
 from psiflow.utils.io import save_xml
-from psiflow.utils import TMP_COMMAND, CD_COMMAND, export_env_command
+from psiflow.execution import format_env_vars
 
 
 warnings.warn(
@@ -111,17 +111,15 @@ def _execute_ipi(
         set(d["address"] for d in driver_kwargs)
     )
     commands_driver = make_driver_commands(driver_kwargs, file_xyz_in, files_in)
-
     command_list = [
-        TMP_COMMAND,
-        CD_COMMAND,
-        export_env_command(env_vars),
         command_start,
         command_wait,
         *commands_driver,
         "wait",
     ]
-    return "\n".join(command_list)
+    template = psiflow.context().bash_template
+    commands, env = "\n".join(command_list), format_env_vars(env_vars)
+    return template.format(commands=commands, env=env)
 
 
 execute_ipi = bash_app(_execute_ipi, executors=["ModelEvaluation"])
diff --git a/psiflow/sampling/sampling.py b/psiflow/sampling/sampling.py
index 4328ea3..25d0d52 100644
--- a/psiflow/sampling/sampling.py
+++ b/psiflow/sampling/sampling.py
@@ -19,10 +19,10 @@
     potential_component_name,
     HamiltonianComponent,
 )
+from psiflow.execution import format_env_vars
 from psiflow.sampling.utils import create_xml_list
 from psiflow.sampling.walker import Coupling, Walker, partition, Ensemble
 from psiflow.utils.io import _save_xml
-from psiflow.utils import TMP_COMMAND, CD_COMMAND, export_env_command
 from psiflow.sampling.driver import __file__ as PATH_DRIVER
 
 
@@ -113,7 +113,7 @@ def setup_sockets(components: list[HamiltonianComponent]) -> list[ET.Element]:
         <address>{address}</address>
     </ffsocket>
     """
-    timeout = 60 * psiflow.context().definitions["ModelEvaluation"].timeout
+    timeout = psiflow.context().definitions["ModelEvaluation"].timeout
 
     sockets = []
     for comp in components:
@@ -450,10 +450,7 @@ def _execute_ipi(
     commands_driver = make_driver_commands(driver_kwargs, file_xyz_in, files_in)
 
     command_list = [
-        TMP_COMMAND,
-        CD_COMMAND,
-        "\n".join(write_command_args),
-        export_env_command(env_vars),
+        *write_command_args,
         command_start,
         command_wait,
         *commands_driver,
@@ -461,7 +458,10 @@ def _execute_ipi(
     ]
     if coupling_command:
         command_list.append(coupling_command)
-    return "\n".join(command_list)
+
+    template = psiflow.context().bash_template
+    commands, env = "\n".join(command_list), format_env_vars(env_vars)
+    return template.format(commands=commands, env=env)
 
 
 execute_ipi = bash_app(_execute_ipi, executors=["ModelEvaluation"])
diff --git a/psiflow/sampling/server.py b/psiflow/sampling/server.py
index 2fe3c71..6ade9fe 100644
--- a/psiflow/sampling/server.py
+++ b/psiflow/sampling/server.py
@@ -97,7 +97,6 @@ def run(start_xyz: str, input_xml: str):
     # prepare starting geometries from context_dir
     data_start: list[ase.Atoms] = read(start_xyz, index=":")
     for i, at in enumerate(data_start):
-        print(at.pbc)
         if not any(at.pbc):  # set fake large cell for i-PI
             at.pbc = True
             at.cell = Cell(NONPERIODIC_CELL)
diff --git a/psiflow/sampling/walker.py b/psiflow/sampling/walker.py
index 0b2cdec..9b2ce9c 100644
--- a/psiflow/sampling/walker.py
+++ b/psiflow/sampling/walker.py
@@ -14,7 +14,7 @@
 from psiflow.data import Dataset
 from psiflow.geometry import Geometry, check_equality
 from psiflow.hamiltonians import Hamiltonian, Zero, combine_hamiltonians
-from psiflow.order_parameters import OrderParameter
+# from psiflow.order_parameters import OrderParameter
 from psiflow.sampling.metadynamics import Metadynamics
 from psiflow.utils.apps import copy_app_future
 
@@ -78,7 +78,7 @@ class Walker:
     masses: Union[np.ndarray, float, None] = None
     nbeads: int = 1
     metadynamics: Optional[Metadynamics] = None
-    order_parameter: Optional[OrderParameter] = None
+    # order_parameter: Optional['OrderParameter'] = None
 
     state: Union[Geometry, AppFuture] = field(init=False)
     coupling: Optional[Coupling] = field(init=False)
@@ -96,9 +96,9 @@ def __post_init__(self):
             # we cannot check this for futures
             assert self.pressure is None, "Pressure requires PBC"
 
-        if self.order_parameter is not None:
-            # TODO: order_parameter out of commission
-            self.start = self.order_parameter.evaluate(self.start)
+        # if self.order_parameter is not None:
+        #     # TODO: order_parameter out of commission
+        #     self.start = self.order_parameter.evaluate(self.start)
 
         if (m := self.masses) is None:
             pass  # do nothing
diff --git a/psiflow/serialization.py b/psiflow/serialization.py
index c37eea6..6e49038 100644
--- a/psiflow/serialization.py
+++ b/psiflow/serialization.py
@@ -279,7 +279,7 @@ def deserialize(data_str: str, custom_cls: Optional[list] = None):
     from psiflow.learning import Learning
     from psiflow.metrics import Metrics
     from psiflow.models import MACE
-    from psiflow.order_parameters import OrderParameter
+    # from psiflow.order_parameters import OrderParameter
     from psiflow.reference import CP2K, GPAW, ORCA, ReferenceDummy
     from psiflow.sampling import Metadynamics, ReplicaExchange, SimulationOutput, Walker
 
@@ -300,7 +300,7 @@ def deserialize(data_str: str, custom_cls: Optional[list] = None):
         Harmonic,
         MixtureHamiltonian,
         Metadynamics,
-        OrderParameter,
+        # OrderParameter,
         ReplicaExchange,
         SimulationOutput,
         Walker,
diff --git a/psiflow/utils/__init__.py b/psiflow/utils/__init__.py
index e103e43..e69de29 100644
--- a/psiflow/utils/__init__.py
+++ b/psiflow/utils/__init__.py
@@ -1,8 +0,0 @@
-TMP_COMMAND = 'tmpdir=$(mktemp -d -p /tmp "mytmpdir.XXXXXXXXXX" || mktemp -d -t "mytmpdir.XXXXXXXXXX")'
-CD_COMMAND = 'cd $tmpdir; echo "tmpdir: $PWD"'
-
-
-def export_env_command(env_vars: dict) -> str:
-    return "export " + " ".join(
-        [f"{name}={value}" for name, value in env_vars.items()]
-    )

From c2d0e459dfbb16225a25b1bf3571cab107ac51ce Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Tue, 3 Mar 2026 16:34:05 +0100
Subject: [PATCH 06/15] bugfix

bash apps do not know about global scope variables (unless in threadpool)
---
 psiflow/free_energy/phonons.py | 13 ++++++-------
 psiflow/sampling/ase.py        |  6 +++---
 psiflow/sampling/optimize.py   |  5 +++--
 psiflow/sampling/sampling.py   |  5 +++--
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/psiflow/free_energy/phonons.py b/psiflow/free_energy/phonons.py
index c93dd47..2c2513c 100644
--- a/psiflow/free_energy/phonons.py
+++ b/psiflow/free_energy/phonons.py
@@ -17,10 +17,10 @@
     make_driver_commands,
     make_wait_for_sockets_command,
 )
-from psiflow.sampling.optimize import setup_forces, export_env_command
+from psiflow.sampling.optimize import setup_forces
 from psiflow.utils.apps import multiply
 from psiflow.utils.io import load_numpy, save_xml
-from psiflow.utils import TMP_COMMAND, CD_COMMAND
+from psiflow.execution import format_env_vars
 
 
 def _compute_frequencies(hessian: np.ndarray, geometry: Geometry) -> np.ndarray:
@@ -88,6 +88,7 @@ def _execute_ipi(
     driver_kwargs: list[dict],
     command_server: str,
     env_vars: dict = {},
+    bash_template: str = "",
     stdout: str = parsl.AUTO_LOGNAME,
     stderr: str = parsl.AUTO_LOGNAME,
     inputs: list = [],
@@ -102,18 +103,15 @@ def _execute_ipi(
         set(d["address"] for d in driver_kwargs)
     )
     commands_driver = make_driver_commands(driver_kwargs, file_xyz_in, files_in)
-
     command_list = [
-        TMP_COMMAND,
-        CD_COMMAND,
-        export_env_command(env_vars),
         command_start,
         command_wait,
         *commands_driver,
         "wait",
         f"cp i-pi.output_full.hess {outputs[0]}",
     ]
-    return "\n".join(command_list)
+    commands, env = "\n".join(command_list), format_env_vars(env_vars)
+    return bash_template.format(commands=commands, env=env)
 
 
 execute_ipi = bash_app(_execute_ipi, executors=["ModelEvaluation"])
@@ -171,6 +169,7 @@ def compute_harmonic(
         driver_kwargs,
         definition.server_command(),
         env_vars=definition.env_vars,
+        bash_template=context.bash_template,
         inputs=inputs,
         outputs=[context.new_file("hess_", ".txt")],
         parsl_resource_specification=definition.wq_resources(1),
diff --git a/psiflow/sampling/ase.py b/psiflow/sampling/ase.py
index cd341fb..e039791 100644
--- a/psiflow/sampling/ase.py
+++ b/psiflow/sampling/ase.py
@@ -48,6 +48,7 @@ def _execute_ase(
     inputs: list[DataFuture],
     outputs: list[DataFuture],
     env_vars: dict = {},
+    bash_template: str = "",
     stdout: str = parsl.AUTO_LOGNAME,
     stderr: str = parsl.AUTO_LOGNAME,
     parsl_resource_specification: Optional[dict] = None,
@@ -65,11 +66,9 @@ def _execute_ase(
     command_list = [
         f"cp {inputs[0].filepath} {DEFAULT_EXECUTABLE}",
         " ".join(command_opt_args),
-        "exit 0",  # ignore timeout exitcode
     ]
-    template = psiflow.context().bash_template
     commands, env = "\n".join(command_list), format_env_vars(env_vars)
-    return template.format(commands=commands, env=env)
+    return bash_template.format(commands=commands, env=env)
 
 
 execute_ase = bash_app(_execute_ase, executors=["ModelEvaluation"])
@@ -123,6 +122,7 @@ def optimize(
     result = execute_ase(
         command_launch=command_launch,
         env_vars=definition.env_vars,
+        bash_template=context.bash_template,
         inputs=inputs,
         outputs=outputs,
         parsl_resource_specification=definition.wq_resources(1),
diff --git a/psiflow/sampling/optimize.py b/psiflow/sampling/optimize.py
index b92f423..c8cd68c 100644
--- a/psiflow/sampling/optimize.py
+++ b/psiflow/sampling/optimize.py
@@ -97,6 +97,7 @@ def _execute_ipi(
     driver_kwargs: list[dict],
     command_server: str,
     env_vars: dict = {},
+    bash_template: str = "",
     stdout: str = parsl.AUTO_LOGNAME,
     stderr: str = parsl.AUTO_LOGNAME,
     inputs: list = [],
@@ -117,9 +118,8 @@ def _execute_ipi(
         *commands_driver,
         "wait",
     ]
-    template = psiflow.context().bash_template
     commands, env = "\n".join(command_list), format_env_vars(env_vars)
-    return template.format(commands=commands, env=env)
+    return bash_template.format(commands=commands, env=env)
 
 
 execute_ipi = bash_app(_execute_ipi, executors=["ModelEvaluation"])
@@ -186,6 +186,7 @@ def optimize(
         driver_kwargs,
         definition.server_command(),
         env_vars=definition.env_vars,
+        bash_template=context.bash_template,
         inputs=inputs,
         outputs=outputs,
         parsl_resource_specification=definition.wq_resources(1),
diff --git a/psiflow/sampling/sampling.py b/psiflow/sampling/sampling.py
index 25d0d52..3007c37 100644
--- a/psiflow/sampling/sampling.py
+++ b/psiflow/sampling/sampling.py
@@ -424,6 +424,7 @@ def _execute_ipi(
     command_server: str,
     *plumed_list: str,
     env_vars: dict = {},
+    bash_template: str = "",
     stdout: str = parsl.AUTO_LOGNAME,
     stderr: str = parsl.AUTO_LOGNAME,
     inputs: list = [],
@@ -459,9 +460,8 @@ def _execute_ipi(
     if coupling_command:
         command_list.append(coupling_command)
 
-    template = psiflow.context().bash_template
     commands, env = "\n".join(command_list), format_env_vars(env_vars)
-    return template.format(commands=commands, env=env)
+    return bash_template.format(commands=commands, env=env)
 
 
 execute_ipi = bash_app(_execute_ipi, executors=["ModelEvaluation"])
@@ -599,6 +599,7 @@ def _sample(
         definition.server_command(),
         *plumed_list,  # futures
         env_vars=dict(definition.env_vars),
+        bash_template=context.bash_template,
         inputs=inputs,
         outputs=outputs,
         parsl_resource_specification=definition.wq_resources(max_nclients),

From a0479fdb195a06a51da1053afd25511c54f9a8ec Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Wed, 4 Mar 2026 22:45:23 +0100
Subject: [PATCH 07/15] overhaul ModelEvaluation

(Re)implementing some logic
- to dynamically scale up MD resources depending on walkers/hamiltonians (capped by the 'max_resource_multiplier' option)
- to decide how many clients to spawn for an MD run to avoid resource oversubscription ('allow_oversubscription' option)

Also 'log_dfk_tasks' for debugging

Figure out how many clients can be used in a simulation.
---
 psiflow/execution.py         | 188 +++++++++++++++++++++++++----------
 psiflow/sampling/sampling.py |  63 ++++++++----
 2 files changed, 179 insertions(+), 72 deletions(-)

diff --git a/psiflow/execution.py b/psiflow/execution.py
index 2a3e0bc..854f020 100644
--- a/psiflow/execution.py
+++ b/psiflow/execution.py
@@ -41,9 +41,9 @@ class ContainerSpec:
     """Controls container configuration"""
 
     uri: str
-    engine: str = "apptainer"
+    engine: str
     addopts: str = " --no-eval -e --no-mount home -W /tmp --writable-tmpfs"
-    gpu_flavour: str | None = None  # TODO: add yaml argument
+    gpu_flavour: str | None = None
 
     def __post_init__(self):
         assert self.engine in ("apptainer", "singularity")
@@ -63,9 +63,9 @@ def launch_command(self) -> str:
     def from_kwargs(kwargs: dict) -> Optional["ContainerSpec"]:
         if "container_uri" not in kwargs:
             return None
-        keys = ("container_uri", "container_engine", "container_addopts")
-        args = [kwargs[key] for key in keys if key in kwargs]
-        return ContainerSpec(*args)
+        keys = ("uri", "engine", "addopts", "gpu_flavour")
+        kwargs = {k: kwargs[k2] for k in keys if (k2 := f"container_{k}") in kwargs}
+        return ContainerSpec(**kwargs)
 
 
 class ReferenceSpec(Protocol):
@@ -226,7 +226,6 @@ def __init__(
         self.kwargs = executor_kwargs
         self.resources = resources  # compute per node
         self.container = container
-        self.env_vars = env_vars or {}
 
         if self.use_gpu:
             msg = ""
@@ -253,7 +252,7 @@ def __init__(
 
         # how long can individual tasks run (in seconds)
         if max_runtime is None:
-            # allow some margin for task cleanup  TODO: pretty random
+            # allow some margin for task cleanup
             max_runtime = max(0.9 * self.lifetime, self.lifetime - 60)
         else:
             max_runtime = str_to_timedelta(max_runtime).seconds
@@ -262,8 +261,8 @@ def __init__(
             warnings.warn(msg)
         self.max_runtime = max_runtime
 
-        # set default WQ resource specs  TODO: type_hint
-        self.spec = None
+        # set default WQ resource specs
+        self.spec: dict | None = None
         if self.executor_type == "workqueue":
             self.spec = {
                 "cores": self.kwargs["cores_per_task"],
@@ -274,7 +273,21 @@ def __init__(
             }
         register_definition(definition=self)
 
+        # handle task environment variables
         # TODO: how to handle env variables?
+        if self.executor_type == "threadpool":
+            # disable thread affinity and busy-idling
+            default_env_vars = {
+                "OMP_PROC_BIND": "FALSE",
+                "OMP_WAIT_POLICY": "PASSIVE",
+                "OMP_NUM_THREADS": f"{self.cores_per_task}",
+                # "OMP_DISPLAY_ENV": "VERBOSE",  # verbose OMP log
+            }
+        else:
+            # assert False, "IMPLEMENT THIS"
+            default_env_vars = {}
+        self.env_vars = default_env_vars | (env_vars or {})
+
         # TODO: check between min_runtime and max_runtime?
 
         pass
@@ -301,6 +314,19 @@ def cores_per_task(self) -> int:
         # assumes all threads are working
         return int(self.resources["cores"] / self.kwargs["max_threads"])
 
+    @property
+    def task_slots(self) -> int:
+        if self.executor_type == "threadpool":
+            return self.kwargs["max_threads"]
+
+        slots = self.resources["cores"] // self.cores_per_task
+        gpu_slots, memory_slots = float("inf"), float("inf")
+        if self.use_gpu:
+            gpu_slots = self.resources["gpus"] // self.kwargs["gpus_per_task"]
+        if (mem_per_task := self.kwargs["mem_per_task"]) > 0:
+            memory_slots = self.resources["memory"] // mem_per_task
+        return min(slots, gpu_slots, memory_slots)
+
     def wrap_in_timeout(self, command: str) -> str:
         if self.max_runtime == float("inf"):
             return command  # noop
@@ -337,19 +363,18 @@ def _create_workqueue(self, path: Path) -> WorkQueueExecutor:
         worker_executable = "work_queue_worker"
         if not isinstance(self, ReferenceEvaluation) and self.container:
             # ModelEvaluation / ModelTraining run in container themselves
-            # Reference instances launch tasks in container
+            # Reference launches tasks in container
             prepend = self.container.launch_command()
             worker_executable = f"{prepend} {worker_executable}"
 
         # TODO: why the custom WQ? -- does not seem necessary (anymore)
-        # executor = MyWorkQueueExecutor(
         executor = WorkQueueExecutor(
             label=self.name,
             working_dir=str(path / self.name),
             provider=self.provider,
             shared_fs=True,
             # autocategory=False,
-            # port=0,
+            port=0,  # avoid multiple executors trying to use the same port
             # max_retries=1,
             # coprocess=False,
             worker_options=" ".join(worker_options),
@@ -420,57 +445,79 @@ class ModelEvaluation(ExecutionDefinition):
     def __init__(
         self,
         timeout: float = 5.0,
+        max_resource_multiplier: int | None = None,
+        allow_oversubscription: bool = True,
         **kwargs,
     ):
         super().__init__(**kwargs)
 
-        self.timeout = timeout  # i-Pi will kill client connections after no response for timeout seconds
+        if self.use_gpu and self.kwargs['gpus_per_task'] > 1:
+            # TODO: 'ConfigurationError' maybe?
+            raise ValueError("No Hamiltonian can do multi-GPU evaluation")
 
-        if self.executor_type == "threadpool":
-            # disable thread affinity and busy-idling
-            env_vars = {
-                "OMP_PROC_BIND": "FALSE",
-                "OMP_WAIT_POLICY": "PASSIVE",
-                "OMP_NUM_THREADS": f"{self.cores_per_task}",
-                # "OMP_DISPLAY_ENV": "VERBOSE",  # verbose OMP log
-            }
-        else:
-            assert False, "IMPLEMENT THIS"
-        self.env_vars = env_vars | self.env_vars
+        # i-Pi will kill client connections after no response for timeout seconds
+        self.timeout = timeout
+
+        # allow MD tasks to consume more computational resources based on walkers and hamiltonians
+        # but never more than available in a single resource block
+        if max_resource_multiplier is None:
+            max_resource_multiplier = self.task_slots
+        elif max_resource_multiplier > self.task_slots:
+            warnings.warn(
+                "Provided 'max_resource_multiplier' exceeds available task slots "
+                f"({max_resource_multiplier} -> {self.task_slots}). "
+                f"Limiting 'max_resource_multiplier'."
+            )
+            max_resource_multiplier = self.task_slots
+        self.max_resource_multiplier = max_resource_multiplier
+
+        # whether i-Pi clients are allowed to share cores/GPUs
+        self.allow_oversubscription = allow_oversubscription
 
     def server_command(self) -> str:
         command = "psiflow-server"
         return self.wrap_in_timeout(command)
 
-    def get_driver_devices(self, nwalkers: int) -> list[dict]:
-        # assumes driver is GPU capable
-        # TODO: what if only 1 gpu is available? Redo this
-        # nclients = min(nwalkers, self.max_workers)
-        nclients = min(nwalkers, 2)
-        if self.use_gpu:
-            return [{"device": f"cuda:{i}"} for i in range(nclients)]
-        else:
-            return [{"device": "cpu"} for _ in range(nclients)]
+    def get_driver_resources(self, n_walkers: int, n_drivers: int) -> list[dict]:
+        """Divide 'expensive' drivers over available resources."""
+        n_clients = n_walkers * n_drivers
+        m = self.max_resource_multiplier
+
+        if n_drivers > m and not self.allow_oversubscription:
+            # the combination of drivers does not fit on available resources
+            raise ValueError(
+                f"Simulation with {n_drivers} independent drivers not possible. "
+                f"Either increase 'max_resource_multiplier' or enable resource oversubscription."
+            )
+        if n_clients > m and self.allow_oversubscription:
+            warnings.warn(
+                f"Simulation wants to employ {n_clients} clients, "
+                f"but can only use {m}x the per-client budget. "
+                f"Oversubscribing CPU/GPU resources."
+            )
+        elif n_clients > m and not self.allow_oversubscription:
+            # limit total numer of clients so they do not fight over resources
+            n_clients = m
 
-    def wq_resources(self, nwalkers: int) -> dict:
+        # TODO: what if (n_clients % n_drivers != 0)
+        #  you will have more copies of some drivers and fewer of others..
+        # TODO: what if (n_clients % m != 0)
+        #  you will have more clients on some GPUs than others
+
+        if not self.use_gpu:
+            return [{"device": "cpu"} for _ in range(n_clients)]
+        return [{"device": f"cuda:{_ % m}"} for _ in range(n_clients)]
+
+    def wq_resources(self, n_clients: int) -> dict:
         if self.spec is None:
             return {}  # threadpool
-        # TODO: reimplement this
-        return self.spec
 
-    # def wq_resources(self, nwalkers):
-    #     if self.use_threadpool:
-    #         return {}
-    #     nclients = min(nwalkers, self.max_workers)
-    #     resource_specification = {}
-    #     resource_specification["cores"] = nclients * self.cores_per_worker
-    #     resource_specification["disk"] = 1000  # some random nontrivial amount?
-    #     memory = 2000 * self.cores_per_worker  # similarly rather random
-    #     resource_specification["memory"] = int(memory)
-    #     resource_specification["running_time_min"] = self.max_simulation_time
-    #     if self.gpu:
-    #         resource_specification["gpus"] = nclients
-    #     return resource_specification
+        spec = self.spec.copy()
+        multi = min(n_clients, self.max_resource_multiplier)
+        spec["cores"] *= multi
+        spec["gpus"] *= multi
+        spec["memory"] *= multi
+        return spec
 
 
 class ModelTraining(ExecutionDefinition):
@@ -654,6 +701,8 @@ def from_config(
         cls,
         parsl_log_level: str,
         default_threads: int,
+        garbage_collect: bool,
+        retries: int,
         **kwargs,
     ) -> "ExecutionContext":
         path = Path.cwd().resolve() / PSIFLOW_INTERNAL
@@ -693,7 +742,11 @@ def from_config(
         executors.extend(internal)
 
         config = Config(
-            executors=executors, run_dir=str(path), initialize_logging=False
+            executors=executors,
+            run_dir=str(path),
+            initialize_logging=False,
+            garbage_collect=garbage_collect,
+            retries=retries,
         )
         return ExecutionContext(config, definitions, path / "context_dir", **kwargs)
 
@@ -827,10 +880,11 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 parsl_log_level: WARNING
 usage_tracking: 3
 default_threads: 4
-max_idletime: 20
 tmpdir_root: /tmp
 keep_tmpdirs: false
-gpu_flavour: nvidia
+container_engine: apptainer
+garbage_collect: true
+retries: 0
 
 ModelEvaluation:
   executor: threadpool
@@ -860,7 +914,7 @@ def create_bash_template(tmpdir_root: str, keep_tmpdirs: bool) -> str:
     # Create and move into new tmpdir for app execution
     tmpdir=$(mktemp -d -p {tmpdir_root} "psiflow-tmp.XXXXXXXXXX")
     cd $tmpdir; echo "tmpdir: $PWD"
-    export {{env}}
+    {{env}}
     printenv
 
     # Actual app definition goes here
@@ -874,9 +928,33 @@ def create_bash_template(tmpdir_root: str, keep_tmpdirs: bool) -> str:
 
 
 def format_env_vars(env_vars: dict) -> str:
-    return " ".join([f"{k}={v}" for k, v in env_vars.items()])
+    if len(env_vars) == 0:
+        return ""
+    return "export" + " ".join([f"{k}={v}" for k, v in env_vars.items()])
 
 
 def str_to_timedelta(s: str) -> timedelta:
     t = datetime.strptime(s, "%H:%M:%S")
-    return timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
\ No newline at end of file
+    return timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
+
+
+def log_dfk_tasks(verbose: bool = False):
+    """Get an overview of all tasks stored in the parsl DFK. For debugging purposes."""
+    dfk = parsl.dfk()
+    parsl.wait_for_current_tasks()
+    log = ["- Parsl task overview -"]
+    if not verbose:
+        log += [f"{i}\t{d['func_name']}" for i, d in dfk.tasks.items()]
+        log.append("- Parsl task overview -")
+        print(*log, sep="\n")
+        return
+
+    for i, d in dfk.tasks.items():
+        args = [(_.split("/")[-1] if isinstance(_, str) else _) for _ in d["args"]]
+        if "inputs" in (kwargs := d["kwargs"]):
+            kwargs["inputs"] = [f.filename for f in kwargs["inputs"]]
+        if "outputs" in kwargs:
+            kwargs["outputs"] = [f.filename for f in kwargs["outputs"]]
+        log.append(f"\n{i}\t{d['func_name']:<30}\n{args}\n{kwargs}")
+    log.append("- Parsl task overview -")
+    print(*log, sep="\n")
diff --git a/psiflow/sampling/sampling.py b/psiflow/sampling/sampling.py
index 3007c37..e37e1b5 100644
--- a/psiflow/sampling/sampling.py
+++ b/psiflow/sampling/sampling.py
@@ -1,6 +1,7 @@
 import math
 import xml.etree.ElementTree as ET
 from dataclasses import dataclass
+from itertools import cycle
 from typing import Optional, Union, Iterable
 
 import parsl
@@ -8,6 +9,7 @@
 from parsl.app.app import bash_app
 from parsl.data_provider.files import File
 from parsl.dataflow.futures import AppFuture, DataFuture
+from sympy import print_glsl
 
 import psiflow
 from psiflow.data import Dataset
@@ -362,6 +364,42 @@ def setup_smotion(
     return smotion
 
 
+def define_clients_n_kwargs(
+    walkers: list[Walker],
+    components: list[HamiltonianComponent],
+    definition: "ModelEvaluation",
+    defaults: dict,
+) -> tuple[list[dict], int]:
+    """Figure out i-Pi MD driver (force evaluator) configuration.
+    How many clients with which arguments on which resources?"""
+
+    # separate hamiltonian components by computational cost
+    cheap, expensive = {}, {}
+    for i, comp in enumerate(components):
+        # the "idx" key corresponds with a serialized function in app inputs
+        if isinstance(comp.hamiltonian, MACEHamiltonian):
+            expensive[i] = comp
+        else:
+            cheap[i] = comp
+
+    # cheap drivers only get a single client
+    cheap_kwargs = []
+    for i, comp in cheap.items():
+        cheap_kwargs.append(defaults | {"idx": i, "address": comp.address})
+
+    # expensive drivers are assigned to clients by ModelEvaluation
+    # TODO: currently there is no distinction between global MLPs (for every system) and
+    #  bias MLPs (for a few systems in the total simulation), possibly leading to load balancing problems
+    n_systems = int(sum([w.nbeads for w in walkers]))
+    expensive_kwargs = definition.get_driver_resources(n_systems, len(expensive))
+    driver_iterator = cycle(expensive.items())
+    for kwargs, (i, comp) in zip(expensive_kwargs, driver_iterator):
+        # TODO: should dtype be configurable?
+        kwargs |= defaults | {"idx": i, "address": comp.address, "dtype": "float32"}
+
+    return cheap_kwargs + expensive_kwargs, len(expensive_kwargs)
+
+
 def make_server_command(
     command: str,
     input_xml: File,
@@ -392,6 +430,7 @@ def make_driver_commands(
     driver_kwargs: list[dict], file_xyz: File, files_hamiltonian: list[File]
 ) -> list[str]:
     """"""
+    # TODO: what if 'file_xyz' contains multiple geometries of different size?
     assert len(driver_kwargs) >= len(files_hamiltonian)
     default = f'i-pi-driver-py -u -S "" -m custom -P {PATH_DRIVER} -a {{address}} -o {{options}} &'
 
@@ -548,29 +587,14 @@ def _sample(
 
     # app setup and IO
     context = psiflow.context()
-    definition = context.definitions["ModelEvaluation"]
     input_file = context.new_file("input_", ".xml")
     _save_xml(simulation, outputs=[input_file])
     inputs = [
         input_file,
         Dataset([w.state for w in walkers]).extxyz,
+        *[c.hamiltonian.serialize_function() for c in hamiltonian_components],
     ]
 
-    # figure out i-Pi MD driver configuration
-    # how many drivers (force evaluators) with which arguments?
-    # remove any Harmonic instances because they are not implemented with sockets     -- TODO: why?
-    max_nclients = int(sum([w.nbeads for w in walkers]))
-    driver_kwargs = []
-    for i, comp in enumerate(hamiltonian_components):
-        inputs.append(comp.hamiltonian.serialize_function())
-        kwargs = {"idx": i, "address": comp.address, "max_force": max_force}
-        if isinstance(comp.hamiltonian, MACEHamiltonian):
-            kwargs["dtype"] = "float32"  # TODO: should this be configurable?
-            for instance_kwargs in definition.get_driver_devices(max_nclients):
-                driver_kwargs.append(kwargs | instance_kwargs)
-        else:
-            driver_kwargs.append(kwargs)
-
     outputs = [context.new_file("data_", ".xyz")]
     outputs += [context.new_file("simulation_", ".txt") for _ in walkers]
     if keep_trajectory:
@@ -590,6 +614,11 @@ def _sample(
     else:
         coupling_copy_command = None
 
+    definition = context.definitions["ModelEvaluation"]
+    driver_kwargs, n_clients = define_clients_n_kwargs(
+        walkers, hamiltonian_components, definition, {"max_force": max_force}
+    )
+
     # TODO: an app to check for valid input? (e.g., PBC + barostat)
     result = execute_ipi(
         len(walkers),
@@ -602,7 +631,7 @@ def _sample(
         bash_template=context.bash_template,
         inputs=inputs,
         outputs=outputs,
-        parsl_resource_specification=definition.wq_resources(max_nclients),
+        parsl_resource_specification=definition.wq_resources(n_clients),
     )
 
     # process MD output

From ad3e31eaa51d5b802d7951cb5c14a8095afd88db Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Tue, 10 Mar 2026 18:45:21 +0100
Subject: [PATCH 08/15] Update execution.py

Update ModelTraining: this will be adapted when we update MACE etc.
Update ReferenceEvaluation: updated memory_limit; allow creating Reference instances that ask for fewer cores than specified in ReferenceEvaluation (eliminating the need for CP2K/CP2K_small/...).
---
 psiflow/execution.py | 292 ++++++++++++++++++++-----------------------
 1 file changed, 133 insertions(+), 159 deletions(-)

diff --git a/psiflow/execution.py b/psiflow/execution.py
index 854f020..a29f188 100644
--- a/psiflow/execution.py
+++ b/psiflow/execution.py
@@ -1,17 +1,25 @@
 import logging
-import re
 import shutil
 import sys
 import warnings
 import subprocess
 import textwrap
+import inspect
 from datetime import datetime, timedelta
 from dataclasses import dataclass
 from pathlib import Path
 from threading import Lock
-
-# see https://stackoverflow.com/questions/59904631/python-class-constants-in-dataclasses
-from typing import Any, Optional, Union, ClassVar, Protocol, Iterable, Sequence
+from typing import (
+    Any,
+    Optional,
+    Union,
+    ClassVar,
+    Protocol,
+    Iterable,
+    Sequence,
+    Callable,
+    TypeVar,
+)
 
 import parsl
 import psutil
@@ -29,11 +37,13 @@
 from parsl.providers import LocalProvider, SlurmProvider
 from parsl.providers.base import ExecutionProvider
 
+from psiflow.utils.config import PSIFLOW_INTERNAL, PARSL_LOGFILE, PSIFLOW_LOGFILE
+from psiflow.utils.logging import setup_logging
+
 
 logger = logging.getLogger(__name__)  # logging per module
 
 
-PSIFLOW_INTERNAL = "psiflow_internal"  # TODO: move configuration files somewhere
 
 
 @dataclass
@@ -41,7 +51,7 @@ class ContainerSpec:
     """Controls container configuration"""
 
     uri: str
-    engine: str
+    engine: str = "apptainer"
     addopts: str = " --no-eval -e --no-mount home -W /tmp --writable-tmpfs"
     gpu_flavour: str | None = None
 
@@ -52,27 +62,20 @@ def __post_init__(self):
 
     def launch_command(self) -> str:
         pwd = Path.cwd().resolve()  # access to data / internal dir
-        args = [self.engine, "exec", self.addopts, f"--bind {pwd}"]
+        args = [self.engine, "run", self.addopts, f"--bind {pwd}"]
         if self.gpu_flavour == "cuda":
             args.append("--nv")
         elif self.gpu_flavour == "rocm":
             args.append("--rocm")
+        args.append(self.uri)
         return " ".join(args)
 
-    @staticmethod
-    def from_kwargs(kwargs: dict) -> Optional["ContainerSpec"]:
-        if "container_uri" not in kwargs:
-            return None
-        keys = ("uri", "engine", "addopts", "gpu_flavour")
-        kwargs = {k: kwargs[k2] for k in keys if (k2 := f"container_{k}") in kwargs}
-        return ContainerSpec(**kwargs)
-
 
 class ReferenceSpec(Protocol):
     """Defines default options for Reference implementations"""
 
     name: ClassVar[str]
-    reference_args: ClassVar[tuple[str, ...]]  # TODO: update 'cores_per_worker'
+    reference_args: ClassVar[tuple[str, ...]]
     mpi_command: str
     mpi_args: Sequence[str]
     executable: str
@@ -80,19 +83,14 @@ class ReferenceSpec(Protocol):
     def launch_command(self) -> str:
         raise NotImplementedError
 
-    @classmethod
-    def from_kwargs(cls, **kwargs):
-        keys = ("mpi_command", "mpi_args", "executable")
-        return cls(**{k: kwargs[k] for k in keys if k in kwargs})
-
 
 @dataclass
 class CP2KReferenceSpec(ReferenceSpec):
     name = "CP2K"
-    reference_args = ("cores_per_worker",)
-    mpi_command: str = "mpirun -np {cores_per_worker}"
+    reference_args = ("cores_per_task",)
+    mpi_command: str = "mpiexec -n {cores_per_task}"
     mpi_args: Sequence[str] = (
-        "-ENV OMP_NUM_THREADS=1",
+        "-genv OMP_NUM_THREADS=1",
         "--bind-to core",
         "--map-by core",
     )
@@ -105,8 +103,8 @@ def launch_command(self):
 @dataclass
 class GPAWReferenceSpec(ReferenceSpec):
     name = "GPAW"
-    reference_args = ("cores_per_worker",)
-    mpi_command: str = "mpirun -np {cores_per_worker}"
+    reference_args = ("cores_per_task",)
+    mpi_command: str = "mpirun -np {cores_per_task}"
     mpi_args: Sequence[str] = (
         "-x OMP_NUM_THREADS=1",
         "--bind-to core",
@@ -260,6 +258,12 @@ def __init__(
             msg = "Allowed task runtime exceeds provider walltime. Tasks might get killed by the scheduler."
             warnings.warn(msg)
         self.max_runtime = max_runtime
+        if (
+            self.executor_type == "workqueue"
+            and self.kwargs["min_runtime"] >= self.max_runtime
+        ):
+            msg = "Minimum task runtime exceeds maximum runtime. WQ might not not start tasks."
+            warnings.warn(msg)
 
         # set default WQ resource specs
         self.spec: dict | None = None
@@ -273,24 +277,24 @@ def __init__(
             }
         register_definition(definition=self)
 
-        # handle task environment variables
         # TODO: how to handle env variables?
+        # disable thread affinity and busy-idling until we can isolate task resources
+        default_env_vars = {
+            "OMP_PROC_BIND": "FALSE",
+            "OMP_WAIT_POLICY": "PASSIVE",
+            "OMP_DISPLAY_ENV": "VERBOSE",  # verbose OMP log
+        }
         if self.executor_type == "threadpool":
-            # disable thread affinity and busy-idling
-            default_env_vars = {
-                "OMP_PROC_BIND": "FALSE",
-                "OMP_WAIT_POLICY": "PASSIVE",
-                "OMP_NUM_THREADS": f"{self.cores_per_task}",
-                # "OMP_DISPLAY_ENV": "VERBOSE",  # verbose OMP log
-            }
+            default_env_vars |= {"OMP_NUM_THREADS": f"{self.cores_per_task}"}
         else:
-            # assert False, "IMPLEMENT THIS"
-            default_env_vars = {}
-        self.env_vars = default_env_vars | (env_vars or {})
+            # WQ sets OMP_NUM_THREADS itself
+            pass
 
-        # TODO: check between min_runtime and max_runtime?
+        # yaml parsing might un-stringify some keys
+        env_vars = {k: str(v).upper() for k, v in (env_vars or {}).items()}
+        self.env_vars = default_env_vars | (env_vars or {})
 
-        pass
+        return
 
     @property
     def name(self) -> str:
@@ -319,13 +323,13 @@ def task_slots(self) -> int:
         if self.executor_type == "threadpool":
             return self.kwargs["max_threads"]
 
-        slots = self.resources["cores"] // self.cores_per_task
         gpu_slots, memory_slots = float("inf"), float("inf")
+        cpu_slots = self.resources["cores"] // self.cores_per_task
         if self.use_gpu:
             gpu_slots = self.resources["gpus"] // self.kwargs["gpus_per_task"]
         if (mem_per_task := self.kwargs["mem_per_task"]) > 0:
             memory_slots = self.resources["memory"] // mem_per_task
-        return min(slots, gpu_slots, memory_slots)
+        return min(cpu_slots, gpu_slots, memory_slots)
 
     def wrap_in_timeout(self, command: str) -> str:
         if self.max_runtime == float("inf"):
@@ -334,6 +338,13 @@ def wrap_in_timeout(self, command: str) -> str:
         # send SIGTERM after max_runtime, follow with SIGKILL 30s later
         return f"timeout -k 30s {self.max_runtime}s {command}"
 
+    def wrap_in_srun(self, command: str) -> str:
+        # TODO: stub -- this does not work
+        if self.provider is None:
+            return command  # noop
+
+        return f"srun -t 1 -c $CORES {command}"
+
     def _create_threadpool(self, path: Path) -> ThreadPoolExecutor:
         max_threads = self.kwargs["max_threads"]
         return ThreadPoolExecutor(self.name, max_threads, working_dir=str(path))
@@ -345,11 +356,7 @@ def _create_workqueue(self, path: Path) -> WorkQueueExecutor:
         timeout = int(1e6) if self.resources["nodes"] > 1 else 20
         cores = self.resources["cores"]
 
-        worker_options = [
-            "--parent-death",
-            f"--cores={cores}",
-            f"--timeout={timeout}",
-        ]
+        worker_options = ["--parent-death", f"--cores={cores}", f"--timeout={timeout}"]
         if (memory := self.resources["memory"]) is not None:
             worker_options.append(f"--memory={memory * 1000}")  # in MB
         if (lifetime := self.lifetime) != float("inf"):
@@ -367,16 +374,12 @@ def _create_workqueue(self, path: Path) -> WorkQueueExecutor:
             prepend = self.container.launch_command()
             worker_executable = f"{prepend} {worker_executable}"
 
-        # TODO: why the custom WQ? -- does not seem necessary (anymore)
         executor = WorkQueueExecutor(
             label=self.name,
             working_dir=str(path / self.name),
             provider=self.provider,
             shared_fs=True,
-            # autocategory=False,
             port=0,  # avoid multiple executors trying to use the same port
-            # max_retries=1,
-            # coprocess=False,
             worker_options=" ".join(worker_options),
             worker_executable=worker_executable,
             scaling_cores_per_worker=cores,
@@ -410,11 +413,13 @@ def from_config(
             }
         elif executor == "workqueue":
             executor_kwargs = {
-                "cores_per_task": kwargs.get("cores_per_task", 0),
+                "cores_per_task": kwargs.get("cores_per_task", 1),
                 "gpus_per_task": kwargs.get("gpus_per_task", 0),
                 "mem_per_task": kwargs.get("mem_per_task", 0),
             }
-            assert any(v != 0 for v in executor_kwargs.values())
+            assert (
+                executor_kwargs["cores_per_task"] > 0
+            ), "WQ needs at least one core to launch tasks"
             min_runtime = kwargs.get("min_runtime", "00:00:00")
             executor_kwargs["min_runtime"] = str_to_timedelta(min_runtime).seconds
         else:
@@ -451,7 +456,7 @@ def __init__(
     ):
         super().__init__(**kwargs)
 
-        if self.use_gpu and self.kwargs['gpus_per_task'] > 1:
+        if self.use_gpu and self.kwargs["gpus_per_task"] > 1:
             # TODO: 'ConfigurationError' maybe?
             raise ValueError("No Hamiltonian can do multi-GPU evaluation")
 
@@ -521,40 +526,23 @@ def wq_resources(self, n_clients: int) -> dict:
 
 
 class ModelTraining(ExecutionDefinition):
-    def __init__(
-        self,
-        multigpu: bool = False,  # TODO: how to handle this?
-        **kwargs,
-    ) -> None:
+    def __init__(self, **kwargs) -> None:
         super().__init__(**kwargs)
-        self.multigpu = multigpu
-        if self.multigpu:
-            # TODO: why? Think this might be a multinode thing - which I do not care about
-            message = (
-                "the max_training_time keyword does not work "
-                "in combination with multi-gpu training. Adjust "
-                "the maximum number of epochs to control the "
-                "duration of training"
-            )
-            assert self.max_runtime is None, message
 
         if not self.use_gpu:
             warnings.warn(
                 "ModelTraining is configured for CPU operation. Is this what you want?"
             )
 
-        # default_env_vars = {
-        #     "OMP_NUM_THREADS": str(self.cores_per_worker),
-        #     "KMP_AFFINITY": "granularity=fine,compact,1,0",
-        #     "KMP_BLOCKTIME": "1",
-        #     "OMP_PROC_BIND": "spread",  # different from Model Eval
-        #     "PYTHONUNBUFFERED": "TRUE",
-        # }
-        # if env_vars is None:
-        #     env_vars = default_env_vars
-        # else:
-        #     default_env_vars.update(env_vars)
-        #     env_vars = default_env_vars
+        # if self.multigpu:
+        #     # TODO: why? Think this might be a multinode thing - which I do not care about
+        #     message = (
+        #         "the max_training_time keyword does not work "
+        #         "in combination with multi-gpu training. Adjust "
+        #         "the maximum number of epochs to control the "
+        #         "duration of training"
+        #     )
+        #     assert self.max_runtime is None, message
 
     def train_command(self, initialize: bool = False):
         command = "psiflow-mace-train"
@@ -563,78 +551,52 @@ def train_command(self, initialize: bool = False):
     def wq_resources(self, *args, **kwargs) -> dict:
         if self.spec is None:
             return {}  # threadpool
-        # TODO: reimplement this
-        return self.spec
-
-    # def wq_resources(self):
-    #     if self.use_threadpool:
-    #         return {}
-    #     resource_specification = {}
-    #
-    #     if self.multigpu:
-    #         nworkers = int(self.cores_available / self.cores_per_worker)
-    #     else:
-    #         nworkers = 1
-    #
-    #     resource_specification["gpus"] = nworkers  # one per GPU
-    #     resource_specification["cores"] = self.cores_available
-    #     resource_specification["disk"] = (
-    #         1000 * nworkers
-    #     )  # some random nontrivial amount?
-    #     memory = 1000 * self.cores_available  # similarly rather random
-    #     resource_specification["memory"] = int(memory)
-    #     resource_specification["running_time_min"] = self.max_training_time
-    #     return resource_specification
+        return self.spec.copy()
 
 
 class ReferenceEvaluation(ExecutionDefinition):
     def __init__(
         self,
-        reference: "ReferenceSpec",
-        memory_limit: Optional[str] = None,  # TODO: how does this work?
+        reference: ReferenceSpec,
+        memory_limit: Optional[float] = None,
         **kwargs,
     ) -> None:
-        # TODO: how to know which code?
-        # before super().__init__ because 'name' attribute needed
-        self.reference = reference
         super().__init__(**kwargs)
-        self.memory_limit = memory_limit
+        self.reference = reference
+        self.memory_limit = memory_limit  # in GB
+
+        if self.use_gpu:
+            warnings.warn("Reference calculations do not support GPU computation yet.")
 
     def command(self):
-        # TODO: this does not work probably
-        launch_command = self.spec.launch_command()
-        kwargs = {k: getattr(self, k) for k in self.spec.reference_args}
-        launch_command = launch_command.format(**kwargs)
+        command = self.reference.launch_command()
+        kwargs = {k: getattr(self, k) for k in self.reference.reference_args}
+        command = command.format(**kwargs)
 
         if self.container is not None:
-            launch_command = f"{self.container.launch_command()} {launch_command}"
-
-        launch_command = self.wrap_in_timeout(launch_command)
-
-        commands = []
-        if self.memory_limit is not None:
-            # based on https://stackoverflow.com/a/42865957/2002471
-            units = {"KB": 1, "MB": 2**10, "GB": 2**20, "TB": 2**30}
-
-            def parse_size(size):  # TODO: to utils?
-                size = size.upper()
-                if not re.match(r" ", size):
-                    size = re.sub(r"([KMGT]?B)", r" \1", size)
-                number, unit = [string.strip() for string in size.split()]
-                return int(float(number) * units[unit])
+            command = f"{self.container.launch_command()} {command}"
+        if (mem := self.memory_limit) is not None:
+            # set max RAM usage and disable swap storage - requires systemd-run
+            command = f"systemd-run --user --scope -p MemoryMax={mem}G -p MemorySwapMax=0 {command}"
 
-            commands.append(f"ulimit -v {parse_size(self.memory_limit)}")
-
-        # exit code 0 so parsl always thinks bash app succeeded
-        return "\n".join([*commands, launch_command, "exit 0"])
+        return self.wrap_in_timeout(command)
 
-    def wq_resources(self, *args, **kwargs) -> dict:
+    def wq_resources(self, n_cores: int | None = None) -> dict:
         if self.spec is None:
             return {}  # threadpool
-        return self.spec
+
+        fraction = 1
+        if n_cores is not None:
+            fraction = n_cores / self.kwargs["cores_per_task"]
+        spec = self.spec.copy()
+        spec["cores"] = int(spec["cores"] * fraction)
+        spec["memory"] *= fraction
+        return spec
 
     @property
     def name(self) -> str:
+        if not hasattr(self, "reference"):
+            return super().name  # during init
         return self.reference.name
 
 
@@ -670,7 +632,6 @@ def __init__(
         # make sure task tmpdirs can be made
         Path(tmpdir_root).mkdir(parents=True, exist_ok=True)
         self.bash_template = create_bash_template(tmpdir_root, keep_tmpdirs)
-
         self.file_index = {}
         self.lock = Lock()
         parsl.load(config)
@@ -701,8 +662,6 @@ def from_config(
         cls,
         parsl_log_level: str,
         default_threads: int,
-        garbage_collect: bool,
-        retries: int,
         **kwargs,
     ) -> "ExecutionContext":
         path = Path.cwd().resolve() / PSIFLOW_INTERNAL
@@ -711,27 +670,36 @@ def from_config(
         path.mkdir(parents=True)
         patch_parsl_dirtree()
 
-        log_file = str(path / "parsl.log")
+        # setup logging
+        log_file = str(path / PARSL_LOGFILE)
         log_level = getattr(logging, parsl_log_level)
         parsl.set_file_logger(filename=log_file, name="parsl", level=log_level)
+        setup_logging(file=path / PSIFLOW_LOGFILE)  # TODO
+
+        # default container for ModelEvaluation and ModelTraining
+        base_container = None
+        if "container" in kwargs:
+            base_container = make_cls(ContainerSpec, **kwargs["container"])
 
         # create definitions
-        base_container = ContainerSpec.from_kwargs(kwargs)
         model_evaluation = ModelEvaluation.from_config(
             container=base_container, **kwargs["ModelEvaluation"]
         )
         model_training = ModelTraining.from_config(
             container=base_container, **kwargs["ModelTraining"]
         )
-
-        reference_evaluations = []  # reference evaluations might be class specific
-        for key in list(kwargs.keys()):
-            if key[:4] in REFERENCE_SPECS:  # allow for e.g., CP2K_small
+        reference_evaluations = []  # reference evaluations are class specific
+        for key, reference_cls in REFERENCE_SPECS.items():
+            if key in kwargs:
                 config = kwargs[key]
+                container = None
+                if "container" in config:
+                    container = make_cls(ContainerSpec, **config.pop("container"))
+                reference = make_cls(reference_cls, **config)
                 reference_evaluation = ReferenceEvaluation.from_config(
-                    reference=REFERENCE_SPECS[key[:4]].from_kwargs(**config),
-                    container=ContainerSpec.from_kwargs(kwargs | config),
-                    **config,
+                    reference=reference,
+                    container=container,
+                    **config,  # make sure the container key is removed
                 )
                 reference_evaluations.append(reference_evaluation)
         definitions = [model_evaluation, model_training, *reference_evaluations]
@@ -741,12 +709,12 @@ def from_config(
         internal = make_default_executors(default_threads, path, base_container)
         executors.extend(internal)
 
-        config = Config(
+        config = make_cls(
+            Config,
             executors=executors,
             run_dir=str(path),
             initialize_logging=False,
-            garbage_collect=garbage_collect,
-            retries=retries,
+            **kwargs,
         )
         return ExecutionContext(config, definitions, path / "context_dir", **kwargs)
 
@@ -828,16 +796,10 @@ def __call__(self, command: str, tasks_per_node: int, nodes_per_block: int) -> s
         return x
 
 
-# class MyWorkQueueExecutor(WorkQueueExecutor):
-#     # TODO: what does this do?
-#     def _get_launch_command(self, block_id):
-#         return self.worker_command
-
-
 # TODO: move everything below to appropriate files
 
 # TODO: attempt at managing priority through global state
-WQ_RESOURCES_REGISTRY = {}
+WQ_RESOURCES_REGISTRY = []
 
 
 def register_definition(definition: ExecutionDefinition) -> None:
@@ -845,7 +807,7 @@ def register_definition(definition: ExecutionDefinition) -> None:
     if (spec := definition.spec) is None:
         return  # threadpool does not have priority
 
-    WQ_RESOURCES_REGISTRY[definition.name] = spec
+    WQ_RESOURCES_REGISTRY.append((definition.name, spec))
     spec["priority"] = SetWQPriority.default
 
 
@@ -863,14 +825,14 @@ def __init__(self, value: int, verbose: bool = False) -> None:
     def __enter__(self):
         if self.verbose:
             print(f"SetWQPriority setting priority:\t{self.value}")
-        for n, spec in WQ_RESOURCES_REGISTRY.items():
+        for n, spec in WQ_RESOURCES_REGISTRY:
             spec["priority"] = self.value
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         if self.verbose:
             print(f"SetWQPriority unsetting {self.value}")
-        for n, spec in WQ_RESOURCES_REGISTRY.items():
+        for n, spec in WQ_RESOURCES_REGISTRY:
             spec["priority"] = SetWQPriority.default
 
 
@@ -882,7 +844,6 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 default_threads: 4
 tmpdir_root: /tmp
 keep_tmpdirs: false
-container_engine: apptainer
 garbage_collect: true
 retries: 0
 
@@ -958,3 +919,16 @@ def log_dfk_tasks(verbose: bool = False):
         log.append(f"\n{i}\t{d['func_name']:<30}\n{args}\n{kwargs}")
     log.append("- Parsl task overview -")
     print(*log, sep="\n")
+
+
+# TODO: after 3.12, this is no longer needed
+#  https://docs.python.org/3/library/typing.html
+T = TypeVar("T")
+
+
+def make_cls(cls: type[T], **kwargs: Any) -> T:
+    """Very simple class factory. Use introspection to filter args and kwargs."""
+    sign = inspect.signature(cls)
+    argument_names = list(sign.parameters.keys())
+    arguments = {k: kwargs[k] for k in argument_names if k in kwargs}
+    return cls(**arguments)

From bb09d393945a1eb558ca1d4df2cd2f6222a62cc1 Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Tue, 10 Mar 2026 18:46:53 +0100
Subject: [PATCH 09/15] updates and bugfixes

- update modules to work with the new execution module and syntax
- fix tests
---
 configs/local_test.yaml        | 40 +++++++++++++++++-----------------
 psiflow/free_energy/phonons.py |  2 +-
 psiflow/reference/cp2k_.py     | 30 +++++++++++--------------
 psiflow/reference/dummy.py     |  6 ++---
 psiflow/reference/gpaw_.py     | 14 ++++--------
 psiflow/reference/orca_.py     | 15 +++++--------
 psiflow/reference/reference.py | 34 ++++++++++++++++++-----------
 psiflow/sampling/optimize.py   |  2 +-
 psiflow/sampling/server.py     |  2 +-
 psiflow/sampling/walker.py     |  3 ++-
 psiflow/serialization.py       |  7 +++---
 11 files changed, 75 insertions(+), 80 deletions(-)

diff --git a/configs/local_test.yaml b/configs/local_test.yaml
index 3f9dc2b..b44c2af 100644
--- a/configs/local_test.yaml
+++ b/configs/local_test.yaml
@@ -1,32 +1,32 @@
 ---
-parsl_log_level: WARNING
-retries: 0
-make_symlinks: false
-
 ModelEvaluation:
-  gpu: false
-  use_threadpool: false
-  max_simulation_time: 1
-  
+    executor: threadpool
+    max_threads: 4
+    max_runtime: 00:00:20
+    
 ModelTraining:
-  gpu: true
-  use_threadpool: true
-  max_training_time: 1
-  max_workers: 1  # suppress assertion for multigpu training
+    executor: threadpool
+    max_threads: 4
+    max_runtime: 00:00:20
+
   
 CP2K:
-  cores_per_worker: 1
-  max_evaluation_time: 0.1
-  container_uri: 'oras://ghcr.io/molmod/cp2k:2024.1'
+  executor: workqueue
+  cores_per_task: 2
+  max_runtime: 00:00:20
+  memory_limit: 2
+  container:
+    uri: docker://cp2k/cp2k:2025.2_mpich_x86_64_psmp
   
 GPAW:
-  cores_per_worker: 1
-  max_evaluation_time: 0.1
-  container_uri: 'oras://ghcr.io/molmod/gpaw:24.1'
+  executor: workqueue
+  cores_per_task: 2
+  container:
+    uri: oras://ghcr.io/molmod/gpaw:24.1
   
 ORCA:
-  cores_per_worker: 1
-  max_evaluation_time: 0.1
+  executor: workqueue
+  cores_per_task: 2
 
 
 ...
diff --git a/psiflow/free_energy/phonons.py b/psiflow/free_energy/phonons.py
index 2c2513c..2f449ef 100644
--- a/psiflow/free_energy/phonons.py
+++ b/psiflow/free_energy/phonons.py
@@ -162,7 +162,7 @@ def compute_harmonic(
         inputs.append(comp.hamiltonian.serialize_function(dtype="float64"))
         kwargs = {"idx": i, "address": comp.address}
         if isinstance(comp.hamiltonian, MACEHamiltonian):
-            kwargs |= definition.get_driver_devices(1)[0]
+            kwargs |= definition.get_driver_resources(1, 1)[0]
         driver_kwargs.append(kwargs)
 
     result = execute_ipi(
diff --git a/psiflow/reference/cp2k_.py b/psiflow/reference/cp2k_.py
index eb30e73..4d707f8 100644
--- a/psiflow/reference/cp2k_.py
+++ b/psiflow/reference/cp2k_.py
@@ -1,7 +1,7 @@
 import copy
 import io
 import warnings
-from typing import Optional, Union
+from typing import Optional, Union, ClassVar
 
 import numpy as np
 from ase.data import chemical_symbols
@@ -56,15 +56,16 @@ def modify_input(input_dict: dict, properties: tuple) -> None:
 
 
 def parse_output(output_str: str, properties: tuple) -> dict[str, float | np.ndarray]:
+    """Very basic output parser. Perhaps check the cp2k-output-tools package?"""
     lines = output_str.split("\n")
     data = {}
 
     # output status
-    idx = find_line(lines, "CP2K", reverse=True, max_lines=250)
+    key = "SUBROUTINE"
+    idx = find_line(lines, key, reverse=True, max_lines=100)
     data["status"] = status = Status.SUCCESS if idx is not None else Status.FAILED
     if status == Status.SUCCESS:
-        # total runtime
-        data["runtime"] = float(lines[idx].split()[-1])
+        data["runtime"] = float(lines[idx + 2].split()[-1])  # total runtime
 
     # find number of atoms
     idx = find_line(lines, "TOTAL NUMBERS AND MAXIMUM NUMBERS")
@@ -77,7 +78,7 @@ def parse_output(output_str: str, properties: tuple) -> dict[str, float | np.nda
     data["positions"] = lines_to_array(lines[idx : idx + natoms], 4, 7)
 
     # read energy
-    key = "ENERGY| Total FORCE_EVAL ( QS ) energy [a.u.]"
+    key = "ENERGY| Total FORCE_EVAL ( QS ) energy"
     idx = find_line(lines, key, idx)
     data["energy"] = float(lines[idx].split()[-1]) * Ha
 
@@ -85,28 +86,23 @@ def parse_output(output_str: str, properties: tuple) -> dict[str, float | np.nda
         return data
 
     # read forces
-    key = "ATOMIC FORCES in [a.u.]"
-    idx = find_line(lines, key, idx) + 3
-    forces = lines_to_array(lines[idx : idx + natoms], 3)
+    key = "FORCES| Atomic forces"
+    idx = find_line(lines, key, idx) + 2
+    forces = lines_to_array(lines[idx : idx + natoms], 2, 5)
 
     return data | {"forces": forces * Ha / Bohr}
 
 
 @psiflow.serializable
 class CP2K(Reference):
+    executor: ClassVar[str] = "CP2K"
     _execute_label = "cp2k_singlepoint"
     input_dict: dict
 
-    def __init__(
-        self,
-        input_str: str,
-        executor: str = "CP2K",
-        outputs: Union[tuple, list] = ("energy", "forces"),
-    ):
-        self.executor = executor
-        self.outputs = tuple(outputs)
+    def __init__(self, input_str: str, **kwargs):
+        super().__init__(**kwargs)
         self.input_dict = str_to_dict(input_str)
-        modify_input(self.input_dict, outputs)
+        modify_input(self.input_dict, self.outputs)
         self._create_apps()
 
     def compute_atomic_energy(self, element, box_size=None) -> AppFuture[float]:
diff --git a/psiflow/reference/dummy.py b/psiflow/reference/dummy.py
index f947efb..eeb2baa 100644
--- a/psiflow/reference/dummy.py
+++ b/psiflow/reference/dummy.py
@@ -12,16 +12,16 @@
 
 @psiflow.serializable
 class ReferenceDummy(Reference):
+    executor = "HTEX"
     _execute_label = "dummy_singlepoint"
 
-    def __init__(self, outputs: Union[tuple, list] = ("energy", "forces")):
-        self.outputs = outputs
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
         self._create_apps()
 
     def _create_apps(self):
         # psiflow.context().definitions does not contain "default_htex"
         self.execute_command = ""
-        self.app_pre = self.create_input
         self.app_execute = partial(
             bash_app(_execute, executors=["default_htex"]),
             reference=self,
diff --git a/psiflow/reference/gpaw_.py b/psiflow/reference/gpaw_.py
index a119271..e787abb 100644
--- a/psiflow/reference/gpaw_.py
+++ b/psiflow/reference/gpaw_.py
@@ -1,6 +1,6 @@
 import json
 from pathlib import Path
-from typing import Optional, Union
+from typing import Optional, Union, ClassVar
 
 from parsl import File
 from parsl.dataflow.futures import AppFuture
@@ -34,22 +34,16 @@ def parse_output(stdout: str, properties: tuple[str, ...]) -> dict:
 
 @psiflow.serializable
 class GPAW(Reference):
+    executor: ClassVar[str] = "GPAW"
     _execute_label = "gpaw_singlepoint"
     parameters: dict
     script: str
 
-    def __init__(
-        self,
-        parameters: dict,
-        script: str | Path = FILEPATH,
-        outputs: Union[tuple, list] = ("energy", "forces"),
-        executor: str = "GPAW",
-    ):
-        self.outputs = tuple(outputs)
+    def __init__(self, parameters: dict, script: str | Path = FILEPATH, **kwargs):
+        super().__init__(**kwargs)
         self.parameters = parameters
         assert (script := Path(script)).is_file()
         self.script = str(script.resolve())  # absolute path
-        self.executor = executor
         self._create_apps()
 
     def compute_atomic_energy(self, element, box_size=None) -> AppFuture:
diff --git a/psiflow/reference/orca_.py b/psiflow/reference/orca_.py
index ca14115..509aa25 100644
--- a/psiflow/reference/orca_.py
+++ b/psiflow/reference/orca_.py
@@ -1,7 +1,7 @@
 import warnings
 import re
 from functools import partial
-from typing import Optional, Union
+from typing import Optional, Union, ClassVar
 
 import ase.symbols
 import numpy as np
@@ -121,20 +121,15 @@ def parse_output(stdout: str, properties: tuple[str, ...]) -> dict:
 
 @psiflow.serializable
 class ORCA(Reference):
+    executor: ClassVar[str] = "ORCA"
     _execute_label = "orca_singlepoint"
     input_template: str
     input_kwargs: dict
 
-    def __init__(
-        self,
-        input_template: str,
-        executor: str = "ORCA",
-        outputs: Union[tuple, list] = ("energy", "forces"),
-    ):
-        self.executor = executor
-        self.input_template = check_input(input_template, outputs)
+    def __init__(self, input_template: str, **kwargs):
+        super().__init__(**kwargs)
+        self.input_template = check_input(input_template, self.outputs)
         self.input_kwargs = DEFAULT_KWARGS.copy()  # TODO: user control?
-        self.outputs = tuple(outputs)
         self._create_apps()
 
     def _create_apps(self):
diff --git a/psiflow/reference/reference.py b/psiflow/reference/reference.py
index 6a8c008..dc29ec2 100644
--- a/psiflow/reference/reference.py
+++ b/psiflow/reference/reference.py
@@ -93,6 +93,7 @@ def compute_dataset(
 def _execute(
     reference: Reference,
     inputs: list[File],
+    bash_template: str,
     parsl_resource_specification: Optional[dict] = None,
     stdout: str = parsl.AUTO_LOGNAME,
     stderr: str = parsl.AUTO_LOGNAME,
@@ -100,8 +101,7 @@ def _execute(
 ) -> str:
     # TODO: we do not set env_vars here?
     command = reference.get_shell_command(inputs)
-    template = psiflow.context().bash_template
-    return template.format(commands=command, env='>/dev/null')
+    return bash_template.format(commands=command, env=">/dev/null")
 
 
 def _process_output(
@@ -114,7 +114,7 @@ def _process_output(
     try:
         data = reference.parse_output(stdout)
     except LineNotFoundError:
-        # TODO: find out what went wrong
+        # TODO: find out what went wrong?
         data = {"status": Status.FAILED}
     data |= {"stdout": Path(inputs[0]), "stderr": Path(inputs[1])}
     return update_geometry(geom, data)
@@ -123,10 +123,10 @@ def _process_output(
 @join_app
 def evaluate(reference: Reference, geom: Geometry) -> AppFuture[Geometry]:
     """"""
-    if geom == NullState:
+    if geom == NullState:  # TODO: remove this
         warnings.warn("Skipping NullState..")
         return copy_app_future(geom)
-    execute, *files = reference.app_pre(geom=geom)
+    execute, *files = reference.create_input(geom=geom)
     if not execute:  # TODO: should we reset geom?
         return copy_app_future(geom)
     future = reference.app_execute(inputs=files)
@@ -138,14 +138,20 @@ def evaluate(reference: Reference, geom: Geometry) -> AppFuture[Geometry]:
 
 @psiflow.serializable
 class Reference(Computable):
-    outputs: Union[list[str], tuple[str, ...]]
+    outputs: Sequence[str]
     batch_size: ClassVar[int] = 1  # TODO: not really used
-    executor: str
-    app_pre: ClassVar[Callable]  # TODO: fix serialisation
-    app_execute: ClassVar[Callable]
+    app_execute: ClassVar[Callable]  # TODO: fix serialisation
     app_post: ClassVar[Callable]
     _execute_label: ClassVar[str]
     execute_command: str
+    executor: ClassVar[str]
+    n_cores: Optional[int]
+
+    def __init__(
+        self, outputs: Sequence[str] = ("energy", "forces"), n_cores: int | None = None
+    ):
+        self.outputs: tuple[str, ...] = tuple(outputs)
+        self.n_cores = n_cores
 
     def compute(
         self,
@@ -182,14 +188,16 @@ def compute_dataset(self, dataset: Dataset) -> Dataset:
         return Dataset(future)
 
     def _create_apps(self):
-        definition = psiflow.context().definitions[self.executor]
+        context = psiflow.context()
+        definition = context.definitions[self.executor]
+        if (n := self.n_cores) is not None:
+            assert n <= definition.spec["cores"]
         self.execute_command = definition.command()
-        wq_resources = definition.wq_resources()
-        self.app_pre = self.create_input
         self.app_execute = partial(
             bash_app(_execute, executors=[self.executor]),
             reference=self,
-            parsl_resource_specification=wq_resources,
+            bash_template=context.bash_template,
+            parsl_resource_specification=definition.wq_resources(n),
             label=self._execute_label,
         )
         self.app_post = partial(
diff --git a/psiflow/sampling/optimize.py b/psiflow/sampling/optimize.py
index c8cd68c..cb531a4 100644
--- a/psiflow/sampling/optimize.py
+++ b/psiflow/sampling/optimize.py
@@ -179,7 +179,7 @@ def optimize(
         inputs.append(comp.hamiltonian.serialize_function(dtype="float64"))
         kwargs = {"idx": i, "address": comp.address}
         if isinstance(comp.hamiltonian, MACEHamiltonian):
-            kwargs |= definition.get_driver_devices(1)[0]
+            kwargs |= definition.get_driver_resources(1, 1)[0]
         driver_kwargs.append(kwargs)
 
     result = execute_ipi(
diff --git a/psiflow/sampling/server.py b/psiflow/sampling/server.py
index 6ade9fe..d0bc4e5 100644
--- a/psiflow/sampling/server.py
+++ b/psiflow/sampling/server.py
@@ -187,7 +187,7 @@ def main():
         run(args.start_xyz, args.input_xml)
         softexit.trigger(status="success", message="@PSIFLOW: We are done here.")
     except ConnectionError:
-        # TODO: in this case, no output files are generated..
+        # TODO: in this case, no output files are generated, so the task fails..
         traceback.print_exc()
         softexit.trigger(status="bad", message="@PSIFLOW: Clients failed to connect.")
     except np.linalg.LinAlgError:
diff --git a/psiflow/sampling/walker.py b/psiflow/sampling/walker.py
index 9b2ce9c..42c711d 100644
--- a/psiflow/sampling/walker.py
+++ b/psiflow/sampling/walker.py
@@ -14,6 +14,7 @@
 from psiflow.data import Dataset
 from psiflow.geometry import Geometry, check_equality
 from psiflow.hamiltonians import Hamiltonian, Zero, combine_hamiltonians
+
 # from psiflow.order_parameters import OrderParameter
 from psiflow.sampling.metadynamics import Metadynamics
 from psiflow.utils.apps import copy_app_future
@@ -68,7 +69,7 @@ def get_ensemble_kwargs(walker: "Walker") -> dict:
 @dataclass
 class Walker:
     start: Union[Geometry, AppFuture]
-    hamiltonian: Hamiltonian = Zero()
+    hamiltonian: Hamiltonian = field(default_factory=lambda: Zero())
     timestep: float = 0.5
     temperature: Optional[float] = 300
     pressure: Optional[float] = None
diff --git a/psiflow/serialization.py b/psiflow/serialization.py
index 6e49038..ec455b0 100644
--- a/psiflow/serialization.py
+++ b/psiflow/serialization.py
@@ -4,6 +4,7 @@
 import json
 from pathlib import Path
 from typing import ClassVar, Optional, Union, get_args, get_origin, get_type_hints
+from collections.abc import Sequence
 from dataclasses import InitVar
 
 import typeguard
@@ -81,18 +82,18 @@ def serializable(cls):
             origin = get_origin(type_hint)
             if origin is ClassVar:
                 continue  # do nothing for classvars
-            elif origin == dict:
+            elif origin in (dict, Sequence):
                 kind = "attrs"
             elif isinstance(type_hint, str) and type_hint.startswith("dataclasses"):
                 continue
             elif isinstance(type_hint, InitVar):
                 continue
 
-            if kind is None and not inspect.isclass(type_hint):
+            elif kind is None and not inspect.isclass(type_hint):
                 raise ValueError(
                     "{} is formally not a class ({})".format(type_hint, name)
                 )
-            if issubclass(type_hint, Serializable):
+            elif issubclass(type_hint, Serializable):
                 kind = "serial"
             elif type_hint is Geometry:
                 kind = "geoms"

From 54089b0df6a4862090f570967f28b02ae03feb8e Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Tue, 10 Mar 2026 20:04:27 +0100
Subject: [PATCH 10/15] setup psiflow.log logging

---
 psiflow/utils/logging.py | 41 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 psiflow/utils/logging.py

diff --git a/psiflow/utils/logging.py b/psiflow/utils/logging.py
new file mode 100644
index 0000000..0976590
--- /dev/null
+++ b/psiflow/utils/logging.py
@@ -0,0 +1,41 @@
+import logging
+from pathlib import Path
+
+import parsl
+
+
+def setup_logging(file: Path, level=logging.INFO) -> None:
+    """Setup the Psiflow parent logger"""
+    logger = logging.getLogger('psiflow')
+    logger.setLevel(level)
+    logger.propagate = False  # do not propagate messages to root logger
+
+    fh = logging.FileHandler(file)
+    formatter = logging.Formatter(
+        fmt='%(asctime)s %(name)s [%(levelname)s] %(message)s',
+        datefmt='%Y-%m-%d %H:%M'
+    )
+    fh.setFormatter(formatter)
+    logger.addHandler(fh)
+
+
+def log_dfk_tasks(verbose: bool = False):
+    """Get an overview of all tasks stored in the parsl DFK. For debugging purposes."""
+    dfk = parsl.dfk()
+    parsl.wait_for_current_tasks()
+    log = ["- Parsl task overview -"]
+    if not verbose:
+        log += [f"{i}\t{d['func_name']}" for i, d in dfk.tasks.items()]
+        log.append("- Parsl task overview -")
+        print(*log, sep="\n")
+        return
+
+    for i, d in dfk.tasks.items():
+        args = [(_.split("/")[-1] if isinstance(_, str) else _) for _ in d["args"]]
+        if "inputs" in (kwargs := d["kwargs"]):
+            kwargs["inputs"] = [f.filename for f in kwargs["inputs"]]
+        if "outputs" in kwargs:
+            kwargs["outputs"] = [f.filename for f in kwargs["outputs"]]
+        log.append(f"\n{i}\t{d['func_name']:<30}\n{args}\n{kwargs}")
+    log.append("- Parsl task overview -")
+    print(*log, sep="\n")
\ No newline at end of file

From 029f26f27876b37291132dd64a3d7daf19799861 Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Tue, 10 Mar 2026 20:06:01 +0100
Subject: [PATCH 11/15] cleanup action

---
 psiflow/execution.py       | 193 ++++++++-----------------------------
 psiflow/models/model.py    |   6 +-
 psiflow/sampling/ase.py    |   4 +-
 psiflow/sampling/output.py |   8 +-
 psiflow/utils/_plumed.py   |   5 -
 psiflow/utils/apps.py      |  95 +++++++-----------
 psiflow/utils/config.py    |  23 +++++
 psiflow/utils/io.py        |  11 +--
 psiflow/utils/parse.py     |  24 +++--
 psiflow/utils/wq.py        |  40 ++++++++
 tests/test_reference.py    |  14 ++-
 tests/test_sampling.py     |   2 +-
 12 files changed, 170 insertions(+), 255 deletions(-)
 create mode 100644 psiflow/utils/config.py
 create mode 100644 psiflow/utils/wq.py

diff --git a/psiflow/execution.py b/psiflow/execution.py
index a29f188..34b643d 100644
--- a/psiflow/execution.py
+++ b/psiflow/execution.py
@@ -1,25 +1,12 @@
 import logging
 import shutil
 import sys
-import warnings
 import subprocess
-import textwrap
 import inspect
-from datetime import datetime, timedelta
 from dataclasses import dataclass
 from pathlib import Path
 from threading import Lock
-from typing import (
-    Any,
-    Optional,
-    Union,
-    ClassVar,
-    Protocol,
-    Iterable,
-    Sequence,
-    Callable,
-    TypeVar,
-)
+from typing import Any, Optional, Union, ClassVar, Protocol, Sequence, TypeVar
 
 import parsl
 import psutil
@@ -37,13 +24,24 @@
 from parsl.providers import LocalProvider, SlurmProvider
 from parsl.providers.base import ExecutionProvider
 
-from psiflow.utils.config import PSIFLOW_INTERNAL, PARSL_LOGFILE, PSIFLOW_LOGFILE
+from psiflow.utils.config import (
+    PSIFLOW_INTERNAL,
+    PARSL_LOGFILE,
+    PSIFLOW_LOGFILE,
+    DEFAULT_CONFIG,
+    CONTEXT_DIR,
+)
 from psiflow.utils.logging import setup_logging
+from psiflow.utils.wq import register_definition
+from psiflow.utils.apps import create_bash_template
+from psiflow.utils.parse import str_to_timedelta
 
 
 logger = logging.getLogger(__name__)  # logging per module
 
 
+class ConfigurationError(ValueError):
+    pass  # some global psiflow configuration option does not make sense
 
 
 @dataclass
@@ -149,7 +147,7 @@ def make_slurm_provider(kwargs: dict) -> tuple[SlurmProvider, dict]:
     resources = {
         "nodes": provider.nodes_per_block,
         "cores": provider.cores_per_node,
-        "memory": provider.mem_per_node,
+        "memory": provider.mem_per_node or float("inf"),
         "gpus": provider.gpus_per_node,
         "lifetime": str_to_timedelta(provider.walltime).seconds,
     }
@@ -232,21 +230,20 @@ def __init__(
             elif container is not None and container.gpu_flavour is None:
                 msg = "Provide container 'gpu_flavour' to choose between CUDA and ROCM"
             if msg:
-                raise ValueError(msg)
+                raise ConfigurationError(msg)
 
         if self.executor_type == "workqueue":
-            # WQ-specific checks TODO: check that WQ kwargs do not exceed resources?
+            # WQ-specific checks
             msg = ""
             if self.kwargs["gpus_per_task"] > resources["gpus"]:
                 msg = "GPUs"
             if self.kwargs["cores_per_task"] > resources["cores"]:
                 msg = "cores"
-            if self.kwargs["mem_per_task"] > (resources["memory"] or float("inf")):
-                # TODO: do we need memory=None anywhere? otherwise default to inf?
+            if self.kwargs["mem_per_task"] > resources["memory"]:
                 msg = "memory"
             if msg:
                 msg = f"Apps will request more {msg} than available per Parsl block"
-                raise ValueError(msg)
+                raise ConfigurationError(msg)
 
         # how long can individual tasks run (in seconds)
         if max_runtime is None:
@@ -256,14 +253,14 @@ def __init__(
             max_runtime = str_to_timedelta(max_runtime).seconds
         if max_runtime != float("inf") and max_runtime >= self.lifetime:
             msg = "Allowed task runtime exceeds provider walltime. Tasks might get killed by the scheduler."
-            warnings.warn(msg)
+            logger.warning(msg)
         self.max_runtime = max_runtime
         if (
             self.executor_type == "workqueue"
             and self.kwargs["min_runtime"] >= self.max_runtime
         ):
             msg = "Minimum task runtime exceeds maximum runtime. WQ might not not start tasks."
-            warnings.warn(msg)
+            logger.warning(msg)
 
         # set default WQ resource specs
         self.spec: dict | None = None
@@ -338,10 +335,10 @@ def wrap_in_timeout(self, command: str) -> str:
         # send SIGTERM after max_runtime, follow with SIGKILL 30s later
         return f"timeout -k 30s {self.max_runtime}s {command}"
 
-    def wrap_in_srun(self, command: str) -> str:
-        # TODO: stub -- this does not work
-        if self.provider is None:
-            return command  # noop
+        # def wrap_in_srun(self, command: str) -> str:
+        #     # TODO: stub -- this does not work
+        #     if self.provider is None:
+        #         return command  # noop
 
         return f"srun -t 1 -c $CORES {command}"
 
@@ -397,8 +394,8 @@ def wq_resources(self, *args, **kwargs) -> dict:
     @classmethod
     def from_config(
         cls,
-        executor: str,  # TODO: no default value?
-        container: Optional[ContainerSpec],
+        executor: str = "workqueue",
+        container: Optional[ContainerSpec] = None,
         **kwargs,
     ):
         if executor == "threadpool":
@@ -423,7 +420,9 @@ def from_config(
             min_runtime = kwargs.get("min_runtime", "00:00:00")
             executor_kwargs["min_runtime"] = str_to_timedelta(min_runtime).seconds
         else:
-            raise ValueError("Key 'executor' must be 'threadpool' or 'workqueue'")
+            raise ConfigurationError(
+                "Key 'executor' must be 'threadpool' or 'workqueue'"
+            )
 
         # search for Parsl ExecutionProvider block, defaulting to "local"
         if "slurm" in kwargs:
@@ -457,8 +456,7 @@ def __init__(
         super().__init__(**kwargs)
 
         if self.use_gpu and self.kwargs["gpus_per_task"] > 1:
-            # TODO: 'ConfigurationError' maybe?
-            raise ValueError("No Hamiltonian can do multi-GPU evaluation")
+            raise ConfigurationError("No Hamiltonian can do multi-GPU evaluation")
 
         # i-Pi will kill client connections after no response for timeout seconds
         self.timeout = timeout
@@ -468,7 +466,7 @@ def __init__(
         if max_resource_multiplier is None:
             max_resource_multiplier = self.task_slots
         elif max_resource_multiplier > self.task_slots:
-            warnings.warn(
+            logger.warning(
                 "Provided 'max_resource_multiplier' exceeds available task slots "
                 f"({max_resource_multiplier} -> {self.task_slots}). "
                 f"Limiting 'max_resource_multiplier'."
@@ -490,12 +488,12 @@ def get_driver_resources(self, n_walkers: int, n_drivers: int) -> list[dict]:
 
         if n_drivers > m and not self.allow_oversubscription:
             # the combination of drivers does not fit on available resources
-            raise ValueError(
+            raise ConfigurationError(
                 f"Simulation with {n_drivers} independent drivers not possible. "
                 f"Either increase 'max_resource_multiplier' or enable resource oversubscription."
             )
         if n_clients > m and self.allow_oversubscription:
-            warnings.warn(
+            logger.warning(
                 f"Simulation wants to employ {n_clients} clients, "
                 f"but can only use {m}x the per-client budget. "
                 f"Oversubscribing CPU/GPU resources."
@@ -530,7 +528,7 @@ def __init__(self, **kwargs) -> None:
         super().__init__(**kwargs)
 
         if not self.use_gpu:
-            warnings.warn(
+            logger.warning(
                 "ModelTraining is configured for CPU operation. Is this what you want?"
             )
 
@@ -566,7 +564,7 @@ def __init__(
         self.memory_limit = memory_limit  # in GB
 
         if self.use_gpu:
-            warnings.warn("Reference calculations do not support GPU computation yet.")
+            logger.warning("Reference calculations do not support GPU computation yet.")
 
     def command(self):
         command = self.reference.launch_command()
@@ -661,6 +659,7 @@ def new_file(self, prefix: str, suffix: str) -> File:
     def from_config(
         cls,
         parsl_log_level: str,
+        psiflow_log_level: str,
         default_threads: int,
         **kwargs,
     ) -> "ExecutionContext":
@@ -674,7 +673,7 @@ def from_config(
         log_file = str(path / PARSL_LOGFILE)
         log_level = getattr(logging, parsl_log_level)
         parsl.set_file_logger(filename=log_file, name="parsl", level=log_level)
-        setup_logging(file=path / PSIFLOW_LOGFILE)  # TODO
+        setup_logging(file=path / PSIFLOW_LOGFILE, level=psiflow_log_level)
 
         # default container for ModelEvaluation and ModelTraining
         base_container = None
@@ -716,7 +715,7 @@ def from_config(
             initialize_logging=False,
             **kwargs,
         )
-        return ExecutionContext(config, definitions, path / "context_dir", **kwargs)
+        return ExecutionContext(config, definitions, path / CONTEXT_DIR, **kwargs)
 
 
 class ExecutionContextLoader:
@@ -796,67 +795,6 @@ def __call__(self, command: str, tasks_per_node: int, nodes_per_block: int) -> s
         return x
 
 
-# TODO: move everything below to appropriate files
-
-# TODO: attempt at managing priority through global state
-WQ_RESOURCES_REGISTRY = []
-
-
-def register_definition(definition: ExecutionDefinition) -> None:
-    """"""
-    if (spec := definition.spec) is None:
-        return  # threadpool does not have priority
-
-    WQ_RESOURCES_REGISTRY.append((definition.name, spec))
-    spec["priority"] = SetWQPriority.default
-
-
-class SetWQPriority:
-    """Manage the WQ priority tag as context manager"""
-
-    # TODO: this probably does not work in a nested way
-    # TODO: log to parsl.log?
-    default = 0
-
-    def __init__(self, value: int, verbose: bool = False) -> None:
-        self.value = value
-        self.verbose = verbose
-
-    def __enter__(self):
-        if self.verbose:
-            print(f"SetWQPriority setting priority:\t{self.value}")
-        for n, spec in WQ_RESOURCES_REGISTRY:
-            spec["priority"] = self.value
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        if self.verbose:
-            print(f"SetWQPriority unsetting {self.value}")
-        for n, spec in WQ_RESOURCES_REGISTRY:
-            spec["priority"] = SetWQPriority.default
-
-
-# This is the default psiflow config which is always passed into the ExecutionContext
-# TODO: find a place for this
-DEFAULT_CONFIG = """
-parsl_log_level: WARNING
-usage_tracking: 3
-default_threads: 4
-tmpdir_root: /tmp
-keep_tmpdirs: false
-garbage_collect: true
-retries: 0
-
-ModelEvaluation:
-  executor: threadpool
-  max_threads: 2
-
-ModelTraining:
-  executor: threadpool
-  max_threads: 2
-"""
-
-
 def patch_parsl_dirtree() -> None:
     """By default, Parsl will put Executor logs etc. under numbered directories.
     We do not need this level of nesting, as psiflow_internal is refreshed every run"""
@@ -866,61 +804,6 @@ def patch_parsl_dirtree() -> None:
     parsl.dataflow.dflow.make_rundir = lambda x: x
 
 
-# TODO: arguments that need documenting: retries, strategy?, timeout, garbage_collect (Config)
-
-
-def create_bash_template(tmpdir_root: str, keep_tmpdirs: bool) -> str:
-    """Create general wrapper for all bash apps. The exitcode ensures that every app completes successfully."""
-    template = f"""
-    # Create and move into new tmpdir for app execution
-    tmpdir=$(mktemp -d -p {tmpdir_root} "psiflow-tmp.XXXXXXXXXX")
-    cd $tmpdir; echo "tmpdir: $PWD"
-    {{env}}
-    printenv
-
-    # Actual app definition goes here
-    {{commands}}
-
-    # Cleanup
-    {'cd ../.. && rm -r $tmpdir' if not keep_tmpdirs else ''}
-    exit 0
-    """
-    return textwrap.dedent(template)
-
-
-def format_env_vars(env_vars: dict) -> str:
-    if len(env_vars) == 0:
-        return ""
-    return "export" + " ".join([f"{k}={v}" for k, v in env_vars.items()])
-
-
-def str_to_timedelta(s: str) -> timedelta:
-    t = datetime.strptime(s, "%H:%M:%S")
-    return timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
-
-
-def log_dfk_tasks(verbose: bool = False):
-    """Get an overview of all tasks stored in the parsl DFK. For debugging purposes."""
-    dfk = parsl.dfk()
-    parsl.wait_for_current_tasks()
-    log = ["- Parsl task overview -"]
-    if not verbose:
-        log += [f"{i}\t{d['func_name']}" for i, d in dfk.tasks.items()]
-        log.append("- Parsl task overview -")
-        print(*log, sep="\n")
-        return
-
-    for i, d in dfk.tasks.items():
-        args = [(_.split("/")[-1] if isinstance(_, str) else _) for _ in d["args"]]
-        if "inputs" in (kwargs := d["kwargs"]):
-            kwargs["inputs"] = [f.filename for f in kwargs["inputs"]]
-        if "outputs" in kwargs:
-            kwargs["outputs"] = [f.filename for f in kwargs["outputs"]]
-        log.append(f"\n{i}\t{d['func_name']:<30}\n{args}\n{kwargs}")
-    log.append("- Parsl task overview -")
-    print(*log, sep="\n")
-
-
 # TODO: after 3.12, this is no longer needed
 #  https://docs.python.org/3/library/typing.html
 T = TypeVar("T")
diff --git a/psiflow/models/model.py b/psiflow/models/model.py
index 6e11b04..73c9cb1 100644
--- a/psiflow/models/model.py
+++ b/psiflow/models/model.py
@@ -1,5 +1,6 @@
 from __future__ import annotations  # necessary for type-guarding class methods
 
+import logging
 from dataclasses import asdict
 from pathlib import Path
 from typing import Optional, Union
@@ -11,10 +12,11 @@
 
 import psiflow
 from psiflow.data import Dataset
-from psiflow.utils.apps import copy_data_future, log_message, setup_logger
+from psiflow.utils.apps import copy_data_future, log_message
 from psiflow.utils.io import save_yaml
 
-logger = setup_logger(__name__)
+
+logger = logging.getLogger(__name__)
 
 
 @typeguard.typechecked
diff --git a/psiflow/sampling/ase.py b/psiflow/sampling/ase.py
index e039791..4775c1a 100644
--- a/psiflow/sampling/ase.py
+++ b/psiflow/sampling/ase.py
@@ -1,3 +1,4 @@
+import logging
 from typing import Optional, Union
 
 import parsl
@@ -9,7 +10,6 @@
 from psiflow.data.utils import write_frames
 from psiflow.geometry import Geometry
 from psiflow.hamiltonians import Hamiltonian
-from psiflow.utils.apps import setup_logger
 from psiflow.utils.io import _dump_json
 from psiflow.utils.parse import get_task_name_id
 from psiflow.execution import format_env_vars
@@ -17,7 +17,7 @@
 from ._ase import ALLOWED_MODES, __file__ as file_ase
 
 DEFAULT_EXECUTABLE = "script.py"
-logger = setup_logger(__name__)  # logging per module
+logger = logging.getLogger(__name__)
 
 
 class OptimisationFailedError(Exception):
diff --git a/psiflow/sampling/output.py b/psiflow/sampling/output.py
index 318a86c..7d74aec 100644
--- a/psiflow/sampling/output.py
+++ b/psiflow/sampling/output.py
@@ -1,5 +1,4 @@
-import copy
-import re
+import logging
 from enum import Enum
 from pathlib import Path
 from dataclasses import dataclass, field, InitVar
@@ -13,15 +12,14 @@
 
 import psiflow
 from psiflow.data import Dataset
-from psiflow.geometry import Geometry, NullState
+from psiflow.geometry import Geometry
 from psiflow.hamiltonians import Hamiltonian, MixtureHamiltonian, Zero
 from psiflow.sampling.walker import Walker
 from psiflow.utils.io import save_npz
-from psiflow.utils.apps import setup_logger
 from psiflow.utils.parse import get_task_name_id
 
 
-logger = setup_logger(__name__)  # logging per module
+logger = logging.getLogger(__name__)
 
 
 DEFAULT_OBSERVABLES = [
diff --git a/psiflow/utils/_plumed.py b/psiflow/utils/_plumed.py
index 89b6789..611bdc8 100644
--- a/psiflow/utils/_plumed.py
+++ b/psiflow/utils/_plumed.py
@@ -1,10 +1,7 @@
 import logging
 import os
 
-import typeguard
 
-
-@typeguard.typechecked
 def try_manual_plumed_linking() -> str:
     if "PLUMED_KERNEL" not in os.environ.keys():
         # try linking manually
@@ -23,7 +20,6 @@ def try_manual_plumed_linking() -> str:
     return os.environ["PLUMED_KERNEL"]
 
 
-@typeguard.typechecked
 def remove_comments_printflush(plumed_input: str) -> str:
     new_input = []
     for line in list(plumed_input.split("\n")):
@@ -38,7 +34,6 @@ def remove_comments_printflush(plumed_input: str) -> str:
     return "\n".join(new_input)
 
 
-@typeguard.typechecked
 def set_path_in_plumed(plumed_input: str, keyword: str, path_to_set: str) -> str:
     lines = plumed_input.split("\n")
     for i, line in enumerate(lines):
diff --git a/psiflow/utils/apps.py b/psiflow/utils/apps.py
index 0f52bdd..894619f 100644
--- a/psiflow/utils/apps.py
+++ b/psiflow/utils/apps.py
@@ -1,23 +1,20 @@
-from __future__ import annotations  # necessary for type-guarding class methods
-
-import logging
-import sys
+import shutil
+import textwrap
 from typing import Any, Union
+from pathlib import Path
 
 import numpy as np
-import typeguard
-from parsl.app.app import python_app
+from parsl import python_app
 from parsl.data_provider.files import File
 
 
-@typeguard.typechecked
 def get_attribute(obj: Any, *attribute_names: str) -> Any:
+    # TODO: not an app
     for name in attribute_names:
         obj = getattr(obj, name)
     return obj
 
 
-@typeguard.typechecked
 def _boolean_or(*args: Union[bool, np.bool_]) -> bool:
     return any(args)
 
@@ -32,27 +29,6 @@ def _multiply(a, b):
 multiply = python_app(_multiply, executors=["default_threads"])
 
 
-@typeguard.typechecked
-def setup_logger(module_name: str, level=logging.INFO) -> logging.Logger:
-    # Create logger instance for the module
-    module_logger = logging.getLogger(module_name)
-
-    # Set the desired format string
-    formatter = logging.Formatter("%(name)s - %(message)s")
-
-    # Create handler to send logs to stdout
-    stdout_handler = logging.StreamHandler(sys.stdout)
-    stdout_handler.setFormatter(formatter)
-
-    # Add handler to the logger instance
-    module_logger.addHandler(stdout_handler)
-
-    # Set the logging level for the logger
-    module_logger.setLevel(level)
-
-    return module_logger
-
-
 def _compute_sum(a, b):
     return np.add(a, b)
 
@@ -60,37 +36,25 @@ def _compute_sum(a, b):
 compute_sum = python_app(_compute_sum, executors=["default_threads"])
 
 
-@typeguard.typechecked
-def _combine_futures(inputs: list[Any]) -> list[Any]:
-    return list(inputs)
-
-
-combine_futures = python_app(_combine_futures, executors=["default_threads"])
-
-
-@typeguard.typechecked
 def _copy_data_future(
     pass_on_exist: bool = False,
     inputs: list[File] = [],
     outputs: list[File] = [],
 ) -> None:
-    import shutil
-    from pathlib import Path
-
     assert len(inputs) == 1
     assert len(outputs) == 1
     if Path(outputs[0]).is_file() and pass_on_exist:
-        return None
-    if Path(inputs[0]).is_file():
+        pass
+    elif Path(inputs[0]).is_file():
         shutil.copyfile(inputs[0], outputs[0])
     else:  # no need to copy empty file
         pass
+    return
 
 
 copy_data_future = python_app(_copy_data_future, executors=["default_threads"])
 
 
-@typeguard.typechecked
 def _copy_app_future(future: Any, inputs: list = [], outputs: list = []) -> Any:
     # inputs/outputs to enforce additional dependencies
     from copy import deepcopy
@@ -101,7 +65,6 @@ def _copy_app_future(future: Any, inputs: list = [], outputs: list = []) -> Any:
 copy_app_future = python_app(_copy_app_future, executors=["default_threads"])
 
 
-@typeguard.typechecked
 def _log_message(logger, message, *futures):
     if len(futures) > 0:
         logger.info(message.format(*futures))
@@ -112,23 +75,11 @@ def _log_message(logger, message, *futures):
 log_message = python_app(_log_message, executors=["default_threads"])
 
 
-def _pack(*args):
-    return args  # TODO: _combine_futures?
-
-
-pack = python_app(_pack, executors=["default_threads"])
-
-
-@typeguard.typechecked
-def _unpack_i(result: Union[np.ndarray, list, tuple], i: int) -> Any:
-    assert i <= len(result)
-    return result[i]
-
-
-unpack_i = python_app(_unpack_i, executors=["default_threads"])
+@python_app(executors=["default_threads"])
+def pack(*args: Any) -> tuple[Any]:
+    return args
 
 
-@typeguard.typechecked
 def _concatenate(*arrays: np.ndarray) -> np.ndarray:
     return np.concatenate(arrays)
 
@@ -136,9 +87,31 @@ def _concatenate(*arrays: np.ndarray) -> np.ndarray:
 concatenate = python_app(_concatenate, executors=["default_threads"])
 
 
-@typeguard.typechecked
 def _isnan(a: Union[float, np.ndarray]) -> bool:
     return bool(np.any(np.isnan(a)))
 
 
 isnan = python_app(_isnan, executors=["default_threads"])
+
+
+def create_bash_template(tmpdir_root: str, keep_tmpdirs: bool) -> str:
+    """Create the general wrapper for all bash apps. The final 'exit 0' makes the wrapper script end with exit status 0."""
+    template = f"""
+    # Create and move into new tmpdir for app execution
+    tmpdir=$(mktemp -d -p {tmpdir_root} "psiflow-tmp.XXXXXXXXXX")
+    cd $tmpdir; echo "tmpdir: $PWD"
+    {{env}}
+    printenv
+
+    # Actual app definition goes here
+    {{commands}}
+
+    # Cleanup
+    {'cd ../.. && rm -r $tmpdir' if not keep_tmpdirs else ''}
+    exit 0
+    """
+    return textwrap.dedent(template)
+
+
+combine_futures = None
+unpack_i = None
diff --git a/psiflow/utils/config.py b/psiflow/utils/config.py
new file mode 100644
index 0000000..14be6b1
--- /dev/null
+++ b/psiflow/utils/config.py
@@ -0,0 +1,23 @@
+PSIFLOW_INTERNAL = "psiflow_internal"
+PARSL_LOGFILE = "parsl.log"
+PSIFLOW_LOGFILE = "psiflow.log"
+CONTEXT_DIR = "context_dir"
+
+
+DEFAULT_CONFIG = """
+parsl_log_level: WARNING
+psiflow_log_level: WARNING
+usage_tracking: 3
+default_threads: 4
+tmpdir_root: /tmp
+keep_tmpdirs: false
+
+ModelEvaluation:
+  executor: threadpool
+  max_threads: 2
+
+ModelTraining:
+  executor: threadpool
+  max_threads: 2
+"""
+
diff --git a/psiflow/utils/io.py b/psiflow/utils/io.py
index c5e9385..2123429 100644
--- a/psiflow/utils/io.py
+++ b/psiflow/utils/io.py
@@ -3,12 +3,10 @@
 from typing import Any
 
 import numpy as np
-import typeguard
 from parsl.app.app import python_app
 from parsl.data_provider.files import File
 
 
-@typeguard.typechecked
 def _save_yaml(
     input_dict: dict,
     outputs: list[File] = [],
@@ -39,7 +37,6 @@ def _make_dict_safe(arg):
 save_yaml = python_app(_save_yaml, executors=["default_threads"])
 
 
-@typeguard.typechecked
 def _save_xml(
     element: ET.Element,
     outputs: list = [],
@@ -52,7 +49,6 @@ def _save_xml(
 save_xml = python_app(_save_xml, executors=["default_threads"])
 
 
-@typeguard.typechecked
 def _load_numpy(inputs: list[File] = [], **kwargs) -> np.ndarray:
     return np.loadtxt(inputs[0], **kwargs)
 
@@ -60,8 +56,7 @@ def _load_numpy(inputs: list[File] = [], **kwargs) -> np.ndarray:
 load_numpy = python_app(_load_numpy, executors=["default_threads"])
 
 
-@typeguard.typechecked
-def _read_yaml(inputs: list[File] = [], outputs: list[File] = []) -> dict:
+def _read_yaml(inputs: list[File] = []) -> dict:
     import yaml
 
     with open(inputs[0], "r") as f:
@@ -72,7 +67,6 @@ def _read_yaml(inputs: list[File] = [], outputs: list[File] = []) -> dict:
 read_yaml = python_app(_read_yaml, executors=["default_threads"])
 
 
-@typeguard.typechecked
 def _save_txt(data: str, outputs: list[File] = []) -> None:
     with open(outputs[0], "w") as f:
         f.write(data)
@@ -81,7 +75,6 @@ def _save_txt(data: str, outputs: list[File] = []) -> None:
 save_txt = python_app(_save_txt, executors=["default_threads"])
 
 
-@typeguard.typechecked
 def _load_metrics(inputs: list = []) -> np.recarray:
     # TODO: stop using recarrays
     return np.load(inputs[0], allow_pickle=True)
@@ -90,7 +83,6 @@ def _load_metrics(inputs: list = []) -> np.recarray:
 load_metrics = python_app(_load_metrics, executors=["default_threads"])
 
 
-@typeguard.typechecked
 def _save_metrics(data: np.recarray, outputs: list = []) -> None:
     # TODO: stop using recarrays
     with open(outputs[0], "wb") as f:
@@ -100,7 +92,6 @@ def _save_metrics(data: np.recarray, outputs: list = []) -> None:
 save_metrics = python_app(_save_metrics, executors=["default_threads"])
 
 
-@typeguard.typechecked
 def _dump_json(
     inputs: list = [],
     outputs: list = [],
diff --git a/psiflow/utils/parse.py b/psiflow/utils/parse.py
index 99fbcd7..9f265d1 100644
--- a/psiflow/utils/parse.py
+++ b/psiflow/utils/parse.py
@@ -7,9 +7,7 @@
 
 
 class LineNotFoundError(Exception):
-    """Call to find_line failed"""
-
-    pass
+    pass  # call to find_line failed
 
 
 def find_line(
@@ -24,7 +22,8 @@ def find_line(
         idx_slice = slice(idx_start, idx_start + max_lines)
     else:
         idx_start = idx_start or len(lines) - 1
-        idx_slice = slice(idx_start, idx_start - max_lines, -1)
+        idx_stop = max(idx_start - max_lines, 0)
+        idx_slice = slice(idx_start, idx_stop, -1)
     for i, l in enumerate(lines[idx_slice]):
         if l.strip().startswith(line):
             if not reverse:
@@ -41,8 +40,12 @@ def lines_to_array(
     return np.array([line.split()[start:stop] for line in lines], dtype=dtype)
 
 
-def string_to_timedelta(timedelta: str) -> datetime.timedelta:
+def str_to_timedelta(s: str) -> datetime.timedelta:
     """"""
+    # TODO: only parses "HH:MM:SS" strings; the early return below makes the unit-based parsing unreachable
+    time = datetime.datetime.strptime(s, "%H:%M:%S")
+    return datetime.timedelta(hours=time.hour, minutes=time.minute, seconds=time.second)
+
     allowed_units = "weeks", "days", "hours", "minutes", "seconds"
     time_list = timedelta.split()
     values, units = time_list[:-1:2], time_list[1::2]
@@ -50,9 +53,10 @@ def string_to_timedelta(timedelta: str) -> datetime.timedelta:
     return datetime.timedelta(**kwargs)
 
 
+
 def get_task_logs(task_id: int) -> tuple[Path, Path]:
     """"""
-    path = Path.cwd().resolve() / PSIFLOW_INTERNAL / "000/task_logs"  # TODO
+    path = Path.cwd().resolve() / PSIFLOW_INTERNAL / "task_logs"
     stdout = next(path.rglob(f"task_{task_id}_*.stdout"))
     stderr = next(path.rglob(f"task_{task_id}_*.stderr"))
     return stdout, stderr
@@ -61,3 +65,11 @@ def get_task_logs(task_id: int) -> tuple[Path, Path]:
 def get_task_name_id(logfile: str) -> tuple[str, str]:
     _, task_id, task_name = Path(logfile).stem.split("_", maxsplit=2)
     return task_name, task_id
+
+
+def format_env_vars(env_vars: dict) -> str:
+    if len(env_vars) == 0:
+        return ""
+    return "export" + " ".join([f"{k}={v}" for k, v in env_vars.items()])
+
+
diff --git a/psiflow/utils/wq.py b/psiflow/utils/wq.py
new file mode 100644
index 0000000..4ecc01d
--- /dev/null
+++ b/psiflow/utils/wq.py
@@ -0,0 +1,40 @@
+# TODO: SetWQPriority probably does not nest: __exit__ resets to the default instead of restoring the previous priority
+
+import logging
+
+
+logger = logging.getLogger(__name__)  # logging per module
+
+
+WQ_RESOURCES_REGISTRY = []
+
+
+def register_definition(definition: 'ExecutionDefinition') -> None:
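+    """Register the WQ spec of a definition and set its priority to the default (no-op when the spec is None)."""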
+    if (spec := definition.spec) is None:
+        return  # threadpool does not have priority
+
+    WQ_RESOURCES_REGISTRY.append((definition.name, spec))
+    spec["priority"] = SetWQPriority.default
+
+
+class SetWQPriority:
+    """Manage the WQ priority tag as context manager"""
+    default = 0
+
+    def __init__(self, value: int, verbose: bool = False) -> None:
+        self.value = value
+        self.verbose = verbose
+
+    def __enter__(self):
+        if self.verbose:
+            logger.info(f"SetWQPriority setting priority:\t{self.value}")
+        for n, spec in WQ_RESOURCES_REGISTRY:
+            spec["priority"] = self.value
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.verbose:
+            logger.info(f"SetWQPriority unsetting {self.value}")
+        for n, spec in WQ_RESOURCES_REGISTRY:
+            spec["priority"] = SetWQPriority.default
diff --git a/tests/test_reference.py b/tests/test_reference.py
index 8fa5b09..a1be636 100644
--- a/tests/test_reference.py
+++ b/tests/test_reference.py
@@ -146,11 +146,10 @@ def test_cp2k_parse_output():
 
  ENERGY| Total FORCE_EVAL ( QS ) energy [a.u.]:              -14.202993407031412
 
- ATOMIC FORCES in [a.u.]
-
- # Atom   Kind   Element          X              Y              Z
-      1      1      O           0.00000000     0.00000000     0.00000000
- SUM OF ATOMIC FORCES           0.00000000     0.00000000     0.00000000     0.00000000
+ FORCES| Atomic forces [hartree/bohr]
+ FORCES|   Atom     x               y               z               |f|
+ FORCES|      1  0  0  0  0
+ FORCES| Sum            0.00000000     0.00000000     0.00000000     0.00000000
 
  STRESS| Analytical stress tensor [GPa]
  STRESS|                        x                   y                   z
@@ -169,7 +168,7 @@ def test_cp2k_parse_output():
  
 ### SKIPPED A BIT ###
  
-  -------------------------------------------------------------------------------
+ -------------------------------------------------------------------------------
  -                                                                             -
  -                                T I M I N G                                  -
  -                                                                             -
@@ -223,7 +222,7 @@ def test_cp2k_success(simple_cp2k_input, geom_h2_p):
         if "Number of threads for this process" in line:
             nthreads = int(line.split()[-1])
     definition = psiflow.context().definitions["CP2K"]
-    ncores = definition.cores_per_worker
+    ncores = definition.cores_per_task
     assert ncores == nprocesses
     assert 1 == nthreads
 
@@ -306,7 +305,6 @@ def test_cp2k_failure(geom_h2_p):
 
 
 def test_cp2k_memory(simple_cp2k_input):
-    # TODO: test_cp2k_memory == test_cp2k_timeout until memory constraints work
     reference = CP2K(simple_cp2k_input)
     geometry = Geometry.from_data(
         numbers=np.ones(4000),
diff --git a/tests/test_sampling.py b/tests/test_sampling.py
index 7d955c6..a502a6a 100644
--- a/tests/test_sampling.py
+++ b/tests/test_sampling.py
@@ -317,7 +317,7 @@ def test_output_status(dataset):
 
     # walltime
     definition = psiflow.context().definitions["ModelEvaluation"]
-    definition.max_simulation_time = 5 / 60  # 5 seconds
+    definition.max_runtime = 5  # seconds
     outputs = sample([walker], steps=10000)
     assert outputs[0].status.result() == Status.TIMEOUT
     assert outputs[0].time.result() > 0

From f9f7d3371b56f5f260f0a1bdcd626788bbdc1440 Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Tue, 10 Mar 2026 21:26:33 +0100
Subject: [PATCH 12/15] small cleanup action

---
 psiflow/data/dataset.py        | 10 +++++-----
 psiflow/data/utils.py          |  7 +++----
 psiflow/free_energy/phonons.py |  2 +-
 psiflow/learning.py            |  6 ++++--
 psiflow/metrics.py             |  6 ++++--
 psiflow/reference/orca_.py     |  4 ++--
 psiflow/reference/reference.py |  5 +++--
 psiflow/sampling/ase.py        |  3 +--
 psiflow/sampling/optimize.py   |  2 +-
 psiflow/sampling/sampling.py   |  2 +-
 psiflow/utils/apps.py          |  4 +---
 tests/test_learning.py         |  2 +-
 12 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/psiflow/data/dataset.py b/psiflow/data/dataset.py
index e332167..660928c 100644
--- a/psiflow/data/dataset.py
+++ b/psiflow/data/dataset.py
@@ -13,7 +13,7 @@
 
 import psiflow
 from psiflow.geometry import QUANTITIES, Geometry
-from psiflow.utils.apps import combine_futures, copy_data_future, unpack_i
+from psiflow.utils.apps import copy_data_future, pack
 
 from .utils import (
     align_axes,
@@ -118,7 +118,7 @@ def __getitem__(
                 inputs=[self.extxyz],
                 outputs=[],  # will return Geometry as Future
             )
-            return unpack_i(future, 0)
+            return future[0]
         else:  # slice, list, AppFuture
             extxyz = read_frames(
                 index,
@@ -266,9 +266,9 @@ def get(
             inputs=[self.extxyz],
         )
         if len(quantities) == 1:
-            return unpack_i(result, 0)
+            return result[0]
         else:
-            return tuple([unpack_i(result, i) for i in range(len(quantities))])
+            return tuple([result[i] for i in range(len(quantities))])
 
     def evaluate(
         self,
@@ -301,7 +301,7 @@ def evaluate(
             outputs = [outputs]
         future = insert_quantities(
             quantities=tuple(computable.outputs),
-            arrays=combine_futures(inputs=list(outputs)),
+            arrays=pack(*outputs),
             inputs=[self.extxyz],
             outputs=[psiflow.context().new_file("data_", ".xyz")],
         )
diff --git a/psiflow/data/utils.py b/psiflow/data/utils.py
index 9078de1..0f54936 100644
--- a/psiflow/data/utils.py
+++ b/psiflow/data/utils.py
@@ -1,6 +1,6 @@
 import re
 import shutil
-from typing import Optional, Union
+from typing import Optional, Union, Sequence
 
 import numpy as np
 import typeguard
@@ -9,7 +9,6 @@
 from parsl.dataflow.futures import AppFuture
 
 from psiflow.geometry import Geometry, NullState, _assign_identifier, create_outputs
-from psiflow.utils.apps import unpack_i
 
 
 @typeguard.typechecked
@@ -206,7 +205,7 @@ def _extract_quantities(
 @typeguard.typechecked
 def _insert_quantities(
     quantities: tuple[str, ...],
-    arrays: list[np.ndarray, ...],
+    arrays: Sequence[np.ndarray],
     data: Optional[list[Geometry]] = None,
     inputs: list = [],
     outputs: list = [],
@@ -761,7 +760,7 @@ def get_train_valid_indices(
         tuple[AppFuture, AppFuture]: Futures for training and validation indices.
     """
     future = train_valid_indices(effective_nstates, train_valid_split, shuffle)
-    return unpack_i(future, 0), unpack_i(future, 1)
+    return future[0], future[1]
 
 
 @typeguard.typechecked
diff --git a/psiflow/free_energy/phonons.py b/psiflow/free_energy/phonons.py
index 2f449ef..8728fd5 100644
--- a/psiflow/free_energy/phonons.py
+++ b/psiflow/free_energy/phonons.py
@@ -20,7 +20,7 @@
 from psiflow.sampling.optimize import setup_forces
 from psiflow.utils.apps import multiply
 from psiflow.utils.io import load_numpy, save_xml
-from psiflow.execution import format_env_vars
+from psiflow.utils.parse import format_env_vars
 
 
 def _compute_frequencies(hessian: np.ndarray, geometry: Geometry) -> np.ndarray:
diff --git a/psiflow/learning.py b/psiflow/learning.py
index 3191a14..eab93d1 100644
--- a/psiflow/learning.py
+++ b/psiflow/learning.py
@@ -1,6 +1,7 @@
 from __future__ import annotations  # necessary for type-guarding class methods
 
 import shutil
+import logging
 from pathlib import Path
 from typing import Optional, Union
 
@@ -17,9 +18,10 @@
 from psiflow.models import Model
 from psiflow.reference import Reference
 from psiflow.sampling import SimulationOutput, Walker, sample
-from psiflow.utils.apps import boolean_or, isnan, setup_logger, unpack_i
+from psiflow.utils.apps import boolean_or, isnan
 
-logger = setup_logger(__name__)
+
+logger = logging.getLogger(__name__)  # logging per module
 
 
 @typeguard.typechecked
diff --git a/psiflow/metrics.py b/psiflow/metrics.py
index 2f73386..0234642 100644
--- a/psiflow/metrics.py
+++ b/psiflow/metrics.py
@@ -1,6 +1,7 @@
 from __future__ import annotations  # necessary for type-guarding class methods
 
 import os
+import logging
 from pathlib import Path
 from typing import Optional, Union
 
@@ -15,9 +16,10 @@
 from psiflow.hamiltonians import Hamiltonian
 from psiflow.models import Model
 from psiflow.sampling import SimulationOutput
-from psiflow.utils.apps import combine_futures, log_message, setup_logger
+from psiflow.utils.apps import log_message
+# from psiflow.utils.apps import combine_futures, log_message, setup_logger
 
-logger = setup_logger(__name__)
+logger = logging.getLogger(__name__)  # logging per module
 
 
 @typeguard.typechecked
diff --git a/psiflow/reference/orca_.py b/psiflow/reference/orca_.py
index 509aa25..cc2185b 100644
--- a/psiflow/reference/orca_.py
+++ b/psiflow/reference/orca_.py
@@ -12,7 +12,7 @@
 import psiflow
 from psiflow.geometry import Geometry
 from psiflow.reference.reference import Reference, Status, get_spin_multiplicities
-from psiflow.utils.parse import find_line, lines_to_array, string_to_timedelta
+from psiflow.utils.parse import find_line, lines_to_array, str_to_timedelta
 
 
 KEY_GHOST = "ghost"
@@ -94,7 +94,7 @@ def parse_output(stdout: str, properties: tuple[str, ...]) -> dict:
     if status == Status.SUCCESS:
         # total runtime
         idx = find_line(lines, "TOTAL RUN TIME", reverse=True, max_lines=5)
-        data["runtime"] = string_to_timedelta(lines[idx][16:])
+        data["runtime"] = str_to_timedelta(lines[idx][16:])
 
     # read coordinates
     idx_start = idx = find_line(lines, "CARTESIAN COORDINATES (ANGSTROEM)") + 2
diff --git a/psiflow/reference/reference.py b/psiflow/reference/reference.py
index dc29ec2..d049d9a 100644
--- a/psiflow/reference/reference.py
+++ b/psiflow/reference/reference.py
@@ -1,6 +1,7 @@
 from __future__ import annotations  # necessary for type-guarding class methods
 
 import warnings
+import logging
 from typing import ClassVar, Optional, Union, Callable, Sequence
 from pathlib import Path
 from functools import partial
@@ -16,11 +17,11 @@
 from psiflow.data import Computable, Dataset
 from psiflow.data.utils import extract_quantities
 from psiflow.geometry import Geometry, NullState
-from psiflow.utils.apps import copy_app_future, setup_logger
+from psiflow.utils.apps import copy_app_future
 from psiflow.utils.parse import LineNotFoundError, get_task_name_id
 
 
-logger = setup_logger(__name__)  # logging per module
+logger = logging.getLogger(__name__)  # logging per module
 
 
 class Status(Enum):
diff --git a/psiflow/sampling/ase.py b/psiflow/sampling/ase.py
index 4775c1a..69dde69 100644
--- a/psiflow/sampling/ase.py
+++ b/psiflow/sampling/ase.py
@@ -11,8 +11,7 @@
 from psiflow.geometry import Geometry
 from psiflow.hamiltonians import Hamiltonian
 from psiflow.utils.io import _dump_json
-from psiflow.utils.parse import get_task_name_id
-from psiflow.execution import format_env_vars
+from psiflow.utils.parse import get_task_name_id, format_env_vars
 
 from ._ase import ALLOWED_MODES, __file__ as file_ase
 
diff --git a/psiflow/sampling/optimize.py b/psiflow/sampling/optimize.py
index cb531a4..3d09880 100644
--- a/psiflow/sampling/optimize.py
+++ b/psiflow/sampling/optimize.py
@@ -20,7 +20,7 @@
 )
 from psiflow.sampling.output import HamiltonianComponent
 from psiflow.utils.io import save_xml
-from psiflow.execution import format_env_vars
+from psiflow.utils.parse import format_env_vars
 
 
 warnings.warn(
diff --git a/psiflow/sampling/sampling.py b/psiflow/sampling/sampling.py
index e37e1b5..dcb8fa8 100644
--- a/psiflow/sampling/sampling.py
+++ b/psiflow/sampling/sampling.py
@@ -21,7 +21,7 @@
     potential_component_name,
     HamiltonianComponent,
 )
-from psiflow.execution import format_env_vars
+from psiflow.utils.parse import format_env_vars
 from psiflow.sampling.utils import create_xml_list
 from psiflow.sampling.walker import Coupling, Walker, partition, Ensemble
 from psiflow.utils.io import _save_xml
diff --git a/psiflow/utils/apps.py b/psiflow/utils/apps.py
index 894619f..773915a 100644
--- a/psiflow/utils/apps.py
+++ b/psiflow/utils/apps.py
@@ -77,6 +77,7 @@ def _log_message(logger, message, *futures):
 
 @python_app(executors=["default_threads"])
 def pack(*args: Any) -> tuple[Any]:
+    """Combine passed futures into a single future."""
     return args
 
 
@@ -112,6 +113,3 @@ def create_bash_template(tmpdir_root: str, keep_tmpdirs: bool) -> str:
     """
     return textwrap.dedent(template)
 
-
-combine_futures = None
-unpack_i = None
diff --git a/tests/test_learning.py b/tests/test_learning.py
index e450a97..430a6ca 100644
--- a/tests/test_learning.py
+++ b/tests/test_learning.py
@@ -9,7 +9,7 @@
 from psiflow.metrics import Metrics, _create_table, parse_walker_log, reconstruct_dtypes
 from psiflow.reference import ReferenceDummy
 from psiflow.sampling import SimulationOutput, Walker
-from psiflow.utils.apps import combine_futures
+# from psiflow.utils.apps import combine_futures  # use pack instead
 from psiflow.utils.io import _load_metrics, _save_metrics, load_metrics, save_metrics
 
 

From bcfda23474531125370eb683742f121f8cedc9af Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Wed, 11 Mar 2026 22:35:49 +0100
Subject: [PATCH 13/15] final tweaks

---
 configs/local_test.yaml  |   2 +
 psiflow/execution.py     | 108 +++++++++++++++++++++------------------
 psiflow/utils/logging.py |   2 +-
 pyproject.toml           |   5 +-
 tests/conftest.py        |   2 -
 tests/test_execution.py  |  59 +++++++++++++++++++++
 6 files changed, 122 insertions(+), 56 deletions(-)
 create mode 100644 tests/test_execution.py

diff --git a/configs/local_test.yaml b/configs/local_test.yaml
index b44c2af..1342c39 100644
--- a/configs/local_test.yaml
+++ b/configs/local_test.yaml
@@ -1,4 +1,6 @@
 ---
+psiflow_log_level: INFO
+
 ModelEvaluation:
     executor: threadpool
     max_threads: 4
diff --git a/psiflow/execution.py b/psiflow/execution.py
index 34b643d..4d4aaef 100644
--- a/psiflow/execution.py
+++ b/psiflow/execution.py
@@ -44,6 +44,21 @@ class ConfigurationError(ValueError):
     pass  # some global psiflow configuration option does not make sense
 
 
+def ensure(
+    *conditions: bool,
+    msg: str = "Whoopsie",
+    msgs: Sequence[str] = (),
+    template: str = "{}",
+) -> None:
+    """Small helper function to replace 'assert' statements"""
+    if all(conditions):
+        return
+    if len(msgs) == 0:
+        raise ConfigurationError(msg)
+    msg = msgs[conditions.index(False)]
+    raise ConfigurationError(template.format(msg))
+
+
 @dataclass
 class ContainerSpec:
     """Controls container configuration"""
@@ -54,19 +69,17 @@ class ContainerSpec:
     gpu_flavour: str | None = None
 
     def __post_init__(self):
-        assert self.engine in ("apptainer", "singularity")
-        assert len(self.uri) > 0
-        assert self.gpu_flavour in ("cuda", "rocm", None)
+        ensure(
+            self.engine in ("apptainer", "singularity"),
+            len(self.uri) > 0,
+            self.gpu_flavour in ("cuda", "rocm", None),
+            msg="Invalid container configuration",
+        )
 
     def launch_command(self) -> str:
         pwd = Path.cwd().resolve()  # access to data / internal dir
-        args = [self.engine, "run", self.addopts, f"--bind {pwd}"]
-        if self.gpu_flavour == "cuda":
-            args.append("--nv")
-        elif self.gpu_flavour == "rocm":
-            args.append("--rocm")
-        args.append(self.uri)
-        return " ".join(args)
+        gpu = {"cuda": "--nv", "rocm": "--rocm"}.get(self.gpu_flavour, "")
+        return f"{self.engine} run {self.addopts} {gpu} --bind {pwd} {self.uri}"
 
 
 class ReferenceSpec(Protocol):
@@ -140,15 +153,15 @@ def launch_command(self):
 
 def make_slurm_provider(kwargs: dict) -> tuple[SlurmProvider, dict]:
     defaults = {"init_blocks": 0, "exclusive": False}
-    required = ("cores_per_node", "walltime", "gpus_per_node")
+    required = ("cores_per_node", "walltime")
     kwargs = defaults | kwargs
-    assert all(key in kwargs for key in required)
+    ensure(all(key in kwargs for key in required))
     provider = SlurmProvider(**kwargs)  # does not configure Launcher
     resources = {
         "nodes": provider.nodes_per_block,
         "cores": provider.cores_per_node,
         "memory": provider.mem_per_node or float("inf"),
-        "gpus": provider.gpus_per_node,
+        "gpus": provider.gpus_per_node or 0,
         "lifetime": str_to_timedelta(provider.walltime).seconds,
     }
     return provider, resources
@@ -224,26 +237,23 @@ def __init__(
         self.container = container
 
         if self.use_gpu:
-            msg = ""
-            if resources["gpus"] == 0:
-                msg = "GPU usage requested but no GPUs available"
-            elif container is not None and container.gpu_flavour is None:
-                msg = "Provide container 'gpu_flavour' to choose between CUDA and ROCM"
-            if msg:
-                raise ConfigurationError(msg)
+            ensure(
+                resources["gpus"] > 0, msg="GPU usage requested but no GPUs available"
+            )
+            ensure(
+                container is None or container.gpu_flavour is not None,
+                msg="Provide container 'gpu_flavour' to choose between CUDA and ROCM",
+            )
 
         if self.executor_type == "workqueue":
             # WQ-specific checks
-            msg = ""
-            if self.kwargs["gpus_per_task"] > resources["gpus"]:
-                msg = "GPUs"
-            if self.kwargs["cores_per_task"] > resources["cores"]:
-                msg = "cores"
-            if self.kwargs["mem_per_task"] > resources["memory"]:
-                msg = "memory"
-            if msg:
-                msg = f"Apps will request more {msg} than available per Parsl block"
-                raise ConfigurationError(msg)
+            ensure(
+                self.kwargs["gpus_per_task"] <= resources["gpus"],
+                self.kwargs["cores_per_task"] <= resources["cores"],
+                self.kwargs["mem_per_task"] <= resources["memory"],
+                msgs=["GPUs", "cores", "memory"],
+                template="Apps will request more {} than available per Parsl block",
+            )
 
         # how long can individual tasks run (in seconds)
         if max_runtime is None:
@@ -313,7 +323,8 @@ def cores_per_task(self) -> int:
         if self.executor_type == "workqueue":
             return self.kwargs["cores_per_task"]
         # assumes all threads are working
-        return int(self.resources["cores"] / self.kwargs["max_threads"])
+        cores_per_thread = self.resources["cores"] / self.kwargs["max_threads"]
+        return max(int(cores_per_thread), 1)
 
     @property
     def task_slots(self) -> int:
@@ -335,12 +346,11 @@ def wrap_in_timeout(self, command: str) -> str:
         # send SIGTERM after max_runtime, follow with SIGKILL 30s later
         return f"timeout -k 30s {self.max_runtime}s {command}"
 
-        # def wrap_in_srun(self, command: str) -> str:
-        #     # TODO: stub -- this does not work
-        #     if self.provider is None:
-        #         return command  # noop
-
-        return f"srun -t 1 -c $CORES {command}"
+    # def wrap_in_srun(self, command: str) -> str:
+    #     # TODO: stub -- this does not work
+    #     if self.provider is None:
+    #         return command  # noop
+    # return f"srun -t 1 -c $CORES {command}"
 
     def _create_threadpool(self, path: Path) -> ThreadPoolExecutor:
         max_threads = self.kwargs["max_threads"]
@@ -399,11 +409,12 @@ def from_config(
         **kwargs,
     ):
         if executor == "threadpool":
-            assert container is None, "Threadpool not compatible with containers"
-            assert (
-                "slurm" not in kwargs
-            ), "Threadpool not compatible with remote execution"
-            assert "max_threads" in kwargs, "Specify 'max_threads' for parallelism"
+            ensure(container is None, msg="Threadpool not compatible with containers")
+            ensure("max_threads" in kwargs, msg="Specify 'max_threads' for parallelism")
+            ensure(
+                "slurm" not in kwargs,
+                msg="Threadpool not compatible with remote execution",
+            )
             executor_kwargs = {
                 "max_threads": kwargs["max_threads"],
                 "use_gpu": kwargs.get("use_gpu", False),
@@ -414,15 +425,12 @@ def from_config(
                 "gpus_per_task": kwargs.get("gpus_per_task", 0),
                 "mem_per_task": kwargs.get("mem_per_task", 0),
             }
-            assert (
-                executor_kwargs["cores_per_task"] > 0
-            ), "WQ needs at least one core to launch tasks"
+            if executor_kwargs["cores_per_task"] == 0:
+                raise ConfigurationError("WQ needs at least one core to launch tasks")
             min_runtime = kwargs.get("min_runtime", "00:00:00")
             executor_kwargs["min_runtime"] = str_to_timedelta(min_runtime).seconds
         else:
-            raise ConfigurationError(
-                "Key 'executor' must be 'threadpool' or 'workqueue'"
-            )
+            raise ConfigurationError("Invalid executor key")
 
         # search for Parsl ExecutionProvider block, defaulting to "local"
         if "slurm" in kwargs:
@@ -448,7 +456,7 @@ def from_config(
 class ModelEvaluation(ExecutionDefinition):
     def __init__(
         self,
-        timeout: float = 5.0,
+        timeout: float = 10.0,
         max_resource_multiplier: int | None = None,
         allow_oversubscription: bool = True,
         **kwargs,
@@ -625,7 +633,7 @@ def __init__(
         self.path.mkdir(parents=True, exist_ok=True)
 
         self.definitions = {d.name: d for d in definitions}
-        assert len(self.definitions) == len(definitions)
+        ensure(len(self.definitions) == len(definitions))
 
         # make sure task tmpdirs can be made
         Path(tmpdir_root).mkdir(parents=True, exist_ok=True)
diff --git a/psiflow/utils/logging.py b/psiflow/utils/logging.py
index 0976590..373a613 100644
--- a/psiflow/utils/logging.py
+++ b/psiflow/utils/logging.py
@@ -12,7 +12,7 @@ def setup_logging(file: Path, level=logging.INFO) -> None:
 
     fh = logging.FileHandler(file)
     formatter = logging.Formatter(
-        fmt='%(asctime)s %(name)s [%(levelname)s] %(message)s',
+        fmt='%(asctime)s [%(levelname)s] %(name)s \t %(message)s',
         datefmt='%Y-%m-%d %H:%M'
     )
     fh.setFormatter(formatter)
diff --git a/pyproject.toml b/pyproject.toml
index 1f20c23..898ab5b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "psiflow"
-version = "4.0.1"
+version = "4.0.2"
 description = "Library for developing interatomic potentials"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -13,12 +13,11 @@ dependencies = [
     "ase>=3.23.0",
     "pyyaml>=6.0",
     "numpy>=1.22.3, <2",
-    "parsl==2024.12.16",
+    "parsl==2026.02.16",
     "prettytable",
     "psutil",
     "cp2k-input-tools @ git+https://github.com/cp2k/cp2k-input-tools.git@3b9929735dcb3c8c0620a548b1fe20efecbad077",  # need 2024.1
     "ipi @ git+https://github.com/i-pi/i-pi.git@v3.1.10",
-    "pytimeparse",
     ]
 
 
diff --git a/tests/conftest.py b/tests/conftest.py
index e09e86e..2052219 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,3 @@
-import xml.etree.ElementTree as ET
 from dataclasses import asdict
 from pathlib import Path
 
@@ -44,7 +43,6 @@ def context(request, tmp_path_factory):
         path_config = Path(request.config.getoption("--psiflow-config"))
         with open(path_config, "r") as f:
             psiflow_config = yaml.safe_load(f)
-        psiflow_config["path"] = tmp_path_factory.mktemp("psiflow_internal")
         psiflow.load(psiflow_config)
         context = psiflow.context()  # noqa: F841
         yield
diff --git a/tests/test_execution.py b/tests/test_execution.py
new file mode 100644
index 0000000..6a456d9
--- /dev/null
+++ b/tests/test_execution.py
@@ -0,0 +1,59 @@
+import yaml
+
+from psiflow.execution import ExecutionDefinition
+
+
+def test_execution():
+    """Check a few execution parameters. This could definitely be expanded upon."""
+    data_yaml = """
+    executor: threadpool
+    max_threads: 42
+    use_gpu: false
+    max_runtime: 00:59:00
+    local:
+      cores: 4
+      memory: 8
+    """
+    definition = ExecutionDefinition.from_config(**yaml.safe_load(data_yaml))
+    assert definition.executor_type == "threadpool"
+    assert definition.provider is None
+    assert definition.container is None
+    assert definition.lifetime == float("inf")
+    assert definition.max_runtime == 3540
+    assert definition.task_slots == 42
+    assert definition.cores_per_task == 1
+    assert definition.use_gpu == False
+    assert definition.spec is None
+
+    data_yaml = """
+    cores_per_task: 3
+    gpus_per_task: 0
+    mem_per_task: 6
+    min_runtime: 00:15:00
+    env_vars:
+      CUSTOM_KEY: custom_var
+    slurm:
+      cores_per_node: 8
+      mem_per_node: 16
+      walltime: "02:00:00"
+    """
+    spec = {
+        "cores": 3,
+        "disk": 0,
+        "gpus": 0,
+        "memory": 6000,
+        "priority": 0,
+        "running_time_min": 900,
+    }
+    data = yaml.safe_load(data_yaml)
+    definition = ExecutionDefinition.from_config(**data)
+    assert definition.executor_type == "workqueue"
+    assert definition.provider is not None
+    assert definition.lifetime == 7200
+    assert definition.max_runtime == 7140
+    assert definition.task_slots == 2
+    assert definition.cores_per_task == 3
+    assert definition.use_gpu == False
+    assert definition.spec == spec
+    assert definition.env_vars["CUSTOM_KEY"] == "CUSTOM_VAR"
+

From 9259f8014729fd52ba217f4ffc4bb7ff4d30bf3e Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Mon, 30 Mar 2026 13:19:27 +0200
Subject: [PATCH 14/15] update example configs

---
 configs/example_local_debug.yaml    | 15 ++++++++
 configs/example_tier0_lumi.yaml     | 21 ++++++++++
 configs/example_tier1_hortense.yaml | 19 ++++++++++
 configs/example_tier2_ugent.yaml    | 59 +++++++++++++++++++++++++++++
 configs/local_test.yaml             |  5 +--
 configs/old_hortense.yaml           | 42 --------------------
 configs/old_lumi.yaml               | 39 -------------------
 configs/old_threadpool.yaml         | 29 --------------
 configs/old_wq.yaml                 | 17 ---------
 psiflow/utils/config.py             |  2 +-
 10 files changed, 116 insertions(+), 132 deletions(-)
 create mode 100644 configs/example_local_debug.yaml
 create mode 100644 configs/example_tier0_lumi.yaml
 create mode 100644 configs/example_tier1_hortense.yaml
 create mode 100644 configs/example_tier2_ugent.yaml
 delete mode 100644 configs/old_hortense.yaml
 delete mode 100644 configs/old_lumi.yaml
 delete mode 100644 configs/old_threadpool.yaml
 delete mode 100644 configs/old_wq.yaml

diff --git a/configs/example_local_debug.yaml b/configs/example_local_debug.yaml
new file mode 100644
index 0000000..385e0e4
--- /dev/null
+++ b/configs/example_local_debug.yaml
@@ -0,0 +1,15 @@
+# log all of the messages
+parsl_log_level: DEBUG
+psiflow_log_level: DEBUG
+
+# tell Parsl to not clean up completed tasks from the DFK
+# you can print out the full list of tasks with 'log_dfk_tasks' from psiflow.utils.logging
+garbage_collect: false
+
+# tell psiflow to execute every bash app in a specified directory - and not clean up afterwards
+# allows you to track which files are created in each task
+tmpdir_root: ~/psiflow_tasks
+keep_tmpdirs: true
+
+# not specifying ModelEvaluation or ModelTraining falls back to the default in 'psiflow.utils.config'
+# this does not include any ReferenceEvaluation blocks, so Reference apps will not run with this configuration
diff --git a/configs/example_tier0_lumi.yaml b/configs/example_tier0_lumi.yaml
new file mode 100644
index 0000000..13a6328
--- /dev/null
+++ b/configs/example_tier0_lumi.yaml
@@ -0,0 +1,21 @@
+# Tier-0 LUMI requires an active research project to submit calculations
+
+# run ModelEvaluation and ModelTraining in a psiflow container
+container:
+    uri: oras://ghcr.io/molmod/psiflow:4.0.0_rocm6.2  # outdated uri
+    engine: singularity  # LUMI uses singularity instead of apptainer
+    gpu_flavour: rocm  # GPU nodes are of the AMD kind
+
+# not specifying max_runtime defaults to (just shy of) job walltime
+ModelTraining:
+  cores_per_task: 56
+  gpus_per_task: 8
+  slurm:
+    partition: standard-g
+    account: project_465001125
+    nodes_per_block: 1
+    cores_per_node: 56
+    gpus_per_node: 8
+    max_blocks: 1
+    walltime: "12:00:00"
+    scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=1\n"
diff --git a/configs/example_tier1_hortense.yaml b/configs/example_tier1_hortense.yaml
new file mode 100644
index 0000000..8bfe95c
--- /dev/null
+++ b/configs/example_tier1_hortense.yaml
@@ -0,0 +1,19 @@
+# Tier-1 Hortense requires an active research project to submit calculations
+# always make sure to 'unset SBATCH_PARTITION' before submitting anything
+
+# when not using a psiflow container, you need to make sure all necessary software is available on the worker node
+# this can be through installed modules, custom environments, ...
+ModelEvaluation:
+  cores_per_task: 8
+  gpus_per_task: 1
+  max_runtime: 06:00:00
+  slurm:
+    partition: gpu_rome_a100  # specify node partition where jobs should run
+    account: 2026_042  # specify your active computational grant
+    nodes_per_block: 1
+    cores_per_node: 32
+    gpus_per_block: 4
+    max_blocks: 10
+    walltime: 06:30:00
+    worker_init: micromamba activate my-mace-env  # load an environment with the appropriate software
+
diff --git a/configs/example_tier2_ugent.yaml b/configs/example_tier2_ugent.yaml
new file mode 100644
index 0000000..bc87a77
--- /dev/null
+++ b/configs/example_tier2_ugent.yaml
@@ -0,0 +1,59 @@
+# Tier-2 UGent divides its compute nodes over 'clusters' rather than partitions
+# always make sure to 'unset SLURM_CLUSTERS' before submitting anything
+
+# retry failed apps once
+retries: 1
+
+# psiflow always launches one HighThroughputExecutor and one ThreadPoolExecutor to handle internal apps (IO, parsing)
+# these tasks run locally and should never do any real computational work
+# default_threads mainly needs to be high enough to avoid concurrency bottlenecks
+default_threads: 8
+
+# run ModelEvaluation and ModelTraining in a psiflow container
+container:
+    uri: oras://ghcr.io/molmod/psiflow:4.0.0_cu118  # outdated uri
+    engine: apptainer
+    gpu_flavour: cuda  # required for GPU usage
+
+# tasks will use (at least) two cores (no GPUs) and can run for six hours at most
+# parsl asks SLURM for 16 cores on either doduo or shinx, so 8 tasks can run per SLURM job
+# only 10 SLURM jobs will run at once
+ModelEvaluation:
+  cores_per_task: 2
+  max_runtime: 06:00:00
+  slurm:
+    nodes_per_block: 1
+    cores_per_node: 16
+    max_blocks: 10
+    walltime: 08:00:00
+    clusters: doduo,shinx
+
+# tasks will use 12 cores and one GPU, running for four hours at most
+# parsl asks SLURM for 12 cores + 1 GPU on accelgor, so one task can run per SLURM job
+ModelTraining:
+  cores_per_task: 12
+  gpus_per_task: 1
+  max_runtime: 04:00:00
+  slurm:
+    nodes_per_block: 1
+    cores_per_node: 12
+    gpus_per_node: 1
+    walltime: 04:00:00
+    clusters: accelgor
+
+# tasks will use 32 cores (by default), running for one hour at most
+# tasks will be killed when they exceed 64 GB of memory usage
+# parsl asks SLURM for 32 cores on doduo or shinx, so one task can run per SLURM job
+# tells WQ to load a CP2K module before trying to launch calculations
+CP2K:
+  cores_per_task: 32
+  max_runtime: 01:00:00
+  memory_limit: 64
+  slurm:
+    nodes_per_block: 1
+    cores_per_node: 32
+    mem_per_node: 64
+    walltime: 04:00:00
+    max_blocks: 50
+    clusters: doduo,shinx
+    worker_init: ml CP2K/2023.1-foss-2023a
diff --git a/configs/local_test.yaml b/configs/local_test.yaml
index 1342c39..9381058 100644
--- a/configs/local_test.yaml
+++ b/configs/local_test.yaml
@@ -1,4 +1,3 @@
----
 psiflow_log_level: INFO
 
 ModelEvaluation:
@@ -11,7 +10,6 @@ ModelTraining:
     max_threads: 4
     max_runtime: 00:00:20
 
-  
 CP2K:
   executor: workqueue
   cores_per_task: 2
@@ -23,6 +21,7 @@ CP2K:
 GPAW:
   executor: workqueue
   cores_per_task: 2
+  max_runtime: 00:00:20
   container:
     uri: oras://ghcr.io/molmod/gpaw:24.1
   
@@ -30,5 +29,3 @@ ORCA:
   executor: workqueue
   cores_per_task: 2
 
-
-...
diff --git a/configs/old_hortense.yaml b/configs/old_hortense.yaml
deleted file mode 100644
index d6ccc68..0000000
--- a/configs/old_hortense.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
----
-parsl_log_level: WARNING
-container_engine: 'apptainer'
-container_uri: 'oras://ghcr.io/molmod/psiflow:4.0.0_cu118'
-default_threads: 8
-ModelEvaluation:
-  cores_per_worker: 12
-  gpu: True
-  max_simulation_time: 20
-  slurm:
-    partition: "gpu_rome_a100"
-    account: "2023_070"
-    nodes_per_block: 1
-    cores_per_node: 48
-    max_blocks: 1
-    walltime: "12:00:00"
-    scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=4\n"
-ModelTraining:
-  cores_per_worker: 12
-  gpu: true
-  max_training_time: 40
-  slurm:
-    partition: "gpu_rome_a100"
-    account: "2023_070"
-    nodes_per_block: 1
-    cores_per_node: 12
-    max_blocks: 1
-    walltime: "12:00:00"
-    scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=1\n"
-CP2K:
-  cores_per_worker: 64
-  max_evaluation_time: 30
-  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 -bind-to core cp2k.psmp'
-  slurm:
-    partition: "cpu_rome"
-    account: "2024_079"
-    nodes_per_block: 1
-    cores_per_node: 64
-    max_blocks: 25
-    walltime: "06:00:00"
-    scheduler_options: "#SBATCH --clusters=dodrio\n"
-...
diff --git a/configs/old_lumi.yaml b/configs/old_lumi.yaml
deleted file mode 100644
index b5e9a14..0000000
--- a/configs/old_lumi.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
----
-parsl_log_level: WARNING
-container_engine: 'singularity'
-container_uri: 'oras://ghcr.io/molmod/psiflow:4.0.0_rocm6.2'
-default_threads: 8
-CP2K:
-  cores_per_worker: 32
-  max_evaluation_time: 20
-  launch_command: 'singularity exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 cp2k.psmp'
-  slurm:
-    partition: "standard"
-    account: "project_465001125"
-    nodes_per_block: 1
-    cores_per_node: 128
-    max_blocks: 10
-    walltime: "01:00:00"
-ModelEvaluation:
-  cores_per_worker: 7
-  gpu: True
-  slurm:
-    partition: "standard-g"
-    account: "project_465001125"
-    nodes_per_block: 1
-    cores_per_node: 56
-    max_blocks: 10
-    walltime: "01:00:00"
-    scheduler_options: "#SBATCH --gres=gpu:8\n"
-ModelTraining:
-  cores_per_worker: 7
-  gpu: true
-  multigpu: true
-  slurm:
-    partition: "standard-g"
-    account: "project_465001125"
-    nodes_per_block: 1
-    cores_per_node: 56
-    walltime: "01:00:00"
-    scheduler_options: "#SBATCH --gres=gpu:8\n"
-...
diff --git a/configs/old_threadpool.yaml b/configs/old_threadpool.yaml
deleted file mode 100644
index 33f6c57..0000000
--- a/configs/old_threadpool.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
----
-parsl_log_level: WARNING
-retries: 0
-ModelEvaluation:
-  gpu: false
-  use_threadpool: true
-  max_simulation_time: 0.4
-ModelTraining:
-  gpu: true
-  use_threadpool: true
-  max_training_time: 1
-  max_workers: 1  # suppress assertion for multigpu training
-CP2K:
-  cores_per_worker: 2
-  max_evaluation_time: 0.3
-  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
-CP2K_container:
-  cores_per_worker: 2
-  max_evaluation_time: 0.3
-  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
-GPAW:
-  cores_per_worker: 2
-  max_evaluation_time: 0.3
-  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
-GPAW_container:
-  cores_per_worker: 2
-  max_evaluation_time: 0.3
-  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
-...
diff --git a/configs/old_wq.yaml b/configs/old_wq.yaml
deleted file mode 100644
index 660d784..0000000
--- a/configs/old_wq.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
----
-parsl_log_level: WARNING
-default_threads: 4
-ModelEvaluation:
-  cores_per_worker: 4
-  gpu: True
-  max_simulation_time: 0.4
-ModelTraining:
-  cores_per_worker: 4
-  gpu: true
-  max_training_time: 1
-  max_workers: 1
-CP2K:
-  cores_per_worker: 2
-  max_evaluation_time: 0.3
-  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2023.2 /opt/entry.sh mpirun -np 2 -x OMP_NUM_THREADS=1 cp2k.psmp'
-...
diff --git a/psiflow/utils/config.py b/psiflow/utils/config.py
index 14be6b1..59e140b 100644
--- a/psiflow/utils/config.py
+++ b/psiflow/utils/config.py
@@ -8,7 +8,7 @@
 parsl_log_level: WARNING
 psiflow_log_level: WARNING
 usage_tracking: 3
-default_threads: 4
+default_threads: 8
 tmpdir_root: /tmp
 keep_tmpdirs: false
 

From c131be11fd25027dba8ea647cdece34e26e07f0e Mon Sep 17 00:00:00 2001
From: pdobbelaere <pidobbel.Dobbelaere@UGent.be>
Date: Mon, 30 Mar 2026 13:19:41 +0200
Subject: [PATCH 15/15] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 898ab5b..61df7a5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ dependencies = [
     "ase>=3.23.0",
     "pyyaml>=6.0",
     "numpy>=1.22.3, <2",
-    "parsl==2026.02.16",
+    "parsl==2026.02.23",
     "prettytable",
     "psutil",
     "cp2k-input-tools @ git+https://github.com/cp2k/cp2k-input-tools.git@3b9929735dcb3c8c0620a548b1fe20efecbad077",  # need 2024.1