Skip to content
Merged
15 changes: 15 additions & 0 deletions configs/example_local_debug.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# log all messages, from both Parsl and psiflow
parsl_log_level: DEBUG
psiflow_log_level: DEBUG

# tell Parsl not to clean up completed tasks from the DFK
# you can then print out the full list of tasks with 'log_dfk_tasks' from psiflow.utils.logging
garbage_collect: false

# tell psiflow to execute every bash app in the specified directory, and to not clean up afterwards
# this allows you to track which files are created in each task
tmpdir_root: ~/psiflow_tasks
keep_tmpdirs: true

# not specifying ModelEvaluation or ModelTraining falls back to the defaults in 'psiflow.utils.config'
# those defaults do not include any ReferenceEvaluation blocks, so Reference apps will not run with this configuration
21 changes: 21 additions & 0 deletions configs/example_tier0_lumi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Tier-0 LUMI requires an active research project to submit calculations

# run ModelEvaluation and ModelTraining in a psiflow container
# the settings below are nested under 'container' so they load as one mapping
container:
  uri: oras://ghcr.io/molmod/psiflow:4.0.0_rocm6.2  # outdated uri
  engine: singularity  # LUMI uses singularity instead of apptainer
  gpu_flavour: rocm  # GPU nodes are of the AMD kind

# not specifying max_runtime defaults to (just shy of) job walltime
# each task is given 56 cores and 8 GPUs, matching what is requested per node
ModelTraining:
  cores_per_task: 56
  gpus_per_task: 8
  slurm:
    partition: standard-g
    account: project_465001125
    nodes_per_block: 1
    cores_per_node: 56
    gpus_per_node: 8
    max_blocks: 1
    walltime: "12:00:00"
    # NOTE(review): no scheduler_options are set; the GPU count is expected to come
    # from gpus_per_node, as in the other example configurations - confirm
19 changes: 19 additions & 0 deletions configs/example_tier1_hortense.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Tier-1 Hortense requires an active research project to submit calculations
# always make sure to 'unset SBATCH_PARTITION' before submitting anything

# when not using a psiflow container, you need to make sure all necessary software is available on the worker node
# this can be through installed modules, custom environments, ...
ModelEvaluation:
  cores_per_task: 8
  gpus_per_task: 1
  max_runtime: "06:00:00"
  slurm:
    partition: gpu_rome_a100  # specify node partition where jobs should run
    # specify your active computational grant
    # keep the quotes: an unquoted 2026_042 is loaded as the integer 2026042 by YAML 1.1 parsers
    account: "2026_042"
    nodes_per_block: 1
    cores_per_node: 32
    gpus_per_node: 4
    max_blocks: 10
    walltime: "06:30:00"
    worker_init: micromamba activate my-mace-env  # load an environment with the appropriate software

59 changes: 59 additions & 0 deletions configs/example_tier2_ugent.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Tier-2 UGent divides its compute nodes over 'clusters' rather than partitions
# always make sure to 'unset SLURM_CLUSTERS' before submitting anything

# retry failed apps once
retries: 1

# psiflow always launches one HighThroughputExecutor and one ThreadPoolExecutor to handle internal apps (IO, parsing)
# these tasks run locally and should never do any real computational work
# default_threads mainly needs to be high enough to avoid concurrency bottlenecks
default_threads: 8

# run ModelEvaluation and ModelTraining in a psiflow container
# the settings below are nested under 'container' so they load as one mapping
container:
  uri: oras://ghcr.io/molmod/psiflow:4.0.0_cu118  # outdated uri
  engine: apptainer
  gpu_flavour: cuda  # required for GPU usage

# tasks will use (at least) two cores (no GPUs) and can run for six hours at most
# parsl asks SLURM for 16 cores on either doduo or shinx, so 8 tasks can run per SLURM job
# only 10 SLURM jobs will run at once
ModelEvaluation:
  cores_per_task: 2
  max_runtime: "06:00:00"
  slurm:
    nodes_per_block: 1
    cores_per_node: 16
    max_blocks: 10
    walltime: "08:00:00"
    clusters: doduo,shinx

# tasks will use 12 cores and one GPU, running for four hours at most
# parsl asks SLURM for 12 cores + 1 GPU on accelgor, so one task can run per SLURM job
ModelTraining:
  cores_per_task: 12
  gpus_per_task: 1
  max_runtime: "04:00:00"
  slurm:
    nodes_per_block: 1
    cores_per_node: 12
    gpus_per_node: 1
    # NOTE(review): walltime equals max_runtime here, whereas the other blocks
    # leave a margin between the two - confirm this is intended
    walltime: "04:00:00"
    clusters: accelgor

# tasks will use 32 cores (by default), running for one hour at most
# tasks will be killed when they exceed 64 GB of memory usage
# parsl asks SLURM for 32 cores on doduo or shinx, so one task can run per SLURM job
# tells WQ to load a CP2K module before trying to launch calculations
CP2K:
  cores_per_task: 32
  max_runtime: "01:00:00"
  memory_limit: 64
  slurm:
    nodes_per_block: 1
    cores_per_node: 32
    mem_per_node: 64
    walltime: "04:00:00"
    max_blocks: 50
    clusters: doduo,shinx
    worker_init: ml CP2K/2023.1-foss-2023a
45 changes: 22 additions & 23 deletions configs/local_test.yaml
Original file line number Diff line number Diff line change
@@ -1,32 +1,31 @@
---
parsl_log_level: WARNING
retries: 0
make_symlinks: false
psiflow_log_level: INFO

ModelEvaluation:
gpu: false
use_threadpool: false
max_simulation_time: 1

executor: threadpool
max_threads: 4
max_runtime: 00:00:20
ModelTraining:
gpu: true
use_threadpool: true
max_training_time: 1
max_workers: 1 # suppress assertion for multigpu training

executor: threadpool
max_threads: 4
max_runtime: 00:00:20

CP2K:
cores_per_worker: 1
max_evaluation_time: 0.1
container_uri: 'oras://ghcr.io/molmod/cp2k:2024.1'
executor: workqueue
cores_per_task: 2
max_runtime: 00:00:20
memory_limit: 2
container:
uri: docker://cp2k/cp2k:2025.2_mpich_x86_64_psmp

GPAW:
cores_per_worker: 1
max_evaluation_time: 0.1
container_uri: 'oras://ghcr.io/molmod/gpaw:24.1'
executor: workqueue
cores_per_task: 2
max_runtime: 00:00:20
container:
uri: oras://ghcr.io/molmod/gpaw:24.1

ORCA:
cores_per_worker: 1
max_evaluation_time: 0.1

executor: workqueue
cores_per_task: 2

...
42 changes: 0 additions & 42 deletions configs/old_hortense.yaml

This file was deleted.

39 changes: 0 additions & 39 deletions configs/old_lumi.yaml

This file was deleted.

29 changes: 0 additions & 29 deletions configs/old_threadpool.yaml

This file was deleted.

17 changes: 0 additions & 17 deletions configs/old_wq.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions psiflow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from pathlib import Path

import typeguard

from .config import setup_slurm_config # noqa: F401
from .execution import ExecutionContextLoader
from .serialization import ( # noqa: F401
Expand All @@ -12,7 +10,6 @@
)


@typeguard.typechecked
def resolve_and_check(path: Path) -> Path:
path = path.resolve()
if Path.cwd() in path.parents:
Expand Down
19 changes: 10 additions & 9 deletions psiflow/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
from parsl.app.app import join_app, python_app
from parsl.app.python import PythonApp
from parsl.data_provider.files import File
from parsl.dataflow.futures import AppFuture
from parsl.dataflow.futures import AppFuture, DataFuture

import psiflow
from psiflow.geometry import QUANTITIES, Geometry
from psiflow.utils.apps import combine_futures, copy_data_future, unpack_i
from psiflow.utils.apps import copy_data_future, pack

from .utils import (
align_axes,
Expand Down Expand Up @@ -118,7 +118,7 @@ def __getitem__(
inputs=[self.extxyz],
outputs=[], # will return Geometry as Future
)
return unpack_i(future, 0)
return future[0]
else: # slice, list, AppFuture
extxyz = read_frames(
index,
Expand All @@ -127,21 +127,22 @@ def __getitem__(
).outputs[0]
return Dataset(None, extxyz)

def save(self, path: Union[Path, str]) -> AppFuture:
def save(self, path: Union[Path, str]) -> DataFuture:
"""
Save the dataset to a file.

Args:
path: Path to save the dataset.

Returns:
AppFuture: Future representing the completion of the save operation.
DataFuture: Future representing the file to which the dataset will be saved.
"""
path = psiflow.resolve_and_check(Path(path))
_ = copy_data_future(
future = copy_data_future(
inputs=[self.extxyz],
outputs=[File(str(path))],
)
return future.outputs[0]

def geometries(self) -> AppFuture:
"""
Expand Down Expand Up @@ -265,9 +266,9 @@ def get(
inputs=[self.extxyz],
)
if len(quantities) == 1:
return unpack_i(result, 0)
return result[0]
else:
return tuple([unpack_i(result, i) for i in range(len(quantities))])
return tuple([result[i] for i in range(len(quantities))])

def evaluate(
self,
Expand Down Expand Up @@ -300,7 +301,7 @@ def evaluate(
outputs = [outputs]
future = insert_quantities(
quantities=tuple(computable.outputs),
arrays=combine_futures(inputs=list(outputs)),
arrays=pack(*outputs),
inputs=[self.extxyz],
outputs=[psiflow.context().new_file("data_", ".xyz")],
)
Expand Down
Loading
Loading