diff --git a/.github/workflows/run_pytest.yaml b/.github/workflows/run_pytest.yaml
index b835eacc..078a2c92 100644
--- a/.github/workflows/run_pytest.yaml
+++ b/.github/workflows/run_pytest.yaml
@@ -36,7 +36,7 @@ jobs:
           pip install --no-cache-dir git+https://github.com/i-pi/i-pi.git@v3.0.0-beta4
           pip install torch==2.5.1
           pip install git+https://github.com/acesuit/MACE.git@v0.3.5
-          apptainer exec oras://ghcr.io/molmod/cp2k:2024.1 ls
+          apptainer exec docker://cp2k/cp2k:2025.2_mpich_x86_64_psmp ls
           apptainer exec oras://ghcr.io/molmod/gpaw:24.1 ls
       - name: Checkout specific commit
         uses: actions/checkout@v4
diff --git a/install_local.sh b/install_local.sh
new file mode 100644
index 00000000..9f7d7df6
--- /dev/null
+++ b/install_local.sh
@@ -0,0 +1,32 @@
+# Create a psiflow installation including ModelTraining and ModelEvaluation dependencies
+
+ENV_NAME="psiflow-dev"
+micromamba env create -n $ENV_NAME python=3.14 -c conda-forge
+micromamba activate $ENV_NAME
+
+# install workqueue
+micromamba install ndcctools -c conda-forge
+
+# install psiflow
+pip install -e psiflow[dev]
+
+# install PyTorch and MACE
+pip install torch --index-url https://download.pytorch.org/whl/cu128
+pip install mace-torch
+pip install cuequivariance cuequivariance-torch cuequivariance-ops-torch-cu12
+
+# install basic PLUMED and python API
+micromamba install plumed -c conda-forge
+pip install plumed
+
+# make PLUMED visible to the py-plumed interface
+PLUMED_KERNEL="$CONDA_PREFIX/lib/libplumedKernel.so"
+echo "{\"env_vars\": {\"PLUMED_KERNEL\": \"$PLUMED_KERNEL\"}}" >> "$CONDA_PREFIX/conda-meta/state"
+
+# install simple-dftd3
+micromamba install simple-dftd3 dftd3-python -c conda-forge
+
+# install cp2k-input-tools 
+# (its dependencies are a right mess, so we fix some and pip will complain)
+pip install cp2k-input-tools
+pip install --force-reinstall Pint
diff --git a/psiflow/data/dataset.py b/psiflow/data/dataset.py
index 21bb4760..4a86f395 100644
--- a/psiflow/data/dataset.py
+++ b/psiflow/data/dataset.py
@@ -285,7 +285,7 @@ def evaluate(
         Returns:
             Dataset: A new Dataset with evaluation results.
         """
-        # TODO: WIP
+        # TODO: remove this functionality?
         from psiflow.hamiltonians import Hamiltonian
 
         if not isinstance(computable, Hamiltonian):
diff --git a/psiflow/execution.py b/psiflow/execution.py
index 5d46a59f..26e7f3ae 100644
--- a/psiflow/execution.py
+++ b/psiflow/execution.py
@@ -246,7 +246,7 @@ def __init__(
             )
 
         if self.executor_type == "workqueue":
-            # WQ-specific checks
+            # WQ-specific checks  TODO: what about multinode?
             ensure(
                 self.kwargs["gpus_per_task"] <= resources["gpus"],
                 self.kwargs["cores_per_task"] <= resources["cores"],
@@ -541,19 +541,10 @@ def __init__(self, **kwargs) -> None:
                 "ModelTraining is configured for CPU operation. Is this what you want?"
             )
 
-        # if self.multigpu:
-        #     # TODO: why? Think this might be a multinode thing - which I do not care about
-        #     message = (
-        #         "the max_training_time keyword does not work "
-        #         "in combination with multi-gpu training. Adjust "
-        #         "the maximum number of epochs to control the "
-        #         "duration of training"
-        #     )
-        #     assert self.max_runtime is None, message
-
-    def train_command(self, initialize: bool = False):
-        command = "psiflow-mace-train"
-        return self.wrap_in_timeout(command)
+    @property
+    def multi_gpu(self) -> bool:
+        # only for WQ
+        return (self.spec or {}).get("gpus", 0) > 1
 
     def wq_resources(self, *args, **kwargs) -> dict:
         if self.spec is None:
diff --git a/psiflow/functions.py b/psiflow/functions.py
index 267c3b9c..a461e7d6 100644
--- a/psiflow/functions.py
+++ b/psiflow/functions.py
@@ -1,16 +1,16 @@
 import json
 import os
-import warnings
 import tempfile
-from dataclasses import dataclass
+import logging
+from dataclasses import dataclass, field
 from pathlib import Path
-from typing import ClassVar, Optional, Type, Union, get_type_hints
+from typing import Optional, Type, Union, Any
+from collections.abc import Sequence
 
-import ase
 import numpy as np
-import typeguard
 from ase import Atoms
-from ase.data import atomic_masses, chemical_symbols
+from ase.calculators.calculator import Calculator
+from ase.data import atomic_masses
 from ase.units import fs, kJ, mol, nm
 from ase.stress import voigt_6_to_full_3x3_stress
 
@@ -32,10 +32,8 @@ def format_output(
     return {"energy": energy, "forces": forces, "stress": stress}
 
 
-@typeguard.typechecked
-@dataclass
 class Function:
-    outputs: ClassVar[tuple]
+    outputs: tuple = ("energy", "forces", "stress")
 
     def __call__(
         self,
@@ -48,12 +46,9 @@ def compute(
         geometries: list[Geometry],
     ) -> dict[str, float | np.ndarray]:
         """Evaluate multiple geometries and merge data into single arrays"""
-        value, grad_pos, grad_cell = create_outputs(
-            self.outputs,
-            geometries,
-        )
+        value, grad_pos, grad_cell = create_outputs(self.outputs, geometries)
         for i, geometry in enumerate(geometries):
-            if geometry == NullState:
+            if geometry == NullState:  # TODO: remove
                 continue
             out = self(geometry)
             value[i] = out["energy"]
@@ -62,14 +57,8 @@ def compute(
         return {"energy": value, "forces": grad_pos, "stress": grad_cell}
 
 
-@dataclass
-class EnergyFunction(Function):
-    outputs: ClassVar[tuple[str, ...]] = ("energy", "forces", "stress")
-
-
-@typeguard.typechecked
-@dataclass
-class EinsteinCrystalFunction(EnergyFunction):
+@dataclass(frozen=True)
+class EinsteinCrystalFunction(Function):
     force_constant: float
     centers: np.ndarray
     volume: float = 0.0
@@ -88,22 +77,22 @@ def __call__(
         return format_output(geometry, energy, grad_pos, grad_cell)
 
 
-@typeguard.typechecked
-@dataclass
-class PlumedFunction(EnergyFunction):
+@dataclass(frozen=True)
+class PlumedFunction(Function):
     plumed_input: str
     external: Optional[Union[str, Path]] = None
+    plumed_instances: dict[tuple, "plumed.Plumed"] = field(default_factory=dict)
 
     def __post_init__(self):
-        self.plumed_instances = {}
+        if self.external is not None:
+            assert self.external in self.plumed_input
 
     def __call__(
         self,
         geometry: Geometry,
     ) -> dict[str, float | np.ndarray]:
-        plumed_input = self.plumed_input
-        if self.external is not None:
-            assert self.external in plumed_input
+        if not geometry.periodic and "VOLUME" in self.plumed_input:
+            raise ValueError("VOLUME CV only supported for periodic structures")
 
         key = self._geometry_to_key(geometry)
         if key not in self.plumed_instances:
@@ -124,7 +113,7 @@ def __call__(
             plumed_.cmd("setRestart", True)
             plumed_.cmd("setNatoms", len(geometry))
             plumed_.cmd("init")
-            for line in plumed_input.split("\n"):
+            for line in self.plumed_input.split("\n"):
                 plumed_.cmd("readInputLine", line)
             os.remove(tmp.name)  # remove whatever plumed has created
             self.plumed_instances[key] = plumed_
@@ -159,9 +148,8 @@ def _geometry_to_key(geometry: Geometry) -> tuple:
         return tuple([geometry.periodic]) + tuple(geometry.per_atom.numbers)
 
 
-@typeguard.typechecked
-@dataclass
-class ZeroFunction(EnergyFunction):
+@dataclass(frozen=True)
+class ZeroFunction(Function):
     def __call__(
         self,
         geometry: Geometry,
@@ -169,9 +157,8 @@ def __call__(
         return format_output(geometry, 0)
 
 
-@typeguard.typechecked
-@dataclass
-class HarmonicFunction(EnergyFunction):
+@dataclass(frozen=True)
+class HarmonicFunction(Function):
     positions: np.ndarray
     hessian: np.ndarray
     energy: Optional[float] = None
@@ -188,111 +175,57 @@ def __call__(
         return format_output(geometry, energy, (-1.0) * grad.reshape(-1, 3))
 
 
-@typeguard.typechecked
-@dataclass
-class MACEFunction(EnergyFunction):
-    model_path: str
+@dataclass(frozen=True)
+class MACEFunction(Function):
+    # TODO: why are some arguments separated and others 'calc_kwargs'?
+    model_path: str | Path
     ncores: int
     device: str
     dtype: str
-    atomic_energies: dict[str, float]
+    calc_kwargs: dict = field(default_factory=dict)
     env_vars: Optional[dict[str, str]] = None
 
-    def __post_init__(self):
-        import logging
-        import os
+    calc: Calculator = field(init=False)
 
+    def __post_init__(self):
         # import environment variables before any nontrivial imports
         if self.env_vars is not None:
             for key, value in self.env_vars.items():
                 os.environ[key] = value
 
         import torch
-        from mace.tools import torch_tools, utils
+        from mace.calculators.mace import MACECalculator
 
-        torch_tools.set_default_dtype(self.dtype)
-        if self.device == "gpu":  # when it's not a specific GPU, use any
-            self.device = "cuda"
-        self.device = torch_tools.init_device(self.device)
+        # MACE uses the root logger..
+        logging.getLogger("").setLevel(logging.INFO)
 
         torch.set_num_threads(self.ncores)
-        model = torch.load(f=self.model_path, map_location="cpu")
-        if self.dtype == "float64":
-            model = model.double()
-        else:
-            model = model.float()
-        model = model.to(self.device)
-        model.eval()
-        self.model = model
-        self.r_max = float(self.model.r_max)
-        self.z_table = utils.AtomicNumberTable(
-            [int(z) for z in self.model.atomic_numbers]
+        calc = MACECalculator(
+            model_paths=self.model_path,
+            device=self.device,
+            default_dtype=self.dtype,
+            **self.calc_kwargs,
         )
-
-        # remove unwanted streamhandler added by MACE / torch!
-        logging.getLogger("").removeHandler(logging.getLogger("").handlers[0])
-
-    def get_atomic_energy(self, geometry):
-        total = 0
-        numbers, counts = np.unique(geometry.per_atom.numbers, return_counts=True)
-        for idx, number in enumerate(numbers):
-            symbol = chemical_symbols[number]
-            try:
-                total += counts[idx] * self.atomic_energies[symbol]
-            except KeyError:
-                warnings.warn(
-                    f'(MACEFunction) No atomic energy entry for symbol "{symbol}". Are you sure?'
-                )
-        return total
+        object.__setattr__(self, "calc", calc)  # frozen dataclass instance
 
     def __call__(
         self,
         geometry: Geometry,
     ) -> dict[str, float | np.ndarray]:
-        from mace import data
-        from mace.tools.torch_geometric.batch import Batch
-
-        # TODO: is this call necessary?
-        # torch_tools.set_default_dtype(self.dtype)
-
-        energy, forces, stress = (
-            0.0,
-            np.zeros(shape=(len(geometry), 3)),
-            np.zeros(shape=(3, 3)),
-        )
+        atoms = geometry_to_atoms(geometry)
+        self.calc.calculate(atoms)
+        return format_output(geometry, **self.calc.results)
 
-        # compute offset if possible
-        if self.atomic_energies:
-            energy += self.get_atomic_energy(geometry)
 
-        cell = np.copy(geometry.cell) if geometry.periodic else None
-        atoms = Atoms(
-            positions=geometry.per_atom.positions,
-            numbers=geometry.per_atom.numbers,
-            cell=cell,
-            pbc=geometry.periodic,
-        )
-        config = data.config_from_atoms(atoms)
-        data = data.AtomicData.from_config(
-            config, z_table=self.z_table, cutoff=self.r_max
-        )
-        batch = Batch.from_data_list([data]).to(device=self.device)
-        out = self.model(batch.to_dict(), compute_stress=cell is not None)
-        energy += out["energy"].detach().cpu().item()
-        forces += out["forces"].detach().cpu().numpy()
-        if cell is not None:
-            stress += out["stress"].detach().cpu().numpy().squeeze()
-        return format_output(geometry, energy, forces, stress)
-
-
-@typeguard.typechecked
-@dataclass
-class DispersionFunction(EnergyFunction):
+@dataclass(frozen=True)
+class DispersionFunction(Function):
     method: str
     damping: str = "d3bj"
     params_tweaks: Optional[dict[str, float]] = None
     num_threads: int = 1
 
+    calc: Calculator = field(init=False)
+
     def __post_init__(self):
         # OMP_NUM_THREADS for parallel evaluation does not work..
         # https://github.com/dftd3/simple-dftd3/issues/49
@@ -301,20 +234,16 @@ def __post_init__(self):
 
         from dftd3.ase import DFTD3
 
-        self.calc = DFTD3(
+        calc = DFTD3(
             method=self.method, damping=self.damping, params_tweaks=self.params_tweaks
         )
+        object.__setattr__(self, "calc", calc)  # frozen dataclass instance
 
     def __call__(
         self,
         geometry: Geometry,
     ) -> dict[str, float | np.ndarray]:
-        atoms = ase.Atoms(  # TODO: geometry.to_atoms?
-            numbers=geometry.per_atom.numbers,
-            positions=geometry.per_atom.positions,
-            cell=geometry.cell,
-            pbc=geometry.periodic,
-        )
+        atoms = geometry_to_atoms(geometry)
         self.calc.calculate(atoms)
         return format_output(geometry, **self.calc.results)
 
@@ -322,13 +251,18 @@ def __call__(
 def _apply(
     arg: Union[Geometry, list[Geometry], None],
     outputs_: tuple[str, ...],
-    inputs: list = [],
-    function_cls: Optional[Type[Function]] = None,
+    function_cls: Type[Function],
+    wait_for: Any = None,
+    inputs: Sequence = (),
     parsl_resource_specification: dict = {},
     **parameters,
 ) -> Optional[list[np.ndarray]]:
     from psiflow.data.utils import _read_frames
 
+    # TODO: why pass geoms through arg or inputs?
+    #  should we not have a single keyword for input structures?
+    #  or at least reserve inputs for arbitrary futures (to wait on)?
+
     assert function_cls is not None
     if arg is None:
         states = _read_frames(inputs=[inputs[0]])
@@ -343,26 +277,32 @@ def _apply(
 
 
 def function_from_json(path: Union[str, Path], **kwargs) -> Function:
+    from psiflow.serialization import deserialize_hook
+
     functions = [
         EinsteinCrystalFunction,
         HarmonicFunction,
         MACEFunction,
         PlumedFunction,
         DispersionFunction,
-        None,
     ]
     with open(path, "r") as f:
-        data = json.loads(f.read())
-    assert "function_name" in data
-    for function_cls in functions:
-        if data["function_name"] == function_cls.__name__:
-            break
-    data.pop("function_name")
-    for name, type_hint in get_type_hints(function_cls).items():
-        if type_hint is np.ndarray:
-            data[name] = np.array(data[name])
+        data = json.loads(f.read(), object_hook=deserialize_hook)
+    function_name = data.pop("function_name")
+    function_cls = {f.__name__: f for f in functions}[function_name]
+
     for key, value in kwargs.items():
         if key in data:
             data[key] = value
     function = function_cls(**data)
     return function
+
+
+def geometry_to_atoms(geom: Geometry) -> Atoms:
+    """Only structural information"""
+    return Atoms(
+        positions=geom.per_atom.positions,
+        numbers=geom.per_atom.numbers,
+        cell=np.copy(geom.cell) if geom.periodic else None,
+        pbc=geom.periodic,
+    )
diff --git a/psiflow/hamiltonians.py b/psiflow/hamiltonians.py
index c8faee95..f101188e 100644
--- a/psiflow/hamiltonians.py
+++ b/psiflow/hamiltonians.py
@@ -1,13 +1,14 @@
-import urllib
+import logging
 from functools import partial
 from pathlib import Path
-from typing import Optional, Union, Callable, Sequence
+from typing import Optional, Union, Callable, Sequence, Any, ClassVar
+from dataclasses import dataclass, field
 
 import numpy as np
 from parsl.app.app import python_app
 from parsl.app.futures import DataFuture
 from parsl.data_provider.files import File
-from parsl.dataflow.futures import AppFuture
+from parsl.dataflow.futures import AppFuture, Future
 
 import psiflow
 from psiflow.data import Computable, Dataset, aggregate_multiple, compute
@@ -22,20 +23,27 @@
 )
 from psiflow.geometry import Geometry
 from psiflow.utils._plumed import remove_comments_printflush
-from psiflow.utils.apps import copy_app_future, get_attribute
+from psiflow.utils.apps import get_attribute
 from psiflow.utils.io import dump_json
 
 
+# TODO: comparison logic in __eq__ only works for hamiltonians without futures
+# TODO: dataclasses automatically generate __eq__
+
+
+logger = logging.getLogger(__name__)  # logging per module
+
+
 apply_threads = python_app(_apply, executors=["default_threads"])
 apply_htex = python_app(_apply, executors=["default_htex"])
 apply_modelevaluation = python_app(_apply, executors=["ModelEvaluation"])
 
 
+# TODO: why have the Computable class?
 class Hamiltonian(Computable):
-    outputs: tuple = ("energy", "forces", "stress")
     batch_size = 1000
-    app: Callable
-    function_name: str
+    outputs: ClassVar[tuple] = ("energy", "forces", "stress")
+    function_name: ClassVar[str]
 
     def compute(
         self,
@@ -49,7 +57,7 @@ def compute(
             batch_size = self.__class__.batch_size
         return compute(arg, self.get_app(), outputs_=outputs, batch_size=batch_size)
 
-    def __eq__(self, hamiltonian: "Hamiltonian") -> bool:
+    def __eq__(self, other: "Hamiltonian") -> bool:
         raise NotImplementedError
 
     def __mul__(self, a: float) -> "MixtureHamiltonian":
@@ -78,6 +86,7 @@ def serialize_function(self, **kwargs) -> DataFuture:
         ).outputs[0]
 
     def parameters(self) -> dict:
+        """Return function parameters"""
         raise NotImplementedError
 
     def get_app(self) -> Callable:
@@ -210,8 +219,9 @@ def serialize(self, **kwargs) -> list[DataFuture]:
 
 
 @psiflow.register_serializable
+@dataclass
 class Zero(Hamiltonian):
-    function_name: str = "ZeroFunction"
+    function_name: ClassVar[str] = "ZeroFunction"
 
     def __init__(self):
         pass
@@ -220,9 +230,7 @@ def get_app(self) -> Callable:
         return partial(apply_threads, function_cls=ZeroFunction)
 
     def __eq__(self, hamiltonian: Hamiltonian) -> bool:
-        if type(hamiltonian) is Zero:
-            return True
-        return False
+        return True if isinstance(hamiltonian, Zero) else False
 
     def __mul__(self, a: float) -> "Zero":
         return Zero()
@@ -235,17 +243,12 @@ def __add__(self, hamiltonian: Hamiltonian) -> Hamiltonian:
 
 
 @psiflow.register_serializable
+@dataclass
 class EinsteinCrystal(Hamiltonian):
-    # TODO: logic not consistent depending on Geometry | AppFuture
-    reference_geometry: Geometry | AppFuture
-    force_constant: float
-    function_name: str = "EinsteinCrystalFunction"
-
-    def __init__(self, geometry: Union[Geometry, AppFuture], force_constant: float):
-        super().__init__()
-        self.reference_geometry = copy_app_future(geometry)
-        self.force_constant = force_constant
-        self.external = None  # needed
+    force_constant: float | AppFuture
+    centers: np.ndarray | AppFuture
+    volume: float | AppFuture
+    function_name: ClassVar[str] = "EinsteinCrystalFunction"
 
     def get_app(self) -> Callable:
         return partial(
@@ -255,72 +258,84 @@ def get_app(self) -> Callable:
     def parameters(self) -> dict:
         return {
             "force_constant": self.force_constant,
-            "centers": get_attribute(self.reference_geometry, "per_atom", "positions"),
-            "volume": get_attribute(self.reference_geometry, "volume"),
+            "centers": self.centers,
+            "volume": self.volume,
         }
 
-    def __eq__(self, hamiltonian: Hamiltonian) -> bool:
+    def __eq__(self, other: Hamiltonian) -> bool:
+        if self is other:
+            return True  # identity check
         if (
-            not isinstance(hamiltonian, EinsteinCrystal)
-            or not np.allclose(self.force_constant, hamiltonian.force_constant)
-            or self.reference_geometry != hamiltonian.reference_geometry
+            not isinstance(other, EinsteinCrystal)
+            or not attrs_equal(self.force_constant, other.force_constant)
+            or not attrs_equal(self.centers, other.centers)
+            or not attrs_equal(self.volume, other.volume)
         ):
             return False
         return True
 
+    @classmethod
+    def from_geometry(
+        cls, geometry: Geometry | AppFuture, force_constant: float | AppFuture
+    ):
+        # TODO: this is not immutable?
+        centers = get_attribute(geometry, "per_atom", "positions")
+        volume = get_attribute(geometry, "volume")
+        return cls(force_constant, centers, volume)
+
 
 @psiflow.register_serializable
+@dataclass
 class PlumedHamiltonian(Hamiltonian):
-    plumed_input: str  # TODO: or future?
-    external: Optional[psiflow._DataFuture]
-    function_name: str = "PlumedFunction"
-
-    def __init__(
-        self,
-        plumed_input: str,
-        external: Union[None, str, Path, File, DataFuture] = None,
-    ):
-        super().__init__()
-        self.plumed_input = remove_comments_printflush(plumed_input)
-        if type(external) in [str, Path]:
-            external = File(str(external))
-        if external is not None:
-            assert external.filepath in self.plumed_input
-        self.external = external
+    plumed_input: str | AppFuture
+    external: Optional[psiflow._DataFuture] = None
+    function_name: ClassVar[str] = "PlumedFunction"
+
+    def __post_init__(self):
+        self._prepare_input()
+        if isinstance(ext := self.external, (str, Path)):
+            ext = File(ext)
+        if ext is not None:
+            assert ext.filepath in self.plumed_input
+        self.external = ext
+
+    def _prepare_input(self) -> None:
+        if isinstance(inp := self.plumed_input, Future):
+            app = python_app(remove_comments_printflush, executors=["default_threads"])
+            inp = app(inp)
+        else:
+            inp = remove_comments_printflush(inp)
+        self.plumed_input = inp
 
     def get_app(self) -> Callable:
-        return partial(apply_htex, function_cls=PlumedFunction, **self.parameters())
+        return partial(
+            apply_htex,
+            function_cls=PlumedFunction,
+            wait_for=self.external,
+            **self.parameters(),
+        )
 
     def parameters(self) -> dict:
-        if self.external is not None:  # ensure parameters depends on self.external
-            external = copy_app_future(self.external.filepath, inputs=[self.external])
-        else:
-            external = None
-        return {"plumed_input": self.plumed_input, "external": external}
+        path = self.external.filepath if self.external is not None else None
+        return {"plumed_input": self.plumed_input, "external": path}
 
     def __eq__(self, other: Hamiltonian) -> bool:
-        if (
-            not isinstance(other, PlumedHamiltonian)
-            or self.plumed_input != other.plumed_input
+        if self is other:
+            return True  # identity check
+        if not isinstance(other, PlumedHamiltonian) or not attrs_equal(
+            self.plumed_input, other.plumed_input
         ):
             return False
         return True
 
 
 @psiflow.register_serializable
+@dataclass
 class Harmonic(Hamiltonian):
-    reference_geometry: Geometry | AppFuture
     hessian: np.ndarray | AppFuture
-    function_name: str = "HarmonicFunction"
-
-    def __init__(
-        self,
-        reference_geometry: Geometry | AppFuture,
-        hessian: np.ndarray | AppFuture,
-    ):
-        # TODO: why not copy_app_future(geometry) like others?
-        self.reference_geometry = reference_geometry
-        self.hessian = hessian
+    positions: np.ndarray | AppFuture
+    energy: np.ndarray | AppFuture
+    function_name: ClassVar[str] = "HarmonicFunction"
 
     def get_app(self) -> Callable:
         return partial(
@@ -328,42 +343,40 @@ def get_app(self) -> Callable:
         )
 
     def parameters(self) -> dict:
-        positions = get_attribute(self.reference_geometry, "per_atom", "positions")
-        energy = get_attribute(self.reference_geometry, "energy")
         return {
-            "positions": positions,
-            "energy": energy,
+            "positions": self.positions,
+            "energy": self.energy,
             "hessian": self.hessian,
         }
 
-    def __eq__(self, hamiltonian: Hamiltonian) -> bool:
-        if type(hamiltonian) is not Harmonic:
-            return False
-        hamiltonian: Harmonic
-        if hamiltonian.reference_geometry != self.reference_geometry:
-            return False
-
-        # TODO: why this check? Is it not always an ndarray?
-        # slightly different check for numpy arrays
-        is_array0 = type(hamiltonian.hessian) is np.ndarray
-        is_array1 = type(self.hessian) is np.ndarray
-        if is_array0 and is_array1:
-            equal = np.allclose(hamiltonian.hessian, self.hessian)
-        else:
-            equal = hamiltonian.hessian == self.hessian
-        if not equal:
+    def __eq__(self, other: Hamiltonian) -> bool:
+        if self is other:
+            return True  # identity check
+        if (
+            not isinstance(other, Harmonic)
+            or not attrs_equal(self.hessian, other.hessian)
+            or not attrs_equal(self.positions, other.positions)
+            or not attrs_equal(self.energy, other.energy)
+        ):
             return False
         return True
 
+    @classmethod
+    def from_geometry(
+        cls, geometry: Geometry | AppFuture, hessian: np.ndarray | AppFuture
+    ):
+        # TODO: this is not immutable?
+        positions = get_attribute(geometry, "per_atom", "positions")
+        energy = get_attribute(geometry, "energy")
+        return cls(hessian, positions, energy)
+
 
 @psiflow.register_serializable
+@dataclass
 class D3Hamiltonian(Hamiltonian):
-    method: str
-    damping: str
-    function_name: str = "DispersionFunction"
-
-    def __init__(self, method: str, damping: str = "d3bj"):
-        self.method, self.damping = method, damping
+    method: str | AppFuture
+    damping: str | AppFuture = "d3bj"
+    function_name: ClassVar[str] = "DispersionFunction"
 
     def get_app(self) -> Callable:
         # execution-side parameters of function are not included in self.parameters()
@@ -381,99 +394,101 @@ def get_app(self) -> Callable:
     def parameters(self) -> dict:
         return {"method": self.method, "damping": self.damping}
 
-    def __eq__(self, hamiltonian: Hamiltonian) -> bool:
+    def __eq__(self, other: Hamiltonian) -> bool:
+        if self is other:
+            return True  # identity check
         if (
-            not isinstance(hamiltonian, D3Hamiltonian)
-            or self.method != hamiltonian.method
-            or self.damping != hamiltonian.damping
+            not isinstance(other, D3Hamiltonian)
+            or not attrs_equal(self.method, other.method)
+            or not attrs_equal(self.damping, other.damping)
         ):
             return False
         return True
 
 
 @psiflow.register_serializable
+@dataclass
 class MACEHamiltonian(Hamiltonian):
-    external: psiflow._DataFuture
-    atomic_energies: dict[str, float]
-    function_name: str = "MACEFunction"
+    external: psiflow._DataFuture | str | Path
+    kwargs: dict = field(default_factory=dict)
+    function_name: ClassVar[str] = "MACEFunction"
 
-    def __init__(
-        self,
-        external: Union[Path, str, psiflow._DataFuture],
-        atomic_energies: dict[str, float],
-    ):
-        self.atomic_energies = atomic_energies
-        if type(external) in [str, Path]:
-            self.external = File(external)
-        else:
-            self.external = external
+    def __post_init__(self):
+        if isinstance(ext := self.external, (str, Path)):
+            ext = File(ext)
+            self.external = ext
+
+    def update_kwargs(self, **kwargs: Any) -> None:
+        """Specify kwargs for MACECalculator (enable_cueq, head, ..)"""
+        self.kwargs |= kwargs
 
     def get_app(self) -> Callable:
         # execution-side parameters of function are not included in self.parameters()
         evaluation = psiflow.context().definitions["ModelEvaluation"]
-        resources = evaluation.wq_resources(1)
         return partial(
             apply_modelevaluation,
             function_cls=MACEFunction,
-            parsl_resource_specification=resources,
-            **self.parameters(),
+            wait_for=self.external,
+            parsl_resource_specification=evaluation.wq_resources(1),
+            **self.parameters(include_env=True),
         )
 
-    def parameters(self) -> dict:
-        # TODO: Why is the future copy needed? Can we not pass the File/DataFuture directly?
-        model_path = copy_app_future(self.external.filepath, inputs=[self.external])
+    def parameters(self, include_env: bool = False) -> dict:
+        # TODO: in i-Pi MD, 'ncores', 'dtype', 'device' should be set by the sampling module
+        #  (most of them are overwritten in the driver right now)
         evaluation = psiflow.context().definitions["ModelEvaluation"]
-        return {
-            "model_path": model_path,
-            "atomic_energies": self.atomic_energies,
+        data = {
+            "model_path": self.external.filepath,
             "ncores": evaluation.cores_per_task,
             "dtype": "float32",
-            "device": "gpu" if evaluation.use_gpu else "cpu",
-            "env_vars": evaluation.env_vars,
+            "device": "cuda" if evaluation.use_gpu else "cpu",
+            "calc_kwargs": self.kwargs,
         }
+        if include_env:  # python apps need to set env_vars
+            data["env_vars"] = evaluation.env_vars
+        return data
 
-    def __eq__(self, hamiltonian: Hamiltonian) -> bool:
-        if type(hamiltonian) is not MACEHamiltonian:
-            return False
-        hamiltonian: MACEHamiltonian
-        if self.external.filepath != hamiltonian.external.filepath:
-            return False
-        if len(self.atomic_energies) != len(hamiltonian.atomic_energies):
+    def __eq__(self, other: Hamiltonian) -> bool:
+        if self is other:
+            return True  # identity check
+        if (
+            not isinstance(other, MACEHamiltonian)
+            or not attrs_equal(self.external.filepath, other.external.filepath)
+            or not attrs_equal(self.kwargs, other.kwargs)
+        ):
             return False
-        for symbol, energy in self.atomic_energies.items():
-            if not np.allclose(
-                energy,
-                hamiltonian.atomic_energies[symbol],
-            ):
-                return False
         return True
 
-    # TODO: the methods below are outdated..
-
     @classmethod
-    def mace_mp0(cls, size: str = "small") -> "MACEHamiltonian":
-        urls = dict(
-            small="https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0/2023-12-10-mace-128-L0_energy_epoch-249.model",  # 2023-12-10-mace-128-L0_energy_epoch-249.model
-            large="https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0/2023-12-03-mace-128-L1_epoch-199.model",
-        )
-        assert size in urls
-        parsl_file = psiflow.context().new_file("mace_mp_", ".pth")
-        urllib.request.urlretrieve(
-            urls[size],
-            parsl_file.filepath,
-        )
-        return cls(parsl_file, {})
+    def from_foundation(cls, name: Optional[str] = None, url: Optional[str] = None):
+        # see mace.calculators.foundation_models
+        import urllib.request
+        from psiflow.models.mace import mace_mp_urls
 
-    @classmethod
-    def mace_cc(cls) -> "MACEHamiltonian":
-        url = "https://github.com/molmod/psiflow/raw/main/examples/data/ani500k_cc_cpu.model"
-        parsl_file = psiflow.context().new_file("mace_mp_", ".pth")
-        urllib.request.urlretrieve(
-            url,
-            parsl_file.filepath,
-        )
-        return cls(parsl_file, {})
+        if name is not None:
+            url = mace_mp_urls.get(name)
+        file = psiflow.context().new_file("mace_foundation_", ".model")
+        logger.info(f"Downloading MACE foundation model from {url}")
+        _, http_msg = urllib.request.urlretrieve(url, file)
+        return cls(external=file)
 
 
 def combine_hamiltonians(hamiltonians: list[Hamiltonian]) -> MixtureHamiltonian:
     return sum(hamiltonians, start=Zero())  # mostly for type hinting
+
+
+def attrs_equal(attr1: Any, attr2: Any) -> bool:
+
+    # check for futures
+    future1 = isinstance(attr1, Future)
+    future2 = isinstance(attr2, Future)
+    if future1 and future2:
+        return attr1 == attr2  # has to be the same object
+    elif future1 != future2:
+        return False  # xor
+
+    elif isinstance(attr1, (np.ndarray, float)):
+        return np.allclose(attr1, attr2)
+    elif isinstance(attr1, (dict, str)):
+        return attr1 == attr2
+    return False
diff --git a/psiflow/learning.py b/psiflow/learning.py
index 4ba8ec6a..11cf8d03 100644
--- a/psiflow/learning.py
+++ b/psiflow/learning.py
@@ -15,7 +15,7 @@
 from psiflow.geometry import Geometry, NullState, assign_identifier
 from psiflow.hamiltonians import Hamiltonian, Zero
 from psiflow.metrics import Metrics
-from psiflow.models import Model
+# from psiflow.models import Model
 from psiflow.reference import Reference
 from psiflow.sampling import SimulationOutput, Walker, sample
 from psiflow.utils.apps import boolean_or, isnan
diff --git a/psiflow/metrics.py b/psiflow/metrics.py
index 7e789767..7b7e3511 100644
--- a/psiflow/metrics.py
+++ b/psiflow/metrics.py
@@ -14,7 +14,7 @@
 from psiflow.data import Dataset
 from psiflow.geometry import Geometry
 from psiflow.hamiltonians import Hamiltonian
-from psiflow.models import Model
+# from psiflow.models import Model
 from psiflow.sampling import SimulationOutput
 from psiflow.utils.apps import log_message
 # from psiflow.utils.apps import combine_futures, log_message, setup_logger
diff --git a/psiflow/models/__init__.py b/psiflow/models/__init__.py
index d6130892..15c12e4e 100644
--- a/psiflow/models/__init__.py
+++ b/psiflow/models/__init__.py
@@ -1,46 +1 @@
-from pathlib import Path
-from typing import Union
-
-import typeguard
-import yaml
-from ase.data import chemical_symbols
-from parsl.data_provider.files import File
-
-import psiflow
-from psiflow.models._mace import MACE, MACEConfig  # noqa: F401
-from psiflow.models.model import Model
-from psiflow.utils.apps import copy_data_future
-
-
-@typeguard.typechecked
-def load_model(path: Union[Path, str]) -> Model:
-    path = psiflow.resolve_and_check(Path(path))
-    assert path.is_dir()
-    classes = [
-        MACE,
-    ]
-    for model_cls in classes + [None]:
-        assert model_cls is not None
-        name = model_cls.__name__
-        path_config = path / (name + ".yaml")
-        if path_config.is_file():
-            break
-    with open(path_config, "r") as f:
-        config = yaml.load(f, Loader=yaml.FullLoader)
-    atomic_energies = {}
-    for key in list(config):
-        print(key)
-        if key.startswith("atomic_energies_"):
-            element = key.split("atomic_energies_")[-1]
-            assert element in chemical_symbols
-            atomic_energies[element] = config.pop(key)
-    model = model_cls(**config)
-    for element, energy in atomic_energies.items():
-        model.add_atomic_energy(element, energy)
-    path_model = path / "{}.pth".format(name)
-    if path_model.is_file():
-        model.model_future = copy_data_future(
-            inputs=[File(str(path_model))],
-            outputs=[psiflow.context().new_file("model_", ".pth")],
-        ).outputs[0]
-    return model
+from psiflow.models.mace import MACE
\ No newline at end of file
diff --git a/psiflow/models/_mace.py b/psiflow/models/_mace.py
deleted file mode 100644
index 67d241c4..00000000
--- a/psiflow/models/_mace.py
+++ /dev/null
@@ -1,258 +0,0 @@
-from __future__ import annotations  # necessary for type-guarding class methods
-
-import logging
-from dataclasses import asdict, dataclass
-from functools import partial
-from typing import Optional, Union, Callable
-
-import typeguard
-from parsl.app.app import bash_app
-from parsl.data_provider.files import File
-from parsl.dataflow.futures import AppFuture
-
-import psiflow
-from psiflow.hamiltonians import MACEHamiltonian
-from psiflow.models.model import Model
-
-logger = logging.getLogger(__name__)  # logging per module
-
-
-@typeguard.typechecked
-@dataclass
-class MACEConfig:
-    name: str = "mace"
-    seed: int = 0
-    log_dir: str = ""  # gets overwritten
-    model_dir: str = ""
-    checkpoints_dir: str = ""
-    results_dir: str = ""
-    downloads_dir: str = ""
-    device: str = "cuda"
-    default_dtype: str = "float32"  # default: float64
-    # distributed: bool = False  # this is automatically set based on execution config
-    log_level: str = "INFO"
-    error_table: str = "PerAtomRMSE"
-    model: str = "MACE"
-    r_max: float = 5.0
-    radial_type: str = "bessel"
-    num_radial_basis: int = 8
-    num_cutoff_basis: int = 5
-    pair_repulsion: bool = False
-    distance_transform: Optional[str] = None
-    interaction: str = "RealAgnosticResidualInteractionBlock"
-    interaction_first: str = "RealAgnosticResidualInteractionBlock"
-    max_ell: int = 3
-    correlation: int = 3
-    num_interactions: int = 2
-    MLP_irreps: str = "16x0e"
-    radial_MLP: str = "[64, 64, 64]"
-    num_channels: int = 16  # default: 128 channels
-    max_L: int = 1
-    gate: str = "silu"
-    scaling: str = "rms_forces_scaling"
-    avg_num_neighbors: Optional[float] = None
-    compute_avg_num_neighbors: bool = True
-    compute_stress: bool = True  # default: False
-    compute_forces: bool = True
-    train_file: Optional[str] = None
-    valid_file: Optional[str] = None
-    valid_fraction: float = 1e-12  # never split training set
-    test_file: Optional[str] = None
-    num_workers: int = 0
-    pin_memory: bool = True
-    E0s: Optional[str] = "average"
-    energy_key: str = "energy"
-    forces_key: str = "forces"
-    virials_key: str = "virials"
-    stress_key: str = "stress"
-    dipole_key: str = "dipole"
-    charges_key: str = "charges"
-    loss: str = "weighted"
-    forces_weight: float = 1  # default: 100
-    swa_forces_weight: float = 1
-    energy_weight: float = 10  # default: 1
-    swa_energy_weight: float = 100
-    virials_weight: float = 0  # default: 1
-    swa_virials_weight: float = 0  # default: 10
-    stress_weight: float = 0  # default: 1
-    swa_stress_weight: float = 0  # default: 10
-    config_type_weights: str = '{"Default":1.0}'
-    huber_delta: float = 0.01
-    optimizer: str = "adam"
-    batch_size: int = 10
-    valid_batch_size: int = 10
-    lr: float = 0.01
-    swa_lr: float = 0.001
-    weight_decay: float = 5e-7
-    amsgrad: bool = True
-    scheduler: str = "ReduceLROnPlateau"
-    lr_factor: float = 0.8
-    scheduler_patience: int = 50
-    lr_scheduler_gamma: float = 0.9993
-    swa: bool = False  # default: False
-    start_swa: Optional[int] = int(1e10)  # never start swa
-    ema: bool = False
-    ema_decay: float = 0.99
-    max_num_epochs: int = 2048
-    patience: int = 2048
-    eval_interval: int = 2
-    keep_checkpoints: bool = False
-    restart_latest: bool = False
-    save_cpu: bool = True  # default: False
-    clip_grad: Optional[float] = 10
-    wandb: bool = False
-    wandb_project: Optional[str] = "psiflow"
-    wandb_group: Optional[str] = None
-    wandb_name: str = "mace_training"
-
-    @staticmethod
-    def serialize(config: dict):
-        store_true_keys = [
-            "amsgrad",
-            "swa",
-            "ema",
-            "keep_checkpoints",
-            "restart_latest",
-            "save_cpu",
-            "wandb",
-            "pair_repulsion",
-        ]
-        config_str = ""
-        for key, value in config.items():
-            if type(value) is bool:
-                if key in store_true_keys:
-                    if value:
-                        config_str += "--{} ".format(key)
-                    continue
-            if value is None:
-                continue
-
-            # get rid of any whitespace in str(value), as e.g. with lists
-            config_str += "".join("--{}={}".format(key, value).split()) + " "
-        return config_str
-
-
-# @typeguard.typechecked
-def initialize(
-    mace_config: dict,
-    command_train: str,
-    stdout: str = "",
-    stderr: str = "",
-    inputs: list[File] = [],
-    outputs: list[File] = [],
-    parsl_resource_specification: Optional[dict] = None,
-) -> str:
-    from psiflow.models._mace import MACEConfig
-
-    assert len(inputs) == 1
-    mace_config["train_file"] = inputs[0].filepath
-    mace_config["valid_file"] = None
-    config_str = MACEConfig.serialize(mace_config)
-    command_tmp = 'mytmpdir=$(mktemp -d 2>/dev/null || mktemp -d -t "mytmpdir");'
-    command_cd = "cd $mytmpdir;"
-    command_list = [
-        command_tmp,
-        command_cd,
-        command_train,
-        config_str,
-        ";",
-        "cp model.pth {};".format(outputs[0].filepath),
-    ]
-    return " ".join(command_list)
-
-
-def train(
-    mace_config: dict,
-    command_train: str,
-    stdout: str = "",
-    stderr: str = "",
-    env_vars: dict = {},
-    inputs: list[File] = [],
-    outputs: list[File] = [],
-    parsl_resource_specification: Optional[dict] = None,
-) -> str:
-    from psiflow.models._mace import MACEConfig
-
-    assert len(inputs) == 3
-    mace_config["train_file"] = inputs[1].filepath
-    mace_config["valid_file"] = inputs[2].filepath
-    mace_config["device"] = "cuda"
-    config_str = MACEConfig.serialize(mace_config)
-    config_str += "--initialized_model={} ".format(inputs[0].filepath)
-
-    command_tmp = 'mytmpdir=$(mktemp -d 2>/dev/null || mktemp -d -t "mytmpdir");'
-    command_cd = "cd $mytmpdir;"
-    command_env = ""
-    for key, value in env_vars.items():
-        command_env += " export {}={}; ".format(key, value)
-    command_list = [
-        command_tmp,
-        command_cd,
-        command_env,
-        command_train,
-        config_str,
-        ";",
-        "cp model.pth {};".format(outputs[0].filepath),
-    ]
-    return " ".join(command_list)
-
-
-@typeguard.typechecked
-@psiflow.register_serializable
-class MACE(Model):
-    _config: dict
-    model_future: Optional[psiflow._DataFuture]
-    atomic_energies: dict[str, float | AppFuture]
-
-    def __init__(self, **config) -> None:
-        config = MACEConfig(**config)  # validate input
-        config.save_cpu = True  # assert model is saved to CPU after training
-        config.device = "cpu"
-        if not config.swa:
-            config.start_swa = config.max_num_epochs  # otherwise he fails to read
-        self._config = asdict(config)
-
-        self.model_future = None
-        self.atomic_energies = {}
-
-    def get_train_app(self) -> Callable:
-        training = psiflow.context().definitions["ModelTraining"]
-        return partial(
-            bash_app(train, executors=[training.name]),
-            command_train=training.train_command(False),
-            env_vars=training.env_vars,
-            parsl_resource_specification=training.wq_resources(),
-        )
-
-    def get_initialise_app(self) -> Callable:
-        evaluation = psiflow.context().definitions["ModelEvaluation"]
-        training = psiflow.context().definitions["ModelTraining"]
-        command_init = training.train_command(True)
-        app_initialize = bash_app(initialize, executors=[evaluation.name])
-        # TODO: find a better way for model init
-        resources_init = evaluation.wq_resources(1)
-        if not evaluation.executor_type == "threadpool":
-            resources_init["running_time_min"] = 30  # at least 30 mins for init?
-        return partial(
-            app_initialize,
-            command_train=command_init,
-            parsl_resource_specification=resources_init,
-        )
-
-    @property
-    def seed(self) -> int:
-        return self._config["seed"]
-
-    @seed.setter
-    def seed(self, arg: int) -> None:
-        self._config["seed"] = arg
-
-    def create_hamiltonian(self) -> MACEHamiltonian:
-        assert self.model_future is not None
-
-        # wait for atomic energy calculations if necessary:
-        for element in list(self.atomic_energies):
-            value = self.atomic_energies[element]
-            if isinstance(value, AppFuture):
-                self.atomic_energies[element] = value.result()
-        return MACEHamiltonian(self.model_future, self.atomic_energies)
diff --git a/psiflow/models/mace.py b/psiflow/models/mace.py
new file mode 100644
index 00000000..cd8c41bd
--- /dev/null
+++ b/psiflow/models/mace.py
@@ -0,0 +1,348 @@
+import shutil
+import logging
+import weakref
+from pathlib import Path
+from enum import StrEnum
+from typing import Optional, Any
+from collections.abc import Sequence
+
+import ase
+import ase.io
+import yaml
+import parsl
+from ase.data import atomic_numbers
+from parsl import bash_app, python_app, File, join_app
+from parsl.dataflow.futures import AppFuture, Future
+
+import psiflow
+from psiflow.data import Dataset
+from psiflow.hamiltonians import MACEHamiltonian
+from psiflow.utils.future import resolve_nested_futures
+from psiflow.utils.parse import format_env_vars, get_task_name_id
+
+# TODO: when changing the training dataset, the computed avg_num_neighbors will also change,
+#  making old checkpoints inconsistent
+# TODO: sanitise_config consistently called?
+# TODO: training fails when batch_size > dataset (see github issue)
+# TODO: restart_latest functionality to continue training??
+
+logger = logging.getLogger(__name__)  # logging per module
+
+
+KEY_ATOMIC_ENERGIES = "psiflow_atomic_energies"
+KEY_ITERATION = "psiflow_train_iteration"
+
+
+MODEL_DIRS = weakref.WeakValueDictionary()
+
+
+class Status(StrEnum):
+    SUCCESS = "SUCCESS"
+    FAILURE = "FAILURE"
+    BAD_INPUT = "BAD INPUT"
+    UNKNOWN_ERROR = "UNKNOWN ERROR"
+
+
+def sanitise_config(config: dict) -> dict:
+    """"""
+    defaults = dict(
+        energy_key="energy", forces_key="forces", seed=42, save_cpu=True, work_dir="."
+    )
+    cfg = defaults | config
+
+    # keep all files in work_dir
+    keys = ("log_dir", "model_dir", "checkpoints_dir", "results_dir", "downloads_dir")
+    for k in keys:
+        cfg.pop(k, None)
+
+    return cfg
+
+
+def format_E0s(atomic_energies: dict) -> dict | str:
+    """"""
+    # TODO: MACE fails when atomic energy is not defined for every element..
+    if atomic_energies:
+        return {atomic_numbers[k]: v for k, v in atomic_energies.items()}
+    else:
+        return "average"
+
+
+def _execute(
+    bash_template: str,
+    inputs: list[File],
+    parsl_resource_specification: Optional[dict] = None,
+    stdout: str = parsl.AUTO_LOGNAME,
+    stderr: str = parsl.AUTO_LOGNAME,
+    label: str = "MACE",
+) -> str:
+    return bash_template.format(*inputs)
+
+
+class MACE:
+    """"""
+
+    root: Path
+    config: dict[str, Any]
+    iteration: int
+    model_future: Optional[psiflow._DataFuture]
+    atomic_energies: dict[str, float | AppFuture]
+
+    def __init__(self, root: Path, config: Optional[dict] = None):
+        # make sure nothing else is using the root directory
+        assert str(root) not in MODEL_DIRS, "Model directory in use.."
+        MODEL_DIRS[str(root)] = self
+
+        self.root = root
+        self.iteration = -1
+        self.atomic_energies = {}
+        self.model_future = None
+
+        if config is not None:
+            self.config = sanitise_config(config)
+            yaml.safe_dump(config, self.path_config.open("w"))
+        else:
+            self._load_config()
+        if (p := self.path_mlp).is_file():
+            self.model_future = File(p)
+
+    def update_kwargs(self, **kwargs: Any | Future) -> None:
+        """Update config arguments, possibly with futures"""
+        self.config |= kwargs
+
+    def train(self, training: Dataset, validation: Dataset) -> AppFuture:
+        if not self.has_model_future:
+            logger.warning("Attempting to train new model. Initialising first..")
+            self.initialize(training)
+
+        future_cfg = self._resolve_futures()
+        future = train_app(
+            self,
+            future_cfg,
+            training.extxyz,
+            validation.extxyz,
+            inputs=[self.model_future],  # wait for previous model training
+            outputs=[File(self.path_mlp)],
+        )
+        self.model_future = future.outputs[0]
+        return future
+
+    def initialize(self, dataset: Dataset) -> AppFuture:
+        """Create and save the model architecture"""
+        assert not self.has_model_future, "Already initialized.."
+        future_cfg = self._resolve_futures()
+        future = initialize_app(
+            self, future_cfg, dataset.extxyz, outputs=[File(self.path_mlp)]
+        )
+        self.model_future = future.outputs[0]
+        return future
+
+    def add_atomic_energy(self, element: str, energy: float | AppFuture) -> None:
+        assert (
+            not self.has_model_future
+        ), "Cannot add atomic energies after model has been initialized.."
+        if element in self.atomic_energies:
+            logger.warning(f"Overwriting existing atomic energy for '{element}'..")
+        self.atomic_energies[element] = energy
+
+    def create_hamiltonian(self) -> MACEHamiltonian:
+        # atomic energies are already part of the model
+        assert self.has_model_future, "Trained model does not exist.."
+        return MACEHamiltonian(self.model_future, {})
+
+    def _load_config(self) -> None:
+        """Does not care for futures"""
+        config = yaml.safe_load(self.path_config.open())
+        self.atomic_energies = config.pop(KEY_ATOMIC_ENERGIES, {})
+        self.iteration = config.pop(KEY_ITERATION, 0)
+        self.config = sanitise_config(config)
+
+    def _resolve_futures(self) -> AppFuture:
+        """Wait for all futures in config and atomic energies"""
+        cfg = self.config | {KEY_ATOMIC_ENERGIES: self.atomic_energies}
+        return resolve_nested_futures(cfg)
+
+    def _execute_app(self, config: dict) -> AppFuture:
+        """"""
+        context = psiflow.context()
+        definition = context.definitions["ModelTraining"]
+        resources = definition.wq_resources()
+
+        # final config tweaks
+        if definition.multi_gpu:
+            config["distributed"] = True
+            config["launcher"] = "torchrun"
+        else:
+            config["distributed"] = False
+        file = psiflow.context().new_file("mace_cfg_", ".yaml")
+        yaml.safe_dump(config, open(file.filepath, "w"))
+
+        # construct MACE train script
+        command = "$(which mace_run_train) --config {}"
+        if config["distributed"]:
+            command = f"torchrun --standalone --nnodes=1 --nproc_per_node={resources['gpus']} {command}"
+        command = definition.wrap_in_timeout(command)
+
+        command_lines = [
+            "mkdir checkpoints",  # otherwise MACE borks
+            command,
+            f"rsync -av --ignore-existing --exclude=/*.model ./ {self.root}/",  # copy things back
+        ]
+        command = "\n".join([l for l in command_lines if l])
+
+        execute_app = bash_app(_execute, executors=["ModelTraining"])
+        env_vars = format_env_vars(definition.env_vars)
+        future = execute_app(
+            bash_template=context.bash_template.format(commands=command, env=env_vars),
+            inputs=[file],
+            parsl_resource_specification=resources,
+            label="mace_init" if config["name"] == "init" else "mace_train",
+        )
+        return future
+
+    @property
+    def path_config(self) -> Path:
+        return self.root / "config.yaml"
+
+    @property
+    def path_mlp(self) -> Path:
+        return self.root / "last.model"
+
+    @property
+    def path_checkpoints(self) -> Path:
+        return self.root / "checkpoints"
+
+    @property
+    def has_model_future(self) -> bool:
+        # whether a model exists (or will exist)
+        return self.model_future is not None
+
+    @classmethod
+    def create(cls, path_dir: Path, config: dict):
+        """Create a new model in a fresh directory"""
+        path = psiflow.resolve_and_check(Path(path_dir))
+        path.mkdir()
+        return cls(path, config)
+
+    @classmethod
+    def load(cls, path_dir: Path):
+        """Load model from existing directory"""
+        path = psiflow.resolve_and_check(Path(path_dir))
+        return cls(path)
+
+
+@join_app
+def initialize_app(
+    model: MACE, config: dict, file_data: File, outputs: Sequence[File]
+) -> AppFuture:
+    """"""
+    # TODO: we can kill mace_run_train as soon as 'RESULTS' block starts
+    assert len(outputs) == 1
+    logger.info("Initialising MACE model...")
+
+    # make dummy val set
+    file_val = psiflow.context().new_file("dummy_", ".xyz")
+    atoms = ase.io.read(file_data.filepath)
+    dummy = ase.Atoms(numbers=atoms.numbers[:1])
+    ase.io.write(file_val.filepath, dummy)
+
+    # update train config
+    cfg = config.copy()
+    cfg |= {
+        "name": "init",
+        "max_num_epochs": 0,
+        "train_file": file_data.filepath,
+        "valid_file": file_val.filepath,
+    }
+    cfg["E0s"] = format_E0s(cfg.pop(KEY_ATOMIC_ENERGIES))
+
+    future = model._execute_app(cfg)
+    inputs = [future.stdout, future.stderr, future]
+    future_ = process_output(model, config, inputs=inputs)
+    return future_
+
+
+@join_app
+def train_app(
+    model: MACE,
+    config: dict,
+    file_train: File,
+    file_val,
+    inputs: Sequence = (),
+    outputs: Sequence[File] = (),
+) -> AppFuture:
+    """Wait for inputs and (re)train model"""
+    assert len(outputs) == 1
+    if model.iteration == 0:
+        logger.info("Training new MACE model...")
+    else:
+        logger.info("Retraining MACE model...")
+
+    # update train config
+    cfg = config.copy()
+    cfg |= {
+        "name": f"train-{model.iteration}",
+        "train_file": file_train.filepath,
+        "valid_file": file_val.filepath,
+        "foundation_model": str(model.path_mlp),  # restart from previous model
+    }
+    cfg["E0s"] = format_E0s(cfg.pop(KEY_ATOMIC_ENERGIES))
+
+    future = model._execute_app(cfg)
+    inputs = [future.stdout, future.stderr, future]
+    future_ = process_output(model, config, inputs=inputs)
+    return future_
+
+
+@python_app(executors=["default_threads"])
+def process_output(model: MACE, config: dict, inputs: Sequence = ()) -> None:
+    """Waits for future and processes MLP training output"""
+    key = "init" if model.iteration < 0 else f"train-{model.iteration}"
+
+    # copy last model
+    files = list(model.path_checkpoints.glob(f"{key}*.model"))
+    if len(files):
+        status = Status.SUCCESS
+        shutil.copy2(sorted(files)[-1], model.path_mlp)
+    else:
+        status = Status.FAILURE
+
+    name, task_id = get_task_name_id(inputs[0])
+    model.iteration += 1
+    cfg = config | {KEY_ITERATION: model.iteration}
+    if status == Status.SUCCESS:
+        # only update stored config if training is successful
+        logger.info(f"MACE training [ID {task_id}]: {status}")
+        yaml.safe_dump(cfg, model.path_config.open("w"))
+        return
+
+    # check final error logs
+    lines = Path(inputs[1]).read_text().rsplit(sep="\n", maxsplit=5)
+    log = "\n".join(lines[1:])
+    if "unrecognized arguments" in log:
+        status = Status.BAD_INPUT
+    else:
+        status = Status.UNKNOWN_ERROR
+
+    logger.warning(f"MACE training [ID {task_id}]: {status}")
+    raise RuntimeError("MACE training failed. Check output logs.")
+
+
+# copied from the MACE v0.3.15 repo
+mace_mp_urls = {
+    "small": "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0/2023-12-10-mace-128-L0_energy_epoch-249.model",
+    "medium": "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0/2023-12-03-mace-128-L1_epoch-199.model",
+    "large": "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0/MACE_MPtrj_2022.9.model",
+    "small-0b": "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0b/mace_agnesi_small.model",
+    "medium-0b": "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0b/mace_agnesi_medium.model",
+    "small-0b2": "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0b2/mace-small-density-agnesi-stress.model",
+    "medium-0b2": "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0b2/mace-medium-density-agnesi-stress.model",
+    "large-0b2": "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0b2/mace-large-density-agnesi-stress.model",
+    "medium-0b3": "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0b3/mace-mp-0b3-medium.model",
+    "medium-mpa-0": "https://github.com/ACEsuit/mace-mp/releases/download/mace_mpa_0/mace-mpa-0-medium.model",
+    "small-omat-0": "https://github.com/ACEsuit/mace-mp/releases/download/mace_omat_0/mace-omat-0-small.model",
+    "medium-omat-0": "https://github.com/ACEsuit/mace-mp/releases/download/mace_omat_0/mace-omat-0-medium.model",
+    "mace-matpes-pbe-0": "https://github.com/ACEsuit/mace-foundations/releases/download/mace_matpes_0/MACE-matpes-pbe-omat-ft.model",
+    "mace-matpes-r2scan-0": "https://github.com/ACEsuit/mace-foundations/releases/download/mace_matpes_0/MACE-matpes-r2scan-omat-ft.model",
+    "mh-0": "https://github.com/ACEsuit/mace-foundations/releases/download/mace_mh_1/mace-mh-0.model",
+    "mh-1": "https://github.com/ACEsuit/mace-foundations/releases/download/mace_mh_1/mace-mh-1.model",
+}
diff --git a/psiflow/models/mace_utils.py b/psiflow/models/mace_utils.py
deleted file mode 100644
index 28411a23..00000000
--- a/psiflow/models/mace_utils.py
+++ /dev/null
@@ -1,702 +0,0 @@
-###########################################################################################
-# Modified training script for MACE
-# Authors: Ilyes Batatia, Gregor Simm, David Kovacs, Sander Vandenhaute
-# This program is distributed under the MIT License (see MIT.md)
-###########################################################################################
-
-"""
-
-MACE utils for use in psiflow -- copied from mace@dee204f
-The following changes were made:
-
-    - use signal module to wrap tools.train() call with timeout such that
-    there is time left to save the best model.
-
-    - build model from scratch but load state dict of starting model
-
-    - simplified Calculator which incorporates additional atomic energy offsets
-
-"""
-import argparse
-
-
-class TimeoutException(Exception):
-    pass
-
-
-def timeout_handler(signum, frame):
-    raise TimeoutException
-
-
-def run(rank: int, args: argparse.Namespace, world_size: int) -> None:
-    import ast
-    import json
-    import logging
-    import os
-    from pathlib import Path
-    from typing import Optional
-
-    import mace
-    import numpy as np
-    import torch
-    import torch.nn.functional
-    from e3nn import o3
-    from mace import data, modules, tools
-    from mace.tools import torch_geometric
-    from mace.tools.scripts_utils import (
-        LRScheduler,
-        create_error_table,
-        get_atomic_energies,
-        get_config_type_weights,
-        get_dataset_from_xyz,
-    )
-    from torch.nn.parallel import DistributedDataParallel as DDP
-    from torch.optim.swa_utils import SWALR, AveragedModel
-    from torch_ema import ExponentialMovingAverage
-
-    # extend MACE arg parser with ability to pass initialized model; set tmpdirs
-    args.log_dir = os.path.join(os.getcwd(), "log")
-    args.model_dir = os.path.join(os.getcwd())
-    args.results_dir = os.path.join(os.getcwd(), "results")
-    args.downloads_dir = os.path.join(os.getcwd(), "downloads")
-    args.checkpoints_dir = os.path.join(os.getcwd(), "checkpoints")
-
-    tag = tools.get_tag(name=args.name, seed=args.seed)
-    if args.distributed:
-        local_rank = rank
-        os.environ["MASTER_ADDR"] = "localhost"
-        os.environ["MASTER_PORT"] = "12355"
-        torch.distributed.init_process_group(
-            backend="nccl",
-            rank=rank,
-            world_size=world_size,
-        )
-        torch.cuda.set_device(rank)
-    else:
-        pass
-
-    # Setup
-    tools.set_seeds(args.seed)
-    tools.setup_logger(level=args.log_level, tag=tag, directory=args.log_dir, rank=rank)
-    try:
-        logging.info(f"MACE version: {mace.__version__}")
-    except AttributeError:
-        logging.info("Cannot find MACE version, please install MACE via pip")
-    logging.info(f"Configuration: {args}")
-    device = tools.init_device(args.device)
-    tools.set_default_dtype(args.default_dtype)
-
-    assert args.foundation_model is None
-    assert args.statistics_file is None
-
-    # Data preparation
-    config_type_weights = get_config_type_weights(args.config_type_weights)
-    collections, atomic_energies_dict = get_dataset_from_xyz(
-        train_path=args.train_file,
-        valid_path=args.valid_file,
-        valid_fraction=args.valid_fraction,
-        config_type_weights=config_type_weights,
-        test_path=args.test_file,
-        seed=args.seed,
-        energy_key=args.energy_key,
-        forces_key=args.forces_key,
-        stress_key=args.stress_key,
-        virials_key=args.virials_key,
-        dipole_key=args.dipole_key,
-        charges_key=args.charges_key,
-    )
-
-    logging.info(
-        f"Total number of configurations: train={len(collections.train)}, valid={len(collections.valid)}, "
-        f"tests=[{', '.join([name + ': ' + str(len(test_configs)) for name, test_configs in collections.tests])}]"
-    )
-
-    # Atomic number table
-    # yapf: disable
-    if args.atomic_numbers is None:
-        assert args.train_file.endswith(".xyz"), "Must specify atomic_numbers when using .h5 train_file input"
-        z_table = tools.get_atomic_number_table_from_zs(
-            z
-            for configs in (collections.train, collections.valid)
-            for config in configs
-            for z in config.atomic_numbers
-        )
-    else:
-        if args.statistics_file is None:
-            logging.info("Using atomic numbers from command line argument")
-        else:
-            logging.info("Using atomic numbers from statistics file")
-        zs_list = ast.literal_eval(args.atomic_numbers)
-        assert isinstance(zs_list, list)
-        z_table = tools.get_atomic_number_table_from_zs(zs_list)
-    # yapf: enable
-    logging.info(z_table)
-
-    if atomic_energies_dict is None or len(atomic_energies_dict) == 0:
-        if args.E0s.lower() == "foundation":
-            raise NotImplementedError
-        else:
-            if args.train_file.endswith(".xyz"):
-                atomic_energies_dict = get_atomic_energies(
-                    args.E0s, collections.train, z_table
-                )
-            else:
-                atomic_energies_dict = get_atomic_energies(args.E0s, None, z_table)
-
-    if args.model == "AtomicDipolesMACE":
-        atomic_energies = None
-        dipole_only = True
-        compute_dipole = True
-        compute_energy = False
-        args.compute_forces = False
-        compute_virials = False
-        args.compute_stress = False
-    else:
-        dipole_only = False
-        if args.model == "EnergyDipolesMACE":
-            compute_dipole = True
-            compute_energy = True
-            args.compute_forces = True
-            compute_virials = False
-            args.compute_stress = False
-        else:
-            compute_energy = True
-            compute_dipole = False
-        atomic_energies: np.ndarray = np.array(
-            [atomic_energies_dict[z] for z in z_table.zs]
-        )
-        logging.info(f"Atomic energies: {atomic_energies.tolist()}")
-    args.batch_size = min(len(collections.train), args.batch_size)
-    print("actual batch size: {}".format(args.batch_size))
-
-    train_set = [
-        data.AtomicData.from_config(config, z_table=z_table, cutoff=args.r_max)
-        for config in collections.train
-    ]
-    valid_set = [
-        data.AtomicData.from_config(config, z_table=z_table, cutoff=args.r_max)
-        for config in collections.valid
-    ]
-    train_sampler, valid_sampler = None, None
-    if args.distributed:
-        train_sampler = torch.utils.data.distributed.DistributedSampler(
-            train_set,
-            num_replicas=world_size,
-            rank=rank,
-            shuffle=True,
-            drop_last=True,
-            seed=args.seed,
-        )
-        valid_sampler = torch.utils.data.distributed.DistributedSampler(
-            valid_set,
-            num_replicas=world_size,
-            rank=rank,
-            shuffle=True,
-            drop_last=True,
-            seed=args.seed,
-        )
-    train_loader = torch_geometric.dataloader.DataLoader(
-        dataset=train_set,
-        batch_size=args.batch_size,
-        sampler=train_sampler,
-        shuffle=(train_sampler is None),
-        drop_last=(train_sampler is None),
-        pin_memory=args.pin_memory,
-        num_workers=args.num_workers,
-        generator=torch.Generator().manual_seed(args.seed),
-    )
-    valid_loader = torch_geometric.dataloader.DataLoader(
-        dataset=valid_set,
-        batch_size=args.valid_batch_size,
-        sampler=valid_sampler,
-        shuffle=False,
-        drop_last=False,
-        pin_memory=args.pin_memory,
-        num_workers=args.num_workers,
-        generator=torch.Generator().manual_seed(args.seed),
-    )
-
-    loss_fn: torch.nn.Module
-    if args.loss == "weighted":
-        loss_fn = modules.WeightedEnergyForcesLoss(
-            energy_weight=args.energy_weight, forces_weight=args.forces_weight
-        )
-    elif args.loss == "forces_only":
-        loss_fn = modules.WeightedForcesLoss(forces_weight=args.forces_weight)
-    elif args.loss == "virials":
-        loss_fn = modules.WeightedEnergyForcesVirialsLoss(
-            energy_weight=args.energy_weight,
-            forces_weight=args.forces_weight,
-            virials_weight=args.virials_weight,
-        )
-    elif args.loss == "stress":
-        loss_fn = modules.WeightedEnergyForcesStressLoss(
-            energy_weight=args.energy_weight,
-            forces_weight=args.forces_weight,
-            stress_weight=args.stress_weight,
-        )
-    elif args.loss == "huber":
-        loss_fn = modules.WeightedHuberEnergyForcesStressLoss(
-            energy_weight=args.energy_weight,
-            forces_weight=args.forces_weight,
-            stress_weight=args.stress_weight,
-            huber_delta=args.huber_delta,
-        )
-    elif args.loss == "dipole":
-        assert (
-            dipole_only is True
-        ), "dipole loss can only be used with AtomicDipolesMACE model"
-        loss_fn = modules.DipoleSingleLoss(
-            dipole_weight=args.dipole_weight,
-        )
-    elif args.loss == "energy_forces_dipole":
-        assert dipole_only is False and compute_dipole is True
-        loss_fn = modules.WeightedEnergyForcesDipoleLoss(
-            energy_weight=args.energy_weight,
-            forces_weight=args.forces_weight,
-            dipole_weight=args.dipole_weight,
-        )
-    else:
-        # Unweighted Energy and Forces loss by default
-        loss_fn = modules.WeightedEnergyForcesLoss(energy_weight=1.0, forces_weight=1.0)
-    logging.info(loss_fn)
-
-    if args.compute_avg_num_neighbors:
-        avg_num_neighbors = modules.compute_avg_num_neighbors(train_loader)
-        if args.distributed:
-            num_graphs = torch.tensor(len(train_loader.dataset)).to(device)
-            num_neighbors = num_graphs * torch.tensor(avg_num_neighbors).to(device)
-            torch.distributed.all_reduce(num_graphs, op=torch.distributed.ReduceOp.SUM)
-            torch.distributed.all_reduce(
-                num_neighbors, op=torch.distributed.ReduceOp.SUM
-            )
-            args.avg_num_neighbors = (num_neighbors / num_graphs).item()
-        else:
-            args.avg_num_neighbors = avg_num_neighbors
-    logging.info(f"Average number of neighbors: {args.avg_num_neighbors}")
-
-    # Selecting outputs
-    compute_virials = False
-    if args.loss in ("stress", "virials", "huber"):
-        compute_virials = True
-        args.compute_stress = True
-        args.error_table = "PerAtomRMSEstressvirials"
-
-    output_args = {
-        "energy": compute_energy,
-        "forces": args.compute_forces,
-        "virials": compute_virials,
-        "stress": args.compute_stress,
-        "dipoles": compute_dipole,
-    }
-    logging.info(f"Selected the following outputs: {output_args}")
-
-    # Build model
-    logging.info("Building model")
-    if args.num_channels is not None and args.max_L is not None:
-        assert args.num_channels > 0, "num_channels must be positive integer"
-        assert args.max_L >= 0, "max_L must be non-negative integer"
-        args.hidden_irreps = o3.Irreps(
-            (args.num_channels * o3.Irreps.spherical_harmonics(args.max_L))
-            .sort()
-            .irreps.simplify()
-        )
-
-    assert (
-        len({irrep.mul for irrep in o3.Irreps(args.hidden_irreps)}) == 1
-    ), "All channels must have the same dimension, use the num_channels and max_L keywords to specify the number of channels and the maximum L"
-
-    logging.info(f"Hidden irreps: {args.hidden_irreps}")
-    model_config = dict(
-        r_max=args.r_max,
-        num_bessel=args.num_radial_basis,
-        num_polynomial_cutoff=args.num_cutoff_basis,
-        max_ell=args.max_ell,
-        interaction_cls=modules.interaction_classes[args.interaction],
-        num_interactions=args.num_interactions,
-        num_elements=len(z_table),
-        hidden_irreps=o3.Irreps(args.hidden_irreps),
-        atomic_energies=atomic_energies,
-        avg_num_neighbors=args.avg_num_neighbors,
-        atomic_numbers=z_table.zs,
-    )
-
-    model: torch.nn.Module
-
-    if args.model == "MACE":
-        if args.scaling == "no_scaling":
-            std = 1.0
-            logging.info("No scaling selected")
-        else:
-            mean, std = modules.scaling_classes[args.scaling](
-                train_loader, atomic_energies
-            )
-        model = modules.ScaleShiftMACE(
-            **model_config,
-            correlation=args.correlation,
-            gate=modules.gate_dict[args.gate],
-            interaction_cls_first=modules.interaction_classes[
-                "RealAgnosticInteractionBlock"
-            ],
-            MLP_irreps=o3.Irreps(args.MLP_irreps),
-            atomic_inter_scale=std,
-            atomic_inter_shift=0.0,
-            radial_MLP=ast.literal_eval(args.radial_MLP),
-            radial_type=args.radial_type,
-        )
-    elif args.model == "ScaleShiftMACE":
-        mean, std = modules.scaling_classes[args.scaling](train_loader, atomic_energies)
-        model = modules.ScaleShiftMACE(
-            **model_config,
-            correlation=args.correlation,
-            gate=modules.gate_dict[args.gate],
-            interaction_cls_first=modules.interaction_classes[args.interaction_first],
-            MLP_irreps=o3.Irreps(args.MLP_irreps),
-            atomic_inter_scale=std,
-            atomic_inter_shift=mean,
-            radial_MLP=ast.literal_eval(args.radial_MLP),
-            radial_type=args.radial_type,
-        )
-    elif args.model == "ScaleShiftBOTNet":
-        mean, std = modules.scaling_classes[args.scaling](train_loader, atomic_energies)
-        model = modules.ScaleShiftBOTNet(
-            **model_config,
-            gate=modules.gate_dict[args.gate],
-            interaction_cls_first=modules.interaction_classes[args.interaction_first],
-            MLP_irreps=o3.Irreps(args.MLP_irreps),
-            atomic_inter_scale=std,
-            atomic_inter_shift=mean,
-        )
-    elif args.model == "BOTNet":
-        model = modules.BOTNet(
-            **model_config,
-            gate=modules.gate_dict[args.gate],
-            interaction_cls_first=modules.interaction_classes[args.interaction_first],
-            MLP_irreps=o3.Irreps(args.MLP_irreps),
-        )
-    elif args.model == "AtomicDipolesMACE":
-        # std_df = modules.scaling_classes["rms_dipoles_scaling"](train_loader)
-        assert args.loss == "dipole", "Use dipole loss with AtomicDipolesMACE model"
-        assert (
-            args.error_table == "DipoleRMSE"
-        ), "Use error_table DipoleRMSE with AtomicDipolesMACE model"
-        model = modules.AtomicDipolesMACE(
-            **model_config,
-            correlation=args.correlation,
-            gate=modules.gate_dict[args.gate],
-            interaction_cls_first=modules.interaction_classes[
-                "RealAgnosticInteractionBlock"
-            ],
-            MLP_irreps=o3.Irreps(args.MLP_irreps),
-            # dipole_scale=1,
-            # dipole_shift=0,
-        )
-    elif args.model == "EnergyDipolesMACE":
-        # std_df = modules.scaling_classes["rms_dipoles_scaling"](train_loader)
-        assert (
-            args.loss == "energy_forces_dipole"
-        ), "Use energy_forces_dipole loss with EnergyDipolesMACE model"
-        assert (
-            args.error_table == "EnergyDipoleRMSE"
-        ), "Use error_table EnergyDipoleRMSE with AtomicDipolesMACE model"
-        model = modules.EnergyDipolesMACE(
-            **model_config,
-            correlation=args.correlation,
-            gate=modules.gate_dict[args.gate],
-            interaction_cls_first=modules.interaction_classes[
-                "RealAgnosticInteractionBlock"
-            ],
-            MLP_irreps=o3.Irreps(args.MLP_irreps),
-        )
-    else:
-        raise RuntimeError(f"Unknown model: '{args.model}'")
-
-    if args.initialized_model is None:  # save currently initialized model
-        torch.save(model.to("cpu"), "model.pth")
-        return 0
-    else:  # override model with initialized state_dict
-        state_dict = torch.load(args.initialized_model, map_location="cpu").state_dict()
-        model.load_state_dict(state_dict)
-
-    model = model.to(device)
-
-    # Optimizer
-    decay_interactions = {}
-    no_decay_interactions = {}
-    for name, param in model.interactions.named_parameters():
-        if "linear.weight" in name or "skip_tp_full.weight" in name:
-            decay_interactions[name] = param
-        else:
-            no_decay_interactions[name] = param
-
-    param_options = dict(
-        params=[
-            {
-                "name": "embedding",
-                "params": model.node_embedding.parameters(),
-                "weight_decay": 0.0,
-            },
-            {
-                "name": "interactions_decay",
-                "params": list(decay_interactions.values()),
-                "weight_decay": args.weight_decay,
-            },
-            {
-                "name": "interactions_no_decay",
-                "params": list(no_decay_interactions.values()),
-                "weight_decay": 0.0,
-            },
-            {
-                "name": "products",
-                "params": model.products.parameters(),
-                "weight_decay": args.weight_decay,
-            },
-            {
-                "name": "readouts",
-                "params": model.readouts.parameters(),
-                "weight_decay": 0.0,
-            },
-        ],
-        lr=args.lr,
-        amsgrad=args.amsgrad,
-    )
-
-    optimizer: torch.optim.Optimizer
-    if args.optimizer == "adamw":
-        optimizer = torch.optim.AdamW(**param_options)
-    else:
-        optimizer = torch.optim.Adam(**param_options)
-
-    logger = tools.MetricsLogger(directory=args.results_dir, tag=tag + "_train")
-
-    lr_scheduler = LRScheduler(optimizer, args)
-
-    swa: Optional[tools.SWAContainer] = None
-    swas = [False]
-    if args.swa:
-        assert dipole_only is False, "swa for dipole fitting not implemented"
-        swas.append(True)
-        if args.start_swa is None:
-            args.start_swa = max(1, args.max_num_epochs // 4 * 3)
-        else:
-            if args.start_swa > args.max_num_epochs:
-                logging.info(
-                    f"Start swa must be less than max_num_epochs, got {args.start_swa} > {args.max_num_epochs}"
-                )
-                args.start_swa = max(1, args.max_num_epochs // 4 * 3)
-                logging.info(f"Setting start swa to {args.start_swa}")
-        if args.loss == "forces_only":
-            raise ValueError("Can not select swa with forces only loss.")
-        if args.loss == "virials":
-            loss_fn_energy = modules.WeightedEnergyForcesVirialsLoss(
-                energy_weight=args.swa_energy_weight,
-                forces_weight=args.swa_forces_weight,
-                virials_weight=args.swa_virials_weight,
-            )
-        elif args.loss == "stress":
-            loss_fn_energy = modules.WeightedEnergyForcesStressLoss(
-                energy_weight=args.swa_energy_weight,
-                forces_weight=args.swa_forces_weight,
-                stress_weight=args.swa_stress_weight,
-            )
-        elif args.loss == "energy_forces_dipole":
-            loss_fn_energy = modules.WeightedEnergyForcesDipoleLoss(
-                args.swa_energy_weight,
-                forces_weight=args.swa_forces_weight,
-                dipole_weight=args.swa_dipole_weight,
-            )
-            logging.info(
-                f"Using stochastic weight averaging (after {args.start_swa} epochs) with energy weight : {args.swa_energy_weight}, forces weight : {args.swa_forces_weight}, dipole weight : {args.swa_dipole_weight} and learning rate : {args.swa_lr}"
-            )
-        else:
-            loss_fn_energy = modules.WeightedEnergyForcesLoss(
-                energy_weight=args.swa_energy_weight,
-                forces_weight=args.swa_forces_weight,
-            )
-            logging.info(
-                f"Using stochastic weight averaging (after {args.start_swa} epochs) with energy weight : {args.swa_energy_weight}, forces weight : {args.swa_forces_weight} and learning rate : {args.swa_lr}"
-            )
-        swa = tools.SWAContainer(
-            model=AveragedModel(model),
-            scheduler=SWALR(
-                optimizer=optimizer,
-                swa_lr=args.swa_lr,
-                anneal_epochs=1,
-                anneal_strategy="linear",
-            ),
-            start=args.start_swa,
-            loss_fn=loss_fn_energy,
-        )
-
-    checkpoint_handler = tools.CheckpointHandler(
-        directory=args.checkpoints_dir,
-        tag=tag,
-        keep=args.keep_checkpoints,
-        swa_start=args.start_swa,
-    )
-
-    start_epoch = 0
-    if args.restart_latest:
-        try:
-            opt_start_epoch = checkpoint_handler.load_latest(
-                state=tools.CheckpointState(model, optimizer, lr_scheduler),
-                swa=True,
-                device=device,
-            )
-        except Exception:  # pylint: disable=W0703
-            opt_start_epoch = checkpoint_handler.load_latest(
-                state=tools.CheckpointState(model, optimizer, lr_scheduler),
-                swa=False,
-                device=device,
-            )
-        if opt_start_epoch is not None:
-            start_epoch = opt_start_epoch
-
-    ema: Optional[ExponentialMovingAverage] = None
-    if args.ema:
-        ema = ExponentialMovingAverage(model.parameters(), decay=args.ema_decay)
-    else:
-        for group in optimizer.param_groups:
-            group["lr"] = args.lr
-
-    logging.info(model)
-    logging.info(f"Number of parameters: {tools.count_parameters(model)}")
-    logging.info(f"Optimizer: {optimizer}")
-
-    if args.wandb:
-        logging.info("Using Weights and Biases for logging")
-        import wandb
-
-        wandb_config = {}
-        args_dict = vars(args)
-        args_dict_json = json.dumps(args_dict)
-        for key in args.wandb_log_hypers:
-            wandb_config[key] = args_dict[key]
-        tools.init_wandb(
-            project=args.wandb_project,
-            entity=args.wandb_entity,
-            name=args.wandb_name,
-            config=wandb_config,
-        )
-        wandb.run.summary["params"] = args_dict_json
-
-    if args.distributed:
-        distributed_model = DDP(model, device_ids=[local_rank])
-    else:
-        distributed_model = None
-
-    try:
-        tools.train(
-            model=model,
-            loss_fn=loss_fn,
-            train_loader=train_loader,
-            valid_loader=valid_loader,
-            optimizer=optimizer,
-            lr_scheduler=lr_scheduler,
-            checkpoint_handler=checkpoint_handler,
-            eval_interval=args.eval_interval,
-            start_epoch=start_epoch,
-            max_num_epochs=args.max_num_epochs,
-            logger=logger,
-            patience=args.patience,
-            save_all_checkpoints=args.save_all_checkpoints,
-            output_args=output_args,
-            device=device,
-            swa=swa,
-            ema=ema,
-            max_grad_norm=args.clip_grad,
-            log_errors=args.error_table,
-            log_wandb=args.wandb,
-            distributed=args.distributed,
-            distributed_model=distributed_model,
-            train_sampler=train_sampler,
-            rank=rank,
-        )
-    except TimeoutException:
-        logging.info("received SIGTERM!")
-        pass
-
-    # Evaluation on test datasets
-    logging.info("Computing metrics for training, validation, and test sets")
-
-    all_data_loaders = {
-        "train": train_loader,
-        "valid": valid_loader,
-    }
-
-    for swa_eval in swas:
-        try:
-            epoch = checkpoint_handler.load_latest(
-                state=tools.CheckpointState(model, optimizer, lr_scheduler),
-                swa=swa_eval,
-                device=device,
-            )
-        except BaseException as e:  # noqa: B036
-            print("failed to load checkpoint for swa:{}".format(swa_eval))
-            print(e)
-            continue
-        model.to(device)
-        if args.distributed:
-            distributed_model = DDP(model, device_ids=[local_rank])
-        model_to_evaluate = model if not args.distributed else distributed_model
-        logging.info(f"Loaded model from epoch {epoch}")
-
-        for param in model.parameters():
-            param.requires_grad = False
-        table = create_error_table(
-            table_type=args.error_table,
-            all_data_loaders=all_data_loaders,
-            model=model_to_evaluate,
-            loss_fn=loss_fn,
-            output_args=output_args,
-            log_wandb=args.wandb,
-            device=device,
-            distributed=args.distributed,
-        )
-        logging.info("\n" + str(table))
-
-        if rank == 0:
-            # Save entire model
-            # if swa_eval:
-            #    model_path = Path.cwd() / 'model_swa.pth'
-            # else:
-            model_path = Path.cwd() / "model.pth"
-            logging.info("swa: {}".format(swa_eval))
-            logging.info(f"Saving model to {model_path}")
-            if args.save_cpu:
-                model = model.to("cpu")
-            torch.save(model, model_path)
-
-        if args.distributed:
-            torch.distributed.barrier()
-
-    logging.info("Done")
-    if args.distributed:
-        torch.distributed.destroy_process_group()
-
-
-def main():
-    import signal
-
-    import torch
-    from mace import tools
-
-    signal.signal(signal.SIGTERM, timeout_handler)
-    # main()
-    parser = tools.build_default_arg_parser()
-    parser.add_argument(
-        "--initialized_model",
-        help="path to initialized model",
-        default=None,
-        type=str,
-    )
-    args = parser.parse_args()
-    world_size = torch.cuda.device_count()
-    if world_size > 1:
-        args.distributed = True
-        import torch.multiprocessing as mp
-
-        mp.spawn(run, args=(args, world_size), nprocs=world_size)
-    else:
-        args.distributed = False
-        run(0, args, 1)
diff --git a/psiflow/models/model.py b/psiflow/models/model.py
deleted file mode 100644
index 84d3938b..00000000
--- a/psiflow/models/model.py
+++ /dev/null
@@ -1,145 +0,0 @@
-from __future__ import annotations  # necessary for type-guarding class methods
-
-import copy
-import logging
-from dataclasses import asdict
-from pathlib import Path
-from typing import Optional, Union, Callable
-
-import parsl
-import typeguard
-from parsl.data_provider.files import File
-from parsl.dataflow.futures import AppFuture
-
-import psiflow
-from psiflow.data import Dataset
-from psiflow.utils.apps import copy_data_future, log_message
-from psiflow.utils.io import save_yaml
-
-
-logger = logging.getLogger(__name__)
-
-
-@typeguard.typechecked
-@psiflow.register_serializable  # TODO: not required? you should always subclass this
-class Model:
-    _config: dict
-    model_future: Optional[psiflow._DataFuture]
-    atomic_energies: dict
-
-    def add_atomic_energy(self, element: str, energy: Union[float, AppFuture]) -> None:
-        assert self.model_future is None, (
-            "cannot add atomic energies after model has "
-            "been initialized; reset model, add energy, and reinitialize"
-        )
-        if element in self.atomic_energies:
-            if isinstance(energy, AppFuture):
-                energy = energy.result()
-            if isinstance(self.atomic_energies[element], AppFuture):
-                existing = self.atomic_energies[element].result()
-            assert energy == existing, (
-                "model already has atomic energy "
-                "for element {} ({}), which is different from {}"
-                "".format(element, existing, energy)
-            )
-        self.atomic_energies[element] = energy
-
-    def train(self, training: Dataset, validation: Dataset) -> None:
-        log_message(
-            logger,
-            "training model using {} states for training and {} for validation",
-            training.length(),
-            validation.length(),
-        )
-        inputs = [self.model_future]
-        if self.do_offset:
-            inputs += [
-                training.subtract_offset(**self.atomic_energies).extxyz,
-                validation.subtract_offset(**self.atomic_energies).extxyz,
-            ]
-        else:
-            inputs += [training.extxyz, validation.extxyz]
-        train_app = self.get_train_app()
-        future = train_app(
-            dict(self._config),
-            stdout=parsl.AUTO_LOGNAME,
-            stderr=parsl.AUTO_LOGNAME,
-            inputs=inputs,
-            outputs=[psiflow.context().new_file("model_", ".pth")],
-        )
-        self.model_future = future.outputs[0]
-
-    def initialize(self, dataset: Dataset) -> None:
-        """Initializes the model based on a dataset"""
-        assert self.model_future is None
-        if self.do_offset:
-            inputs = [dataset.subtract_offset(**self.atomic_energies).extxyz]
-        else:
-            inputs = [dataset.extxyz]
-        initialise_app = self.get_initialise_app()
-        future = initialise_app(
-            self._config,
-            stdout=parsl.AUTO_LOGNAME,
-            stderr=parsl.AUTO_LOGNAME,
-            inputs=inputs,
-            outputs=[psiflow.context().new_file("model_", ".pth")],
-        )
-        self.model_future = future.outputs[0]
-
-    def reset(self) -> None:
-        self.model_future = None
-
-    def save(
-        self,
-        path: Union[Path, str],
-    ) -> None:
-        path = psiflow.resolve_and_check(Path(path))
-        path.mkdir(exist_ok=True)
-
-        name = self.__class__.__name__
-        path_config = path / "{}.yaml".format(name)
-
-        atomic_energies = {
-            "atomic_energies_" + key: value
-            for key, value in self.atomic_energies.items()
-        }
-        save_yaml(
-            self._config,
-            outputs=[File(str(path_config))],
-            **atomic_energies,
-        )
-        if self.model_future is not None:
-            path_model = path / "{}.pth".format(name)
-            copy_data_future(
-                inputs=[self.model_future],
-                outputs=[File(str(path_model))],
-            )
-
-    def copy(self) -> Model:
-        model = self.__class__(**asdict(self.config))
-        for element, energy in self.atomic_energies.items():
-            model.add_atomic_energy(element, energy)
-        if self.model_future is not None:
-            model.model_future = copy_data_future(
-                inputs=[self.model_future],
-                outputs=[psiflow.context().new_file("model_", ".pth")],
-            ).outputs[0]
-        return model
-
-    def get_train_app(self) -> Callable:
-        raise NotImplementedError
-
-    def get_initialise_app(self) -> Callable:
-        raise NotImplementedError
-
-    @property
-    def do_offset(self) -> bool:
-        return len(self.atomic_energies) > 0
-
-    @property
-    def seed(self) -> int:
-        raise NotImplementedError
-
-    @seed.setter
-    def seed(self, arg) -> None:
-        raise NotImplementedError
diff --git a/psiflow/reference/gpaw_.py b/psiflow/reference/gpaw_.py
index 3c840a6f..84969793 100644
--- a/psiflow/reference/gpaw_.py
+++ b/psiflow/reference/gpaw_.py
@@ -49,7 +49,7 @@ def parse_output(stdout: str, properties: Sequence[str]) -> dict:
 @psiflow.register_serializable
 class GPAW(Reference):
     executor: str = "GPAW"
-    _execute_label = "gpaw_singlepoint"
+    _execute_label: str = "gpaw_singlepoint"
     parameters: dict
     script: str
 
@@ -59,7 +59,6 @@ def __init__(self, parameters: dict, script: str | Path = FILEPATH, **kwargs):
         assert (script := Path(script)).is_file()
         self.script = str(script.resolve())  # absolute path
         self.bash_template = make_bash_template(self.executor, self.script)
-        print(self.bash_template)
 
     def compute_atomic_energy(self, element, box_size=None) -> AppFuture:
         return copy_app_future(0.0)  # GPAW computes formation energy by default
diff --git a/psiflow/sampling/_ase.py b/psiflow/sampling/_ase.py
index 41495f81..e66ae3d2 100644
--- a/psiflow/sampling/_ase.py
+++ b/psiflow/sampling/_ase.py
@@ -22,7 +22,7 @@
 from ase.filters import FrechetCellFilter
 
 from psiflow.geometry import Geometry
-from psiflow.functions import function_from_json, EnergyFunction
+from psiflow.functions import function_from_json, Function
 from psiflow.sampling.utils import TimeoutException, timeout_handler
 
 
@@ -34,7 +34,7 @@
 class FunctionCalculator(Calculator):
     implemented_properties = ["energy", "free_energy", "forces", "stress"]
 
-    def __init__(self, function: EnergyFunction, **kwargs):
+    def __init__(self, function: Function, **kwargs):
         super().__init__(**kwargs)
         self.function = function
 
diff --git a/psiflow/sampling/sampling.py b/psiflow/sampling/sampling.py
index dcb8fa80..117f8b79 100644
--- a/psiflow/sampling/sampling.py
+++ b/psiflow/sampling/sampling.py
@@ -9,7 +9,6 @@
 from parsl.app.app import bash_app
 from parsl.data_provider.files import File
 from parsl.dataflow.futures import AppFuture, DataFuture
-from sympy import print_glsl
 
 import psiflow
 from psiflow.data import Dataset
diff --git a/psiflow/serialization.py b/psiflow/serialization.py
index ffe28f4e..85e558fc 100644
--- a/psiflow/serialization.py
+++ b/psiflow/serialization.py
@@ -4,6 +4,7 @@
 from typing import Any, Optional, Union
 from collections.abc import Sequence
 
+import numpy as np
 from parsl import File, python_app
 from parsl.dataflow.futures import AppFuture, DataFuture
 
@@ -87,6 +88,8 @@ def default(self, obj: Any) -> dict[str, Any]:
                 return self._handle_file(obj)
             case Geometry():
                 return {CLS_KEY: "Geometry", "data": obj.to_string()}
+            case np.ndarray():
+                return {CLS_KEY: "Array", "data": obj.tolist()}
             case _ if name in SERIALIZABLE_CLS:  # class instances
                 return {CLS_KEY: name} | vars(obj)
             case _ if obj in SERIALIZABLE_CLS.values():  # classes
@@ -116,6 +119,8 @@ def deserialize_hook(data: dict) -> Any:
         return File(Path(data["path"]))
     elif cls_name == "Geometry":
         return Geometry.from_string(data["data"])
+    elif cls_name == "Array":
+        return np.array(data["data"])
     elif cls_name not in SERIALIZABLE_CLS:
         cls_set = set(SERIALIZABLE_CLS.keys()) or {}
         msg = f"Custom class '{cls_name}' not in {cls_set}. Cannot deserialize.."
diff --git a/psiflow/utils/apps.py b/psiflow/utils/apps.py
index 773915ae..d4e7937d 100644
--- a/psiflow/utils/apps.py
+++ b/psiflow/utils/apps.py
@@ -9,7 +9,7 @@
 
 
 def get_attribute(obj: Any, *attribute_names: str) -> Any:
-    # TODO: not an app
+    # uses Parsl lifted operators in case obj is a Future
     for name in attribute_names:
         obj = getattr(obj, name)
     return obj
diff --git a/psiflow/utils/io.py b/psiflow/utils/io.py
index 2123429a..93aa3b7b 100644
--- a/psiflow/utils/io.py
+++ b/psiflow/utils/io.py
@@ -1,11 +1,16 @@
 import json
 import xml.etree.ElementTree as ET
 from typing import Any
+from collections.abc import Sequence
 
 import numpy as np
 from parsl.app.app import python_app
 from parsl.data_provider.files import File
 
+from psiflow.serialization import JSONEncoder
+
+# TODO: check which of these methods is actively used
+
 
 def _save_yaml(
     input_dict: dict,
@@ -93,30 +98,18 @@ def _save_metrics(data: np.recarray, outputs: list = []) -> None:
 
 
 def _dump_json(
-    inputs: list = [],
-    outputs: list = [],
+    inputs: Sequence = (),
+    outputs: Sequence = (),
     **kwargs,
-) -> None:
-    def convert_to_list(array):
-        if not type(array) is np.ndarray:
-            if type(array) is np.floating:
-                return float(array)
-            return array
-        as_list = []
-        for item in array:
-            as_list.append(convert_to_list(item))
-        return as_list
-
-    assert len(outputs) == 1
-    for name in list(kwargs.keys()):
-        value = kwargs[name]
-        if type(value) is np.ndarray:
-            value = convert_to_list(value)
-        elif type(value) is np.floating:
-            value = float(value)
-        kwargs[name] = value
+) -> str:
+    assert len(outputs) <= 1
+    json_str = json.dumps(kwargs, cls=JSONEncoder)
+    if len(outputs) == 0:
+        return json_str
     with open(outputs[0], "w") as f:
-        f.write(json.dumps(kwargs))
+        f.write(json_str)
+    return json_str
+
 
 
 dump_json = python_app(_dump_json, executors=["default_threads"])
diff --git a/psiflow/utils/logging.py b/psiflow/utils/logging.py
index 8ef1998a..a8ff2a79 100644
--- a/psiflow/utils/logging.py
+++ b/psiflow/utils/logging.py
@@ -18,6 +18,10 @@ def setup_logging(file: Path, level=logging.INFO) -> None:
     fh.setFormatter(formatter)
     logger.addHandler(fh)
 
+    # keep Parsl logs in the parsl.log file
+    logging.getLogger("parsl").propagate = False
+
+
 
 def log_dfk_tasks(verbose: bool = False):
     """Get an overview of all tasks stored in the parsl DFK. For debugging purposes."""
diff --git a/pyproject.toml b/pyproject.toml
index 52d81587..98e13dbb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,18 +5,18 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "psiflow"
-version = "4.0.3"
+version = "4.0.4"
 description = "Library for developing interatomic potentials"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "ase>=3.23.0",
     "pyyaml>=6.0",
-    "numpy>=1.22.3, <2",
+#    "numpy>=1.22.3, <2",
     "parsl==2026.02.23",
     "prettytable",
     "psutil",
-    "cp2k-input-tools @ git+https://github.com/cp2k/cp2k-input-tools.git@3b9929735dcb3c8c0620a548b1fe20efecbad077",  # need 2024.1
+#    "cp2k-input-tools @ git+https://github.com/cp2k/cp2k-input-tools.git@3b9929735dcb3c8c0620a548b1fe20efecbad077",  # need 2024.1
     "ipi @ git+https://github.com/i-pi/i-pi.git@v3.1.10",
     ]
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 2052219c..58d20685 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,6 @@
-from dataclasses import asdict
 from pathlib import Path
 
+import ase
 import numpy as np
 import parsl
 import pytest
@@ -12,7 +12,8 @@
 import psiflow
 from psiflow.data import Dataset
 from psiflow.geometry import Geometry
-from psiflow.models import MACE, MACEConfig
+from psiflow.models import MACE
+from psiflow.hamiltonians import MACEHamiltonian
 
 
 def pytest_addoption(parser):
@@ -50,21 +51,27 @@ def context(request, tmp_path_factory):
 
 
 @pytest.fixture(scope="session")
-def mace_config():
-    mace_config = MACEConfig()
-    mace_config.num_radial_basis = 3
-    mace_config.num_cutoff_basis = 2
-    mace_config.max_ell = 1
-    mace_config.correlation = 1
-    mace_config.MLP_irreps = "2x0e"
-    mace_config.num_channels = 2
-    mace_config.max_L = 0
-    mace_config.r_max = 4
-    mace_config.radial_MLP = "[4]"
-    return asdict(mace_config)
-
-
-def generate_emt_cu_data(nstates, amplitude, supercell=None):
+def mace_config() -> dict:
+    return dict(
+        num_radial_basis=3,
+        num_cutoff_basis=2,
+        max_ell=1,
+        correlation=1,
+        MLP_irreps="2x0e",
+        num_channels=2,
+        max_L=0,
+        r_max=4,
+        radial_MLP="[4]",
+        swa=True,
+        #
+        batch_size=1,  # MACE crashes if it is larger than dataset size
+        max_num_epochs=10,
+        energy_weight=100,  # make sure energy RMSE drops from first epoch
+        forces_weight=10
+    )
+
+
+def generate_emt_cu_data(nstates, amplitude, supercell=None) -> list[ase.Atoms]:
     if supercell is None:
         supercell = np.eye(3)
     atoms = make_supercell(bulk("Cu", "fcc", a=3.6, cubic=True), supercell)
@@ -90,40 +97,42 @@ def generate_emt_cu_data(nstates, amplitude, supercell=None):
 
 
 @pytest.fixture
-def dataset(context):
+def dataset(context) -> Dataset:
     data = generate_emt_cu_data(20, 0.2)
     data += generate_emt_cu_data(5, 0.15, supercell=np.diag([1, 2, 1]))
     data_ = [Geometry.from_atoms(atoms) for atoms in data]
     return Dataset(data_).align_axes()
 
 
+# @pytest.fixture(scope="session")
+# def mace_model(tmp_path_factory, mace_config):
+#     path = tmp_path_factory.mktemp("mace")
+#     model = MACEModel(path, mace_config)
+#     # manually recreate dataset with 'session' scope
+#     data = generate_emt_cu_data(20, 0.2)
+#     data_ = [Geometry.from_atoms(atoms) for atoms in data]
+#     dataset = Dataset(data_)
+#     # add additional state to initialize other atomic numbers
+#     geometry = Geometry.from_data(
+#         numbers=np.array(2 * [101]),
+#         positions=np.array([[0, 0, 0], [2, 0, 0]]),
+#         cell=None,
+#     )
+#     geometry.energy = -1.0
+#     geometry.per_atom.forces[:] = np.random.uniform(size=(2, 3))
+#     model.initialize(dataset[:5] + Dataset([geometry]))
+#     return model
+
+
 @pytest.fixture(scope="session")
-def mace_model(mace_config):
-    # manually recreate dataset with 'session' scope
-    data = generate_emt_cu_data(20, 0.2)
-    data_ = [Geometry.from_atoms(atoms) for atoms in data]
-    dataset = Dataset(data_)
-    model = MACE(**mace_config)
-    # add additional state to initialize other atomic numbers
-    # mace cannot handle partially periodic datasets
-    geometry = Geometry.from_data(
-        numbers=np.array(2 * [101]),
-        positions=np.array([[0, 0, 0], [2, 0, 0]]),
-        cell=2 * np.eye(3),
-    )
-    geometry.energy = -1.0
-    geometry.per_atom.forces[:] = np.random.uniform(size=(2, 3))
-    model.initialize(dataset[:5] + Dataset([geometry]))
-    return model
+def mace_foundation() -> Path:
+    hamiltonian = MACEHamiltonian.from_foundation(name="small")
+    return hamiltonian.external.filepath
 
 
 @pytest.fixture
 def dataset_h2(context):
-    h2 = Atoms(
-        numbers=[1, 1],
-        positions=[[0, 0, 0], [0.74, 0, 0]],
-        pbc=False,
-    )
+    h2 = Atoms(numbers=[1, 1], positions=[[0, 0, 0], [0.74, 0, 0]], pbc=False)
     data = [h2.copy() for i in range(20)]
     for atoms in data:
         atoms.positions += np.random.uniform(-0.05, 0.05, size=(2, 3))
diff --git a/tests/test_free_energy.py b/tests/test_free_energy.py
index e78ea599..ecacc3e8 100644
--- a/tests/test_free_energy.py
+++ b/tests/test_free_energy.py
@@ -14,19 +14,10 @@
 
 def test_integration_simple(dataset):
     dataset = dataset[:10]
-    einstein = EinsteinCrystal(dataset[1], force_constant=2)
-    geometry = optimize(
-        dataset[3],
-        einstein,
-        mode="fix_cell",
-        f_max=1e-4,
-    )
-    hessian = compute_harmonic(
-        geometry,
-        einstein,
-        pos_shift=5e-4,
-    )
-    harmonic = Harmonic(geometry, hessian)
+    einstein = EinsteinCrystal.from_geometry(dataset[1], force_constant=2)
+    geometry = optimize(dataset[3], einstein, mode="fix_cell", f_max=1e-4)
+    hessian = compute_harmonic(geometry, einstein, pos_shift=5e-4)
+    harmonic = Harmonic.from_geometry(geometry, hessian)
 
     integration = Integration(
         harmonic,
@@ -34,10 +25,7 @@ def test_integration_simple(dataset):
         delta_hamiltonian=(-0.1) * harmonic,
         delta_coefficients=np.array([0.0, 0.5, 1.0]),
     )
-    walkers = integration.create_walkers(
-        dataset,
-        initialize_by="quench",
-    )
+    walkers = integration.create_walkers(dataset, initialize_by="quench")
     for walker in walkers:
         assert check_equality(walker.start, dataset[1]).result()
 
@@ -80,21 +68,16 @@ def test_integration_simple(dataset):
 
 
 def test_integration_temperature(dataset):
-    einstein = EinsteinCrystal(dataset[0], force_constant=1)
+    einstein = EinsteinCrystal.from_geometry(dataset[0], force_constant=1)
     integration = Integration(
-        hamiltonian=einstein,
-        temperatures=[300, 400],
-        pressure=0.0,
+        hamiltonian=einstein, temperatures=[300, 400], pressure=0.0
     )
     integration.create_walkers(dataset[:3])
     integration.sample(steps=10, step=1)
     integration.compute_gradients()
     gradient0 = integration.states[0].gradients["temperature"]
 
-    integration = Integration(
-        hamiltonian=einstein,
-        temperatures=[300, 400],
-    )
+    integration = Integration(hamiltonian=einstein, temperatures=[300, 400])
     integration.create_walkers(dataset[:3])
     integration.sample(steps=10, step=1)
     integration.compute_gradients()
@@ -105,22 +88,19 @@ def test_integration_temperature(dataset):
 def test_phonons(dataset):
     reference = dataset[2].result()
     constant = 10
-    einstein = EinsteinCrystal(reference, force_constant=constant)
+    einstein = EinsteinCrystal.from_geometry(reference, force_constant=constant)
 
-    hessian = compute_harmonic(
-        reference,
-        einstein,
-        asr="none",  # einstein == translationally VARIANT
-    )
+    # einstein == translationally VARIANT
+    hessian = compute_harmonic(reference, einstein, asr="none")
     assert np.allclose(
         hessian.result(), constant * np.eye(3 * len(reference)), rtol=1e-4
     )
 
 
-def test_dihydrogen(dataset_h2):
+def test_dihydrogen(dataset_h2, mace_foundation):
     geometry = dataset_h2[0].result()
     geometry.cell = 20 * np.eye(3)
-    hamiltonian = MACEHamiltonian.mace_mp0("small")
+    hamiltonian = MACEHamiltonian(external=mace_foundation)
     optimized = optimize(
         geometry,
         hamiltonian,
diff --git a/tests/test_function.py b/tests/test_function.py
index 19c15b97..39ec1ca0 100644
--- a/tests/test_function.py
+++ b/tests/test_function.py
@@ -23,60 +23,57 @@
 from psiflow.utils._plumed import remove_comments_printflush, set_path_in_plumed
 from psiflow.utils.apps import copy_app_future
 from psiflow.utils.io import dump_json
-from psiflow.serialization import CLS_KEY
+from psiflow.utils.apps import get_attribute
+from psiflow.serialization import CLS_KEY, deserialize_hook
 
 
 def test_einstein_crystal(dataset):
+    future_geom = dataset[0]
+    test_dataset = dataset[:4].reset()
+    geometries = test_dataset.geometries().result()
+
     function = EinsteinCrystalFunction(
-        force_constant=1.0,
-        centers=dataset[0].result().per_atom.positions,
-        volume=0.0,
+        force_constant=1.0, centers=future_geom.result().per_atom.positions
     )
+    hamiltonian = EinsteinCrystal.from_geometry(future_geom, force_constant=1.0)
 
-    nstates = 4
-    geometries = dataset[:nstates].reset().geometries().result()
     energy, forces, stress = function.compute(geometries).values()
+    forces1, stress1, energy1 = hamiltonian.compute(
+        test_dataset, "forces", "stress", "energy"
+    )
+    forces2 = hamiltonian.compute(test_dataset, "forces", batch_size=3)
+
     assert np.all(energy >= 0)
     assert energy[0] == 0
-    assert np.allclose(  # forces point to centers
-        forces,
-        function.centers.reshape(1, -1, 3) - dataset[:4].get("positions").result(),
-    )
     assert geometries[0].energy is None
-    hamiltonian = EinsteinCrystal(dataset[0], force_constant=1.0)
-
-    forces_, stress_, energy_ = hamiltonian.compute(
-        dataset[:4], "forces", "stress", "energy"
-    )
-    assert np.allclose(
-        energy_.result(),
-        energy,
-    )
-    assert np.allclose(
-        forces_.result(),
+    assert np.allclose(  # forces point to centers
         forces,
+        function.centers.reshape(1, -1, 3) - test_dataset.get("positions").result(),
     )
-
-    forces = hamiltonian.compute(dataset[:4], "forces", batch_size=3)
-    assert np.allclose(
-        forces.result(),
-        forces_.result(),
-    )
+    assert np.allclose(energy1.result(), energy)
+    assert np.allclose(forces1.result(), forces)
+    assert np.allclose(forces1.result(), forces2.result())
 
 
 def test_einstein_force(dataset):
-    einstein = EinsteinCrystal(dataset[0], 5.0)
     reference = dataset[0].result()
+    force_constant = 5
     delta = 0.1
+    einstein = EinsteinCrystal.from_geometry(reference, force_constant)
+
+    geometries, forces = [], []
     for i in range(len(reference)):
         for j in range(3):  # x, y, z
             for sign in [+1, -1]:
                 geometry = reference.copy()
                 geometry.per_atom.positions[i, j] += sign * delta
-                forces = einstein.compute(geometry, "forces").result()
-                assert np.sign(forces[0, i, j]) == (-1.0) * sign
-                forces[0, i, j] = 0.0
-                assert np.allclose(forces, 0.0)
+                geometries.append(geometry)
+                forces_ = np.zeros_like(geometry.per_atom.positions)
+                forces_[i, j] = (-1.0) * sign * force_constant * delta
+                forces.append(forces_)
+
+    forces_ = einstein.compute(geometries, "forces").result()
+    assert np.allclose(forces, forces_)
 
 
 def test_get_filename_hills():
@@ -109,48 +106,41 @@ def test_get_filename_hills():
 
 def test_plumed_function(tmp_path, dataset, dataset_h2):
     data = dataset + dataset_h2
+    geometries = data.geometries().result()
     plumed_str = """
 D1: DISTANCE ATOMS=1,2 NOPBC
 CV: BIASVALUE arg=D1
 """
     function = PlumedFunction(plumed_str)
-    outputs = function.compute(data.geometries().result())
+    outputs = function.compute(geometries)
 
     f = 1 / (kJ / mol) * 10  # eV --> kJ/mol and nm --> A
     positions = data.get("positions").result()
     manual = np.linalg.norm(positions[:, 0, :] - positions[:, 1, :], axis=1)
-    assert np.allclose(
-        outputs["energy"] * f,
-        manual,
-    )
+    assert np.allclose(outputs["energy"] * f, manual)
     gradient = (positions[:, 0, :] - positions[:, 1, :]) / manual.reshape(-1, 1)
-    assert np.allclose(
-        outputs["forces"][:, 0, :] * f,
-        gradient * (-1.0),
-    )
+    assert np.allclose(outputs["forces"][:, 0, :] * f, gradient * (-1.0))
 
+    # test volume bias
+    geometries = dataset.geometries().result()
+    volumes = np.array([g.volume for g in geometries])
     plumed_input = """
 UNITS LENGTH=A ENERGY=kj/mol TIME=fs
 CV: VOLUME
 RESTRAINT ARG=CV AT=50 KAPPA=1
 """
     function = PlumedFunction(plumed_input)
-    energy, forces, stress = function.compute(dataset.geometries().result()).values()
-
-    volumes = np.linalg.det(dataset.get("cell").result())
-    energy_ = (volumes - 50) ** 2 * (kJ / mol) / 2
-    assert np.allclose(
-        energy,
-        energy_,
-    )
-
     hamiltonian = PlumedHamiltonian(plumed_input)
-    energy_, forces_, stress_ = hamiltonian.compute(dataset)
 
+    energy, forces, stress = function.compute(geometries).values()
+    energy_, forces_, stress_ = hamiltonian.compute(dataset)
+    energy_manual = (volumes - 50) ** 2 * (kJ / mol) / 2
+    assert np.allclose(energy, energy_manual)
     assert np.allclose(energy, energy_.result())
     assert np.allclose(stress, stress_.result())
 
     # use external grid as bias, check that file is read
+    test_set = dataset[:10]
     hills = """#! FIELDS time CV sigma_CV height biasf
 #! SET multivariate false
 #! SET kerneltype gaussian
@@ -168,24 +158,18 @@ def test_plumed_function(tmp_path, dataset, dataset_h2):
         path_hills
     )
     hamiltonian = PlumedHamiltonian(plumed_input, File(path_hills))
+    energy = hamiltonian.compute(test_set, "energy")
 
-    nstates = 10
-    positions = dataset[:nstates].get("positions").result()
-    distance = np.linalg.norm(positions[:, 0, :] - positions[:, 1, :], axis=1)
-    distance = distance.reshape(-1, 1)
-
-    energy = hamiltonian.compute(dataset[:10], "energy").result()
-
+    # compute bias energy manually
+    positions = test_set.get("positions").result()
+    vecs = positions[:, 0, :] - positions[:, 1, :]
+    distance = np.linalg.norm(vecs, axis=1).reshape(-1, 1)
     sigma = 2 * np.ones((1, 2))
     height = np.array([70, 70]).reshape(1, -1) * (kJ / mol)  # unit consistency
     center = np.array([2.5, 2.6]).reshape(1, -1)
     energy_per_hill = height * np.exp((distance - center) ** 2 / (-2 * sigma**2))
     energy_ = np.sum(energy_per_hill, axis=1)
-    assert np.allclose(
-        energy,
-        energy_,
-        atol=1e-3,
-    )
+    assert np.allclose(energy.result(), energy_, atol=1e-3)
 
     # check that hills file didn't change
     with open(path_hills, "r") as f:
@@ -193,26 +177,24 @@ def test_plumed_function(tmp_path, dataset, dataset_h2):
 
 
 def test_harmonic_function(dataset):
-    reference = dataset[0].result()
-    function = HarmonicFunction(
-        reference.per_atom.positions,
-        np.eye(3 * len(reference)),
-        reference.energy,
-    )
-    einstein = EinsteinCrystalFunction(1.0, reference.per_atom.positions)
+    test_set = dataset[:10]
+    geometries = test_set.geometries().result()
+    reference = geometries[0]
+    hess = np.eye(3 * len(reference))
 
-    energy, forces, _ = function.compute(dataset[:10].geometries().result()).values()
-    energy_, forces_, _ = einstein.compute(dataset[:10].geometries().result()).values()
+    function = HarmonicFunction(reference.per_atom.positions, hess, reference.energy)
+    einstein = EinsteinCrystalFunction(1.0, reference.per_atom.positions)
+    harmonic = Harmonic.from_geometry(dataset[0], hess)
 
-    assert np.allclose(energy - reference.energy, energy_)
-    assert np.allclose(forces_, forces)
+    energy, forces, _ = function.compute(geometries).values()
+    energy1, forces1, _ = einstein.compute(geometries).values()
+    energy2, forces2, _ = harmonic.compute(test_set)
 
-    harmonic = Harmonic(dataset[0], np.eye(3 * len(reference)))
     assert Harmonic.outputs == ("energy", "forces", "stress")
-
-    energy, forces, _ = harmonic.compute(dataset[:10])
-    assert np.allclose(energy.result() - reference.energy, energy_, atol=1e-5)
-    assert np.allclose(forces.result(), forces_)
+    assert np.allclose(energy - reference.energy, energy1)
+    assert np.allclose(forces, forces1)
+    assert np.allclose(energy2.result() - reference.energy, energy1, atol=1e-5)
+    assert np.allclose(forces2.result(), forces1)
 
 
 def test_dispersion_function(dataset):
@@ -230,46 +212,50 @@ def test_dispersion_function(dataset):
 
 
 def test_hamiltonian_arithmetic(dataset):
-    hamiltonian = EinsteinCrystal(dataset[0], force_constant=1.0)
-    hamiltonian_ = EinsteinCrystal(dataset[0].result(), force_constant=1.1)
-    assert not hamiltonian == hamiltonian_
-    hamiltonian_ = EinsteinCrystal(dataset[0], force_constant=1.0)
-    assert hamiltonian != hamiltonian_  # app future copied
-    hamiltonian_.reference_geometry = hamiltonian.reference_geometry
-    assert hamiltonian == hamiltonian_
-    hamiltonian_ = EinsteinCrystal(dataset[1], force_constant=1.0)
-    assert not hamiltonian == hamiltonian_
-    assert not hamiltonian == PlumedHamiltonian(plumed_input="")
-
-    # consider linear combination
-    scaled = 0.5 * hamiltonian
-    assert len(scaled) == 1
-    assert scaled.get_coefficient(hamiltonian) == 0.5
-    actually_scaled = EinsteinCrystal(dataset[0], force_constant=0.5)
-    assert scaled.get_coefficient(actually_scaled) is None
-
-    energy_scaled = scaled.compute(dataset[:10], "energy")
-    energy_actually = actually_scaled.compute(dataset[:10], "energy")
-    assert np.allclose(energy_scaled.result(), energy_actually.result())
-
-    energy, forces, _ = hamiltonian.compute(dataset[:10])
-    other = EinsteinCrystal(dataset[0], 4.0)
-    mixture = hamiltonian + other
+    test_set = dataset[:10]
+    future = dataset[0]
+    geom = future.result()
+    centers = get_attribute(future, "per_atom", "positions")
+    volume = get_attribute(future, "volume")
+
+    hamiltonian = EinsteinCrystal.from_geometry(geom, force_constant=1.0)
+    assert hamiltonian == hamiltonian
+    hamiltonian1 = EinsteinCrystal.from_geometry(geom, force_constant=1.0)
+    assert hamiltonian == hamiltonian1
+    hamiltonian1 = EinsteinCrystal.from_geometry(geom, force_constant=1.1)
+    assert hamiltonian != hamiltonian1  # different constants
+    hamiltonian1 = EinsteinCrystal(1.0, centers, volume)
+    assert hamiltonian != hamiltonian1  # unknown future
+    hamiltonian2 = EinsteinCrystal(1.0, centers, volume)
+    assert hamiltonian1 == hamiltonian2  # same unknown future
+    assert hamiltonian != PlumedHamiltonian(plumed_input="")
+
+    # consider linear combinations
+    zero = Zero()
+    assert hamiltonian == hamiltonian + zero
+    assert 2 * hamiltonian + zero == 2 * hamiltonian
+    scaled_m = 0.5 * hamiltonian
+    scaled_h1 = EinsteinCrystal.from_geometry(future, 0.5)
+    assert len(scaled_m) == 1
+    assert scaled_m.get_coefficient(hamiltonian) == 0.5
+    assert scaled_m.get_coefficient(scaled_h1) is None
+    scaled_h2 = EinsteinCrystal.from_geometry(future, 4.0)
+    mixture = hamiltonian + scaled_h2
     assert len(mixture) == 2
-    assert mixture == 0.9 * other + 0.1 * other + 1.0 * hamiltonian
-    _ = mixture + other
+    assert mixture == 0.9 * scaled_h2 + 0.1 * scaled_h2 + 1.0 * hamiltonian
     assert mixture.get_coefficients(mixture) == (1, 1)
-    assert mixture.get_coefficients(hamiltonian + actually_scaled) is None
-    energy_, forces_, _ = mixture.compute(dataset[:10])
+    assert mixture.get_coefficients(hamiltonian + scaled_h1) is None
+
+    energy_m = scaled_m.compute(test_set, "energy")
+    energy_h1 = scaled_h1.compute(test_set, "energy")
+    energy, forces, _ = hamiltonian.compute(test_set)
+    energy_, forces_, _ = mixture.compute(test_set)
+    assert np.allclose(energy_m.result(), energy_h1.result())
     assert np.allclose(energy_.result(), 5 * energy.result())
     assert np.allclose(forces_.result(), 5 * forces.result())
 
-    zero = Zero()
-    energy, forces, stress = zero.compute(dataset[:10])
+    energy, forces, stress = zero.compute(test_set)
     assert np.allclose(energy.result(), 0.0)
-    assert hamiltonian == hamiltonian + zero
-    assert 2 * hamiltonian + zero == 2 * hamiltonian
-
     geometries = [dataset[i].result() for i in [0, -1]]
     natoms = [len(geometry) for geometry in geometries]
     forces = zero.compute(geometries, "forces", batch_size=1).result()
@@ -278,32 +264,33 @@ def test_hamiltonian_arithmetic(dataset):
 
 
 def test_subtract(dataset):
-    einstein = EinsteinCrystal(dataset[0], force_constant=1.0)
+    einstein = EinsteinCrystal.from_geometry(dataset[0], force_constant=1.0)
     h = einstein - einstein
     assert isinstance(h, MixtureHamiltonian)
     assert np.allclose(h.coefficients, 0.0)
 
 
 def test_hamiltonian_serialize(dataset):
-    einstein = EinsteinCrystal(dataset[0], force_constant=1.0)
 
-    kappa = 1
-    center = 100
+    data = json.loads(psiflow.serialize(Zero()).result())
+    assert data[CLS_KEY] == "Zero"
+    zero = psiflow.deserialize(json.dumps(data)).result()
+    assert isinstance(zero, Zero)
+
     plumed_input = """
-UNITS LENGTH=A ENERGY=kj/mol TIME=fs
-CV: VOLUME
-RESTRAINT ARG=CV AT={center} KAPPA={kappa}
-""".format(
-        center=center, kappa=kappa / (kJ / mol)
+    UNITS LENGTH=A ENERGY=kj/mol TIME=fs
+    CV: VOLUME
+    RESTRAINT ARG=CV AT={center} KAPPA={kappa}
+    """.format(
+        center=100, kappa=1 / (kJ / mol)
     )
     plumed = PlumedHamiltonian(plumed_input)
+    einstein = EinsteinCrystal.from_geometry(dataset[0], force_constant=1.0)
+
     data = json.loads(psiflow.serialize(einstein).result())
     assert data[CLS_KEY] == "EinsteinCrystal"
-    assert "reference_geometry" in data
+    assert all((k in data) for k in ("centers", "volume"))
     einstein_ = psiflow.deserialize(json.dumps(data)).result()
-    energy = einstein.compute(dataset[:10], "energy")
-    energy_ = einstein_.compute(dataset[:10], "energy")
-    assert np.allclose(energy.result(), energy_.result())
 
     mixed = 0.1 * einstein + 0.9 * plumed
     data = json.loads(psiflow.serialize(mixed).result())
@@ -311,45 +298,37 @@ def test_hamiltonian_serialize(dataset):
     assert "hamiltonians" in data
     assert "coefficients" in data
     mixed_ = psiflow.deserialize(json.dumps(data)).result()
+    assert mixed.coefficients == mixed_.coefficients
     for h, h_ in zip(mixed.hamiltonians, mixed_.hamiltonians):
         if isinstance(h, EinsteinCrystal):
             assert h.force_constant == h_.force_constant
-            assert h.reference_geometry.result() == h_.reference_geometry
+            assert h.volume.result() == h_.volume
+            assert np.allclose(h.centers.result(), h_.centers)
         else:
             assert h == h_
-    assert mixed.coefficients == mixed_.coefficients
-    energy = mixed.compute(dataset[:10], "energy")
-    energy_ = mixed_.compute(dataset[:10], "energy")
-    assert np.allclose(energy.result(), energy_.result())
 
-    data = json.loads(psiflow.serialize(Zero()).result())
-    assert data[CLS_KEY] == "Zero"
-    zero = psiflow.deserialize(json.dumps(data)).result()
-    assert isinstance(zero, Zero)
+    test_set = dataset[:10]
+    energy_e = einstein.compute(test_set, "energy")
+    energy_e_ = einstein_.compute(test_set, "energy")
+    energy_m = mixed.compute(test_set, "energy")
+    energy_m_ = mixed_.compute(test_set, "energy")
+    assert np.allclose(energy_e.result(), energy_e_.result())
+    assert np.allclose(energy_m.result(), energy_m_.result())
 
 
 def test_evaluate(dataset):
-    hamiltonian = EinsteinCrystal(
-        geometry=dataset[0],
-        force_constant=1.0,
-    )
+    hamiltonian = EinsteinCrystal.from_geometry(geometry=dataset[0], force_constant=1.0)
     data = dataset[:10].reset()
-    evaluated = data.evaluate(
-        hamiltonian,
-        batch_size=None,
-    )
-    evaluated_ = data.evaluate(
-        hamiltonian,
-        batch_size=2,
-    )
+    evaluated = data.evaluate(hamiltonian, batch_size=None)
+    evaluated_ = data.evaluate(hamiltonian, batch_size=2)
     energy = hamiltonian.compute(evaluated, "energy")
-    for i, geometry in enumerate(evaluated.geometries().result()):
-        assert not np.all(np.isnan(geometry.per_atom.forces))
-        assert np.allclose(geometry.energy, energy.result()[i])
-        assert np.allclose(
-            evaluated_[i].result().energy,
-            geometry.energy,
-        )
+
+    geometries = evaluated.geometries().result()
+    geometries_ = evaluated_.geometries().result()
+    for geom, geom_, e in zip(geometries, geometries_, energy.result()):
+        assert not np.all(np.isnan(geom.per_atom.forces))
+        assert geom == geom_
+        assert geom.energy == e
 
 
 def test_json_dump():
@@ -361,52 +340,20 @@ def test_json_dump():
         "e": copy_app_future(False),
     }
     data_future = dump_json(
-        **data,
-        outputs=[psiflow.context().new_file("bla_", ".json")],
+        **data, outputs=[psiflow.context().new_file("bla_", ".json")]
     ).outputs[0]
-    psiflow.wait()
+    data_future.result()
     with open(data_future.filepath, "r") as f:
-        data_ = json.loads(f.read())
+        data_ = json.loads(f.read(), object_hook=deserialize_hook)
 
     new_a = np.array(data_["a"])
     assert len(new_a.shape) == 4
-    assert np.allclose(
-        data["a"],
-        new_a,
-    )
+    assert np.allclose(data["a"], new_a)
     assert data_["e"] is False
     assert type(data_["b"]) is list
     assert type(data_["c"]) is list
 
 
-def test_mace_function(dataset, mace_model):
-    model_path = str(mace_model.model_future.filepath)
-    mace_model.model_future.result()
-    function = MACEFunction(
-        model_path,
-        device="cpu",
-        dtype="float32",
-        ncores=2,
-        atomic_energies={},
-    )
-    output = function.compute(dataset[:1].geometries().result())
-    energy = output["energy"]
-
-    function = MACEFunction(
-        model_path,
-        device="cpu",
-        dtype="float32",
-        ncores=4,
-        atomic_energies={"Cu": 3, "H": 11},
-    )
-    output = function.compute(dataset[:1].geometries().result())
-    energy_ = output["energy"]
-    assert np.allclose(
-        energy + 11 + 3 * 3,
-        energy_,
-    )
-
-
 def test_function_from_json(tmp_path, dataset):
     hills = """#! FIELDS time CV sigma_CV height biasf
 #! SET multivariate false
@@ -430,6 +377,6 @@ def test_function_from_json(tmp_path, dataset):
     future.result()  # ensure file exists
     function = function_from_json(future.filepath)
 
-    energies = hamiltonian.compute(dataset, "energy").result()
+    energies = hamiltonian.compute(dataset, "energy")
     energies_ = function.compute(dataset.geometries().result())["energy"]
-    assert np.allclose(energies, energies_)
+    assert np.allclose(energies.result(), energies_)
diff --git a/tests/test_models.py b/tests/test_models.py
index 831c0d01..9476975f 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,121 +1,121 @@
-import copy
-
+import pytest
+import torch
 import numpy as np
 from parsl.app.futures import DataFuture
 
 import psiflow
 from psiflow.data import compute_rmse
 from psiflow.hamiltonians import MACEHamiltonian
-from psiflow.models import MACE, load_model
-
+from psiflow.models import MACE
+from psiflow.models.mace import KEY_ATOMIC_ENERGIES, KEY_ITERATION, MODEL_DIRS
+from psiflow.utils.apps import copy_app_future
+from psiflow.utils.io import _read_yaml
 
-def test_mace_init(mace_config, dataset):
-    model = MACE(**mace_config)
-    assert model.model_future is None
-    model.initialize(dataset[:1])
-    assert model.model_future is not None
 
-    _config = model._config
+def test_mace_init(tmp_path, mace_config, dataset):
+    model = MACE.create(tmp_path / "mace", mace_config)
+    atomic_energies = {"Cu": 3, "H": 7}
+    for k, v in atomic_energies.items():
+        model.add_atomic_energy(k, v)
+    model.update_kwargs(seed=42, pair_repulsion=copy_app_future(True))
 
-    data_str = psiflow.serialize(model).result()
-    model = psiflow.deserialize(data_str).result()
+    assert model.model_future is None
+    assert model.iteration == -1
+    future = model.initialize(dataset[:5])
+    assert isinstance(model.model_future, DataFuture)
+    assert model.iteration == -1
+    future.result()
+    assert model.path_mlp.is_file()
+    assert model.iteration == 0
 
-    _config_ = model._config
-    for key, value in _config.items():
-        assert key in _config_
-        if key in ("train_file", "valid_file"):
-            pass  # these are updated by the apps
-        elif type(value) is not list:
-            assert value == _config_[key]
+    config = _read_yaml([model.path_config])
+    assert config.pop(KEY_ATOMIC_ENERGIES) == atomic_energies
+    assert config.pop(KEY_ITERATION) == 0
+    assert config["seed"] == 42
+    assert config["pair_repulsion"]
 
-    config = copy.deepcopy(mace_config)
-    config["batch_size"] = (
-        100000  # bigger than ntrain --> should get reduced internally
-    )
-    model = MACE(**config)
-    model.seed = 1
-    model.initialize(dataset[:3])
-    assert isinstance(model.model_future, DataFuture)
+    mlp = torch.load(model.path_mlp, weights_only=False)
+    atomic_energies_ = mlp.atomic_energies_fn.atomic_energies.numpy()
+    assert atomic_energies_.flatten().tolist() == [7, 3]  # H, Cu
 
-    # create hamiltonian and verify addition of atomic energies
-    hamiltonian = model.create_hamiltonian()
-    assert hamiltonian == model.create_hamiltonian()
-    energies = hamiltonian.compute(dataset, "energy").result()
-
-    nstates = dataset.length().result()
-    # energies = np.array([evaluated[i].result().energy for i in range(nstates)])
-    assert not np.any(np.allclose(energies, 0.0))
-    energy_Cu = 3
-    energy_H = 7
-    atomic_energies = {"Cu": energy_Cu, "H": energy_H}
-    hamiltonian = MACEHamiltonian(hamiltonian.external, atomic_energies=atomic_energies)
-    assert hamiltonian != model.create_hamiltonian()  # atomic energies
-
-    evaluated = dataset.evaluate(hamiltonian).subtract_offset(Cu=energy_Cu, H=energy_H).geometries().result()
-    for i in range(nstates):
-        assert np.allclose(energies[i], evaluated[i].energy)
-
-    second = psiflow.deserialize(psiflow.serialize(hamiltonian)).result()
-    energies = hamiltonian.compute(dataset, "energy")
-    energies_ = second.compute(dataset, "energy")
-    assert np.allclose(energies.result(), energies_.result())
+    with pytest.raises(AssertionError):  # can only initialise once
+        model.initialize(dataset[:3])
+    with pytest.raises(AssertionError):  # one instance per dir
+        MACE.load(tmp_path / "mace")
 
-    hamiltonian = model.create_hamiltonian()
-    model.reset()
-    model.initialize(dataset[:3])
-    assert hamiltonian != model.create_hamiltonian()
+    model.config = model.atomic_energies = None
+    model._load_config()
+    assert model.config == config
+    assert model.atomic_energies == atomic_energies
+    assert model.iteration == 0
 
 
 def test_mace_train(gpu, mace_config, dataset, tmp_path):
     # as an additional verification, this test can be executed while monitoring
     # the mace logging, and in particular the rmse_r during training, to compare
     # it with the manually computed value
-    training = dataset[:-5]
+    key = "per_atom_energy"
+    training = dataset[:-15]
     validation = dataset[-5:]
-    mace_config["start_swa"] = 100
-    model = MACE(**mace_config)
-    model.initialize(training)
-    hamiltonian0 = model.create_hamiltonian()
+    path = tmp_path / "mace"
+    model = MACE.create(path, mace_config)
+
+    model.train(training, validation)
+    hamiltonian = model.create_hamiltonian()
     rmse0 = compute_rmse(
-        validation.get("per_atom_energy"),
-        validation.evaluate(hamiltonian0).get("per_atom_energy"),
+        validation.get(key),
+        validation.evaluate(hamiltonian).get(key),
     )
-    model.train(training, validation)
-    hamiltonian1 = model.create_hamiltonian()
+    future_train = model.train(training, validation)
+    hamiltonian = model.create_hamiltonian()
     rmse1 = compute_rmse(
-        validation.get("per_atom_energy"),
-        validation.evaluate(hamiltonian1).get("per_atom_energy"),
+        validation.get(key),
+        validation.evaluate(hamiltonian).get(key),
     )
-    assert rmse0.result() > rmse1.result()
 
+    future_train.result()  # wait for second training run
+    with pytest.raises(AssertionError):
+        MACE.load(path)
+    MODEL_DIRS.pop(str(model.root))  # 'free' training dir
+
+    # train from load
+    model_ = MACE.load(path)
+    hamiltonian = model_.create_hamiltonian()
+    rmse2 = compute_rmse(
+        validation.get(key),
+        validation.evaluate(hamiltonian).get(key),
+    )
+    model_.train(training, validation)
+    hamiltonian = model_.create_hamiltonian()
+    rmse3 = compute_rmse(
+        validation.get(key),
+        validation.evaluate(hamiltonian).get(key),
+    )
 
-def test_mace_save_load(mace_config, dataset, tmp_path):
-    model = MACE(**mace_config)
-    model.add_atomic_energy("H", 3)
-    model.add_atomic_energy("Cu", 4)
-    model.save(tmp_path)
-    model.initialize(dataset[:2])
-    e0 = model.create_hamiltonian().compute(dataset[3], "energy").result()
-
-    psiflow.wait()
-    assert (tmp_path / "MACE.yaml").exists()
-    assert not (tmp_path / "MACE.pth").exists()
-
-    model.save(tmp_path)
-    psiflow.wait()
-    assert (tmp_path / "MACE.pth").exists()
-
-    model_ = load_model(tmp_path)
-    assert type(model_) is MACE
-    assert model_.model_future is not None
-    e1 = model_.create_hamiltonian().compute(dataset[3], "energy").result()
-    assert np.allclose(e0, e1, atol=1e-4)  # up to single precision
-
-
-def test_mace_seed(mace_config):
-    model = MACE(**mace_config)
-    assert model.seed == 0
-    model.seed = 111
-    assert model.seed == 111
-    model._config["seed"] = 112
-    assert model.seed == 112
+    print(rmse0.result())
+    print(rmse1.result())
+    print(rmse2.result())
+    print(rmse3.result())
+
+    assert rmse0.result() > rmse1.result()
+    assert np.isclose(rmse1.result(), rmse2.result())
+    assert rmse2.result() > rmse3.result()
+
+
+def test_mace_hamiltonian(dataset, mace_foundation):
+    hamiltonian0 = MACEHamiltonian(mace_foundation)
+    hamiltonian1 = MACEHamiltonian(mace_foundation)
+
+    assert hamiltonian0 == hamiltonian1
+    hamiltonian1.update_kwargs(head="less")
+    assert hamiltonian0 != hamiltonian1
+    hamiltonian2 = psiflow.deserialize(psiflow.serialize(hamiltonian1)).result()
+    assert hamiltonian0 != hamiltonian2
+    assert hamiltonian1 == hamiltonian2
+    hamiltonian2.update_kwargs(enable_cueq=True)
+
+    e0 = hamiltonian0.compute(dataset, "energy")
+    e1 = hamiltonian1.compute(dataset, "energy")
+    e2 = hamiltonian2.compute(dataset, "energy")
+    assert np.allclose(e0.result(), e1.result())
+    assert np.allclose(e0.result(), e2.result())
diff --git a/tests/test_sampling.py b/tests/test_sampling.py
index 4a0718a1..0f3ccf81 100644
--- a/tests/test_sampling.py
+++ b/tests/test_sampling.py
@@ -6,8 +6,7 @@
 
 import psiflow
 from psiflow.geometry import check_equality
-from psiflow.hamiltonians import EinsteinCrystal, PlumedHamiltonian
-from psiflow.models import MACE
+from psiflow.hamiltonians import EinsteinCrystal, PlumedHamiltonian, MACEHamiltonian
 from psiflow.sampling.optimize import (
     optimize as optimize_ipi,
     optimize_dataset as optimize_dataset_ipi,
@@ -32,6 +31,8 @@
 
 
 def test_walkers(dataset):
+    future = dataset[0]
+
     mtd0 = Metadynamics("METAD: FILE=bla")  # dummy
     mtd1 = Metadynamics("METAD: FILE=bla")  # dummy
     assert not (mtd0 == mtd1)
@@ -42,19 +43,19 @@ def test_walkers(dataset):
 RESTRAINT ARG=CV AT=1 KAPPA=1
 """
     plumed = PlumedHamiltonian(plumed_str)
-    einstein = EinsteinCrystal(dataset[0], force_constant=0.1)
-    einstein_ = EinsteinCrystal(dataset[0], force_constant=0.2)
-    walker = Walker(dataset[0], einstein, temperature=300, metadynamics=mtd0)
+    einstein = EinsteinCrystal.from_geometry(future, force_constant=0.1)
+    einstein_ = EinsteinCrystal.from_geometry(future, force_constant=0.2)
+    walker = Walker(future, einstein, temperature=300, metadynamics=mtd0)
     assert walker.ensemble == Ensemble.NVT
     assert not walker.pimd
 
     walkers = [
         walker,
-        Walker(dataset[0], 0.5 * einstein_, nbeads=4, metadynamics=mtd1),
-        Walker(dataset[0], einstein + plumed, nbeads=4),
-        Walker(dataset[0], einstein, pressure=0, temperature=300, metadynamics=mtd1),
-        Walker(dataset[0], einstein_, pressure=100, temperature=600, metadynamics=mtd1),
-        Walker(dataset[0], einstein, temperature=600, metadynamics=mtd1),
+        Walker(future, 0.5 * einstein_, nbeads=4, metadynamics=mtd1),
+        Walker(future, einstein + plumed, nbeads=4),
+        Walker(future, einstein, pressure=0, temperature=300, metadynamics=mtd1),
+        Walker(future, einstein_, pressure=100, temperature=600, metadynamics=mtd1),
+        Walker(future, einstein, temperature=600, metadynamics=mtd1),
     ]
 
     # NVT
@@ -126,14 +127,15 @@ def test_parse_checkpoint(checkpoint):
     )
 
 
-def test_sample(dataset, mace_config):
+def test_sample(dataset, mace_foundation):
+    future = dataset[0]
     plumed_str = """
 UNITS LENGTH=A ENERGY=kj/mol TIME=fs
 CV: DISTANCE ATOMS=1,2 NOPBC
 RESTRAINT ARG=CV AT=1 KAPPA=1
 """
     plumed = PlumedHamiltonian(plumed_str)
-    einstein = EinsteinCrystal(dataset[0], force_constant=0.1)
+    einstein = EinsteinCrystal.from_geometry(future, force_constant=0.1)
 
     plumed_str = """
 UNITS LENGTH=A ENERGY=kj/mol TIME=fs
@@ -143,14 +145,14 @@ def test_sample(dataset, mace_config):
     metadynamics = Metadynamics(plumed_str)
     walkers = [
         Walker(
-            start=dataset[0],
+            start=future,
             temperature=300,
             metadynamics=metadynamics,
             hamiltonian=0.9 * plumed + einstein,
         ),
-        Walker(start=dataset[0], temperature=600, hamiltonian=einstein),
-        Walker(start=dataset[0], temperature=600, hamiltonian=einstein),
-        Walker(start=dataset[0], temperature=600, hamiltonian=einstein),
+        Walker(start=future, temperature=600, hamiltonian=einstein),
+        Walker(start=future, temperature=450, hamiltonian=einstein),
+        Walker(start=future, temperature=600, hamiltonian=einstein),
     ]
     outputs = sample(walkers[:2], steps=50, step=5)
     [o1] = sample(walkers[2:3], steps=20, step=2, start=10, keep_trajectory=False)
@@ -225,23 +227,19 @@ def test_sample(dataset, mace_config):
     assert output.temperature is not None
     assert np.abs(output.temperature.result() - 200 < 200)  # what?
 
-    model = MACE(**mace_config)
-    model.initialize(dataset[:3])
-    hamiltonian = model.create_hamiltonian()
-    walker = Walker(start=dataset[0], temperature=600, hamiltonian=hamiltonian)
+    # minitest MACE
+    hamiltonian = MACEHamiltonian(mace_foundation)
+    walker = Walker(start=future, temperature=600, hamiltonian=hamiltonian)
     walker.state = dataset[1]
     [output] = sample(
         [walker],
         steps=10,
-        observables=["kinetic_md{electronvolt}"],
         checkpoint_step=1,
         fix_com=True,  # otherwise temperature won't match
     )
-    assert np.allclose(
-        hamiltonian.compute(dataset, "energy").result()[0],
-        output["potential{electronvolt}"].result()[0],
-        atol=1e-3,
-    )
+    energy = hamiltonian.compute(dataset[1], "energy")
+    energy_ = output["potential{electronvolt}"]
+    assert np.allclose(energy.result()[0], energy_.result()[0], atol=1e-5)
     assert np.allclose(output.time.result(), 10 * 0.5 / 1000)
     temp0 = output.temperature.result()
     temp1 = output["temperature{kelvin}"].result()[-1]
@@ -251,12 +249,13 @@ def test_sample(dataset, mace_config):
 
 
 def test_ensembles(dataset, dataset_h2):
-    einstein = EinsteinCrystal(dataset[0], force_constant=1e-1)
-    walker_nve = Walker(dataset[0], einstein, temperature=None)
-    walker_nvt = Walker(dataset[0], einstein, temperature=300)
-    walker_npt = Walker(dataset[0], einstein, temperature=300, pressure=0)
-    walker_nvst = Walker(dataset[0], einstein, temperature=300, volume_constrained=True)
-    einstein_h2 = EinsteinCrystal(dataset_h2[3], force_constant=1e-1)
+    future = dataset[0]
+    einstein = EinsteinCrystal.from_geometry(future, force_constant=1e-1)
+    walker_nve = Walker(future, einstein, temperature=None)
+    walker_nvt = Walker(future, einstein, temperature=300)
+    walker_npt = Walker(future, einstein, temperature=300, pressure=0)
+    walker_nvst = Walker(future, einstein, temperature=300, volume_constrained=True)
+    einstein_h2 = EinsteinCrystal.from_geometry(dataset_h2[3], force_constant=1e-1)
     walker_h2 = Walker(dataset_h2[0], einstein_h2)
     walkers = [walker_nve, walker_nvt, walker_npt, walker_nvst, walker_h2]
 
@@ -301,7 +300,7 @@ def test_ensembles(dataset, dataset_h2):
 def test_output_status(dataset):
     """"""
     geom = dataset[0].result()
-    einstein = EinsteinCrystal(geom, force_constant=0.1)
+    einstein = EinsteinCrystal.from_geometry(geom, force_constant=0.1)
     walker = Walker(start=geom, hamiltonian=einstein)
     walker_boom = Walker(start=geom, hamiltonian=einstein, timestep=1e25, pressure=0)
 
@@ -326,13 +325,14 @@ def test_output_status(dataset):
 
 
 def test_reset(dataset):
-    einstein = EinsteinCrystal(dataset[0], force_constant=0.1)
-    walker = Walker(start=dataset[0], temperature=300, hamiltonian=einstein)
-    walker.state = dataset[1]
+    future1, future2 = dataset[0], dataset[1]
+    einstein = EinsteinCrystal.from_geometry(future1, force_constant=0.1)
+    walker = Walker(start=future1, temperature=300, hamiltonian=einstein)
+    walker.state = future2
     assert not walker.is_reset().result()
     assert not check_equality(walker.start, walker.state).result()
-    assert check_equality(walker.start, dataset[0]).result()
-    assert check_equality(walker.state, dataset[1]).result()
+    assert check_equality(walker.start, future1).result()
+    assert check_equality(walker.state, future2).result()
 
     walker.conditional_reset(False)
     assert not walker.is_reset().result()
@@ -344,8 +344,8 @@ def test_reset(dataset):
 
 def test_quench(dataset):
     dataset = dataset[:20]
-    einstein0 = EinsteinCrystal(dataset[3], force_constant=0.1)
-    einstein1 = EinsteinCrystal(dataset[11], force_constant=0.1)
+    einstein0 = EinsteinCrystal.from_geometry(dataset[3], force_constant=0.1)
+    einstein1 = EinsteinCrystal.from_geometry(dataset[11], force_constant=0.1)
     walkers = Walker(start=dataset[0], hamiltonian=einstein0).multiply(10)
     walkers[2].hamiltonian = einstein1
     quench(walkers, dataset)
@@ -369,8 +369,9 @@ def test_randomize(dataset):
 
 
 def test_rex(dataset):
-    einstein = EinsteinCrystal(dataset[0], force_constant=0.1)
-    walker = Walker(dataset[0], hamiltonian=einstein, temperature=600)
+    future = dataset[0]
+    einstein = EinsteinCrystal.from_geometry(future, force_constant=0.1)
+    walker = Walker(future, hamiltonian=einstein, temperature=600)
     walkers = walker.multiply(2)
     replica_exchange(walkers, trial_frequency=2)
     assert walkers[0].coupling.nwalkers == len(walkers)
@@ -383,7 +384,7 @@ def test_rex(dataset):
     assert len(swaps) > 0  # at least some successful swaps
     assert np.allclose(swaps[0, 1:], np.array([1, 0]))  # 0, 1 --> 1, 0
 
-    walkers += Walker(dataset[0], hamiltonian=10 * einstein).multiply(2)
+    walkers += Walker(future, hamiltonian=10 * einstein).multiply(2)
     with pytest.raises(AssertionError):
         replica_exchange(walkers)
     assert len(partition(walkers)) == 3
@@ -394,7 +395,8 @@ def test_rex(dataset):
 
 
 def test_walker_serialization(dataset, tmp_path):
-    einstein = EinsteinCrystal(dataset[0], force_constant=0.1)
+    future = dataset[0]
+    einstein = EinsteinCrystal.from_geometry(future, force_constant=0.1)
     plumed_str = """
 UNITS LENGTH=A ENERGY=kj/mol TIME=fs
 CV: VOLUME
@@ -403,7 +405,7 @@ def test_walker_serialization(dataset, tmp_path):
 """
     metadynamics = Metadynamics(plumed_str)
     walkers = Walker(
-        dataset[0], hamiltonian=einstein, temperature=300, metadynamics=metadynamics
+        future, hamiltonian=einstein, temperature=300, metadynamics=metadynamics
     ).multiply(3)
     for i, walker in enumerate(walkers):
         walker.hamiltonian *= 1 / (1 + i)
@@ -430,7 +432,7 @@ def test_walker_serialization(dataset, tmp_path):
 
 
 def test_optimize_ipi(dataset):
-    einstein = EinsteinCrystal(dataset[2], force_constant=10)
+    einstein = EinsteinCrystal.from_geometry(dataset[2], force_constant=10)
 
     # i-PI optimizer's curvature guess fails in optimum --> don't start in dataset[2]
     future, future_traj = optimize_ipi(
@@ -452,7 +454,7 @@ def test_optimize_ipi(dataset):
 def test_optimize_ase(dataset):
     # TODO: test applied_pressure?
 
-    einstein = EinsteinCrystal(dataset[2], force_constant=10)
+    einstein = EinsteinCrystal.from_geometry(dataset[2], force_constant=10)
     future = optimize_ase(dataset[0], einstein, mode="fix_cell", f_max=1e-4)
     optimized = optimize_dataset_ase(
         dataset[3:5], einstein, mode="fix_cell", f_max=1e-4