diff --git a/src/capellambse/loader/core.py b/src/capellambse/loader/core.py index 4c7da5309..cf9d052f5 100644 --- a/src/capellambse/loader/core.py +++ b/src/capellambse/loader/core.py @@ -187,7 +187,12 @@ def __missing__(self, key: str) -> t.NoReturn: class ModelFile: - """Represents a single file in the model (i.e. a fragment).""" + """Represents a single file in the model (i.e. a fragment). + + This class loads the entire XML tree into memory. This makes it + unsuitable for large trees with only small interesting segments, + like ``.aird`` files. See :class:`VisualFile` for an alternative. + """ __qtypecache: dict[etree.QName, dict[int, etree._Element]] __xtypecache: dict[str, dict[int, etree._Element]] @@ -474,6 +479,99 @@ def unfollow_href(self, element_id: str) -> etree._Element: return self.__hrefsources[element_id] +class VisualFile: + """Represents a visual (AIRD) fragment. + + Visual fragments can rapidly grow very large, which makes it + impractical to hold them in memory entirely all the time. This + specialized class works similar to :class:`ModelFile`. However, it + only keeps the central index in memory, and only loads and parses + other data on request. + """ + + fragment_type: t.Final = FragmentType.VISUAL + + def __init__( + self, + filename: pathlib.PurePosixPath, + handler: filehandler.FileHandler, + ) -> None: + self.filename = filename + self.filehandler = handler + if filename.suffix not in VISUAL_EXTS: + raise ValueError(f"Bad filename for visual fragment: {filename}") + + with handler.open(filename) as f: + parser = etree.iterparse(f) + for _, element in parser: + parent = element.getparent() + if parent is None or parent.getparent() is not None: + continue + + if element.tag == f"{{{_n.NAMESPACES['viewpoint']}}}DAnalysis": + self.__analysis = element + break + parent.remove(element) + else: + raise RuntimeError( + "Broken XML: No 'viewpoint:DAnalysis' element found" + ) + parent = self.__analysis.getparent() + assert parent is not None + parent.remove(self.__analysis) + + def __getitem__(self, key: str) -> etree._Element: + # TODO Return a diagram root element if it's found in this fragment + raise KeyError(key) + + def referenced_files(self) -> cabc.Iterator[str]: + for i in self.__analysis: + if i.tag == "semanticResources" and i.text: + yield i.text + elif i.tag == "referencedAnalysis" and (href := i.get("href")): + yield href.split("#", maxsplit=1)[0] + + def enumerate_uuids(self) -> set[str]: + """Enumerate all UUIDs used in this fragment.""" + return set() + + def idcache_index(self, subtree: etree._Element) -> None: + """Index the IDs of ``subtree``.""" + raise NotImplementedError("Cannot modify visual fragments") + + def idcache_remove(self, source: str | etree._Element) -> None: + """Remove the ID or all IDs below the source from the ID cache.""" + raise NotImplementedError("Cannot modify visual fragments") + + def idcache_rebuild(self) -> None: + """Invalidate and rebuild this file's ID cache.""" + # Nothing to do + + def idcache_reserve(self, new_id: str) -> None: + """Reserve the given ID for an element to be inserted later.""" + raise NotImplementedError("Cannot modify visual fragments") + + def iterall_xt( + self, xtypes: cabc.Container[str] + ) -> cabc.Iterator[etree._Element]: + """Iterate over all elements in this tree by ``xsi:type``.""" + del xtypes + yield from () + + def write_xml( + self, + filename: pathlib.PurePosixPath, + encoding: str = "utf-8", + ) -> None: + """Do nothing.""" + del filename, encoding + + # pylint: disable-next=useless-return + def unfollow_href(self, element_id: str) -> etree._Element | None: + del element_id + return None + + class MelodyLoader: """Facilitates extensive access to Polarsys / Capella projects.""" @@ -557,7 +655,7 @@ def __init__( else: self.resources[resname] = reshdl - self.trees: dict[pathlib.PurePosixPath, ModelFile] = {} + self.trees: dict[pathlib.PurePosixPath, ModelFile | VisualFile] = {} self.__load_referenced_files( pathlib.PurePosixPath("\0", self.entrypoint) ) @@ -610,11 +708,17 @@ def __load_referenced_files( handler = self.resources[resource_path.parts[0]] filename = pathlib.PurePosixPath(*resource_path.parts[1:]) - frag = ModelFile( - filename, handler, ignore_uuid_dups=self.__ignore_uuid_dups - ) + frag: VisualFile | ModelFile + if filename.suffix in VISUAL_EXTS: + frag = VisualFile(filename, handler) + refs = list(frag.referenced_files()) + else: + frag = ModelFile( + filename, handler, ignore_uuid_dups=self.__ignore_uuid_dups + ) + refs = [] self.trees[resource_path] = frag - for ref in _find_refs(frag.root): + for ref in refs: ref_name = helpers.normalize_pure_path( _unquote_ref(ref), base=resource_path.parent ) @@ -685,6 +789,7 @@ def update_namespaces(self) -> None: if fragment.fragment_type != FragmentType.SEMANTIC: continue + assert isinstance(fragment, ModelFile) LOGGER.debug("Updating namespaces on fragment %s", fname) fragment.update_namespaces(vp) @@ -961,7 +1066,7 @@ def iterall_xt( """ xtset = self._nonempty_hashset(xtypes) if trees is None: - files: cabc.Iterable[ModelFile] = self.trees.values() + files: cabc.Iterable[ModelFile | VisualFile] = self.trees.values() else: files = (v for k, v in self.trees.items() if k in trees) return itertools.chain.from_iterable( @@ -1296,7 +1401,7 @@ def follow_links( def _find_fragment( self, element: etree._Element - ) -> tuple[pathlib.PurePosixPath, ModelFile]: + ) -> tuple[pathlib.PurePosixPath, ModelFile | VisualFile]: root = collections.deque( itertools.chain([element], element.iterancestors()), 1 )[0] diff --git a/src/capellambse/model/_model.py b/src/capellambse/model/_model.py index 0ba7866d8..7fda5da9e 100644 --- a/src/capellambse/model/_model.py +++ b/src/capellambse/model/_model.py @@ -420,7 +420,8 @@ class as the superclass of every concrete model element trees = [ t for t in self._loader.trees.values() - if t.fragment_type is loader.FragmentType.SEMANTIC + if isinstance(t, loader.ModelFile) + and t.fragment_type is loader.FragmentType.SEMANTIC ] matches: cabc.Iterable[etree._Element] if not classes: diff --git a/src/capellambse/model/_obj.py b/src/capellambse/model/_obj.py index a0080894a..bfd87acd1 100644 --- a/src/capellambse/model/_obj.py +++ b/src/capellambse/model/_obj.py @@ -48,7 +48,7 @@ from lxml import etree import capellambse -from capellambse import helpers +from capellambse import helpers, loader from . import VIRTUAL_NAMESPACE_PREFIX, T, U, _descriptors, _pods, _styleclass @@ -725,6 +725,7 @@ def __init__( ns = self.__capella_namespace__ qtype = model.qualify_classname((ns, type(self).__name__)) assert qtype.namespace is not None + assert isinstance(fragment, loader.ModelFile) fragment.add_namespace(qtype.namespace, ns.alias) self._element.set(helpers.ATT_XT, qtype) for key, val in kw.items():