From e14c2463fa4c00375c55fe84b240d8a9e83936f5 Mon Sep 17 00:00:00 2001 From: John CSA <103165870+jluocsa@users.noreply.github.com> Date: Sat, 23 May 2026 12:40:05 -0700 Subject: [PATCH] fix(config): preserve literal $ in config values (closes #2349) Switch _parse_env_variables from Template.substitute to Template.safe_substitute so that a bare `$` in a config value (for example the regex anchor in `file_pattern: '.*\\.md$'`) no longer crashes the loader with `ValueError: Invalid placeholder in string`. Missing env-var references are still detected explicitly via `Template.get_identifiers` so callers continue to see `ConfigParsingError` when a referenced placeholder has no value -- existing behaviour and tests are preserved. Adds a regression test `test_load_config_preserves_literal_dollar_sign` covering a regex anchor `$`, a trailing bare `$` in `str` values, and the `$$` escape. --- .../patch-20260523000000000001.json | 4 +++ .../graphrag_common/config/load_config.py | 22 ++++++++++----- tests/unit/load_config/test_load_config.py | 27 +++++++++++++++++++ 3 files changed, 47 insertions(+), 6 deletions(-) create mode 100644 .semversioner/next-release/patch-20260523000000000001.json diff --git a/.semversioner/next-release/patch-20260523000000000001.json b/.semversioner/next-release/patch-20260523000000000001.json new file mode 100644 index 0000000000..7da88d2dd3 --- /dev/null +++ b/.semversioner/next-release/patch-20260523000000000001.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Preserve literal `$` in config values (e.g. regex `file_pattern: \".*\\.md$\"`) by switching `_parse_env_variables` from `Template.substitute` to `Template.safe_substitute`; missing env vars are still detected explicitly via `Template.get_identifiers`. Fixes #2349." 
+} diff --git a/packages/graphrag-common/graphrag_common/config/load_config.py b/packages/graphrag-common/graphrag_common/config/load_config.py index c8929149f1..4487da61f9 100644 --- a/packages/graphrag-common/graphrag_common/config/load_config.py +++ b/packages/graphrag-common/graphrag_common/config/load_config.py @@ -83,12 +83,22 @@ def _get_parser_for_file(file_path: str | Path) -> Callable[[str], dict[str, Any def _parse_env_variables(text: str) -> str: - """Parse environment variables in the configuration text.""" - try: - return Template(text).substitute(os.environ) - except KeyError as error: - msg = f"Environment variable not found: {error}" - raise ConfigParsingError(msg) from error + r"""Parse environment variables in the configuration text. + + Uses ``Template.safe_substitute`` so that literal ``$`` characters in + config values (for example a regex anchor like ``.*\.md$`` in + ``file_pattern``) are preserved instead of raising ``ValueError``. + Missing env-var references are still detected explicitly via + ``get_identifiers`` so callers continue to see ``ConfigParsingError`` + when a referenced placeholder has no value. 
+ """ + template = Template(text) + missing = [name for name in template.get_identifiers() if name not in os.environ] + if missing: + missing_names = ", ".join(f"'{name}'" for name in missing) + msg = f"Environment variable not found: {missing_names}" + raise ConfigParsingError(msg) + return template.safe_substitute(os.environ) def _recursive_merge_dicts(dest: dict[str, Any], src: dict[str, Any]) -> None: diff --git a/tests/unit/load_config/test_load_config.py b/tests/unit/load_config/test_load_config.py index 0945cf214f..51c1d37f2f 100644 --- a/tests/unit/load_config/test_load_config.py +++ b/tests/unit/load_config/test_load_config.py @@ -155,3 +155,30 @@ def test_load_config(): assert config_with_env_vars.nested_list[0].nested_int == 7 assert config_with_env_vars.nested_list[1].nested_str == "list_value_2" assert config_with_env_vars.nested_list[1].nested_int == 8 + + +def test_load_config_preserves_literal_dollar_sign(tmp_path: Path) -> None: + """Bare ``$`` in a config value (e.g. regex anchor) must not crash the loader.""" + config_path = tmp_path / "settings.yaml" + config_path.write_text( + 'name: "regex value .*\\\\.md$"\n' + "value: 1\n" + "nested:\n" + ' nested_str: "ends with $"\n' + " nested_int: 2\n" + "nested_list:\n" + ' - nested_str: "$$ is literal"\n' + " nested_int: 3\n", + encoding="utf-8", + ) + + config = load_config( + config_initializer=TestConfigModel, + config_path=config_path, + load_dot_env_file=False, + set_cwd=False, + ) + + assert config.name == "regex value .*\\.md$" + assert config.nested.nested_str == "ends with $" + assert config.nested_list[0].nested_str == "$ is literal"