diff --git a/.semversioner/next-release/patch-20260523000000000001.json b/.semversioner/next-release/patch-20260523000000000001.json new file mode 100644 index 0000000000..7da88d2dd3 --- /dev/null +++ b/.semversioner/next-release/patch-20260523000000000001.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Preserve literal `$` in config values (e.g. regex `file_pattern: \".*\\.md$\"`) by switching `_parse_env_variables` from `Template.substitute` to `Template.safe_substitute`; missing env vars are still detected explicitly via `Template.get_identifiers`. Fixes #2349." +} diff --git a/packages/graphrag-common/graphrag_common/config/load_config.py b/packages/graphrag-common/graphrag_common/config/load_config.py index c8929149f1..4487da61f9 100644 --- a/packages/graphrag-common/graphrag_common/config/load_config.py +++ b/packages/graphrag-common/graphrag_common/config/load_config.py @@ -83,12 +83,22 @@ def _get_parser_for_file(file_path: str | Path) -> Callable[[str], dict[str, Any def _parse_env_variables(text: str) -> str: - """Parse environment variables in the configuration text.""" - try: - return Template(text).substitute(os.environ) - except KeyError as error: - msg = f"Environment variable not found: {error}" - raise ConfigParsingError(msg) from error + r"""Parse environment variables in the configuration text. + + Uses ``Template.safe_substitute`` so that literal ``$`` characters in + config values (for example a regex anchor like ``.*\.md$`` in + ``file_pattern``) are preserved instead of raising ``ValueError``. + Missing env-var references are still detected explicitly via + ``get_identifiers`` so callers continue to see ``ConfigParsingError`` + when a referenced placeholder has no value. + """ + template = Template(text) + missing = [name for name in template.get_identifiers() if name not in os.environ] + if missing: + missing_names = ", ".join(f"'{name}'" for name in missing) + msg = f"Environment variable not found: {missing_names}" + raise ConfigParsingError(msg) + return template.safe_substitute(os.environ) def _recursive_merge_dicts(dest: dict[str, Any], src: dict[str, Any]) -> None: diff --git a/tests/unit/load_config/test_load_config.py b/tests/unit/load_config/test_load_config.py index 0945cf214f..51c1d37f2f 100644 --- a/tests/unit/load_config/test_load_config.py +++ b/tests/unit/load_config/test_load_config.py @@ -155,3 +155,30 @@ def test_load_config(): assert config_with_env_vars.nested_list[0].nested_int == 7 assert config_with_env_vars.nested_list[1].nested_str == "list_value_2" assert config_with_env_vars.nested_list[1].nested_int == 8 + + +def test_load_config_preserves_literal_dollar_sign(tmp_path: Path) -> None: + """Bare ``$`` in a config value (e.g. regex anchor) must not crash the loader.""" + config_path = tmp_path / "settings.yaml" + config_path.write_text( + 'name: "regex value .*\\\\.md$"\n' + "value: 1\n" + "nested:\n" + ' nested_str: "ends with $"\n' + " nested_int: 2\n" + "nested_list:\n" + ' - nested_str: "$$ is literal"\n' + " nested_int: 3\n", + encoding="utf-8", + ) + + config = load_config( + config_initializer=TestConfigModel, + config_path=config_path, + load_dot_env_file=False, + set_cwd=False, + ) + + assert config.name == "regex value .*\\.md$" + assert config.nested.nested_str == "ends with $" + assert config.nested_list[0].nested_str == "$ is literal"