garak/tests/test_docs.py at main · NVIDIA/garak · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
import importlib
from pathlib import Path
import re
import yaml

import pytest

# Plugin categories that are each expected to have documentation.
TOP_PATHS = ["probes", "detectors", "harnesses", "generators", "evaluators", "buffs"]
# Location of the documentation sources, relative to the working directory.
DOC_SOURCE = Path("docs/source")

# Map every category to the module names found under garak/<category>:
# file names with ".py" removed, leaving out package initialisers.
module_names = {
    category: [
        entry.name.replace(".py", "")
        for entry in Path(f"garak/{category}").glob("*py")
        if not str(entry).endswith("__init__.py")
    ]
    for category in TOP_PATHS
}

# Python files sitting directly inside the garak package directory.
ROOT_MODULES = [*Path("garak").glob("*py")]

# Regexes whose match indicates Markdown syntax in text that should be RST.
MARKDOWN_CANARIES = {
    re.compile(r"(^|[^`!])\!?\[.+\]\((http|java).+\)"),  #  [link](http://link)
    re.compile(r"```"),  #  ```   (code block)
}


@pytest.mark.parametrize("category", TOP_PATHS)
def test_top_docs(category: str):
    """Verify that a plugin category has a non-empty top-level index page.

    Args:
        category: Entry from ``TOP_PATHS``; the page checked is
            ``index_<category>.rst`` under ``DOC_SOURCE``.
    """
    index_page = DOC_SOURCE / f"index_{category}.rst"

    missing_msg = "Top level category %s needs to have docs in %s but file is absent" % (
        category,
        index_page,
    )
    assert index_page.is_file(), missing_msg

    blank_msg = "Top level doc %s cannot be blank" % index_page
    size_in_bytes = index_page.stat().st_size
    assert size_in_bytes > 0, blank_msg


@pytest.mark.parametrize("classname", module_names["probes"])
def test_docs_probes(classname: str):
    """Check that a probe module has a non-empty docs page linked from the index.

    Args:
        classname: Probe module name (file name without ``.py``); also the
            stem of the expected ``.rst`` file under ``DOC_SOURCE/probes``.
    """
    target_doc = classname
    file_path = DOC_SOURCE / "probes" / f"{target_doc}.rst"
    assert (
        file_path.is_file()
    ), f"There must be an entry for each probe family in the docs; missing {file_path}"
    assert (
        file_path.stat().st_size
    ), "plugin docs cannot be empty. you can just use a stub to read python docstrings, look at existing doc files"
    category_file = DOC_SOURCE / "index_probes.rst"
    # read_text() opens and closes the file itself, so no handle is left open.
    index_text = category_file.read_text(encoding="utf-8")
    # The trailing newline requires the name to end a line of the index.
    assert (
        target_doc + "\n" in index_text
    ), "probe docs must be linked to in index_probes.rst"


@pytest.mark.parametrize("classname", module_names["detectors"])
def test_docs_detectors(classname: str):
    """Check that a detector module has a non-empty docs page linked from the index.

    Args:
        classname: Detector module name (file name without ``.py``); also the
            stem of the expected ``.rst`` file under ``DOC_SOURCE/detectors``.
    """
    target_doc = classname
    file_path = DOC_SOURCE / "detectors" / f"{target_doc}.rst"
    assert (
        file_path.is_file()
    ), f"There must be an entry for each detector family in the docs; missing {file_path}"
    assert (
        file_path.stat().st_size
    ), "plugin docs cannot be empty. you can just use a stub to read python docstrings, look at existing doc files"
    category_file = DOC_SOURCE / "index_detectors.rst"
    # read_text() opens and closes the file itself, so no handle is left open.
    index_text = category_file.read_text(encoding="utf-8")
    # The trailing newline requires the name to end a line of the index.
    assert (
        target_doc + "\n" in index_text
    ), "detector docs must be linked to in index_detectors.rst"


@pytest.mark.parametrize("classname", module_names["harnesses"])
def test_docs_harnesses(classname: str):
    """Check that a harness module has a non-empty docs page linked from the index.

    Args:
        classname: Harness module name (file name without ``.py``); also the
            stem of the expected ``.rst`` file under ``DOC_SOURCE/harnesses``.
    """
    target_doc = classname
    file_path = DOC_SOURCE / "harnesses" / f"{target_doc}.rst"
    assert (
        file_path.is_file()
    ), f"There must be an entry for each harness family in the docs; missing {file_path}"
    assert (
        file_path.stat().st_size
    ), "plugin docs cannot be empty. you can just use a stub to read python docstrings, look at existing doc files"
    category_file = DOC_SOURCE / "index_harnesses.rst"
    # read_text() opens and closes the file itself, so no handle is left open.
    index_text = category_file.read_text(encoding="utf-8")
    # The trailing newline requires the name to end a line of the index.
    assert (
        target_doc + "\n" in index_text
    ), "harness docs must be linked to in index_harnesses.rst"


@pytest.mark.parametrize("classname", module_names["evaluators"])
def test_docs_evaluators(classname: str):
    """Check that an evaluator module has a non-empty docs page linked from the index.

    Args:
        classname: Evaluator module name (file name without ``.py``); also the
            stem of the expected ``.rst`` file under ``DOC_SOURCE/evaluators``.
    """
    target_doc = classname
    file_path = DOC_SOURCE / "evaluators" / f"{target_doc}.rst"
    assert (
        file_path.is_file()
    ), f"There must be an entry for each evaluator family in the docs; missing {file_path}"
    assert (
        file_path.stat().st_size
    ), "plugin docs cannot be empty. you can just use a stub to read python docstrings, look at existing doc files"
    category_file = DOC_SOURCE / "index_evaluators.rst"
    # read_text() opens and closes the file itself, so no handle is left open.
    index_text = category_file.read_text(encoding="utf-8")
    # The trailing newline requires the name to end a line of the index.
    assert (
        target_doc + "\n" in index_text
    ), "evaluator docs must be linked to in index_evaluators.rst"


@pytest.mark.parametrize("classname", module_names["generators"])
def test_docs_generators(classname: str):
    """Check that a generator module has a non-empty docs page linked from the index.

    Args:
        classname: Generator module name (file name without ``.py``); also the
            stem of the expected ``.rst`` file under ``DOC_SOURCE/generators``.
    """
    target_doc = classname
    file_path = DOC_SOURCE / "generators" / f"{target_doc}.rst"
    assert (
        file_path.is_file()
    ), f"There must be an entry for each generator family in the docs; missing {file_path}"
    assert (
        file_path.stat().st_size
    ), "plugin docs cannot be empty. you can just use a stub to read python docstrings, look at existing doc files"
    category_file = DOC_SOURCE / "index_generators.rst"
    # read_text() opens and closes the file itself, so no handle is left open.
    index_text = category_file.read_text(encoding="utf-8")
    # The trailing newline requires the name to end a line of the index.
    assert (
        target_doc + "\n" in index_text
    ), "generator docs must be linked to in index_generators.rst"


@pytest.mark.parametrize("classname", module_names["buffs"])
def test_docs_buffs(classname: str):
    """Check that a buff module has a non-empty docs page linked from the index.

    Args:
        classname: Buff module name (file name without ``.py``); also the
            stem of the expected ``.rst`` file under ``DOC_SOURCE/buffs``.
    """
    target_doc = classname
    file_path = DOC_SOURCE / "buffs" / f"{target_doc}.rst"
    assert (
        file_path.is_file()
    ), f"There must be an entry for each buff family in the docs; missing {file_path}"
    assert (
        file_path.stat().st_size
    ), "plugin docs cannot be empty. you can just use a stub to read python docstrings, look at existing doc files"
    category_file = DOC_SOURCE / "index_buffs.rst"
    # read_text() opens and closes the file itself, so no handle is left open.
    index_text = category_file.read_text(encoding="utf-8")
    # The trailing newline requires the name to end a line of the index.
    assert (
        target_doc + "\n" in index_text
    ), "buff docs must be linked to in index_buffs.rst"


from garak import _plugins

# Dotted plugin names per category. enumerate_plugins yields (name, active)
# pairs; only the name is kept.
probes = [name for name, _active in _plugins.enumerate_plugins("probes")]
detectors = [name for name, _active in _plugins.enumerate_plugins("detectors")]
generators = [name for name, _active in _plugins.enumerate_plugins("generators")]
harnesses = [name for name, _active in _plugins.enumerate_plugins("harnesses")]
buffs = [name for name, _active in _plugins.enumerate_plugins("buffs")]
# commented out until enumerate_plugins supports evaluators
# evaluators = [
#    classname for (classname, active) in _plugins.enumerate_plugins("evaluators")
# ]
# NOTE(review): harnesses is enumerated above but is not part of the combined
# list below - confirm whether that omission is intentional.
plugins = sorted([*probes, *detectors, *generators, *buffs])


@pytest.mark.parametrize("plugin_name", plugins)
def test_check_plugin_class_docstring(plugin_name: str):
    """Require a non-empty, Markdown-free docstring on a plugin class.

    Args:
        plugin_name: Dotted plugin name; everything before the last dot is
            imported as a module under ``garak.`` and the last component is
            looked up as the class.
    """
    module_path, _, class_name = plugin_name.rpartition(".")
    module_name = "garak." + module_path
    mod = importlib.import_module(module_name)
    doc = getattr(mod, class_name).__doc__
    assert isinstance(doc, str), "All plugin classes must have docstrings"
    assert len(doc) > 0, "Plugin class docstrings must not be empty"
    for canary in MARKDOWN_CANARIES:
        # A compiled pattern's search() takes (string, pos, endpos), not flags:
        # passing re.I there would start the scan at offset 2 and apply no
        # flag. Go through re.search so IGNORECASE is really in effect and the
        # whole docstring is scanned.
        canary_match = re.search(
            canary.pattern, doc, flags=canary.flags | re.IGNORECASE
        )
        assert (
            canary_match is None
        ), f"Markdown in docstring: '{canary_match.group().strip()}' - use ReStructured Text for garak docs"


# Unique "<category>.<module>" prefixes of all plugin names, in sorted order.
PLUGIN_GROUPS = sorted({".".join(name.split(".")[:2]) for name in plugins})


@pytest.mark.parametrize("plugin_group", PLUGIN_GROUPS)
def test_check_plugin_module_docstring(plugin_group: str):
    """Require a non-empty, Markdown-free docstring on a plugin module.

    Args:
        plugin_group: Dotted module path relative to the ``garak`` package;
            it is imported as ``garak.<plugin_group>``.
    """
    module_name = "garak." + plugin_group
    mod = importlib.import_module(module_name)
    doc = mod.__doc__
    assert isinstance(doc, str), "All plugin groups/modules must have docstrings"
    assert len(doc) > 0, "Plugin group/module docstrings must not be empty"
    for canary in MARKDOWN_CANARIES:
        # A compiled pattern's search() takes (string, pos, endpos), not flags:
        # passing re.I there would start the scan at offset 2 and apply no
        # flag. Go through re.search so IGNORECASE is really in effect and the
        # whole docstring is scanned.
        canary_match = re.search(
            canary.pattern, doc, flags=canary.flags | re.IGNORECASE
        )
        assert (
            canary_match is None
        ), f"Markdown in docstring: '{canary_match.group().strip()}' - use ReStructured Text for garak docs"


@pytest.fixture(scope="session")
def doc_index_source_text() -> str:
    """Return the full text of ``index.rst`` in ``DOC_SOURCE``.

    The fixture is session-scoped, so the file is read once per test session.
    """
    # read_text() opens and closes the file itself, so no handle is left open.
    return (DOC_SOURCE / "index.rst").read_text(encoding="utf-8")


@pytest.mark.parametrize("root_module", ROOT_MODULES)
def test_root_modules_docs(doc_index_source_text: str, root_module: Path):
    """Require a docs page, linked from the root index, for each root module.

    Args:
        doc_index_source_text: Contents of the docs root ``index.rst``.
        root_module: Path of a Python file directly inside the ``garak``
            package directory.
    """
    # Files whose name starts with a double underscore are not checked.
    if root_module.name.startswith("__"):
        return

    expected_doc_name = f"{root_module.stem}.rst"
    present_doc_names = [entry.name for entry in DOC_SOURCE.glob("*rst")]
    assert (
        expected_doc_name in present_doc_names
    ), f"root module {root_module.stem} must have documentation in {root_module.stem}.rst"
    # Leading space and trailing newline: the stem must close a line and be
    # preceded by a space in the index.
    assert (
        f" {root_module.stem}\n" in doc_index_source_text
    ), f"root module doc page for {root_module.name} should be linked from doc root index"


def test_core_config_options_explained():
    """Require every core config section and option to appear in configurable.rst.

    Reads ``resources/garak.core.yaml`` from the package directory and checks
    that each top-level key has a "<Key> Config Items" heading line and each
    second-level key has a ``* ``key`` - `` bullet in the docs page.
    """
    import garak._config

    core_config_file_name = (
        garak._config.transient.package_dir / "resources" / "garak.core.yaml"
    )

    with open(core_config_file_name, encoding="utf-8") as settings_file:
        settings = yaml.safe_load(settings_file)

    l1_nodes_to_check = list(settings)
    l2_nodes_to_check = [
        second_level_setting
        for top_level_setting in settings
        for second_level_setting in settings[top_level_setting]
    ]

    # read_text() opens and closes the file itself, so no handle is left open.
    configurable_rst = (DOC_SOURCE / "configurable.rst").read_text(encoding="utf-8")

    for l1_node in l1_nodes_to_check:
        # The heading must occupy a whole line, hence the surrounding newlines.
        title_case_l1_node = f"\n{l1_node} Config Items\n".title()
        assert (
            title_case_l1_node in configurable_rst
        ), f"core config value '{l1_node}' must be documented in configurable.rst"

    for l2_node in l2_nodes_to_check:
        assert (
            f"\n* ``{l2_node}`` - " in configurable_rst
        ), f"core config value '{l2_node}' must be documented in configurable.rst"


@pytest.mark.parametrize("doc_source_entry", DOC_SOURCE.iterdir())
def test_doc_src_extensions(doc_source_entry):
    """Require files directly under ``DOC_SOURCE`` to carry the ``.rst`` suffix.

    Non-file entries are ignored, as are ``Makefile`` and ``conf.py``.

    Args:
        doc_source_entry: One entry yielded by ``DOC_SOURCE.iterdir()``.
    """
    exempt_names = ("Makefile", "conf.py")
    if not doc_source_entry.is_file() or doc_source_entry.name in exempt_names:
        return

    failure_msg = "Doc entry %s should be a .rst file" % doc_source_entry
    assert doc_source_entry.suffix == ".rst", failure_msg


# Materialised and sorted so the constant can be iterated more than once and
# the parametrised test order does not depend on filesystem listing order.
RST_FILES = sorted(DOC_SOURCE.glob("*rst"))


@pytest.mark.parametrize("rst_file", RST_FILES)
def test_doc_src_no_markdown(rst_file):
    """Reject Markdown syntax inside a top-level ``.rst`` documentation file.

    Args:
        rst_file: Path of one ``.rst`` file taken from ``RST_FILES``.
    """
    # read_text() opens and closes the file itself, so no handle is left open.
    rst_file_content = rst_file.read_text(encoding="utf-8")
    for canary in MARKDOWN_CANARIES:
        # A compiled pattern's search() takes (string, pos, endpos), not flags:
        # passing re.I there would start the scan at offset 2 and apply no
        # flag. Go through re.search so IGNORECASE is really in effect and the
        # whole file is scanned.
        canary_match = re.search(
            canary.pattern, rst_file_content, flags=canary.flags | re.IGNORECASE
        )
        assert (
            canary_match is None
        ), f"Markdown-like content in rst: {canary_match.group().strip()} use ReStructured Text for garak docs - Markdown won't render"