-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexport_utils.py
More file actions
127 lines (104 loc) · 3.63 KB
/
Copy pathexport_utils.py
File metadata and controls
127 lines (104 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""
Export utilities: convert text / chat history to .docx, .md, .txt files.
"""
from __future__ import annotations

import logging
import os
import re
import tempfile
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def _text_to_md(text: str) -> str:
"""Return text as-is (it may already be Markdown)."""
return text or ""
def _text_to_txt(text: str) -> str:
"""Strip Markdown formatting for plain text."""
if not text:
return ""
text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
text = re.sub(r"\*(.+?)\*", r"\1", text)
text = re.sub(r"#{1,6}\s+", "", text)
text = re.sub(r"`{1,3}[^`]*`{1,3}", lambda m: m.group(0).strip("`"), text)
text = re.sub(r"\[(.+?)\]\(.+?\)", r"\1", text)
return text
def _text_to_docx(text: str, title: str = "Export") -> str:
"""Write text to a temp .docx file, return path."""
try:
from docx import Document # type: ignore[import-untyped]
except ImportError as exc:
raise RuntimeError(
"python-docx nicht installiert. Bitte 'pip install python-docx' ausführen."
) from exc
doc = Document()
doc.add_heading(title, level=1)
for line in (text or "").splitlines():
stripped = line.strip()
if not stripped:
doc.add_paragraph("")
continue
# Headings
m = re.match(r"^(#{1,6})\s+(.*)", stripped)
if m:
level = min(len(m.group(1)), 4)
doc.add_heading(m.group(2), level=level)
continue
# Bold lines (e.g. **Title**)
if stripped.startswith("**") and stripped.endswith("**"):
p = doc.add_paragraph()
run = p.add_run(stripped.strip("*"))
run.bold = True
continue
doc.add_paragraph(stripped)
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".docx")
doc.save(tmp.name)
tmp.close()
return tmp.name
def export_text(text: str, fmt: str, title: str = "Export") -> str | None:
    """Export ``text`` to a temporary file in the requested format.

    Args:
        text: Content to export.
        fmt: ``'docx'``, ``'md'`` or ``'txt'``. Matching ignores case,
            surrounding whitespace and a leading dot (``".MD"`` == ``"md"``).
            Any other value falls back to plain-text export.
        title: Heading used for ``.docx`` output; ignored for other formats.

    Returns:
        Path of the created temp file, or ``None`` if ``text`` is empty or
        whitespace-only. The file is created with ``delete=False``, so it
        is not removed automatically.

    Raises:
        RuntimeError: For ``'docx'`` when ``python-docx`` is not installed
            (raised by ``_text_to_docx``).
    """
    if not text or not text.strip():
        return None

    # Normalize the format name; non-string values keep the txt fallback.
    kind = fmt.strip().lower().lstrip(".") if isinstance(fmt, str) else ""

    if kind == "docx":
        return _text_to_docx(text, title)

    if kind == "md":
        content, suffix = _text_to_md(text), ".md"
    else:
        content, suffix = _text_to_txt(text), ".txt"

    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=suffix, mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write(content)
        return tmp.name
def export_chat(history: list, fmt: str) -> str | None:
    """Export a chat history to a temporary file.

    Args:
        history: List of turns. Each turn is either a message dict
            ``{"role": ..., "content": ...}`` or a two-item
            ``(user_message, assistant_message)`` pair (Gradio tuple
            style). Turns of any other shape are skipped.
        fmt: ``'docx'`` | ``'md'`` | ``'txt'``; passed to ``export_text``.

    Returns:
        Path of the created file, or ``None`` if ``history`` is empty or
        contains no exportable turn.
    """
    if not history:
        return None

    # Step 1: normalize every turn into (role, content) messages.
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            messages.append((turn.get("role", "?"), turn.get("content", "")))
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            user_msg, assistant_msg = turn
            # A pair carries both sides of one exchange. Export each side
            # that is present instead of only the first non-empty one, so
            # assistant replies are not lost.
            if user_msg:
                messages.append(("user", user_msg))
            if assistant_msg:
                messages.append(("assistant", assistant_msg))

    # Step 2: render each message as a Markdown section.
    sections = []
    for role, content in messages:
        if isinstance(content, list):
            # Flatten multimodal content: keep only the text parts.
            content = "\n".join(
                part.get("text") or ""
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            )
        elif content is None:
            # Avoid rendering the literal string "None".
            content = ""
        label = "👤 Nutzer" if role == "user" else "🤖 Assistent"
        sections.append(f"## {label}\n\n{content}\n")

    if not sections:
        return None

    full_text = "\n---\n\n".join(sections)
    return export_text(full_text, fmt, title="Chat-Export")