forked from UsMaNBAjWaa/HSRIS_CustomerSupport_NLP
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmake_single_notebook.py
More file actions
76 lines (63 loc) · 2.67 KB
/
Copy pathmake_single_notebook.py
File metadata and controls
76 lines (63 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""
make_single_notebook.py
Converts HSRIS_COMPLETE.py → HSRIS_FINAL.ipynb
Each # ── CELL N : comment starts a new notebook cell.
"""
import json, re
from pathlib import Path
# Load the flat script once and keep it both as raw text and as a list of lines.
source_path = Path("HSRIS_COMPLETE.py")
src = source_path.read_text(encoding="utf-8")
lines = src.splitlines()

# Matches a comment line that opens a new notebook cell.
CELL_RE = re.compile(r"^# ── CELL \d+")
def make_code_cell(code_lines):
    """Build an nbformat-4 code cell dict from a sequence of source lines.

    Args:
        code_lines: Iterable of code lines, each without a trailing newline.

    Returns:
        A dict with ``cell_type`` "code", ``execution_count`` None, empty
        ``metadata`` and ``outputs``, and ``source`` as a list of strings.
        Every line except the last is newline-terminated, so joining the
        list reproduces the code without a stray empty line at the end.
    """
    code_lines = list(code_lines)  # accept any iterable, not just lists
    # Only the separators between lines get a "\n"; the final line is left
    # bare so the cell does not end with an extra blank line.
    source = [line + "\n" for line in code_lines[:-1]] + code_lines[-1:]
    return {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        "source": source,
    }
def make_md_cell(text):
    """Build an nbformat-4 markdown cell dict from a block of text.

    Args:
        text: Markdown source as a single string; may span several lines.

    Returns:
        A dict with ``cell_type`` "markdown", empty ``metadata``, and
        ``source`` as a list of strings. Every line except the last is
        newline-terminated, so joining the list gives back the text
        without an extra trailing newline.
    """
    md_lines = text.splitlines()
    # Terminate every line but the last; a trailing "\n" on the final line
    # would add an empty line at the bottom of the cell.
    source = [line + "\n" for line in md_lines[:-1]] + md_lines[-1:]
    return {"cell_type": "markdown", "metadata": {}, "source": source}
# ── split into cells ──────────────────────────────────────────
# A banner rule: "# " followed by at least 30 "=" characters.
SECTION_RE = re.compile(r"^# ={30,}")

cells = []
buf = []


def flush(buf):
    """Append the buffered lines to ``cells`` as one code cell.

    Blank lines at either end of the buffer are dropped first. A buffer
    that is empty or holds only whitespace produces no cell.

    Args:
        buf: List of source lines collected since the previous boundary.
    """
    start, stop = 0, len(buf)
    while start < stop and not buf[start].strip():
        start += 1
    while stop > start and not buf[stop - 1].strip():
        stop -= 1
    if start < stop:
        cells.append(make_code_cell(buf[start:stop]))


for line in lines:
    if CELL_RE.match(line):
        # A cell marker closes the current cell and is kept as the first
        # line of the next one.
        flush(buf)
        buf = [line]
    elif SECTION_RE.match(line):
        # A banner rule also closes the current cell, but the rule line
        # itself is discarded (no markdown cell is generated for it).
        flush(buf)
        buf = []
    else:
        buf.append(line)

# Emit whatever follows the last boundary.
flush(buf)
# ── title markdown cell ───────────────────────────────────────
# Blank lines separate the heading, the summary line, the table and the
# blockquote: some Markdown renderers will not start a table in the middle
# of a paragraph, so the blocks must not run together.
title_cell = make_md_cell("""# 🚀 Hybrid Semantic Retrieval & Intelligence System (HSRIS)

**Platform:** Kaggle | **GPU:** Tesla T4 ×2 | **Framework:** PyTorch + NumPy

| Stage | Method | Details |
|---|---|---|
| Encoders | Label + One-Hot | Priority → int, Channel → binary vector |
| Sparse Retrieval | BoW + N-Grams + TF-IDF | Sparse COO tensors |
| Dense Retrieval | GloVe 300-d | TF-IDF-weighted sentence embeddings |
| Hybrid Search | α·TF-IDF + (1-α)·GloVe | α=0.4 default |
| Evaluation | Precision@5 | 50 queries, GloVe-wins analysis |
| UI | Gradio | Query box + α slider + Top-3 results |

> **Run order:** Enable GPU → Run All → Gradio link appears at the bottom.""")

# The title goes first, ahead of every cell produced from the script.
cells.insert(0, title_cell)
# ── assemble notebook ─────────────────────────────────────────
# The notebook declares nbformat 4.5, whose schema requires a unique "id" on
# every cell. Index-based ids keep the generated file reproducible from run
# to run; any id a cell already carries is left alone.
for index, cell in enumerate(cells):
    cell.setdefault("id", f"cell-{index:04d}")

nb = {
    "nbformat": 4,
    "nbformat_minor": 5,
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3",
        },
        "language_info": {"name": "python", "version": "3.10.0"},
        "accelerator": "GPU",
    },
    "cells": cells,
}

# Write the notebook as UTF-8 JSON; ensure_ascii=False keeps the non-ASCII
# characters in the cells readable instead of \u-escaped.
out = Path("HSRIS_FINAL.ipynb")
out.write_text(json.dumps(nb, indent=1, ensure_ascii=False), encoding="utf-8")

# Report the cell count and the on-disk size in whole kilobytes.
size_kb = out.stat().st_size // 1024
print(f"✅ HSRIS_FINAL.ipynb | {len(cells)} cells | {size_kb} KB")