Skip to content

Commit 387b743

Browse files
committed
Init
0 parents  commit 387b743

22 files changed

Lines changed: 1274 additions & 0 deletions

.gitignore

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# Visual Studio Code files
7+
.vscode
8+
.vs
9+
10+
# PyCharm files
11+
.idea
12+
13+
# Eclipse Project settings
14+
*.*project
15+
.settings
16+
17+
# Sublime Text settings
18+
*.sublime-workspace
19+
*.sublime-project
20+
21+
# Editor temporaries
22+
*.swn
23+
*.swo
24+
*.swp
25+
*.swm
26+
*.asv
27+
*~
28+
29+
# IPython notebook checkpoints
30+
.ipynb_checkpoints
31+
32+
# macOS dir files
33+
.DS_Store
34+
35+
**/build
36+
37+
Archive/*
38+
39+
# Matlab
40+
*.asv

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
## PyTorch implementation of [Neural Sequence Decoder](https://github.com/fwillett/speechBCI/tree/main/NeuralDecoder)
2+
3+
## Requirements
4+
- python >= 3.9
5+
6+
## Installation
7+
8+
pip install -e .
9+
10+
## How to run
11+
12+
1. Convert the speech BCI dataset using [formatCompetitionData.ipynb](./notebooks/formatCompetitionData.ipynb)
13+
2. Train model: `python ./scripts/train_model.py`
14+

notebooks/formatCompetitionData.ipynb

Lines changed: 330 additions & 0 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[build-system]
2+
requires = ["setuptools>=46.1.0"]

scripts/eval_competition.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import re
2+
import time
3+
import pickle
4+
import numpy as np
5+
6+
from edit_distance import SequenceMatcher
7+
import torch
8+
from dataset import SpeechDataset
9+
10+
import matplotlib.pyplot as plt
11+
12+
13+
from nnDecoderModel import getDatasetLoaders
14+
from nnDecoderModel import loadModel
15+
import neuralDecoder.utils.lmDecoderUtils as lmDecoderUtils
16+
import pickle
17+
import argparse
18+
19+
# Evaluate a trained decoder on the "competition" partition:
#   1. run the RNN on every competition trial and collect its logits,
#   2. decode each trial to an n-best list with the 5-gram language model,
#   3. rescore the n-best lists with the LLM and write the results to disk.

parser = argparse.ArgumentParser(description="")
# The path is dereferenced immediately below, so make it mandatory: a missing
# flag now yields an argparse usage error instead of `None + "/args"` raising
# a TypeError.
parser.add_argument("--modelPath", type=str, required=True, help="Path to model")
input_args = parser.parse_args()

# NOTE(review): pickle.load executes arbitrary code from the file; only point
# --modelPath at model directories you trust.
with open(input_args.modelPath + "/args", "rb") as handle:
    args = pickle.load(handle)

# The dataset location stored with the model is overridden with this
# hard-coded cluster path.
args["datasetPath"] = "/oak/stanford/groups/henderj/stfan/data/ptDecoder_ctc"
trainLoaders, testLoaders, loadedData = getDatasetLoaders(
    args["datasetPath"], args["seqLen"], args["maxTimeSeriesLen"], args["batchSize"]
)

device = "cpu"
model = loadModel(input_args.modelPath, device=device)
model.eval()

rnn_outputs = {
    "logits": [],
    "logitLengths": [],
    "trueSeqs": [],
    "transcriptions": [],
}
partition = "competition"

# Inference only: no_grad avoids building autograd graphs that were previously
# created and then discarded via .detach().
with torch.no_grad():
    # `i` indexes loadedData[partition]; `testDayIdx` is the day index that is
    # passed to the model for that entry.
    for i, testDayIdx in enumerate(
        [4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 18, 19, 20]
    ):
        dayData = loadedData[partition][i]
        test_ds = SpeechDataset([dayData])
        test_loader = torch.utils.data.DataLoader(
            test_ds, batch_size=1, shuffle=False, num_workers=0
        )
        for j, (X, y, X_len, y_len, _) in enumerate(test_loader):
            # The day index yielded by the loader is ignored and replaced by
            # testDayIdx.
            X, y, X_len, y_len, dayIdx = (
                X.to(device),
                y.to(device),
                X_len.to(device),
                y_len.to(device),
                torch.tensor([testDayIdx], dtype=torch.int64).to(device),
            )
            pred = model.forward(X, dayIdx)
            # Output length per trial, derived from the input length and the
            # model's kernelLen / strideLen.
            adjustedLens = ((X_len - model.kernelLen) / model.strideLen).to(
                torch.int32
            )

            for iterIdx in range(pred.shape[0]):
                trueSeq = np.array(y[iterIdx][0 : y_len[iterIdx]].cpu().detach())

                rnn_outputs["logits"].append(pred[iterIdx].cpu().detach().numpy())
                rnn_outputs["logitLengths"].append(
                    adjustedLens[iterIdx].cpu().detach().item()
                )
                rnn_outputs["trueSeqs"].append(trueSeq)

            # batch_size is 1, so loader step j lines up with transcription j.
            # Keep only letters, hyphens, spaces and apostrophes, drop "--",
            # and lowercase.
            transcript = dayData["transcriptions"][j].strip()
            transcript = re.sub(r"[^a-zA-Z\- \']", "", transcript)
            transcript = transcript.replace("--", "").lower()
            rnn_outputs["transcriptions"].append(transcript)


# LM decoding hyperparameters
acoustic_scale = 0.5
blank_penalty = np.log(7)
llm_weight = 0.5

MODEL_CACHE_DIR = "/scratch/users/stfan/huggingface"
# Load OPT 6B model
llm, llm_tokenizer = lmDecoderUtils.build_opt(
    cacheDir=MODEL_CACHE_DIR, device="auto", load_in_8bit=True
)

lmDir = "/oak/stanford/groups/henderj/stfan/code/nptlrig2/LanguageModelDecoder/examples/speech/s0/lm_order_exp/5gram/data/lang_test"
ngramDecoder = lmDecoderUtils.build_lm_decoder(
    lmDir, acoustic_scale=acoustic_scale, nbest=100, beam=18
)


# Generate nbest outputs from 5gram LM
start_t = time.time()
nbest_outputs = []
for logits in rnn_outputs["logits"]:
    # Move column 0 to the end: blank is last token
    logits = np.concatenate([logits[:, 1:], logits[:, 0:1]], axis=-1)
    logits = lmDecoderUtils.rearrange_speech_logits(logits[None, :, :], has_sil=True)
    nbest = lmDecoderUtils.lm_decode(
        ngramDecoder,
        logits[0],
        blankPenalty=blank_penalty,
        returnNBest=True,
        rescore=True,
    )
    nbest_outputs.append(nbest)
time_per_sample = (time.time() - start_t) / len(rnn_outputs["logits"])
print(f"5gram decoding took {time_per_sample} seconds per sample")

# Re-encode every transcription as an array of character code points followed
# by a trailing 0 (presumably the format cer_with_gpt2_decoder expects -- TODO
# confirm against lmDecoderUtils).
rnn_outputs["transcriptions"] = [
    np.array([ord(c) for c in transcript] + [0])
    for transcript in rnn_outputs["transcriptions"]
]

# Rescore nbest outputs with LLM
start_t = time.time()
llm_out = lmDecoderUtils.cer_with_gpt2_decoder(
    llm,
    llm_tokenizer,
    nbest_outputs[:],
    acoustic_scale,
    rnn_outputs,
    outputType="speech_sil",
    returnCI=True,
    lengthPenalty=0,
    alpha=llm_weight,
)
# Recompute the timing for this stage; previously the stale 5gram value was
# printed as the LLM time.
time_per_sample = (time.time() - start_t) / len(nbest_outputs)
print(f"LLM decoding took {time_per_sample} seconds per sample")

print(llm_out["cer"], llm_out["wer"])
with open(input_args.modelPath + "/llm_out", "wb") as handle:
    pickle.dump(llm_out, handle)

# One decoded sentence per line, in trial order.
decodedTranscriptions = llm_out["decoded_transcripts"]
with open(input_args.modelPath + "/5gramLLMCompetitionSubmission.txt", "w") as f:
    for decoded in decodedTranscriptions:
        f.write(decoded + "\n")

scripts/eval_competition.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash

# Slurm batch script that runs eval_competition.py on one model directory.
# Usage: sbatch eval_competition.sh MODEL_DIR

# Parameters
#SBATCH --cpus-per-task=8
#SBATCH --gpus-per-task=1
#SBATCH --job-name=rescore
#SBATCH --mail-type=ALL
#SBATCH --mem=400GB
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --open-mode=append
#SBATCH --partition=henderj,owners
#SBATCH --signal=USR1@120
#SBATCH --time=2880
#SBATCH --constraint=[GPU_MEM:32GB|GPU_MEM:40GB|GPU_MEM:80GB]

ml gcc/10.1.0
ml load cudnn/8.6.0.163
ml load cuda/11.7.1

# Quote the argument so paths containing spaces survive word splitting, and
# abort with a usage message when no model directory is given.
python eval_competition.py --modelPath="${1:?usage: eval_competition.sh MODEL_DIR}"

scripts/train_model.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
2+
"""Train the baseline decoder with a fixed, hard-coded configuration."""
from neural_decoder.neural_decoder_trainer import trainModel

modelName = "speechBaseline4"

# Every setting handed to trainModel lives in this one dictionary.
args = {
    "outputDir": "/oak/stanford/groups/henderj/stfan/logs/speech_logs/" + modelName,
    "datasetPath": "/oak/stanford/groups/henderj/fwillett/speech/ptDecoder_ctc",
    "seqLen": 150,
    "maxTimeSeriesLen": 1200,
    "batchSize": 64,
    "lrStart": 0.02,
    "lrEnd": 0.02,
    "nUnits": 1024,
    "nBatch": 10000,  # 3000
    "nLayers": 5,
    "seed": 0,
    "nClasses": 40,
    "nInputFeatures": 256,
    "dropout": 0.4,
    "whiteNoiseSD": 0.8,
    "constantOffsetSD": 0.2,
    "gaussianSmoothWidth": 2.0,
    "strideLen": 4,
    "kernelLen": 32,
    "bidirectional": True,
    "l2_decay": 1e-5,
}

trainModel(args)

setup.cfg

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# This file is used to configure your project.
2+
# Read more about the various options under:
3+
# https://setuptools.pypa.io/en/latest/userguide/declarative_config.html
4+
# https://setuptools.pypa.io/en/latest/references/keywords.html
5+
6+
[metadata]
7+
name = neural_decoder
8+
description = PyTorch neural sequence decoder for speech BCI (https://github.com/fwillett/speechBCI/tree/main/NeuralDecoder)
9+
version = 0.0.1
10+
author = Chaofei Fan, Frank Willett
11+
author_email = stfan@stanford.edu
12+
license = MIT
13+
license_files = LICENSE.txt
14+
# Add here related links, for example:
15+
project_urls =
16+
17+
# Change if running only on Windows, Mac or Linux (comma-separated)
18+
platforms = Linux
19+
20+
# Add here all kinds of additional classifiers as defined under
21+
# https://pypi.org/classifiers/
22+
classifiers =
23+
Development Status :: 4 - Beta
24+
Programming Language :: Python
25+
26+
27+
[options]
28+
zip_safe = False
29+
packages = find_namespace:
30+
include_package_data = True
31+
package_dir =
32+
=src
33+
34+
# Require a min/specific Python version (comma-separated conditions)
35+
python_requires = >=3.9
36+
37+
# Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.
38+
# Version specifiers like >=2.2,<3.0 avoid problems due to API changes in
39+
# new major versions. This works if the required packages follow Semantic Versioning.
40+
# For more information, check out https://semver.org/.
41+
install_requires =
42+
importlib-metadata; python_version<"3.8"
43+
torch==1.13.1
44+
hydra-core==1.3.2
45+
hydra-submitit-launcher==1.1.5
46+
hydra-optuna-sweeper==1.2.0
47+
numpy==1.25.0
48+
scipy==1.11.1
49+
numba==0.58.1
50+
scikit-learn==1.3.2
51+
g2p_en==2.1.0
52+
edit_distance==1.0.6
53+
54+
55+
[options.packages.find]
56+
where = src
57+
exclude =
58+
tests
59+
examples

setup.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"""
2+
Setup file for neural_decoder.
3+
Use setup.cfg to configure your project.
4+
5+
This file was generated with PyScaffold 4.5.
6+
PyScaffold helps you to put up the scaffold of your new Python project.
7+
Learn more under: https://pyscaffold.org/
8+
"""
9+
from setuptools import setup

# Hint shown when the build fails, printed before the error is re-raised.
_BUILD_FAILURE_HINT = (
    "\n\nAn error occurred while building the project, "
    "please ensure you have the most updated version of setuptools, "
    "setuptools_scm and wheel with:\n"
    "   pip install -U setuptools setuptools_scm wheel\n\n"
)

if __name__ == "__main__":
    try:
        setup()
    except BaseException:  # same scope as a bare except; the error is re-raised
        print(_BUILD_FAILURE_HINT)
        raise
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
Metadata-Version: 2.1
2+
Name: neural_decoder
3+
Version: 0.0.1
4+
Summary: PyTorch neural sequence decoder for speech BCI (https://github.com/fwillett/speechBCI/tree/main/NeuralDecoder)
5+
Author: Chaofei Fan, Frank Willett
6+
Author-email: stfan@stanford.edu
7+
License: MIT
8+
Platform: Linux
9+
Classifier: Development Status :: 4 - Beta
10+
Classifier: Programming Language :: Python
11+
Requires-Python: >=3.9
12+
Requires-Dist: importlib-metadata; python_version < "3.8"
13+
Requires-Dist: torch==1.13.1
14+
Requires-Dist: hydra-core==1.3.2
15+
Requires-Dist: hydra-submitit-launcher==1.1.5
16+
Requires-Dist: hydra-optuna-sweeper==1.2.0
17+
Requires-Dist: numpy==1.25.0
18+
Requires-Dist: scipy==1.11.1
19+
Requires-Dist: numba==0.58.1
20+
Requires-Dist: scikit-learn==1.3.2
21+
Requires-Dist: g2p_en==2.1.0
22+
Requires-Dist: edit_distance==1.0.6

0 commit comments

Comments
 (0)