# gpu_testing.py
# GPU load test: trains a small MLP on the scikit-learn digits dataset and
# runs repeated large matrix multiplications for about RUN_SECONDS seconds.
import time
import torch
from sklearn.datasets import load_digits
# Approximate duration of the main loop in main(), in seconds.
RUN_SECONDS = 180
# Interval between progress reports printed by main(), in seconds.
REPORT_EVERY = 10
# Side length of the square matrices used for the extra matrix multiplications.
# Increase if GPU usage is low, decrease if you hit memory errors
MM_SIZE = 2048
# Number of passes of the inner matrix-multiply loop per training step
# (each pass performs two MM_SIZE x MM_SIZE multiplications).
MM_REPEATS = 6
def _accuracy(logits, targets):
    """Return the fraction of rows in *logits* whose arg-max equals *targets*.

    The computation runs under ``torch.no_grad()`` and the result is
    converted to a Python float with ``.item()``.
    """
    with torch.no_grad():
        predictions = logits.argmax(dim=1)
        return (predictions == targets).float().mean().item()


def main():
    """Keep the CUDA GPU busy for about ``RUN_SECONDS`` seconds.

    Each loop iteration performs one full-batch training step of a
    three-layer MLP on the scikit-learn digits dataset, followed by
    ``MM_REPEATS`` passes of two ``MM_SIZE`` x ``MM_SIZE`` matrix
    multiplications. A progress line (elapsed time, step count, loss,
    accuracy, allocated GPU memory) is printed roughly every
    ``REPORT_EVERY`` seconds, and the final training-set accuracy and total
    runtime are printed at the end.

    Raises:
        RuntimeError: If ``torch.cuda.is_available()`` returns false.
    """
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA GPU not available.")

    device = torch.device("cuda")
    # Report the device the tensors are actually placed on instead of a
    # hard-coded index 0, which may not be the current CUDA device.
    print("Using GPU:", torch.cuda.get_device_name(device))

    # Real dataset from scikit-learn
    digits = load_digits()
    # Features are scaled by 1/16 (presumably because digit pixel intensities
    # range over 0..16 -- see the load_digits documentation).
    X = torch.tensor(digits.data, dtype=torch.float32, device=device) / 16.0
    y = torch.tensor(digits.target, dtype=torch.long, device=device)

    input_dim = X.shape[1]
    # Class count is derived from the largest label present in the data.
    num_classes = int(y.max().item()) + 1

    model = torch.nn.Sequential(
        torch.nn.Linear(input_dim, 1024),
        torch.nn.ReLU(),
        torch.nn.Linear(1024, 1024),
        torch.nn.ReLU(),
        torch.nn.Linear(1024, num_classes),
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
    loss_fn = torch.nn.CrossEntropyLoss()

    # Pre-allocate large GPU matrices
    A = torch.randn((MM_SIZE, MM_SIZE), device=device)
    B = torch.randn((MM_SIZE, MM_SIZE), device=device)

    # A monotonic clock is used for all duration measurements so that
    # system clock adjustments cannot shorten or lengthen the run.
    start = time.monotonic()
    next_report = start + REPORT_EVERY
    steps = 0

    print(f"Running for about {RUN_SECONDS} seconds...")
    print("Watch Task Manager > Performance > GPU or run nvidia-smi -l 1\n")

    while time.monotonic() - start < RUN_SECONDS:
        optimizer.zero_grad(set_to_none=True)

        # Training work on GPU
        logits = model(X)
        loss = loss_fn(logits, y)
        loss.backward()
        optimizer.step()

        # Extra GPU-heavy matrix math
        # NOTE(review): A and B are fed back into each other without any
        # rescaling, so their magnitudes grow quickly (likely reaching inf
        # in float32). The values are never read, only the work matters --
        # confirm this is intended.
        for _ in range(MM_REPEATS):
            C = torch.mm(A, B)
            A = torch.relu(C)
            B = torch.relu(torch.mm(B, A))

        # Wait for queued GPU work to finish so the time checks below reflect
        # completed work rather than just kernel launches.
        torch.cuda.synchronize()
        steps += 1

        if time.monotonic() >= next_report:
            # Loss and accuracy come from the logits computed before this
            # step's optimizer update.
            acc = _accuracy(logits, y)
            elapsed = time.monotonic() - start
            mem_used = torch.cuda.memory_allocated() / (1024 ** 2)
            print(
                f"Elapsed: {elapsed:6.1f}s | "
                f"Steps: {steps:3d} | "
                f"Loss: {loss.item():.4f} | "
                f"Acc: {acc:.4f} | "
                f"GPU mem: {mem_used:.1f} MB"
            )
            next_report += REPORT_EVERY

    # Final evaluation on the training data with the trained weights.
    with torch.no_grad():
        final_logits = model(X)
    final_acc = _accuracy(final_logits, y)

    print("\nDone.")
    print(f"Final accuracy: {final_acc:.4f}")
    print(f"Total runtime: {time.monotonic() - start:.1f}s")
# Run the GPU load test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()