-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbench.cpp
More file actions
162 lines (139 loc) · 6.32 KB
/
Copy pathbench.cpp
File metadata and controls
162 lines (139 loc) · 6.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
/*
* dijkstra_bench.cpp — CS204 Stress Benchmark
* IIT Ropar | 2025-26
*
* Pure workload. Memory tracing is handled externally by the PIN tool
* (mem_trace.cpp). This file should be compiled and run normally:
*
* g++ -O0 -std=c++17 -o dijkstra_bench dijkstra_bench.cpp
* pin -t obj-intel64/mem_trace.so -o dijkstra.trace -- ./dijkstra_bench
*
* The workload has three phases designed to stress different aspects
* of cache replacement:
*
* 1. Dijkstra on a 200-node graph — 160 KB adjacency matrix, run from
* 10 sources to build temporal + spatial reuse patterns.
* 2. Matrix multiply (64x64 ints) — column accesses on B create poor
* spatial locality; A rows are reused, rewarding LRU/LFU.
* 3. Scan-with-polluter — hot array repeatedly evicted by a larger
* polluter array; OPT/LRU should outperform FIFO/RANDOM.
*
* Build with -O0 so loads/stores aren't optimised away.
*/
#include <iostream>
#include <vector>
#include <queue>
#include <limits>
#include <cstdint>
#include <random>
// ════════════════════════════════════════════════════════════════════════════
// Part 1: Dijkstra on a 200-node graph
// ════════════════════════════════════════════════════════════════════════════
static constexpr int NODES = 200;
static constexpr int INF = std::numeric_limits<int>::max() / 2;
static void build_graph(int graph[NODES][NODES], std::mt19937& rng) {
for (int i = 0; i < NODES; ++i)
for (int j = 0; j < NODES; ++j)
graph[i][j] = INF;
std::uniform_int_distribution<int> dst(0, NODES-1);
std::uniform_int_distribution<int> wgt(1, 50);
// ~8 edges per node on average
for (int i = 0; i < NODES; ++i) {
for (int k = 0; k < 8; ++k) {
int j = dst(rng);
if (i != j) graph[i][j] = wgt(rng);
}
}
}
// `volatile` sink prevents the optimiser from eliding the final reads
static volatile int g_sink = 0;
static void dijkstra(int graph[NODES][NODES], int src) {
int dist[NODES];
bool vis[NODES];
for (int i = 0; i < NODES; ++i) {
dist[i] = INF;
vis[i] = false;
}
dist[src] = 0;
using P = std::pair<int,int>;
std::priority_queue<P, std::vector<P>, std::greater<P>> pq;
pq.push({0, src});
while (!pq.empty()) {
auto [d, u] = pq.top(); pq.pop();
if (vis[u]) continue;
vis[u] = true;
for (int v = 0; v < NODES; ++v) {
if (graph[u][v] == INF) continue;
int nd = dist[u] + graph[u][v];
if (nd < dist[v]) {
dist[v] = nd;
pq.push({nd, v});
}
}
}
// Consume results so the compiler can't drop the work
for (int i = 0; i < NODES; ++i) g_sink += dist[i];
}
// ════════════════════════════════════════════════════════════════════════════
// Part 2: Matrix multiply A[64][64] * B[64][64] = C[64][64]
// ════════════════════════════════════════════════════════════════════════════
static constexpr int M = 64;
static void matmul(int A[M][M], int B[M][M], int C[M][M]) {
for (int i = 0; i < M; ++i)
for (int j = 0; j < M; ++j)
C[i][j] = 0;
for (int i = 0; i < M; ++i)
for (int k = 0; k < M; ++k) {
int aik = A[i][k]; // row reuse
for (int j = 0; j < M; ++j) {
C[i][j] += aik * B[k][j]; // column access on B
}
}
// Consume result
for (int i = 0; i < M; ++i)
for (int j = 0; j < M; ++j)
g_sink += C[i][j];
}
// ════════════════════════════════════════════════════════════════════════════
// Part 3: Scan-with-polluter (thrashing pattern)
// ════════════════════════════════════════════════════════════════════════════
static void thrash_pattern() {
static int hot[256];
static int polluter[4096];
for (int i = 0; i < 256; ++i) hot[i] = i;
for (int i = 0; i < 4096; ++i) polluter[i] = i;
for (int round = 0; round < 20; ++round) {
for (int i = 0; i < 256; ++i) g_sink += hot[i];
for (int i = 0; i < 4096; ++i) g_sink += polluter[i];
}
}
// ════════════════════════════════════════════════════════════════════════════
// main
// ════════════════════════════════════════════════════════════════════════════
int main() {
std::mt19937 rng(42); // fixed seed → reproducible workload
std::cerr << "[1/3] Dijkstra on " << NODES << "-node graph...\n";
{
static int graph[NODES][NODES];
build_graph(graph, rng);
for (int s = 0; s < NODES; s += NODES/10) {
dijkstra(graph, s);
}
}
std::cerr << "[2/3] Matrix multiply " << M << "x" << M << "...\n";
{
static int A[M][M], B[M][M], C[M][M];
std::uniform_int_distribution<int> d(1, 100);
for (int i = 0; i < M; ++i)
for (int j = 0; j < M; ++j) {
A[i][j] = d(rng);
B[i][j] = d(rng);
}
matmul(A, B, C);
matmul(A, B, C); // second pass exercises warm-cache reuse
}
std::cerr << "[3/3] Thrashing pattern (scan-with-polluter)...\n";
thrash_pattern();
std::cerr << "[done] sink = " << g_sink << "\n";
return 0;
}