Memory-Cache-Simulator/bench.cpp at main · hritik0101/Memory-Cache-Simulator · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
/*
 * dijkstra_bench.cpp — CS204 Stress Benchmark
 * IIT Ropar | 2025-26
 *
 * Pure workload. Memory tracing is handled externally by the PIN tool
 * (mem_trace.cpp). This file should be compiled and run normally:
 *
 *   g++ -O0 -std=c++17 -o dijkstra_bench dijkstra_bench.cpp
 *   pin -t obj-intel64/mem_trace.so -o dijkstra.trace -- ./dijkstra_bench
 *
 * The workload has three phases designed to stress different aspects
 * of cache replacement:
 *
 *  1. Dijkstra on a 200-node graph  — 160 KB adjacency matrix, run from
 *     10 sources to build temporal + spatial reuse patterns.
 *  2. Matrix multiply (64x64 ints)  — column accesses on B create poor
 *     spatial locality; A rows are reused, rewarding LRU/LFU.
 *  3. Scan-with-polluter  — hot array repeatedly evicted by a larger
 *     polluter array; OPT/LRU should outperform FIFO/RANDOM.
 *
 * Build with -O0 so loads/stores aren't optimised away.
 */

#include <iostream>
#include <vector>
#include <queue>
#include <limits>
#include <cstdint>
#include <random>

// ════════════════════════════════════════════════════════════════════════════
//  Part 1: Dijkstra on a 200-node graph
// ════════════════════════════════════════════════════════════════════════════
static constexpr int NODES = 200;
static constexpr int INF   = std::numeric_limits<int>::max() / 2;

static void build_graph(int graph[NODES][NODES], std::mt19937& rng) {
    for (int i = 0; i < NODES; ++i)
        for (int j = 0; j < NODES; ++j)
            graph[i][j] = INF;

    std::uniform_int_distribution<int> dst(0, NODES-1);
    std::uniform_int_distribution<int> wgt(1, 50);

    // ~8 edges per node on average
    for (int i = 0; i < NODES; ++i) {
        for (int k = 0; k < 8; ++k) {
            int j = dst(rng);
            if (i != j) graph[i][j] = wgt(rng);
        }
    }
}

// `volatile` sink prevents the optimiser from eliding the final reads
static volatile int g_sink = 0;

static void dijkstra(int graph[NODES][NODES], int src) {
    int  dist[NODES];
    bool vis[NODES];

    for (int i = 0; i < NODES; ++i) {
        dist[i] = INF;
        vis[i]  = false;
    }
    dist[src] = 0;

    using P = std::pair<int,int>;
    std::priority_queue<P, std::vector<P>, std::greater<P>> pq;
    pq.push({0, src});

    while (!pq.empty()) {
        auto [d, u] = pq.top(); pq.pop();
        if (vis[u]) continue;
        vis[u] = true;

        for (int v = 0; v < NODES; ++v) {
            if (graph[u][v] == INF) continue;
            int nd = dist[u] + graph[u][v];
            if (nd < dist[v]) {
                dist[v] = nd;
                pq.push({nd, v});
            }
        }
    }
    // Consume results so the compiler can't drop the work
    for (int i = 0; i < NODES; ++i) g_sink += dist[i];
}

// ════════════════════════════════════════════════════════════════════════════
//  Part 2: Matrix multiply  A[64][64] * B[64][64] = C[64][64]
// ════════════════════════════════════════════════════════════════════════════
static constexpr int M = 64;

static void matmul(int A[M][M], int B[M][M], int C[M][M]) {
    for (int i = 0; i < M; ++i)
        for (int j = 0; j < M; ++j)
            C[i][j] = 0;

    for (int i = 0; i < M; ++i)
        for (int k = 0; k < M; ++k) {
            int aik = A[i][k];                     // row reuse
            for (int j = 0; j < M; ++j) {
                C[i][j] += aik * B[k][j];          // column access on B
            }
        }

    // Consume result
    for (int i = 0; i < M; ++i)
        for (int j = 0; j < M; ++j)
            g_sink += C[i][j];
}

// ════════════════════════════════════════════════════════════════════════════
//  Part 3: Scan-with-polluter (thrashing pattern)
// ════════════════════════════════════════════════════════════════════════════
static void thrash_pattern() {
    static int hot[256];
    static int polluter[4096];

    for (int i = 0; i < 256;  ++i) hot[i]      = i;
    for (int i = 0; i < 4096; ++i) polluter[i] = i;

    for (int round = 0; round < 20; ++round) {
        for (int i = 0; i < 256;  ++i) g_sink += hot[i];
        for (int i = 0; i < 4096; ++i) g_sink += polluter[i];
    }
}

// ════════════════════════════════════════════════════════════════════════════
//  main
// ════════════════════════════════════════════════════════════════════════════
int main() {
    std::mt19937 rng(42);   // fixed seed → reproducible workload

    std::cerr << "[1/3] Dijkstra on " << NODES << "-node graph...\n";
    {
        static int graph[NODES][NODES];
        build_graph(graph, rng);
        for (int s = 0; s < NODES; s += NODES/10) {
            dijkstra(graph, s);
        }
    }

    std::cerr << "[2/3] Matrix multiply " << M << "x" << M << "...\n";
    {
        static int A[M][M], B[M][M], C[M][M];
        std::uniform_int_distribution<int> d(1, 100);
        for (int i = 0; i < M; ++i)
            for (int j = 0; j < M; ++j) {
                A[i][j] = d(rng);
                B[i][j] = d(rng);
            }
        matmul(A, B, C);
        matmul(A, B, C);   // second pass exercises warm-cache reuse
    }

    std::cerr << "[3/3] Thrashing pattern (scan-with-polluter)...\n";
    thrash_pattern();

    std::cerr << "[done] sink = " << g_sink << "\n";
    return 0;
}