Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/guide/configuration/vllm.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ curl -X POST http://localhost:13305/api/v1/install \
-d '{"recipe": "vllm", "backend": "rocm"}'
```

The install fetches a per-GPU-target release (e.g. `…-gfx1151`, `…-gfx1150`) from [lemonade-sdk/vllm-rocm](https://github.com/lemonade-sdk/vllm-rocm/releases). The base version is pinned in [`backend_versions.json`](https://github.com/lemonade-sdk/lemonade/blob/main/src/cpp/resources/backend_versions.json); the `-{gfx_target}` suffix is appended at runtime from `SystemInfo::get_rocm_arch()`, so a single pin covers all supported architectures.
The install fetches a per-GPU-target release (e.g. `…-gfx1151`, `…-gfx120X`) from [lemonade-sdk/vllm-rocm](https://github.com/lemonade-sdk/vllm-rocm/releases). The base version is pinned in [`backend_versions.json`](https://github.com/lemonade-sdk/lemonade/blob/main/src/cpp/resources/backend_versions.json); Lemonade appends the machine's ROCm release target at runtime. Discrete RDNA GPUs use family release targets such as `gfx110X`/`gfx120X` even when the physical ISA is specific (for example, `gfx1201`), while APUs keep their specific targets such as `gfx1151`. User pins may be either the base version or a full per-target release tag; Lemonade normalizes either form to the current machine's target so the happy path does not require knowing the asset naming convention.

## Use

Expand Down
19 changes: 19 additions & 0 deletions src/cpp/include/lemon/system_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ class SystemInfo {

// Device support detection
static std::string get_rocm_arch();
// ROCm backend release assets (whisper.cpp-rocm, vllm-rocm, llamacpp-rocm
// nightly) are published per *release target*: discrete RDNA GPUs ship under
// a family target (gfx110X / gfx120X / gfx103X) while APUs ship per-specific
// ISA (gfx1150 / gfx1151 / gfx1152). get_rocm_arch() returns the specific ISA
// (e.g. gfx1201), which is correct for TheRock runtime paths but 404s on these
// per-target asset names. Use this when constructing such asset filenames.
static std::string get_rocm_release_target();
static std::string get_cuda_arch();

// CUDA release assets are architecture-specific (sm_89, sm_120, etc.).
Expand Down Expand Up @@ -231,6 +238,18 @@ std::unique_ptr<SystemInfo> create_system_info();
// Returns architecture string (e.g., "gfx1150", "gfx1151", "gfx110X", "gfx120X") or empty string if not recognized
std::string identify_rocm_arch_from_name(const std::string& device_name);

// Map a specific ROCm ISA (e.g. "gfx1201") to the *release target* name used in
// ROCm backend asset filenames (e.g. "gfx120X"). Discrete RDNA GPUs collapse to
// a family target (gfx1030 -> gfx103X, gfx1100 -> gfx110X, gfx1200/gfx1201 -> gfx120X); APUs
// (gfx1150/1151/1152) and already-family or unknown values pass through
// unchanged. Pure function with no hardware dependency so it can be unit-tested.
std::string rocm_arch_to_release_target(const std::string& arch);

// Remove a final ROCm gfx release-target suffix from a release tag, if present.
// Example: "vllm0.22.1-rocm7.13.0-gfx120X" -> "vllm0.22.1-rocm7.13.0".
// Returns the original string unchanged when the final segment is not a gfx token.
std::string strip_rocm_release_target_suffix(const std::string& version);

// Helper to identify CUDA Compute Capability from a marketing GPU name
// Returns an sm_XX token (e.g., "sm_75", "sm_86", "sm_120") or empty string if not recognized
std::string identify_cuda_arch_from_name(const std::string& device_name);
Expand Down
2 changes: 1 addition & 1 deletion src/cpp/resources/backend_versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
"metal": "b17"
},
"vllm": {
"rocm": "vllm0.20.1-rocm7.12.0"
"rocm": "vllm0.22.1-rocm7.13.0"
},
"moonshine": {
"cpu": "moonshine0.0.62"
Expand Down
5 changes: 4 additions & 1 deletion src/cpp/server/backends/llamacpp_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,10 @@ InstallParams LlamaCppServer::get_install_params(const std::string& backend, con
#endif
} else if (resolved_backend == "rocm-nightly") {
params.repo = "lemonade-sdk/llamacpp-rocm";
std::string target_arch = SystemInfo::get_rocm_arch();
// llamacpp-rocm nightly publishes per release target (gfx103X/110X/120X
// for discrete RDNA GPUs, gfx1150/1151/1152 for APUs), not per specific
// ISA. Map gfx1201 -> gfx120X so the asset resolves.
std::string target_arch = SystemInfo::get_rocm_release_target();
if (target_arch.empty()) {
throw std::runtime_error(
SystemInfo::get_unsupported_backend_error("llamacpp", "rocm-nightly")
Expand Down
10 changes: 7 additions & 3 deletions src/cpp/server/backends/vllm_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,20 @@ InstallParams VLLMServer::get_install_params(const std::string& backend, const s

if (backend == "rocm") {
params.repo = "lemonade-sdk/vllm-rocm";
std::string target_arch = SystemInfo::get_rocm_arch();
// vllm-rocm publishes one release per *release target*: gfx120X for RDNA4
// dGPUs, gfx1151 for Strix Halo APU, etc. get_rocm_arch() returns the
// specific ISA (gfx1201) which has no matching release tag and 404s, so
// map to the release target.
std::string target_arch = SystemInfo::get_rocm_release_target();
if (target_arch.empty()) {
throw std::runtime_error(
SystemInfo::get_unsupported_backend_error("vllm", "rocm")
);
}
#ifdef __linux__
// One release per GPU target since 0.19.1: release tag is
// {version}-{target_arch}, e.g. vllm0.20.1-rocm7.12.0-gfx1151.
std::string release_tag = version + "-" + target_arch;
// {version}-{target_arch}, e.g. vllm0.22.1-rocm7.13.0-gfx120X.
std::string release_tag = strip_rocm_release_target_suffix(version) + "-" + target_arch;
params.version_override = release_tag;
params.filename = release_tag + "-x64.tar.gz";
#else
Expand Down
6 changes: 5 additions & 1 deletion src/cpp/server/backends/whisper_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,11 @@ InstallParams WhisperServer::get_install_params(const std::string& backend, cons
throw std::runtime_error("Unsupported platform for whisper.cpp cpu backend");
#endif
} else if (backend == "rocm") {
std::string rocm_arch = SystemInfo::get_rocm_arch();
// whisper.cpp-rocm publishes per release target (gfx120X family for RDNA4
// dGPUs, gfx1151 for Strix Halo APU, etc.), not per specific ISA. Use the
// release-target mapping so gfx1201 resolves to the published gfx120X
// asset instead of 404ing.
std::string rocm_arch = SystemInfo::get_rocm_release_target();
if (rocm_arch.empty()) {
throw std::runtime_error(SystemInfo::get_unsupported_backend_error("whispercpp", "rocm"));
}
Expand Down
99 changes: 95 additions & 4 deletions src/cpp/server/system_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1496,8 +1496,12 @@ json SystemInfo::build_recipes_info(const json& devices) {
}
}
} else {
bool is_vllm_rocm = def.recipe == "vllm" && def.backend == "rocm";
auto normalize_expected_version = [is_vllm_rocm](const std::string& version) {
return is_vllm_rocm ? strip_rocm_release_target_suffix(version) : version;
};
std::string installed_version = get_recipe_version(def.recipe, def.backend);
std::string expected_version = get_expected_backend_version(def.recipe, def.backend);
std::string expected_version = normalize_expected_version(get_expected_backend_version(def.recipe, def.backend));

// The user's *_bin pin overrides what the state machine considers
// "expected" — otherwise an explicit-tag pin (e.g. b8664) would
Expand All @@ -1511,7 +1515,10 @@ json SystemInfo::build_recipes_info(const json& devices) {
expected_version.clear();
} else {
// Bare upstream tag — that tag IS what the user expects.
expected_version = user_pin;
// vllm-rocm user pins may be base versions or full per-target
// release tags; normalize either form to the base version so
// status matches the install path's current-target suffix.
expected_version = normalize_expected_version(user_pin);

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Require the current vLLM ROCm target in status

When vllm.rocm_bin is set to a full per-target tag and a same-base install for a different target is already in the cache (for example, a cache copied from a gfx1151 machine to gfx120X), this strips the expected version down to the base. The versions_match helper below then accepts any {expected}-... suffix, so /system-info reports the wrong-target vLLM bundle as installed instead of prompting a reinstall, even though the install path would construct the current machine's target suffix.

Useful? React with 👍 / 👎.

}
}
}
Expand Down Expand Up @@ -1576,8 +1583,10 @@ json SystemInfo::build_recipes_info(const json& devices) {
latest_tag = bm->get_or_resolve_latest_tag(def.recipe, def.backend);
}
}
if (!latest_tag.empty()
&& version_compare(installed_version, latest_tag) < 0) {
std::string installed_version_for_compare = normalize_expected_version(installed_version);
std::string latest_tag_for_compare = normalize_expected_version(latest_tag);
if (!latest_tag_for_compare.empty()
&& version_compare(installed_version_for_compare, latest_tag_for_compare) < 0) {
backend["state"] = "update_available";
backend["message"] = "Newer upstream release available: " + latest_tag;
backend["action"] = get_install_command(def.recipe, def.backend);
Expand Down Expand Up @@ -1925,6 +1934,82 @@ std::string identify_rocm_arch_from_name(const std::string& device_name) {
return "";
}

std::string rocm_arch_to_release_target(const std::string& arch) {
    // Translate a specific ROCm ISA token into the release-target token used
    // in per-target backend asset names.
    //
    // Mapping implemented below:
    //   7-character tokens starting with gfx103 / gfx110 / gfx120
    //       -> gfx103X / gfx110X / gfx120X (family target)
    //   gfx1150 / gfx1151 / gfx1152
    //       -> unchanged (published per specific ISA)
    //   empty string, tokens already ending in 'X', tokens that do not start
    //   with "gfx", and every other gfx token (gfx90a, gfx942, gfx1010, ...)
    //       -> unchanged
    //
    // The function is pure: it only inspects its argument.
    const bool has_gfx_prefix = arch.compare(0, 3, "gfx") == 0;
    if (arch.empty() || arch.back() == 'X' || !has_gfx_prefix) {
        return arch;
    }

    // Targets that are never collapsed to a family name.
    static const char* const kPerIsaTargets[] = {"gfx1150", "gfx1151", "gfx1152"};
    for (const char* per_isa : kPerIsaTargets) {
        if (arch == per_isa) {
            return arch;
        }
    }

    // Only "gfx" + four characters can belong to a collapsible family; the
    // final character is replaced by 'X'.
    static const char* const kFamilyPrefixes[] = {"gfx103", "gfx110", "gfx120"};
    if (arch.size() == 7) {
        for (const char* family_prefix : kFamilyPrefixes) {
            if (arch.compare(0, 6, family_prefix) == 0) {
                return std::string(family_prefix) + "X";
            }
        }
    }

    // No collapse rule applies.
    return arch;
}

std::string strip_rocm_release_target_suffix(const std::string& version) {
    // Drop a trailing "-gfx<target>" segment from a release tag.
    //
    // The segment is removed only when the text after the last "-gfx" runs to
    // the end of the string and is either
    //   * three hex digits            (e.g. gfx90a), or
    //   * four hex digits             (e.g. gfx1151), or
    //   * three hex digits + 'X'/'x'  (e.g. gfx120X).
    // In every other case the input is returned unchanged.
    const size_t sep = version.rfind("-gfx");
    if (sep == std::string::npos) {
        return version;
    }

    // Everything that follows the final "-gfx" marker.
    const std::string tail = version.substr(sep + 4);
    if (tail.size() != 3 && tail.size() != 4) {
        return version;
    }

    const auto hex_digit = [](char c) {
        return (c >= '0' && c <= '9') ||
               (c >= 'a' && c <= 'f') ||
               (c >= 'A' && c <= 'F');
    };

    // The first three characters must be hex in both accepted shapes.
    if (!std::all_of(tail.begin(), tail.begin() + 3, hex_digit)) {
        return version;
    }

    // An optional fourth character is either another hex digit or the family
    // wildcard.
    if (tail.size() == 4) {
        const char last = tail[3];
        if (!hex_digit(last) && last != 'X' && last != 'x') {
            return version;
        }
    }

    return version.substr(0, sep);
}

// Linux: identify NPU architecture from sysfs accel subsystem
// Checks /sys/class/accel/*/device/driver for amdxdna, then reads number of columns
// If amdxdna not loaded, fall back to PCI device IDs
Expand Down Expand Up @@ -2135,6 +2220,12 @@ std::string SystemInfo::get_rocm_arch() {
return ""; // No supported architecture found
}

std::string SystemInfo::get_rocm_release_target() {
    // Take whatever get_rocm_arch() reports and convert it with
    // rocm_arch_to_release_target(), so the result is the token used in
    // per-target release asset names.
    const std::string specific_isa = get_rocm_arch();
    return rocm_arch_to_release_target(specific_isa);
}

static int cuda_sm_value(const std::string& arch) {
if (arch.size() <= 3 || arch.substr(0, 3) != "sm_") {
return 0;
Expand Down
141 changes: 141 additions & 0 deletions test/test_rocm_release_target.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
"""
CPU-runnable unit tests for ROCm specific-ISA -> release-target mapping logic
(system_info.cpp::rocm_arch_to_release_target()).

ROCm backend release repos (whisper.cpp-rocm, vllm-rocm, llamacpp-rocm nightly)
publish discrete RDNA GPUs under a *family* target (gfx103X / gfx110X / gfx120X)
while APUs ship per-specific ISA (gfx1150 / gfx1151 / gfx1152). get_rocm_arch()
returns the specific ISA (e.g. gfx1201), which is correct for TheRock runtime
paths but 404s on these per-target asset names. This test replicates
the C++ mapping so it can be validated without AMD hardware.

Run with: python -m pytest test/test_rocm_release_target.py
or: python test/test_rocm_release_target.py
"""

import re
import unittest


# ---------------------------------------------------------------------------
# Python replica of system_info.cpp::rocm_arch_to_release_target()
# ---------------------------------------------------------------------------
def rocm_arch_to_release_target(arch: str) -> str:
    """Map a specific ROCm ISA token to its release-target token.

    Seven-character tokens starting with ``gfx103``, ``gfx110`` or ``gfx120``
    collapse to the family names ``gfx103X`` / ``gfx110X`` / ``gfx120X``.
    ``gfx1150`` / ``gfx1151`` / ``gfx1152``, tokens already ending in ``X``,
    tokens without a ``gfx`` prefix, the empty string and every other value
    are returned unchanged.
    """
    per_isa_targets = {"gfx1150", "gfx1151", "gfx1152"}
    family_prefixes = ("gfx103", "gfx110", "gfx120")

    # Any one of these conditions means no collapse rule can apply.
    passes_through = (
        not arch
        or arch[-1] == "X"
        or arch[:3] != "gfx"
        or arch in per_isa_targets
        or len(arch) != 7
    )
    if passes_through:
        return arch

    # "gfx" plus three characters identifies the family; the last character of
    # the token is replaced by "X".
    prefix = arch[:6]
    return prefix + "X" if prefix in family_prefixes else arch


# ---------------------------------------------------------------------------
# Python replica of vllm_server.cpp ROCm release tag construction
# ---------------------------------------------------------------------------
def strip_vllm_rocm_target_suffix(version: str) -> str:
    """Remove a trailing ``-gfx<target>`` segment from a vllm-rocm version.

    vllm-rocm GitHub releases are tagged like ``vllm0.22.1-rocm7.13.0-gfx120X``.
    The version handed to the tag builder may be a base version or an already
    target-suffixed tag, so the optional suffix is stripped before the current
    machine's target is appended.

    The accepted suffix shapes mirror the C++
    ``strip_rocm_release_target_suffix()``: ``gfx`` followed by three hex
    digits and an optional fourth character that is a hex digit or ``X``/``x``.
    Hex digits are matched case-insensitively, and ``\\Z`` (not ``$``) anchors
    the match so a trailing newline is not silently ignored.

    Args:
        version: Base version or full per-target release tag.

    Returns:
        ``version`` without its final gfx target segment, or ``version``
        unchanged when it does not end in such a segment.
    """
    return re.sub(r"-gfx[0-9a-fA-F]{3}[0-9a-fA-FXx]?\Z", "", version)


def vllm_rocm_release_tag(version: str, target_arch: str) -> str:
    """Build the release tag for ``target_arch`` from ``version``.

    Any gfx target suffix already present on ``version`` is removed first, so
    the result always carries exactly one target suffix.
    """
    base_version = strip_vllm_rocm_target_suffix(version)
    return f"{base_version}-{target_arch}"


class TestRocmReleaseTarget(unittest.TestCase):
    """Exercises the Python replica of rocm_arch_to_release_target()."""

    def test_rdna4_dgpu_collapses_to_family(self):
        # R9700 (gfx1201) must map to the published gfx120X target.
        self.assertEqual(rocm_arch_to_release_target("gfx1201"), "gfx120X")
        self.assertEqual(rocm_arch_to_release_target("gfx1200"), "gfx120X")

    def test_rdna3_dgpu_collapses_to_family(self):
        for isa in ("gfx1100", "gfx1101", "gfx1102", "gfx1103"):
            with self.subTest(isa=isa):
                self.assertEqual(rocm_arch_to_release_target(isa), "gfx110X")

    def test_rdna2_dgpu_collapses_to_family(self):
        # One element per line with a trailing comma keeps the literal stable
        # under Black.
        for isa in (
            "gfx1030",
            "gfx1031",
            "gfx1032",
            "gfx1033",
            "gfx1034",
            "gfx1035",
            "gfx1036",
        ):
            with self.subTest(isa=isa):
                self.assertEqual(rocm_arch_to_release_target(isa), "gfx103X")

    def test_apus_stay_specific(self):
        # APU assets are published per-specific ISA; must NOT be collapsed.
        for isa in ("gfx1150", "gfx1151", "gfx1152"):
            with self.subTest(isa=isa):
                self.assertEqual(rocm_arch_to_release_target(isa), isa)

    def test_family_targets_are_idempotent(self):
        for fam in ("gfx103X", "gfx110X", "gfx120X"):
            with self.subTest(fam=fam):
                self.assertEqual(rocm_arch_to_release_target(fam), fam)

    def test_datacenter_and_other_archs_pass_through(self):
        # Data-center / CDNA and gfx101X dGPU have no family-collapse rule here.
        for isa in ("gfx908", "gfx90a", "gfx942", "gfx1010", "gfx1011", "gfx1012"):
            with self.subTest(isa=isa):
                self.assertEqual(rocm_arch_to_release_target(isa), isa)

    def test_empty_and_non_gfx_pass_through(self):
        for val in ("", "sm_120", "radeon", "unknown"):
            with self.subTest(val=val):
                self.assertEqual(rocm_arch_to_release_target(val), val)

    def test_maps_to_release_target_names_without_advertising_backend_support(self):
        # This mapping only chooses the asset-name token for recipes that already
        # decided the current GPU is supported. Recipe availability remains
        # controlled by RECIPE_DEFS in system_info.cpp (for example,
        # whispercpp/vllm intentionally do not advertise gfx103X or gfx1152).
        known_release_target_names = {
            "gfx103X",
            "gfx110X",
            "gfx120X",
            "gfx1150",
            "gfx1151",
            "gfx1152",
        }
        for isa in ("gfx1201", "gfx1100", "gfx1030", "gfx1151"):
            with self.subTest(isa=isa):
                self.assertIn(
                    rocm_arch_to_release_target(isa), known_release_target_names
                )


class TestVllmRocmReleaseTags(unittest.TestCase):
    """Exercises vllm_rocm_release_tag() with base and target-suffixed versions."""

    def test_base_version_appends_current_target(self):
        # A bare base version simply gains the machine's target.
        tag = vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0", "gfx120X")
        self.assertEqual(tag, "vllm0.22.1-rocm7.13.0-gfx120X")

    def test_existing_target_suffix_is_replaced_not_double_appended(self):
        # GitHub marks one target release as "Latest". When that full tag is
        # what gets resolved, the tag must be rebuilt for the current machine's
        # ROCm release target instead of producing a bogus
        # "...-gfx1151-gfx120X" tag.
        tag = vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0-gfx1151", "gfx120X")
        self.assertEqual(tag, "vllm0.22.1-rocm7.13.0-gfx120X")

    def test_existing_matching_target_suffix_is_idempotent(self):
        # A tag that already names the current target comes back unchanged.
        tag = vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0-gfx120X", "gfx120X")
        self.assertEqual(tag, "vllm0.22.1-rocm7.13.0-gfx120X")

    def test_specific_target_suffix_can_be_replaced_with_specific_apu_target(self):
        # A family suffix can be swapped for a per-ISA APU target.
        tag = vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0-gfx120X", "gfx1151")
        self.assertEqual(tag, "vllm0.22.1-rocm7.13.0-gfx1151")


if __name__ == "__main__":
unittest.main()
Loading