diff --git a/docs/guide/configuration/vllm.md b/docs/guide/configuration/vllm.md index b904a224a..f332f56e6 100644 --- a/docs/guide/configuration/vllm.md +++ b/docs/guide/configuration/vllm.md @@ -31,7 +31,7 @@ curl -X POST http://localhost:13305/api/v1/install \ -d '{"recipe": "vllm", "backend": "rocm"}' ``` -The install fetches a per-GPU-target release (e.g. `…-gfx1151`, `…-gfx1150`) from [lemonade-sdk/vllm-rocm](https://github.com/lemonade-sdk/vllm-rocm/releases). The base version is pinned in [`backend_versions.json`](https://github.com/lemonade-sdk/lemonade/blob/main/src/cpp/resources/backend_versions.json); the `-{gfx_target}` suffix is appended at runtime from `SystemInfo::get_rocm_arch()`, so a single pin covers all supported architectures. +The install fetches a per-GPU-target release (e.g. `…-gfx1151`, `…-gfx120X`) from [lemonade-sdk/vllm-rocm](https://github.com/lemonade-sdk/vllm-rocm/releases). The base version is pinned in [`backend_versions.json`](https://github.com/lemonade-sdk/lemonade/blob/main/src/cpp/resources/backend_versions.json); Lemonade appends the machine's ROCm release target at runtime. Discrete RDNA GPUs use family release targets such as `gfx110X`/`gfx120X` even when the physical ISA is specific (for example, `gfx1201`), while APUs keep their specific targets such as `gfx1151`. User pins may be either the base version or a full per-target release tag; Lemonade normalizes either form to the current machine's target so the happy path does not require knowing the asset naming convention. 
## Use diff --git a/src/cpp/include/lemon/system_info.h b/src/cpp/include/lemon/system_info.h index 9b143ae47..b7e4e0ff2 100644 --- a/src/cpp/include/lemon/system_info.h +++ b/src/cpp/include/lemon/system_info.h @@ -109,6 +109,13 @@ class SystemInfo { // Device support detection static std::string get_rocm_arch(); + // ROCm backend release assets (whisper.cpp-rocm, vllm-rocm, llamacpp-rocm + // nightly) are published per *release target*: discrete RDNA GPUs ship under + // a family target (gfx110X / gfx120X / gfx103X) while APUs ship per-specific + // ISA (gfx1150 / gfx1151 / gfx1152). get_rocm_arch() returns the specific ISA + // (e.g. gfx1201), which is correct for TheRock runtime paths but 404s on these + // per-target asset names. Use this when constructing such asset filenames. + static std::string get_rocm_release_target(); static std::string get_cuda_arch(); // CUDA release assets are architecture-specific (sm_89, sm_120, etc.). @@ -231,6 +238,18 @@ std::unique_ptr create_system_info(); // Returns architecture string (e.g., "gfx1150", "gfx1151", "gfx110X", "gfx120X") or empty string if not recognized std::string identify_rocm_arch_from_name(const std::string& device_name); +// Map a specific ROCm ISA (e.g. "gfx1201") to the *release target* name used in +// ROCm backend asset filenames (e.g. "gfx120X"). Discrete RDNA GPUs collapse to +// a family target (gfx1030-1036 -> gfx103X, gfx1100-1103 -> gfx110X, gfx1200/1201 -> gfx120X); APUs +// (gfx1150/1151/1152) and already-family or unknown values pass through +// unchanged. Pure function with no hardware dependency so it can be unit-tested. +std::string rocm_arch_to_release_target(const std::string& arch); + +// Remove a final ROCm gfx release-target suffix from a release tag, if present. +// Example: "vllm0.22.1-rocm7.13.0-gfx120X" -> "vllm0.22.1-rocm7.13.0". +// Returns the original string unchanged when the final segment is not a gfx token. 
+std::string strip_rocm_release_target_suffix(const std::string& version); + // Helper to identify CUDA Compute Capability from a marketing GPU name // Returns an sm_XX token (e.g., "sm_75", "sm_86", "sm_120") or empty string if not recognized std::string identify_cuda_arch_from_name(const std::string& device_name); diff --git a/src/cpp/resources/backend_versions.json b/src/cpp/resources/backend_versions.json index db08f41d0..95a6b0803 100644 --- a/src/cpp/resources/backend_versions.json +++ b/src/cpp/resources/backend_versions.json @@ -90,7 +90,7 @@ "metal": "b17" }, "vllm": { - "rocm": "vllm0.20.1-rocm7.12.0" + "rocm": "vllm0.22.1-rocm7.13.0" }, "moonshine": { "cpu": "moonshine0.0.62" diff --git a/src/cpp/server/backends/llamacpp_server.cpp b/src/cpp/server/backends/llamacpp_server.cpp index a8b731f63..16480748a 100644 --- a/src/cpp/server/backends/llamacpp_server.cpp +++ b/src/cpp/server/backends/llamacpp_server.cpp @@ -164,7 +164,10 @@ InstallParams LlamaCppServer::get_install_params(const std::string& backend, con #endif } else if (resolved_backend == "rocm-nightly") { params.repo = "lemonade-sdk/llamacpp-rocm"; - std::string target_arch = SystemInfo::get_rocm_arch(); + // llamacpp-rocm nightly publishes per release target (gfx103X/110X/120X + // for discrete RDNA GPUs, gfx1150/1151/1152 for APUs), not per specific + // ISA. Map gfx1201 -> gfx120X so the asset resolves. 
+ std::string target_arch = SystemInfo::get_rocm_release_target(); if (target_arch.empty()) { throw std::runtime_error( SystemInfo::get_unsupported_backend_error("llamacpp", "rocm-nightly") diff --git a/src/cpp/server/backends/vllm_server.cpp b/src/cpp/server/backends/vllm_server.cpp index 7584d56d9..bd5dd87ae 100644 --- a/src/cpp/server/backends/vllm_server.cpp +++ b/src/cpp/server/backends/vllm_server.cpp @@ -79,7 +79,11 @@ InstallParams VLLMServer::get_install_params(const std::string& backend, const s if (backend == "rocm") { params.repo = "lemonade-sdk/vllm-rocm"; - std::string target_arch = SystemInfo::get_rocm_arch(); + // vllm-rocm publishes one release per *release target*: gfx120X for RDNA4 + // dGPUs, gfx1151 for Strix Halo APU, etc. get_rocm_arch() returns the + // specific ISA (gfx1201) which has no matching release tag and 404s, so + // map to the release target. + std::string target_arch = SystemInfo::get_rocm_release_target(); if (target_arch.empty()) { throw std::runtime_error( SystemInfo::get_unsupported_backend_error("vllm", "rocm") @@ -87,8 +91,8 @@ InstallParams VLLMServer::get_install_params(const std::string& backend, const s } #ifdef __linux__ // One release per GPU target since 0.19.1: release tag is - // {version}-{target_arch}, e.g. vllm0.20.1-rocm7.12.0-gfx1151. - std::string release_tag = version + "-" + target_arch; + // {version}-{target_arch}, e.g. vllm0.22.1-rocm7.13.0-gfx120X. 
+ std::string release_tag = strip_rocm_release_target_suffix(version) + "-" + target_arch; params.version_override = release_tag; params.filename = release_tag + "-x64.tar.gz"; #else diff --git a/src/cpp/server/backends/whisper_server.cpp b/src/cpp/server/backends/whisper_server.cpp index 9f50da020..0a586c5be 100644 --- a/src/cpp/server/backends/whisper_server.cpp +++ b/src/cpp/server/backends/whisper_server.cpp @@ -94,7 +94,11 @@ InstallParams WhisperServer::get_install_params(const std::string& backend, cons throw std::runtime_error("Unsupported platform for whisper.cpp cpu backend"); #endif } else if (backend == "rocm") { - std::string rocm_arch = SystemInfo::get_rocm_arch(); + // whisper.cpp-rocm publishes per release target (gfx120X family for RDNA4 + // dGPUs, gfx1151 for Strix Halo APU, etc.), not per specific ISA. Use the + // release-target mapping so gfx1201 resolves to the published gfx120X + // asset instead of 404ing. + std::string rocm_arch = SystemInfo::get_rocm_release_target(); if (rocm_arch.empty()) { throw std::runtime_error(SystemInfo::get_unsupported_backend_error("whispercpp", "rocm")); } diff --git a/src/cpp/server/system_info.cpp b/src/cpp/server/system_info.cpp index cf0adfc52..939f9b062 100644 --- a/src/cpp/server/system_info.cpp +++ b/src/cpp/server/system_info.cpp @@ -1496,8 +1496,12 @@ json SystemInfo::build_recipes_info(const json& devices) { } } } else { + bool is_vllm_rocm = def.recipe == "vllm" && def.backend == "rocm"; + auto normalize_expected_version = [is_vllm_rocm](const std::string& version) { + return is_vllm_rocm ? 
strip_rocm_release_target_suffix(version) : version; + }; std::string installed_version = get_recipe_version(def.recipe, def.backend); - std::string expected_version = get_expected_backend_version(def.recipe, def.backend); + std::string expected_version = normalize_expected_version(get_expected_backend_version(def.recipe, def.backend)); // The user's *_bin pin overrides what the state machine considers // "expected" — otherwise an explicit-tag pin (e.g. b8664) would @@ -1511,7 +1515,10 @@ json SystemInfo::build_recipes_info(const json& devices) { expected_version.clear(); } else { // Bare upstream tag — that tag IS what the user expects. - expected_version = user_pin; + // vllm-rocm user pins may be base versions or full per-target + // release tags; normalize either form to the base version so + // status matches the install path's current-target suffix. + expected_version = normalize_expected_version(user_pin); } } } @@ -1576,8 +1583,10 @@ json SystemInfo::build_recipes_info(const json& devices) { latest_tag = bm->get_or_resolve_latest_tag(def.recipe, def.backend); } } - if (!latest_tag.empty() - && version_compare(installed_version, latest_tag) < 0) { + std::string installed_version_for_compare = normalize_expected_version(installed_version); + std::string latest_tag_for_compare = normalize_expected_version(latest_tag); + if (!latest_tag_for_compare.empty() + && version_compare(installed_version_for_compare, latest_tag_for_compare) < 0) { backend["state"] = "update_available"; backend["message"] = "Newer upstream release available: " + latest_tag; backend["action"] = get_install_command(def.recipe, def.backend); @@ -1925,6 +1934,82 @@ std::string identify_rocm_arch_from_name(const std::string& device_name) { return ""; } +std::string rocm_arch_to_release_target(const std::string& arch) { + // ROCm backend release repos (whisper.cpp-rocm, vllm-rocm, llamacpp-rocm + // nightly) publish discrete RDNA GPUs under a *family* target while APUs ship + // per-specific 
ISA. get_rocm_arch() returns the specific ISA (e.g. gfx1201) + // which is right for TheRock runtime paths but does not exist as a per-target + // asset name. Collapse discrete dGPU ISAs to their family here. + // + // Specific ISA -> release target: + // gfx1010-1012 -> (no asset family published; pass through) + // gfx1030-1036 -> gfx103X (RDNA2 dGPU) + // gfx1100-1103 -> gfx110X (RDNA3 dGPU) + // gfx1200/1201 -> gfx120X (RDNA4 dGPU) + // gfx1150/1151/1152 -> unchanged (APU, published per-specific) + // gfx90X / gfx94X / etc. -> unchanged (handled elsewhere / pass through) + // already-family (gfx1XXX with trailing X) and unknown -> unchanged + if (arch.empty()) { + return arch; + } + + // Already a family target (trailing 'X') or non-gfx token: leave as-is. + if (arch.back() == 'X' || arch.compare(0, 3, "gfx") != 0) { + return arch; + } + + // APUs are published per-specific ISA; do not collapse them. + if (arch == "gfx1150" || arch == "gfx1151" || arch == "gfx1152") { + return arch; + } + + // Match a 4-digit RDNA gfx token: "gfx" + three family digits + one step character, e.g. gfx1201. + // Collapse the trailing step nibble to 'X' to form the family target for the + // RDNA dGPU families that publish family assets (gfx103X/110X/120X). + if (arch.size() == 7) { + const std::string base3 = arch.substr(3, 3); // e.g. "120" from gfx1201 + if (base3 == "103" || base3 == "110" || base3 == "120") { + return "gfx" + base3 + "X"; + } + } + + // Anything else (data-center gfx90X/94X already family, gfx101X dGPU, etc.) + // passes through unchanged. + return arch; +} + +std::string strip_rocm_release_target_suffix(const std::string& version) { + const size_t dash = version.rfind("-gfx"); + if (dash == std::string::npos) { + return version; + } + + const std::string token = version.substr(dash + 1); // gfx... 
+ if (token.compare(0, 3, "gfx") != 0) { + return version; + } + + auto is_hex = [](char ch) { + return (ch >= '0' && ch <= '9') || + (ch >= 'a' && ch <= 'f') || + (ch >= 'A' && ch <= 'F'); + }; + + const std::string rest = token.substr(3); + bool valid_gfx_suffix = false; + if (rest.size() == 3) { + // e.g. gfx90a + valid_gfx_suffix = std::all_of(rest.begin(), rest.end(), is_hex); + } else if (rest.size() == 4) { + // e.g. gfx1151, gfx120X, gfx110X + valid_gfx_suffix = std::all_of(rest.begin(), rest.end(), is_hex) || + (std::all_of(rest.begin(), rest.begin() + 3, is_hex) && + (rest[3] == 'X' || rest[3] == 'x')); + } + + return valid_gfx_suffix ? version.substr(0, dash) : version; +} + // Linux: identify NPU architecture from sysfs accel subsystem // Checks /sys/class/accel/*/device/driver for amdxdna, then reads number of columns // If amdxdna not loaded, fall back to PCI device IDs @@ -2135,6 +2220,12 @@ std::string SystemInfo::get_rocm_arch() { return ""; // No supported architecture found } +std::string SystemInfo::get_rocm_release_target() { + // Same GPU selection as get_rocm_arch(), but mapped to the per-target asset + // name used by ROCm backend release repos. See rocm_arch_to_release_target(). + return rocm_arch_to_release_target(get_rocm_arch()); +} + static int cuda_sm_value(const std::string& arch) { if (arch.size() <= 3 || arch.substr(0, 3) != "sm_") { return 0; diff --git a/test/test_rocm_release_target.py b/test/test_rocm_release_target.py new file mode 100644 index 000000000..6f788be12 --- /dev/null +++ b/test/test_rocm_release_target.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +""" +CPU-runnable unit tests for ROCm specific-ISA -> release-target mapping logic +(system_info.cpp::rocm_arch_to_release_target()). + +ROCm backend release repos (whisper.cpp-rocm, vllm-rocm, llamacpp-rocm nightly) +publish discrete RDNA GPUs under a *family* target (gfx103X / gfx110X / gfx120X) +while APUs ship per-specific ISA (gfx1150 / gfx1151 / gfx1152). 
get_rocm_arch() +returns the specific ISA (e.g. gfx1201), which is correct for TheRock runtime +paths but 404s on these per-target asset names. This test replicates +the C++ mapping so it can be validated without AMD hardware. + +Run with: python -m pytest test/test_rocm_release_target.py + or: python test/test_rocm_release_target.py +""" + +import re +import unittest + + +# --------------------------------------------------------------------------- +# Python replica of system_info.cpp::rocm_arch_to_release_target() +# --------------------------------------------------------------------------- +def rocm_arch_to_release_target(arch: str) -> str: + if not arch: + return arch + # Already a family target (trailing 'X') or non-gfx token: leave as-is. + if arch.endswith("X") or not arch.startswith("gfx"): + return arch + # APUs are published per-specific ISA; do not collapse them. + if arch in ("gfx1150", "gfx1151", "gfx1152"): + return arch + # 4-digit RDNA gfx token: collapse the trailing step nibble to 'X' to form + # the family target for the families that publish family assets. + if len(arch) == 7: + base3 = arch[3:6] # e.g. "120" from "gfx1201" + if base3 in ("103", "110", "120"): + return "gfx" + base3 + "X" + # Anything else passes through unchanged. + return arch + + +# --------------------------------------------------------------------------- +# Python replica of vllm_server.cpp ROCm release tag construction +# --------------------------------------------------------------------------- +def strip_vllm_rocm_target_suffix(version: str) -> str: + # vllm-rocm GitHub releases are tagged as: + # vllm0.22.1-rocm7.13.0-gfx120X + # Runtime config may contain a base version, or resolve_user_version() may + # return a target-suffixed tag from GitHub latest / explicit user pin. Strip + # the optional target suffix before appending this machine's target. 
+ return re.sub(r"-gfx(?:[0-9a-f]{3,4}|[0-9a-f]{3}X)$", "", version) + + +def vllm_rocm_release_tag(version: str, target_arch: str) -> str: + return strip_vllm_rocm_target_suffix(version) + "-" + target_arch + + +class TestRocmReleaseTarget(unittest.TestCase): + def test_rdna4_dgpu_collapses_to_family(self): + # R9700 (gfx1201) must map to the published gfx120X target. + self.assertEqual(rocm_arch_to_release_target("gfx1201"), "gfx120X") + self.assertEqual(rocm_arch_to_release_target("gfx1200"), "gfx120X") + + def test_rdna3_dgpu_collapses_to_family(self): + for isa in ("gfx1100", "gfx1101", "gfx1102", "gfx1103"): + with self.subTest(isa=isa): + self.assertEqual(rocm_arch_to_release_target(isa), "gfx110X") + + def test_rdna2_dgpu_collapses_to_family(self): + for isa in ("gfx1030", "gfx1031", "gfx1032", "gfx1033", + "gfx1034", "gfx1035", "gfx1036"): + with self.subTest(isa=isa): + self.assertEqual(rocm_arch_to_release_target(isa), "gfx103X") + + def test_apus_stay_specific(self): + # APU assets are published per-specific ISA; must NOT be collapsed. + for isa in ("gfx1150", "gfx1151", "gfx1152"): + with self.subTest(isa=isa): + self.assertEqual(rocm_arch_to_release_target(isa), isa) + + def test_family_targets_are_idempotent(self): + for fam in ("gfx103X", "gfx110X", "gfx120X"): + with self.subTest(fam=fam): + self.assertEqual(rocm_arch_to_release_target(fam), fam) + + def test_datacenter_and_other_archs_pass_through(self): + # Data-center / CDNA and gfx101X dGPU have no family-collapse rule here. 
+ for isa in ("gfx908", "gfx90a", "gfx942", "gfx1010", "gfx1011", "gfx1012"): + with self.subTest(isa=isa): + self.assertEqual(rocm_arch_to_release_target(isa), isa) + + def test_empty_and_non_gfx_pass_through(self): + for val in ("", "sm_120", "radeon", "unknown"): + with self.subTest(val=val): + self.assertEqual(rocm_arch_to_release_target(val), val) + + def test_maps_to_release_target_names_without_advertising_backend_support(self): + # This mapping only chooses the asset-name token for recipes that already + # decided the current GPU is supported. Recipe availability remains + # controlled by RECIPE_DEFS in system_info.cpp (for example, + # whispercpp/vllm intentionally do not advertise gfx103X or gfx1152). + known_release_target_names = {"gfx103X", "gfx110X", "gfx120X", + "gfx1150", "gfx1151", "gfx1152"} + for isa in ("gfx1201", "gfx1100", "gfx1030", "gfx1151"): + with self.subTest(isa=isa): + self.assertIn(rocm_arch_to_release_target(isa), known_release_target_names) + + +class TestVllmRocmReleaseTags(unittest.TestCase): + def test_base_version_appends_current_target(self): + self.assertEqual( + vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0", "gfx120X"), + "vllm0.22.1-rocm7.13.0-gfx120X", + ) + + def test_existing_target_suffix_is_replaced_not_double_appended(self): + # GitHub marks one target release as "Latest". If resolve_user_version() + # returns that full tag, vllm_server.cpp must rebuild the tag for the + # current machine's ROCm release target rather than generating a bogus + # "...-gfx1151-gfx120X" tag. 
+ self.assertEqual( + vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0-gfx1151", "gfx120X"), + "vllm0.22.1-rocm7.13.0-gfx120X", + ) + + def test_existing_matching_target_suffix_is_idempotent(self): + self.assertEqual( + vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0-gfx120X", "gfx120X"), + "vllm0.22.1-rocm7.13.0-gfx120X", + ) + + def test_specific_target_suffix_can_be_replaced_with_specific_apu_target(self): + self.assertEqual( + vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0-gfx120X", "gfx1151"), + "vllm0.22.1-rocm7.13.0-gfx1151", + ) + + +if __name__ == "__main__": + unittest.main()