lemonade-sdk · bong-water-water-bong · Jun 25, 2026 · chatgpt-codex-connector · Jun 25, 2026 · chatgpt-codex-connector
diff --git a/docs/guide/configuration/vllm.md b/docs/guide/configuration/vllm.md
@@ -31,7 +31,7 @@ curl -X POST http://localhost:13305/api/v1/install \
   -d '{"recipe": "vllm", "backend": "rocm"}'
 ```
 
-The install fetches a per-GPU-target release (e.g. `…-gfx1151`, `…-gfx1150`) from [lemonade-sdk/vllm-rocm](https://github.com/lemonade-sdk/vllm-rocm/releases). The base version is pinned in [`backend_versions.json`](https://github.com/lemonade-sdk/lemonade/blob/main/src/cpp/resources/backend_versions.json); the `-{gfx_target}` suffix is appended at runtime from `SystemInfo::get_rocm_arch()`, so a single pin covers all supported architectures.
+The install fetches a per-GPU-target release (e.g. `…-gfx1151`, `…-gfx120X`) from [lemonade-sdk/vllm-rocm](https://github.com/lemonade-sdk/vllm-rocm/releases). The base version is pinned in [`backend_versions.json`](https://github.com/lemonade-sdk/lemonade/blob/main/src/cpp/resources/backend_versions.json); Lemonade appends the machine's ROCm release target at runtime. Discrete RDNA GPUs use family release targets such as `gfx110X`/`gfx120X` even when the physical ISA is specific (for example, `gfx1201`), while APUs keep their specific targets such as `gfx1151`. User pins may be either the base version or a full per-target release tag; Lemonade normalizes either form to the current machine's target so the happy path does not require knowing the asset naming convention.
 
 ## Use
 

diff --git a/src/cpp/include/lemon/system_info.h b/src/cpp/include/lemon/system_info.h
@@ -109,6 +109,13 @@ class SystemInfo {
 
     // Device support detection
     static std::string get_rocm_arch();
+    // ROCm backend release assets (whisper.cpp-rocm, vllm-rocm, llamacpp-rocm
+    // nightly) are published per *release target*: discrete RDNA GPUs ship under
+    // a family target (gfx110X / gfx120X / gfx103X) while APUs ship per-specific
+    // ISA (gfx1150 / gfx1151 / gfx1152). get_rocm_arch() returns the specific ISA
+    // (e.g. gfx1201), which is correct for TheRock runtime paths but 404s on these
+    // per-target asset names. Use this when constructing such asset filenames.
+    static std::string get_rocm_release_target();
     static std::string get_cuda_arch();
 
     // CUDA release assets are architecture-specific (sm_89, sm_120, etc.).
@@ -231,6 +238,18 @@ std::unique_ptr<SystemInfo> create_system_info();
 // Returns architecture string (e.g., "gfx1150", "gfx1151", "gfx110X", "gfx120X") or empty string if not recognized
 std::string identify_rocm_arch_from_name(const std::string& device_name);
 
+// Map a specific ROCm ISA (e.g. "gfx1201") to the *release target* name used in
+// ROCm backend asset filenames (e.g. "gfx120X"). Seven-character gfx103?, gfx110?
+// and gfx120? tokens collapse to gfx103X, gfx110X and gfx120X respectively; APUs
+// (gfx1150/1151/1152) and already-family, non-gfx, empty or unknown values pass
+// through unchanged. Pure function with no hardware dependency so it can be unit-tested.
+std::string rocm_arch_to_release_target(const std::string& arch);
+
+// Remove a final "-gfx..." release-target suffix from a release tag, if present.
+// Example: "vllm0.22.1-rocm7.13.0-gfx120X" -> "vllm0.22.1-rocm7.13.0". A suffix is
+// "gfx" + 3-4 hex digits or 3 hex digits + 'X'/'x'; any other input is returned unchanged.
+std::string strip_rocm_release_target_suffix(const std::string& version);
+
 // Helper to identify CUDA Compute Capability from a marketing GPU name
 // Returns an sm_XX token (e.g., "sm_75", "sm_86", "sm_120") or empty string if not recognized
 std::string identify_cuda_arch_from_name(const std::string& device_name);

diff --git a/src/cpp/resources/backend_versions.json b/src/cpp/resources/backend_versions.json
@@ -90,7 +90,7 @@
     "metal": "b17"
   },
   "vllm": {
-    "rocm": "vllm0.20.1-rocm7.12.0"
+    "rocm": "vllm0.22.1-rocm7.13.0"
   },
   "moonshine": {
     "cpu": "moonshine0.0.62"

diff --git a/src/cpp/server/backends/llamacpp_server.cpp b/src/cpp/server/backends/llamacpp_server.cpp
@@ -164,7 +164,10 @@ InstallParams LlamaCppServer::get_install_params(const std::string& backend, con
 #endif
     } else if (resolved_backend == "rocm-nightly") {
         params.repo = "lemonade-sdk/llamacpp-rocm";
-        std::string target_arch = SystemInfo::get_rocm_arch();
+        // llamacpp-rocm nightly publishes per release target (gfx103X/110X/120X
+        // for discrete RDNA GPUs, gfx1150/1151/1152 for APUs), not per specific
+        // ISA. Map gfx1201 -> gfx120X so the asset resolves.
+        std::string target_arch = SystemInfo::get_rocm_release_target();
         if (target_arch.empty()) {
             throw std::runtime_error(
                 SystemInfo::get_unsupported_backend_error("llamacpp", "rocm-nightly")

diff --git a/src/cpp/server/backends/vllm_server.cpp b/src/cpp/server/backends/vllm_server.cpp
@@ -79,16 +79,20 @@ InstallParams VLLMServer::get_install_params(const std::string& backend, const s
 
     if (backend == "rocm") {
         params.repo = "lemonade-sdk/vllm-rocm";
-        std::string target_arch = SystemInfo::get_rocm_arch();
+        // vllm-rocm publishes one release per *release target*: gfx120X for RDNA4
+        // dGPUs, gfx1151 for Strix Halo APU, etc. get_rocm_arch() returns the
+        // specific ISA (gfx1201) which has no matching release tag and 404s, so
+        // map to the release target.
+        std::string target_arch = SystemInfo::get_rocm_release_target();
         if (target_arch.empty()) {
             throw std::runtime_error(
                 SystemInfo::get_unsupported_backend_error("vllm", "rocm")
             );
         }
 #ifdef __linux__
         // One release per GPU target since 0.19.1: release tag is
-        // {version}-{target_arch}, e.g. vllm0.20.1-rocm7.12.0-gfx1151.
-        std::string release_tag = version + "-" + target_arch;
+        // {version}-{target_arch}, e.g. vllm0.22.1-rocm7.13.0-gfx120X.
+        std::string release_tag = strip_rocm_release_target_suffix(version) + "-" + target_arch;
         params.version_override = release_tag;
         params.filename = release_tag + "-x64.tar.gz";
 #else

diff --git a/src/cpp/server/backends/whisper_server.cpp b/src/cpp/server/backends/whisper_server.cpp
@@ -94,7 +94,11 @@ InstallParams WhisperServer::get_install_params(const std::string& backend, cons
         throw std::runtime_error("Unsupported platform for whisper.cpp cpu backend");
 #endif
     } else if (backend == "rocm") {
-        std::string rocm_arch = SystemInfo::get_rocm_arch();
+        // whisper.cpp-rocm publishes per release target (gfx120X family for RDNA4
+        // dGPUs, gfx1151 for Strix Halo APU, etc.), not per specific ISA. Use the
+        // release-target mapping so gfx1201 resolves to the published gfx120X
+        // asset instead of 404ing.
+        std::string rocm_arch = SystemInfo::get_rocm_release_target();
         if (rocm_arch.empty()) {
             throw std::runtime_error(SystemInfo::get_unsupported_backend_error("whispercpp", "rocm"));
         }

diff --git a/src/cpp/server/system_info.cpp b/src/cpp/server/system_info.cpp
@@ -1496,8 +1496,12 @@ json SystemInfo::build_recipes_info(const json& devices) {
                 }
             }
         } else {
+            bool is_vllm_rocm = def.recipe == "vllm" && def.backend == "rocm";
+            auto normalize_expected_version = [is_vllm_rocm](const std::string& version) {
+                return is_vllm_rocm ? strip_rocm_release_target_suffix(version) : version;
+            };
             std::string installed_version = get_recipe_version(def.recipe, def.backend);
-            std::string expected_version = get_expected_backend_version(def.recipe, def.backend);
+            std::string expected_version = normalize_expected_version(get_expected_backend_version(def.recipe, def.backend));
 
             // The user's *_bin pin overrides what the state machine considers
             // "expected" — otherwise an explicit-tag pin (e.g. b8664) would
@@ -1511,7 +1515,10 @@ json SystemInfo::build_recipes_info(const json& devices) {
                         expected_version.clear();
                     } else {
                         // Bare upstream tag — that tag IS what the user expects.
-                        expected_version = user_pin;
+                        // vllm-rocm user pins may be base versions or full per-target
+                        // release tags; normalize either form to the base version so
+                        // status matches the install path's current-target suffix.
+                        expected_version = normalize_expected_version(user_pin);
                     }
                 }
             }
@@ -1576,8 +1583,10 @@ json SystemInfo::build_recipes_info(const json& devices) {
                         latest_tag = bm->get_or_resolve_latest_tag(def.recipe, def.backend);
                     }
                 }
-                if (!latest_tag.empty()
-                    && version_compare(installed_version, latest_tag) < 0) {
+                std::string installed_version_for_compare = normalize_expected_version(installed_version);
+                std::string latest_tag_for_compare = normalize_expected_version(latest_tag);
+                if (!latest_tag_for_compare.empty()
+                    && version_compare(installed_version_for_compare, latest_tag_for_compare) < 0) {
                     backend["state"] = "update_available";
                     backend["message"] = "Newer upstream release available: " + latest_tag;
                     backend["action"] = get_install_command(def.recipe, def.backend);
@@ -1925,6 +1934,82 @@ std::string identify_rocm_arch_from_name(const std::string& device_name) {
     return "";
 }
 
+std::string rocm_arch_to_release_target(const std::string& arch) {
+    // Converts the specific ISA reported by get_rocm_arch() (e.g. gfx1201) into
+    // the token that ROCm backend release repos (whisper.cpp-rocm, vllm-rocm,
+    // llamacpp-rocm nightly) put in their asset names. Discrete RDNA GPUs are
+    // published under a family name, so their last character becomes 'X'; APUs
+    // are published per specific ISA and keep their name.
+    //   input                          result
+    //   gfx103? (7 characters)         gfx103X
+    //   gfx110? (7 characters)         gfx110X
+    //   gfx120? (7 characters)         gfx120X
+    //   gfx1150 / gfx1151 / gfx1152    unchanged (APU)
+    //   anything ending in 'X'         unchanged (already a family target)
+    //   empty or not starting "gfx"    unchanged
+    //   everything else                unchanged (gfx908, gfx90a, gfx1010, ...)
+    static const char* const kApuTargets[] = {"gfx1150", "gfx1151", "gfx1152"};
+    static const char* const kFamilyStems[] = {"gfx103", "gfx110", "gfx120"};
+    constexpr size_t kIsaLength = 7;   // "gfx" + four characters
+    constexpr size_t kStemLength = 6;  // "gfx" + the three family characters
+
+    // Nothing to translate: no input, not a gfx token, or already a family.
+    const bool is_gfx_token = arch.compare(0, 3, "gfx") == 0;
+    if (arch.empty() || !is_gfx_token || arch.back() == 'X') {
+        return arch;
+    }
+
+    // APU assets carry the exact ISA, so these must never be collapsed.
+    for (const char* apu : kApuTargets) {
+        if (arch == apu) {
+            return arch;
+        }
+    }
+
+    // Only seven-character ISAs can belong to one of the published families.
+    if (arch.size() == kIsaLength) {
+        for (const char* stem : kFamilyStems) {
+            if (arch.compare(0, kStemLength, stem) == 0) {
+                return std::string(stem) + "X";
+            }
+        }
+    }
+
+    return arch;
+}
+
+std::string strip_rocm_release_target_suffix(const std::string& version) {
+    // Drops a trailing "-gfx<id>" release-target suffix and returns the rest.
+    // <id> must be 3-4 hex digits (gfx90a, gfx1151) or 3 hex digits followed by
+    // 'X'/'x' (gfx120X); for any other input the string is returned unchanged.
+    const size_t dash = version.rfind("-gfx");
+    if (dash == std::string::npos) {
+        return version;
+    }
+
+    auto is_hex = [](char ch) {
+        return (ch >= '0' && ch <= '9') ||
+               (ch >= 'a' && ch <= 'f') ||
+               (ch >= 'A' && ch <= 'F');
+    };
+
+    // rfind() already proved the segment starts with "gfx", so only the id after
+    // it is validated, using offsets into `version` instead of copied substrings.
+    const size_t id_pos = dash + 4;  // first character after "-gfx"
+    const size_t id_len = version.size() - id_pos;
+    if (id_len != 3 && id_len != 4) {
+        return version;
+    }
+
+    // The first three characters are always hex; an optional fourth is either a
+    // hex digit (specific ISA) or the family wildcard 'X'/'x'.
+    const auto id_begin = version.begin() + id_pos;
+    const bool stem_ok = std::all_of(id_begin, id_begin + 3, is_hex);
+    const char last = version.back();
+    const bool tail_ok = id_len == 3 || is_hex(last) || last == 'X' || last == 'x';
+    return (stem_ok && tail_ok) ? version.substr(0, dash) : version;
+}
+
 // Linux: identify NPU architecture from sysfs accel subsystem
 // Checks /sys/class/accel/*/device/driver for amdxdna, then reads number of columns
 // If amdxdna not loaded, fall back to PCI device IDs
@@ -2135,6 +2220,12 @@ std::string SystemInfo::get_rocm_arch() {
     return "";  // No supported architecture found
 }
 
+std::string SystemInfo::get_rocm_release_target() {
+    // Reuse get_rocm_arch()'s GPU selection, then map the ISA to its asset token.
+    const std::string specific_isa = get_rocm_arch();
+    return rocm_arch_to_release_target(specific_isa);
+}
+
 static int cuda_sm_value(const std::string& arch) {
     if (arch.size() <= 3 || arch.substr(0, 3) != "sm_") {
         return 0;

diff --git a/test/test_rocm_release_target.py b/test/test_rocm_release_target.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+"""
+CPU-runnable unit tests for ROCm specific-ISA -> release-target mapping logic
+(system_info.cpp::rocm_arch_to_release_target()).
+
+ROCm backend release repos (whisper.cpp-rocm, vllm-rocm, llamacpp-rocm nightly)
+publish discrete RDNA GPUs under a *family* target (gfx103X / gfx110X / gfx120X)
+while APUs ship per-specific ISA (gfx1150 / gfx1151 / gfx1152). get_rocm_arch()
+returns the specific ISA (e.g. gfx1201), which is correct for TheRock runtime
+paths but 404s on these per-target asset names. This test replicates
+the C++ mapping so it can be validated without AMD hardware.
+
+Run with: python -m pytest test/test_rocm_release_target.py
+      or: python test/test_rocm_release_target.py
+"""
+
+import re
+import unittest
+
+
+# ---------------------------------------------------------------------------
+# Python replica of system_info.cpp::rocm_arch_to_release_target()
+# ---------------------------------------------------------------------------
+def rocm_arch_to_release_target(arch: str) -> str:
+    if not arch:
+        return arch
+    # Already a family target (trailing 'X') or non-gfx token: leave as-is.
+    if arch.endswith("X") or not arch.startswith("gfx"):
+        return arch
+    # APUs are published per-specific ISA; do not collapse them.
+    if arch in ("gfx1150", "gfx1151", "gfx1152"):
+        return arch
+    # 4-digit RDNA gfx token: collapse the trailing step nibble to 'X' to form
+    # the family target for the families that publish family assets.
+    if len(arch) == 7:
+        base3 = arch[3:6]  # e.g. "120" from "gfx1201"
+        if base3 in ("103", "110", "120"):
+            return "gfx" + base3 + "X"
+    # Anything else passes through unchanged.
+    return arch
+
+
+# ---------------------------------------------------------------------------
+# Python replica of vllm_server.cpp ROCm release tag construction
+# ---------------------------------------------------------------------------
+def strip_vllm_rocm_target_suffix(version: str) -> str:
+    # vllm-rocm GitHub releases are tagged as:
+    #   vllm0.22.1-rocm7.13.0-gfx120X
+    # Runtime config may contain a base version, or resolve_user_version() may
+    # return a target-suffixed tag from GitHub latest / explicit user pin. Strip
+    # the optional target suffix before appending this machine's target.
+    return re.sub(r"-gfx(?:[0-9a-f]{3,4}|[0-9a-f]{3}X)$", "", version)
+
+
+def vllm_rocm_release_tag(version: str, target_arch: str) -> str:
+    return strip_vllm_rocm_target_suffix(version) + "-" + target_arch
+
+
+class TestRocmReleaseTarget(unittest.TestCase):
+    def test_rdna4_dgpu_collapses_to_family(self):
+        # R9700 (gfx1201) must map to the published gfx120X target.
+        self.assertEqual(rocm_arch_to_release_target("gfx1201"), "gfx120X")
+        self.assertEqual(rocm_arch_to_release_target("gfx1200"), "gfx120X")
+
+    def test_rdna3_dgpu_collapses_to_family(self):
+        for isa in ("gfx1100", "gfx1101", "gfx1102", "gfx1103"):
+            with self.subTest(isa=isa):
+                self.assertEqual(rocm_arch_to_release_target(isa), "gfx110X")
+
+    def test_rdna2_dgpu_collapses_to_family(self):
+        for isa in ("gfx1030", "gfx1031", "gfx1032", "gfx1033",
+                    "gfx1034", "gfx1035", "gfx1036"):
+            with self.subTest(isa=isa):
+                self.assertEqual(rocm_arch_to_release_target(isa), "gfx103X")
+
+    def test_apus_stay_specific(self):
+        # APU assets are published per-specific ISA; must NOT be collapsed.
+        for isa in ("gfx1150", "gfx1151", "gfx1152"):
+            with self.subTest(isa=isa):
+                self.assertEqual(rocm_arch_to_release_target(isa), isa)
+
+    def test_family_targets_are_idempotent(self):
+        for fam in ("gfx103X", "gfx110X", "gfx120X"):
+            with self.subTest(fam=fam):
+                self.assertEqual(rocm_arch_to_release_target(fam), fam)
+
+    def test_datacenter_and_other_archs_pass_through(self):
+        # Data-center / CDNA and gfx101X dGPU have no family-collapse rule here.
+        for isa in ("gfx908", "gfx90a", "gfx942", "gfx1010", "gfx1011", "gfx1012"):
+            with self.subTest(isa=isa):
+                self.assertEqual(rocm_arch_to_release_target(isa), isa)
+
+    def test_empty_and_non_gfx_pass_through(self):
+        for val in ("", "sm_120", "radeon", "unknown"):
+            with self.subTest(val=val):
+                self.assertEqual(rocm_arch_to_release_target(val), val)
+
+    def test_maps_to_release_target_names_without_advertising_backend_support(self):
+        # This mapping only chooses the asset-name token for recipes that already
+        # decided the current GPU is supported. Recipe availability remains
+        # controlled by RECIPE_DEFS in system_info.cpp (for example,
+        # whispercpp/vllm intentionally do not advertise gfx103X or gfx1152).
+        known_release_target_names = {"gfx103X", "gfx110X", "gfx120X",
+                                      "gfx1150", "gfx1151", "gfx1152"}
+        for isa in ("gfx1201", "gfx1100", "gfx1030", "gfx1151"):
+            with self.subTest(isa=isa):
+                self.assertIn(rocm_arch_to_release_target(isa), known_release_target_names)
+
+
+class TestVllmRocmReleaseTags(unittest.TestCase):
+    def test_base_version_appends_current_target(self):
+        self.assertEqual(
+            vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0", "gfx120X"),
+            "vllm0.22.1-rocm7.13.0-gfx120X",
+        )
+
+    def test_existing_target_suffix_is_replaced_not_double_appended(self):
+        # GitHub marks one target release as "Latest". If resolve_user_version()
+        # returns that full tag, vllm_server.cpp must rebuild the tag for the
+        # current machine's ROCm release target rather than generating a bogus
+        # "...-gfx1151-gfx120X" tag.
+        self.assertEqual(
+            vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0-gfx1151", "gfx120X"),
+            "vllm0.22.1-rocm7.13.0-gfx120X",
+        )
+
+    def test_existing_matching_target_suffix_is_idempotent(self):
+        self.assertEqual(
+            vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0-gfx120X", "gfx120X"),
+            "vllm0.22.1-rocm7.13.0-gfx120X",
+        )
+
+    def test_specific_target_suffix_can_be_replaced_with_specific_apu_target(self):
+        self.assertEqual(
+            vllm_rocm_release_tag("vllm0.22.1-rocm7.13.0-gfx120X", "gfx1151"),
+            "vllm0.22.1-rocm7.13.0-gfx1151",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
-Original file line number
+Diff line change
@@ Expand Up / @@ -31,7 +31,7 @@ curl -X POST http://localhost:13305/api/v1/install \ @@
       -d '{"recipe": "vllm", "backend": "rocm"}'
     ```
-    The install fetches a per-GPU-target release (e.g. `…-gfx1151`, `…-gfx1150`) from [lemonade-sdk/vllm-rocm](https://github.com/lemonade-sdk/vllm-rocm/releases). The base version is pinned in [`backend_versions.json`](https://github.com/lemonade-sdk/lemonade/blob/main/src/cpp/resources/backend_versions.json); the `-{gfx_target}` suffix is appended at runtime from `SystemInfo::get_rocm_arch()`, so a single pin covers all supported architectures.
+    The install fetches a per-GPU-target release (e.g. `…-gfx1151`, `…-gfx120X`) from [lemonade-sdk/vllm-rocm](https://github.com/lemonade-sdk/vllm-rocm/releases). The base version is pinned in [`backend_versions.json`](https://github.com/lemonade-sdk/lemonade/blob/main/src/cpp/resources/backend_versions.json); Lemonade appends the machine's ROCm release target at runtime. Discrete RDNA GPUs use family release targets such as `gfx110X`/`gfx120X` even when the physical ISA is specific (for example, `gfx1201`), while APUs keep their specific targets such as `gfx1151`. User pins may be either the base version or a full per-target release tag; Lemonade normalizes either form to the current machine's target so the happy path does not require knowing the asset naming convention.
     ## Use
@@ Expand Down @@