Skip to content

ramalama does not pass through GPU when run in a toolbox(1), though llama.cpp does #2798

Description

@ibotty

Issue Description

With

ramalama --debug --engine podman serve hf://bartowski/Qwen_Qwen3.6-27B-GGUF:Q6_K_L --image=quay.io/ramalama/rocm --runtime-args="--spec-default" --privileged  --backend=vulkan --device /dev/dri --device /dev/kfd

running `podman exec -itl ./llama-server --list-devices` against the container shows no available devices.

On the host (inside a toolbox), however, `llama-server` does see the device:

> llama-server --list-devices
ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
  Device 0: AMD Radeon 780M Graphics, gfx1103 (0x1103), VMM: no, Wave Size: 32
Available devices:
  ROCm0: AMD Radeon 780M Graphics (31038 MiB, 31038 MiB free)

Steps to reproduce the issue

See above.

Describe the results you received

See above.

Describe the results you expected

See above.

ramalama info output

{
    "Accelerator": "hip",
    "Config": {
        "benchmarks": {},
        "container": true,
        "engine": "podman",
        "provider": {
            "openai": {}
        },
        "runtimes": {
            "llama_cpp": {},
            "mlx": {}
        },
        "settings": {
            "config_files": [
                "/usr/share/ramalama/ramalama.conf",
                "/var/home/tob/.config/ramalama/ramalama.conf"
            ]
        },
        "user": {}
    },
    "Engine": {
        "Info": {
            "host": {
                "arch": "amd64",
                "buildahVersion": "1.43.1",
                "cgroupControllers": [
                    "cpu",
                    "io",
                    "memory",
                    "pids"
                ],
                "cgroupManager": "systemd",
                "cgroupVersion": "v2",
                "conmon": {
                    "package": "conmon-2.2.1-2.fc44.x86_64",
                    "path": "/usr/bin/conmon",
                    "version": "conmon version 2.2.1, commit: "
                },
                "cpuUtilization": {
                    "idlePercent": 95.08,
                    "systemPercent": 1.33,
                    "userPercent": 3.58
                },
                "cpus": 16,
                "databaseBackend": "sqlite",
                "distribution": {
                    "distribution": "fedora",
                    "variant": "sway-atomic",
                    "version": "44"
                },
                "emulatedArchitectures": [
                    "linux/arm64",
                    "linux/arm64be"
                ],
                "eventLogger": "journald",
                "freeLocks": 2044,
                "hostname": "framework",
                "idMappings": {
                    "gidmap": [
                        {
                            "container_id": 0,
                            "host_id": 1000,
                            "size": 1
                        },
                        {
                            "container_id": 1,
                            "host_id": 100000,
                            "size": 65536
                        }
                    ],
                    "uidmap": [
                        {
                            "container_id": 0,
                            "host_id": 1000,
                            "size": 1
                        },
                        {
                            "container_id": 1,
                            "host_id": 100000,
                            "size": 65536
                        }
                    ]
                },
                "kernel": "7.0.12-200.fc44.x86_64",
                "linkmode": "dynamic",
                "logDriver": "journald",
                "memFree": 13176299520,
                "memTotal": 65092734976,
                "networkBackend": "netavark",
                "networkBackendInfo": {
                    "backend": "netavark",
                    "defaultNetwork": "podman",
                    "dns": {
                        "package": "aardvark-dns-1.17.1-1.fc44.x86_64",
                        "path": "/usr/libexec/podman/aardvark-dns",
                        "version": "aardvark-dns 1.17.1"
                    },
                    "package": "netavark-1.17.2-1.fc44.x86_64",
                    "path": "/usr/libexec/podman/netavark",
                    "version": "netavark 1.17.2"
                },
                "ociRuntime": {
                    "name": "crun",
                    "package": "crun-1.28-1.fc44.x86_64",
                    "path": "/usr/bin/crun",
                    "version": "crun version 1.28\ncommit: 54f16ffbefcd022bf032af768b5c5ce075c18bfc\nrundir: /run/user/1000/crun\nspec: 1.0.0\n+SYSTEMD +SELINUX +APPARMOR +CAP +SECCOMP +EBPF +CRIU +LIBKRUN +WASM:wasmedge +JSON_C"
                },
                "os": "linux",
                "pasta": {
                    "executable": "/usr/bin/pasta",
                    "package": "passt-0^20260526.g038c51e-1.fc44.x86_64",
                    "version": "pasta 0^20260526.g038c51e-1.fc44.x86_64\nCopyright Red Hat\nGNU General Public License, version 2 or later\n  <https://www.gnu.org/licenses/old-licenses/gpl-2.0.html>\nThis is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
                },
                "remoteSocket": {
                    "exists": true,
                    "path": "/run/user/1000/podman/podman.sock"
                },
                "rootlessNetworkCmd": "pasta",
                "security": {
                    "apparmorEnabled": false,
                    "capabilities": "CAP_CHOWN,CAP_DAC_OVERRIDE,CAP_FOWNER,CAP_FSETID,CAP_KILL,CAP_NET_BIND_SERVICE,CAP_SETFCAP,CAP_SETGID,CAP_SETPCAP,CAP_SETUID,CAP_SYS_CHROOT",
                    "rootless": true,
                    "seccompEnabled": true,
                    "seccompProfilePath": "/usr/share/containers/seccomp.json",
                    "selinuxEnabled": true
                },
                "serviceIsRemote": false,
                "slirp4netns": {
                    "executable": "/usr/bin/slirp4netns",
                    "package": "slirp4netns-1.3.1-4.fc44.x86_64",
                    "version": "slirp4netns version 1.3.1\ncommit: e5e368c4f5db6ae75c2fce786e31eef9da6bf236\nlibslirp: 4.9.1\nSLIRP_CONFIG_VERSION_MAX: 6\nlibseccomp: 2.6.0"
                },
                "swapFree": 2578432000,
                "swapTotal": 8589930496,
                "uptime": "76h 55m 7.00s (Approximately 3.17 days)",
                "variant": ""
            },
            "plugins": {
                "authorization": null,
                "log": [
                    "k8s-file",
                    "none",
                    "passthrough",
                    "journald"
                ],
                "network": [
                    "bridge",
                    "macvlan",
                    "ipvlan"
                ],
                "volume": [
                    "local"
                ]
            },
            "registries": {
                "search": [
                    "registry.fedoraproject.org",
                    "registry.access.redhat.com",
                    "docker.io"
                ]
            },
            "store": {
                "configFile": "/var/home/tob/.config/containers/storage.conf",
                "containerStore": {
                    "number": 2,
                    "paused": 0,
                    "running": 2,
                    "stopped": 0
                },
                "graphDriverName": "overlay",
                "graphOptions": {},
                "graphRoot": "/var/home/tob/.local/share/containers/storage",
                "graphRootAllocated": 998483427328,
                "graphRootUsed": 705889763328,
                "graphStatus": {
                    "Backing Filesystem": "btrfs",
                    "Native Overlay Diff": "true",
                    "Supports d_type": "true",
                    "Supports shifting": "false",
                    "Supports volatile": "true",
                    "Using metacopy": "false"
                },
                "imageCopyTmpDir": "/var/tmp",
                "imageStore": {
                    "number": 4
                },
                "runRoot": "/run/user/1000/containers",
                "transientStore": false,
                "volumePath": "/var/home/tob/.local/share/containers/storage/volumes"
            },
            "version": {
                "APIVersion": "5.8.2",
                "BuildOrigin": "Fedora Project",
                "Built": 1776124800,
                "BuiltTime": "Tue Apr 14 02:00:00 2026",
                "GitCommit": "5b263b5f5b48004a87caac44e67349a8266d9ef4",
                "GoVersion": "go1.26.1-X:nodwarf5",
                "Os": "linux",
                "OsArch": "linux/amd64",
                "Version": "5.8.2"
            }
        },
        "Name": "podman"
    },
    "Image": "quay.io/ramalama/ramalama:0.21",
    "RagImage": "quay.io/ramalama/ramalama-rag:0.21",
    "Runtimes": {
        "Available": [
            "llama.cpp",
            "mlx",
            "vllm"
        ],
        "Default": "llama.cpp"
    },
    "Selinux": false,
    "Shortnames": {
        "Files": [
            "/usr/share/ramalama/shortnames.conf"
        ],
        "Names": {
            "cerebrum": "hf://froggeric/Cerebrum-1.0-7b-GGUF",
            "deepseek": "hf://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF",
            "dragon": "hf://llmware/dragon-mistral-7b-v0",
            "gemma3": "hf://ggml-org/gemma-3-4b-it-GGUF",
            "gemma3:12b": "hf://ggml-org/gemma-3-12b-it-GGUF",
            "gemma3:1b": "hf://ggml-org/gemma-3-1b-it-GGUF",
            "gemma3:27b": "hf://ggml-org/gemma-3-27b-it-GGUF",
            "gemma3:4b": "hf://ggml-org/gemma-3-4b-it-GGUF",
            "gemma3n": "hf://ggml-org/gemma-3n-E4B-it-GGUF",
            "gemma3n:e2b": "hf://ggml-org/gemma-3n-E2B-it-GGUF",
            "gemma3n:e2b-it-f16": "hf://ggml-org/gemma-3n-E2B-it-GGUF:F16",
            "gemma3n:e2b-it-q8_0": "hf://ggml-org/gemma-3n-E2B-it-GGUF:Q8_0",
            "gemma3n:e4b": "hf://ggml-org/gemma-3n-E4B-it-GGUF",
            "gemma3n:e4b-it-f16": "hf://ggml-org/gemma-3n-E4B-it-GGUF:F16",
            "gemma3n:e4b-it-q8_0": "hf://ggml-org/gemma-3n-E4B-it-GGUF:Q8_0",
            "gpt-5.1": "openai://gpt-5.1-2025-11-13",
            "gpt-oss": "hf://ggml-org/gpt-oss-20b-GGUF",
            "gpt-oss:120b": "hf://ggml-org/gpt-oss-120b-GGUF",
            "gpt-oss:20b": "hf://ggml-org/gpt-oss-20b-GGUF",
            "granite": "hf://ibm-granite/granite-3.3-8b-instruct-GGUF",
            "granite-be-3.0:1b": "hf://taronaeo/Granite-3.0-1B-A400M-Instruct-BE-GGUF",
            "granite-be-3.3:2b": "hf://taronaeo/Granite-3.3-2B-Instruct-BE-GGUF",
            "granite-lab-7b": "hf://instructlab/granite-7b-lab-GGUF",
            "granite-lab-8b": "hf://ibm-granite/granite-3.3-8b-instruct-GGUF",
            "granite-lab:7b": "hf://instructlab/granite-7b-lab-GGUF",
            "granite:2b": "hf://bartowski/ibm-granite_granite-3.3-2b-instruct-GGUF",
            "granite:7b": "hf://instructlab/granite-7b-lab-GGUF",
            "granite:8b": "hf://ibm-granite/granite-3.3-8b-instruct-GGUF",
            "hermes": "hf://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
            "ibm/granite": "hf://ibm-granite/granite-3.3-8b-instruct-GGUF",
            "ibm/granite:2b": "hf://bartowski/ibm-granite_granite-3.3-2b-instruct-GGUF",
            "ibm/granite:7b": "hf://instructlab/granite-7b-lab-GGUF",
            "ibm/granite:8b": "hf://ibm-granite/granite-3.3-8b-instruct-GGUF",
            "merlinite": "hf://instructlab/merlinite-7b-lab-GGUF",
            "merlinite-lab-7b": "hf://instructlab/merlinite-7b-lab-GGUF",
            "merlinite-lab:7b": "hf://instructlab/merlinite-7b-lab-GGUF",
            "merlinite:7b": "hf://instructlab/merlinite-7b-lab-GGUF",
            "mistral": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF",
            "mistral-small3.1": "hf://bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF",
            "mistral-small3.1:24b": "hf://bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF",
            "mistral:7b": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF",
            "mistral:7b-v1": "hf://TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
            "mistral:7b-v2": "hf://TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
            "mistral:7b-v3": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF",
            "mistral_code_16k": "hf://TheBloke/Mistral-7B-Code-16K-qlora-GGUF",
            "mistral_codealpaca": "hf://TheBloke/Mistral-7B-codealpaca-lora-GGUF",
            "mixtao": "hf://MaziyarPanahi/MixTAO-7Bx2-MoE-Instruct-v7.0-GGUF",
            "openchat": "hf://TheBloke/openchat-3.5-0106-GGUF",
            "openorca": "hf://TheBloke/Mistral-7B-OpenOrca-GGUF",
            "phi2": "hf://MaziyarPanahi/phi-2-GGUF",
            "qwen2.5vl": "hf://ggml-org/Qwen2.5-VL-32B-Instruct-GGUF",
            "qwen2.5vl:2b": "hf://ggml-org/Qwen2.5-VL-2B-Instruct-GGUF",
            "qwen2.5vl:32b": "hf://ggml-org/Qwen2.5-VL-32B-Instruct-GGUF",
            "qwen2.5vl:3b": "hf://ggml-org/Qwen2.5-VL-3B-Instruct-GGUF",
            "qwen2.5vl:7b": "hf://ggml-org/Qwen2.5-VL-7B-Instruct-GGUF",
            "qwen3.5:0.8b": "hf://bartowski/Qwen_Qwen3.5-0.8B-GGUF",
            "qwen3.5:27b": "hf://bartowski/Qwen_Qwen3.5-27B-GGUF",
            "qwen3.5:2b": "hf://bartowski/Qwen_Qwen3.5-2B-GGUF",
            "qwen3.5:4b": "hf://bartowski/Qwen_Qwen3.5-4B-GGUF",
            "qwen3.5:9b": "hf://bartowski/Qwen_Qwen3.5-9B-GGUF",
            "qwen3:0.6b": "hf://Qwen/Qwen3-0.6B-GGUF",
            "qwen3:1.7b": "hf://Qwen/Qwen3-1.7B-GGUF",
            "qwen3:32b": "hf://Qwen/Qwen3-32B-GGUF",
            "qwen3:4b": "hf://Qwen/Qwen3-4B-GGUF",
            "qwen3:8b": "hf://Qwen/Qwen3-8B-GGUF",
            "smollm:135m": "hf://HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF",
            "smolvlm": "hf://ggml-org/SmolVLM-500M-Instruct-GGUF",
            "smolvlm:256m": "hf://ggml-org/SmolVLM-256M-Instruct-GGUF",
            "smolvlm:2b": "hf://ggml-org/SmolVLM-Instruct-GGUF",
            "smolvlm:500m": "hf://ggml-org/SmolVLM-500M-Instruct-GGUF",
            "stories-be:260k": "hf://taronaeo/tinyllamas-BE/stories260K-be.gguf",
            "tiny": "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
            "tinyllama": "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
        },
        "Sources": [
            "/usr/share/ramalama/shortnames.conf"
        ]
    },
    "Store": "/var/home/tob/.local/share/ramalama",
    "ToolsImage": "quay.io/ramalama/rocm-tools:0.21",
    "UseContainer": true,
    "Version": "0.21.0"
}

Upstream Latest Release

No

Additional environment details

No response

Additional information

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions