From e1fbfac0fa3d46ccd20e8ab566698bc19b623a1c Mon Sep 17 00:00:00 2001 From: mudler <2420543+mudler@users.noreply.github.com> Date: Sat, 27 Jun 2026 21:10:22 +0000 Subject: [PATCH 1/3] :arrow_up: Update ggml-org/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- backend/cpp/llama-cpp/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index b02bdac07356..64bd395f2b98 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -1,5 +1,5 @@ -LLAMA_VERSION?=9d5d882d8cd0f0a9283d87ed5e6fe3ee0d925fb1 +LLAMA_VERSION?=0ed235ea2c17a19fc8238668653946721ed136fd LLAMA_REPO?=https://github.com/ggerganov/llama.cpp CMAKE_ARGS?= From 61b0a11a7e23bf93930c3f5b9ea6283568744014 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jun 2026 22:25:27 +0000 Subject: [PATCH 2/3] fix(llama-cpp): link server-stream.cpp TU into grpc-server for upstream 0ed235ea (#10536) Upstream llama.cpp 0ed235ea added an SSE stream-resumption layer in a new translation unit tools/server/server-stream.cpp, which defines stream_session, stream_pipe_producer and the g_stream_sessions manager. server-context.cpp (already #included into grpc-server.cpp) now calls into it via spipe->cleanup(), stream_aware_should_stop() and stream_session_attach_pipe(), so without the new TU the grpc-server link fails on every arch with: undefined reference to `stream_pipe_producer::cleanup()' prepare.sh already copies every tools/server/* file into tools/grpc-server/, so the source is present; the only missing piece was including its definitions. Add an __has_include-guarded #include "server-stream.cpp" before server-context.cpp, mirroring the existing server-chat.cpp and server-schema.cpp guards, keeping the source compatible with older pins/forks that predate the split. The file is self-contained (its only external symbols come from server-common, already in the TU) so it adds no new undefined references; the http route-handler factories it also defines are unused in the grpc path but harmless. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] --- backend/cpp/llama-cpp/grpc-server.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp index 9d17e23b11d9..a02d461f46dd 100644 --- a/backend/cpp/llama-cpp/grpc-server.cpp +++ b/backend/cpp/llama-cpp/grpc-server.cpp @@ -30,6 +30,19 @@ #define LOCALAI_HAS_SERVER_SCHEMA 1 #include "server-schema.cpp" #endif +// server-stream.cpp exists only in llama.cpp after the upstream refactor that +// added the SSE stream-resumption layer (stream_session/stream_pipe_producer). +// server-context.cpp calls into it (spipe->cleanup(), stream_aware_should_stop, +// stream_session_attach_pipe), so its definitions must be part of this +// translation unit or the link fails with "undefined reference to +// stream_pipe_producer::cleanup()". The file is self-contained (its only +// external symbols come from server-common, already pulled in above) and the +// http route-handler factories it also defines are unused here but harmless. +// __has_include keeps the source compatible with older pins/forks that predate +// the split. +#if __has_include("server-stream.cpp") +#include "server-stream.cpp" +#endif #include "server-context.cpp" // LocalAI From 148292fdaf14e96101a1298d9cf64e5621eb988b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jun 2026 23:26:11 +0000 Subject: [PATCH 3/3] fix(llama-cpp): build renamed ggml-rpc-server target for upstream 0ed235ea (#10536) Upstream renamed the RPC server CMake target and binary from `rpc-server` to `ggml-rpc-server` (tools/rpc/CMakeLists.txt: `set(TARGET ggml-rpc-server)`), so the RPC-enabled grpc build failed with "No rule to make target 'rpc-server'". The grpc-server itself links fine after the server-stream.cpp fix; this only updates the RPC target name and the binary path copied to llama-cpp-rpc-server. Signed-off-by: Ettore Di Giacinto Assisted-by: Claude:claude-opus-4-8 [Claude Code] --- backend/cpp/llama-cpp/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 64bd395f2b98..f542ac3b43c0 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -156,11 +156,11 @@ llama-cpp-grpc: llama.cpp cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge $(info ${GREEN}I llama-cpp build info:grpc${RESET}) - CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server + CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target ggml-rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc llama-cpp-rpc-server: llama-cpp-grpc - cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server + cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/ggml-rpc-server llama-cpp-rpc-server llama.cpp: mkdir -p llama.cpp