diff --git a/Project.toml b/Project.toml
index 0f527672..09caa5b1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "ExaModels"
 uuid = "1037b233-b668-4ce9-9b63-f9f681f55dd2"
-version = "0.11.0"
+version = "0.11.1"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -9,7 +9,6 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843"
 
 [weakdeps]
-GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -30,13 +29,6 @@ ExaModelsSpecialFunctions = "SpecialFunctions"
 
 [compat]
 Adapt = "4"
-# GPUCompiler 1.12+ runs VectorCombinePass unconditionally, producing
-# invalid SPIR-V on OpenCLBackend / oneAPIBackend (spirv-val: "forward
-# referenced IDs have not been defined"). v1.13.3 still reproduces.
-# Pinned to 1.11 until JuliaGPU/GPUCompiler.jl#XXX (gate the pass on
-# can_vectorize) is released. CUDA/AMDGPU/Metal users are also held to
-# 1.11 by this constraint; widen once upstream ships the fix.
-GPUCompiler = "~1.11"
 JuMP = "1"
 KernelAbstractions = "0.9"
 MathOptInterface = "1.19"
diff --git a/docs/Project.toml b/docs/Project.toml
index 20319180..d00010d2 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,5 +1,6 @@
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
 ExaModels = "1037b233-b668-4ce9-9b63-f9f681f55dd2"
diff --git a/docs/src/jump.jl b/docs/src/jump.jl
index d43242b0..55e33626 100644
--- a/docs/src/jump.jl
+++ b/docs/src/jump.jl
@@ -20,9 +20,11 @@ jm = Model()
 em = ExaModel(jm; backend = CUDABackend())
 
 # Here, note that only scalar objective/constraints created via `@constraint` and `@objective` API are supported. Older syntax like `@NLconstraint` and `@NLobjective` are not supported.
-# We can solve the model using any of the solvers supported by ExaModels. For example, we can use MadNLP:
+# We can solve the model using any of the solvers supported by ExaModels. For example, we can use MadNLP.
+# Note that `CUDSS` must be loaded alongside `MadNLPGPU`: it provides the default linear solver for sparse
+# models on the GPU, and loading it triggers MadNLPGPU's CUDA extension, which supplies the GPU KKT machinery.
 
-using MadNLPGPU
+using MadNLPGPU, CUDSS
 
 result = madnlp(em)
 
@@ -31,7 +33,7 @@ result = madnlp(em)
 # Alternatively, one can use the `Optimizer` interface provided by `ExaModels`. This feature can be used as follows.
 
 using ExaModels, JuMP, CUDA
-using MadNLP, MadNLPGPU
+using MadNLP, MadNLPGPU, CUDSS
 
 set_optimizer(jm, () -> ExaModels.Optimizer(MadNLP.madnlp, CUDABackend()))
 optimize!(jm)
diff --git a/docs/src/parameters.md b/docs/src/parameters.md
index d09d7348..8203edc1 100644
--- a/docs/src/parameters.md
+++ b/docs/src/parameters.md
@@ -31,7 +31,7 @@ An ExaCore
 Adding parameters is very similar to adding variables -- just pass a vector of values denoting the initial values.
 
 ````julia
-@add_parameter(c_param, θ, [100.0, 1.0])  # [penalty_coeff, offset]
+@add_par(c_param, θ, [100.0, 1.0])  # [penalty_coeff, offset]
 ````
 
 ````
@@ -45,35 +45,37 @@ Define the variables as before:
 
 ````julia
 N = 10
-@add_variable(c_param, x_p, N; start = (mod(i, 2) == 1 ? -1.2 : 1.0 for i = 1:N))
+@add_var(c_param, x_p, N; start = (mod(i, 2) == 1 ? -1.2 : 1.0 for i = 1:N))
 ````
 
 ````
 Variable
 
-  x ∈ R^{10}
+  x_p ∈ R^{10}
 
 ````
 
 Now we can use the parameters in our objective function just like variables:
 
 ````julia
-@add_objective(c_param, θ[1] * (x_p[i-1]^2 - x_p[i])^2 + (x_p[i-1] - θ[2])^2 for i = 2:N)
+@add_obj(c_param, θ[1] * (x_p[i-1]^2 - x_p[i])^2 + (x_p[i-1] - θ[2])^2 for i = 2:N)
 ````
 
 ````
 Objective
 
-  min (...) + ∑_{p ∈ P} f(x,θ,p)
+  ∑_{i ∈ I} f(x,i)
 
-  where |P| = 9
+  f(x,i) = θ[1] * x[i - 1]^2 - x[i]^2 + x[i - 1] - θ[2]^2
+
+  where |I| = 9
 
 ````
 
 Add the same constraints as before:
 
 ````julia
-@add_constraint(
+@add_con(
     c_param,
     3x_p[i+1]^3 + 2 * x_p[i+2] - 5 +
     sin(x_p[i+1] - x_p[i+2])sin(x_p[i+1] + x_p[i+2]) +
@@ -84,10 +86,11 @@ Add the same constraints as before:
 ````
 Constraint
 
-  s.t. (...)
-       g♭ ≤ [g(x,θ,p)]_{p ∈ P} ≤ g♯
+  g♭ ≤ [g(x,i)]_{i ∈ I} ≤ g♯
+
+  g(x,i) = 3 * x[i + 1]^3 + 2 * x[i + 2] - 5 + sin(x[i + 1] - x[i + 2]) * sin(x[i + 1] + x[i + 2]) + 4 * x[i + 1] - x[i] * exp(x[i] - x[i + 1]) - 3
 
-  where |P| = 8
+  where |I| = 8
 
 ````
 
@@ -154,11 +157,11 @@ Number of Iterations....: 6
 
                                    (scaled)                 (unscaled)
 Objective...............:   7.8692659500473017e-01    6.2324586324374636e+00
-Dual infeasibility......:   7.9746955363607132e-10    6.3159588647976857e-09
-Constraint violation....:   8.3546503049092280e-12    8.3546503049092280e-12
+Dual infeasibility......:   7.9746301767912855e-10    6.3159071000186987e-09
+Constraint violation....:   8.3542062156993779e-12    8.3542062156993779e-12
 Variable bound violation:   0.0000000000000000e+00    0.0000000000000000e+00
 Complementarity.........:   0.0000000000000000e+00    0.0000000000000000e+00
-Overall NLP error.......:   7.9746955363607132e-10    6.3159588647976857e-09
+Overall NLP error.......:   7.9746301767912855e-10    6.3159071000186987e-09
 
 
 Number of objective function evaluations             = 7
@@ -168,7 +171,7 @@ Number of inequality constraint evaluations          = 0
 Number of equality constraint Jacobian evaluations   = 7
 Number of inequality constraint Jacobian evaluations = 0
 Number of Lagrangian Hessian evaluations             = 6
-Total seconds in IPOPT                               = 0.342
+Total seconds in IPOPT                               = 0.344
 
 EXIT: Optimal Solution Found.
 Original objective: 6.232458632437464
@@ -212,12 +215,12 @@ iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
 Number of Iterations....: 6
 
                                    (scaled)                 (unscaled)
-Objective...............:   5.4592422674820063e-01    8.6474397516914987e+00
-Dual infeasibility......:   7.9051456536755353e-10    1.2521750715422049e-08
+Objective...............:   5.4592422674819974e-01    8.6474397516914845e+00
+Dual infeasibility......:   7.9051456515071309e-10    1.2521750711987297e-08
 Constraint violation....:   8.4190432403374871e-12    8.4190432403374871e-12
 Variable bound violation:   0.0000000000000000e+00    0.0000000000000000e+00
 Complementarity.........:   0.0000000000000000e+00    0.0000000000000000e+00
-Overall NLP error.......:   7.9051456536755353e-10    1.2521750715422049e-08
+Overall NLP error.......:   7.9051456515071309e-10    1.2521750711987297e-08
 
 
 Number of objective function evaluations             = 7
@@ -227,10 +230,10 @@ Number of inequality constraint evaluations          = 0
 Number of equality constraint Jacobian evaluations   = 7
 Number of inequality constraint Jacobian evaluations = 0
 Number of Lagrangian Hessian evaluations             = 6
-Total seconds in IPOPT                               = 0.002
+Total seconds in IPOPT                               = 0.001
 
 EXIT: Optimal Solution Found.
-Modified penalty objective: 8.647439751691499
+Modified penalty objective: 8.647439751691484
 
 ````
 
diff --git a/src/nlp.jl b/src/nlp.jl
index 259768f8..2ab374ae 100644
--- a/src/nlp.jl
+++ b/src/nlp.jl
@@ -295,8 +295,8 @@ julia> c = ExaCore(Float32; backend = CUDABackend(), concrete = Val(true))
 An ExaCore
 
   Float type: ...................... Float32
-  Array type: ...................... CUDA.CuArray{Float32, 1, CUDA.DeviceMemory}
-  Backend: ......................... CUDA.CUDAKernels.CUDABackend
+  Array type: ...................... CUDACore.CuArray{Float32, 1, CUDACore.DeviceMemory}
+  Backend: ......................... CUDACore.CUDAKernels.CUDABackend
 
   number of objective patterns: .... 0
   number of constraint patterns: ... 0