JuliaDecisionFocusedLearning · klamike · May 11, 2026 · May 11, 2026 · May 11, 2026 · May 12, 2026
diff --git a/docs/src/math.md b/docs/src/math.md
@@ -133,3 +133,37 @@ We make use of a few key observations:
 
 - Projection on $\mathcal{Z}$ commutes with scaling
 - Projection on an interval commutes with scaling if scaling is also applied to the interval in question
+
+### Pock-Chambolle scaling
+
+Following Lemma 2 of:
+
+> Pock, Thomas, and Antonin Chambolle. "Diagonal preconditioning for first order primal-dual algorithms in convex optimization." *2011 International Conference on Computer Vision*. IEEE, 2011. [doi:10.1109/ICCV.2011.6126441](https://doi.org/10.1109/ICCV.2011.6126441)
+
+for matrix $K$ of size $m \times n$ and $\alpha \in [0, 2]$, the diagonal factors are
+
+```math
+\tau_j = \left(\sum_i |K_{ij}|^{\alpha}\right)^{-1} \quad \text{(variable side, column $j$)}
+```
+
+```math
+\sigma_i = \left(\sum_j |K_{ij}|^{2-\alpha}\right)^{-1} \quad \text{(constraint side, row $i$)}
+```
+
+so that $\|\Sigma^{1/2} K T^{1/2}\| \leq 1$ with $T = \mathrm{diag}(\tau_j), \Sigma = \mathrm{diag}(\sigma_i)$. CoolPDLP absorbs the $1/2$ powers into the rescaling factors: $D_2[j] = \tau_j^{1/2}$ and $D_1[i] = \sigma_i^{1/2}$. The exponent parameter $\alpha$ is exposed as `chambolle_pock_alpha`. See also [`CoolPDLP.chambolle_pock_preconditioner`](@ref).
+
+Note that CoolPDLP uses $2-\alpha$ for the row and $\alpha$ for the column,
+to be aligned with the [PDLP paper](https://dl.acm.org/doi/abs/10.5555/3540261.3541809). This is the opposite of the
+convention used in the original Pock-Chambolle paper referenced above.
+
+### Ruiz equilibration
+
+The Ruiz iteration alternately rescales each row and each column of $A$ by the square root of its $\ell_\infty$ norm. After enough iterations the rescaled matrix has all row and column $\infty$-norms close to 1. See also [`CoolPDLP.ruiz_preconditioner`](@ref).
+
+### Composition
+
+The two passes run one after the other: first Ruiz produces $D_1^{\mathrm{ruiz}}, D_2^{\mathrm{ruiz}}$, then Pock-Chambolle is applied to the already-Ruiz-rescaled matrix $D_1^{\mathrm{ruiz}} A D_2^{\mathrm{ruiz}}$ and produces $D_1^{\mathrm{cp}}, D_2^{\mathrm{cp}}$. The final scaling is then
+
+```math
+D_1 = D_1^{\mathrm{cp}} D_1^{\mathrm{ruiz}} \qquad D_2 = D_2^{\mathrm{ruiz}} D_2^{\mathrm{cp}}
+```
diff --git a/src/components/preconditioning.jl b/src/components/preconditioning.jl
@@ -110,10 +110,42 @@ function diagonal_norm_preconditioner(
     return Preconditioner(Diagonal(d1), Diagonal(d2))
 end
 
-function chambolle_pock_preconditioner(cons::ConstraintMatrix; alpha::Number)
-    return diagonal_norm_preconditioner(cons; p_row = 2 - alpha, p_col = alpha)
+"""
+    chambolle_pock_preconditioner(cons; alpha)
+
+Diagonal preconditioner from Lemma 2 of:
+
+> Pock, Thomas, and Antonin Chambolle. "Diagonal preconditioning for first order
+> primal-dual algorithms in convex optimization." 2011 International Conference
+> on Computer Vision. IEEE, 2011. doi:10.1109/ICCV.2011.6126441
+
+For matrix `K` of size m×n and `α ∈ [0, 2]`, the variable-side factor for
+column `j` is `(sum_i |K[i,j]|^α)^{-1/2}` and the constraint-side factor for
+row `i` is `(sum_j |K[i,j]|^(2-α))^{-1/2}`. With these, `‖Σ^{1/2} K T^{1/2}‖ ≤ 1`
+where `T, Σ` are the diagonals built from those factors.
+
+Note that CoolPDLP uses `2-α` for the row and `α` for the column, to be aligned with
+the 2021 paper "Practical large-scale linear programming using primal-dual hybrid gradient"
+of Applegate et al. - this is the opposite of the convention used in
+the original Pock-Chambolle paper referenced above.
+"""
+function chambolle_pock_preconditioner(cons::ConstraintMatrix{T}; alpha::Number) where {T}
+    (; A, At) = cons
+    α_row = T(2 - alpha)
+    α_col = T(alpha)
+    col_sums = map(j -> column_power_sum(A, j, α_col), axes(A, 2))
+    row_sums = map(i -> column_power_sum(At, i, α_row), axes(A, 1))
+    d1 = map(s -> iszero(s) ? one(T) : inv(sqrt(s)), row_sums)
+    d2 = map(s -> iszero(s) ? one(T) : inv(sqrt(s)), col_sums)
+    return Preconditioner(Diagonal(d1), Diagonal(d2))
 end
 
+"""
+    ruiz_preconditioner(cons; iterations)
+
+Ruiz equilibration: for `iterations` rounds, rescale each row and column of `A`
+(inside `cons`) by the square root of its inf norm.
+"""
 function ruiz_preconditioner(cons::ConstraintMatrix; iterations::Integer)
     prec = identity_preconditioner(cons)
     for _ in 1:iterations

diff --git a/src/utils/linalg.jl b/src/utils/linalg.jl
@@ -107,6 +107,19 @@ end
 column_norm(A::AbstractMatrix, j::Integer, p) = norm(view(A, :, j), p)
 column_norm(A::SparseMatrixCSC, j::Integer, p) = norm(view(nonzeros(A), nzrange(A, j)), p)
 
+"""
+    column_power_sum(A, j, p)
+
+Return `sum_i |A[i, j]|^p`. Zero entries are treated as contributing `0` (not `0^0 = 1`),
+so the sparse and dense cases are aligned.
+"""
+@inline _power_term(x, p) = iszero(x) ? zero(x) : abs(x)^p
+
+column_power_sum(A::AbstractMatrix, j::Integer, p) =
+    sum(x -> _power_term(x, p), view(A, :, j); init = zero(eltype(A)))
+column_power_sum(A::SparseMatrixCSC, j::Integer, p) =
+    sum(x -> _power_term(x, p), view(nonzeros(A), nzrange(A, j)); init = zero(eltype(A)))
+
 mynnz(A::AbstractSparseMatrix) = nnz(A)
 mynnz(A::AbstractMatrix) = prod(size(A))
 

diff --git a/test/components/preconditioning.jl b/test/components/preconditioning.jl
@@ -73,3 +73,29 @@ end
     @test CoolPDLP.clamp.(sol.x, milp.lv, milp.uv) ≈ prec.D2 * CoolPDLP.clamp.(sol_p.x, milp_p.lv, milp_p.uv)
     @test CoolPDLP.clamp.(milp.A * sol.x, milp.lc, milp.uc) ≈ prec.D1 \ CoolPDLP.clamp.(milp_p.A * sol_p.x, milp_p.lc, milp_p.uc)
 end
+
+@testset "Pock-Chambolle exponents" begin
+    rng = Xoshiro(7)
+    A = sprand(rng, 10, 20, 0.5)
+    cons = CoolPDLP.ConstraintMatrix(A, sparse(transpose(A)))
+    @testset "α = $alpha" for alpha in (0.0, 0.5, 1.0, 1.5, 2.0)
+        prec = CoolPDLP.chambolle_pock_preconditioner(cons; alpha = alpha)
+        At_csc = sparse(transpose(A))
+        for j in axes(A, 2)
+            ref = sum(
+                x -> iszero(x) ? zero(x) : abs(x)^(alpha),
+                nonzeros(A)[nzrange(A, j)]; init = 0.0
+            )
+            expected = iszero(ref) ? 1.0 : inv(sqrt(ref))
+            @test prec.D2.diag[j] ≈ expected
+        end
+        for i in axes(A, 1)
+            ref = sum(
+                x -> iszero(x) ? zero(x) : abs(x)^(2 - alpha),
+                nonzeros(At_csc)[nzrange(At_csc, i)]; init = 0.0
+            )
+            expected = iszero(ref) ? 1.0 : inv(sqrt(ref))
+            @test prec.D1.diag[i] ≈ expected
+        end
+    end
+end