huggingface · psiddh · Jun 24, 2026
diff --git a/optimum/exporters/executorch/integrations.py b/optimum/exporters/executorch/integrations.py
@@ -418,6 +418,14 @@ def __init__(
         self.config = model.config
         self.use_custom_kv_cache = use_custom_kv_cache
         self.use_custom_sdpa = use_custom_sdpa
+
+        # The update_cache op only supports single-token (decode) inputs, so custom KV cache needs static shapes.
+        if use_custom_kv_cache and not disable_dynamic_shapes:
+            logging.warning(
+                "Custom KV cache requires static shapes. Automatically setting disable_dynamic_shapes=True."
+            )
+            disable_dynamic_shapes = True
+
         self.disable_dynamic_shapes = disable_dynamic_shapes
         self.metadata = save_config_to_constant_methods(
             model.config,

diff --git a/tests/models/test_modeling_common.py b/tests/models/test_modeling_common.py
@@ -33,6 +33,7 @@
 from optimum.executorch import ExecuTorchModelForCausalLM
 from optimum.executorch.modeling import _FILE_PATTERN
 from optimum.exporters.executorch import main_export
+from optimum.exporters.executorch.integrations import CausalLMExportableModule
 from optimum.utils.file_utils import find_files_matching_pattern
 
 from ..utils import check_causal_lm_output_quality
@@ -183,3 +184,27 @@ def forward(self, x):
                 if node.op == "call_function" and node.target == exir_ops.edge.aten.embedding.default
             )
         )
+
+    def test_custom_kv_cache_auto_disables_dynamic_shapes(self):  # custom KV cache must force static shapes
+        model_id = "optimum-internal-testing/tiny-random-llama"
+        model = AutoModelForCausalLM.from_pretrained(model_id)
+
+        wrapper = CausalLMExportableModule(
+            model,
+            use_custom_kv_cache=True,
+            disable_dynamic_shapes=False,  # explicitly request dynamic shapes; expected to be overridden
+        )
+        self.assertTrue(wrapper.disable_dynamic_shapes)  # constructor flipped the flag to True
+        self.assertFalse(wrapper.metadata.get("enable_dynamic_shape", True))  # default True: missing key fails
+
+    def test_dynamic_shapes_preserved_without_custom_kv_cache(self):  # no custom KV cache: flag left as passed
+        model_id = "optimum-internal-testing/tiny-random-llama"
+        model = AutoModelForCausalLM.from_pretrained(model_id)
+
+        wrapper = CausalLMExportableModule(
+            model,
+            use_custom_kv_cache=False,  # the automatic override only applies when this is True
+            disable_dynamic_shapes=False,
+        )
+        self.assertFalse(wrapper.disable_dynamic_shapes)  # caller's value is kept unchanged
+        self.assertTrue(wrapper.metadata.get("enable_dynamic_shape", False))  # default False: missing key fails