From 204c684d7dbc752d95e8dde9d4976989f29379b3 Mon Sep 17 00:00:00 2001 From: Gareth Ellis Date: Fri, 29 May 2026 14:41:37 +0200 Subject: [PATCH 1/3] Don't crash on non-UTF-8 ApiError bodies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ApiError handler in execute_single() decodes `e.body`, `e.error`, and `e.info` as UTF-8 to build a human-readable error message. When the body is binary (e.g., binary protobuf returned by ES OTLP endpoints on 4xx/5xx), the strict decode raises UnicodeDecodeError, which crashes the worker mid-task. Switch the six decode() calls to use errors="replace" so undecodable bytes become U+FFFD instead of aborting the worker. No semantic change for valid UTF-8 (the common case). This is a latent bug independent of OTLP — any operation that surfaces a binary error body would have hit it. --- esrally/driver/driver.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/esrally/driver/driver.py b/esrally/driver/driver.py index 64086c4c5..aab67b1b9 100644 --- a/esrally/driver/driver.py +++ b/esrally/driver/driver.py @@ -2129,16 +2129,19 @@ def _parse_headers(e): # Some runners return a raw response, causing the 'error' property to be a string literal of the bytes/BytesIO object, # we should avoid bubbling that up # e.g. ApiError(413, '<_io.BytesIO object at 0xffffaf146a70>') + # errors="replace" so binary response bodies (e.g. OTLP ingest returns binary protobuf + # error frames) don't crash the driver with UnicodeDecodeError. Undecodable bytes become + # U+FFFD in the logged/recorded message. if isinstance(e.body, bytes): # could be an empty body - if error_body := e.body.decode("utf-8"): + if error_body := e.body.decode("utf-8", errors="replace"): error_message = error_body else: # to be consistent with an empty 'e.error' error_message = str(None) elif isinstance(e.body, BytesIO): # could be an empty body - if error_body := e.body.read().decode("utf-8"): + if error_body := e.body.read().decode("utf-8", errors="replace"): error_message = error_body else: # to be consistent with an empty 'e.error' @@ -2146,17 +2149,17 @@ def _parse_headers(e): # fallback to 'error' property if the body isn't bytes/BytesIO else: if isinstance(e.error, bytes): - error_message = e.error.decode("utf-8") + error_message = e.error.decode("utf-8", errors="replace") elif isinstance(e.error, BytesIO): - error_message = e.error.read().decode("utf-8") + error_message = e.error.read().decode("utf-8", errors="replace") else: # if the 'error' is empty, we get back str(None) error_message = e.error if isinstance(e.info, bytes): - error_info = e.info.decode("utf-8") + error_info = e.info.decode("utf-8", errors="replace") elif isinstance(e.info, BytesIO): - error_info = e.info.read().decode("utf-8") + error_info = e.info.read().decode("utf-8", errors="replace") else: error_info = e.info From 34ac42e0aadae204caff12f4253122cc45ce88c3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:47:25 +0000 Subject: [PATCH 2/3] Add non-UTF8 ApiError body execute_single test --- tests/driver/driver_test.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/driver/driver_test.py b/tests/driver/driver_test.py index 47c99afbc..c62c2cf5c 100644 --- a/tests/driver/driver_test.py +++ b/tests/driver/driver_test.py @@ -1987,6 +1987,25 @@ async def test_execute_single_with_http_400_with_raw_response_body(self): await driver.execute_single(self.context_managed(runner), es, params, on_error=OnErrorBehavior.ABORT) assert exc.value.args[0] == ("Request returned an error. Error type: api, Description: Huge error, HTTP Status: 499") + @pytest.mark.asyncio + async def test_execute_single_with_http_400_with_non_utf8_raw_response_body(self): + es = None + params = None + body = io.BytesIO(b"\xff") + str_literal = str(body) + error_meta = elastic_transport.ApiResponseMeta( + status=499, + http_version="1.1", + headers=elastic_transport.HttpHeaders(), + duration=0.0, + node=elastic_transport.NodeConfig(scheme="http", host="localhost", port=9200), + ) + runner = mock.AsyncMock(side_effect=elasticsearch.ApiError(message=str_literal, meta=error_meta, body=body)) + + with pytest.raises(exceptions.RallyAssertionError) as exc: + await driver.execute_single(self.context_managed(runner), es, params, on_error=OnErrorBehavior.ABORT) + assert exc.value.args[0] == ("Request returned an error. Error type: api, Description: �, HTTP Status: 499") + @pytest.mark.asyncio async def test_execute_single_with_http_400(self): es = None From 835a0e17780f7c8b9f5e2e38385075ebfb955d30 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:59:27 +0000 Subject: [PATCH 3/3] Align non-UTF8 test with HTTP 400 status --- tests/driver/driver_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/driver/driver_test.py b/tests/driver/driver_test.py index c62c2cf5c..acb70e5d7 100644 --- a/tests/driver/driver_test.py +++ b/tests/driver/driver_test.py @@ -1994,7 +1994,7 @@ async def test_execute_single_with_http_400_with_non_utf8_raw_response_body(self body = io.BytesIO(b"\xff") str_literal = str(body) error_meta = elastic_transport.ApiResponseMeta( - status=499, + status=400, http_version="1.1", headers=elastic_transport.HttpHeaders(), duration=0.0, @@ -2004,7 +2004,7 @@ async def test_execute_single_with_http_400_with_non_utf8_raw_response_body(self with pytest.raises(exceptions.RallyAssertionError) as exc: await driver.execute_single(self.context_managed(runner), es, params, on_error=OnErrorBehavior.ABORT) - assert exc.value.args[0] == ("Request returned an error. Error type: api, Description: �, HTTP Status: 499") + assert exc.value.args[0] == ("Request returned an error. Error type: api, Description: �, HTTP Status: 400") @pytest.mark.asyncio async def test_execute_single_with_http_400(self):