From 3b62dbd20e1cb28c5e6d2a571d1b80b19c04158d Mon Sep 17 00:00:00 2001
From: jordansilly77-stack <jordansilly77-stack@users.noreply.github.com>
Date: Sat, 20 Jun 2026 00:07:22 +0800
Subject: [PATCH] Add Tencent Cloud ASR Sapat guide

Signed-off-by: jordansilly77-stack <jordansilly77-stack@users.noreply.github.com>
---
 authors/xu_yang.md                            |  10 +
 .../20260619_definition_signed_api_request.md |  26 ++
 ...tencent_cloud_asr_with_sapat_in_daytona.md | 329 ++++++++++++++++++
 ...oud_asr_with_sapat_in_daytona_workflow.svg |  30 ++
 4 files changed, 395 insertions(+)
 create mode 100644 authors/xu_yang.md
 create mode 100644 definitions/20260619_definition_signed_api_request.md
 create mode 100644 guides/20260619_run_tencent_cloud_asr_with_sapat_in_daytona.md
 create mode 100644 guides/assets/20260619_run_tencent_cloud_asr_with_sapat_in_daytona_workflow.svg

diff --git a/authors/xu_yang.md b/authors/xu_yang.md
new file mode 100644
index 00000000..1a1913cd
--- /dev/null
+++ b/authors/xu_yang.md
@@ -0,0 +1,10 @@
+Author: Xu Yang
+Title: Independent Developer
+Description: Xu Yang is an independent developer focused on practical automation, Python tooling, and cloud workflow repair. He works across browser automation, backend scripts, and developer tooling, with a preference for small reproducible systems that can be tested, handed off, and maintained without unnecessary ceremony.
+Author Image: ![xu-yang](https://github.com/jordansilly77-stack.png?size=512)
+Author LinkedIn:
+Author Twitter:
+Company Name: Independent
+Company Description: Independent developer building pragmatic automation and developer workflow tools.
+Company Logo Dark:
+Company Logo White:
diff --git a/definitions/20260619_definition_signed_api_request.md b/definitions/20260619_definition_signed_api_request.md
new file mode 100644
index 00000000..3cc73f42
--- /dev/null
+++ b/definitions/20260619_definition_signed_api_request.md
@@ -0,0 +1,26 @@
+---
+title: 'signed api request'
+description:
+  'A signed API request includes a cryptographic signature that proves who sent
+  the request and whether the request body was changed in transit.'
+---
+
+# signed api request
+
+## Definition
+
+A signed API request is an HTTP request that carries a cryptographic signature
+derived from the request method, path, headers, body, timestamp, and a secret
+key. The receiving service recalculates the signature and accepts the request
+only when both signatures match.
+
+Signed requests are common in cloud APIs because they avoid sending the secret
+key directly over the network. They also make replay and tampering harder: if a
+timestamp, header, or request body changes, the computed signature no longer
+matches.
+
+For AI engineering workflows, signed requests matter when a provider does not
+use a simple bearer token. A transcription pipeline might call a speech API that
+requires provider-specific signing, so the code needs deterministic request
+construction, stable JSON serialization, clear environment variables, and mock
+tests that validate the signature headers without exposing real credentials.
diff --git a/guides/20260619_run_tencent_cloud_asr_with_sapat_in_daytona.md b/guides/20260619_run_tencent_cloud_asr_with_sapat_in_daytona.md
new file mode 100644
index 00000000..482f4b9e
--- /dev/null
+++ b/guides/20260619_run_tencent_cloud_asr_with_sapat_in_daytona.md
@@ -0,0 +1,329 @@
+---
+title: 'Run Tencent Cloud ASR with Sapat in Daytona'
+description:
+  'Build a repeatable Tencent Cloud speech-to-text workflow with Sapat inside a
+  Daytona workspace.'
+date: 2026-06-19
+author: 'Xu Yang'
+tags: ['daytona', 'speech-to-text', 'python', 'tencent-cloud']
+---
+
+# Run Tencent Cloud ASR with Sapat in Daytona
+
+# Introduction
+
+Speech-to-text looks simple until the same workflow has to run twice, on a
+different machine, with the same credentials, the same audio conversion rules,
+and the same output location. A local script can transcribe a file today and
+then become difficult to reproduce next week because `ffmpeg` changed, the
+environment variables were never written down, or the cloud API needs a signed
+request instead of a normal bearer token.
+
+This guide shows how to run Tencent Cloud Automatic Speech Recognition through
+Sapat in a Daytona workspace. Sapat is a small Python CLI for routing audio to
+multiple transcription providers. The companion provider implementation adds a
+`tencentcloud` provider that signs Tencent Cloud ASR `SentenceRecognition`
+requests, sends local audio as base64, and writes the transcript through Sapat's
+normal processing flow.
+
+The goal is not to hide provider complexity. The goal is to put it in one
+repeatable place. Daytona gives the workflow a clean workspace. Sapat gives the
+workflow a consistent command-line interface. The provider code handles Tencent
+Cloud's [signed API request](/definitions/20260619_definition_signed_api_request.md)
+format so the operator can focus on audio quality, language selection, and
+transcript review.
+
+## TL;DR
+
+- Use a Daytona workspace so `ffmpeg`, Python dependencies, and Sapat are
+  configured the same way every time.
+- Add Tencent Cloud credentials as environment variables, not committed files.
+- Use Sapat's `--provider tencentcloud` option for short local audio clips that
+  fit Tencent Cloud's `SentenceRecognition` limits.
+- Keep one sample audio file for validation before running customer or
+  production recordings.
+- Use mock tests for request signing so the provider can be checked without
+  exposing real Tencent Cloud secrets.
+
+## How the workflow fits together
+
+![Tencent Cloud ASR workflow with Sapat in Daytona](assets/20260619_run_tencent_cloud_asr_with_sapat_in_daytona_workflow.svg)
+
+The moving pieces are intentionally small:
+
+- Daytona creates the workspace and keeps the setup reproducible.
+- Sapat converts the source media and invokes a selected transcription provider.
+- The Tencent Cloud provider builds the ASR request payload, signs it with
+  TC3-HMAC-SHA256, and sends it to Tencent Cloud ASR.
+- The provider parses the response into text plus optional word timing data.
+- Sapat writes the transcript file next to the input so it can be reviewed,
+  committed, or handed to another tool.
+
+This division matters because Tencent Cloud's API is not an OpenAI-compatible
+endpoint. A normal `Authorization: Bearer ...` header is not enough. The
+signature must be generated from the exact request body and headers that are
+sent to the API. If one byte changes after signing, the request fails.
+
+## Prerequisites
+
+You need:
+
+- A Daytona workspace with Python 3.8 or newer.
+- `ffmpeg`, because Sapat can convert video files before transcription.
+- Tencent Cloud ASR enabled in your Tencent Cloud account.
+- A Tencent Cloud Secret ID and Secret Key with permission to call ASR.
+- A local audio or video file for testing.
+
+If you are testing the companion provider before it is merged, use the Sapat
+branch from the provider pull request:
+
+```bash
+git clone https://github.com/jordansilly77-stack/sapat.git
+cd sapat
+git checkout codex/tencentcloud-provider
+python -m venv .venv
+source .venv/bin/activate
+pip install -e '.[dev]'
+```
+
+The companion provider pull request is
+[`nibzard/sapat#67`](https://github.com/nibzard/sapat/pull/67). After it is
+merged, the fork and branch step can be replaced with the normal Sapat install
+path.
+
+## Step 1: Create a clean Daytona workspace
+
+Start with a workspace dedicated to transcription experiments. Keep the first
+run small: one short audio clip, one provider, and one transcript output.
+
+Inside the workspace, confirm Python and `ffmpeg` are available:
+
+```bash
+python --version
+ffmpeg -version
+```
+
+If `ffmpeg` is missing, install it before running Sapat. The exact command
+depends on your base image. For Debian or Ubuntu images, use:
+
+```bash
+sudo apt-get update
+sudo apt-get install -y ffmpeg
+```
+
+Keep audio samples in a simple folder:
+
+```bash
+mkdir -p samples transcripts
+```
+
+Copy a short test file into `samples/`. Tencent Cloud's one-sentence API is
+best for short clips, command snippets, QA samples, voice notes, and validation
+checks. For long meetings or large recordings, use a task-based batch API
+instead of this short-form endpoint.
+
+## Step 2: Configure Tencent Cloud credentials
+
+Do not commit credentials to the repository. Put them in the workspace
+environment or a local `.env` file that is ignored by Git:
+
+```bash
+export TENCENTCLOUD_SECRET_ID="your-secret-id"
+export TENCENTCLOUD_SECRET_KEY="your-secret-key"
+export TENCENTCLOUD_REGION="ap-guangzhou"
+```
+
+The provider also accepts an optional endpoint override:
+
+```bash
+export TENCENTCLOUD_ASR_ENDPOINT="asr.tencentcloudapi.com"
+```
+
+Use the default endpoint unless your account, region, or network policy requires
+something different.
+
+The provider signs each request with Tencent Cloud's TC3-HMAC-SHA256 flow. That
+signature uses:
+
+- HTTP method: `POST`
+- canonical URI: `/`
+- signed headers: `content-type` and `host`
+- service name: `asr`
+- API version: `2019-06-14`
+- action: `SentenceRecognition`
+- timestamp: generated at request time
+
+You should not need to touch the signing code during normal use. If a request
+fails with a signature error, check for mismatched credentials, wrong endpoint,
+or system clock drift first.
+
+## Step 3: Pick the right engine model
+
+Tencent Cloud ASR uses engine names rather than a generic model name. The
+provider exposes common aliases, but it is still useful to know what is being
+sent.
+
+| Sapat model or language | Tencent Cloud engine |
+| --- | --- |
+| `default`, `zh`, `mandarin` | `16k_zh` |
+| `en`, `english` | `16k_en` |
+| `cantonese`, `yue` | `16k_yue` |
+| `multi` | `16k_zh-PY` |
+| `ja` language | `16k_ja` |
+| `ko` language | `16k_ko` |
+
+For Mandarin and mixed Chinese-English clips, start with:
+
+```bash
+sapat samples/voice-note.mp3 --provider tencentcloud --model zh --language zh
+```
+
+For English clips:
+
+```bash
+sapat samples/demo.mp3 --provider tencentcloud --model english --language en
+```
+
+If the clip is already in a Tencent-supported format such as MP3, WAV, M4A, or
+AAC, keep it as-is for the first test. If the source is a video, Sapat can
+convert it before transcription.
+
+## Step 4: Run a first transcription
+
+Run a short sample first:
+
+```bash
+sapat samples/voice-note.mp3 \
+  --provider tencentcloud \
+  --model zh \
+  --language zh \
+  --quality M
+```
+
+The `--quality` flag controls Sapat's conversion behavior before the provider
+receives audio. Start with `M` for normal voice notes. Use `H` for recordings
+with music, echo, multiple speakers, or poor input quality, but keep file size
+limits in mind.
+
+When the run finishes, inspect the generated transcript:
+
+```bash
+ls -la samples
+cat samples/voice-note.txt
+```
+
+For a real workflow, do not judge quality from one file. Keep a small validation
+set with:
+
+- one clean Mandarin clip,
+- one noisy Mandarin clip,
+- one English clip,
+- one mixed Chinese-English clip if your users need it,
+- one clip with domain words or product names.
+
+This tells you whether the selected engine is the right default for your
+workspace.
+
+## Step 5: Use hotwords for product names
+
+Tencent Cloud supports temporary hotwords. The provider exposes this through
+the `hotword_list` option in code and tests. In CLI-driven workflows, a small
+wrapper script is often the cleanest way to pass provider-specific options
+until the CLI grows first-class flags for every provider.
+
+For example, a thin Python runner can call the provider directly:
+
+```python
+from sapat.providers.tencentcloud import TencentCloudProvider
+
+provider = TencentCloudProvider()
+result = provider.transcribe(
+    "samples/voice-note.mp3",
+    model="zh",
+    language="zh",
+    hotword_list="Daytona|8,Sapat|10,ASR|8",
+)
+print(result.text)
+```
+
+Use hotwords sparingly. A focused list of product names, customer names, or
+technical terms is better than a long dictionary. If too many words are boosted,
+the transcript can become worse rather than better.
+
+## Step 6: Validate without real credentials
+
+The provider should be testable without sending audio to Tencent Cloud. The
+companion PR includes mock-based tests that verify:
+
+- credentials control provider availability,
+- the API action is `SentenceRecognition`,
+- the request body includes base64 audio and the original byte length,
+- the Tencent Cloud headers include timestamp, version, action, region, and
+  authorization,
+- response text, duration, and word timing data are parsed,
+- Tencent Cloud error responses raise a clear runtime error.
+
+Run the focused checks:
+
+```bash
+python -m pytest tests/providers/test_tencentcloud.py -q
+python -m pytest tests/test_registry.py tests/test_cli.py -q
+python -m compileall sapat tests/providers/test_tencentcloud.py
+```
+
+This is especially useful in Daytona because anyone who opens the workspace can
+repeat the validation before adding real credentials. The tests do not contain
+secrets, private audio, generated transcripts, or account-specific metadata.
+
+## Common issues and troubleshooting
+
+**Problem:** The provider is not listed as available.
+
+**Solution:** Check that both `TENCENTCLOUD_SECRET_ID` and
+`TENCENTCLOUD_SECRET_KEY` are present in the workspace environment. Sapat's
+provider registry only lists providers whose required environment variables are
+set.
+
+**Problem:** Tencent Cloud returns a signature error.
+
+**Solution:** Confirm the Secret ID and Secret Key belong to the same account,
+the endpoint host is correct, and the workspace clock is accurate. Signed
+requests are sensitive to the timestamp and the exact request body.
+
+**Problem:** The request succeeds but the transcript is poor.
+
+**Solution:** Check the engine model first. `16k_zh` is a good default for
+Mandarin. Use `16k_en` for English and `16k_yue` for Cantonese. Also check the
+input quality before changing code: clipped audio, background music, and stereo
+voice overlap can hurt any speech-to-text provider.
+
+**Problem:** The audio file is too large.
+
+**Solution:** `SentenceRecognition` accepts local clips up to 60 seconds and
+3 MB. Split long recordings into smaller clips, use a URL-based or batch task
+API, or choose another Sapat provider designed for long-form transcription.
+
+**Problem:** Domain terms are mistranscribed.
+
+**Solution:** Use a short temporary hotword list and rerun the validation set.
+Keep the list small and weighted toward terms that must be correct.
+
+## Conclusion
+
+Tencent Cloud ASR is a useful option when a team needs Chinese-language
+transcription, regional cloud coverage, or provider diversity beyond
+OpenAI-compatible endpoints. The tradeoff is that the API uses signed requests,
+so the integration should be tested carefully and kept in a reusable provider
+module rather than copied into one-off scripts.
+
+With Daytona and Sapat, the workflow becomes easier to repeat: create the
+workspace, set credentials, run the provider, review the transcript, and keep
+tests around the signing path. That is the difference between a demo and a
+workflow someone else can safely rerun.
+
+## References
+
+- [Sapat repository](https://github.com/nkkko/sapat)
+- [Tencent Cloud ASR API documentation](https://cloud.tencent.com/document/product/1093)
+- [Companion Tencent Cloud provider PR](https://github.com/nibzard/sapat/pull/67)
+- [Daytona content contribution guide](https://github.com/daytonaio/content/blob/main/CONTRIBUTING.md)
diff --git a/guides/assets/20260619_run_tencent_cloud_asr_with_sapat_in_daytona_workflow.svg b/guides/assets/20260619_run_tencent_cloud_asr_with_sapat_in_daytona_workflow.svg
new file mode 100644
index 00000000..eed48cfc
--- /dev/null
+++ b/guides/assets/20260619_run_tencent_cloud_asr_with_sapat_in_daytona_workflow.svg
@@ -0,0 +1,30 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="1120" height="560" viewBox="0 0 1120 560" role="img" aria-labelledby="title desc">
+  <title id="title">Tencent Cloud ASR workflow with Sapat in Daytona</title>
+  <desc id="desc">A Daytona workspace runs Sapat, signs a Tencent Cloud ASR request, sends audio, and saves a transcript.</desc>
+  <rect width="1120" height="560" fill="#f7f9fc"/>
+  <rect x="60" y="80" width="250" height="120" rx="12" fill="#ffffff" stroke="#243b53" stroke-width="2"/>
+  <text x="185" y="130" font-family="Arial, sans-serif" font-size="24" text-anchor="middle" fill="#102a43">Daytona workspace</text>
+  <text x="185" y="164" font-family="Arial, sans-serif" font-size="16" text-anchor="middle" fill="#52606d">Python, ffmpeg, Sapat</text>
+  <rect x="435" y="80" width="250" height="120" rx="12" fill="#ffffff" stroke="#1f6f8b" stroke-width="2"/>
+  <text x="560" y="124" font-family="Arial, sans-serif" font-size="24" text-anchor="middle" fill="#0b4f6c">Tencent provider</text>
+  <text x="560" y="158" font-family="Arial, sans-serif" font-size="16" text-anchor="middle" fill="#52606d">Base64 + TC3 signature</text>
+  <rect x="810" y="80" width="250" height="120" rx="12" fill="#ffffff" stroke="#176b3a" stroke-width="2"/>
+  <text x="935" y="124" font-family="Arial, sans-serif" font-size="24" text-anchor="middle" fill="#14532d">Tencent Cloud ASR</text>
+  <text x="935" y="158" font-family="Arial, sans-serif" font-size="16" text-anchor="middle" fill="#52606d">SentenceRecognition API</text>
+  <rect x="248" y="330" width="250" height="120" rx="12" fill="#ffffff" stroke="#7c3aed" stroke-width="2"/>
+  <text x="373" y="374" font-family="Arial, sans-serif" font-size="24" text-anchor="middle" fill="#4c1d95">Transcript file</text>
+  <text x="373" y="408" font-family="Arial, sans-serif" font-size="16" text-anchor="middle" fill="#52606d">Review, commit, hand off</text>
+  <rect x="623" y="330" width="250" height="120" rx="12" fill="#ffffff" stroke="#b45309" stroke-width="2"/>
+  <text x="748" y="374" font-family="Arial, sans-serif" font-size="24" text-anchor="middle" fill="#78350f">Validation loop</text>
+  <text x="748" y="408" font-family="Arial, sans-serif" font-size="16" text-anchor="middle" fill="#52606d">Mock tests + sample audio</text>
+  <path d="M310 140 H435" stroke="#334e68" stroke-width="3" fill="none" marker-end="url(#arrow)"/>
+  <path d="M685 140 H810" stroke="#334e68" stroke-width="3" fill="none" marker-end="url(#arrow)"/>
+  <path d="M935 200 C935 285 850 315 748 330" stroke="#334e68" stroke-width="3" fill="none" marker-end="url(#arrow)"/>
+  <path d="M623 390 H498" stroke="#334e68" stroke-width="3" fill="none" marker-end="url(#arrow)"/>
+  <path d="M373 330 C280 300 220 250 185 200" stroke="#334e68" stroke-width="3" fill="none" marker-end="url(#arrow)"/>
+  <defs>
+    <marker id="arrow" markerWidth="12" markerHeight="12" refX="10" refY="6" orient="auto">
+      <path d="M2 2 L10 6 L2 10 Z" fill="#334e68"/>
+    </marker>
+  </defs>
+</svg>