This document provides practical examples of how to use the CosyVoice2 API.
Currently, the API does not require authentication. This may change in future versions.
Base URL: http://localhost:8000
Add a new voice for zero-shot cloning:
curl -X POST "http://localhost:8000/api/v1/voices/" \
-H "Content-Type: multipart/form-data" \
-F "voice_id=my_custom_voice" \
-F "name=My Custom Voice" \
-F "description=A custom voice for testing" \
-F "voice_type=zero_shot" \
-F "language=en" \
-F "prompt_text=Hello, this is a sample of my voice." \
-F "audio_format=wav" \
-F "audio_file=@/path/to/voice_sample.wav"

Python example:
import requests
url = "http://localhost:8000/api/v1/voices/"
with open("voice_sample.wav", "rb") as audio_file:
files = {"audio_file": audio_file}
data = {
"voice_id": "my_custom_voice",
"name": "My Custom Voice",
"description": "A custom voice for testing",
"voice_type": "zero_shot",
"language": "en",
"prompt_text": "Hello, this is a sample of my voice.",
"audio_format": "wav"
}
response = requests.post(url, files=files, data=data)
print(response.json())

curl -X GET "http://localhost:8000/api/v1/voices/"

With filtering:
curl -X GET "http://localhost:8000/api/v1/voices/?voice_type=zero_shot&language=en&page=1&page_size=10"

curl -X GET "http://localhost:8000/api/v1/voices/my_custom_voice"

curl -X PUT "http://localhost:8000/api/v1/voices/my_custom_voice" \
-H "Content-Type: application/json" \
-d '{
"name": "Updated Voice Name",
"description": "Updated description"
}'

curl -X DELETE "http://localhost:8000/api/v1/voices/my_custom_voice"

curl -X GET "http://localhost:8000/api/v1/voices/stats/summary"

curl -X GET "http://localhost:8000/api/v1/voices/pretrained/list"

The API supports both synchronous and asynchronous voice synthesis:
- Sync endpoints (`/api/v1/synthesize/*`): Return results immediately (may take time)
- Async endpoints (`/api/v1/async/synthesize/*`): Return task ID immediately, check status later

curl -X POST "http://localhost:8000/api/v1/synthesize/sft" \
-H "Content-Type: application/json" \
-d '{
"text": "Hello, how are you today?",
"voice_id": "pretrained_voice_001",
"speed": 1.0,
"format": "wav",
"stream": false
}'

Python example:
import requests
url = "http://localhost:8000/api/v1/synthesize/sft"
data = {
"text": "Hello, how are you today?",
"voice_id": "pretrained_voice_001",
"speed": 1.0,
"format": "wav",
"stream": False
}
response = requests.post(url, json=data)
result = response.json()
if result["success"]:
# Download the audio file
audio_url = f"http://localhost:8000{result['audio_url']}"
audio_response = requests.get(audio_url)
with open("output.wav", "wb") as f:
f.write(audio_response.content)
print(f"Audio saved to output.wav (duration: {result['duration']}s)")

curl -X POST "http://localhost:8000/api/v1/async/synthesize/sft" \
-H "Content-Type: application/json" \
-d '{
"text": "Hello, how are you today?",
"voice_id": "pretrained_voice_001",
"speed": 1.0,
"format": "wav"
}'

Response:
{
"task_id": "task_abc123def456",
"status": "submitted",
"message": "SFT synthesis task submitted successfully",
"check_status_url": "/api/v1/async/tasks/task_abc123def456/status",
"result_url": "/api/v1/async/tasks/task_abc123def456/result"
}

Check status:
curl -X GET "http://localhost:8000/api/v1/async/tasks/task_abc123def456/status"

Get result when completed:
curl -X GET "http://localhost:8000/api/v1/async/tasks/task_abc123def456/result"

curl -X POST "http://localhost:8000/api/v1/synthesize/zero-shot" \
-H "Content-Type: multipart/form-data" \
-F "text=Hello, this is a test of voice cloning." \
-F "voice_id=my_custom_voice" \
-F "speed=1.0" \
-F "format=wav" \
-F "stream=false"

curl -X POST "http://localhost:8000/api/v1/synthesize/zero-shot" \
-H "Content-Type: multipart/form-data" \
-F "text=Hello, this is a test of voice cloning." \
-F "prompt_text=This is the original text from the audio sample." \
-F "speed=1.0" \
-F "format=wav" \
-F "stream=false" \
-F "prompt_audio=@/path/to/voice_sample.wav"

curl -X POST "http://localhost:8000/api/v1/synthesize/cross-lingual" \
-H "Content-Type: multipart/form-data" \
-F "text=你好,这是跨语言语音合成测试。" \
-F "voice_id=my_custom_voice" \
-F "speed=1.0" \
-F "format=wav" \
-F "stream=false"

curl -X POST "http://localhost:8000/api/v1/synthesize/instruct" \
-H "Content-Type: application/json" \
-d '{
"text": "Hello, how are you today?",
"voice_id": "pretrained_voice_001",
"instruct_text": "Please speak in a happy and energetic tone",
"speed": 1.0,
"format": "wav",
"stream": false
}'

Here's a complete example that demonstrates the full workflow:
import requests
import time
BASE_URL = "http://localhost:8000"
def add_voice():
    """Add a custom voice to the cache.

    Uploads ``my_voice_sample.wav`` together with the voice metadata as a
    multipart form to the voices endpoint.

    Returns:
        True when the server answers with HTTP 201, otherwise False.
    """
    endpoint = f"{BASE_URL}/api/v1/voices/"
    with open("my_voice_sample.wav", "rb") as sample:
        form_fields = {
            "voice_id": "my_voice_001",
            "name": "My Voice",
            "description": "My personal voice for cloning",
            "voice_type": "zero_shot",
            "language": "en",
            "prompt_text": "Hello, this is my voice sample.",
            "audio_format": "wav",
        }
        # The file handle must still be open while the request is sent.
        reply = requests.post(
            endpoint,
            files={"audio_file": sample},
            data=form_fields,
        )

    if reply.status_code != 201:
        print(f"Failed to add voice: {reply.text}")
        return False

    print("Voice added successfully!")
    return True
def synthesize_speech(text, voice_id):
    """Synthesize speech using the cached voice.

    Sends the synthesis request to the zero-shot endpoint, downloads the
    audio the response points to, and saves it under a timestamped name.

    Args:
        text: Text to synthesize.
        voice_id: ID of the cached voice to use.

    Returns:
        The name of the saved WAV file, or None if the request, the
        synthesis, or the audio download failed.
    """
    url = f"{BASE_URL}/api/v1/synthesize/zero-shot"
    data = {
        "text": text,
        "voice_id": voice_id,
        "speed": 1.0,
        "format": "wav",
        "stream": False,
    }
    response = requests.post(url, data=data)
    if response.status_code != 200:
        print(f"Request failed: {response.text}")
        return None

    result = response.json()
    if not result["success"]:
        print(f"Synthesis failed: {result['message']}")
        return None

    # Download the audio
    audio_url = f"{BASE_URL}{result['audio_url']}"
    audio_response = requests.get(audio_url)
    if audio_response.status_code != 200:
        # Without this check an HTTP error body would be written to disk
        # as a .wav file and reported as a successful synthesis.
        print(f"Audio download failed: {audio_response.text}")
        return None

    filename = f"synthesized_{int(time.time())}.wav"
    with open(filename, "wb") as f:
        f.write(audio_response.content)
    print(f"Speech synthesized successfully: {filename}")
    print(f"Duration: {result['duration']}s")
    return filename
def main():
    """Run the demo workflow: add the voice, then synthesize with it."""
    # Step 1: Add a voice (make sure you have my_voice_sample.wav)
    if not add_voice():
        return

    # Step 2: Synthesize speech
    sample_text = "Hello! This is a test of voice cloning using CosyVoice2 API."
    synthesize_speech(sample_text, "my_voice_001")
if __name__ == "__main__":
    main()

The API returns structured error responses:
{
"error": "voice_not_found",
"message": "Voice with ID 'non_existent' not found",
"details": {
"path": "/api/v1/voices/non_existent"
}
}

Common error codes:
- 400: Bad Request (invalid input)
- 404: Not Found (voice or resource not found)
- 409: Conflict (voice already exists)
- 422: Unprocessable Entity (validation error)
- 500: Internal Server Error
- 503: Service Unavailable (model not ready)

Currently, there are no rate limits, but they may be added in future versions for production deployments.