Skip to content

Commit 51be60e

Browse files
authored
(inworld tts): add language param (#5723)
1 parent 7b2b57e commit 51be60e

1 file changed

Lines changed: 14 additions & 0 deletions

File tree

  • livekit-plugins/livekit-plugins-inworld/livekit/plugins/inworld

livekit-plugins/livekit-plugins-inworld/livekit/plugins/inworld/tts.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,7 @@ class _TTSOptions:
9595
bit_rate: int
9696
speaking_rate: float
9797
temperature: float
98+
language: NotGivenOr[str] = NOT_GIVEN
9899
timestamp_type: NotGivenOr[TimestampType] = NOT_GIVEN
99100
text_normalization: NotGivenOr[TextNormalization] = NOT_GIVEN
100101
timestamp_transport_strategy: TimestampTransportStrategy = DEFAULT_TIMESTAMP_TRANSPORT_STRATEGY
@@ -401,6 +402,8 @@ async def _send_loop(self) -> None:
401402
},
402403
"contextId": msg.context_id,
403404
}
405+
if is_given(opts.language):
406+
pkt["create"]["language"] = opts.language
404407
if is_given(opts.timestamp_type):
405408
pkt["create"]["timestampType"] = opts.timestamp_type
406409
if is_given(opts.text_normalization):
@@ -825,6 +828,7 @@ def __init__(
825828
sample_rate: NotGivenOr[int] = NOT_GIVEN,
826829
speaking_rate: NotGivenOr[float] = NOT_GIVEN,
827830
temperature: NotGivenOr[float] = NOT_GIVEN,
831+
language: NotGivenOr[str] = NOT_GIVEN,
828832
timestamp_type: NotGivenOr[TimestampType] = NOT_GIVEN,
829833
text_normalization: NotGivenOr[TextNormalization] = NOT_GIVEN,
830834
timestamp_transport_strategy: NotGivenOr[TimestampTransportStrategy] = NOT_GIVEN,
@@ -853,6 +857,9 @@ def __init__(
853857
Defaults to 1.0.
854858
temperature (float, optional): Determines the degree of randomness when sampling audio
855859
tokens to generate the response. Range (0, 2]. Defaults to 1.0.
860+
language (str, optional): BCP-47 language tag (e.g., "en-US", "fr-FR", "ja-JP")
861+
specifying the language that the given voice should speak the text in.
862+
If not set, the model default applies.
856863
timestamp_type (str, optional): Controls timestamp metadata returned with the audio.
857864
Use "WORD" for word-level timestamps or "CHARACTER" for character-level.
858865
Useful for karaoke-style captions, word highlighting, and lipsync.
@@ -926,6 +933,7 @@ def __init__(
926933
sample_rate=sample_rate if is_given(sample_rate) else DEFAULT_SAMPLE_RATE,
927934
speaking_rate=speaking_rate if is_given(speaking_rate) else DEFAULT_SPEAKING_RATE,
928935
temperature=temperature if is_given(temperature) else DEFAULT_TEMPERATURE,
936+
language=language,
929937
timestamp_type=timestamp_type,
930938
text_normalization=text_normalization,
931939
timestamp_transport_strategy=timestamp_transport_strategy
@@ -983,6 +991,7 @@ def update_options(
983991
sample_rate: NotGivenOr[int] = NOT_GIVEN,
984992
speaking_rate: NotGivenOr[float] = NOT_GIVEN,
985993
temperature: NotGivenOr[float] = NOT_GIVEN,
994+
language: NotGivenOr[str] = NOT_GIVEN,
986995
timestamp_type: NotGivenOr[TimestampType] = NOT_GIVEN,
987996
text_normalization: NotGivenOr[TextNormalization] = NOT_GIVEN,
988997
timestamp_transport_strategy: NotGivenOr[TimestampTransportStrategy] = NOT_GIVEN,
@@ -1001,6 +1010,7 @@ def update_options(
10011010
speaking_rate (float, optional): The speed of the voice.
10021011
temperature (float, optional): Determines the degree of randomness when sampling audio
10031012
tokens to generate the response.
1013+
language (str, optional): BCP-47 language tag (e.g., "en-US", "fr-FR").
10041014
timestamp_type (str, optional): Controls timestamp metadata ("WORD" or "CHARACTER").
10051015
text_normalization (str, optional): Controls text normalization ("ON" or "OFF").
10061016
timestamp_transport_strategy (str, optional): Controls timestamp transport strategy
@@ -1023,6 +1033,8 @@ def update_options(
10231033
self._opts.speaking_rate = speaking_rate
10241034
if is_given(temperature):
10251035
self._opts.temperature = temperature
1036+
if is_given(language):
1037+
self._opts.language = language
10261038
if is_given(timestamp_type):
10271039
_validate_str_param(timestamp_type, "timestamp_type", TimestampType)
10281040
self._opts.timestamp_type = timestamp_type
@@ -1133,6 +1145,8 @@ async def _run(self, output_emitter: tts.AudioEmitter) -> None:
11331145
"modelId": self._opts.model,
11341146
"audioConfig": audio_config,
11351147
}
1148+
if utils.is_given(self._opts.language):
1149+
body_params["language"] = self._opts.language
11361150
if utils.is_given(self._opts.timestamp_type):
11371151
body_params["timestampType"] = self._opts.timestamp_type
11381152
if utils.is_given(self._opts.text_normalization):

0 commit comments

Comments
 (0)