From dca49b9811f51a122026f3908a7cf304b801d10b Mon Sep 17 00:00:00 2001 From: PhilippNaused Date: Sun, 8 Jun 2025 20:50:22 +0200 Subject: [PATCH 1/2] Add test project --- .github/workflows/tests.yml | 18 ++++++++++++++++++ KokoroSharp.Tests/KokoroSharp.Tests.csproj | 19 +++++++++++++++++++ KokoroSharp.sln | 6 ++++++ KokoroSharp/KokoroSharp.csproj | 5 +++++ 4 files changed, 48 insertions(+) create mode 100644 .github/workflows/tests.yml create mode 100644 KokoroSharp.Tests/KokoroSharp.Tests.csproj diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..807aa5b --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,18 @@ +name: Build & Test + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-dotnet@v4 + with: + dotnet-version: 8.0.x + - name: Test + run: dotnet test -c Release \ No newline at end of file diff --git a/KokoroSharp.Tests/KokoroSharp.Tests.csproj b/KokoroSharp.Tests/KokoroSharp.Tests.csproj new file mode 100644 index 0000000..e4954ce --- /dev/null +++ b/KokoroSharp.Tests/KokoroSharp.Tests.csproj @@ -0,0 +1,19 @@ + + + + net8.0 + enable + enable + Exe + false + + + + + + + + + + + \ No newline at end of file diff --git a/KokoroSharp.sln b/KokoroSharp.sln index 7e28a85..40b7b2e 100644 --- a/KokoroSharp.sln +++ b/KokoroSharp.sln @@ -5,6 +5,8 @@ VisualStudioVersion = 17.12.35506.116 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "KokoroSharp", "KokoroSharp\KokoroSharp.csproj", "{81F1B2B4-923A-4AD5-BAC9-72EA1620E162}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "KokoroSharp.Tests", "KokoroSharp.Tests\KokoroSharp.Tests.csproj", "{B0AB7C0B-D1C1-447C-9EB5-5CC9CB9E8943}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -15,6 +17,10 @@ Global {81F1B2B4-923A-4AD5-BAC9-72EA1620E162}.Debug|Any CPU.Build.0 = Debug|Any CPU {81F1B2B4-923A-4AD5-BAC9-72EA1620E162}.Release|Any CPU.ActiveCfg = Release|Any CPU {81F1B2B4-923A-4AD5-BAC9-72EA1620E162}.Release|Any CPU.Build.0 = Release|Any CPU + {B0AB7C0B-D1C1-447C-9EB5-5CC9CB9E8943}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B0AB7C0B-D1C1-447C-9EB5-5CC9CB9E8943}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B0AB7C0B-D1C1-447C-9EB5-5CC9CB9E8943}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B0AB7C0B-D1C1-447C-9EB5-5CC9CB9E8943}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/KokoroSharp/KokoroSharp.csproj b/KokoroSharp/KokoroSharp.csproj index 3688a0b..f984a26 100644 --- a/KokoroSharp/KokoroSharp.csproj +++ b/KokoroSharp/KokoroSharp.csproj @@ -32,4 +32,9 @@ + + + + + From 892eed5377c934d5ae905fc695f3cd54e1cd0249 Mon Sep 17 00:00:00 2001 From: PhilippNaused Date: Sun, 8 Jun 2025 20:50:43 +0200 Subject: [PATCH 2/2] Add tests for text pre-processor --- KokoroSharp.Tests/TokenizerTests.cs | 17 +++++++++++++++++ KokoroSharp/Processing/Tokenizer.cs | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 KokoroSharp.Tests/TokenizerTests.cs diff --git a/KokoroSharp.Tests/TokenizerTests.cs b/KokoroSharp.Tests/TokenizerTests.cs new file mode 100644 index 0000000..35821d5 --- /dev/null +++ b/KokoroSharp.Tests/TokenizerTests.cs @@ -0,0 +1,17 @@ +using KokoroSharp.Processing; + +namespace KokoroSharp.Tests; + +public class TokenizerTests { + [Test] + [Arguments("$1", "1 dollar")] + [Arguments("$1.50", "1 dollar 50")] + [Arguments("$ 1.50", "1 dollar 50")] + [Arguments("1€", "1 euro")] + [Arguments("1,75 €", "1 euro 75")] + [Arguments("1,75€", "1 euro 75")] + [Arguments("3.1415", "3 point 1 4 1 5")] + public async Task PreprocessText(string input, string expected) { + await Assert.That(Tokenizer.PreprocessText(input)).IsEqualTo(expected); + } +} \ No newline at end of file diff --git a/KokoroSharp/Processing/Tokenizer.cs b/KokoroSharp/Processing/Tokenizer.cs index 1894298..45f7009 100644 --- a/KokoroSharp/Processing/Tokenizer.cs +++ b/KokoroSharp/Processing/Tokenizer.cs @@ -81,7 +81,7 @@ public static string Phonemize_Internal(string text, out string originalSegments /// Normalizes the input text to what the Kokoro model would expect to see, preparing it for phonemization. /// In addition, converts various "written" text to "spoken" form (e.g. $1 --> "one dollar" instead of "dollar one". - internal static string PreprocessText(string text, string langCode) { + internal static string PreprocessText(string text, string langCode = "en-us") { text = HeaderLink().Replace(text, "$1"); // Discard links appearing in `[Header](link)` format. text = HeaderImgLink().Replace(text, "$1$2"); // And in [Header[(img](link)] text = Money().Replace(text, "$2 $1 $3"); // Convert money amounts like "$1.50" to "1 $ 50".