Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Swift API for Kokoro TTS 1.0 #1803

Merged
merged 1 commit into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/scripts/test-swift.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ ls -lh
ls -lh
rm -rf vits-piper-*

./run-tts-kokoro-zh-en.sh
ls -lh
rm -rf kokoro-multi-*

./run-tts-kokoro-en.sh
ls -lh
rm -rf kokoro-en-*
Expand Down
2 changes: 1 addition & 1 deletion cxx-api-examples/kokoro-tts-en-cxx-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// Copyright (c) 2025 Xiaomi Corporation

// This file shows how to use sherpa-onnx CXX API
// for Chinese TTS with Kokoro.
// for English TTS with Kokoro.
//
// clang-format off
/*
Expand Down
2 changes: 1 addition & 1 deletion cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// Copyright (c) 2025 Xiaomi Corporation

// This file shows how to use sherpa-onnx CXX API
// for Chinese TTS with Kokoro.
// for Chinese + English TTS with Kokoro.
//
// clang-format off
/*
Expand Down
1 change: 1 addition & 0 deletions swift-api-examples/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ add-punctuations
tts-matcha-zh
tts-matcha-en
tts-kokoro-en
tts-kokoro-zh-en
8 changes: 6 additions & 2 deletions swift-api-examples/SherpaOnnx.swift
Original file line number Diff line number Diff line change
Expand Up @@ -767,14 +767,18 @@ func sherpaOnnxOfflineTtsKokoroModelConfig(
voices: String = "",
tokens: String = "",
dataDir: String = "",
lengthScale: Float = 1.0
lengthScale: Float = 1.0,
dictDir: String = "",
lexicon: String = ""
) -> SherpaOnnxOfflineTtsKokoroModelConfig {
return SherpaOnnxOfflineTtsKokoroModelConfig(
model: toCPointer(model),
voices: toCPointer(voices),
tokens: toCPointer(tokens),
data_dir: toCPointer(dataDir),
length_scale: lengthScale
length_scale: lengthScale,
dict_dir: toCPointer(dictDir),
lexicon: toCPointer(lexicon)
)
}

Expand Down
37 changes: 37 additions & 0 deletions swift-api-examples/run-tts-kokoro-zh-en.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
#
# Builds (when necessary) and runs the Swift example that uses the Kokoro
# multi-lingual model for Chinese + English text-to-speech.

set -ex

# The example links against the artifacts in ../build-swift-macos.
if ! [ -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Fetch and unpack the model on first use.
#
# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if ! [ -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  tar xf kokoro-multi-lang-v1_0.tar.bz2
  rm kokoro-multi-lang-v1_0.tar.bz2
fi

# Compile the example only when no previous build output is present.
if [ -e ./tts-kokoro-zh-en ]; then
  echo "./tts-kokoro-zh-en exists - skip building"
else
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-kokoro-zh-en.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-kokoro-zh-en

  strip tts-kokoro-zh-en
fi

# Put the install lib directory on the dynamic loader's search path.
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-kokoro-zh-en
69 changes: 69 additions & 0 deletions swift-api-examples/tts-kokoro-zh-en.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/// Context object handed to the TTS callback as its opaque `arg` pointer.
///
/// Declared `final` because it is not designed for subclassing.
final class MyClass {
  /// Reports how many samples were delivered in one callback invocation.
  ///
  /// This example only prints the count; it does not play any audio.
  ///
  /// - Parameter samples: The audio samples received from the callback.
  func playSamples(samples: [Float]) {
    print("Play \(samples.count) samples")
  }
}

/// Synthesizes a mixed Chinese/English sentence with the Kokoro multi-lingual
/// model and saves the result to `test-kokoro-zh-en.wav`.
///
/// Each chunk of samples passed to the callback during generation is
/// forwarded to `MyClass.playSamples(samples:)`.
func run() {
  let model = "./kokoro-multi-lang-v1_0/model.onnx"
  let voices = "./kokoro-multi-lang-v1_0/voices.bin"
  let tokens = "./kokoro-multi-lang-v1_0/tokens.txt"
  let dataDir = "./kokoro-multi-lang-v1_0/espeak-ng-data"
  let dictDir = "./kokoro-multi-lang-v1_0/dict"
  // Several lexicon files are passed as one comma-separated string.
  let lexicon = "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt"
  let kokoro = sherpaOnnxOfflineTtsKokoroModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir,
    dictDir: dictDir,
    lexicon: lexicon
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(kokoro: kokoro, debug: 0)
  // Must be `var` because it is passed by reference (`&ttsConfig`) below.
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  let myClass = MyClass()

  // We use Unretained here so myClass must be kept alive as the callback is invoked
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsCallbackWithArg = { samples, n, arg in
    // Skip the chunk instead of trapping if the context or the sample buffer
    // is missing, or if the reported count is negative.
    guard let arg = arg, let samples = samples, n >= 0 else {
      // return 1 so that it continues generating
      return 1
    }

    let o = Unmanaged<MyClass>.fromOpaque(arg).takeUnretainedValue()

    // Copy the buffer in a single pass rather than appending per element.
    let savedSamples = Array(UnsafeBufferPointer(start: samples, count: Int(n)))

    o.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text =
    "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?"
  let sid = 0
  let speed: Float = 1.0

  // `arg` is an unretained pointer to `myClass`, so explicitly keep the
  // object alive until generation (and every callback invocation) is done.
  let audio = withExtendedLifetime(myClass) {
    tts.generateWithCallbackWithArg(
      text: text, callback: callback, arg: arg, sid: sid, speed: speed)
  }

  let filename = "test-kokoro-zh-en.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

/// Program entry point: runs the Kokoro Chinese + English TTS example.
@main
struct App {
  static func main() { run() }
}
Loading