-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTTS_download_and_test_all_models.py
127 lines (103 loc) · 6.41 KB
/
TTS_download_and_test_all_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#This script downloads all models from the TTS library that can synthesize text.
#It checks which models are multi-language and have an English vocoder, and how many of them have a custom (allows voice cloning) or default English vocoder.
#It also checks the same for English models (default vs custom vocoder), and counts the number of non-English models and any errors that emerge.
#Import Libraries
from TTS.api import TTS #pip install TTS==0.13.3
import time
import os
# Wall-clock reference; the elapsed time is reported at the end of the run.
start = time.time()

# English passage that is handed to each model for synthesis.
text = (
    "Deepfakes are AI generated synthetic videos of any person or celebrity that impersonates "
    "the actual person and makes them act or say anything they "
    "originally never did. The process of creation of deepfakes is technically complex and "
    "generally requires a vast amount of data which is then fed to a neural "
    "network to train and generate the synthetic video."
)

# Per-category tallies, all starting from zero.
multi_voice_clone = 0  # multi-language model, English vocoder, voice cloning of the input voice
en_voice_clone = 0     # English model, custom English vocoder, voice cloning
multi_default_voc = 0  # multi-language model, default English vocoder, no voice cloning
en_default_voc = 0     # English model, default English vocoder, no voice cloning
other_languages = 0    # models for other languages
error = 0              # models with errors
ignored = 0            # models that should be ignored (due to errors or other reasons)

# Model names collected per category; each name gets its own fresh, independent list.
(
    multi_voice_clone_list,
    en_voice_clone_list,
    multi_default_voc_list,
    en_default_voc_list,
    other_languages_list,
    error_list,
    ignored_list,
) = ([] for _ in range(7))
# Classify every TTS model by the way it manages to synthesize the English
# sample text. Each model ends up in exactly one of the category lists:
# ignored, other language, one of the four synthesis categories, or error.

# Reference voice used by the two voice-cloning attempts.
speaker_wav = os.path.join(os.getcwd(), "input_audio.mp3")

# Synthesis attempts, tried in this order; the first one that does not raise
# decides the category of the model.
synthesis_attempts = (
    # Multi-language model with an English vocoder (voice cloning of the input voice)
    (multi_voice_clone_list, {"speaker_wav": speaker_wav, "language": "en"}),
    # English-only model with a custom English vocoder (voice cloning)
    (en_voice_clone_list, {"speaker_wav": speaker_wav}),
    # Multi-language model with a default English vocoder (no voice cloning)
    (multi_default_voc_list, {"language": "en"}),
    # English-only model with a default English vocoder (no voice cloning)
    (en_default_voc_list, {}),
)

for model in TTS.list_models():  # Loop over all TTS models
    if 'en/ek1' in model:  # This model gives an error, so it is skipped up front
        ignored_list.append(model)
        continue

    # Models that support English - '/el' gives a voice with gibberish (use only in Colab)
    if not ('/en' in model or 'multi' in model or 'univer' in model):
        other_languages_list.append(model)
        continue

    output_path = "output_en_" + model.replace("/", "") + ".wav"

    # Download and load the model (gpu=True can also be passed). A model that
    # cannot be loaded is recorded as an error instead of aborting the run.
    try:
        tts = TTS(model_name=model)
    except Exception as exc:
        print("Could not load model", model, "-", repr(exc))
        error_list.append(model)
        continue

    # `except Exception` (not a bare except) so Ctrl-C still stops the script
    # rather than being counted as a failed attempt.
    last_error = None
    for category_list, extra_kwargs in synthesis_attempts:
        try:
            tts.tts_to_file(text=text, file_path=output_path, **extra_kwargs)
        except Exception as exc:
            last_error = exc
            continue
        category_list.append(model)
        break
    else:  # None of the attempts succeeded
        print("All synthesis attempts failed for", model, "-", repr(last_error))
        error_list.append(model)

# The counters are derived from the name lists so the two can never disagree.
multi_voice_clone = len(multi_voice_clone_list)
en_voice_clone = len(en_voice_clone_list)
multi_default_voc = len(multi_default_voc_list)
en_default_voc = len(en_default_voc_list)
other_languages = len(other_languages_list)
error = len(error_list)
ignored = len(ignored_list)
# Print the per-category summary, the elapsed time, and verify that every
# model was counted exactly once.

# Query the model list once and reuse the size for both the report and the check.
total_models = len(TTS.list_models())
print("There were", total_models, "TTS models checked")
print("\n")

# (count, description, model names) for each category, in reporting order.
summary_rows = (
    (multi_voice_clone, "Multi-language models with English vocoder for voice cloning:", multi_voice_clone_list),
    (en_voice_clone, "English models with custom vocoder for voice cloning:", en_voice_clone_list),
    (multi_default_voc, "Multi-language models with a default English vocoder (no voice cloning):", multi_default_voc_list),
    (en_default_voc, "English models with default English vocoder (no voice cloning):", en_default_voc_list),
    (other_languages, "models of non-English languages:", other_languages_list),
    (error, "models with errors:", error_list),
    (ignored, "models ignored:", ignored_list),
)
for count, description, names in summary_rows:
    print(count, description, names)
    print("\n")

end = time.time()
print("Time taken:", end - start)  # Took ~300secs (3min with models already downloaded)

# Explicit raise instead of an `assert` statement so the check still runs
# under `python -O`; the exception type and message are unchanged.
counted_models = (
    multi_voice_clone + en_voice_clone + multi_default_voc + en_default_voc
    + other_languages + error + ignored
)
if total_models != counted_models:
    raise AssertionError("Something is wrong with the counting of models")
# # Running a multi-speaker and multi-lingual model
# # print(TTS.list_models())
# # List available 🐸TTS models and choose the first one
# model_name = TTS.list_models()[0]
# # Init TTS
# tts = TTS(model_name)
# # print(tts)
# # Run TTS
# # ❗ Since this model is multi-speaker and multi-lingual, we must set the target speaker and the language
# # Text to speech with a numpy output
# wav = tts.tts("This is a test!", speaker=tts.speakers[0], language=tts.languages[0])
# # Text to speech to a file
# tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav")
# #For greek txt-to-speech use the model below - Only works in Colab
# pip install TTS==0.13.3
# !sudo apt-get install espeak
# from google.colab import drive
# drive.mount('/content/gdrive')
# from TTS.api import TTS
# model_name="tts_models/el/cv/vits"
# tts = TTS(model_name=model_name)#, progress_bar=True, gpu=False)
# text = "Τι θα θέλατε να παραγγείλετε; Έχουμε σουβλάκια με σως, πατάτες και από πιάτα ημέρας γκιούλμπαστι. Σας αρέσουν;"
# tts.tts_to_file(text=text, file_path="content/gdrive/MyDrive/ordergr.wav")