Text to voice

1. For Japanese, I recommend these two:

https://text-to-speech.imtranslator.net/speech.asp
http://www.gavo.t.u-tokyo.ac.jp/ojad/phrasing/index

2. For Chinese, I recommend this one:

http://www.gnet.com.cn/include/voice/voice.php

3. For English, I recommend this one:

4. Clone a real voice

5. Online SRT translation tool

6. Tool to convert voice to an SRT file

Generate the clone voice

from bark.api import generate_audio
from transformers import BertTokenizer
from bark.generation import SAMPLE_RATE, preload_models, codec_decode, generate_coarse, generate_fine, generate_text_semantic

# Enter your prompt and speaker here.
text_prompt = "Hello, my name is Serpy. And, uh — and I like pizza. [laughs]"
# NOTE(review): left empty as a placeholder; presumably this must be set to a
# valid Bark voice / history prompt before running — confirm against the Bark
# version in use.
voice_name = ""  # use your custom voice name here if you have one

# Load the tokenizer.
# NOTE(review): `tokenizer` is not referenced again in this snippet; it is kept
# because the original notes load it.
tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")

# Download and load all models. The commented-out keyword arguments are the
# other options listed in the original notes; uncomment them to override.
preload_models(
    # text_use_gpu=False,
    text_use_small=True,
    # coarse_use_gpu=True,
    coarse_use_small=True,
    # fine_use_gpu=True,
    fine_use_small=True,
    codec_use_gpu=False,
    # force_reload=False,
)

# Option 1: simple generation (a single call).
audio_array = generate_audio(
    text_prompt,
    history_prompt=voice_name,
    text_temp=0.7,
    waveform_temp=0.7,
)

# Option 2: generation with more control, stage by stage.
# This reassigns `audio_array`, replacing the result of the simple generation
# above.
x_semantic = generate_text_semantic(
    text_prompt,
    history_prompt=voice_name,
    temp=0.7,
    top_k=50,
    top_p=0.95,
)

x_coarse_gen = generate_coarse(
    x_semantic,
    history_prompt=voice_name,
    temp=0.7,
    top_k=50,
    top_p=0.95,
)
x_fine_gen = generate_fine(
    x_coarse_gen,
    history_prompt=voice_name,
    temp=0.5,
)
audio_array = codec_decode(x_fine_gen)

from IPython.display import Audio

# Play audio (the bare expression only renders a player when it is the last
# expression of a notebook cell).
Audio(audio_array, rate=SAMPLE_RATE)

from scipy.io.wavfile import write as write_wav

# Save audio.
filepath = ""  # change this to your desired output path
write_wav(filepath, SAMPLE_RATE, audio_array)

Generate the voice

import spacy
import numpy as np
from bark.api import generate_audio
from transformers import BertTokenizer
from bark.generation import SAMPLE_RATE, preload_models, codec_decode, generate_coarse, generate_fine, generate_text_semantic
from scipy.io.wavfile import write as write_wav
from IPython.display import Audio

# Enter your prompt and speaker here.
# Newlines in the script are replaced by spaces and the surrounding whitespace
# is stripped, so the text is a single line before sentence splitting.
script = """
Yes, i am here and serious.
Accounts that try to game our verification system with non-sequitur self-promotion or advertise in a misleading way will be suspended.
""".replace("\n", " ").strip()

# NOTE(review): left empty as a placeholder; presumably this must be set to a
# valid Bark voice / history prompt before running — confirm against the Bark
# version in use.
voice_name = ""  # use your custom voice name here if you have one

# Load the tokenizer.
# NOTE(review): `tokenizer` is not referenced again in this snippet; it is kept
# because the original notes load it.
tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")

# Download and load all models.
preload_models(
    text_use_small=True,
    coarse_use_small=True,
    fine_use_small=True,
    codec_use_gpu=False,
)

# Split the script into sentences with spaCy so that each sentence is
# generated separately.
nlp = spacy.load('en_core_web_sm')
doc = nlp(script)
sentences = [sent.text.strip() for sent in doc.sents]

# Generate one audio array per sentence.
audio_arrays = []
for text_prompt in sentences:
    # Generation with more control, stage by stage.
    x_semantic = generate_text_semantic(
        text_prompt,
        history_prompt=voice_name,
        temp=0.7,
        top_k=50,
        top_p=0.95,
        min_eos_p=0.05,  # adjust this value based on your needs
    )

    x_coarse_gen = generate_coarse(
        x_semantic,
        history_prompt=voice_name,
        temp=0.7,
        top_k=50,
        top_p=0.95,
    )
    x_fine_gen = generate_fine(
        x_coarse_gen,
        history_prompt=voice_name,
        temp=0.5,
    )
    audio_array = codec_decode(x_fine_gen)

    audio_arrays.append(audio_array)

# Concatenate all audio arrays into one, in sentence order.
final_audio_array = np.concatenate(audio_arrays)

# Play audio (the bare expression only renders a player when it is the last
# expression of a notebook cell).
Audio(final_audio_array, rate=SAMPLE_RATE)

# Save audio.
filepath = ""  # change this to your desired output path
write_wav(filepath, SAMPLE_RATE, final_audio_array)

cd /home/hk/audiocraft ; /usr/bin/env /home/hk/audiocraft/env/bin/python /home/hk/.vscode/extensions/ms-python.python-2023.4.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher 60135 -- /home/hk/audiocraft/app.py
Running on local URL: http://127.0.0.1:7860

To create a public link, set share=True in launch().
Loading model melody
Downloading (…)ve/main/spiece.model: 100%|██████████████████████| 792k/792k [00:00<00:00, 1.23MB/s]
Downloading (…)lve/main/config.json: 100%|█████████████████████| 1.21k/1.21k [00:00<00:00, 804kB/s]
Downloading model.safetensors: 100%|████████████████████████████| 892M/892M [02:42<00:00, 5.49MB/s]
new batch 1 ['Pop dance track with catchy melodies, tropical percussion, and upbeat rhythms, perfect for the beach'] [(44100, (442368, 2))]
Make a video took 0.7567908763885498
batch finished 1 11.583285093307495
Tempfiles currently stored: 2
Loading model melody
new batch 1 ['pop dance song that features memorable melodies, lively tropical percussion, and energetic rhythms, specifically designed to create a vibrant and enjoyable atmosphere at the beach'] [(44100, (442368, 2))]
CLIPPING /tmp/tmpcco_r2o0.wav happening with proba (a bit of clipping is okay): 1.1458333574410062e-05 maximum scale: 1.2867498397827148
Make a video took 1.0688543319702148
batch finished 1 47.17802405357361
Tempfiles currently stored: 4

The prompt is: "pop dance song that features memorable melodies, lively tropical percussion, and energetic rhythms, specifically designed to create a vibrant and enjoyable atmosphere at the beach"

The generated audio is here: #music #AI music generation #prompt - YouTube