Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions egs/cmu_arctic/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
About the SLT Arctic corpus

The CMU_ARCTIC databases were constructed at the Language Technologies Institute at Carnegie Mellon University as phonetically balanced, US English single speaker databases designed for unit selection speech synthesis research.

The databases consist of around 1150 utterances carefully selected from out-of-copyright texts from Project Gutenberg. The databases include US English male (bdl) and female (slt) speakers (both experienced voice talent) as well as other accented speakers.

Each subdirectory of this directory contains the
scripts for a sequence of experiments.

s1: To run run_demo_voice with WORLD vocoder


175 changes: 175 additions & 0 deletions egs/cmu_arctic/s1/01_setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#!/bin/bash

# Exactly one argument, the voice name, is required; anything else prints
# the usage text and stops.
if [[ $# -ne 1 ]]; then
  echo "################################"
  echo "Usage:"
  echo "./01_setup.sh <voice_name>"
  echo ""
  echo "Give a voice name eg., slt"
  echo "Available speakers are bdl, slt, jmk"
  echo "################################"
  exit 1
fi

# The speaker id is the part of the voice name in front of the first
# underscore (e.g. "slt_demo" -> "slt"); the voice name itself is kept whole.
IFS='_' read -ra name_parts <<< "$1"
spk="${name_parts[0]}"
voice_name="$1"
echo "Speaker is ${spk}."

# Define a pattern to select only parts of the database for the demo version.
case "${voice_name}" in
  *demo*)
    # Use only the first 59 utterances in demo.
    corpus_select_rgx="arctic_a00[0-5][0-9]"
    ;;
  *)
    # Use all utterances here.
    corpus_select_rgx="*"
    ;;
esac

### Step 1: setup directories and the training data files ###
echo "Step 1:"

# Every path is anchored at the directory the script is started from. The
# Merlin root is taken to be three levels above it (the recipe lives in
# egs/cmu_arctic/s1). All expansions are quoted so that a checkout located
# under a path containing whitespace still works.
current_working_dir=$(pwd)
merlin_dir=$(dirname "$(dirname "$(dirname "${current_working_dir}")")")
experiments_dir="${current_working_dir}/experiments"
data_dir="${current_working_dir}/database"

voice_name=$1
voice_dir="${experiments_dir}/${voice_name}"

acoustic_dir="${voice_dir}/acoustic_model"
duration_dir="${voice_dir}/duration_model"
synthesis_dir="${voice_dir}/test_synthesis"

# mkdir -p creates all missing parents, so naming the leaf directories also
# creates experiments/, the voice directory and the three model directories.
mkdir -p \
  "${data_dir}" \
  "${acoustic_dir}/data" \
  "${duration_dir}/data" \
  "${synthesis_dir}/txt"


audio_dir=database/wav
rawaudio_dir=database/rawaudio
txt_dir=database/txt
label_dir=database/labels

# URL of arctic DB.
arch=cmu_us_${spk}_arctic-WAVEGG.tar.bz2
url=http://festvox.org/cmu_arctic/cmu_arctic/orig/$arch
laburl=http://festvox.org/cmu_arctic/cmuarctic.data

# Download and unpack the speaker archive unless it is already present.
# The work happens in a subshell so the caller's working directory is never
# changed. On failure the (possibly partial) archive is removed so that the
# next run downloads it again instead of skipping this step.
if [ ! -e "${rawaudio_dir}/${arch}" ]; then
  mkdir -p "${rawaudio_dir}"
  (
    cd "${rawaudio_dir}" || exit 1
    if ! wget "${url}" || ! tar xjf "${arch}"; then
      rm -f "${arch}"
      exit 1
    fi
  ) || {
    echo "Failed to download or extract ${url}" >&2
    exit 1
  }
fi

# The transcription file is deleted again further down, so it is fetched
# afresh on every run and stored directly under its working name.
rm -rf "${txt_dir:?}"
mkdir -p "${txt_dir}"
wget -O "${txt_dir}/utts.data" "${laburl}" || {
  echo "Failed to download ${laburl}" >&2
  exit 1
}

# Collect utterance ids of the necessary audio files. The selection pattern
# is left unquoted on purpose so that the shell expands it as a glob; glob
# results are already sorted and free of duplicates.
orig_dir="${rawaudio_dir}/cmu_us_${spk}_arctic/orig"
utts=()
# shellcheck disable=SC2231
for wav in "${orig_dir}"/${corpus_select_rgx}.wav; do
  [ -e "${wav}" ] || continue # Pattern matched nothing.
  wav=${wav##*/}
  utts+=("${wav%.wav}")
done

# An empty list would turn into an empty grep pattern below, which matches
# every transcription line, so stop here instead.
if [ "${#utts[@]}" -eq 0 ]; then
  echo "No audio files matching '${corpus_select_rgx}.wav' found in ${orig_dir}" >&2
  exit 1
fi

# Audios have to be removed because demo/full could have been changed.
rm -rf "${audio_dir:?}"
# Leave this check for fast testing, when $audio_dir does not have to be removed.
if [ ! -e "${audio_dir}" ]; then
  mkdir -p "${audio_dir}"
  # Collect necessary audio files.
  for utt in "${utts[@]}"; do
    # Sample down to 16k mono, script 03_prepare_acoustic_features cannot handle stereo.
    sox "${orig_dir}/${utt}.wav" "${audio_dir}/${utt}.wav" remix 1 rate -v -s -a 16000 dither -s
  done
fi

# Get labels, combine the selected utterances to a regex pattern (id1|id2|...).
utts_pat=$(IFS='|'; echo "${utts[*]}")
export utts_pat
# Select those labels of utts.data which belong to the selected utterances.
grep -wE "${utts_pat}" "${txt_dir}/utts.data" >| "${txt_dir}/utts_selected.data"
# Turn every line of utts_selected.data into a txt file using the utterance
# id (second field) as file name. The written text is the remainder of the
# line starting after the id and its separator, assuming lines of the form
# "( <id> <text...>".
# NOTE(review): the remainder appears to include the surrounding quotes and
# the closing parenthesis of the source line - confirm the aligner expects that.
# Each file is closed right after writing so awk implementations with a low
# open-file limit can process the full corpus.
awk -F' ' -v outDir="${txt_dir}" '{
  out = outDir "/" $2 ".txt"
  print substr($0, 2 + length($2) + 2, length($0)) >> out
  close(out)
}' "${txt_dir}/utts_selected.data"
# Remove unnecessary files.
rm "${txt_dir}/utts.data"
rm "${txt_dir}/utts_selected.data"

# Labels from an earlier run may belong to a different utterance selection.
rm -rf "${label_dir:?}"

### create some test files ###
echo "Hello world." > "${synthesis_dir}/txt/test_001.txt"
echo "Hi, this is a demo voice from Merlin." > "${synthesis_dir}/txt/test_002.txt"
echo "Hope you guys enjoy free open-source voices from Merlin." > "${synthesis_dir}/txt/test_003.txt"
printf "test_001\ntest_002\ntest_003" > "${synthesis_dir}/test_id_list.scp"

global_config_file=conf/global_settings.cfg
# Writing the config fails when its directory is missing, so create it first.
mkdir -p "$(dirname "${global_config_file}")"

### size of the train / validation / test split ###
if [[ "${voice_name}" == *"demo"* ]]; then
  # Select 59 examples in the demo.
  num_train_set=49
  num_dev_set=5
else
  # In the full version 5% of the utterances are used for validation and test set each.
  num_files=$(ls -1 "${audio_dir}" | wc -l)
  num_dev_set=$(awk "BEGIN { pc=${num_files}*0.05; print(int(pc)) }")
  num_train_set=$((num_files - 2 * num_dev_set))
fi

### default settings ###
# The whole file is written in one go; the content is line-for-line what the
# later recipe steps read back with "source".
cat > "${global_config_file}" <<EOF
######################################
############# PATHS ##################
######################################

MerlinDir=${merlin_dir}
WorkDir=${current_working_dir}

######################################
############# PARAMS #################
######################################

Voice=${voice_name}
Labels=state_align
QuestionFile=questions-radio_dnn_416.hed
Vocoder=WORLD
SamplingFreq=16000
SilencePhone='sil'
FileIDList=file_id_list.scp

######################################
######### No. of files ###############
######################################

Train=${num_train_set}
Valid=${num_dev_set}
Test=${num_dev_set}

######################################
############# TOOLS ##################
######################################

ESTDIR=${merlin_dir}/tools/speech_tools
FESTDIR=${merlin_dir}/tools/festival
FESTVOXDIR=${merlin_dir}/tools/festvox

HTKDIR=${merlin_dir}/tools/bin/htk

EOF

echo "Merlin default voice settings configured in \"$global_config_file\""
echo "Modify these params as per your data..."
echo "eg., sampling frequency, no. of train files etc.,"
echo "setup done...!"

60 changes: 60 additions & 0 deletions egs/cmu_arctic/s1/02_prepare_labels.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/bin/bash
# Step 2 of the recipe: run the forced aligner selected by the "Labels"
# setting and copy the resulting label files into the duration and acoustic
# model data directories of the configured voice.

global_config_file=conf/global_settings.cfg
if [ ! -f "$global_config_file" ]; then
  echo "Global config file $global_config_file not found." >&2
  exit 1
fi
# Reads Labels, Voice and FileIDList, which are used below.
source "$global_config_file"

if test "$#" -ne 3; then
  echo "################################"
  echo "Usage:"
  echo "./02_prepare_labels.sh <path_to_wav_dir> <path_to_text_dir> <path_to_labels_dir>"
  echo ""
  echo "default path to wav dir(Input): database/wav"
  echo "default path to txt dir(Input): database/txt"
  echo "default path to lab dir(Output): database/labels"
  echo "################################"
  exit 1
fi

wav_dir=$1
inp_txt=$2
lab_dir=$3

####################################
########## Prepare labels ##########
####################################

# Switches for running only one half of this step while testing.
prepare_labels=true
copy=true

if [ "$prepare_labels" = true ]; then
  echo "Step 2: "
  echo "Preparing labels..."

  case "$Labels" in
    state_align)
      ./scripts/run_state_aligner.sh "$wav_dir" "$inp_txt" "$lab_dir" "$global_config_file"
      ;;
    phone_align)
      ./scripts/run_phone_aligner.sh "$wav_dir" "$inp_txt" "$lab_dir" "$global_config_file"
      ;;
    *)
      echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
      # Nothing was produced, so there is nothing to copy below.
      exit 1
      ;;
  esac
fi

if [ "$copy" = true ]; then
  echo "Copying labels to duration and acoustic data directories..."

  label_src="$lab_dir/label_$Labels"
  if [ ! -d "$label_src" ]; then
    echo "Label directory $label_src not found; label preparation seems to have failed." >&2
    exit 1
  fi

  duration_data_dir=experiments/${Voice}/duration_model/data
  acoustic_data_dir=experiments/${Voice}/acoustic_model/data

  for target_dir in "$duration_data_dir" "$acoustic_data_dir"; do
    # Without an existing target, cp -r would rename the label directory
    # to "data" instead of placing it inside.
    mkdir -p "$target_dir"
    cp -r "$label_src" "$target_dir"
    # The file id list holds the label file names without the .lab suffix.
    # Piping through sed avoids "sed -i", whose syntax differs between
    # GNU and BSD sed.
    ls "$label_src" | sed 's/\.lab$//' > "$target_dir/$FileIDList"
  done

  echo "done...!"
fi
43 changes: 43 additions & 0 deletions egs/cmu_arctic/s1/03_prepare_acoustic_features.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash
# Step 3 of the recipe: extract vocoder features from the wav files and copy
# them into the acoustic model data directory of the configured voice.

global_config_file=conf/global_settings.cfg
if [ ! -f "$global_config_file" ]; then
  echo "Global config file $global_config_file not found." >&2
  exit 1
fi
# Reads MerlinDir, SamplingFreq and Voice, which are used below.
source "$global_config_file"

if test "$#" -ne 2; then
  echo "################################"
  echo "Usage:"
  echo "./03_prepare_acoustic_features.sh <path_to_wav_dir> <path_to_feat_dir>"
  echo ""
  echo "default path to wav dir(Input): database/wav"
  echo "default path to feat dir(Output): database/feats"
  echo "################################"
  exit 1
fi

wav_dir=$1
feat_dir=$2

# A missing directory is treated like an empty one; the message below covers
# both cases, so the error output of ls itself is not needed.
if [ -z "$(ls -A "${wav_dir}" 2>/dev/null)" ]; then
  echo "Please place your audio files in: ${wav_dir}"
  exit 1
fi

####################################
##### prepare vocoder features #####
####################################

# Switches for running only one half of this step while testing.
prepare_feats=true
copy=true

if [ "$prepare_feats" = true ]; then
  echo "Step 3:"
  echo "Prepare acoustic features using WORLD vocoder..."
  # Stop on failure so that incomplete features are never copied.
  python "${MerlinDir}/misc/scripts/vocoder/world/extract_features_for_merlin.py" \
    "${MerlinDir}" "${wav_dir}" "${feat_dir}" "$SamplingFreq" || {
    echo "Feature extraction failed." >&2
    exit 1
  }
fi

if [ "$copy" = true ]; then
  echo "Copying features to acoustic data directory..."
  acoustic_data_dir=experiments/${Voice}/acoustic_model/data
  mkdir -p "$acoustic_data_dir"
  # Only the directory part is quoted; the trailing * must stay a glob.
  cp -r "${feat_dir}"/* "$acoustic_data_dir"
  echo "done...!"
fi
25 changes: 25 additions & 0 deletions egs/cmu_arctic/s1/04_prepare_conf_files.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
# Step 4 of the recipe: generate the training and synthesis config files
# from the global settings file given as the only argument.

# Abort on the first failing command. Set here rather than in the shebang so
# it also applies when the script is started as "bash 04_prepare_conf_files.sh".
set -e

if test "$#" -ne 1; then
  echo "################################"
  echo "Usage:"
  echo "./04_prepare_conf_files.sh <path_to_global_conf_file>"
  echo ""
  echo "default path to global conf file: conf/global_settings.cfg"
  echo "Config files will be prepared based on settings in global conf file"
  echo "################################"
  exit 1
fi

global_config_file=$1


### Step 4: prepare config files for acoustic, duration models and for synthesis ###
echo "Step 4:"

echo "preparing config files for acoustic, duration models..."
./scripts/prepare_config_files.sh "$global_config_file"

echo "preparing config files for synthesis..."
./scripts/prepare_config_files_for_synthesis.sh "$global_config_file"

23 changes: 23 additions & 0 deletions egs/cmu_arctic/s1/05_train_duration_model.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
# Step 5 of the recipe: train the duration model described by the config
# file given as the only argument.

# Abort on the first failing command. Set here rather than in the shebang so
# it also applies when the script is started as "bash 05_train_duration_model.sh".
set -e

global_config_file=conf/global_settings.cfg
# Reads Voice and MerlinDir; cuda_cmd is expected from here or the environment.
source "$global_config_file"

if test "$#" -ne 1; then
  echo "################################"
  echo "Usage:"
  echo "./05_train_duration_model.sh <path_to_duration_conf_file>"
  echo ""
  echo "Default path to duration conf file: conf/duration_${Voice}.conf"
  echo "################################"
  exit 1
fi

duration_conf_file=$1

# With an empty cuda_cmd the command below would try to execute the scripts
# directory itself, so fail early with a clear message.
: "${cuda_cmd:?cuda_cmd is not set (expected from ${global_config_file} or the environment)}"

### Step 5: train duration model ###
echo "Step 5:"
echo "training duration model..."
# The launcher named by cuda_cmd receives the log file, the submit script,
# the Merlin entry point and the model config, in this order.
./scripts/${cuda_cmd} "experiments/${Voice}/duration_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "${duration_conf_file}"


23 changes: 23 additions & 0 deletions egs/cmu_arctic/s1/06_train_acoustic_model.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
# Step 6 of the recipe: train the acoustic model described by the config
# file given as the only argument.

# Abort on the first failing command. Set here rather than in the shebang so
# it also applies when the script is started as "bash 06_train_acoustic_model.sh".
set -e

global_config_file=conf/global_settings.cfg
# Reads Voice and MerlinDir; cuda_cmd is expected from here or the environment.
source "$global_config_file"

if test "$#" -ne 1; then
  echo "################################"
  echo "Usage:"
  echo "./06_train_acoustic_model.sh <path_to_acoustic_conf_file>"
  echo ""
  echo "Default path to acoustic conf file: conf/acoustic_${Voice}.conf"
  echo "################################"
  exit 1
fi

acoustic_conf_file=$1

# With an empty cuda_cmd the command below would try to execute the scripts
# directory itself, so fail early with a clear message.
: "${cuda_cmd:?cuda_cmd is not set (expected from ${global_config_file} or the environment)}"

### Step 6: train acoustic model ###
echo "Step 6:"
echo "training acoustic model..."
# The launcher named by cuda_cmd receives the log file, the submit script,
# the Merlin entry point and the model config, in this order.
./scripts/${cuda_cmd} "experiments/${Voice}/acoustic_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$acoustic_conf_file"


Loading