CSTR-Edinburgh · b-schnell · Jul 4, 2017 · Jul 4, 2017 · Jul 7, 2017 · Jul 14, 2017
diff --git a/egs/cmu_arctic/README b/egs/cmu_arctic/README
@@ -0,0 +1,12 @@
+About the CMU ARCTIC corpus
+
+The CMU_ARCTIC databases were constructed at the Language Technologies Institute at Carnegie Mellon University as phonetically balanced, US English single speaker databases designed for unit selection speech synthesis research.
+
+The databases consist of around 1150 utterances carefully selected from out-of-copyright texts from Project Gutenberg. The databases include US English male (bdl) and female (slt) speakers (both experienced voice talent) as well as other accented speakers.
+
+Each subdirectory of this directory contains the
+scripts for a sequence of experiments.
+
+  s1: To run run_demo_voice with WORLD vocoder
+
+
diff --git a/egs/cmu_arctic/s1/01_setup.sh b/egs/cmu_arctic/s1/01_setup.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+# Set up directories, data and conf/global_settings.cfg for one CMU ARCTIC voice.
+if test "$#" -ne 1; then
+    echo "################################"
+    echo "Usage:"
+    echo "./01_setup.sh <voice_name>"
+    echo ""
+    echo "Give a voice name eg., slt"
+    echo "Available speakers are bdl, slt, jmk"
+    echo "################################"
+    exit 1
+fi
+
+# The speaker id is the part of the voice name before the first underscore
+# (e.g. "slt_demo" -> "slt"); the full argument remains the voice name.
+voice_name="$1"
+spk="${voice_name%%_*}"
+echo "Speaker is ${spk}."
+# Shell glob (not a regex, despite the variable name) selecting the utterances to use.
+if [[ "${voice_name}" == *"demo"* ]]; then
+    corpus_select_rgx="arctic_a00[0-5][0-9]" # Use only the first 59 utterances in demo.
+else
+    corpus_select_rgx="*" # Use all utterances here.
+fi
+
+### Step 1: setup directories and the training data files ###
+echo "Step 1:"
+
+# The Merlin root is taken to be three directory levels above the current
+# working directory, i.e. the script assumes it is started from egs/cmu_arctic/s1.
+current_working_dir=$(pwd)
+merlin_dir=$(dirname "$(dirname "$(dirname "${current_working_dir}")")")
+experiments_dir=${current_working_dir}/experiments
+data_dir=${current_working_dir}/database
+
+voice_name=$1
+voice_dir=${experiments_dir}/${voice_name}
+
+acoustic_dir=${voice_dir}/acoustic_model
+duration_dir=${voice_dir}/duration_model
+synthesis_dir=${voice_dir}/test_synthesis
+
+# mkdir -p also creates the parent directories (experiments dir, voice dir and
+# the three model dirs), so only the leaves are listed. Paths are quoted so
+# that a checkout location containing spaces still works.
+mkdir -p "${data_dir}" \
+    "${acoustic_dir}/data" \
+    "${duration_dir}/data" \
+    "${synthesis_dir}/txt"
+
+# Database locations, relative to the working directory.
+audio_dir=database/wav
+rawaudio_dir=database/rawaudio
+txt_dir=database/txt
+label_dir=database/labels
+
+# URL of arctic DB.
+arch=cmu_us_${spk}_arctic-WAVEGG.tar.bz2
+url=http://festvox.org/cmu_arctic/cmu_arctic/orig/$arch
+laburl=http://festvox.org/cmu_arctic/cmuarctic.data
+# Download and unpack the audio unless the archive is already present. The
+# work is done in a subshell so the caller's directory is never changed, and
+# a failed download removes the partial archive so that a rerun retries it.
+if [ ! -e "$rawaudio_dir/$arch" ]; then
+    mkdir -p "$rawaudio_dir"
+    if ! (cd "$rawaudio_dir" && wget "$url" && tar xjf "$arch"); then
+        echo "Failed to download or extract $url" >&2
+        rm -f "$rawaudio_dir/$arch"
+        exit 1
+    fi
+fi
+# Always fetch a fresh prompt file; it is consumed and deleted further below.
+rm -rf "$txt_dir"
+mkdir -p "$txt_dir"
+# Stored as utts.data for consistency.
+wget -O "$txt_dir/utts.data" "$laburl" || { echo "Failed to download $laburl" >&2; exit 1; }
+
+# Collect the ids of the selected utterances; the glob expands sorted and without duplicates.
+utts=()
+for wav in "${rawaudio_dir}/cmu_us_${spk}_arctic/orig/"${corpus_select_rgx}.wav; do
+    [ -e "$wav" ] && utts+=("$(basename "$wav" .wav)")
+done
+if [ "${#utts[@]}" -eq 0 ]; then # An unmatched glob stays literal and is skipped above.
+    echo "No audio files found for speaker ${spk} in ${rawaudio_dir}." >&2
+    exit 1
+fi
+# Audios have to be removed because demo/full could have been changed.
+rm -rf "$audio_dir"
+# Leave this check for fast testing, when $audio_dir does not have to be removed.
+if [ ! -e "$audio_dir" ]; then
+    mkdir -p "$audio_dir"
+    for utt in "${utts[@]}"; do
+        # Sample down to 16k mono, script 03_prepare_acoustic_features cannot handle stereo.
+        sox "${rawaudio_dir}/cmu_us_${spk}_arctic/orig/${utt}.wav" "$audio_dir/${utt}.wav" remix 1 rate -v -s -a 16000 dither -s
+    done
+fi
+# Keep the prompt lines whose utterance id (field 2) was selected and write the
+# text that follows the id on each line to <txt_dir>/<id>.txt.
+# Ids are matched as fixed whole words; close() keeps awk from running out of
+# file descriptors when the full corpus is processed.
+grep -wF -f <(printf '%s\n' "${utts[@]}") "${txt_dir}/utts.data" |
+    awk -F' ' -v outDir="${txt_dir}" '{f = outDir"/"$2".txt"; print substr($0,2+length($2)+2,length($0)) > f; close(f)}'
+# Remove the prompt file and any stale labels from a previous run.
+rm "${txt_dir}/utts.data"
+rm -rf "$label_dir"
+
+### create some test files (sentences to synthesize; the .scp list ends with a newline so line-based readers see the last id) ###
+echo "Hello world." > "${synthesis_dir}/txt/test_001.txt"
+echo "Hi, this is a demo voice from Merlin." > "${synthesis_dir}/txt/test_002.txt"
+echo "Hope you guys enjoy free open-source voices from Merlin." > "${synthesis_dir}/txt/test_003.txt"
+printf "test_001\ntest_002\ntest_003\n" > "${synthesis_dir}/test_id_list.scp"
+
+global_config_file=conf/global_settings.cfg
+# Every write below fails if the conf directory is missing, so make sure it exists.
+mkdir -p "$(dirname "$global_config_file")"
+
+# Select 59 examples in the demo.
+if [[ "${voice_name}" == *"demo"* ]]; then
+    num_train_set=49
+    num_dev_set=5
+else # In the full version 5% of the utterances are used for validation and test set each.
+    wav_files=("$audio_dir"/*.wav)
+    [ -e "${wav_files[0]}" ] || wav_files=() # An unmatched glob stays literal.
+    num_files=${#wav_files[@]}
+    num_dev_set=$((num_files * 5 / 100)) # Integer arithmetic, same as int(num_files*0.05).
+    num_train_set=$((num_files - 2 * num_dev_set))
+fi
+
+### default settings ###
+# Written in one go; the resulting file is later sourced by the other step scripts.
+cat > "$global_config_file" <<EOF
+######################################
+############# PATHS ##################
+######################################
+
+MerlinDir=${merlin_dir}
+WorkDir=${current_working_dir}
+
+######################################
+############# PARAMS #################
+######################################
+
+Voice=${voice_name}
+Labels=state_align
+QuestionFile=questions-radio_dnn_416.hed
+Vocoder=WORLD
+SamplingFreq=16000
+SilencePhone='sil'
+FileIDList=file_id_list.scp
+
+######################################
+######### No. of files ###############
+######################################
+
+Train=$num_train_set
+Valid=$num_dev_set
+Test=$num_dev_set
+
+######################################
+############# TOOLS ##################
+######################################
+
+ESTDIR=${merlin_dir}/tools/speech_tools
+FESTDIR=${merlin_dir}/tools/festival
+FESTVOXDIR=${merlin_dir}/tools/festvox
+
+HTKDIR=${merlin_dir}/tools/bin/htk
+
+EOF
+
+echo "Merlin default voice settings configured in \"$global_config_file\""
+echo "Modify these params as per your data..."
+echo "eg., sampling frequency, no. of train files etc.,"
+echo "setup done...!"
+
diff --git a/egs/cmu_arctic/s1/02_prepare_labels.sh b/egs/cmu_arctic/s1/02_prepare_labels.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Step 2: run the forced aligner and install its labels for both models.
+global_config_file=conf/global_settings.cfg
+
+if test "$#" -ne 3; then
+    echo "################################"
+    echo "Usage:"
+    echo "./02_prepare_labels.sh <path_to_wav_dir> <path_to_text_dir> <path_to_labels_dir>"
+    echo ""
+    echo "default path to wav dir(Input): database/wav"
+    echo "default path to txt dir(Input): database/txt"
+    echo "default path to lab dir(Output): database/labels"
+    echo "################################"
+    exit 1
+fi
+
+# Supplies Labels, Voice and FileIDList; nothing below works without them.
+source "$global_config_file" || exit 1
+
+wav_dir=$1
+inp_txt=$2
+lab_dir=$3
+
+####################################
+########## Prepare labels ##########
+####################################
+
+prepare_labels=true
+copy=true
+
+if [ "$prepare_labels" = true ]; then
+    echo "Step 2: "
+    echo "Preparing labels..."
+    case "$Labels" in
+        state_align)
+            ./scripts/run_state_aligner.sh "$wav_dir" "$inp_txt" "$lab_dir" "$global_config_file" ;;
+        phone_align)
+            ./scripts/run_phone_aligner.sh "$wav_dir" "$inp_txt" "$lab_dir" "$global_config_file" ;;
+        *)
+            echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
+            exit 1 ;; # No labels can exist for an unsupported type, so do not go on to copy.
+    esac
+fi
+
+if [ "$copy" = true ]; then
+    echo "Copying labels to duration and acoustic data directories..."
+    if [ ! -d "$lab_dir/label_$Labels" ]; then
+        echo "Label directory $lab_dir/label_$Labels not found, alignment seems to have failed." >&2
+        exit 1
+    fi
+
+    # Each model gets a copy of the labels plus a list of utterance ids
+    # (the label file names with ".lab" removed).
+    for model in duration_model acoustic_model; do
+        model_data_dir=experiments/${Voice}/${model}/data
+        cp -r "$lab_dir/label_$Labels" "$model_data_dir"
+        ls "$lab_dir/label_$Labels" | sed 's/\.lab//g' > "$model_data_dir/$FileIDList"
+    done
+    echo "done...!"
+fi
diff --git a/egs/cmu_arctic/s1/03_prepare_acoustic_features.sh b/egs/cmu_arctic/s1/03_prepare_acoustic_features.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Step 3: extract vocoder features from the wavs and install them for the acoustic model.
+global_config_file=conf/global_settings.cfg
+if test "$#" -ne 2; then
+    echo "################################"
+    echo "Usage:"
+    echo "./03_prepare_acoustic_features.sh <path_to_wav_dir> <path_to_feat_dir>"
+    echo ""
+    echo "default path to wav dir(Input): database/wav"
+    echo "default path to feat dir(Output): database/feats"
+    echo "################################"
+    exit 1
+fi
+
+# Supplies MerlinDir, SamplingFreq and Voice; nothing below works without them.
+source "$global_config_file" || exit 1
+wav_dir=$1
+feat_dir=$2
+
+# ls -A prints nothing when the directory is empty (or cannot be listed).
+if [ -z "$(ls -A "${wav_dir}")" ]; then
+    echo "Please place your audio files in: ${wav_dir}"
+    exit 1
+fi
+
+####################################
+##### prepare vocoder features #####
+####################################
+
+prepare_feats=true
+copy=true
+
+if [ "$prepare_feats" = true ]; then
+    echo "Step 3:"
+    echo "Prepare acoustic features using WORLD vocoder..."
+    # Stop here if extraction fails, rather than copying missing or partial features.
+    python "${MerlinDir}/misc/scripts/vocoder/world/extract_features_for_merlin.py" "${MerlinDir}" "${wav_dir}" "${feat_dir}" "$SamplingFreq" || exit 1
+fi
+if [ "$copy" = true ]; then
+    echo "Copying features to acoustic data directory..."
+    acoustic_data_dir=experiments/${Voice}/acoustic_model/data
+    cp -r "${feat_dir}"/* "$acoustic_data_dir"
+    echo "done...!"
+fi
diff --git a/egs/cmu_arctic/s1/04_prepare_conf_files.sh b/egs/cmu_arctic/s1/04_prepare_conf_files.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Abort on the first failing command. Set here rather than on the shebang line,
+# where the option is lost if the script is started as "bash <script>".
+set -e
+
+if test "$#" -ne 1; then
+    echo "################################"
+    echo "Usage:"
+    echo "./04_prepare_conf_files.sh <path_to_global_conf_file>"
+    echo ""
+    echo "default path to global conf file: conf/global_settings.cfg"
+    echo "Config files will be prepared based on settings in global conf file"
+    echo "################################"
+    exit 1
+fi
+
+global_config_file=$1
+
+### Step 4: prepare config files for acoustic, duration models and for synthesis ###
+echo "Step 4:"
+echo "preparing config files for acoustic, duration models..."
+./scripts/prepare_config_files.sh "$global_config_file"
+echo "preparing config files for synthesis..."
+./scripts/prepare_config_files_for_synthesis.sh "$global_config_file"
+
diff --git a/egs/cmu_arctic/s1/05_train_duration_model.sh b/egs/cmu_arctic/s1/05_train_duration_model.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+set -e # Kept out of the shebang line, where it is lost when run as "bash <script>".
+global_config_file=conf/global_settings.cfg
+source "$global_config_file"
+
+if test "$#" -ne 1; then
+    echo "################################"
+    echo "Usage:"
+    echo "./05_train_duration_model.sh <path_to_duration_conf_file>"
+    echo ""
+    echo "Default path to duration conf file: conf/duration_${Voice}.conf"
+    echo "################################"
+    exit 1
+fi
+duration_conf_file=$1
+# cuda_cmd (the launcher under ./scripts) is not assigned in this script; fail early with a clear message if it is missing.
+: "${cuda_cmd:?cuda_cmd is not set; define it in ${global_config_file} or the environment}"
+### Step 5: train duration model ###
+echo "Step 5:"
+echo "training duration model..."
+./scripts/${cuda_cmd} "experiments/${Voice}/duration_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "${duration_conf_file}"
+
+
diff --git a/egs/cmu_arctic/s1/06_train_acoustic_model.sh b/egs/cmu_arctic/s1/06_train_acoustic_model.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+set -e # Kept out of the shebang line, where it is lost when run as "bash <script>".
+global_config_file=conf/global_settings.cfg
+source "$global_config_file"
+
+if test "$#" -ne 1; then
+    echo "################################"
+    echo "Usage:"
+    echo "./06_train_acoustic_model.sh <path_to_acoustic_conf_file>"
+    echo ""
+    echo "Default path to acoustic conf file: conf/acoustic_${Voice}.conf"
+    echo "################################"
+    exit 1
+fi
+acoustic_conf_file=$1
+# cuda_cmd (the launcher under ./scripts) is not assigned in this script; fail early with a clear message if it is missing.
+: "${cuda_cmd:?cuda_cmd is not set; define it in ${global_config_file} or the environment}"
+### Step 6: train acoustic model ###
+echo "Step 6:"
+echo "training acoustic model..."
+./scripts/${cuda_cmd} "experiments/${Voice}/acoustic_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$acoustic_conf_file"
+
+