From 9037c2f79b1a79c0026b01353510d9bcf1913a95 Mon Sep 17 00:00:00 2001
From: Bastian SCHNELL <bschnell@italix39.idiap.ch>
Date: Tue, 4 Jul 2017 14:54:05 +0200
Subject: [PATCH 1/6] Added Sun Grid support in egs/slt_arctic

---
 egs/slt_arctic/s1/cmd.sh           |  21 ++
 egs/slt_arctic/s1/scripts/queue.pl | 312 +++++++++++++++++++++++++++++
 egs/slt_arctic/s1/scripts/run.pl   | 148 ++++++++++++++
 3 files changed, 481 insertions(+)
 create mode 100755 egs/slt_arctic/s1/cmd.sh
 create mode 100755 egs/slt_arctic/s1/scripts/queue.pl
 create mode 100755 egs/slt_arctic/s1/scripts/run.pl

diff --git a/egs/slt_arctic/s1/cmd.sh b/egs/slt_arctic/s1/cmd.sh
new file mode 100755
index 00000000..3f414ff6
--- /dev/null
+++ b/egs/slt_arctic/s1/cmd.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# "queue.pl" uses qsub.  The options to it are
+# options to qsub.  If you have GridEngine installed,
+# change this to a queue you have access to.
+# Otherwise, use "run.pl", which will run jobs locally
+# (make sure your --num-jobs options are no more than
+# the number of cpus on your machine).
+
+#a) Sun grid options (IDIAP)
+# ATTENTION: Do that in your shell: SETSHELL grid
+export cuda_cmd="queue.pl -l gpu"
+#export cuda_cmd="queue.pl -l q1d,hostname=dynamix03"
+#export cuda_cmd="..."
+
+#b) BUT cluster options
+#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
+#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
+
+#c) run it locally...
+#export cuda_cmd=run.pl
diff --git a/egs/slt_arctic/s1/scripts/queue.pl b/egs/slt_arctic/s1/scripts/queue.pl
new file mode 100755
index 00000000..eb9ef3de
--- /dev/null
+++ b/egs/slt_arctic/s1/scripts/queue.pl
@@ -0,0 +1,312 @@
+#!/usr/bin/perl
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).
+# Apache 2.0.
+use File::Basename;
+use Cwd;
+
+# queue.pl has the same functionality as run.pl, except that
+# it runs the job in question on the queue (Sun GridEngine).
+# This version of queue.pl uses the task array functionality
+# of the grid engine.  Note: it's different from the queue.pl
+# in the s4 and earlier scripts.
+
+$qsub_opts = "";
+$sync = 0;
+
+for ($x = 1; $x <= 3; $x++) { # Up to three passes, so that
+  # the JOB=1:n option may be interleaved with the
+  # options to qsub.
+  while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) {
+    $switch = shift @ARGV;
+    if ($switch eq "-V") { # -V is the only switch taken without a value.
+      $qsub_opts .= "-V ";
+    } else {
+      $option = shift @ARGV;
+      if ($switch eq "-sync" && $option =~ m/^[yY]/) {
+        $sync = 1; # remember that "-sync y" was requested.
+      }
+      $qsub_opts .= "$switch $option ";
+      if ($switch eq "-pe") { # e.g. -pe smp 5
+        $option2 = shift @ARGV;
+        $qsub_opts .= "$option2 ";
+      }
+    }
+  }
+  if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { # e.g. JOB=1:10.
+    $jobname = $1;
+    $jobstart = $2;
+    $jobend = $3;
+    if ($jobstart > $jobend) { # Validate before shifting, so that the
+      die "queue.pl: invalid job range $ARGV[0]"; # message names the bad spec.
+    }
+    shift;
+  } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
+    $jobname = $1;
+    $jobstart = $2;
+    $jobend = $2;
+    shift;
+  } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) { # looks like a malformed JOB=a:b spec.
+    print STDERR "Warning: suspicious first argument to queue.pl: $ARGV[0]\n";
+  }
+}
+
+
+if (@ARGV < 2) {
+  print STDERR
+   "Usage: queue.pl [options to qsub] [JOB=1:n] log-file command-line arguments...\n" .
+   "e.g.: queue.pl foo.log echo baz\n" .
+   " (which will echo \"baz\", with stdout and stderr directed to foo.log)\n" .
+   "or: queue.pl -q all.q\@xyz foo.log echo bar \| sed s/bar/baz/ \n" .
+   " (which is an example of using a pipe; you can provide other escaped bash constructs)\n" .
+   "or: queue.pl -q all.q\@qyz JOB=1:10 foo.JOB.log echo JOB \n" .
+   " (which illustrates the mechanism to submit parallel jobs; note, you can use \n" .
+   "  another string other than JOB)\n" .
+   "Note: if you pass the \"-sync y\" option to qsub, this script will take note\n" .
+   "and change its behavior.  Otherwise it uses qstat to work out when the job finished\n";
+  exit 1;
+}
+
+$cwd = getcwd();
+$logfile = shift @ARGV;
+
+if (defined $jobname && $logfile !~ m/$jobname/
+    && $jobend > $jobstart) {
+  print STDERR "queue.pl: you are trying to run a parallel job but "
+    . "you are putting the output into just one log file ($logfile)\n";
+  exit(1);
+}
+
+#
+# Work out the command; quote escaping is done here.
+# Note: the rules for escaping stuff are worked out pretty
+# arbitrarily, based on what we want it to do.  Some things that
+# we pass as arguments to queue.pl, such as "|", we want to be
+# interpreted by bash, so we don't escape them.  Other things,
+# such as archive specifiers like 'ark:gunzip -c foo.gz|', we want
+# to be passed, in quotes, to the Kaldi program.  Our heuristic
+# is that stuff with spaces in should be quoted.  This doesn't
+# always work.
+#
+$cmd = "";
+
+foreach $x (@ARGV) { 
+  if ($x =~ m/^\S+$/) { $cmd .= $x . " "; } # If string contains no whitespace, take
+                                            # as-is.
+  elsif ($x =~ m:\":) { $cmd .= "'\''$x'\'' "; } # else if it contains a dbl-quote, use single
+  else { $cmd .= "\"$x\" "; }  # else use double.
+}
+
+#
+# Work out the location of the script file, and open it for writing.
+#
+$dir = dirname($logfile);
+$base = basename($logfile);
+$qdir = "$dir/q";
+$qdir =~ s:/(log|LOG)/*q:/q:; # If qdir ends in .../log/q, make it just .../q.
+$queue_logfile = "$qdir/$base";
+
+if (!-d $dir) { system "mkdir $dir 2>/dev/null"; } # another job may be doing this...
+if (!-d $dir) { die "Cannot make the directory $dir\n"; }
+# make a directory called "q",
+# where we will put the log created by qsub... normally this doesn't contain
+# anything interesting, everything goes to $logfile.
+if (! -d "$qdir") { 
+  system "mkdir $qdir 2>/dev/null";
+  sleep(5); ## This is to fix an issue we encountered in denominator lattice creation,
+  ## where if e.g. the exp/tri2b_denlats/log/15/q directory had just been
+  ## created and the job immediately ran, it would die with an error because nfs
+  ## had not yet synced.  I'm also decreasing the acdirmin and acdirmax in our
+  ## NFS settings to something like 5 seconds.
+} 
+
+if (defined $jobname) { # It's an array job.
+  $queue_array_opt = "-t $jobstart:$jobend"; 
+  $logfile =~ s/$jobname/\$SGE_TASK_ID/g; # This variable will get 
+  # replaced by qsub, in each job, with the job-id.
+  $cmd =~ s/$jobname/\$SGE_TASK_ID/g; # same for the command...
+  $queue_logfile =~ s/\.?$jobname//; # the log file in the q/ subdirectory
+  # is for the queue to put its log, and this doesn't need the task array subscript
+  # so we remove it.
+}
+
+# queue_scriptfile is as $queue_logfile [e.g. dir/q/foo.log] but
+# with the suffix .sh.
+$queue_scriptfile = $queue_logfile;
+($queue_scriptfile =~ s/\.[a-zA-Z]{1,5}$/.sh/) || ($queue_scriptfile .= ".sh");
+if ($queue_scriptfile !~ m:^/:) {
+  $queue_scriptfile = $cwd . "/" . $queue_scriptfile; # just in case.
+}
+
+# We'll write to the standard input of "qsub" (the file-handle Q),
+# the job that we want it to execute.
+# Also keep our current PATH around, just in case there was something
+# in it that we need (although we also source ./path.sh)
+
+$syncfile = "$qdir/done.$$";
+
+system("rm $queue_logfile $syncfile 2>/dev/null");
+# The submission command; built here so that it can also be recorded in the script.
+$qsub_cmd = "qsub -S /bin/bash -v PATH -cwd -j y -o $queue_logfile $qsub_opts $queue_array_opt $queue_scriptfile >>$queue_logfile 2>&1";
+# Write to the script file, and then close it.
+open(Q, ">$queue_scriptfile") || die "Failed to write to $queue_scriptfile";
+
+print Q "#!/bin/bash\n";
+print Q "cd $cwd\n";
+print Q ". ./path.sh\n";
+print Q "( echo '#' Running on \`hostname\`\n";
+print Q "  echo '#' Started at \`date\`\n";
+print Q "  echo -n '# '; cat <<EOF\n";
+print Q "$cmd\n"; # this is a way of echoing the command into a comment in the log file,
+print Q "EOF\n"; # without having to escape things like "|" and quote characters.
+print Q ") >$logfile\n";
+print Q " ( $cmd ) 2>>$logfile >>$logfile\n";
+print Q "ret=\$?\n";
+print Q "echo '#' Finished at \`date\` with status \$ret >>$logfile\n";
+print Q "[ \$ret -eq 137 ] && exit 100;\n"; # If process was killed (e.g. oom) it will exit with status 137;
+  # let the script return with status 100 which will put it to E state; more easily rerunnable.
+if (!defined $jobname) { # not an array job
+  print Q "touch $syncfile\n"; # so we know it's done.
+} else {
+  print Q "touch $syncfile.\$SGE_TASK_ID\n"; # touch a bunch of sync-files.
+}
+print Q "exit \$[\$ret ? 1 : 0]\n"; # avoid status 100 which grid-engine
+print Q "## submitted with:\n";       # treats specially.
+print Q "# $qsub_cmd\n"; # placed after the exit line, so it is a record only.
+if (!close(Q)) { # buffered write errors only show up when closing.
+  die "Failed to close the script file (full disk?)";
+}
+
+$ret = system($qsub_cmd);
+if ($ret != 0) {
+  if ($sync && $ret == 256) { # this is the exit status when a job failed (bad exit status)
+    if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+    print STDERR "queue.pl: job writing to $logfile failed\n";
+  } else {
+    print STDERR "queue.pl: error submitting jobs to queue (return status was $ret)\n";
+    print STDERR `tail $queue_logfile`;
+  }
+  exit(1);
+}
+
+if (! $sync) { # We're not submitting with -sync y, so we
+  # need to wait for the jobs to finish.  We wait for the
+  # sync-files we "touched" in the script to exist.
+  @syncfiles = ();
+  if (!defined $jobname) { # not an array job.
+    push @syncfiles, $syncfile;
+  } else {
+    for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+      push @syncfiles, "$syncfile.$jobid";
+    }
+  }
+  # We will need the sge_job_id, to check that job still exists
+  $sge_job_id=`grep "Your job" $queue_logfile | awk '{ print \$3 }' | sed 's|\\\..*||'`;
+  chomp($sge_job_id);
+  $check_sge_job_ctr=1;
+  #
+  $wait = 0.1;
+  foreach $f (@syncfiles) {
+    # wait for them to finish one by one.
+    while (! -f $f) {
+      sleep($wait);
+      $wait *= 1.2;
+      if ($wait > 3.0) {
+        $wait = 3.0; # never wait more than 3 seconds.
+        if (rand() > 0.5) {
+          system("touch $qdir/.kick");
+        } else {
+          system("rm $qdir/.kick 2>/dev/null");
+        }
+        # This seems to kick NFS in the teeth to cause it to refresh the
+        # directory.  I've seen cases where it would indefinitely fail to get
+        # updated, even though the file exists on the server.
+        system("ls $qdir >/dev/null");
+      }
+
+      # Check that the job exists in SGE. Job can be killed if duration 
+      # exceeds some hard limit, or in case of a machine shutdown. 
+      if(($check_sge_job_ctr++ % 10) == 0) { # Don't run qstat too often, avoid stress on SGE.
+        if ( -f $f ) { next; }; #syncfile appeared, ok
+        $ret = system("qstat -j $sge_job_id >/dev/null 2>/dev/null");
+        if($ret != 0) {
+          # Don't consider immediately missing job as error, first wait some  
+          # time to make sure it is not just delayed creation of the syncfile.
+          sleep(3);
+          if ( -f $f ) { next; }; #syncfile appeared, ok
+          sleep(7);
+          if ( -f $f ) { next; }; #syncfile appeared, ok
+          sleep(20);
+          if ( -f $f ) { next; }; #syncfile appeared, ok
+          #Otherwise it is an error
+          if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+          print STDERR "queue.pl: Error, unfinished job no longer exists, log is in $logfile\n";
+          print STDERR "          Possible reasons: a) Exceeded time limit? -> Use more jobs! b) Shutdown/Frozen machine? -> Run again!\n";
+          exit(1);
+        }
+      }
+    }
+  }
+  $all_syncfiles = join(" ", @syncfiles);
+  system("rm $all_syncfiles 2>/dev/null");
+}
+
+# OK, at this point we are synced; we know the job is done.
+# But we don't know about its exit status.  We'll look at $logfile for this.
+# First work out an array @logfiles of file-locations we need to
+# read (just one, unless it's an array job).
+@logfiles = ();
+if (!defined $jobname) { # not an array job.
+  push @logfiles, $logfile;
+} else {
+  for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+    $l = $logfile; 
+    $l =~ s/\$SGE_TASK_ID/$jobid/g;
+    push @logfiles, $l;
+  }
+}
+
+$num_failed = 0;
+foreach $l (@logfiles) {
+  @wait_times = (0.1, 0.2, 0.2, 0.3, 0.5, 0.5, 1.0, 2.0, 5.0, 5.0, 5.0, 10.0, 25.0);
+  for ($iter = 0; $iter <= @wait_times; $iter++) {
+    $line = `tail -10 $l 2>/dev/null`; # Note: although this line should be the last
+    # line of the file, I've seen cases where it was not quite the last line because
+    # of delayed output by the process that was running, or processes it had called.
+    # so tail -10 gives it a little leeway.
+    if ($line =~ m/with status (\d+)/) {
+      $status = $1;
+      last;
+    } else {
+      if ($iter < @wait_times) {
+        sleep($wait_times[$iter]);
+      } else {
+        if (! -f $l) {
+          print STDERR "Log-file $l does not exist.\n";
+        } else {
+          print STDERR "The last line of log-file $l does not seem to indicate the "
+            . "return status as expected\n";
+        }
+        exit(1);                # Something went wrong with the queue, or the
+        # machine it was running on, probably.
+      }
+    }
+  }
+  # OK, now we have $status, which is the return-status of
+  # the command in the job.
+  if ($status != 0) { $num_failed++; }
+}
+if ($num_failed == 0) { exit(0); }
+else { # we failed.
+  if (@logfiles == 1) {
+    if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/$jobstart/g; }
+    print STDERR "queue.pl: job failed with status $status, log is in $logfile\n";
+    if ($logfile =~ m/JOB/) {
+      print STDERR "queue.pl: probably you forgot to put JOB=1:\$nj in your script.\n";
+    }
+  } else {
+    if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+    $numjobs = 1 + $jobend - $jobstart;
+    print STDERR "queue.pl: $num_failed / $numjobs failed, log is in $logfile\n";
+  }
+  exit(1);
+}
diff --git a/egs/slt_arctic/s1/scripts/run.pl b/egs/slt_arctic/s1/scripts/run.pl
new file mode 100755
index 00000000..1750bc50
--- /dev/null
+++ b/egs/slt_arctic/s1/scripts/run.pl
@@ -0,0 +1,148 @@
+#!/usr/bin/perl -w
+
+# In general, doing 
+#  run.pl some.log a b c is like running the command a b c in
+# the bash shell, and putting the standard error and output into some.log.
+# To run parallel jobs (backgrounded on the host machine), you can do (e.g.)
+#  run.pl JOB=1:4 some.JOB.log a b c JOB is like running the command a b c JOB
+# and putting it in some.JOB.log, for each one. [Note: JOB can be any identifier].
+# If any of the jobs fails, this script will fail.
+
+# A typical example is:
+#  run.pl some.log my-prog "--opt=foo bar" foo \|  other-prog baz
+# and run.pl will run something like:
+# ( my-prog '--opt=foo bar' foo |  other-prog baz ) >& some.log
+# 
+# Basically it takes the command-line arguments, quotes them
+# as necessary to preserve spaces, and evaluates them with bash.
+# In addition it puts the command line at the top of the log, and
+# the start and end times of the command at the beginning and end.
+# The reason why this is useful is so that we can create a different
+# version of this program that uses a queueing system instead.
+
+@ARGV < 2 && die "usage: run.pl log-file command-line arguments...";
+
+$jobstart=1;
+$jobend=1;
+$qsub_opts=""; # These will be ignored.
+
+# First parse an option like JOB=1:4, and any
+# options that would normally be given to 
+# queue.pl, which we will just discard.
+
+if (@ARGV > 0) {
+  while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) { # parse any options
+    # that would normally go to qsub, but which will be ignored here.
+    $switch = shift @ARGV;
+    if ($switch eq "-V") {
+      $qsub_opts .= "-V ";
+    } else {
+      $option = shift @ARGV;
+      if ($switch eq "-sync" && $option =~ m/^[yY]/) {
+        $qsub_opts .= "-sync "; # Note: in the
+        # corresponding code in queue.pl it says instead, just "$sync = 1;".
+      }
+      $qsub_opts .= "$switch $option ";
+      if ($switch eq "-pe") { # e.g. -pe smp 5
+        $option2 = shift @ARGV;
+        $qsub_opts .= "$option2 ";
+      }
+    }
+  }
+  if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { # e.g. JOB=1:4.
+    $jobname = $1;
+    $jobstart = $2;
+    $jobend = $3;
+    if ($jobstart > $jobend) { # Validate before shifting, so that the
+      die "run.pl: invalid job range $ARGV[0]"; # message names the bad spec.
+    }
+    shift;
+  } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
+    $jobname = $1;
+    $jobstart = $2;
+    $jobend = $2;
+    shift;
+  } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) { # looks like a malformed JOB=a:b spec.
+    print STDERR "Warning: suspicious first argument to run.pl: $ARGV[0]\n";
+  }
+}
+
+if ($qsub_opts ne "") {
+  print STDERR "Warning: run.pl ignoring options \"$qsub_opts\"\n";
+}
+
+$logfile = shift @ARGV;
+
+if (defined $jobname && $logfile !~ m/$jobname/ &&
+    $jobend > $jobstart) {
+  print STDERR "run.pl: you are trying to run a parallel job but "
+    . "you are putting the output into just one log file ($logfile)\n";
+  exit(1);
+}
+
+$cmd = "";
+
+foreach $x (@ARGV) { 
+    if ($x =~ m/^\S+$/) { $cmd .=  $x . " "; }
+    elsif ($x =~ m:\":) { $cmd .= "'$x' "; }
+    else { $cmd .= "\"$x\" "; } 
+}
+
+
+for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+  $childpid = fork();
+  if (!defined $childpid) { die "Error forking in run.pl (writing to $logfile)"; }
+  if ($childpid == 0) { # We're in the child... this branch
+    # executes the job and returns (possibly with an error status).
+    if (defined $jobname) { 
+      $cmd =~ s/$jobname/$jobid/g;
+      $logfile =~ s/$jobname/$jobid/g;
+    }
+    system("mkdir -p `dirname $logfile` 2>/dev/null");
+    open(F, ">$logfile") || die "Error opening log file $logfile";
+    print F "# " . $cmd . "\n";
+    print F "# Started at " . `date`;
+    $starttime = `date +'%s'`;
+    print F "#\n";
+    close(F);
+
+    # Pipe into bash.. make sure we're not using any other shell.
+    open(B, "|bash") || die "Error opening shell command"; 
+    print B "( " . $cmd . ") 2>>$logfile >> $logfile";
+    close(B);                   # If there was an error, exit status is in $?
+    $ret = $?;
+
+    $endtime = `date +'%s'`;
+    open(F, ">>$logfile") || die "Error opening log file $logfile (again)";
+    $enddate = `date`;
+    chop $enddate;
+    print F "# Ended (code $ret) at " . $enddate . ", elapsed time " . ($endtime-$starttime) . " seconds\n";
+    close(F);
+    exit($ret == 0 ? 0 : 1);
+  }
+}
+
+$ret = 0;
+$numfail = 0;
+for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) { # reap one child per job.
+  $r = wait();
+  if ($r == -1) { die "Error waiting for child process"; } # should never happen.
+  if ($? != 0) { $numfail++; $ret = 1; } # The child process failed.
+}
+
+if ($ret != 0) {
+  $njobs = $jobend - $jobstart + 1;
+  if ($njobs == 1) { 
+    print STDERR "run.pl: job failed, log is in $logfile\n";
+    if ($logfile =~ m/JOB/) { # a literal JOB was left unsubstituted in the log name.
+      print STDERR "run.pl: probably you forgot to put JOB=1:\$nj in your script.\n";
+    }
+  }
+  else {
+    $logfile =~ s/$jobname/*/g; # report the per-job logs as a wildcard pattern.
+    print STDERR "run.pl: $numfail / $njobs failed, log is in $logfile\n";
+  }
+}
+
+
+exit ($ret);

From 3da3cfd7d41cf4c0d96bc928d1ce667e4ff190f7 Mon Sep 17 00:00:00 2001
From: Bastian SCHNELL <bschnell@italix39.idiap.ch>
Date: Tue, 4 Jul 2017 15:03:48 +0200
Subject: [PATCH 2/6] Added Sun Grid support in egs/slt_arctic part2.

---
 egs/slt_arctic/s1/03_train_duration_model.sh | 2 +-
 egs/slt_arctic/s1/04_train_acoustic_model.sh | 2 +-
 egs/slt_arctic/s1/05_run_merlin.sh           | 4 ++--
 egs/slt_arctic/s1/run_demo.sh                | 3 +++
 egs/slt_arctic/s1/run_full_voice.sh          | 2 ++
 5 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/egs/slt_arctic/s1/03_train_duration_model.sh b/egs/slt_arctic/s1/03_train_duration_model.sh
index 1b2411fe..f32f5dfd 100755
--- a/egs/slt_arctic/s1/03_train_duration_model.sh
+++ b/egs/slt_arctic/s1/03_train_duration_model.sh
@@ -18,6 +18,6 @@ duration_conf_file=$1
 ### Step 3: train duration model ###
 echo "Step 3:"
 echo "training duration model..."
-./scripts/submit.sh ${MerlinDir}/src/run_merlin.py $duration_conf_file
+./scripts/${cuda_cmd} "experiments/slt_arctic_demo/duration_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "${duration_conf_file}"
 
 
diff --git a/egs/slt_arctic/s1/04_train_acoustic_model.sh b/egs/slt_arctic/s1/04_train_acoustic_model.sh
index 59e21d38..87c38368 100755
--- a/egs/slt_arctic/s1/04_train_acoustic_model.sh
+++ b/egs/slt_arctic/s1/04_train_acoustic_model.sh
@@ -18,6 +18,6 @@ acoustic_conf_file=$1
 ### Step 4: train acoustic model ###
 echo "Step 4:"
 echo "training acoustic model..."
-./scripts/submit.sh ${MerlinDir}/src/run_merlin.py $acoustic_conf_file
+./scripts/${cuda_cmd} "experiments/slt_arctic_demo/acoustic_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$acoustic_conf_file"
 
 
diff --git a/egs/slt_arctic/s1/05_run_merlin.sh b/egs/slt_arctic/s1/05_run_merlin.sh
index c7745e23..01e76d4a 100755
--- a/egs/slt_arctic/s1/05_run_merlin.sh
+++ b/egs/slt_arctic/s1/05_run_merlin.sh
@@ -22,10 +22,10 @@ test_synth_config_file=$2
 echo "Step 5:" 
 
 echo "synthesizing durations..."
-./scripts/submit.sh ${MerlinDir}/src/run_merlin.py $test_dur_config_file
+./scripts/${cuda_cmd} "experiments/slt_arctic_demo/test_synthesis/_synth_dur.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$test_dur_config_file"
 
 echo "synthesizing speech..."
-./scripts/submit.sh ${MerlinDir}/src/run_merlin.py $test_synth_config_file
+./scripts/${cuda_cmd} "experiments/slt_arctic_demo/test_synthesis/_synth_speech.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$test_synth_config_file"
 
 echo "deleting intermediate synthesis files..."
 ./scripts/remove_intermediate_files.sh $global_config_file
diff --git a/egs/slt_arctic/s1/run_demo.sh b/egs/slt_arctic/s1/run_demo.sh
index da2f2dab..a9fb2ff0 100755
--- a/egs/slt_arctic/s1/run_demo.sh
+++ b/egs/slt_arctic/s1/run_demo.sh
@@ -1,10 +1,13 @@
 #!/bin/bash -e
 
+source cmd.sh
+
 if test "$#" -ne 0; then
     echo "Usage: ./run_demo.sh"
     exit 1
 fi
 
+
 ### Step 1: setup directories and the training data files ###
 ./01_setup.sh slt_arctic_demo
 
diff --git a/egs/slt_arctic/s1/run_full_voice.sh b/egs/slt_arctic/s1/run_full_voice.sh
index 732044bf..cd19e0c5 100755
--- a/egs/slt_arctic/s1/run_full_voice.sh
+++ b/egs/slt_arctic/s1/run_full_voice.sh
@@ -1,5 +1,7 @@
 #!/bin/bash -e
 
+source cmd.sh
+
 if test "$#" -ne 0; then
     echo "Usage: ./run_full_voice.sh"
     exit 1

From 76b54984fa198cd54ae0ee9287630eed4ec790a3 Mon Sep 17 00:00:00 2001
From: Bastian SCHNELL <bschnell@italix39.idiap.ch>
Date: Fri, 14 Jul 2017 12:35:27 +0200
Subject: [PATCH 3/6] General version of the cmu_arctic recipe. Supports bdl,
 slt and jmk speaker

Relies on properly installed festival at merlin/tools.
---
 egs/cmu_arctic/README                         |  12 +
 egs/cmu_arctic/s1/01_setup.sh                 | 175 ++++++++++
 egs/cmu_arctic/s1/02_prepare_labels.sh        |  60 ++++
 .../s1/03_prepare_acoustic_features.sh        |  43 +++
 egs/cmu_arctic/s1/04_prepare_conf_files.sh    |  25 ++
 egs/cmu_arctic/s1/05_train_duration_model.sh  |  23 ++
 egs/cmu_arctic/s1/06_train_acoustic_model.sh  |  23 ++
 egs/cmu_arctic/s1/07_run_merlin.sh            |  41 +++
 egs/cmu_arctic/s1/README.md                   |  54 +++
 egs/cmu_arctic/s1/cmd.sh                      |  23 ++
 egs/cmu_arctic/s1/conf/logging_config.conf    | 129 ++++++++
 egs/cmu_arctic/s1/merlin_synthesis.sh         |  34 ++
 egs/cmu_arctic/s1/run_demo_voice.sh           |  34 ++
 egs/cmu_arctic/s1/run_full_voice.sh           |  33 ++
 .../s1/scripts/prepare_config_files.sh        | 195 +++++++++++
 .../prepare_config_files_for_synthesis.sh     | 223 +++++++++++++
 .../s1/scripts/prepare_labels_from_txt.sh     |  85 +++++
 egs/cmu_arctic/s1/scripts/queue.pl            | 312 ++++++++++++++++++
 .../s1/scripts/remove_intermediate_files.sh   |  38 +++
 egs/cmu_arctic/s1/scripts/run.pl              | 148 +++++++++
 .../s1/scripts/run_phone_aligner.sh           |  88 +++++
 .../s1/scripts/run_state_aligner.sh           |  60 ++++
 egs/cmu_arctic/s1/scripts/submit.sh           |  33 ++
 egs/cmu_arctic/s1/scripts/test_nan.sh         |  30 ++
 24 files changed, 1921 insertions(+)
 create mode 100644 egs/cmu_arctic/README
 create mode 100755 egs/cmu_arctic/s1/01_setup.sh
 create mode 100755 egs/cmu_arctic/s1/02_prepare_labels.sh
 create mode 100755 egs/cmu_arctic/s1/03_prepare_acoustic_features.sh
 create mode 100755 egs/cmu_arctic/s1/04_prepare_conf_files.sh
 create mode 100755 egs/cmu_arctic/s1/05_train_duration_model.sh
 create mode 100755 egs/cmu_arctic/s1/06_train_acoustic_model.sh
 create mode 100755 egs/cmu_arctic/s1/07_run_merlin.sh
 create mode 100644 egs/cmu_arctic/s1/README.md
 create mode 100755 egs/cmu_arctic/s1/cmd.sh
 create mode 100644 egs/cmu_arctic/s1/conf/logging_config.conf
 create mode 100755 egs/cmu_arctic/s1/merlin_synthesis.sh
 create mode 100755 egs/cmu_arctic/s1/run_demo_voice.sh
 create mode 100755 egs/cmu_arctic/s1/run_full_voice.sh
 create mode 100755 egs/cmu_arctic/s1/scripts/prepare_config_files.sh
 create mode 100755 egs/cmu_arctic/s1/scripts/prepare_config_files_for_synthesis.sh
 create mode 100755 egs/cmu_arctic/s1/scripts/prepare_labels_from_txt.sh
 create mode 100755 egs/cmu_arctic/s1/scripts/queue.pl
 create mode 100755 egs/cmu_arctic/s1/scripts/remove_intermediate_files.sh
 create mode 100755 egs/cmu_arctic/s1/scripts/run.pl
 create mode 100755 egs/cmu_arctic/s1/scripts/run_phone_aligner.sh
 create mode 100755 egs/cmu_arctic/s1/scripts/run_state_aligner.sh
 create mode 100755 egs/cmu_arctic/s1/scripts/submit.sh
 create mode 100755 egs/cmu_arctic/s1/scripts/test_nan.sh

diff --git a/egs/cmu_arctic/README b/egs/cmu_arctic/README
new file mode 100644
index 00000000..433caad9
--- /dev/null
+++ b/egs/cmu_arctic/README
@@ -0,0 +1,12 @@
+About the CMU ARCTIC corpus
+
+The CMU_ARCTIC databases were constructed at the Language Technologies Institute at Carnegie Mellon University as phonetically balanced, US English single speaker databases designed for unit selection speech synthesis research.
+
+The databases consist of around 1150 utterances carefully selected from out-of-copyright texts from Project Gutenberg. The databases include US English male (bdl) and female (slt) speakers (both experienced voice talent) as well as other accented speakers.
+
+Each subdirectory of this directory contains the
+scripts for a sequence of experiments.
+
+  s1: To run run_demo_voice with WORLD vocoder
+
+
diff --git a/egs/cmu_arctic/s1/01_setup.sh b/egs/cmu_arctic/s1/01_setup.sh
new file mode 100755
index 00000000..e3063b3b
--- /dev/null
+++ b/egs/cmu_arctic/s1/01_setup.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+
+if test "$#" -ne 1; then
+    echo "################################"
+    echo "Usage:"
+    echo "./01_setup.sh <voice_name>"
+    echo ""
+    echo "Give a voice name eg., slt"
+    echo "Available speakers are bdl, slt, jmk"
+    echo "################################"
+    exit 1
+fi
+
+IFS='_' read -ra voice_name <<< "$1"
+spk="${voice_name[0]}"
+voice_name="$1"
+echo "Speaker is ${spk}."
+
+# Define a regex to select only parts of the database for the demo version.
+if [[ "${voice_name}" == *"demo"* ]]; then
+    corpus_select_rgx="arctic_a00[0-5][0-9]" # Use only the first 59 utterances in demo.
+else
+    corpus_select_rgx="*" # Use all utterances here.
+fi
+
+### Step 1: setup directories and the training data files ###
+echo "Step 1:"
+
+current_working_dir=$(pwd)
+merlin_dir=$(dirname $(dirname $(dirname $current_working_dir)))
+experiments_dir=${current_working_dir}/experiments
+data_dir=${current_working_dir}/database
+
+voice_name=$1
+voice_dir=${experiments_dir}/${voice_name}
+
+acoustic_dir=${voice_dir}/acoustic_model
+duration_dir=${voice_dir}/duration_model
+synthesis_dir=${voice_dir}/test_synthesis
+
+mkdir -p ${data_dir}
+mkdir -p ${experiments_dir}
+mkdir -p ${voice_dir}
+mkdir -p ${acoustic_dir}
+mkdir -p ${duration_dir}
+mkdir -p ${synthesis_dir}
+mkdir -p ${acoustic_dir}/data
+mkdir -p ${duration_dir}/data
+mkdir -p ${synthesis_dir}/txt
+
+
+audio_dir=database/wav
+rawaudio_dir=database/rawaudio
+txt_dir=database/txt
+label_dir=database/labels
+
+# URL of arctic DB.
+arch=cmu_us_${spk}_arctic-WAVEGG.tar.bz2
+url=http://festvox.org/cmu_arctic/cmu_arctic/orig/$arch
+laburl=http://festvox.org/cmu_arctic/cmuarctic.data
+# Download the data.
+if [ ! -e $rawaudio_dir/$arch ]; then
+    mkdir -p $rawaudio_dir
+    # Download and unpack in a subshell so a failure cannot leave us in the
+    # wrong directory; drop a partial archive so that a re-run retries it.
+    (cd $rawaudio_dir && wget $url && tar xjf $arch) \
+        || { rm -f $rawaudio_dir/$arch; echo "Could not fetch or unpack $url" >&2; exit 1; }
+fi
+rm -rf $txt_dir
+if [ ! -e $txt_dir ]; then
+    mkdir -p $txt_dir
+    cd $txt_dir
+    wget $laburl
+    mv cmuarctic.data utts.data # For consistency.
+    cd ../../
+fi
+
+# Collect utterances ids of necessary audio files.
+utts=($(find "${rawaudio_dir}"/cmu_us_${spk}_arctic/orig/${corpus_select_rgx}.wav -exec basename {} .wav \;))
+# Remove duplicates.
+utts=($(printf "%s\n" "${utts[@]}" | sort -u))
+
+# Audios have to be removed because demo/full could have been changed.
+rm -rf $audio_dir
+# Leave this check for fast testing, when $audio_dir does not have to be removed.
+if [ ! -e $audio_dir ]; then
+    mkdir -p $audio_dir
+    # Collect necessary audio files.
+    for utt in "${utts[@]}"; do
+        # Sample down to 16k mono, script 03_prepare_acoustic_features cannot handle stereo.
+        sox "${rawaudio_dir}"/cmu_us_${spk}_arctic/orig/${utt}.wav $audio_dir/${utt}.wav remix 1 rate -v -s -a 16000 dither -s
+    done
+fi
+
+# Get labels, combine the selected utterances to a regex pattern.
+export utts_pat=$(echo ${utts[@]}|tr " " "|")
+# Select those labels of utts.data which belong to the selected utterances.
+cat ${txt_dir}/utts.data | grep -wE "${utts_pat}" >| ${txt_dir}/utts_selected.data
+# Turn every line of utts_selected.data into a txt file using the utterance id as file name.
+awk -F' ' -v outDir=${txt_dir} '{print substr($0,2+length($2)+2,length($0)) > outDir"/"$2".txt"}' ${txt_dir}/utts_selected.data
+# Remove unnecessary files.
+rm ${txt_dir}/utts.data
+rm ${txt_dir}/utts_selected.data
+
+rm -rf $label_dir
+
+### create some test files ###
+echo "Hello world." > ${synthesis_dir}/txt/test_001.txt
+echo "Hi, this is a demo voice from Merlin." > ${synthesis_dir}/txt/test_002.txt
+echo "Hope you guys enjoy free open-source voices from Merlin." > ${synthesis_dir}/txt/test_003.txt
+printf "test_001\ntest_002\ntest_003" > ${synthesis_dir}/test_id_list.scp
+
+global_config_file=conf/global_settings.cfg
+
+### default settings ###
+echo "######################################" > $global_config_file
+echo "############# PATHS ##################" >> $global_config_file
+echo "######################################" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "MerlinDir=${merlin_dir}" >>  $global_config_file
+echo "WorkDir=${current_working_dir}" >>  $global_config_file
+echo "" >> $global_config_file
+
+echo "######################################" >> $global_config_file
+echo "############# PARAMS #################" >> $global_config_file
+echo "######################################" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "Voice=${voice_name}" >> $global_config_file
+echo "Labels=state_align" >> $global_config_file
+echo "QuestionFile=questions-radio_dnn_416.hed" >> $global_config_file
+echo "Vocoder=WORLD" >> $global_config_file
+echo "SamplingFreq=16000" >> $global_config_file
+echo "SilencePhone='sil'" >> $global_config_file
+echo "FileIDList=file_id_list.scp" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "######################################" >> $global_config_file
+echo "######### No. of files ###############" >> $global_config_file
+echo "######################################" >> $global_config_file
+echo "" >> $global_config_file
+
+# Select 59 examples in the demo.
+if [[ "${voice_name}" == *"demo"* ]]; then
+    echo "Train=49" >> $global_config_file
+    echo "Valid=5" >> $global_config_file 
+    echo "Test=5" >> $global_config_file
+else # In the full version 5% of the utterances are used for validation and test set each.
+    num_files=$(ls -1 $audio_dir | wc -l)
+    num_dev_set=$(awk "BEGIN { pc=${num_files}*0.05; print(int(pc)) }")
+    num_train_set=$(($num_files-2*$num_dev_set))
+    echo "Train=$num_train_set" >> $global_config_file
+    echo "Valid=$num_dev_set" >> $global_config_file 
+    echo "Test=$num_dev_set" >> $global_config_file
+fi
+echo "" >> $global_config_file
+
+echo "######################################" >> $global_config_file
+echo "############# TOOLS ##################" >> $global_config_file
+echo "######################################" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "ESTDIR=${merlin_dir}/tools/speech_tools" >> $global_config_file
+echo "FESTDIR=${merlin_dir}/tools/festival" >> $global_config_file
+echo "FESTVOXDIR=${merlin_dir}/tools/festvox" >> $global_config_file
+echo "" >> $global_config_file
+echo "HTKDIR=${merlin_dir}/tools/bin/htk" >> $global_config_file
+echo "" >> $global_config_file
+
+echo "Merlin default voice settings configured in \"$global_config_file\""
+echo "Modify these params as per your data..."
+echo "eg., sampling frequency, no. of train files etc.,"
+echo "setup done...!"
+
diff --git a/egs/cmu_arctic/s1/02_prepare_labels.sh b/egs/cmu_arctic/s1/02_prepare_labels.sh
new file mode 100755
index 00000000..cb52f940
--- /dev/null
+++ b/egs/cmu_arctic/s1/02_prepare_labels.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# Step 2 of the recipe: create alignment labels and install them in the
+# duration and acoustic model data directories.
+#
+# Usage: ./02_prepare_labels.sh <path_to_wav_dir> <path_to_text_dir> <path_to_labels_dir>
+#
+# Labels, Voice and FileIDList are read from conf/global_settings.cfg.
+
+global_config_file=conf/global_settings.cfg
+source $global_config_file
+
+if test "$#" -ne 3; then
+    echo "################################"
+    echo "Usage:"
+    echo "./02_prepare_labels.sh <path_to_wav_dir> <path_to_text_dir> <path_to_labels_dir>"
+    echo ""
+    echo "default path to wav dir(Input): database/wav"
+    echo "default path to txt dir(Input): database/txt"
+    echo "default path to lab dir(Output): database/labels"
+    echo "################################"
+    exit 1
+fi
+
+wav_dir=$1
+inp_txt=$2
+lab_dir=$3
+
+####################################
+########## Prepare labels ##########
+####################################
+
+# Set either switch to false to skip that stage when re-running the script.
+prepare_labels=true
+copy=true
+
+if [ "$prepare_labels" = true ]; then
+    echo "Step 2: "
+    echo "Preparing labels..."
+
+    if [ "$Labels" == "state_align" ]
+    then
+        ./scripts/run_state_aligner.sh "$wav_dir" "$inp_txt" "$lab_dir" "$global_config_file"
+    elif [ "$Labels" == "phone_align" ]
+    then
+        ./scripts/run_phone_aligner.sh "$wav_dir" "$inp_txt" "$lab_dir" "$global_config_file"
+    else
+        # An unsupported label type produces nothing to copy below, so stop.
+        echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
+        exit 1
+    fi
+fi
+
+if [ "$copy" = true ]; then
+    echo "Copying labels to duration and acoustic data directories..."
+
+    duration_data_dir=experiments/${Voice}/duration_model/data
+    acoustic_data_dir=experiments/${Voice}/acoustic_model/data
+
+    cp -r "$lab_dir/label_$Labels" "$duration_data_dir" || exit 1
+    cp -r "$lab_dir/label_$Labels" "$acoustic_data_dir" || exit 1
+
+    # The file id list holds the label file names without their ".lab" suffix.
+    # Piping through sed avoids "sed -i", whose syntax differs between GNU and
+    # BSD sed.
+    ls "$lab_dir/label_$Labels" | sed 's/\.lab$//' > "$duration_data_dir/$FileIDList"
+    ls "$lab_dir/label_$Labels" | sed 's/\.lab$//' > "$acoustic_data_dir/$FileIDList"
+
+    echo "done...!"
+fi
diff --git a/egs/cmu_arctic/s1/03_prepare_acoustic_features.sh b/egs/cmu_arctic/s1/03_prepare_acoustic_features.sh
new file mode 100755
index 00000000..f8267aaf
--- /dev/null
+++ b/egs/cmu_arctic/s1/03_prepare_acoustic_features.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+#
+# Step 3 of the recipe: extract vocoder features from the audio and copy
+# them into the acoustic model data directory.
+#
+# Usage: ./03_prepare_acoustic_features.sh <path_to_wav_dir> <path_to_feat_dir>
+#
+# MerlinDir, SamplingFreq and Voice are read from conf/global_settings.cfg.
+
+global_config_file=conf/global_settings.cfg
+source $global_config_file
+
+if test "$#" -ne 2; then
+    echo "################################"
+    echo "Usage:"
+    echo "./03_prepare_acoustic_features.sh <path_to_wav_dir> <path_to_feat_dir>"
+    echo ""
+    echo "default path to wav dir(Input): database/wav"
+    echo "default path to feat dir(Output): database/feats"
+    echo "################################"
+    exit 1
+fi
+
+wav_dir=$1
+feat_dir=$2
+
+# "ls -A" prints nothing for an empty directory, which makes the test true.
+if [ ! "$(ls -A "${wav_dir}")" ]; then
+    echo "Please place your audio files in: ${wav_dir}"
+    exit 1
+fi
+
+####################################
+##### prepare vocoder features #####
+####################################
+
+# Set either switch to false to skip that stage when re-running the script.
+prepare_feats=true
+copy=true
+
+if [ "$prepare_feats" = true ]; then
+    echo "Step 3:"
+    echo "Prepare acoustic features using WORLD vocoder..."
+    # Stop on extraction failure so that incomplete features are never copied.
+    python "${MerlinDir}/misc/scripts/vocoder/world/extract_features_for_merlin.py" "${MerlinDir}" "${wav_dir}" "${feat_dir}" $SamplingFreq || exit 1
+fi
+
+if [ "$copy" = true ]; then
+    echo "Copying features to acoustic data directory..."
+    acoustic_data_dir=experiments/${Voice}/acoustic_model/data
+    cp -r "${feat_dir}"/* "$acoustic_data_dir" || exit 1
+    echo "done...!"
+fi
diff --git a/egs/cmu_arctic/s1/04_prepare_conf_files.sh b/egs/cmu_arctic/s1/04_prepare_conf_files.sh
new file mode 100755
index 00000000..12962042
--- /dev/null
+++ b/egs/cmu_arctic/s1/04_prepare_conf_files.sh
@@ -0,0 +1,29 @@
+#!/bin/bash -e
+#
+# Step 4 of the recipe: generate the training and synthesis config files
+# from the global settings file given as the only argument.
+
+if [ "$#" -ne 1 ]; then
+    cat <<'EOF'
+################################
+Usage:
+./04_prepare_conf_files.sh <path_to_global_conf_file>
+
+default path to global conf file: conf/global_settings.cfg
+Config files will be prepared based on settings in global conf file
+################################
+EOF
+    exit 1
+fi
+
+settings_file=$1
+
+echo "Step 4:"
+
+# Config files for the duration and acoustic models.
+echo "preparing config files for acoustic, duration models..."
+./scripts/prepare_config_files.sh $settings_file
+
+# Config files for synthesis.
+echo "preparing config files for synthesis..."
+./scripts/prepare_config_files_for_synthesis.sh $settings_file
diff --git a/egs/cmu_arctic/s1/05_train_duration_model.sh b/egs/cmu_arctic/s1/05_train_duration_model.sh
new file mode 100755
index 00000000..26b7326f
--- /dev/null
+++ b/egs/cmu_arctic/s1/05_train_duration_model.sh
@@ -0,0 +1,31 @@
+#!/bin/bash -e
+#
+# Step 5 of the recipe: train the duration model.
+#
+# Usage: ./05_train_duration_model.sh <path_to_duration_conf_file>
+#
+# Voice and MerlinDir are read from conf/global_settings.cfg; the job launcher
+# comes from the cuda_cmd environment variable exported by cmd.sh.
+
+global_config_file=conf/global_settings.cfg
+source $global_config_file
+
+if test "$#" -ne 1; then
+    echo "################################"
+    echo "Usage:"
+    echo "./05_train_duration_model.sh <path_to_duration_conf_file>"
+    echo ""
+    echo "Default path to duration conf file: conf/duration_${Voice}.conf"
+    echo "################################"
+    exit 1
+fi
+
+duration_conf_file=$1
+
+### Step 5: train duration model ###
+echo "Step 5:"
+echo "training duration model..."
+# cuda_cmd stays unquoted because it may carry launcher options (for example
+# "queue.pl -l gpu"). The ":?" form aborts with a clear message when it is
+# unset or empty instead of trying to execute the ./scripts/ directory.
+./scripts/${cuda_cmd:?is not set - source cmd.sh first} "experiments/${Voice}/duration_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "${duration_conf_file}"
diff --git a/egs/cmu_arctic/s1/06_train_acoustic_model.sh b/egs/cmu_arctic/s1/06_train_acoustic_model.sh
new file mode 100755
index 00000000..e955f5a0
--- /dev/null
+++ b/egs/cmu_arctic/s1/06_train_acoustic_model.sh
@@ -0,0 +1,31 @@
+#!/bin/bash -e
+#
+# Step 6 of the recipe: train the acoustic model.
+#
+# Usage: ./06_train_acoustic_model.sh <path_to_acoustic_conf_file>
+#
+# Voice and MerlinDir are read from conf/global_settings.cfg; the job launcher
+# comes from the cuda_cmd environment variable exported by cmd.sh.
+
+global_config_file=conf/global_settings.cfg
+source $global_config_file
+
+if test "$#" -ne 1; then
+    echo "################################"
+    echo "Usage:"
+    echo "./06_train_acoustic_model.sh <path_to_acoustic_conf_file>"
+    echo ""
+    echo "Default path to acoustic conf file: conf/acoustic_${Voice}.conf"
+    echo "################################"
+    exit 1
+fi
+
+acoustic_conf_file=$1
+
+### Step 6: train acoustic model ###
+echo "Step 6:"
+echo "training acoustic model..."
+# cuda_cmd stays unquoted because it may carry launcher options (for example
+# "queue.pl -l gpu"). The ":?" form aborts with a clear message when it is
+# unset or empty instead of trying to execute the ./scripts/ directory.
+./scripts/${cuda_cmd:?is not set - source cmd.sh first} "experiments/${Voice}/acoustic_model/log/_train.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$acoustic_conf_file"
diff --git a/egs/cmu_arctic/s1/07_run_merlin.sh b/egs/cmu_arctic/s1/07_run_merlin.sh
new file mode 100755
index 00000000..f9daea49
--- /dev/null
+++ b/egs/cmu_arctic/s1/07_run_merlin.sh
@@ -0,0 +1,51 @@
+#!/bin/bash -e
+#
+# Step 7 of the recipe: synthesize speech from text.
+#
+# Usage: ./07_run_merlin.sh <path_to_text_dir> <path_to_test_dur_conf_file> <path_to_test_synth_conf_file>
+#
+# Voice and MerlinDir are read from conf/global_settings.cfg; the job launcher
+# comes from the cuda_short_cmd environment variable exported by cmd.sh.
+
+global_config_file=conf/global_settings.cfg
+source $global_config_file
+
+if test "$#" -ne 3; then
+    echo "################################"
+    echo "Usage: "
+    echo "./07_run_merlin.sh <path_to_text_dir> <path_to_test_dur_conf_file> <path_to_test_synth_conf_file>"
+    echo ""
+    echo "default path to text dir: experiments/${Voice}/test_synthesis/txt"
+    echo "default path to test duration conf file: conf/test_dur_synth_${Voice}.conf"
+    echo "default path to test synthesis conf file: conf/test_synth_${Voice}.conf"
+    echo "################################"
+    exit 1
+fi
+
+inp_txt=$1
+test_dur_config_file=$2
+test_synth_config_file=$3
+
+### Step 7: synthesize speech from text ###
+echo "Step 7:"
+echo "synthesizing speech from text..."
+
+echo "preparing full-contextual labels using Festival frontend..."
+# The parent of the text directory is passed as the label directory.
+lab_dir=$(dirname "$inp_txt")
+./scripts/prepare_labels_from_txt.sh "$inp_txt" "$lab_dir" "$global_config_file"
+
+# cuda_short_cmd stays unquoted because it may carry launcher options. The
+# ":?" form aborts with a clear message when it is unset or empty instead of
+# trying to execute the ./scripts/ directory.
+echo "synthesizing durations..."
+./scripts/${cuda_short_cmd:?is not set - source cmd.sh first} "experiments/${Voice}/test_synthesis/_dur.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$test_dur_config_file"
+
+echo "synthesizing speech..."
+./scripts/${cuda_short_cmd:?is not set - source cmd.sh first} "experiments/${Voice}/test_synthesis/_synth.log" "./scripts/submit.sh" "${MerlinDir}/src/run_merlin.py" "$test_synth_config_file"
+
+echo "deleting intermediate synthesis files..."
+./scripts/remove_intermediate_files.sh "$global_config_file"
+
+echo "synthesized audio files are in: experiments/${Voice}/test_synthesis/wav"
+echo "All successful!! Your demo voice is ready :)"
diff --git a/egs/cmu_arctic/s1/README.md b/egs/cmu_arctic/s1/README.md
new file mode 100644
index 00000000..afd5d697
--- /dev/null
+++ b/egs/cmu_arctic/s1/README.md
@@ -0,0 +1,54 @@
+Download Merlin
+---------------
+
+Step 1: git clone https://github.com/CSTR-Edinburgh/merlin.git
+
+Install tools
+-------------
+
+Step 2: cd merlin/tools <br/>
+Step 3: ./compile_tools.sh <br/>
+Step 4: install festival and HTS at merlin/tools/ <br/>
+	Possible help: [Issue96](https://github.com/CSTR-Edinburgh/merlin/issues/96)
+
+Setup
+-----
+
+To setup voice: 
+
+Take a look at ./01_setup.sh
+You probably have to change the way the database is accessed, this depends on how your database is structured.
+Check the lines 70-95, the comments should guide you through the process.
+
+Demo voice
+----------
+
+To run demo voice, please follow below steps:
+ 
+Step 5: cd merlin/egs/cmu_arctic/s1 <br/>
+Step 6: ./run_demo_voice.sh speaker
+	speaker can be bdl, slt, jmk
+	The data for the speaker is downloaded from the cmu server.
+
+Demo voice trains only on 59 utterances and shouldn't take more than 5 min.
+
+Full voice
+----------
+
+To run full voice, please follow below steps:
+
+Step 5: cd merlin/egs/cmu_arctic/s1 <br/>
+Step 6: ./run_full_voice.sh speaker
+
+Full voice utilizes the whole arctic data (1132 utterances). The training of the voice approximately takes 1 to 2 hours. 
+
+Generate new sentences
+----------------------
+
+To generate new sentences, please follow below steps:
+
+Step  8: Run either demo voice or full voice. <br/>
+Step  9: Place the txt files containing the utterances in experiments/speaker_arctic_demo/test_synthesis/txt OR experiments/speaker_arctic_full/test_synthesis/txt
+	 NOTE: speaker should be the speaker you used before (bdl, slt, jmk).
+Step 10: ./merlin_synthesis.sh
+
diff --git a/egs/cmu_arctic/s1/cmd.sh b/egs/cmu_arctic/s1/cmd.sh
new file mode 100755
index 00000000..158e72c8
--- /dev/null
+++ b/egs/cmu_arctic/s1/cmd.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Defines the job launchers (cuda_cmd, cuda_short_cmd) used by the step scripts.
+# "queue.pl" uses qsub.  The options to it are
+# options to qsub.  If you have GridEngine installed,
+# change this to a queue you have access to.
+# Otherwise, use "run.pl", which will run jobs locally
+# (make sure your --num-jobs options are no more than
+# the number of cpus on your machine).
+
+#a) Sun grid options (IDIAP)
+# ATTENTION: Do that in your shell: SETSHELL grid
+#export cuda_cmd="queue.pl -l gpu"
+#export cuda_short_cmd="queue.pl -l sgpu"
+#export cuda_cmd="queue.pl -l q1d,hostname=dynamix03"
+#export cuda_cmd="..."
+
+#b) BUT cluster options
+#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
+#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
+
+#c) run it locally (cuda_short_cmd simply reuses cuda_cmd)...
+export cuda_cmd=run.pl
+export cuda_short_cmd=$cuda_cmd
diff --git a/egs/cmu_arctic/s1/conf/logging_config.conf b/egs/cmu_arctic/s1/conf/logging_config.conf
new file mode 100644
index 00000000..d4aea5a3
--- /dev/null
+++ b/egs/cmu_arctic/s1/conf/logging_config.conf
@@ -0,0 +1,129 @@
+#  configuration for logging
+# 
+# logging levels are: DEBUG, INFO, WARNING, ERROR, CRITICAL
+
+
+
+[loggers]
+# list all loggers here, then configure them below
+# if you don't list a logger here, the configuration below will be ignored
+# whenever you add a new logger to the code, remember to name it here and add a new logger_<name> section below
+keys=root,configuration,main,main.train_DNN,plotting,labels,dur,acoustic_comp,acoustic_norm,dnn_generation,param_generation,wav_generation,subprocess,ListDataProvider
+
+# two handlers; you should never need to change this
+[handlers]
+keys=console, file
+
+# two formatters for those handlers; again, you should never need to change this
+[formatters]
+keys=console, file
+
+# this is the root logger - don't change this (other than changing the level)
+[logger_root]
+level=WARNING
+handlers=file,console
+
+# configure each logger like this:
+# logger_<name of the logger as used in the [loggers] section above>
+# this logger is called 'configuration'
+[logger_configuration]
+# logging level - change this to WARNING for tested code, or to CRITICAL if you only want logging just before a crash
+level=DEBUG
+# name the handlers - this should be one or both of file,console depending where you want log messages written to
+handlers=file,console
+# name of the logger as referred to in the code
+qualname=configuration
+# whether to propagate messages to parent loggers (incl. the root logger), in general you want propagate=0
+propagate=0
+
+[logger_main]
+level=DEBUG
+handlers=file,console
+qualname=main
+propagate=0
+
+[logger_main.train_DNN]
+level=DEBUG
+handlers=file,console
+qualname=main.train_DNN
+propagate=0
+
+[logger_plotting]
+level=INFO
+qualname=plotting
+handlers=console,file
+propagate=0
+
+[logger_labels]
+level=DEBUG
+handlers=file,console
+qualname=labels
+propagate=0
+
+[logger_dur]
+level=DEBUG
+handlers=file,console
+qualname=dur
+propagate=0
+
+[logger_dnn_generation]
+level=DEBUG
+handlers=file,console
+qualname=dnn_generation
+propagate=0
+
+[logger_param_generation]
+level=DEBUG
+handlers=file,console
+qualname=param_generation
+propagate=0
+
+
+[logger_wav_generation]
+level=INFO
+handlers=file,console
+qualname=wav_generation
+propagate=0
+
+[logger_subprocess]
+level=INFO
+handlers=file,console
+qualname=subprocess
+propagate=0
+
+
+[logger_acoustic_norm]
+level=DEBUG
+handlers=file,console
+qualname=acoustic_norm
+propagate=0
+
+[logger_acoustic_comp]
+level=DEBUG
+handlers=file,console
+qualname=acoustic_comp
+propagate=0
+
+
+[logger_ListDataProvider]
+level=WARNING
+handlers=file,console
+qualname=ListDataProvider
+propagate=0
+
+# [handler_file]
+# do NOT define this here - it will be added automatically
+# because we need to know the log file name before creating this handler
+
+[handler_console]
+class=StreamHandler
+formatter=console
+args=(sys.stdout,)
+
+[formatter_file]
+class=logging.Formatter
+format=%(asctime)s %(levelname)8s%(name)15s: %(message)s
+
+[formatter_console]
+class=logplot.logging_plotting.ColouredFormatter
+format=%(asctime)s %(levelname)8s%(name)15s: %(message)s
diff --git a/egs/cmu_arctic/s1/merlin_synthesis.sh b/egs/cmu_arctic/s1/merlin_synthesis.sh
new file mode 100755
index 00000000..f8d363af
--- /dev/null
+++ b/egs/cmu_arctic/s1/merlin_synthesis.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Runs the synthesis step (07_run_merlin.sh) on the texts placed in
+# experiments/<Voice>/test_synthesis/txt. Takes no arguments.
+
+source cmd.sh
+
+if [ "$#" -ne 0 ]; then
+    echo "Usage: ./merlin_synthesis.sh"
+    exit 1
+fi
+
+settings_file=conf/global_settings.cfg
+
+# Without the global settings file the voice name is unknown, so give up.
+if [ ! -f  $settings_file ]; then
+    echo "Please run steps from 1-5..."
+    exit 1
+fi
+source $settings_file
+
+### Synthesize speech   ###
+synthesis_dir=experiments/${Voice}/test_synthesis
+echo "Synthesizing speech..."
+./07_run_merlin.sh ${synthesis_dir}/txt conf/test_dur_synth_${Voice}.conf conf/test_synth_${Voice}.conf
diff --git a/egs/cmu_arctic/s1/run_demo_voice.sh b/egs/cmu_arctic/s1/run_demo_voice.sh
new file mode 100755
index 00000000..80a04b40
--- /dev/null
+++ b/egs/cmu_arctic/s1/run_demo_voice.sh
@@ -0,0 +1,35 @@
+#!/bin/bash -e
+#
+# Runs the whole recipe (steps 1-7) for the demo voice of one speaker.
+
+source cmd.sh
+# The demo voice runs every job with the short-job launcher.
+export cuda_cmd=$cuda_short_cmd
+
+if [ "$#" -ne 1 ]; then
+    printf '%s\n' "Usage: ./run_demo_voice.sh <speaker>" \
+                  "       Available speakers are bdl, slt, jmk"
+    exit 1
+fi
+voice=${1}_arctic_demo
+
+# Step 1: set up directories and the training data files
+./01_setup.sh ${voice}
+
+# Step 2: prepare festival labels
+./02_prepare_labels.sh database/wav database/txt database/labels
+
+# Step 3: extract acoustic features from audio files
+./03_prepare_acoustic_features.sh database/wav database/feats
+
+# Step 4: prepare config files for acoustic, duration models and for synthesis
+./04_prepare_conf_files.sh conf/global_settings.cfg
+
+# Step 5: train duration model
+./05_train_duration_model.sh conf/duration_${voice}.conf
+
+# Step 6: train acoustic model
+./06_train_acoustic_model.sh conf/acoustic_${voice}.conf
+
+# Step 7: synthesize speech
+./07_run_merlin.sh experiments/${voice}/test_synthesis/txt/ conf/test_dur_synth_${voice}.conf conf/test_synth_${voice}.conf
diff --git a/egs/cmu_arctic/s1/run_full_voice.sh b/egs/cmu_arctic/s1/run_full_voice.sh
new file mode 100755
index 00000000..1f661172
--- /dev/null
+++ b/egs/cmu_arctic/s1/run_full_voice.sh
@@ -0,0 +1,33 @@
+#!/bin/bash -e
+#
+# Runs the whole recipe (steps 1-7) for the full voice of one speaker.
+
+source cmd.sh
+
+if [ "$#" -ne 1 ]; then
+    printf '%s\n' "Usage: ./run_full_voice.sh <speaker>" \
+                  "       Available speakers are bdl, slt, jmk"
+    exit 1
+fi
+voice=${1}_arctic_full
+
+# Step 1: set up directories and the training data files
+./01_setup.sh ${voice}
+
+# Step 2: prepare festival labels
+./02_prepare_labels.sh database/wav database/txt database/labels
+
+# Step 3: extract acoustic features from audio files
+./03_prepare_acoustic_features.sh database/wav database/feats
+
+# Step 4: prepare config files for acoustic, duration models and for synthesis
+./04_prepare_conf_files.sh conf/global_settings.cfg
+
+# Step 5: train duration model
+./05_train_duration_model.sh conf/duration_${voice}.conf
+
+# Step 6: train acoustic model
+./06_train_acoustic_model.sh conf/acoustic_${voice}.conf
+
+# Step 7: synthesize speech
+./07_run_merlin.sh experiments/${voice}/test_synthesis/txt/ conf/test_dur_synth_${voice}.conf conf/test_synth_${voice}.conf
diff --git a/egs/cmu_arctic/s1/scripts/prepare_config_files.sh b/egs/cmu_arctic/s1/scripts/prepare_config_files.sh
new file mode 100755
index 00000000..d3aa7bf7
--- /dev/null
+++ b/egs/cmu_arctic/s1/scripts/prepare_config_files.sh
@@ -0,0 +1,197 @@
+#!/bin/bash
+# Generates conf/duration_${Voice}.conf and conf/acoustic_${Voice}.conf from
+# the Merlin demo recipes, using the settings of the global config file ($1).
+
+if test "$#" -ne 1; then
+    echo "Usage: ./scripts/prepare_config_files.sh conf/global_settings.cfg"
+    exit 1
+fi
+
+# Quoted so that an empty or space-containing argument is tested as one path.
+if [ ! -f "$1" ]; then
+    echo "Global config file doesn't exist"
+    exit 1
+fi
+source "$1"
+
+# The substitutions below rely on GNU sed (-i without a suffix, \s).
+SED=sed
+if [[ "$OSTYPE" == "darwin"* ]]; then
+  if ! command -v gsed > /dev/null; then
+    echo "You need to install GNU sed with 'brew install gnu-sed' on osX"
+    exit 1
+  fi
+  SED=gsed
+fi
+
+
+#########################################
+######## duration config file ###########
+#########################################
+# Written to conf/duration_${Voice}.conf.
+duration_config_file=conf/duration_${Voice}.conf
+
+# Start with a general recipe...
+cp -f $MerlinDir/misc/recipes/duration_demo.conf $duration_config_file
+
+# ... and modify it: each $SED call rewrites the line(s) matching one key in
+# place; '#' is the s-command delimiter so that '/' in paths needs no escaping.
+$SED -i s#'Merlin:.*'#'Merlin: '$MerlinDir# $duration_config_file
+$SED -i s#'TOPLEVEL:.*'#'TOPLEVEL: '${WorkDir}# $duration_config_file
+$SED -i s#'work:.*'#'work: %(TOPLEVEL)s/experiments/'${Voice}'/duration_model'# $duration_config_file
+
+$SED -i s#'file_id_list:.*'#'file_id_list: %(data)s/'${FileIDList}# $duration_config_file
+
+# [Labels]
+
+$SED -i s#"silence_pattern:.*"#"silence_pattern: ['*-"${SilencePhone}"+*']"# $duration_config_file
+$SED -i s#'label_type:.*'#'label_type: '${Labels}# $duration_config_file
+$SED -i s#'label_align:.*'#'label_align: %(TOPLEVEL)s/experiments/'${Voice}'/duration_model/data/label_'${Labels}# $duration_config_file
+$SED -i s#'question_file_name:.*'#'question_file_name: %(Merlin)s/misc/questions/'${QuestionFile}# $duration_config_file
+
+# [Outputs]
+# dur is set to 5 for state_align labels and to 1 for phone_align labels.
+if [ "$Labels" == "state_align" ]
+then
+    $SED -i s#'dur\s*:.*'#'dur: 5'# $duration_config_file
+elif [ "$Labels" == "phone_align" ]
+then
+    $SED -i s#'dur\s*:.*'#'dur: 1'# $duration_config_file
+else
+    echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
+fi
+
+# [Architecture]
+# Voices with "demo" in their name get four 512-unit TANH hidden layers; other
+# voices keep the layer settings of the recipe.
+if [[ "$Voice" == *"demo"* ]]
+then
+    $SED -i s#'hidden_layer_size\s*:.*'#'hidden_layer_size: [512, 512, 512, 512]'# $duration_config_file
+    $SED -i s#'hidden_layer_type\s*:.*'#'hidden_layer_type: ['\''TANH'\'', '\''TANH'\'', '\''TANH'\'', '\''TANH'\'']'# $duration_config_file
+fi
+
+$SED -i s#'sequential_training.*:.*'#'sequential_training: True'# $duration_config_file
+# $SED -i s#'learning_rate.*:.*'#'learning_rate: 0.002'# $duration_config_file
+$SED -i s#'training_epochs.*:.*'#'training_epochs: 50'# $duration_config_file
+
+# [Data]
+# Train, Valid and Test are the file counts from the global config file.
+$SED -i s#'train_file_number\s*:.*'#'train_file_number: '${Train}# $duration_config_file
+$SED -i s#'valid_file_number\s*:.*'#'valid_file_number: '${Valid}# $duration_config_file
+$SED -i s#'test_file_number\s*:.*'#'test_file_number: '${Test}# $duration_config_file
+
+echo "Duration configuration settings stored in $duration_config_file"
+
+
+
+
+#########################################
+######## acoustic config file ###########
+#########################################
+# Written to conf/acoustic_${Voice}.conf.
+acoustic_config_file=conf/acoustic_${Voice}.conf
+
+# Start with a general recipe...
+cp -f $MerlinDir/misc/recipes/acoustic_demo.conf $acoustic_config_file
+
+# ... and modify it: each $SED call rewrites the line(s) matching one key in
+# place; '#' is the s-command delimiter so that '/' in paths needs no escaping.
+$SED -i s#'Merlin:.*'#'Merlin: '$MerlinDir# $acoustic_config_file
+$SED -i s#'TOPLEVEL:.*'#'TOPLEVEL: '${WorkDir}# $acoustic_config_file
+$SED -i s#'work:.*'#'work: %(TOPLEVEL)s/experiments/'${Voice}'/acoustic_model'# $acoustic_config_file
+
+$SED -i s#'file_id_list:.*'#'file_id_list: %(data)s/'${FileIDList}# $acoustic_config_file
+
+
+# [Labels]
+
+$SED -i s#"silence_pattern:.*"#"silence_pattern: ['*-"${SilencePhone}"+*']"# $acoustic_config_file
+$SED -i s#'label_type:.*'#'label_type: '${Labels}# $acoustic_config_file
+$SED -i s#'label_align:.*'#'label_align: %(TOPLEVEL)s/experiments/'${Voice}'/acoustic_model/data/label_'${Labels}# $acoustic_config_file
+$SED -i s#'question_file_name:.*'#'question_file_name: %(Merlin)s/misc/questions/'${QuestionFile}# $acoustic_config_file
+
+if [ "$Labels" == "state_align" ]
+then
+    $SED -i s#'subphone_feats:.*'#'subphone_feats: full'# $acoustic_config_file
+elif [ "$Labels" == "phone_align" ]
+then
+    $SED -i s#'subphone_feats:.*'#'subphone_feats: coarse_coding'# $acoustic_config_file
+else
+    echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
+fi
+
+# [Outputs]
+# The un-anchored patterns also match the delta keys (e.g. 'mgc' matches the
+# 'dmgc' line), so each delta key must be rewritten after its static key.
+$SED -i s#'mgc\s*:.*'#'mgc: 60'# $acoustic_config_file
+$SED -i s#'dmgc\s*:.*'#'dmgc: 180'# $acoustic_config_file
+
+if [ "$Vocoder" == "STRAIGHT" ]
+then
+    $SED -i s#'bap\s*:.*'#'bap: 25'# $acoustic_config_file
+    $SED -i s#'dbap\s*:.*'#'dbap: 75'# $acoustic_config_file
+
+elif [ "$Vocoder" == "WORLD" ]
+then
+    if [ "$SamplingFreq" == "16000" ]
+    then
+        $SED -i s#'bap\s*:.*'#'bap: 1'# $acoustic_config_file
+        $SED -i s#'dbap\s*:.*'#'dbap: 3'# $acoustic_config_file
+    elif [ "$SamplingFreq" == "48000" ]
+    then
+        $SED -i s#'bap\s*:.*'#'bap: 5'# $acoustic_config_file
+        $SED -i s#'dbap\s*:.*'#'dbap: 15'# $acoustic_config_file
+    fi
+else
+    echo "This vocoder ($Vocoder) is not supported as of now...please configure yourself!!"
+fi
+
+$SED -i s#'lf0\s*:.*'#'lf0: 1'# $acoustic_config_file
+$SED -i s#'dlf0\s*:.*'#'dlf0: 3'# $acoustic_config_file
+
+# [Waveform]
+# framelength, minimum_phase_order and fw_alpha depend on the sampling rate.
+$SED -i s#'vocoder_type\s*:.*'#'vocoder_type: '${Vocoder}# $acoustic_config_file
+
+$SED -i s#'samplerate\s*:.*'#'samplerate: '${SamplingFreq}# $acoustic_config_file
+if [ "$SamplingFreq" == "16000" ]
+then
+    $SED -i s#'framelength\s*:.*'#'framelength: 1024'# $acoustic_config_file
+    $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 511'# $acoustic_config_file
+    $SED -i s#'fw_alpha\s*:.*'#'fw_alpha: 0.58'# $acoustic_config_file
+
+elif [ "$SamplingFreq" == "48000" ]
+then
+    if [ "$Vocoder" == "WORLD" ]
+    then
+        $SED -i s#'framelength\s*:.*'#'framelength: 2048'# $acoustic_config_file
+        $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 1023'# $acoustic_config_file
+    else
+        $SED -i s#'framelength\s*:.*'#'framelength: 4096'# $acoustic_config_file
+        $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 2047'# $acoustic_config_file
+    fi
+    $SED -i s#'fw_alpha\s*:.*'#'fw_alpha: 0.77'# $acoustic_config_file
+else
+    echo "This sampling frequency ($SamplingFreq) never tested before...please configure yourself!!"
+fi
+
+# [Architecture]
+# Voices with "demo" in their name get four 512-unit TANH hidden layers.
+if [[ "$Voice" == *"demo"* ]]
+then
+    $SED -i s#'hidden_layer_size\s*:.*'#'hidden_layer_size: [512, 512, 512, 512]'# $acoustic_config_file
+    $SED -i s#'hidden_layer_type\s*:.*'#'hidden_layer_type: ['\''TANH'\'', '\''TANH'\'', '\''TANH'\'', '\''TANH'\'']'# $acoustic_config_file
+fi
+
+#$SED -i s#'sequential_training.*:.*'#'sequential_training: True'# $acoustic_config_file
+#$SED -i s#'learning_rate.*:.*'#'learning_rate: 0.002'# $acoustic_config_file
+#$SED -i s#'training_epochs.*:.*'#'training_epochs: 50'# $acoustic_config_file
+
+# [Data]
+# Train, Valid and Test are the file counts from the global config file.
+$SED -i s#'train_file_number\s*:.*'#'train_file_number: '${Train}# $acoustic_config_file
+$SED -i s#'valid_file_number\s*:.*'#'valid_file_number: '${Valid}# $acoustic_config_file
+$SED -i s#'test_file_number\s*:.*'#'test_file_number: '${Test}# $acoustic_config_file
+
+
+echo "Acoustic configuration settings stored in $acoustic_config_file"
diff --git a/egs/cmu_arctic/s1/scripts/prepare_config_files_for_synthesis.sh b/egs/cmu_arctic/s1/scripts/prepare_config_files_for_synthesis.sh
new file mode 100755
index 00000000..d6f672ca
--- /dev/null
+++ b/egs/cmu_arctic/s1/scripts/prepare_config_files_for_synthesis.sh
@@ -0,0 +1,225 @@
+#!/bin/bash
+# Generates conf/test_dur_synth_${Voice}.conf and conf/test_synth_${Voice}.conf
+# from the Merlin demo recipes, using the global config file ($1).
+
+if test "$#" -ne 1; then
+    echo "Usage: ./scripts/prepare_config_files_for_synthesis.sh conf/global_settings.cfg"
+    exit 1
+fi
+
+# Quoted so that an empty or space-containing argument is tested as one path.
+if [ ! -f "$1" ]; then
+    echo "Global config file doesn't exist"
+    exit 1
+fi
+source "$1"
+
+# The substitutions below rely on GNU sed (-i without a suffix, \s).
+SED=sed
+if [[ "$OSTYPE" == "darwin"* ]]; then
+  if ! command -v gsed > /dev/null; then
+    echo "You need to install GNU sed with 'brew install gnu-sed' on osX"
+    exit 1
+  fi
+  SED=gsed
+fi
+
+
+#########################################
+######## duration config file ###########
+#########################################
+# Written to conf/test_dur_synth_${Voice}.conf.
+duration_config_file=conf/test_dur_synth_${Voice}.conf
+
+# Start with a general recipe...
+cp -f $MerlinDir/misc/recipes/duration_demo.conf $duration_config_file
+
+# ... and modify it: each $SED call rewrites the line(s) matching one key in
+# place; '#' is the s-command delimiter so that '/' in paths needs no escaping.
+$SED -i s#'Merlin:.*'#'Merlin: '$MerlinDir# $duration_config_file
+$SED -i s#'TOPLEVEL:.*'#'TOPLEVEL: '${WorkDir}# $duration_config_file
+$SED -i s#'work:.*'#'work: %(TOPLEVEL)s/experiments/'${Voice}'/duration_model'# $duration_config_file
+
+$SED -i s#'file_id_list:.*'#'file_id_list: %(data)s/'${FileIDList}# $duration_config_file
+$SED -i s#'test_id_list\s*:.*'#'test_id_list: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/test_id_list.scp'# $duration_config_file
+
+
+# [Labels]
+$SED -i s#"silence_pattern:.*"#"silence_pattern: ['*-"${SilencePhone}"+*']"# $duration_config_file
+$SED -i s#'label_type:.*'#'label_type: '${Labels}# $duration_config_file
+$SED -i s#'label_align\s*:.*'#'label_align: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/prompt-lab'# $duration_config_file
+$SED -i s#'question_file_name\s*:.*'#'question_file_name: %(Merlin)s/misc/questions/'${QuestionFile}# $duration_config_file
+
+
+# [Outputs]
+# dur is set to 5 for state_align labels and to 1 for phone_align labels.
+if [ "$Labels" == "state_align" ]
+then
+    $SED -i s#'dur\s*:.*'#'dur: 5'# $duration_config_file
+elif [ "$Labels" == "phone_align" ]
+then
+    $SED -i s#'dur\s*:.*'#'dur: 1'# $duration_config_file
+else
+    echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
+fi
+
+# [Waveform]
+# test_synth_dir points at .../test_synthesis/gen-lab, the same directory the
+# acoustic synthesis config below uses as its label_align.
+$SED -i s#'test_synth_dir\s*:.*'#'test_synth_dir: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/gen-lab'# $duration_config_file
+
+
+# [Architecture]
+if [[ "$Voice" == *"demo"* ]]
+then
+    $SED -i s#'hidden_layer_size\s*:.*'#'hidden_layer_size: [512, 512, 512, 512]'# $duration_config_file
+    $SED -i s#'hidden_layer_type\s*:.*'#'hidden_layer_type: ['\''TANH'\'', '\''TANH'\'', '\''TANH'\'', '\''TANH'\'']'# $duration_config_file
+fi
+
+
+# [Data]
+$SED -i s#'train_file_number\s*:.*'#'train_file_number: '${Train}# $duration_config_file
+$SED -i s#'valid_file_number\s*:.*'#'valid_file_number: '${Valid}# $duration_config_file
+$SED -i s#'test_file_number\s*:.*'#'test_file_number: '${Test}# $duration_config_file
+
+# [Processes]
+# Switches on DurationModel, GenTestList, NORMLAB and DNNGEN and switches off
+# MAKEDUR, MAKECMP, NORMCMP, TRAINDNN and CALMCD.
+$SED -i s#'DurationModel\s*:.*'#'DurationModel: True'# $duration_config_file
+$SED -i s#'GenTestList\s*:.*'#'GenTestList: True'# $duration_config_file
+
+$SED -i s#'NORMLAB\s*:.*'#'NORMLAB: True'# $duration_config_file
+
+$SED -i s#'MAKEDUR\s*:.*'#'MAKEDUR: False'# $duration_config_file
+$SED -i s#'MAKECMP\s*:.*'#'MAKECMP: False'# $duration_config_file
+$SED -i s#'NORMCMP\s*:.*'#'NORMCMP: False'# $duration_config_file
+$SED -i s#'TRAINDNN\s*:.*'#'TRAINDNN: False'# $duration_config_file
+$SED -i s#'CALMCD\s*:.*'#'CALMCD: False'# $duration_config_file
+
+$SED -i s#'DNNGEN\s*:.*'#'DNNGEN: True'# $duration_config_file
+
+echo "Duration configuration settings stored in $duration_config_file"
+
+
+
+#########################################
+######## acoustic config file ###########
+#########################################
+# Written to conf/test_synth_${Voice}.conf.
+acoustic_config_file=conf/test_synth_${Voice}.conf
+
+# Start with a general recipe...
+cp -f $MerlinDir/misc/recipes/acoustic_demo.conf $acoustic_config_file
+
+# ... and modify it: each $SED call rewrites the line(s) matching one key in
+# place; '#' is the s-command delimiter so that '/' in paths needs no escaping.
+$SED -i s#'Merlin\s*:.*'#'Merlin: '$MerlinDir# $acoustic_config_file
+$SED -i s#'TOPLEVEL\s*:.*'#'TOPLEVEL: '${WorkDir}# $acoustic_config_file
+$SED -i s#'work\s*:.*'#'work: %(TOPLEVEL)s/experiments/'${Voice}'/acoustic_model'# $acoustic_config_file
+
+$SED -i s#'file_id_list\s*:.*'#'file_id_list: %(data)s/'${FileIDList}# $acoustic_config_file
+$SED -i s#'test_id_list\s*:.*'#'test_id_list: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/test_id_list.scp'# $acoustic_config_file
+
+
+# [Labels]
+# label_align is the gen-lab directory set as test_synth_dir in the duration config.
+$SED -i s#"silence_pattern:.*"#"silence_pattern: ['*-"${SilencePhone}"+*']"# $acoustic_config_file
+$SED -i s#'enforce_silence\s*:.*'#'enforce_silence: True'# $acoustic_config_file
+$SED -i s#'label_type\s*:.*'#'label_type: '${Labels}# $acoustic_config_file
+$SED -i s#'label_align\s*:.*'#'label_align: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/gen-lab'# $acoustic_config_file
+$SED -i s#'question_file_name\s*:.*'#'question_file_name: %(Merlin)s/misc/questions/'${QuestionFile}# $acoustic_config_file
+if [ "$Labels" == "state_align" ]
+then
+    $SED -i s#'subphone_feats\s*:.*'#'subphone_feats: full'# $acoustic_config_file
+elif [ "$Labels" == "phone_align" ]
+then
+    $SED -i s#'subphone_feats\s*:.*'#'subphone_feats: coarse_coding'# $acoustic_config_file
+else
+    echo "These labels ($Labels) are not supported as of now...please use state_align or phone_align!!"
+fi
+
+# [Outputs]
+# The un-anchored patterns also match the delta keys (e.g. 'mgc' matches the
+# 'dmgc' line), so each delta key must be rewritten after its static key.
+$SED -i s#'mgc\s*:.*'#'mgc: 60'# $acoustic_config_file
+$SED -i s#'dmgc\s*:.*'#'dmgc: 180'# $acoustic_config_file
+
+if [ "$Vocoder" == "STRAIGHT" ]
+then
+    $SED -i s#'bap\s*:.*'#'bap: 25'# $acoustic_config_file
+    $SED -i s#'dbap\s*:.*'#'dbap: 75'# $acoustic_config_file
+
+elif [ "$Vocoder" == "WORLD" ]
+then
+    if [ "$SamplingFreq" == "16000" ]
+    then
+        $SED -i s#'bap\s*:.*'#'bap: 1'# $acoustic_config_file
+        $SED -i s#'dbap\s*:.*'#'dbap: 3'# $acoustic_config_file
+    elif [ "$SamplingFreq" == "48000" ]
+    then
+        $SED -i s#'bap\s*:.*'#'bap: 5'# $acoustic_config_file
+        $SED -i s#'dbap\s*:.*'#'dbap: 15'# $acoustic_config_file
+    fi
+else
+    echo "This vocoder ($Vocoder) is not supported as of now...please configure yourself!!"
+fi
+
+$SED -i s#'lf0\s*:.*'#'lf0: 1'# $acoustic_config_file
+$SED -i s#'dlf0\s*:.*'#'dlf0: 3'# $acoustic_config_file
+
+# [Waveform]
+# framelength, minimum_phase_order and fw_alpha depend on the sampling rate.
+
+$SED -i s#'test_synth_dir\s*:.*'#'test_synth_dir: %(TOPLEVEL)s/experiments/'${Voice}'/test_synthesis/wav'# $acoustic_config_file
+
+$SED -i s#'vocoder_type\s*:.*'#'vocoder_type: '${Vocoder}# $acoustic_config_file
+
+$SED -i s#'samplerate\s*:.*'#'samplerate: '${SamplingFreq}# $acoustic_config_file
+if [ "$SamplingFreq" == "16000" ]
+then
+    $SED -i s#'framelength\s*:.*'#'framelength: 1024'# $acoustic_config_file
+    $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 511'# $acoustic_config_file
+    $SED -i s#'fw_alpha\s*:.*'#'fw_alpha: 0.58'# $acoustic_config_file
+
+elif [ "$SamplingFreq" == "48000" ]
+then
+    if [ "$Vocoder" == "WORLD" ]
+    then
+        $SED -i s#'framelength\s*:.*'#'framelength: 2048'# $acoustic_config_file
+        $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 1023'# $acoustic_config_file
+    else
+        $SED -i s#'framelength\s*:.*'#'framelength: 4096'# $acoustic_config_file
+        $SED -i s#'minimum_phase_order\s*:.*'#'minimum_phase_order: 2047'# $acoustic_config_file
+    fi
+    $SED -i s#'fw_alpha\s*:.*'#'fw_alpha: 0.77'# $acoustic_config_file
+else
+    echo "This sampling frequency ($SamplingFreq) never tested before...please configure yourself!!"
+fi
+
+# Voices with "demo" in their name get four 512-unit TANH hidden layers.
+# [Architecture]
+if [[ "$Voice" == *"demo"* ]]
+then
+    $SED -i s#'hidden_layer_size\s*:.*'#'hidden_layer_size: [512, 512, 512, 512]'# $acoustic_config_file
+    $SED -i s#'hidden_layer_type\s*:.*'#'hidden_layer_type: ['\''TANH'\'', '\''TANH'\'', '\''TANH'\'', '\''TANH'\'']'# $acoustic_config_file
+fi
+
+# Train, Valid and Test are the file counts from the global config file.
+# [Data]
+$SED -i s#'train_file_number\s*:.*'#'train_file_number: '${Train}# $acoustic_config_file
+$SED -i s#'valid_file_number\s*:.*'#'valid_file_number: '${Valid}# $acoustic_config_file
+$SED -i s#'test_file_number\s*:.*'#'test_file_number: '${Test}# $acoustic_config_file
+
+# [Processes]
+# Switches on AcousticModel and GenTestList and switches off MAKECMP, NORMCMP,
+# TRAINDNN and CALMCD.
+$SED -i s#'AcousticModel\s*:.*'#'AcousticModel: True'# $acoustic_config_file
+$SED -i s#'GenTestList\s*:.*'#'GenTestList: True'# $acoustic_config_file
+
+$SED -i s#'MAKECMP\s*:.*'#'MAKECMP: False'# $acoustic_config_file
+$SED -i s#'NORMCMP\s*:.*'#'NORMCMP: False'# $acoustic_config_file
+$SED -i s#'TRAINDNN\s*:.*'#'TRAINDNN: False'# $acoustic_config_file
+$SED -i s#'CALMCD\s*:.*'#'CALMCD: False'# $acoustic_config_file
+
+
+echo "Acoustic configuration settings stored in $acoustic_config_file"
diff --git a/egs/cmu_arctic/s1/scripts/prepare_labels_from_txt.sh b/egs/cmu_arctic/s1/scripts/prepare_labels_from_txt.sh
new file mode 100755
index 00000000..0a35754a
--- /dev/null
+++ b/egs/cmu_arctic/s1/scripts/prepare_labels_from_txt.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+if test "$#" -lt 3; then
+    echo "Usage: ./scripts/prepare_labels_from_txt.sh <path_to_text_dir> <path_to_lab_dir> <path_to_global_conf_file>"
+    exit 1
+fi
+
+### arguments
+inp_txt=$1
+lab_dir=$2
+global_config_file=$3
+
+if [ ! -f $global_config_file ]; then
+    echo "Global config file doesn't exist"
+    exit 1
+else
+    source $global_config_file
+fi
+
+if test "$#" -eq 3; then
+    train=false
+else
+    train=$4
+fi
+
+### tools required
+if [ ! -d "${FESTDIR}" ]; then
+    echo "Please configure festival path in $global_config_file !!"
+    exit 1
+fi
+
+### define few variables here
+frontend=${MerlinDir}/misc/scripts/frontend
+out_dir=$lab_dir
+
+if [ "$train" = true ]; then
+    file_id_scp=file_id_list.scp
+    scheme_file=train_sentences.scm
+else
+    file_id_scp=test_id_list.scp
+    scheme_file=new_test_sentences.scm
+fi
+
+### generate a scheme file 
+python ${frontend}/utils/genScmFile.py \
+                            ${inp_txt} \
+                            ${out_dir}/prompt-utt \
+                            ${out_dir}/$scheme_file \
+                            ${out_dir}/$file_id_scp 
+
+### generate utt from scheme file
+echo "generating utts from scheme file"
+${FESTDIR}/bin/festival -b ${out_dir}/$scheme_file
+
+### convert festival utt to lab
+echo "converting festival utts to labels..."
+${frontend}/festival_utt_to_lab/make_labels \
+                            ${out_dir}/prompt-lab \
+                            ${out_dir}/prompt-utt \
+                            ${FESTDIR}/examples/dumpfeats \
+                            ${frontend}/festival_utt_to_lab
+
+### normalize lab for merlin with options: state_align or phone_align
+echo "normalizing label files for merlin..."
+if [ "$train" = true ]; then
+    python ${frontend}/utils/normalize_lab_for_merlin.py \
+                            ${out_dir}/prompt-lab/full \
+                            ${out_dir}/label_no_align \
+                            phone_align \
+                            ${out_dir}/$file_id_scp 0
+    ### remove any un-necessary files
+    rm -rf ${out_dir}/prompt-lab
+else
+    python ${frontend}/utils/normalize_lab_for_merlin.py \
+                            ${out_dir}/prompt-lab/full \
+                            ${out_dir}/prompt-lab \
+                            ${Labels} \
+                            ${out_dir}/$file_id_scp
+    ### remove any un-necessary files
+    rm -rf ${out_dir}/prompt-lab/{full,mono,tmp}
+    
+   echo "Labels are ready in: ${out_dir}/prompt-lab !!"
+fi
+
+
diff --git a/egs/cmu_arctic/s1/scripts/queue.pl b/egs/cmu_arctic/s1/scripts/queue.pl
new file mode 100755
index 00000000..eb9ef3de
--- /dev/null
+++ b/egs/cmu_arctic/s1/scripts/queue.pl
@@ -0,0 +1,312 @@
+#!/usr/bin/perl
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).
+# Apache 2.0.
+use File::Basename;
+use Cwd;
+
+# queue.pl has the same functionality as run.pl, except that
+# it runs the job in question on the queue (Sun GridEngine).
+# This version of queue.pl uses the task array functionality
+# of the grid engine.  Note: it's different from the queue.pl
+# in the s4 and earlier scripts.
+
+$qsub_opts = "";
+$sync = 0;
+
+for ($x = 1; $x <= 3; $x++) { # Parse leading qsub options and the JOB=a:b spec; three
+  # passes allow the JOB=1:n option to be interleaved with the options to qsub.
+  # Sets $qsub_opts, $sync and, for array jobs, $jobname/$jobstart/$jobend.
+  while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) {
+    $switch = shift @ARGV;
+    if ($switch eq "-V") { # -V is the only switch handled here that takes no argument.
+      $qsub_opts .= "-V ";
+    } else {
+      $option = shift @ARGV;
+      if ($switch eq "-sync" && $option =~ m/^[yY]/) {
+        $sync = 1; # qsub itself will block; no sync-file polling needed later.
+      }
+      $qsub_opts .= "$switch $option ";
+      if ($switch eq "-pe") { # e.g. -pe smp 5
+        $option2 = shift @ARGV;
+        $qsub_opts .= "$option2 ";
+      }
+    }
+  }
+  if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) {
+    $jobname = $1;
+    $jobstart = $2;
+    $jobend = $3;
+    if ($jobstart > $jobend) { # validate before shifting, so the message names the bad range.
+      die "queue.pl: invalid job range $ARGV[0]";
+    }
+    shift;
+  } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
+    $jobname = $1;
+    $jobstart = $2;
+    $jobend = $2;
+    shift;
+  } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) {
+    print STDERR "Warning: suspicious first argument to queue.pl: $ARGV[0]\n";
+  }
+}
+
+
+if (@ARGV < 2) {
+  print STDERR
+   "Usage: queue.pl [options to qsub] [JOB=1:n] log-file command-line arguments...\n" .
+   "e.g.: queue.pl foo.log echo baz\n" .
+   " (which will echo \"baz\", with stdout and stderr directed to foo.log)\n" .
+   "or: queue.pl -q all.q\@xyz foo.log echo bar \| sed s/bar/baz/ \n" .
+   " (which is an example of using a pipe; you can provide other escaped bash constructs)\n" .
+   "or: queue.pl -q all.q\@qyz JOB=1:10 foo.JOB.log echo JOB \n" .
+   " (which illustrates the mechanism to submit parallel jobs; note, you can use \n" .
+   "  another string other than JOB)\n" .
+   "Note: if you pass the \"-sync y\" option to qsub, this script will take note\n" .
+   "and change its behavior.  Otherwise it uses qstat to work out when the job finished\n";
+  exit 1;
+}
+
+$cwd = getcwd();
+$logfile = shift @ARGV;
+
+if (defined $jobname && $logfile !~ m/$jobname/
+    && $jobend > $jobstart) {
+  print STDERR "queue.pl: you are trying to run a parallel job but "
+    . "you are putting the output into just one log file ($logfile)\n";
+  exit(1);
+}
+
+#
+# Work out the command; quote escaping is done here.
+# Note: the rules for escaping stuff are worked out pretty
+# arbitrarily, based on what we want it to do.  Some things that
+# we pass as arguments to queue.pl, such as "|", we want to be
+# interpreted by bash, so we don't escape them.  Other things,
+# such as archive specifiers like 'ark:gunzip -c foo.gz|', we want
+# to be passed, in quotes, to the Kaldi program.  Our heuristic
+# is that stuff with spaces in should be quoted.  This doesn't
+# always work.
+#
+$cmd = ""; # shell command line assembled from the remaining arguments.
+
+foreach $x (@ARGV) { 
+  if ($x =~ m/^\S+$/) { $cmd .= $x . " "; } # If string contains no whitespace, take
+                                            # as-is.
+  elsif ($x =~ m:\":) { $cmd .= "'\''$x'\'' "; } # else if it contains a dbl-quote, use single
+  else { $cmd .= "\"$x\" "; }  # else (whitespace or empty, no dbl-quote) use double.
+}
+
+#
+# Work out the location of the script file, and open it for writing.
+#
+$dir = dirname($logfile);
+$base = basename($logfile);
+$qdir = "$dir/q";
+$qdir =~ s:/(log|LOG)/*q:/q:; # If qdir ends in .../log/q, make it just .../q.
+$queue_logfile = "$qdir/$base";
+
+if (!-d $dir) { system "mkdir -p $dir 2>/dev/null"; } # -p also creates missing parents (as run.pl does); another job may be doing this...
+if (!-d $dir) { die "Cannot make the directory $dir\n"; }
+# make a directory called "q",
+# where we will put the log created by qsub... normally this doesn't contain
+# anything interesting, everything goes to $logfile.
+if (! -d "$qdir") {
+  system "mkdir $qdir 2>/dev/null";
+  sleep(5); ## This is to fix an issue we encountered in denominator lattice creation,
+  ## where if e.g. the exp/tri2b_denlats/log/15/q directory had just been
+  ## created and the job immediately ran, it would die with an error because nfs
+  ## had not yet synced.  I'm also decreasing the acdirmin and acdirmax in our
+  ## NFS settings to something like 5 seconds.
+}
+
+if (defined $jobname) { # It's an array job.
+  $queue_array_opt = "-t $jobstart:$jobend"; 
+  $logfile =~ s/$jobname/\$SGE_TASK_ID/g; # This variable will get 
+  # replaced by qsub, in each job, with the job-id.
+  $cmd =~ s/$jobname/\$SGE_TASK_ID/g; # same for the command...
+  $queue_logfile =~ s/\.?$jobname//; # the log file in the q/ subdirectory
+  # is for the queue to put its log, and this doesn't need the task array subscript
+  # so we remove it.
+}
+
+# queue_scriptfile is as $queue_logfile [e.g. dir/q/foo.log] but
+# with the suffix .sh.
+$queue_scriptfile = $queue_logfile;
+($queue_scriptfile =~ s/\.[a-zA-Z]{1,5}$/.sh/) || ($queue_scriptfile .= ".sh");
+if ($queue_scriptfile !~ m:^/:) {
+  $queue_scriptfile = $cwd . "/" . $queue_scriptfile; # just in case.
+}
+
+# We'll write to the standard input of "qsub" (the file-handle Q),
+# the job that we want it to execute.
+# Also keep our current PATH around, just in case there was something
+# in it that we need (although we also source ./path.sh)
+
+$syncfile = "$qdir/done.$$";
+
+system("rm $queue_logfile $syncfile 2>/dev/null");
+# Build the qsub command line first, so it can be recorded in the job script and run below.
+$qsub_cmd = "qsub -S /bin/bash -v PATH -cwd -j y -o $queue_logfile $qsub_opts $queue_array_opt $queue_scriptfile >>$queue_logfile 2>&1";
+# Write to the script file, and then close it.
+open(Q, ">$queue_scriptfile") || die "Failed to write to $queue_scriptfile";
+
+print Q "#!/bin/bash\n";
+print Q "cd $cwd\n";
+print Q ". ./path.sh\n";
+print Q "( echo '#' Running on \`hostname\`\n";
+print Q "  echo '#' Started at \`date\`\n";
+print Q "  echo -n '# '; cat <<EOF\n";
+print Q "$cmd\n"; # this is a way of echoing the command into a comment in the log file,
+print Q "EOF\n"; # without having to escape things like "|" and quote characters.
+print Q ") >$logfile\n";
+print Q " ( $cmd ) 2>>$logfile >>$logfile\n";
+print Q "ret=\$?\n";
+print Q "echo '#' Finished at \`date\` with status \$ret >>$logfile\n";
+print Q "[ \$ret -eq 137 ] && exit 100;\n"; # If process was killed (e.g. oom) it will exit with status 137;
+  # let the script return with status 100 which will put it to E state; more easily rerunnable.
+if (!defined $jobname) { # not an array job
+  print Q "touch $syncfile\n"; # so we know it's done.
+} else {
+  print Q "touch $syncfile.\$SGE_TASK_ID\n"; # touch a bunch of sync-files.
+}
+print Q "exit \$[\$ret ? 1 : 0]\n"; # avoid status 100 which grid-engine
+print Q "## submitted with:\n";       # treats specially.
+print Q "# $qsub_cmd\n";
+if (!close(Q)) { # close was not successful (buffered write errors surface here).
+  die "Failed to close the script file (full disk?)";
+}
+
+$ret = system ($qsub_cmd);
+if ($ret != 0) {
+  if ($sync && $ret == 256) { # this is the exit status when a job failed (bad exit status)
+    if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+    print STDERR "queue.pl: job writing to $logfile failed\n";
+  } else {
+    print STDERR "queue.pl: error submitting jobs to queue (return status was $ret)\n";
+    print STDERR `tail $queue_logfile`;
+  }
+  exit(1);
+}
+
+if (! $sync) { # We're not submitting with -sync y, so we
+  # need to wait for the jobs to finish.  We wait for the
+  # sync-files we "touched" in the script to exist.
+  @syncfiles = ();
+  if (!defined $jobname) { # not an array job.
+    push @syncfiles, $syncfile;
+  } else {
+    for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+      push @syncfiles, "$syncfile.$jobid";
+    }
+  }
+  # We will need the sge_job_id, to check that job still exists
+  $sge_job_id=`grep "Your job" $queue_logfile | awk '{ print \$3 }' | sed 's|\\\..*||'`;
+  chomp($sge_job_id);
+  $check_sge_job_ctr=1;
+  # Polling interval in seconds; starts below one second and grows by 20% per poll.
+  $wait = 0.1;
+  foreach $f (@syncfiles) {
+    # wait for them to finish one by one.
+    while (! -f $f) {
+      select(undef, undef, undef, $wait); # fractional sleep; the sleep() builtin truncates to whole seconds.
+      $wait *= 1.2;
+      if ($wait > 3.0) {
+        $wait = 3.0; # never wait more than 3 seconds.
+        if (rand() > 0.5) {
+          system("touch $qdir/.kick");
+        } else {
+          system("rm $qdir/.kick 2>/dev/null");
+        }
+        # This seems to kick NFS in the teeth to cause it to refresh the
+        # directory.  I've seen cases where it would indefinitely fail to get
+        # updated, even though the file exists on the server.
+        system("ls $qdir >/dev/null");
+      }
+
+      # Check that the job exists in SGE. Job can be killed if duration
+      # exceeds some hard limit, or in case of a machine shutdown.
+      if(($check_sge_job_ctr++ % 10) == 0) { # Don't run qstat too often, avoid stress on SGE.
+        if ( -f $f ) { next; }; #syncfile appeared, ok
+        $ret = system("qstat -j $sge_job_id >/dev/null 2>/dev/null");
+        if($ret != 0) {
+          # Don't consider immediately missing job as error, first wait some
+          # time to make sure it is not just delayed creation of the syncfile.
+          sleep(3);
+          if ( -f $f ) { next; }; #syncfile appeared, ok
+          sleep(7);
+          if ( -f $f ) { next; }; #syncfile appeared, ok
+          sleep(20);
+          if ( -f $f ) { next; }; #syncfile appeared, ok
+          #Otherwise it is an error
+          if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+          print STDERR "queue.pl: Error, unfinished job no longer exists, log is in $logfile\n";
+          print STDERR "          Possible reasons: a) Exceeded time limit? -> Use more jobs! b) Shutdown/Frozen machine? -> Run again!\n";
+          exit(1);
+        }
+      }
+    }
+  }
+  $all_syncfiles = join(" ", @syncfiles);
+  system("rm $all_syncfiles 2>/dev/null");
+}
+
+# OK, at this point we are synced; we know the job is done.
+# But we don't know about its exit status.  We'll look at $logfile for this.
+# First work out an array @logfiles of file-locations we need to
+# read (just one, unless it's an array job).
+@logfiles = ();
+if (!defined $jobname) { # not an array job.
+  push @logfiles, $logfile;
+} else {
+  for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+    $l = $logfile; 
+    $l =~ s/\$SGE_TASK_ID/$jobid/g;
+    push @logfiles, $l;
+  }
+}
+
+$num_failed = 0;
+foreach $l (@logfiles) {
+  @wait_times = (0.1, 0.2, 0.2, 0.3, 0.5, 0.5, 1.0, 2.0, 5.0, 5.0, 5.0, 10.0, 25.0);
+  for ($iter = 0; $iter <= @wait_times; $iter++) {
+    $line = `tail -10 $l 2>/dev/null`; # Note: although this line should be the last
+    # line of the file, I've seen cases where it was not quite the last line because
+    # of delayed output by the process that was running, or processes it had called.
+    # so tail -10 gives it a little leeway.
+    if ($line =~ m/with status (\d+)/) {
+      $status = $1;
+      last;
+    } else {
+      if ($iter < @wait_times) {
+        select(undef, undef, undef, $wait_times[$iter]); # fractional sleep; sleep() would truncate 0.1..0.5 to 0.
+      } else {
+        if (! -f $l) {
+          print STDERR "Log-file $l does not exist.\n";
+        } else {
+          print STDERR "The last line of log-file $l does not seem to indicate the "
+            . "return status as expected\n";
+        }
+        exit(1);                # Something went wrong with the queue, or the
+        # machine it was running on, probably.
+      }
+    }
+  }
+  # OK, now we have $status, which is the return-status of
+  # the command in the job.
+  if ($status != 0) { $num_failed++; }
+}
+if ($num_failed == 0) { exit(0); }
+else { # we failed.
+  if (@logfiles == 1) {
+    if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/$jobstart/g; }
+    print STDERR "queue.pl: job failed with status $status, log is in $logfile\n";
+    if ($logfile =~ m/JOB/) {
+      print STDERR "queue.pl: probably you forgot to put JOB=1:\$nj in your script.\n";
+    }
+  } else {
+    if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+    $numjobs = 1 + $jobend - $jobstart;
+    print STDERR "queue.pl: $num_failed / $numjobs failed, log is in $logfile\n";
+  }
+  exit(1);
+}
diff --git a/egs/cmu_arctic/s1/scripts/remove_intermediate_files.sh b/egs/cmu_arctic/s1/scripts/remove_intermediate_files.sh
new file mode 100755
index 00000000..b68e8046
--- /dev/null
+++ b/egs/cmu_arctic/s1/scripts/remove_intermediate_files.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+if test "$#" -ne 1; then
+    echo "Usage: ./scripts/remove_intermediate_files.sh conf/global_settings.cfg"
+    exit 1
+fi
+
+if [ ! -f $1 ]; then
+    echo "Global config file doesn't exist"
+    exit 1
+else
+    source $1
+fi
+
+###################################################
+######## remove intermediate synth files ##########
+###################################################
+
+current_working_dir=$(pwd)
+# WorkDir and Voice are not set in this script; presumably they come from the config file sourced above.
+synthesis_dir=${WorkDir}/experiments/${Voice}/test_synthesis
+gen_lab_dir=${synthesis_dir}/gen-lab
+gen_wav_dir=${synthesis_dir}/wav
+
+shopt -s extglob
+# extglob enables the !(pattern) glob used by the rm commands below.
+if [ -d "$gen_lab_dir" ]; then
+    cd ${gen_lab_dir}
+    rm -f *.!(lab)  # delete every file with an extension other than .lab
+fi
+
+if [ -d "$gen_wav_dir" ]; then
+    cd ${gen_wav_dir}
+    rm -f weight
+    rm -f *.!(wav)  # delete every file with an extension other than .wav
+fi
+# go back to the directory the script was started from
+cd ${current_working_dir}
diff --git a/egs/cmu_arctic/s1/scripts/run.pl b/egs/cmu_arctic/s1/scripts/run.pl
new file mode 100755
index 00000000..1750bc50
--- /dev/null
+++ b/egs/cmu_arctic/s1/scripts/run.pl
@@ -0,0 +1,148 @@
+#!/usr/bin/perl -w
+
+# In general, doing 
+#  run.pl some.log a b c is like running the command a b c in
+# the bash shell, and putting the standard error and output into some.log.
+# To run parallel jobs (backgrounded on the host machine), you can do (e.g.)
+#  run.pl JOB=1:4 some.JOB.log a b c JOB is like running the command a b c JOB
+# and putting it in some.JOB.log, for each one. [Note: JOB can be any identifier].
+# If any of the jobs fails, this script will fail.
+
+# A typical example is:
+#  run.pl some.log my-prog "--opt=foo bar" foo \|  other-prog baz
+# and run.pl will run something like:
+# ( my-prog '--opt=foo bar' foo |  other-prog baz ) >& some.log
+# 
+# Basically it takes the command-line arguments, quotes them
+# as necessary to preserve spaces, and evaluates them with bash.
+# In addition it puts the command line at the top of the log, and
+# the start and end times of the command at the beginning and end.
+# The reason why this is useful is so that we can create a different
+# version of this program that uses a queueing system instead.
+
+@ARGV < 2 && die "usage: run.pl log-file command-line arguments...";
+
+$jobstart=1;
+$jobend=1;
+$qsub_opts=""; # These will be ignored.
+
+# First parse an option like JOB=1:4, and any
+# options that would normally be given to 
+# queue.pl, which we will just discard.
+
+if (@ARGV > 0) {
+  while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) { # parse any options
+    # that would normally go to qsub, but which will be ignored here.
+    $switch = shift @ARGV;
+    if ($switch eq "-V") { # -V is the only switch handled here that takes no argument.
+      $qsub_opts .= "-V ";
+    } else {
+      $option = shift @ARGV;
+      if ($switch eq "-sync" && $option =~ m/^[yY]/) {
+        $qsub_opts .= "-sync "; # Note: in the
+        # corresponding code in queue.pl it says instead, just "$sync = 1;".
+      }
+      $qsub_opts .= "$switch $option ";
+      if ($switch eq "-pe") { # e.g. -pe smp 5
+        $option2 = shift @ARGV;
+        $qsub_opts .= "$option2 ";
+      }
+    }
+  }
+  if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) {
+    $jobname = $1;
+    $jobstart = $2;
+    $jobend = $3;
+    if ($jobstart > $jobend) { # validate before shifting, so the message names the bad range.
+      die "run.pl: invalid job range $ARGV[0]";
+    }
+    shift;
+  } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
+    $jobname = $1;
+    $jobstart = $2;
+    $jobend = $2;
+    shift;
+  } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) {
+    print STDERR "Warning: suspicious first argument to run.pl: $ARGV[0]\n";
+  }
+}
+
+if ($qsub_opts ne "") {
+  print STDERR "Warning: run.pl ignoring options \"$qsub_opts\"\n";
+}
+
+$logfile = shift @ARGV;
+
+if (defined $jobname && $logfile !~ m/$jobname/ &&
+    $jobend > $jobstart) {
+  print STDERR "run.pl: you are trying to run a parallel job but "
+    . "you are putting the output into just one log file ($logfile)\n";
+  exit(1);
+}
+
+$cmd = "";
+
+foreach $x (@ARGV) { 
+    if ($x =~ m/^\S+$/) { $cmd .=  $x . " "; }
+    elsif ($x =~ m:\":) { $cmd .= "'$x' "; }
+    else { $cmd .= "\"$x\" "; } 
+}
+
+
+for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+  $childpid = fork();
+  if (!defined $childpid) { die "Error forking in run.pl (writing to $logfile)"; }
+  if ($childpid == 0) { # We're in the child... this branch
+    # executes the job and returns (possibly with an error status).
+    if (defined $jobname) { 
+      $cmd =~ s/$jobname/$jobid/g;
+      $logfile =~ s/$jobname/$jobid/g;
+    }
+    system("mkdir -p `dirname $logfile` 2>/dev/null");
+    open(F, ">$logfile") || die "Error opening log file $logfile";
+    print F "# " . $cmd . "\n";
+    print F "# Started at " . `date`;
+    $starttime = `date +'%s'`;
+    print F "#\n";
+    close(F);
+
+    # Pipe into bash.. make sure we're not using any other shell.
+    open(B, "|bash") || die "Error opening shell command"; 
+    print B "( " . $cmd . ") 2>>$logfile >> $logfile";
+    close(B);                   # If there was an error, exit status is in $?
+    $ret = $?;
+
+    $endtime = `date +'%s'`;
+    open(F, ">>$logfile") || die "Error opening log file $logfile (again)";
+    $enddate = `date`;
+    chop $enddate;
+    print F "# Ended (code $ret) at " . $enddate . ", elapsed time " . ($endtime-$starttime) . " seconds\n";
+    close(F);
+    exit($ret == 0 ? 0 : 1);
+  }
+}
+
+$ret = 0;
+$numfail = 0;
+for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+  $r = wait();
+  if ($r == -1) { die "Error waiting for child process"; } # should never happen.
+  if ($? != 0) { $numfail++; $ret = 1; } # The child process failed.
+}
+
+if ($ret != 0) {
+  $njobs = $jobend - $jobstart + 1;
+  if ($njobs == 1) {
+    if (defined $jobname) { $logfile =~ s/$jobname/$jobstart/g; } # only the child substituted its own copy.
+    print STDERR "run.pl: job failed, log is in $logfile\n";
+    if ($logfile =~ m/JOB/) { # a literal JOB left in the name means no JOB=... spec was given.
+      print STDERR "run.pl: probably you forgot to put JOB=1:\$nj in your script.\n";
+    }
+  } else {
+    $logfile =~ s/$jobname/*/g; # report one glob pattern that covers all task logs.
+    print STDERR "run.pl: $numfail / $njobs failed, log is in $logfile\n";
+  }
+}
+
+
+exit ($ret);
diff --git a/egs/cmu_arctic/s1/scripts/run_phone_aligner.sh b/egs/cmu_arctic/s1/scripts/run_phone_aligner.sh
new file mode 100755
index 00000000..957be2d4
--- /dev/null
+++ b/egs/cmu_arctic/s1/scripts/run_phone_aligner.sh
@@ -0,0 +1,88 @@
+#!/bin/bash -e
+
+if test "$#" -ne 4; then
+    echo "Usage: ./run_phone_aligner.sh <path_to_wav_dir> <path_to_text_dir> <path_to_labels_dir> <path_to_global_conf_file>"
+    exit 1
+fi
+
+### Arguments
+wav_dir=$1
+inp_txt=$2
+lab_dir=$3
+global_config_file=$4
+
+### Use paths from global config file
+source $global_config_file
+
+### frontend scripts
+frontend=${MerlinDir}/misc/scripts/frontend
+
+#################################################################
+##### Create training labels for merlin with festvox tools ######
+#################################################################
+
+### tools required
+
+if [[ ! -d "${ESTDIR}" ]] || [[ ! -d "${FESTDIR}" ]] || [[ ! -d "${FESTVOXDIR}" ]]; then
+    echo "Please configure paths to speech_tools, festival and festvox in config.cfg !!"
+    exit 1
+fi
+
+### do forced alignment using ehmm in clustergen setup
+mkdir -p $lab_dir
+cd $lab_dir
+mkdir cmu_us_${Voice}
+cd cmu_us_${Voice}
+
+$FESTVOXDIR/src/clustergen/setup_cg cmu us ${Voice} 
+
+txt_file=${WorkDir}/${inp_txt}
+txt_dir=${WorkDir}/${inp_txt}
+
+if [ -f "${txt_file}" ]; then
+    cp ${txt_file} etc/txt.done.data
+elif [ -d "${txt_dir}" ]; then
+    python ${frontend}/utils/prepare_txt_done_data_file.py ${txt_dir} etc/txt.done.data
+else
+    echo "Please check ${inp_txt} !!"
+    exit 1
+fi
+
+cp $WorkDir/$wav_dir/*.wav wav/
+
+./bin/do_build build_prompts 
+./bin/do_build label
+./bin/do_build build_utts
+
+cd ../
+
+### convert festival utts to lab
+
+cat cmu_us_${Voice}/etc/txt.done.data | cut -d " " -f 2 > file_id_list.scp
+
+echo "converting festival utts to labels..."
+${frontend}/festival_utt_to_lab/make_labels \
+                        full-context-labels \
+                        cmu_us_slt_arctic/festival/utts \
+                        ${FESTDIR}/examples/dumpfeats \
+                        ${frontend}/festival_utt_to_lab 
+
+echo "normalizing label files for merlin..."
+python ${frontend}/utils/normalize_lab_for_merlin.py \
+                        full-context-labels/full \
+                        label_phone_align \
+                        phone_align \
+                        file_id_list.scp
+
+### return to working directory
+cd ${WorkDir}
+
+phone_labels=$lab_dir/label_phone_align
+
+if [ ! "$(ls -A ${phone_labels})" ]; then
+    echo "Force-alignment unsucessful!!"
+else
+    echo "You should have your labels ready in: $phone_labels !!"
+fi
+
+
diff --git a/egs/cmu_arctic/s1/scripts/run_state_aligner.sh b/egs/cmu_arctic/s1/scripts/run_state_aligner.sh
new file mode 100755
index 00000000..12b9bb00
--- /dev/null
+++ b/egs/cmu_arctic/s1/scripts/run_state_aligner.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+if test "$#" -ne 4; then
+    echo "Usage: ./run_state_aligner.sh <path_to_wav_dir> <path_to_text_dir> <path_to_labels_dir> <path_to_global_conf_file>"
+    exit 1
+fi
+
+### Arguments
+wav_dir=$1
+inp_txt=$2
+lab_dir=$3
+global_config_file=$4
+
+### Use paths from global config file
+source $global_config_file
+
+### force-alignment scripts
+aligner=${MerlinDir}/misc/scripts/alignment/state_align
+
+# initializations
+train=true
+
+####################################
+######## prepare labels ############
+####################################
+
+### do prepare full-contextual labels without timestamps
+echo "preparing full-contextual labels using Festival frontend..."
+bash ${WorkDir}/scripts/prepare_labels_from_txt.sh $inp_txt $lab_dir $global_config_file $train
+
+status_prev_step=$?
+if [ $status_prev_step -eq 1 ]; then
+    echo "Preparation of full-contextual labels unsuccessful!!"
+    echo "Please check scripts/prepare_labels_from_txt.sh"
+    exit 1
+fi
+
+### tools required
+if [[ ! -d "${HTKDIR}" ]]; then
+    echo "Please configure path to HTK tools in $global_config_file !!"
+    exit 1
+fi
+
+### do forced alignment using HVite 
+echo "forced-alignment using HTK tools..."
+
+sed -i s#'HTKDIR =.*'#'HTKDIR = "'$HTKDIR'"'# $aligner/forced_alignment.py
+sed -i s#'work_dir =.*'#'work_dir = "'$WorkDir/$lab_dir'"'# $aligner/forced_alignment.py
+sed -i s#'wav_dir =.*'#'wav_dir = "'$WorkDir/$wav_dir'"'# $aligner/forced_alignment.py
+
+python $aligner/forced_alignment.py
+
+state_labels=$lab_dir/label_state_align
+
+if [ ! "$(ls -A ${state_labels})" ]; then
+    echo "Force-alignment unsucessful!! Please check $aligner/forced_alignment.py"
+else
+    echo "You should have your labels ready in: $state_labels !!"
+fi
+
diff --git a/egs/cmu_arctic/s1/scripts/submit.sh b/egs/cmu_arctic/s1/scripts/submit.sh
new file mode 100755
index 00000000..f0500afd
--- /dev/null
+++ b/egs/cmu_arctic/s1/scripts/submit.sh
@@ -0,0 +1,33 @@
+#!/bin/bash -e
+
+## Generic script for submitting any Theano job to GPU
+# usage: submit.sh [scriptname.py script_arguments ... ]
+
+src_dir=$(dirname $1)
+
+# Source install-related environment variables
+source ${src_dir}/setup_env.sh
+
+# Try to lock a GPU...
+gpu_id=$(python ${src_dir}/gpu_lock.py --id-to-hog)
+
+# Run the input command (run_merlin.py) with its arguments
+if [ $gpu_id -gt -1 ]; then
+    echo "Running on GPU id=$gpu_id ..."
+    THEANO_FLAGS="mode=FAST_RUN,device=gpu$gpu_id,"$MERLIN_THEANO_FLAGS
+    export THEANO_FLAGS
+
+    # Remember the job's exit status, always release the GPU lock, then
+    # report that status so a failed run is not masked by the cleanup.
+    status=0
+    python "$@" || status=$?
+    python ${src_dir}/gpu_lock.py --free $gpu_id
+    [ $status -eq 0 ] || exit $status
+else
+    echo "No GPU is available! Running on CPU..."
+
+    THEANO_FLAGS=$MERLIN_THEANO_FLAGS
+    export THEANO_FLAGS
+
+    python "$@"
+fi
diff --git a/egs/cmu_arctic/s1/scripts/test_nan.sh b/egs/cmu_arctic/s1/scripts/test_nan.sh
new file mode 100755
index 00000000..41a52768
--- /dev/null
+++ b/egs/cmu_arctic/s1/scripts/test_nan.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+if test "$#" -ne 3; then
+    echo "bash scripts/test_nan.sh <input_dir> <file_list> <ext: .cmp/.lab/.lf0/.mgc/.bap>"
+    exit 1
+fi
+
+global_config_file="conf/global_settings.cfg"
+if [ ! -f $global_config_file ]; then
+    echo "Global config file doesn't exist"
+    exit 1
+else
+    source $global_config_file
+fi
+
+x2x=${MerlinDir}/tools/bin/SPTK-3.9/x2x
+
+ext=$3
+
+IFS=''
+while read sentence 
+do 
+    nlines=`$x2x +fa $1/$sentence$ext | grep "nan" | wc -l` 
+    z=0
+    if test $nlines -gt $z
+    then
+        echo $sentence
+        echo $nlines
+    fi
+done < $2

From 6ad5fa175cbda5d003269cd0d5109991f9f97910 Mon Sep 17 00:00:00 2001
From: Bastian SCHNELL <bschnell@italix39.idiap.ch>
Date: Fri, 14 Jul 2017 12:41:28 +0200
Subject: [PATCH 4/6] Readme fix

---
 egs/cmu_arctic/s1/README.md | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/egs/cmu_arctic/s1/README.md b/egs/cmu_arctic/s1/README.md
index afd5d697..72eb7689 100644
--- a/egs/cmu_arctic/s1/README.md
+++ b/egs/cmu_arctic/s1/README.md
@@ -7,7 +7,7 @@ Install tools
 -------------
 
 Step 2: cd merlin/tools <br/>
-Step 3: ./compile_tools.sh
+Step 3: ./compile_tools.sh <br/>
 Step 4: install festival and HTS at merlin/tools/
 	Possible help: [Issue96](https://github.com/CSTR-Edinburgh/merlin/issues/96)
 
@@ -16,8 +16,8 @@ Setup
 
 To setup voice: 
 
-Take a look at ./01_setup.sh
-You probably have to change the way the database is accessed, this depends on how your database is structured.
+Take a look at ./01_setup.sh <br/>
+You probably have to change the way the database is accessed, this depends on how your database is structured. <br/>
 Check the lines 70-95, the comments should guide you through the process.
 
 Demo voice
@@ -26,9 +26,8 @@ Demo voice
 To run demo voice, please follow below steps:
  
 Step 5: cd merlin/egs/roger_blizzard2008/s1 <br/>
-Step 6: ./run_demo_voice.sh speaker
-	speaker can be bdl, slt, jmk
-	The data for the speaker is downloaded from the cmu server.
+Step 6: ./run_demo_voice.sh speaker <br/>
+	speaker can be bdl, slt, jmk. The data for the speaker is downloaded from the cmu server.
 
 Demo voice trains only on 59 utterances and shouldn't take more than 5 min.
 
@@ -48,7 +47,7 @@ Generate new sentences
 To generate new sentences, please follow below steps:
 
 Step  8: Run either demo voice or full voice. <br/>
-Step  9: Place the txt files containing the utterances in experiments/speaker_arctic_demo OR speaker_arctic_full/test_synthesis/txt
+Step  9: Place the txt files containing the utterances in experiments/speaker_arctic_demo OR speaker_arctic_full/test_synthesis/txt <br/>
 	 NOTE: speaker should be the speaker you used before (bdl, slt, jmk).
 Step 10: ./merlin_synthesis.sh
 

From 89cb126f7490563748311bf75e0a3a6fb77a1389 Mon Sep 17 00:00:00 2001
From: Bastian SCHNELL <bschnell@italix39.idiap.ch>
Date: Fri, 14 Jul 2017 12:42:39 +0200
Subject: [PATCH 5/6] Readme fix

---
 egs/cmu_arctic/s1/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/cmu_arctic/s1/README.md b/egs/cmu_arctic/s1/README.md
index 72eb7689..c555bb77 100644
--- a/egs/cmu_arctic/s1/README.md
+++ b/egs/cmu_arctic/s1/README.md
@@ -48,6 +48,6 @@ To generate new sentences, please follow below steps:
 
 Step  8: Run either demo voice or full voice. <br/>
 Step  9: Place the txt files containing the utterances in experiments/speaker_arctic_demo OR speaker_arctic_full/test_synthesis/txt <br/>
-	 NOTE: speaker should be the speaker you used before (bdl, slt, jmk).
+	 NOTE: speaker should be the speaker you used before (bdl, slt, jmk). <br/>
 Step 10: ./merlin_synthesis.sh
 

From 208937d9f55477951bd26c6ee72c7b8ae7f2482a Mon Sep 17 00:00:00 2001
From: b-schnell <b-schnell@users.noreply.github.com>
Date: Fri, 14 Jul 2017 15:53:02 +0200
Subject: [PATCH 6/6] README fix

---
 egs/cmu_arctic/s1/README.md | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/egs/cmu_arctic/s1/README.md b/egs/cmu_arctic/s1/README.md
index c555bb77..0cd0aa25 100644
--- a/egs/cmu_arctic/s1/README.md
+++ b/egs/cmu_arctic/s1/README.md
@@ -11,21 +11,12 @@ Step 3: ./compile_tools.sh <br/>
 Step 4: install festival and HTS at merlin/tools/
 	Possible help: [Issue96](https://github.com/CSTR-Edinburgh/merlin/issues/96)
 
-Setup
------
-
-To setup voice: 
-
-Take a look at ./01_setup.sh <br/>
-You probably have to change the way the database is accessed, this depends on how your database is structured. <br/>
-Check the lines 70-95, the comments should guide you through the process.
-
 Demo voice
 ----------
 
 To run demo voice, please follow below steps:
  
-Step 5: cd merlin/egs/roger_blizzard2008/s1 <br/>
+Step 5: cd merlin/egs/cmu_arctic/s1 <br/>
 Step 6: ./run_demo_voice.sh speaker <br/>
 	speaker can be bdl, slt, jmk. The data for the speaker is downloaded from the cmu server.
 
@@ -36,7 +27,7 @@ Full voice
 
 To run full voice, please follow below steps:
 
-Step 5: cd merlin/egs/roger_blizzard2008/s1 <br/>
+Step 5: cd merlin/egs/cmu_arctic/s1 <br/>
 Step 6: ./run_full_voice.sh speaker
 
 Full voice utilizes the whole arctic data (1132 utterances). The training of the voice approximately takes 1 to 2 hours. 
@@ -47,7 +38,6 @@ Generate new sentences
 To generate new sentences, please follow below steps:
 
 Step  8: Run either demo voice or full voice. <br/>
-Step  9: Place the txt files containing the utterances in experiments/speaker_arctic_demo OR speaker_arctic_full/test_synthesis/txt <br/>
-	 NOTE: speaker should be the speaker you used before (bdl, slt, jmk). <br/>
+Step  9: Place the txt files containing the utterances in experiments/speaker_arctic_demo/test_synthesis/txt OR experiments/speaker_arctic_full/test_synthesis/txt. NOTE: speaker should be the speaker you used before (bdl, slt, jmk). <br/>
 Step 10: ./merlin_synthesis.sh