PrepareExperiment.pl

Summary
Title: prepareExperiment.pl

Author: David Meehan

Location: /mnt/main/scripts/user/prepareExperiment.pl

Usage: /mnt/main/scripts/user/prepareExperiment.pl <corpus>/<data set> (ex. first_5hr/train)

Description
This script automates the entire training process up to feats generation (it does not update the senone value, density, or other trainer parameters). It uses symlinks for the audio files to reduce the amount of disk space used. It also uses the newest versions of genTrans and pruneDictionary, which drastically improve the performance of generating the transcript and dictionary files.

Code
use Cwd; use File::Basename;
#!/usr/bin/perl

# Require exactly one command-line argument: the corpus/data-set path
# (for example 10hr/train).  Otherwise print the usage text and stop.
# The usage text previously named buildData.pl and had lost its argument
# placeholder; it now names this script and the argument it expects.
if (@ARGV != 1) {
    print "\nusage (run from main experiment directory): prepareExperiment.pl <corpus/data_set>\n";
    print "    <corpus/data_set>: The name of the corpus and data set. Ex: 10hr/train\n";
    exit -1;
}

# Set corpus directory: the Switchboard corpus root joined with the
# corpus/data-set argument, with any trailing slashes removed so later
# path concatenation does not produce "//".
$corpus_dir = "/mnt/main/corpus/switchboard/" . $ARGV[0];
$corpus_dir =~ s{/+\z}{};

# The script is run from the experiment directory: $path is that working
# directory and $exp (the experiment name) is its final path component.
$path = getcwd();
$exp  = basename($path);

# Create the SphinxTrain skeleton for this experiment, then patch the
# generated etc/sphinx_train.cfg with a series of in-place sed edits.
#
# Every command is run with list-form system(), so no shell is involved
# and $path / $exp reach the program verbatim even if they contain spaces
# or shell metacharacters.  A failing command is reported on STDERR and
# the remaining commands still run, as before.
#
# The sed passes are order-dependent:
#   1. a line starting with <any char>CFG_HMM has that prefix replaced by TEMP
#   2. "#<any char>CFG_HMM" becomes "$CFG_HMM"
#   3. the stock train1 directory becomes this experiment's directory
#   4. any remaining "train1" becomes the experiment name
#   5. a line starting with TEMP (from pass 1) becomes "#TEMP..."
my $sphinx_cfg = 'etc/sphinx_train.cfg';

# Escape the characters that are special on the replacement side of a
# sed s command: backslash, '&', and the delimiter used by that pass.
(my $path_for_sed = $path) =~ s{([\\&#])}{\\$1}g;
(my $exp_for_sed  = $exp)  =~ s{([\\&/])}{\\$1}g;

my @setup_steps = (
    [ '/mnt/main/root/tools/SphinxTrain-1.0/scripts_pl/setup_SphinxTrain.pl', '-task', $exp ],
    [ 'sed', '-i', 's/^.CFG_HMM/TEMP/g',     $sphinx_cfg ],
    [ 'sed', '-i', 's/#.CFG_HMM/$CFG_HMM/g', $sphinx_cfg ],
    # "\\." reaches sed as "\.", the literal dot the original "\." intended
    # (Perl's double-quoted string used to swallow that backslash).
    [ 'sed', '-i', "s#/root/speechtools/SphinxTrain-1\\.0/train1#$path_for_sed#g", $sphinx_cfg ],
    [ 'sed', '-i', "s/train1/$exp_for_sed/g", $sphinx_cfg ],
    [ 'sed', '-i', 's/^TEMP/#TEMP/g',         $sphinx_cfg ],
);

for my $step (@setup_steps) {
    system(@$step) == 0
        or warn "prepareExperiment.pl: command failed (status $?): @$step\n";
}

# prefix is the exp_id: $exp is passed to the transcript and dictionary
# generators and is used in the etc/$exp.* file names below.

# Flush progress messages immediately so they appear before the output of
# the commands they announce, even when STDOUT is not a terminal.
$| = 1;

# Run one command; stop the script if it could not be started at all.
# system() leaves -1 in $? only when the command failed to launch, so a
# command that runs but exits non-zero does not stop the script (this
# keeps re-runs working when wav has already been replaced by the link).
sub _run_step {
    my ($command) = @_;
    system($command);
    return if $? != -1;
    print "failed!\n";
    exit 1;    # previously "exit 0", which reported the failure as success
}

# Swap the wav directory for a symlink to the corpus utterances so the
# audio is referenced rather than copied.
_run_step("rmdir wav");
print "Linking to utterance files... ";
_run_step("ln -s $corpus_dir/audio/utt wav");

print "\n-\nPreparing data input files...\n";

print "done!\nGenerating transcript file...";
_run_step("/mnt/main/scripts/user/genTrans10.pl $corpus_dir $exp");

print "done!\nGenerating dictionary file...";
_run_step("/mnt/main/scripts/user/pruneDictionary4.pl etc/${exp}_train.trans $exp");

print "done!\nGenerating filler dictionary...";
_run_step("cp -i /mnt/main/root/tools/SphinxTrain-1.0/train1/etc/train1.filler etc/$exp.filler");

print "done!\nGenerating phones list...";
_run_step("cp -i /mnt/main/scripts/user/genPhones.csh etc/. ");
_run_step("etc/genPhones.csh etc/$exp");
# Append SIL to the phone list, then sort the list in place.
_run_step("echo SIL >> etc/$exp.phone");
_run_step("sort etc/$exp.phone -o etc/$exp.phone");

print "done!\nPreparation complete!\n";