Speech:MakeTest.pl

Summary
Title: makeTest.pl

Author: Peter Ferro, Matthew Heyner

Location: /mnt/main/scripts/user/makeTest.pl

Usage: makeTest.pl  ...cd to your main experiment sub-directory first, then...
 * Speech:GenFeats.pl

Code

#!/usr/bin/perl

=begin comment

Make Test (Make Decode V2)
Author: Peter Ferro, Matthew Heyner
Semester: Spring 2016
Start Date: 2/24/16
Last Modified: 5/1/16
Recent changes:
- LM creation has been cut except for directly copying it if it exists
  and the source is not equal to the destination.
- More etc files have been added to the list of files to copy over.
  One of the files involved is also modified to update the paths in the file.

This code prepares a decode, along with language model creation.
A train must have been previously run in the source.

(Note: the documentation for trained data and untrained data differs very little.)

=cut

# Usage text shown when the script is started with no arguments, and appended
# to the error message when the argument count is wrong. The single-quoted
# terminator disables interpolation, so the text is emitted literally. The
# terminator has to sit alone on its own line for the heredoc to end.
$help = <<'END_HELP';
Make Test Script
Produced by Peter Ferro
Development Assistance by Matthew Heyner
for Spring 2016 Semester at UNH Manchester

Usage:
makeTest.pl -d switchboard/300hr 0287/001 0271/003
makeTest.pl train/trans/train.trans switchboard/300hr 0287/001 0271/003

For the flag...
-d means use test/trans/dev.trans
-e means use test/trans/eval.trans
-t means use test/trans/train.trans

Please make sure you have ran a train in the source.
If this has not happened, then this code will not work properly.
END_HELP


# WARNING: This code is subject to modification.
# The basic features are implemented, but you may still slip by
# the verification. Please make sure you have executed a train in the source
# before running this code, or else bad things might happen.

use feature qw(say);

# First, make sure the number of arguments is 3 or 4.
# With no arguments at all the help text is shown on its own, acting as a
# manual for users attempting to run the code. With any other wrong count an
# error line is printed ahead of the help text.
my $arg_total = scalar @ARGV;
unless ($arg_total == 3 or $arg_total == 4) {
    die("$help\n\n") if $arg_total == 0;

    # Arguments were provided, just not the right number of them.
    die("Wrong number of parameters.\n\n$help\n\n");
}

# Unpack the command-line arguments into the script-wide settings:
#   $FLAG   - transcript selector (-d, -e, -t) or a transcript path
#   $CORPUS - corpus path, later resolved under the corpus directory
#   $SOURCE - source experiment path, later resolved under the Exp directory
#   $DEST   - destination experiment path
# If only one experiment path is provided, source equals destination.
#
# Single elements are read as $ARGV[n]. The @ARGV[n] spelling is a
# one-element array slice and draws a warning under "use warnings".
$FLAG   = $ARGV[0];
$CORPUS = $ARGV[1];
$SOURCE = $ARGV[2];
$DEST   = (scalar @ARGV == 4) ? $ARGV[3] : $SOURCE;

# Fixed locations that every later path is built from.
$MASTER_DIR = '/mnt/main';
$CORPUS_DIR = $MASTER_DIR . '/corpus/';
$EXP_DIR    = $MASTER_DIR . '/Exp';

# The last three characters of each experiment path are used further down as
# the base name of that experiment's etc files.
$SOURCE_SUB = substr $SOURCE, -3;
$DEST_SUB   = substr $DEST, -3;

# Now go to where the destination experiment resides.
chdir("$EXP_DIR/$DEST") or die("Bad destination: $!");

# Resolve the first argument to a transcript path.
# A leading dash marks a shorthand flag; the three shorthands all point into
# test/trans. Anything without a leading dash is taken as the transcript path
# itself, which is how a train/trans transcript is selected from the command
# line.
die("Blank flag detected!\n") if $FLAG eq '';

$TRANS_ID = '';    # stays empty when a dash flag is not recognised
if (index($FLAG, "-") == 0) {
    if    ($FLAG eq '-d') { $TRANS_ID = 'test/trans/dev.trans'; }
    elsif ($FLAG eq '-e') { $TRANS_ID = 'test/trans/eval.trans'; }
    elsif ($FLAG eq '-t') { $TRANS_ID = 'test/trans/train.trans'; }
}
else {
    $TRANS_ID = $FLAG;
}
die("Bad flag: $FLAG\n") if $TRANS_ID eq '';

# The LM directory is copied from the source only when the destination has
# none yet, the source has one, and source and destination differ.
if ((not -d "$EXP_DIR/$DEST/LM")
    and (-d "$EXP_DIR/$SOURCE/LM")
    and ($SOURCE ne $DEST))
{
    # List-form system() hands the paths to cp without a shell in between.
    # A failed command is reported through $?; $! is only meaningful when
    # the command could not be started at all (system() returned -1).
    system('cp', '-ir', "$EXP_DIR/$SOURCE/LM", '.') == 0
        or die("Problem trying to copy LM dir " .
               "to destination: " .
               ($? == -1 ? $! : 'cp exited with status ' . ($? >> 8)) . "\n" .
               "Has a train been run in the source?\n");
}

# Progress message printed once the LM step is over.
say 'LM dir is ready.';
# Disabled hints, kept for reference:
#say("Note: Please refer to your local corpus transcript parser");
#say("before executing lm_create.pl")

# The following code only executes if the source is not equal to the
# destination. It creates the feat and wav directories, copies bin and
# scripts_pl from the source, and symlinks the source's model_parameters.
if ($SOURCE ne $DEST) {
    if (not (-d "feat")) { mkdir("feat") or die("Bad feat dir: $!\n"); }
    #TODO: Insert feat script execution here if specifications get changed again...
    if (not (-d "wav")) { mkdir("wav") or die("Bad wav dir: $!\n"); }

    # Describes why the most recent system() call failed. $! is only
    # meaningful when the command could not be started (status -1);
    # otherwise the exit code lives in the upper byte of $?.
    my $cp_failure = sub {
        return $? == -1 ? $! : 'cp exited with status ' . ($? >> 8);
    };

    #Copy bin folder to give us the needed binaries to generate feats
    # (list-form system() passes the paths to cp without involving a shell)
    system('cp', '-ir', "$EXP_DIR/$SOURCE/bin", '.') == 0
        or die("Went to copy bin from source, and... \n" .
               $cp_failure->() . "\n");

    #Copy scripts_pl folder to gain access to make_feats.pl
    system('cp', '-ir', "$EXP_DIR/$SOURCE/scripts_pl", '.') == 0
        or die("Went to copy scripts_pl from source, and... \n" .
               $cp_failure->() . "\n");

    # model_parameters is linked rather than copied.
    if (not (-d "model_parameters")) {
        symlink("$EXP_DIR/$SOURCE/model_parameters",
                "$EXP_DIR/$DEST/model_parameters") == 1
            or die("Problem trying to symlink $SOURCE/model_parameters files " .
                   "to destination: $!\n" .
                   "Has a train been run in the source?\n");
    }
    #TODO: I am considering symlinking the following directories...
    #- trees
    #- model_architecture
}

# Get the HMM directory from model_parameters: the first name that ls prints.
# The script by default has all of the files processed. However, the senone
# count almost never equals the total file count in a corpus, so a different
# approach is used: the count is later derived from this directory name.
my $ls_listing = `ls model_parameters`;
@SENONE_SET = split("\n", $ls_listing);

# Keep everything up to (not including) the first line break of the listing.
my $first_break = index($ls_listing, "\n");
$SENONE_DIR = substr($ls_listing, 0, $first_break);

# Extract the senone count for preparation with run_decode.pl.
# The substitution deletes the first stretch made of three characters
# followed by one or more non-digit characters from the directory name.
($SENONE_CNT = $SENONE_DIR) =~ s/...\D+//;

# The etc directory cannot be symlinked because files are generated in there.
# Therefore, we make a new one (when entering an existing one fails) and copy
# the required files to there.
unless (chdir("etc")) {
    mkdir("etc");
    chdir("etc") or die("Bad etc dir: $!\n");
}

# When source and destination differ, copy the etc files a decode needs from
# the source experiment, renaming the per-experiment ones from the source
# suffix to the destination suffix, then retarget sphinx_train.cfg.
if ($SOURCE ne $DEST) {
    my $source_etc = "$EXP_DIR/$SOURCE/etc";
    my $dest_etc   = "$EXP_DIR/$DEST/etc";

    # [ name in the source etc dir, name in the destination etc dir ]
    my @etc_copies = (
        [ "$SOURCE_SUB.dic",    "$DEST_SUB.dic"    ],
        [ "$SOURCE_SUB.filler", "$DEST_SUB.filler" ],
        [ "$SOURCE_SUB.phone",  "$DEST_SUB.phone"  ],
        [ "feat.params",        "feat.params"      ],
        [ "sphinx_train.cfg",   "sphinx_train.cfg" ],
    );

    for my $copy (@etc_copies) {
        my ($from_name, $to_name) = @{$copy};

        # List-form system() passes the paths to cp without a shell. On
        # failure the exit status is in $?; $! only applies when cp could
        # not be started at all.
        system('cp', '-i', "$source_etc/$from_name", "$dest_etc/$to_name") == 0
            or die("Problem trying to copy $from_name " .
                   "to etc dir in destination: " .
                   ($? == -1 ? $! : 'cp exited with status ' . ($? >> 8)) . "\n" .
                   "Has a train been run in the source?\n");
    }

    #Update two configuration parameters in the sphinx_train.cfg file
    #to use the destination path rather than the source path.
    # Done in-process instead of through "perl -pi -e" one-liners: the
    # one-liners passed $CFG_DB_NAME / $CFG_BASE_DIR through unescaped, so the
    # inner perl expanded them to empty strings and the pattern matched any
    # ' = "<value>"'. Here the variable names are matched literally and the
    # interpolated values are quoted with \Q...\E.
    my $cfg_path = "$dest_etc/sphinx_train.cfg";

    open(my $cfg_in, '<', $cfg_path)
        or die("Problem trying to read $cfg_path: $!\n");
    my @cfg_lines = <$cfg_in>;
    close($cfg_in);

    for my $cfg_line (@cfg_lines) {
        $cfg_line =~ s/\$CFG_DB_NAME = "\Q$SOURCE_SUB\E"/\$CFG_DB_NAME = "$DEST_SUB"/;
        $cfg_line =~ s{\$CFG_BASE_DIR = "\Q$EXP_DIR/$SOURCE\E"}{\$CFG_BASE_DIR = "$EXP_DIR/$DEST"};
    }

    open(my $cfg_out, '>', $cfg_path)
        or die("Problem trying to update $cfg_path: $!\n");
    print {$cfg_out} @cfg_lines;
    close($cfg_out)
        or die("Problem trying to update $cfg_path: $!\n");
}

# Get fileids from corpus transcript: the first whitespace-separated field of
# every transcript line is written, one per line, to the decode.fileids file.
# Currently, only Switchboard corpus is supported.
# WARNING: This overwrites the old decode.fileids file if one already existed!
#
# The transcript is read in-process rather than through a shell/awk pipeline,
# so $! in the messages below reflects the actual failure and the paths are
# never interpreted by a shell.
my $transcript_path = "$CORPUS_DIR/$CORPUS/$TRANS_ID";
my $fileids_path    = "$EXP_DIR/$DEST/etc/${DEST_SUB}_decode.fileids";

open(my $transcript_in, '<', $transcript_path)
    or die("Error while creating etc directory:\n" .
           "Problem trying to retreive fileids " .
           "from $CORPUS/$TRANS_ID : $!\n");
open(my $fileids_out, '>', $fileids_path)
    or die("Error while creating etc directory:\n" .
           "Problem trying to write $fileids_path : $!\n");

while (my $transcript_line = <$transcript_in>) {
    # split ' ' skips leading whitespace and splits on runs of whitespace,
    # like awk's default field splitting. A blank line yields an empty id,
    # which is still written as an empty line.
    my ($file_id) = split(' ', $transcript_line);
    $file_id = '' unless defined $file_id;
    print {$fileids_out} "$file_id\n";
}

close($transcript_in);
close($fileids_out)
    or die("Error while creating etc directory:\n" .
           "Problem trying to write $fileids_path : $!\n");

# Prepare the run_decode.pl script by copying it into the current directory.
# List-form system() passes the path to cp without a shell. A failed copy is
# reported through $?, and $! is used only when cp could not be started.
system('cp', '-i', "$MASTER_DIR/scripts/user/run_decode.pl", '.') == 0
    or die("Error while updating etc directory:\n" .
           "Went to copy run_decode.pl, and... \n" .
           ($? == -1 ? $! : 'cp exited with status ' . ($? >> 8)) . "\n");

# Closing summary: what was prepared and which commands to run next.
say for (
    "Decode is ready to be executed.",
    "AM pointed to $SOURCE",
    "LM generated from $CORPUS/$TRANS_ID",
    "Note: Generate feats with",
    "genFeats.pl -d",
    "then execute",
    "run_decode.pl $SOURCE $DEST $SENONE_CNT",
);