Speech:BuildData.pl

Summary
Title: buildData.pl

Author: David Meehan

Location: /mnt/main/scripts/user/buildData.pl

Usage (run from main experiment directory): buildData.pl [options] <corpus/dataset> <exp_id> -- <corpus/dataset>: The name of the corpus and data set. Ex: 10hr/train -- <exp_id>: The experiment id -- Options (use these options to override any of the default locations for the following): -wav: The absolute path to the directory containing the audio files -fileids: The absolute path to the .fileids file -phones: The absolute path to the .phones file (Remember to add SIL if needed) -trans: The absolute path to the unedited transcript file

Description

 * This script links and copies all files related to the corpus data into the experiment. The user provides the corpus subset and data set as well as the experiment number. Based on these inputs, the script links to audio/wav, trans/train.trans, info/train.dic, info/train_train.fileids and info/train.phones, and creates the filler file.
 * Note: trans/train.trans now points to the edited trans file. The unedited trans file is now trans/base.trans.

Code
#!/usr/bin/perl
#
# buildData.pl - link and copy the corpus data files into an experiment.
#
# Run from the main experiment directory:
#
#     buildData.pl [options] <corpus/dataset> <exp_id>
#
# Steps performed, in order:
#   1. replace the (empty) "wav" directory with a symlink to the audio files
#   2. copy the SphinxTrain filler template to etc/<exp_id>.filler
#   3. symlink the dictionary to        etc/<exp_id>.dic
#   4. symlink the file id list to      etc/<exp_id>_train.fileids
#   5. symlink the phone list to        etc/<exp_id>.phones
#
# Exit status: 0 on success (or -help), 1 on bad usage or any failed step.

use strict;
use warnings;
use Getopt::Long;

$| = 1;    # show each "...  " progress label before the step it announces runs

# Option values stay undef unless given on the command line, so a default
# can be told apart from any user-supplied path.
my ($dictionary, $audio, $fileids, $phones, $trans_unedited, $help);

# Every path option takes a string value ("=s").  The original declared
# 'dict' without it, which turned -dict into a boolean flag.
my $options_ok = GetOptions(
    'dict=s'    => \$dictionary,
    'wav=s'     => \$audio,
    'fileids=s' => \$fileids,
    'phones=s'  => \$phones,
    'trans=s'   => \$trans_unedited,
    'help'      => \$help,
);

if ($help) {
    print_usage();
    exit 0;
}
if (!$options_ok || @ARGV != 2) {
    print_usage();
    exit 1;
}

# set corpus directory (trailing slashes removed so paths join cleanly)
my $corpus_dir = $ARGV[0];
$corpus_dir =~ s{/+\z}{};

# prefix is the exp_id
my $id = $ARGV[1];

# append the path to each data file based on the corpus dir provided
#
# All defaults are absolute paths below the corpus root.  The original built
# only the transcript default this way; the other defaults were relative to
# the current directory, which produces dangling symlinks.
# NOTE(review): assumes dict/audio/fileids/phones live under the same corpus
# root as the transcript - confirm against the corpus layout.
my $corpus_root = "/mnt/main/corpus/switchboard/$corpus_dir";

# NOTE(review): the page's description says the unedited transcript is now
# trans/base.trans; the default below is kept as in the original - confirm.
$trans_unedited = "$corpus_root/trans/train.trans"       unless defined $trans_unedited;
$dictionary     = "$corpus_root/info/train.dic"          unless defined $dictionary;
$audio          = "$corpus_root/audio/wav"               unless defined $audio;
$fileids        = "$corpus_root/info/train_train.fileids" unless defined $fileids;
# NOTE(review): the page's description names this file info/train.phones;
# the original default (train.phone) is kept - confirm which one exists.
$phones         = "$corpus_root/info/train.phone"        unless defined $phones;

print "\n\nDICT:   $dictionary\nTRANS:   $trans_unedited\nWAV:     $audio\nFILEIDS: $fileids\nPHONES:  $phones\n";

# Step 1: the audio link.  The original linked $trans_unedited here, leaving
# $audio (-wav) unused; "wav" must point at the audio directory.
print "Linking to utterance files... ";
if (-d 'wav' && !-l 'wav') {
    # An experiment starts with an empty placeholder directory; rmdir only
    # succeeds when it is empty, so existing data is never discarded.
    rmdir 'wav' or fail_step("cannot remove existing wav directory: $!");
}
symlink($audio, 'wav') or fail_step("cannot link wav -> $audio: $!");
print "done!\n";

# NOTE(review): $trans_unedited is resolved and reported above but this
# script never links or copies it anywhere - confirm where it should go.

# Step 2: the filler file.  "cp -i" is kept so an existing filler file is
# only overwritten after the user confirms; list-form system() avoids the
# shell, so odd characters in $id cannot be interpreted as shell syntax.
my $filler_template = '/mnt/main/root/tools/SphinxTrain-1.0/train1/etc/train1.filler';
print "Generating filler file... ";
system('cp', '-i', $filler_template, "etc/$id.filler") == 0
    or fail_step("cp of $filler_template failed (wait status $?)");
print "done!\n";

# Steps 3-5: plain symlinks into etc/.
link_step('Linking to dictionary files', $dictionary, "etc/$id.dic");
link_step('Linking to file id list',     $fileids,    "etc/$id" . '_train.fileids');
link_step('Linking to phones list',      $phones,     "etc/$id.phones");

print "All Files have been created.\n";

# Print the command-line help text to standard output.
sub print_usage {
    print "\nusage (run from main experiment directory): buildData.pl [options] <corpus/dataset> <exp_id>\n",
          "    <corpus/dataset>: The name of the corpus and data set. Ex: 10hr/train\n",
          "    <exp_id>: The experiment id\n",
          "options (use these options to override any of the default locations for the following):\n",
          "    -dict: The absolute path to the dictionary (.dic) file\n",
          "    -wav: The absolute path to the directory containing the audio files\n",
          "    -fileids: The absolute path to the .fileids file\n",
          "    -phones: The absolute path to the .phones file (Remember to add SIL if needed)\n",
          "    -trans: The absolute path to the unedited transcript file\n";
    return;
}

# Finish the current progress line with "failed!", explain why on standard
# error, and terminate with a non-zero status so callers can detect it.
sub fail_step {
    my ($reason) = @_;
    print "failed!\n";
    warn "buildData.pl: $reason\n";
    exit 1;
}

# Announce a step, create the symlink $link pointing at $target, and report
# "done!" - or abort through fail_step() with the system error message.
sub link_step {
    my ($label, $target, $link) = @_;
    print "$label... ";
    symlink($target, $link) or fail_step("cannot link $link -> $target: $!");
    print "done!\n";
    return;
}