Speech:Spring 2011 SwitchboardToDecoderConverter


 * Home
 * Spring 2011
 * Proposal
 * Report
 * Appendix
 * [SwitchboardToDecoderConverter]


#!/usr/bin/perl

use strict;
use warnings;

# Converts Switchboard transcript lines such as
#
#   sw02001A-ms98-a-0001 0.977625 11.561375 hi um yeah
#
# into decoder transcript lines such as
#
#    hi um yeah (A-ms98-a-0001)
#
# Usage: perl SwitchboardToDecoderConverter.pl [INPUT_FILE]
# The input is read from INPUT_FILE when given, otherwise from STDIN.
# The result is always written to convertedTranscript.txt.

# Name of the transcript file produced by this script.
my $OUTPUT_FILE = 'convertedTranscript.txt';

# Run the conversion only when executed as a script, so that convert_line()
# can be loaded and exercised on its own.
main(@ARGV) unless caller;

# Reads the input line by line and writes one converted line per input line.
#
# Parameters:
#   $input_path - optional path of the Switchboard transcript; STDIN is used
#                 when it is omitted.
# Dies if the input or output file cannot be opened, or if the output file
# cannot be closed (buffered write errors surface at close).
sub main {
    my ($input_path) = @_;

    my $in = \*STDIN;
    if (defined $input_path) {
        open($in, '<', $input_path)
            or die "Cannot open $input_path for reading: $!";
    }

    open(my $out, '>', $OUTPUT_FILE)
        or die "Cannot open $OUTPUT_FILE for writing: $!";

    while (my $line = <$in>) {
        chomp $line;
        print {$out} convert_line($line), "\n";
    }

    if (defined $input_path) {
        close($in) or die "Cannot close $input_path: $!";
    }
    close($out) or die "Cannot close $OUTPUT_FILE: $!";

    return;
}

# Converts a single (already chomped) Switchboard transcript line.
#
# Parameters:
#   $line - one transcript line: utterance tag, start time, stop time, text.
# Returns:
#   The string " <text> (<utterance id>)" without a trailing newline.
#
# The start and stop times are not part of the output and are discarded.
sub convert_line {
    my ($line) = @_;

    # Utterance ID: remove the first "sw<digits>" run, then everything from
    # the first space onwards (e.g. "sw02001A-ms98-a-0001 ..." becomes
    # "A-ms98-a-0001").
    my $utterance_id = $line;
    $utterance_id =~ s/sw[0-9]*//;
    $utterance_id =~ s/ .*//;

    # Message: remove the utterance tag and the two time fields.
    my $message = $line;
    $message =~ s/sw[0-9]*[A-B]-ms98-a-[0-9]* [0-9]*.[0-9]* [0-9]*.[0-9]* //;

    # Clean-up order matters: quotes go first, then whole "[noise] " markers,
    # and only then the remaining brackets and hyphens, so other bracketed
    # markers keep their text (e.g. "[laughter]" becomes "laughter").
    $message =~ s/\"//g;
    $message =~ s/\[noise] //g;
    $message =~ tr/[]\-//d;

    # NOTE(review): the leading space and the single space before "(" are
    # reproduced from the published listing; markup such as <s> ... </s> may
    # have been stripped when the page was rendered -- confirm against the
    # transcript format the decoder expects.
    return " $message ($utterance_id)";
}