Speech:Spring 2018 Software Group main decode.c

Summary
Title: main_decode.c

Author: unknown

Location: /mnt/main/root/sphinx3/src/programs

Usage: File to help implement sphinx3_decode

Color Codes

 *  Danielle is purple 
 *  Lamia is red 
 *  Faruk is green 
 *  Josh is orange 
 *  Wesley is blue 

Description
''' Conclusion
 * '''I think this file is taking audio files, turning them into text files, then converting them into binary files for the computer to be able to read. To figure out more we need to look at where this file is being used, and how it is being used.
 * Questions/comments about this file
 * 1. look into corpus.c (any lines related to -ctl_lm)- did a CONTROL + F, couldn't find ctl_lm but did find many instances of ctl in corpus.c
 * 2. what is MLLR? Maximum Likelihood Linear Regression, uses linear transformation of Gaussian model parameters to adapt to a given speaker.
 * for more information about MLLR check out this PDF page 8 http://www.cs.jhu.edu/~juri/pdf/mllr-rwth-2005.pdf
 * 3. what does st->tm mean?  -dml st is a temporary pointer set to kb.stat, so st->tm is the same as kb.stat->tm (triple hierarchy: kb, stat, tm). An equivalent way to write it would have been: (*(kb.stat)).tm 
 * 4. what does the string -ctl mean? what does it control?  wmc - It appears to point to a file with acoustic WAV file references within the [Experiment]/[Sub-experiment]/etc folder. The file is typically [sub-experiment]_decode.fileids 
 *  5. files to possibly look at next: sphinx3_decode and corpus.c - found the corpus.c file, which contains 820 lines of code; not sure if we want to start on this one or continue with the main files we found

'''

Dictionary
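 * -Cepstra- Plural of cepstrum: the result of taking the inverse Fourier transform of the logarithm of a signal's spectrum ("quefrency" analysis). Here, the files of cepstral features computed from recorded human speech 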
 * -Gaussian- Represents the probability density function of a normally distributed random variable 
 * -Senone- A sound detector. A wide variety of sound detectors can be represented by a small number of distinct short sound detectors 
 * -DAG search- A search over a DAG (directed acyclic graph): a directed graph with no cycles, so it has a topological ordering, i.e. a sequence of the vertices such that every edge is directed from earlier to later in the sequence. 
 * -Viterbi (algorithm)- A dynamic programming algorithm for finding the most likely sequence of hidden states—called the Viterbi path—that results in a sequence of observed events, especially in the context of Markov information sources and hidden Markov models. 
 * -'''HMM- Hidden Markov Model https://en.wikipedia.org/wiki/Hidden_Markov_model

Code

 * include 
 * include <sphinxbase/unlimit.h>


 * include "utt.h" <font color='purple'> //dml This decodes a block of incoming vector features, which must be completed by the calling routine. 
 * include "kb.h" <font color='purple'>//dml Inherits KB implementations, which is the global wrapper structure for all variables in 3.X search. 
 * <font color='red'> // lm kb.h means - Knowledge bases, search parameters, and auxiliary structures for decoding 
 * include "corpus.h" <font color='purple'>//dml A set of strings each associated with a unique ID. 
 * include "cmdln_macro.h" <font color='purple'>//dml File reference to many defined functions, such as log model and language model. 


 * static arg_t arg[] = {
 * log_table_command_line_macro, <font color='purple'>//dml Calculates log-likelihoods, determines whether to use the logs3 table or to compute the values at run time. 
 * waveform_to_cepstral_command_line_macro, <font color='purple'>//dml Writes logspectral files instead of cepstra and cepstral-smoothed logspectral files. Determines rates/sizes/filters. 
 * cepstral_to_feature_command_line_macro, <font color='purple'>//dml Determines stream type/number of components in vector, sets gain control (type of noise), determines output feature using matrix, gives certification number. 
 * acoustic_model_command_line_macro, <font color='purple'>//dml Directory for specifying Sphinx 3's hmm. The type of files that are present are: mdef, mean, var, mixw, tmat, along with featparams (-mdef, -mean, -var, -mixw or -tmat). 
 * speaker_adaptation_command_line_macro, <font color='purple'>//dml MLLR transformation matrix to be applied to mixture gaussian means. Senone to MLLR transformation matrix mapping file (or .1cls.). 
 * language_model_command_line_macro, <font color='purple'>//dml Deals with language model including input files, specifications, directories, name. 
 * dictionary_command_line_macro, <font color='purple'>//dml Main pronunciation dictionary, silence and filler pronunciation dictionary. Letter-to-sound rules that aren't in dictionary. 
 * phoneme_lookahead_command_line_macro, <font color='purple'>//dml Deals with phonemes and senones look ahead. 
 * histogram_pruning_command_line_macro, <font color='purple'>//dml Only used in Mode 4 and 5. Max number of word exits, histories, and active HMMs to maintain at each frame. 
 * fast_GMM_computation_command_line_macro, <font color='purple'>//dml Down samples, sets flags on certain inputs that will be used for Gaussian Selection. 
 * common_filler_properties_command_line_macro, <font color='purple'>//dml Filler word, default silence/non-silence probabilities. 
 * common_s3x_beam_properties_command_line_macro, <font color='purple'>//dml Beam selecting active HMMs. 
 * common_application_properties_command_line_macro, <font color='purple'>//dml Sets log file 
 * control_file_handling_command_line_macro, <font color='purple'>//dml Deals with utterances; lists utterances, amount to be skipped, amount to be processed 
 * hypothesis_file_handling_command_line_macro,<font color='purple'>//dml Recognition result file with words, word segmentations, and word score 
 * score_handling_command_line_macro, <font color='purple'>//dml Deals with displaying results. Decides whether to unscale back the acoustic score with the best score in a frame. 
 * output_lattice_handling_command_line_macro, <font color='purple'>//dml Directory, format, and filename extension in which to dump word lattices. 
 * dag_handling_command_line_macro, <font color='purple'>//dml Adds frames depending on the number of nodes ignored. Deals with utterances. Controls CPU usage and memory usage. 
 * second_stage_dag_handling_command_line_macro, <font color='purple'>//dml Decides the best path DAG search. Inputs word lattice directory with per-utt files. Picks best filename extension. Generates best amount of hypotheses to generate per utterance and debugging information on the best search. 
 * input_lattice_handling_command_line_macro, <font color='purple'>//dml Input word-lattice directory with per-utt files for restricting words searched 
 * flat_fwd_debugging_command_line_macro, <font color='purple'>//dml Used for debugging/diagnosis/analysis of start and end frames 
 * history_table_command_line_macro, <font color='purple'>//dml Directory in which to dump word Viterbi back pointer table (for debugging) 


 * cepstral_input_handling_command_line_macro, <font color='purple'>//dml Input cepstrum files directory and file extension. Decides the number of bytes to skip at the beginning of the waveform file. Inputs data from waveform rather than cepstra. 
 * decode_specific_command_line_macro,<font color='orange'>//jay Sets mode between Decoding mode, Operation mode, and Bigram Mode. Decoding and Operation can dump active HMM to stderr or the lex tree to stderr. if Bigram mode active, it will dump one line at a time to stderr. All for debugging 
 * search_specific_command_line_macro,<font color='orange'>//jay Whether detailed backtrace information (word segmentation/scores) shown in log. When Best senone score directory 
 * search_modeTST_specific_command_line_macro,<font color='orange'>//jay Mode 4 only... Shows Number of lextrees to be instantiated, and Entries Per Lextree 
 * search_modeWST_specific_command_line_macro,<font color='orange'>//jay Mode 5 only... Shows number of lextrees to be instantiated statically 
 * control_lm_mllr_file_command_line_macro,<font color='orange'>//jay Points to control_lm_file_command_line_macro, Control file that list the corresponding LMs 
 * finite_state_grammar_command_line_macro,<font color='orange'>//jay Mode 2 only... Sets to Finite state grammar, Uses alternative pronunciations for FSG, and Inserts filler words at each state. 
 * phone_insertion_penalty_command_line_macro,<font color='orange'>//jay Mode 2 and 3 only... Adds word insertion penalty 


 * /* the following functions are used for MMIE training
 * lqin 2010-03 */
 * unigram_only_lm_command_line_macro,
 * bigram_only_lm_command_line_macro,
 * /* end */


 * /* Things are yet to refactored */
 * if 0 <font color='orange'>//jay Will always be false 
 * /* Commented out; not supported */
 * {"-compsep",
 * ARG_STRING,
 * /* Default: No compound word (NULL separator char) */
 * "Separator character between components of a compound word (NULL if "
 * "none)"},
 * endif <font color='orange'>//jay Code skips the commented-out lines in the above if statement and jumps to here 


 * {"-phsegdir",<font color='orange'>//jay Sets phsegdir to NULL 
 * ARG_STRING,
 * NULL,
 * "(Allphone mode only) Output directory for phone segmentation files"},


 * {"-bestscoredir",<font color='orange'>//jay Sets bestscoredir to NULL 
 * ARG_STRING,
 * NULL,
 * "(Mode 3) Directory for writing best score/frame (used to set beamwidth; "
 * "one file/utterance)"},


 * /** ARCHAN 20050717: The only argument which I didn't refactor,
 * reason is it makes sense to make every s3.0 family of tool to
 * accept -utt. DHD 20070525: I have no idea what that means. */


 * {"-utt",<font color='orange'>//jay sets utt to NULL 
 * ARG_STRING,
 * NULL,
 * "Utterance file to be processed (-ctlcount argument times)"},


 * {NULL, ARG_INT32, NULL, NULL}


 * };


 * int32                                                               <font color='blue'> // wmc Function declaration specifier is a 32 bit integer, returns usually either -1, 0, or 1 
 * main(int32 argc, char *argv[])                                      <font color='blue'> // wmc Main function declaration, arguments are the count of command line arguments, and the arguments themselves 
 * {
 * kb_t kb;                                                        <font color='blue'> // wmc Knowledge Base structure that includes core modules, acoustic and language models, dictionary, and feature generation 
 * stat_t *st;                                                     <font color='blue'> // wmc Statistics structure that holds all of said information 
 * cmd_ln_t *config;                                               <font color='blue'> // wmc An 'abstract' structure used to store command line parsing 


 * print_appl_info(argv[0]);                                       <font color='blue'> // wmc Prints the hostname, directory, compile time, and date, takes in the appl_name 
 * cmd_ln_appl_enter(argc, argv, "default.arg", arg);              <font color='blue'> // wmc DEPRECATED: application initialization routine for Sphinx 3 


 * unlimit();                                                    <font color='blue'> // wmc OBSOLETE - KEEP FOR COMPILATION PURPOSES: "unlimits" the memory usage of program 


 * config = cmd_ln_get();                                        <font color='blue'> // wmc DEPRECATED: returns the global cmd_ln_t object used by 'non-re-entrant functions' 
 * kb_init(&kb, config);                                           <font color='blue'> // wmc Initializes the Knowledge Base structure with its information based on the global cmd_ln_t object 
 * st = kb.stat;
 * fprintf(stdout, "\n");


 * if (cmd_ln_str_r(config, "-ctl")) {                                                 <font color='blue'> // wmc Checking to see if global command line object contains a string named '-ctl' and returns the value associated with the name 
 * /* When -ctlfile is speicified, corpus.c will look at -ctl_lm and
 * -ctl_mllr to get the corresponding LM and MLLR for the utterance */
 * st->tm = ctl_process(cmd_ln_str_r(config, "-ctl"),                                                   <font color='green'> // Retrieving the string "-ctl" from the global command line. 
 * cmd_ln_str_r(config, "-ctl_lm"),                                                <font color='green'> // Retrieving the string "-ctl_lm" from the global command line. 
 * cmd_ln_str_r(config, "-ctl_mllr"),                                              <font color='green'> // Retrieving the string "-ctl_mllr" from the global command line. 
 * cmd_ln_int32_r(config, "-ctloffset"),                                           <font color='green'> // Retrieving the integer value of "-ctloffset" from the global command line. 
 * cmd_ln_int32_r(config, "-ctlcount"), utt_decode, &kb);                          <font color='green'> // Retrieving the integer value of "-ctlcount" from the global command line. Specifies utt to be decoded. 
 * }
 * else if (cmd_ln_str_r(config, "-utt")) {                                                                 <font color='green'> // Retrieving the string "-utt" from the global command line. 
 * /* When -utt is specified, corpus.c will wait for the utterance to
 * change */
 * st->tm = ctl_process_utt(cmd_ln_str_r(config, "-utt"),                                               <font color='green'> // Retrieving the string "-utt" from the global command line. 
 * cmd_ln_int32_r(config, "-ctlcount"),                                        <font color='green'> // Retrieving the integer value of "-ctlcount" from the global command line. 
 * utt_decode, &kb);                                                           <font color='green'> // Specifies utt to be decoded. 


 * }
 * else {
 * /* Is error checking good enough?" */
 * E_FATAL("Both -utt and -ctl are not specified.\n"); <font color='red'> // lm:  fatal error reported when neither -utt nor -ctl was given on the command line 
 * }
 * if (kb.matchsegfp)
 * fclose(kb.matchsegfp);  <font color='red'> // lm: closing a file kb.matchsegfp 
 * if (kb.matchfp)
 * fclose(kb.matchfp); <font color='red'> // lm: closing a file kb.matchfp 


 * stat_report_corpus(kb.stat);


 * kb_free(&kb);
 * <font color='red'> commented out 
 * if (! WIN32)
 * if defined(_SUN4)
 * system("ps -el | grep sphinx3_decode"); <font color='red'> // lm: ps means process status and -e means Display information about other users' processes, including those without controlling terminals. And -l means Display information associated with the following keywords: uid, pid, ppid, flags, cpu, pri, nice, vsz=SZ, rss, wchan, state=S, paddr=ADDR, tty, time, and command=CMD. 
 * else
 * system("ps aguxwww | grep sphinx3_decode"); <font color='red'> // lm:  the pipe takes the output of the left command and feeds it as input to the right command, looking to see if there is a sphinx3_decode process 
 * endif
 * endif
 * <font color='red'> end of comment 


 * cmd_ln_free_r(config);
 * exit(0);
 * }