From sumomo @ users.sourceforge.jp Tue Feb 10 02:27:46 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Tue, 10 Feb 2009 02:27:46 +0900 Subject: [Julius-cvs 379] CVS update: julius4/libjulius/src Message-ID: <1234200466.200756.27781.nullmailer@users.sourceforge.jp> Index: julius4/libjulius/src/m_fusion.c diff -u julius4/libjulius/src/m_fusion.c:1.11 julius4/libjulius/src/m_fusion.c:1.12 --- julius4/libjulius/src/m_fusion.c:1.11 Sat Jan 31 18:11:21 2009 +++ julius4/libjulius/src/m_fusion.c Tue Feb 10 02:27:45 2009 @@ -20,7 +20,7 @@ * @author Akinobu Lee * @date Thu May 12 13:31:47 2005 * - * $Revision: 1.11 $ + * $Revision: 1.12 $ * */ /* @@ -409,7 +409,10 @@ } /* map dict item to N-gram entry */ - make_voca_ref(ngram, winfo); + if (make_voca_ref(ngram, winfo) == FALSE) { + ngram_info_free(ngram); + return NULL; + } /* post-fix EOS / BOS uni prob for SRILM */ fix_uniprob_srilm(ngram, winfo); From sumomo @ users.sourceforge.jp Tue Feb 10 02:27:48 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Tue, 10 Feb 2009 02:27:48 +0900 Subject: [Julius-cvs 380] CVS update: julius4/libsent/src/util Message-ID: <1234200468.455995.27877.nullmailer@users.sourceforge.jp> Index: julius4/libsent/src/util/ptree.c diff -u julius4/libsent/src/util/ptree.c:1.3 julius4/libsent/src/util/ptree.c:1.4 --- julius4/libsent/src/util/ptree.c:1.3 Tue Sep 30 12:58:18 2008 +++ julius4/libsent/src/util/ptree.c Tue Feb 10 02:27:48 2009 @@ -12,7 +12,7 @@ * @author Akinobu LEE * @date Thu Feb 17 15:34:39 2005 * - * $Revision: 1.3 $ + * $Revision: 1.4 $ * */ /* @@ -240,6 +240,51 @@ return(ptree_search_data_r(node, str, strlen(str) * 8 + 8)); } +/** + * Recursive function to replace the data in the tree + * + * @param node [in] current node. + * @param str [in] key string + * @param val [in] new value + * @param maxbitplace [in] maximum number of bitplace + * + * @return the found integer value. 
+ */ +static int +ptree_replace_data_r(PATNODE *node, char *str, int val, int maxbitplace) +{ + if (node->left0 == NULL && node->right1 == NULL) { + node->value.data = val; + return(node->value.data); + } else { + if (testbit_max(str, node->value.thres_bit, maxbitplace) != 0) { + return(ptree_replace_data_r(node->right1, str, val, maxbitplace)); + } else { + return(ptree_replace_data_r(node->left0, str, val, maxbitplace)); + } + } +} + +/** + * Search for the data whose key string matches the given string, and + * replace its value. + * + * @param str [in] search key string + * @param val [in] value + * @param node [in] root node of index tree + * + * @return the exactly found integer value, or the nearest one. + */ +int +ptree_replace_data(char *str, int val, PATNODE *node) +{ + if (node == NULL) { + //("Error: ptree_search_data: no node, search for \"%s\" failed\n", str); + return -1; + } + return(ptree_replace_data_r(node, str, val, strlen(str) * 8 + 8)); +} + /*******************************************************************/ /* add 1 node to given ptree */ From sumomo @ users.sourceforge.jp Tue Feb 10 02:27:48 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Tue, 10 Feb 2009 02:27:48 +0900 Subject: [Julius-cvs 381] CVS update: julius4/mkbingram Message-ID: <1234200468.826748.27888.nullmailer@users.sourceforge.jp> Index: julius4/mkbingram/mkbingram.c diff -u julius4/mkbingram/mkbingram.c:1.2 julius4/mkbingram/mkbingram.c:1.3 --- julius4/mkbingram/mkbingram.c:1.2 Tue Dec 18 17:45:54 2007 +++ julius4/mkbingram/mkbingram.c Tue Feb 10 02:27:48 2009 @@ -18,7 +18,7 @@ * @author Akinobu LEE * @date Thu Mar 24 12:22:27 2005 * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * */ /* @@ -30,7 +30,7 @@ /* mkbingram --- make binary n-gram for JULIUS from ARPA standard format */ -/* $Id: mkbingram.c,v 1.2 2007/12/18 08:45:54 sumomo Exp $ */ +/* $Id: mkbingram.c,v 1.3 2009/02/09 17:27:48 sumomo Exp $ */ #include #include @@ -47,7 +47,8 @@ printf("\n 
options:\n"); printf(" -nlr file forward N-gram in ARPA format\n"); printf(" -nrl file backward N-gram in ARPA format\n"); - printf(" -d file Julius binary N-gram\n"); + printf(" -d bingramfile Julius binary N-gram file input\n"); + printf(" -swap swap \"%s\" and \"%s\"\n", BEGIN_WORD_DEFAULT, END_WORD_DEFAULT); printf("\n When both \"-nlr\" and \"-nrl\" are specified, \n"); printf(" Julius will use the BACKWARD N-gram as main LM\n"); printf(" and use the forward 2-gram only at the 1st pass\n"); @@ -65,6 +66,7 @@ time_t now; char *binfile, *lrfile, *rlfile, *outfile; int i; + boolean force_swap = FALSE; binfile = lrfile = rlfile = outfile = NULL; if (argc <= 1) { @@ -104,6 +106,8 @@ usage(argv[0]); return -1; } + } else if (argv[i][1] == 's') { + force_swap = TRUE; } } else { if (outfile == NULL) { @@ -166,6 +170,9 @@ if (init_ngram_bin(ngram, binfile) == FALSE) return -1; } else { /* read in ARPA n-gram */ + if (force_swap) { + ngram->bos_eos_swap = TRUE; + } if (rlfile) { if (init_ngram_arpa(ngram, rlfile, DIR_RL) == FALSE) return -1; if (lrfile) { From sumomo @ users.sourceforge.jp Tue Feb 10 02:27:47 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Tue, 10 Feb 2009 02:27:47 +0900 Subject: [Julius-cvs 382] CVS update: julius4/libsent/src/ngram Message-ID: <1234200467.944057.27860.nullmailer@users.sourceforge.jp> Index: julius4/libsent/src/ngram/init_ngram.c diff -u julius4/libsent/src/ngram/init_ngram.c:1.4 julius4/libsent/src/ngram/init_ngram.c:1.5 --- julius4/libsent/src/ngram/init_ngram.c:1.4 Sat Jan 31 18:11:22 2009 +++ julius4/libsent/src/ngram/init_ngram.c Tue Feb 10 02:27:46 2009 @@ -12,7 +12,7 @@ * @author Akinobu LEE * @date Wed Feb 16 07:40:53 2005 * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * */ /* @@ -123,52 +123,62 @@ * @param ndata [i/o] word/class N-gram, the unknown word information will be set. * @param winfo [i/o] word dictionary, the word-to-ngram-entry mapping will be done here. 
*/ -void +boolean make_voca_ref(NGRAM_INFO *ndata, WORD_INFO *winfo) { int i; + boolean ok_flag = TRUE; + int count = 0; jlog("Stat: init_ngram: mapping dictonary words to n-gram entries\n"); ndata->unk_num = 0; for (i = 0; i < winfo->num; i++) { winfo->wton[i] = make_ngram_ref(ndata, winfo->wname[i]); + if (winfo->wton[i] == WORD_INVALID) { + ok_flag = FALSE; + count++; + continue; + } if (winfo->wton[i] == ndata->unk_id) { (ndata->unk_num)++; } } + if (ok_flag == FALSE) { + jlog("Error: --- Failed to map %d words in dictionary to N-gram\n", count); + jlog("Error: --- Please fix the dict, or use open vocabulary N-gram that has either \"%s\" or \"%s\"\n", UNK_WORD_DEFAULT, UNK_WORD_DEFAULT2); + return FALSE; + } + if (ndata->unk_num == 0) { ndata->unk_num_log = 0.0; /* for safe */ } else { ndata->unk_num_log = (float)log10(ndata->unk_num); } jlog("Stat: init_ngram: finished word-to-ngram mapping\n"); + return TRUE; } /** * @brief Set unknown word ID to the N-gram data. * - * In CMU-Cam SLM toolkit, OOV words are always mapped to UNK, which - * always appear at the very beginning of N-gram entry, so we fix the - * unknown word ID at "0". + * Unknown word string should be UNK_WORD_DEFAULT or UNK_WORD_DEFAULT2, + * whose default is "" and "". If any of these is not found + * in vocabulary, treat the LM as closed vocabulary. * * @param ndata [out] N-gram data to set unknown word ID. 
*/ void set_unknown_id(NGRAM_INFO *ndata) { -#if 0 - ndata->unk_id = ngram_lookup_word(ndata, unkword); + ndata->isopen = TRUE; + ndata->unk_id = ngram_lookup_word(ndata, UNK_WORD_DEFAULT); + if (ndata->unk_id == WORD_INVALID) { + ndata->unk_id = ngram_lookup_word(ndata, UNK_WORD_DEFAULT2); + } if (ndata->unk_id == WORD_INVALID) { - jlog("word %s not found, so assume this is a closed vocabulary model\n", - unkword); + jlog("Stat: \"%s\" or \"%s\" not found, assuming close vocabulary LM\n", UNK_WORD_DEFAULT, UNK_WORD_DEFAULT2); ndata->isopen = FALSE; - } else { - ndata->isopen = TRUE; } -#endif - ndata->isopen = TRUE; - ndata->unk_id = 0; /* unknown (OOV) words are always mapped to - the number 0 (by CMU-TK)*/ } /** Index: julius4/libsent/src/ngram/ngram_lookup.c diff -u julius4/libsent/src/ngram/ngram_lookup.c:1.2 julius4/libsent/src/ngram/ngram_lookup.c:1.3 --- julius4/libsent/src/ngram/ngram_lookup.c:1.2 Tue Dec 18 17:45:54 2007 +++ julius4/libsent/src/ngram/ngram_lookup.c Tue Feb 10 02:27:46 2009 @@ -12,7 +12,7 @@ * @author Akinobu LEE * @date Wed Feb 16 16:42:38 2005 * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * */ /* @@ -88,8 +88,13 @@ nw = ngram_lookup_word(ndata, wstr); if (nw == WORD_INVALID) { /* not found */ - jlog("Warning: ngram_lookup: word %s not exist in N-gram vocabulary, treat as \n", wstr); - return(ndata->unk_id); + if (ndata->isopen) { + jlog("Warning: ngram_lookup: \"%s\" not exist in N-gram, treat as unknown\n", wstr); + return(ndata->unk_id); + } else { + jlog("Error: ngram_lookup: \"%s\" not exist in N-gram\n", wstr); + return WORD_INVALID; + } } else { return(nw); } Index: julius4/libsent/src/ngram/ngram_malloc.c diff -u julius4/libsent/src/ngram/ngram_malloc.c:1.2 julius4/libsent/src/ngram/ngram_malloc.c:1.3 --- julius4/libsent/src/ngram/ngram_malloc.c:1.2 Tue Dec 18 17:45:54 2007 +++ julius4/libsent/src/ngram/ngram_malloc.c Tue Feb 10 02:27:46 2009 @@ -12,7 +12,7 @@ * @author Akinobu LEE * @date Wed Feb 16 16:48:56 2005 * - * $Revision: 
1.2 $ + * $Revision: 1.3 $ * */ /* @@ -38,6 +38,7 @@ new = (NGRAM_INFO *)mymalloc(sizeof(NGRAM_INFO)); new->bo_wt_1 = NULL; new->p_2 = NULL; + new->bos_eos_swap = FALSE; return(new); } Index: julius4/libsent/src/ngram/ngram_read_arpa.c diff -u julius4/libsent/src/ngram/ngram_read_arpa.c:1.13 julius4/libsent/src/ngram/ngram_read_arpa.c:1.14 --- julius4/libsent/src/ngram/ngram_read_arpa.c:1.13 Sat Jan 31 00:04:18 2009 +++ julius4/libsent/src/ngram/ngram_read_arpa.c Tue Feb 10 02:27:46 2009 @@ -20,7 +20,7 @@ * @author Akinobu LEE * @date Wed Feb 16 16:52:24 2005 * - * $Revision: 1.13 $ + * $Revision: 1.14 $ * */ /* @@ -30,7 +30,7 @@ * All rights reserved */ -/* $Id: ngram_read_arpa.c,v 1.13 2009/01/30 15:04:18 sumomo Exp $ */ +/* $Id: ngram_read_arpa.c,v 1.14 2009/02/09 17:27:46 sumomo Exp $ */ /* words should be alphabetically sorted */ @@ -129,12 +129,12 @@ while (getl(buf, sizeof(buf), fp) != NULL && buf[0] != '\\') { if ((p = strtok(buf, DELM)) == NULL) { - jlog("Error: ngram_read_arpa: LR 1-gram: failed to parse, corrupted or invalid data?\n"); + jlog("Error: ngram_read_arpa: 1-gram: failed to parse, corrupted or invalid data?\n"); return FALSE; } prob = (LOGPROB)atof(p); if ((p = strtok(NULL, DELM)) == NULL) { - jlog("Error: ngram_read_arpa: LR 1-gram: failed to parse, corrupted or invalid data?\n"); + jlog("Error: ngram_read_arpa: 1-gram: failed to parse, corrupted or invalid data?\n"); return FALSE; } name = strcpy((char *)mymalloc(strlen(p)+1), p); @@ -222,7 +222,7 @@ } else { bo_wt = (LOGPROB)atof(p); } - + /* add bo_wt_rl to existing 1-gram entry */ nid = ngram_lookup_word(ndata, name); if (nid == WORD_INVALID) { @@ -251,9 +251,8 @@ } /** - * Read reverse 2-gram data from RL 3-gram file, and set RL 2-gram - * probabilities and back-off values for RL 3-gram to the corresponding - * LR 2-gram data. + * Read forward 2-gram data and set the LR 2-gram probabilities to the + * already loaded RL N-gram. 
* * @param fp [in] file pointer * @param ndata [i/o] N-gram to set the read data. @@ -381,7 +380,7 @@ cid = cid_last = NNID_INVALID; for(i=0;itotalnum); } - /* 2-gram probability */ + /* N-gram probability */ if ((s = strtok(buf, DELM)) == NULL) { jlog("Error: ngram_read_arpa: %d-gram: failed to parse, corrupted or invalid data?\n", n); return FALSE; @@ -656,7 +655,43 @@ /* set unknown (=OOV) word id */ set_unknown_id(ndata); - + + /* swap and for backward SRILM N-gram */ + if (ndata->dir == DIR_RL) { + WORD_ID bos, eos; + char *p; + bos = ngram_lookup_word(ndata, BEGIN_WORD_DEFAULT); + eos = ngram_lookup_word(ndata, END_WORD_DEFAULT); + if (!ndata->bos_eos_swap) { + /* check */ + if (bos != WORD_INVALID && eos != WORD_INVALID && ndata->d[0].prob[bos] == -99) { + jlog("Stat: \"P(%s) = -99\" in reverse N-gram, may be trained by SRILM\n", BEGIN_WORD_DEFAULT); + jlog("Stat: going to swap \"%s\" and \"%s\"\n", BEGIN_WORD_DEFAULT, END_WORD_DEFAULT); + ndata->bos_eos_swap = TRUE; + } + } + if (ndata->bos_eos_swap) { + if (bos == WORD_INVALID) { + jlog("Error: ngram_read_arpa: try to swap bos/eos but \"%s\" not found in N-gram\n", BEGIN_WORD_DEFAULT); + } + if (eos == WORD_INVALID) { + jlog("Error: ngram_read_arpa: try to swap bos/eos but \"%s\" not found in N-gram\n", END_WORD_DEFAULT); + } + if (bos == WORD_INVALID || eos == WORD_INVALID) { + return FALSE; + } + /* do swap */ + jlog("Stat: ngram_read_arpa: swap \"%s\" and \"%s\" at backward N-gram\n", BEGIN_WORD_DEFAULT, END_WORD_DEFAULT); + /* swap name buffer */ + p = ndata->wname[bos]; + ndata->wname[bos] = ndata->wname[eos]; + ndata->wname[eos] = p; + /* replace index */ + ptree_replace_data(BEGIN_WORD_DEFAULT, eos, ndata->root); + ptree_replace_data(END_WORD_DEFAULT, bos, ndata->root); + } + } + } #ifdef CLASS_NGRAM Index: julius4/libsent/src/ngram/ngram_util.c diff -u julius4/libsent/src/ngram/ngram_util.c:1.4 julius4/libsent/src/ngram/ngram_util.c:1.5 --- julius4/libsent/src/ngram/ngram_util.c:1.4 Wed Jan 21 
00:48:04 2009 +++ julius4/libsent/src/ngram/ngram_util.c Tue Feb 10 02:27:46 2009 @@ -12,7 +12,7 @@ * @author Akinobu LEE * @date Wed Feb 16 17:18:55 2005 * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * */ /* @@ -92,7 +92,7 @@ fprintf(fp, "\t OOV word = %s(id=%d)\n", ndata->wname[ndata->unk_id],ndata->unk_id); fprintf(fp, "\t OOV size = %d words in dict\n", ndata->unk_num); } else { - fprintf(fp, "\t OOV word = none\n"); + fprintf(fp, "\t OOV word = none (assume close vocabulary)\n"); } fprintf(fp, "\t wordset size = %d\n", ndata->max_word_num); for(i=0;i<ndata->n;i++) { From sumomo @ users.sourceforge.jp Tue Feb 10 02:27:46 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Tue, 10 Feb 2009 02:27:46 +0900 Subject: [Julius-cvs 383] CVS update: julius4/libsent/include/sent Message-ID: <1234200466.842563.27821.nullmailer@users.sourceforge.jp> Index: julius4/libsent/include/sent/ngram2.h diff -u julius4/libsent/include/sent/ngram2.h:1.4 julius4/libsent/include/sent/ngram2.h:1.5 --- julius4/libsent/include/sent/ngram2.h:1.4 Sat Jan 31 18:11:21 2009 +++ julius4/libsent/include/sent/ngram2.h Tue Feb 10 02:27:46 2009 @@ -97,7 +97,7 @@ * @author Akinobu LEE * @date Fri Feb 11 15:04:02 2005 * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * */ /* @@ -124,6 +124,14 @@ #define NNID_INVALID_UPPER 255 ///< Value to indicate no id at NNID_UPPER (24bit) #define NNID_MAX_24 16711679 ///< Allowed maximum number of id (255*65536-1) (24bit) +/// Default word string of beginning-of-sentence word +#define BEGIN_WORD_DEFAULT "<s>" +/// Default word string of end-of-sentence word +#define END_WORD_DEFAULT "</s>" +/// Default word string of unknown word for open vocabulary +#define UNK_WORD_DEFAULT "<unk>" +#define UNK_WORD_DEFAULT2 "<UNK>" + /** * N-gram entries for a m-gram (1 <= m <= N) * @@ -161,6 +169,7 @@ int dir; ///< direction (either DIR_LR or DIR_RL) boolean from_bin; ///< TRUE if source was bingram, otherwise ARPA boolean bigram_index_reversed; ///< TRUE if read from old (<=3.5.3) 
bingram, in which case the 2-gram tuple index is reversed (DIR_LR) against the RL 3-gram. + boolean bos_eos_swap; ///< TRUE if swap BOS and SOS on backward N-gram WORD_ID max_word_num; ///< N-gram vocabulary size char **wname; ///< List of word strings. PATNODE *root; ///< Root of index tree to search n-gram word ID from its name @@ -235,7 +244,7 @@ void print_ngram_info(FILE *fp, NGRAM_INFO *ndata); #include -void make_voca_ref(NGRAM_INFO *ndata, WORD_INFO *winfo); +boolean make_voca_ref(NGRAM_INFO *ndata, WORD_INFO *winfo); void fix_uniprob_srilm(NGRAM_INFO *ndata, WORD_INFO *winfo); #endif /* __SENT_NGRAM2_H__ */ Index: julius4/libsent/include/sent/ptree.h diff -u julius4/libsent/include/sent/ptree.h:1.3 julius4/libsent/include/sent/ptree.h:1.4 --- julius4/libsent/include/sent/ptree.h:1.3 Tue Sep 30 12:58:18 2008 +++ julius4/libsent/include/sent/ptree.h Tue Feb 10 02:27:46 2009 @@ -17,7 +17,7 @@ * @author Akinobu LEE * @date Fri Feb 11 17:27:24 2005 * - * $Revision: 1.3 $ + * $Revision: 1.4 $ * */ /* @@ -70,6 +70,7 @@ PATNODE *make_ptree(char **words, int *data, int wordsnum, int bitplace); void disp_ptree(PATNODE *node, int level); int ptree_search_data(char *str, PATNODE *rootnode); +int ptree_replace_data(char *str, int val, PATNODE *node); PATNODE *ptree_make_root_node(int data); void ptree_add_entry(char *str, int data, char *matchstr, PATNODE **rootnode); void free_ptree(PATNODE *rootnode); Index: julius4/libsent/include/sent/vocabulary.h diff -u julius4/libsent/include/sent/vocabulary.h:1.4 julius4/libsent/include/sent/vocabulary.h:1.5 --- julius4/libsent/include/sent/vocabulary.h:1.4 Fri Oct 17 08:43:25 2008 +++ julius4/libsent/include/sent/vocabulary.h Tue Feb 10 02:27:46 2009 @@ -30,7 +30,7 @@ * @author Akinobu LEE * @date Sat Feb 12 12:38:13 2005 * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * */ /* @@ -46,11 +46,6 @@ #include #include -/// Default word string of beginning-of-sentence word -#define BEGIN_WORD_DEFAULT "" -/// Default word string of 
end-of-sentence word -#define END_WORD_DEFAULT "" - /// Memory allocation step in number of words when loading a word dictionary #define MAXWSTEP 4000 From sumomo @ users.sourceforge.jp Tue Feb 10 17:15:48 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Tue, 10 Feb 2009 17:15:48 +0900 Subject: [Julius-cvs 384] CVS update: julius4/libjulius/include/julius Message-ID: <1234253748.615444.4709.nullmailer@users.sourceforge.jp> Index: julius4/libjulius/include/julius/jconf.h diff -u julius4/libjulius/include/julius/jconf.h:1.7 julius4/libjulius/include/julius/jconf.h:1.8 --- julius4/libjulius/include/julius/jconf.h:1.7 Thu Sep 25 14:00:05 2008 +++ julius4/libjulius/include/julius/jconf.h Tue Feb 10 17:15:48 2009 @@ -23,7 +23,7 @@ * @author Akinobu Lee * @date Fri Feb 16 13:42:28 2007 * - * $Revision: 1.7 $ + * $Revision: 1.8 $ * */ /* @@ -335,6 +335,11 @@ char wordrecog_silence_context_name[MAX_HMMNAME_LEN]; /** + * Name string of Unknown word for N-gram + */ + char unknown_name[UNK_WORD_MAXLEN]; + + /** * Pointer to next instance * */ From sumomo @ users.sourceforge.jp Tue Feb 10 17:15:48 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Tue, 10 Feb 2009 17:15:48 +0900 Subject: [Julius-cvs 385] CVS update: julius4/libjulius/src Message-ID: <1234253748.744333.4723.nullmailer@users.sourceforge.jp> Index: julius4/libjulius/src/default.c diff -u julius4/libjulius/src/default.c:1.9 julius4/libjulius/src/default.c:1.10 --- julius4/libjulius/src/default.c:1.9 Wed Jan 28 14:19:13 2009 +++ julius4/libjulius/src/default.c Tue Feb 10 17:15:48 2009 @@ -17,7 +17,7 @@ * @author Akinobu Lee * @date Fri Feb 16 15:05:43 2007 * - * $Revision: 1.9 $ + * $Revision: 1.10 $ * */ /* @@ -192,7 +192,8 @@ #endif strcpy(j->wordrecog_head_silence_model_name, "silB"); strcpy(j->wordrecog_tail_silence_model_name, "silE"); - j->wordrecog_silence_context_name[0] = '\0'; + j->wordrecog_silence_context_name[0] = '\0'; + strcpy(j->unknown_name, 
UNK_WORD_DEFAULT); // or UNK_WORD_DEFAULT2 } /** Index: julius4/libjulius/src/m_fusion.c diff -u julius4/libjulius/src/m_fusion.c:1.12 julius4/libjulius/src/m_fusion.c:1.13 --- julius4/libjulius/src/m_fusion.c:1.12 Tue Feb 10 02:27:45 2009 +++ julius4/libjulius/src/m_fusion.c Tue Feb 10 17:15:48 2009 @@ -20,7 +20,7 @@ * @author Akinobu Lee * @date Thu May 12 13:31:47 2005 * - * $Revision: 1.12 $ + * $Revision: 1.13 $ * */ /* @@ -408,6 +408,9 @@ return NULL; } + /* set unknown (=OOV) word id */ + set_unknown_id(ngram, lmconf->unknown_name); + /* map dict item to N-gram entry */ if (make_voca_ref(ngram, winfo) == FALSE) { ngram_info_free(ngram); Index: julius4/libjulius/src/m_options.c diff -u julius4/libjulius/src/m_options.c:1.16 julius4/libjulius/src/m_options.c:1.17 --- julius4/libjulius/src/m_options.c:1.16 Mon Oct 13 17:43:43 2008 +++ julius4/libjulius/src/m_options.c Tue Feb 10 17:15:48 2009 @@ -18,7 +18,7 @@ * @author Akinobu Lee * @date Thu May 12 18:52:07 2005 * - * $Revision: 1.16 $ + * $Revision: 1.17 $ * */ /* @@ -896,6 +896,10 @@ GET_TMPARG; jconf->lmnow->tail_silname = strcpy((char*)mymalloc(strlen(tmparg)+1),tmparg); continue; + } else if (strmatch(argv[i],"-mapunk")) { /* unknown word */ + if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; + strncpy(jconf->lmnow->unknown_name, tmparg, UNK_WORD_MAXLEN); + continue; } else if (strmatch(argv[i],"-iwspword")) { /* add short pause word */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; jconf->lmnow->enable_iwspword = TRUE; Index: julius4/libjulius/src/m_usage.c diff -u julius4/libjulius/src/m_usage.c:1.11 julius4/libjulius/src/m_usage.c:1.12 --- julius4/libjulius/src/m_usage.c:1.11 Thu Sep 25 14:00:06 2008 +++ julius4/libjulius/src/m_usage.c Tue Feb 10 17:15:48 2009 @@ -12,7 +12,7 @@ * @author Akinobu Lee * @date Fri May 13 15:04:34 2005 * - * $Revision: 1.11 $ + * $Revision: 1.12 $ * */ /* @@ -259,6 +259,7 @@ fprintf(fp, " -v dictfile dictionary file name\n"); fprintf(fp, " 
[-silhead wordname] (n-gram) beginning-of-sentence word (%s)\n", BEGIN_WORD_DEFAULT); fprintf(fp, " [-siltail wordname] (n-gram) end-of-sentence word (%s)\n", END_WORD_DEFAULT); + fprintf(fp, " [-mapunk wordname] (n-gram) map unknown words to this (%s)\n", UNK_WORD_DEFAULT); fprintf(fp, " [-forcedict] ignore error entry and keep running\n"); fprintf(fp, " [-iwspword] (n-gram) add short-pause word for inter-word CD sp\n"); fprintf(fp, " [-iwspentry entry] (n-gram) word entry for \"-iwspword\" (%s)\n", IWSPENTRY_DEFAULT); From sumomo @ users.sourceforge.jp Tue Feb 10 17:15:48 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Tue, 10 Feb 2009 17:15:48 +0900 Subject: [Julius-cvs 386] CVS update: julius4/libsent/src/ngram Message-ID: <1234253748.917391.4742.nullmailer@users.sourceforge.jp> Index: julius4/libsent/src/ngram/init_ngram.c diff -u julius4/libsent/src/ngram/init_ngram.c:1.5 julius4/libsent/src/ngram/init_ngram.c:1.6 --- julius4/libsent/src/ngram/init_ngram.c:1.5 Tue Feb 10 02:27:46 2009 +++ julius4/libsent/src/ngram/init_ngram.c Tue Feb 10 17:15:48 2009 @@ -12,7 +12,7 @@ * @author Akinobu LEE * @date Wed Feb 16 07:40:53 2005 * - * $Revision: 1.5 $ + * $Revision: 1.6 $ * */ /* @@ -145,7 +145,7 @@ } if (ok_flag == FALSE) { jlog("Error: --- Failed to map %d words in dictionary to N-gram\n", count); - jlog("Error: --- Please fix the dict, or use open vocabulary N-gram that has either \"%s\" or \"%s\"\n", UNK_WORD_DEFAULT, UNK_WORD_DEFAULT2); + jlog("Error: --- Specify the word to which those words are mapped with \"-mapunk\" (default: \"\" or \"\"\n"); return FALSE; } @@ -161,23 +161,31 @@ /** * @brief Set unknown word ID to the N-gram data. * - * Unknown word string should be UNK_WORD_DEFAULT or UNK_WORD_DEFAULT2, - * whose default is "" and "". If any of these is not found - * in vocabulary, treat the LM as closed vocabulary. * * @param ndata [out] N-gram data to set unknown word ID. 
+ * @param str [in] word name string of unknown word */ void -set_unknown_id(NGRAM_INFO *ndata) +set_unknown_id(NGRAM_INFO *ndata, char *str) { - ndata->isopen = TRUE; - ndata->unk_id = ngram_lookup_word(ndata, UNK_WORD_DEFAULT); + ndata->unk_id = ngram_lookup_word(ndata, str); if (ndata->unk_id == WORD_INVALID) { - ndata->unk_id = ngram_lookup_word(ndata, UNK_WORD_DEFAULT2); + if (strmatch(str, UNK_WORD_DEFAULT)) { + /* if default "" is not found, also try "" */ + ndata->unk_id = ngram_lookup_word(ndata, UNK_WORD_DEFAULT2); + if (ndata->unk_id == WORD_INVALID) { + jlog("Stat: init_ngram: either \"%s\" and \"%s\" not found, assuming close vocabulary LM\n", UNK_WORD_DEFAULT, UNK_WORD_DEFAULT2); + ndata->isopen = FALSE; + return; + } + } } if (ndata->unk_id == WORD_INVALID) { - jlog("Stat: \"%s\" or \"%s\" not found, assuming close vocabulary LM\n", UNK_WORD_DEFAULT, UNK_WORD_DEFAULT2); + jlog("Stat: init_ngram: \"%s\" not found, assuming close vocabulary LM\n", str); ndata->isopen = FALSE; + } else { + jlog("Stat: init_ngram: unknown words will be mapped to \"%s\"\n", str); + ndata->isopen = TRUE; } } Index: julius4/libsent/src/ngram/ngram_read_arpa.c diff -u julius4/libsent/src/ngram/ngram_read_arpa.c:1.14 julius4/libsent/src/ngram/ngram_read_arpa.c:1.15 --- julius4/libsent/src/ngram/ngram_read_arpa.c:1.14 Tue Feb 10 02:27:46 2009 +++ julius4/libsent/src/ngram/ngram_read_arpa.c Tue Feb 10 17:15:48 2009 @@ -20,7 +20,7 @@ * @author Akinobu LEE * @date Wed Feb 16 16:52:24 2005 * - * $Revision: 1.14 $ + * $Revision: 1.15 $ * */ /* @@ -30,7 +30,7 @@ * All rights reserved */ -/* $Id: ngram_read_arpa.c,v 1.14 2009/02/09 17:27:46 sumomo Exp $ */ +/* $Id: ngram_read_arpa.c,v 1.15 2009/02/10 08:15:48 sumomo Exp $ */ /* words should be alphabetically sorted */ @@ -555,7 +555,7 @@ /* check if the numbers are the same with already read n-gram */ for(i=0;i<2;i++) { if (ndata->d[i].totalnum != num[i]) { - jlog("Warning: ngram_read_arpa: %d-gram total num differ between forward 
N-gram and backward N-gram, may cause some error\n", n+1); + jlog("Warning: ngram_read_arpa: %d-gram total num differ between forward N-gram and backward N-gram, may cause some error\n", i+1); } } /* read additional 1-gram data */ @@ -653,9 +653,6 @@ } } - /* set unknown (=OOV) word id */ - set_unknown_id(ndata); - /* swap <s> and </s> for backward SRILM N-gram */ if (ndata->dir == DIR_RL) { WORD_ID bos, eos; Index: julius4/libsent/src/ngram/ngram_read_bin.c diff -u julius4/libsent/src/ngram/ngram_read_bin.c:1.5 julius4/libsent/src/ngram/ngram_read_bin.c:1.6 --- julius4/libsent/src/ngram/ngram_read_bin.c:1.5 Sat Jan 31 00:04:18 2009 +++ julius4/libsent/src/ngram/ngram_read_bin.c Tue Feb 10 17:15:48 2009 @@ -48,7 +48,7 @@ * @author Akinobu LEE * @date Wed Feb 16 17:12:08 2005 * - * $Revision: 1.5 $ + * $Revision: 1.6 $ * */ /* @@ -641,9 +641,6 @@ jlog("Stat: ngram_read_bin: making entry name index\n"); ngram_make_lookup_tree(ndata); - /* set unknown id */ - set_unknown_id(ndata); - bi_prob_func_set(ndata); return TRUE; From sumomo @ users.sourceforge.jp Tue Feb 10 17:15:48 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Tue, 10 Feb 2009 17:15:48 +0900 Subject: [Julius-cvs 387] CVS update: julius4/libsent/include/sent Message-ID: <1234253748.821404.4730.nullmailer@users.sourceforge.jp> Index: julius4/libsent/include/sent/ngram2.h diff -u julius4/libsent/include/sent/ngram2.h:1.5 julius4/libsent/include/sent/ngram2.h:1.6 --- julius4/libsent/include/sent/ngram2.h:1.5 Tue Feb 10 02:27:46 2009 +++ julius4/libsent/include/sent/ngram2.h Tue Feb 10 17:15:48 2009 @@ -97,7 +97,7 @@ * @author Akinobu LEE * @date Fri Feb 11 15:04:02 2005 * - * $Revision: 1.5 $ + * $Revision: 1.6 $ * */ /* @@ -131,6 +131,8 @@ /// Default word string of unknown word for open vocabulary #define UNK_WORD_DEFAULT "<unk>" #define UNK_WORD_DEFAULT2 "<UNK>" +/// Maximum length of unknown word string +#define UNK_WORD_MAXLEN 30 /** * N-gram entries for a m-gram (1 <= m <= N) @@ -239,7 +241,7 
@@ boolean init_ngram_bin(NGRAM_INFO *ndata, char *ngram_file); boolean init_ngram_arpa(NGRAM_INFO *ndata, char *ngram_file, int dir); boolean init_ngram_arpa_additional(NGRAM_INFO *ndata, char *bigram_file); -void set_unknown_id(NGRAM_INFO *ndata); +void set_unknown_id(NGRAM_INFO *ndata, char *str); void print_ngram_info(FILE *fp, NGRAM_INFO *ndata); From sumomo @ users.sourceforge.jp Wed Feb 11 16:14:20 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Wed, 11 Feb 2009 16:14:20 +0900 Subject: [Julius-cvs 388] CVS update: julius4/man Message-ID: <1234336460.966479.23345.nullmailer@users.sourceforge.jp> Index: julius4/man/julius.1 diff -u julius4/man/julius.1:1.1 julius4/man/julius.1:1.2 --- julius4/man/julius.1:1.1 Thu Oct 2 17:23:47 2008 +++ julius4/man/julius.1 Wed Feb 11 16:14:20 2009 @@ -3,11 +3,11 @@ .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 -.\" Date: 10/02/2008 +.\" Date: 02/11/2009 .\" Manual: .\" Source: .\" -.TH "JULIUS" "1" "10/02/2008" "" "" +.TH "JULIUS" "1" "02/11/2009" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -539,6 +539,11 @@ Silence word defined in the dictionary, for silences at the beginning of sentence and end of sentence. (default: "<s>", "</s>") .RE .PP +\fB \-mapunk \fR \fIword_string\fR +.RS 3n +Specify unknown word. Default is "<unk>" or "<UNK>". This will be used to assign word probability on unknown words, i.e. words in dictionary that are not in N\-gram vocabulary. +.RE +.PP \fB \-iwspword \fR .RS 3n Add a word entry to the dictionary that should correspond to inter\-word pauses. This may improve recognition accuracy in some language model that has no explicit inter\-word pause modeling.
The word entry to be added can be changed by Index: julius4/man/mkbingram.1 diff -u julius4/man/mkbingram.1:1.1 julius4/man/mkbingram.1:1.2 --- julius4/man/mkbingram.1:1.1 Thu Oct 2 17:23:47 2008 +++ julius4/man/mkbingram.1 Wed Feb 11 16:14:20 2009 @@ -3,11 +3,11 @@ .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 -.\" Date: 10/02/2008 +.\" Date: 02/11/2009 .\" Manual: .\" Source: .\" -.TH "MKBINGRAM" "1" "10/02/2008" "" "" +.TH "MKBINGRAM" "1" "02/11/2009" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -65,6 +65,11 @@ Read in a binary N\-gram file. .RE .PP +\fB \-swap \fR +.RS 3n +Swap BOS word and EOS word in N\-gram. +.RE +.PP \fIoutput_bingram_file\fR .RS 3n binary N\-gram file name to output. From sumomo @ users.sourceforge.jp Wed Feb 11 16:14:21 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Wed, 11 Feb 2009 16:14:21 +0900 Subject: [Julius-cvs 389] CVS update: julius4/man/ja Message-ID: <1234336461.046196.23355.nullmailer@users.sourceforge.jp> Index: julius4/man/ja/julius.1 diff -u julius4/man/ja/julius.1:1.1 julius4/man/ja/julius.1:1.2 --- julius4/man/ja/julius.1:1.1 Thu Oct 2 17:23:48 2008 +++ julius4/man/ja/julius.1 Wed Feb 11 16:14:20 2009 @@ -3,11 +3,11 @@ .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 -.\" Date: 10/02/2008 +.\" Date: 02/11/2009 .\" Manual: .\" Source: .\" -.TH "JULIUS" "1" "10/02/2008" "" "" +.TH "JULIUS" "1" "02/11/2009" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -429,6 +429,11 @@ 音声入力両端の無音区間に相当する「無音単語」エントリを指定する. 単語の読み(N\-gramエントリ名),あるいは"#"+単語番号(辞書ファ イルの行番号\-1)で指定する.デフォルトはそれぞれ "<s>", "</s>" である. .RE .PP +\fB \-mapunk \fR \fIword_string\fR +.RS 3n +unknown に対応する単語名を指定する.デフォルトは, "<unk>" あるいは "<UNK>" である.この単語は, 認識辞書において N\-gram にない単語を指定した場合にマッピングされ る単語である. +.RE +.PP \fB \-iwspword \fR .RS 3n ポーズに対応する無音単語を辞書に追加する.追加される単語の内容は オプション\fB\-iwspentry\fRで変更できる.
Index: julius4/man/ja/mkbingram.1 diff -u julius4/man/ja/mkbingram.1:1.1 julius4/man/ja/mkbingram.1:1.2 --- julius4/man/ja/mkbingram.1:1.1 Thu Oct 2 17:23:48 2008 +++ julius4/man/ja/mkbingram.1 Wed Feb 11 16:14:20 2009 @@ -3,11 +3,11 @@ .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 -.\" Date: 10/02/2008 +.\" Date: 02/11/2009 .\" Manual: .\" Source: .\" -.TH "MKBINGRAM" "1" "10/02/2008" "" "" +.TH "MKBINGRAM" "1" "02/11/2009" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -53,6 +53,11 @@ バイナリN\-gramを読み込む(古いバイナリ形式の変換用) .RE .PP +\fB \-swap \fR +.RS 3n +文頭記号 と文末記号 を入れ替える. +.RE +.PP \fIoutput_bingram_file\fR .RS 3n 出力先のバイナリN\-gramファイル名 From sumomo @ users.sourceforge.jp Wed Feb 11 16:14:51 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Wed, 11 Feb 2009 16:14:51 +0900 Subject: [Julius-cvs 390] CVS update: julius4 Message-ID: <1234336491.362036.23560.nullmailer@users.sourceforge.jp> Index: julius4/00readme-ja.txt diff -u julius4/00readme-ja.txt:1.7 julius4/00readme-ja.txt:1.8 --- julius4/00readme-ja.txt:1.7 Sat Dec 13 19:10:57 2008 +++ julius4/00readme-ja.txt Wed Feb 11 16:14:51 2009 @@ -4,6 +4,7 @@ Julius + (Rev 4.1.2 2009/02/12) (Rev 4.1.1 2008/12/13) (Rev 4.1 2008/10/03) (Rev 4.0.2 2008/05/27) @@ -13,10 +14,10 @@ (Rev 2.0 1999/02/20) (Rev 1.0 1998/02/20) - Copyright (c) 1991-2008 ???? ????コ + Copyright (c) 1991-2009 ???? ????コ Copyright (c) 1997-2000 ????U?????(IPA) Copyright (c) 2000-2005 ?????????w??? ュ???コ - Copyright (c) 2005-2008 ??????? Julius????? + Copyright (c) 2005-2009 ??????? Julius????? All rights reserved ====================================================================== @@ -36,10 +37,12 @@ Julius-4.1.1 ============= -4.1.1 ???????????????????????????????? -4.?????????????????D -4.0 ???????????????????????????????D -????????o????? Release-ja.txt ???????? +????? 4.1.1 ?N-gram?????????????????D +2GB???T??? N-gram ?????SRILM ??????-gram?? 
+????????????????????????????? +???????????????????????s?????? + +??????????Release-ja.txt ???????? ??????? Index: julius4/00readme.txt diff -u julius4/00readme.txt:1.7 julius4/00readme.txt:1.8 --- julius4/00readme.txt:1.7 Sat Dec 13 19:10:58 2008 +++ julius4/00readme.txt Wed Feb 11 16:14:51 2009 @@ -4,6 +4,7 @@ Julius + (Rev 4.1.2 2009/02/12) (Rev 4.1.1 2008/12/13) (Rev 4.1 2008/10/03) (Rev 4.0.2 2008/05/27) @@ -13,10 +14,10 @@ (Rev 2.0 1999/02/20) (Rev 1.0 1998/02/20) - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University + Copyright (c) 1991-2009 Kawahara Lab., Kyoto University Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology + Copyright (c) 2005-2009 Julius project team, Nagoya Institute of Technology All rights reserved ====================================================================== @@ -45,16 +46,18 @@ together with source codes. -What's new in Julius-4.1.1 +What's new in Julius-4.1.2 =========================== -Revision 4.1.1 is a bug fix release. Many bugs are fixed relating -N-gram handling (tested on SRILM), module mode and more. All users -that uses older versions of Julius-4 are strongly recommended to use -this version. +Revision 4.1.2 includes many fixes for N-gram. +It now allows over 2GB N-gram, improved support for SRILM, and +correct handling of unknown words. Some bugs and improvements +that has been reported on the Forum has been also fixed. +see "Release.txt" for more details of changes. 
-Contents of Julius-4.1.1 + +Contents of Julius-4.1.2 ======================== (Documents with suffix "ja" are written in Japanese) Index: julius4/Release-ja.txt diff -u julius4/Release-ja.txt:1.8 julius4/Release-ja.txt:1.9 --- julius4/Release-ja.txt:1.8 Sat Dec 13 19:10:58 2008 +++ julius4/Release-ja.txt Wed Feb 11 16:14:51 2009 @@ -1,3 +1,27 @@ +4.1.2 (2009.02.12) +=================== + +[N-gram] + - ??????2GB ?? 4GB ??? + - "-mapunk" ? unknown word ????????????? "" "" + - ?????????ォ???m??????CN-gram ? "" ??????? +[SRILM??] + - backward ARPA ?????? ?ゥ????? + - mkbingram ? backward ARPA ?????? "-swap" ??ヲ?????? + - ? 1-gram??? -99 ?????ゥ???+[????] + - successor list ????????????? + - yomi2voca.pl ?? + - ALSA audio buffer overrun ?? + - generate-ngram ? -debug ????????? +[JuliusLib] + - j_close_stream() ?? +[???? + - adintool ???????????????+ - cygwin ????????????(libesd) + - "-input" ?????????????+ + 4.1.1 (2008.12.13) =================== ????F Index: julius4/Release.txt diff -u julius4/Release.txt:1.8 julius4/Release.txt:1.9 --- julius4/Release.txt:1.8 Sat Dec 13 19:10:58 2008 +++ julius4/Release.txt Wed Feb 11 16:14:51 2009 @@ -1,3 +1,30 @@ +4.1.2 (2009.02.12) +=================== + +[SRILM support] + - Added swapping "" and "" when reading BACKWARD ARPA file + trained by SRILM. It will be automatically detected. If detection + fails, you can specify an option "-swap" in mkbingram to do that. + - Internally modify the unigram probability of "" or "", since + they may be set to "-99" in SRILM model. The same value as + opposite will be assigned. +[N-gram] + - Size limit extended from 2GB to 4GB for big N-gram. + - "" and "" can be changed by "-mapunk". + - More strict check for unknown words: Julius now terminates with + error when dictionary has OOV words and N-gram is not open (no unk word). +[Improvements] + - Faster successor list building algorithm + - Update yomi2voca.pl to cover more minor Japanese pronunciation. 
+ - Workaround for audio buffer overrun in ALSA +[JuliusLib] + - Added API function "j_close_stream()" to exit main recognition loop. +[Bug Fixes] + - Fixed segfault on adintool when specifying multiple servers. + - Fixed compilation error on cygwin (libesd) + - Fixed segfault when not specifying "-input" option. + + 4.1.1 (2008.12.13) =================== Bug fixes: Index: julius4/Sample.jconf diff -u julius4/Sample.jconf:1.6 julius4/Sample.jconf:1.7 --- julius4/Sample.jconf:1.6 Sat Dec 13 19:10:58 2008 +++ julius4/Sample.jconf Wed Feb 11 16:14:51 2009 @@ -200,6 +200,7 @@ ## param. #-silhead "<s>" # beginning-of-sentence (silence) word #-siltail "</s>" # end-of-sentence (silence) word +#-mapunk "<unk>" # word to which unknown words should be mapped #-iwspword # add a pause word to the dictionary #-iwspentry "<UNK> [sp] sp sp" # word that will be added by "-iwspword" #-sepnum 150 # num of high freq words to linearize From sumomo @ users.sourceforge.jp Wed Feb 11 16:22:45 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Wed, 11 Feb 2009 16:22:45 +0900 Subject: [Julius-cvs 391] CVS update: julius4/libjulius Message-ID: <1234336965.100133.31849.nullmailer@users.sourceforge.jp> Index: julius4/libjulius/configure diff -u julius4/libjulius/configure:1.8 julius4/libjulius/configure:1.9 --- julius4/libjulius/configure:1.8 Sat Dec 13 19:10:58 2008 +++ julius4/libjulius/configure Wed Feb 11 16:22:44 2009 @@ -592,7 +592,7 @@ JULIUS_PRODUCTNAME=JuliusLib -JULIUS_VERSION=4.1.1 +JULIUS_VERSION=4.1.2 # Check whether --enable-pthread or --disable-pthread was given.
Index: julius4/libjulius/configure.in diff -u julius4/libjulius/configure.in:1.8 julius4/libjulius/configure.in:1.9 --- julius4/libjulius/configure.in:1.8 Sat Dec 13 19:10:58 2008 +++ julius4/libjulius/configure.in Wed Feb 11 16:22:45 2009 @@ -4,7 +4,7 @@ dnl Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology dnl All rights reserved dnl -dnl $Id: configure.in,v 1.8 2008/12/13 10:10:58 sumomo Exp $ +dnl $Id: configure.in,v 1.9 2009/02/11 07:22:45 sumomo Exp $ dnl AC_INIT(src/search_bestfirst_main.c) @@ -12,7 +12,7 @@ AC_CONFIG_AUX_DIR(../support) JULIUS_PRODUCTNAME=JuliusLib -JULIUS_VERSION=4.1.1 +JULIUS_VERSION=4.1.2 dnl Checks for options From sumomo @ users.sourceforge.jp Wed Feb 11 16:22:45 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Wed, 11 Feb 2009 16:22:45 +0900 Subject: [Julius-cvs 392] CVS update: julius4/support Message-ID: <1234336965.280963.31866.nullmailer@users.sourceforge.jp> Index: julius4/support/build-all.sh diff -u julius4/support/build-all.sh:1.6 julius4/support/build-all.sh:1.7 --- julius4/support/build-all.sh:1.6 Sat Dec 13 19:10:58 2008 +++ julius4/support/build-all.sh Wed Feb 11 16:22:45 2009 @@ -1,20 +1,21 @@ #!/bin/sh # -# Build all binaries under directory "build-bin". +# Build all files under directory "build". # # This should be invoked at "..", top of the source archive. # # argument: any configure options except "--enable-setup=..." is allowed. 
# -JULIUS_VERSION=4.1.1 +JULIUS_VERSION=4.1.2 ###################################################################### mkdir build dir=`pwd` +defconf="--without-sndfile" # make julius and other tools with default setting -./configure --prefix=${dir}/build $* +./configure --prefix=${dir}/build ${defconf} $* make make install @@ -26,7 +27,7 @@ # standard cd ../libjulius make distclean -./configure --prefix=${dir}/build --enable-setup=standard $* +./configure --prefix=${dir}/build ${defconf} --enable-setup=standard $* make cd ../julius make clean @@ -35,7 +36,7 @@ # GMM-VAD cd ../libjulius make distclean -./configure --prefix=${dir}/build --enable-gmm-vad $* +./configure --prefix=${dir}/build ${defconf} --enable-gmm-vad $* make cd ../julius make clean @@ -44,7 +45,7 @@ # Decoder-VAD cd ../libjulius make distclean -./configure --prefix=${dir}/build --enable-decoder-vad --enable-power-reject $* +./configure --prefix=${dir}/build ${defconf} --enable-decoder-vad --enable-power-reject $* make cd ../julius make clean From sumomo @ users.sourceforge.jp Wed Feb 11 16:22:45 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Wed, 11 Feb 2009 16:22:45 +0900 Subject: [Julius-cvs 393] CVS update: julius4/libsent Message-ID: <1234336965.203013.31859.nullmailer@users.sourceforge.jp> Index: julius4/libsent/configure diff -u julius4/libsent/configure:1.13 julius4/libsent/configure:1.14 --- julius4/libsent/configure:1.13 Sun Jan 18 19:50:46 2009 +++ julius4/libsent/configure Wed Feb 11 16:22:45 2009 @@ -563,7 +563,7 @@ ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. -LIBSENT_VERSION=4.1.1 +LIBSENT_VERSION=4.1.2 # specify mic type # Check whether --with-mictype or --without-mictype was given. 
Index: julius4/libsent/configure.in diff -u julius4/libsent/configure.in:1.13 julius4/libsent/configure.in:1.14 --- julius4/libsent/configure.in:1.13 Sun Jan 18 19:50:46 2009 +++ julius4/libsent/configure.in Wed Feb 11 16:22:45 2009 @@ -3,7 +3,7 @@ dnl Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology dnl All rights reserved dnl -dnl $Id: configure.in,v 1.13 2009/01/18 10:50:46 sumomo Exp $ +dnl $Id: configure.in,v 1.14 2009/02/11 07:22:45 sumomo Exp $ dnl dnl Process this file with autoconf to produce a configure script. @@ -11,7 +11,7 @@ AC_CONFIG_HEADER(include/sent/config.h) AC_CONFIG_AUX_DIR(../support) -LIBSENT_VERSION=4.1.1 +LIBSENT_VERSION=4.1.2 dnl Checks for options # specify mic type From sumomo @ users.sourceforge.jp Wed Feb 11 16:30:45 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Wed, 11 Feb 2009 16:30:45 +0900 Subject: [Julius-cvs 394] CVS update: julius4/julius-simple Message-ID: <1234337445.341147.7579.nullmailer@users.sourceforge.jp> Index: julius4/julius-simple/julius-simple.c diff -u julius4/julius-simple/julius-simple.c:1.1 julius4/julius-simple/julius-simple.c:1.2 --- julius4/julius-simple/julius-simple.c:1.1 Tue Dec 18 17:45:49 2007 +++ julius4/julius-simple/julius-simple.c Wed Feb 11 16:30:45 2009 @@ -23,7 +23,7 @@ * @author Akinobu Lee * @date Tue Dec 11 14:40:04 2007 * - * $Revision: 1.1 $ + * $Revision: 1.2 $ * */ @@ -364,6 +364,10 @@ /*******/ } + /* calling j_close_stream(recog) at any time will terminate + recognition and exit j_recognize_stream() */ + j_close_stream(recog); + j_recog_free(recog); /* exit program */ From sumomo @ users.sourceforge.jp Wed Feb 11 16:31:30 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Wed, 11 Feb 2009 16:31:30 +0900 Subject: [Julius-cvs 395] CVS update: julius4/libjulius/src Message-ID: <1234337490.155594.8988.nullmailer@users.sourceforge.jp> Index: julius4/libjulius/src/version.c.in diff -u 
julius4/libjulius/src/version.c.in:1.4 julius4/libjulius/src/version.c.in:1.5 --- julius4/libjulius/src/version.c.in:1.4 Sun Apr 27 22:06:08 2008 +++ julius4/libjulius/src/version.c.in Wed Feb 11 16:31:30 2009 @@ -14,7 +14,7 @@ * @author Akinobu Lee * @date Mon Sep 12 01:34:15 2005 * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * */ /* @@ -66,10 +66,10 @@ if (strm == NULL) return; fprintf(strm,"\n%s rev.%s (%s) built for %s\n\n", JULIUS_PRODUCTNAME, JULIUS_VERSION, JULIUS_SETUP, JULIUS_HOSTINFO); - fprintf(strm,"Copyright (c) 1991-2008 Kawahara Lab., Kyoto University\n"); + fprintf(strm,"Copyright (c) 1991-2009 Kawahara Lab., Kyoto University\n"); fprintf(strm,"Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan\n"); fprintf(strm,"Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology\n"); - fprintf(strm,"Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology\n\n"); + fprintf(strm,"Copyright (c) 2005-2009 Julius project team, Nagoya Institute of Technology\n\n"); } /** From sumomo @ users.sourceforge.jp Wed Feb 11 17:18:01 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Wed, 11 Feb 2009 17:18:01 +0900 Subject: [Julius-cvs 396] CVS update: julius4 Message-ID: <1234340281.965591.792.nullmailer@users.sourceforge.jp> Index: julius4/LICENSE.txt diff -u julius4/LICENSE.txt:1.3 julius4/LICENSE.txt:1.4 --- julius4/LICENSE.txt:1.3 Wed Mar 12 15:43:39 2008 +++ julius4/LICENSE.txt Wed Feb 11 17:18:01 2009 @@ -3,10 +3,10 @@ ?????????ッ???? Julius? ????? - Copyright (c) 1991-2008 ???? ????コ + Copyright (c) 1991-2009 ???? ????コ Copyright (c) 1997-2000 ????U?????(IPA) Copyright (c) 2000-2005 ?????????w??? ュ???コ - Copyright (c) 2005-2008 ??????? Julius????? + Copyright (c) 2005-2009 ??????? Julius????? ---------------------------------------------------------------------------- @@ -40,10 +40,10 @@ ???????ヲ?????????????? ? - Copyright (c) 1991-2008 ???? ????コ + Copyright (c) 1991-2009 ???? 
????コ Copyright (c) 1997-2000 ????U?????(IPA) Copyright (c) 2000-2005 ?????????w??? ュ???コ - Copyright (c) 2005-2008 ??????? Julius????? + Copyright (c) 2005-2009 ??????? Julius????? 3. ???????????????????????????????? ?????????ッ???? Julius????????????????? @@ -80,9 +80,9 @@ Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University + Copyright (c) 1991-2009 Kawahara Lab., Kyoto University Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology + Copyright (c) 2005-2009 Julius project team, Nagoya Institute of Technology "Large Vocabulary Continuous Speech Recognition Engine Julius", including Julian, is being developed at Kawahara Lab., Kyoto @@ -130,9 +130,9 @@ Form of copyright notice: Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University + Copyright (c) 1991-2009 Kawahara Lab., Kyoto University Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology + Copyright (c) 2005-2009 Julius project team, Nagoya Institute of Technology 3. 
When you publish or present any results by using the Software, you must explicitly mention your use of "Large Vocabulary Continuous From sumomo @ users.sourceforge.jp Wed Feb 11 18:14:13 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Wed, 11 Feb 2009 18:14:13 +0900 Subject: [Julius-cvs 397] CVS update: julius4 Message-ID: <1234343653.699624.6108.nullmailer@users.sourceforge.jp> Index: julius4/00readme-ja.txt diff -u julius4/00readme-ja.txt:1.8 julius4/00readme-ja.txt:1.9 --- julius4/00readme-ja.txt:1.8 Wed Feb 11 16:14:51 2009 +++ julius4/00readme-ja.txt Wed Feb 11 18:14:13 2009 @@ -34,10 +34,10 @@ http://julius.sourceforge.jp/ -Julius-4.1.1 +Julius-4.1.2 ============= -????? 4.1.1 ?N-gram?????????????????D +????? 4.1.2 ?N-gram?????????????????D 2GB???T??? N-gram ?????SRILM ??????-gram?? ????????????????????????????? ???????????????????????s?????? From sumomo @ users.sourceforge.jp Thu Feb 12 23:22:27 2009 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Thu, 12 Feb 2009 23:22:27 +0900 Subject: [Julius-cvs 398] CVS update: julius4/libsent/src/adin Message-ID: <1234448547.843058.23535.nullmailer@users.sourceforge.jp> Index: julius4/libsent/src/adin/adin_file.c diff -u julius4/libsent/src/adin/adin_file.c:1.4 julius4/libsent/src/adin/adin_file.c:1.5 --- julius4/libsent/src/adin/adin_file.c:1.4 Fri Nov 14 01:36:56 2008 +++ julius4/libsent/src/adin/adin_file.c Thu Feb 12 23:22:27 2009 @@ -56,7 +56,7 @@ * @author Akinobu LEE * @date Sun Feb 13 13:31:20 2005 * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * */ /* @@ -246,7 +246,7 @@ char dummy[4]; if (filename != NULL) { - if ((fp = fopen(filename, "r")) == NULL) { + if ((fp = fopen(filename, "rb")) == NULL) { jlog("Error: adin_file: failed to open %s\n",filename); return(FALSE); }