Hiroyuki Komatsu
komat****@users*****
2004年 12月 18日 (土) 17:07:11 JST
Index: prime/lib/prime2.rb diff -u prime/lib/prime2.rb:1.1.2.2 prime/lib/prime2.rb:1.1.2.3 --- prime/lib/prime2.rb:1.1.2.2 Fri Dec 17 23:11:32 2004 +++ prime/lib/prime2.rb Sat Dec 18 17:07:10 2004 @@ -1,5 +1,5 @@ # prime2.rb: Module for PRIME2 protocol. -# $Id: prime2.rb,v 1.1.2.2 2004/12/17 14:11:32 komatsu Exp $ +# $Id: prime2.rb,v 1.1.2.3 2004/12/18 08:07:10 komatsu Exp $ # # Copyright (C) 2004 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -129,9 +129,9 @@ ## ## Conversion methods ## - def conv_convert (session, method = nil) - query_string = edit_get_query_string(session) + ## This returns a PrimeConversionList. + def conv_convert (session, method = nil) if PRIME_ENV['typing_method'] == 'tcode' or PRIME_ENV['typing_method'] == 'handwrite' then candidates = lookup_direct_all(string) @@ -145,12 +145,12 @@ results_compact = PrimeWordList::merge_with_label(@context, words_compact) results_conversion = PrimeWordList::merge_with_label(@context, - words_overall) -# words_japanese) + words_overall, + words_japanese) candidates = PrimeWordList::concat(results_compact | results_conversion) end - conversions = _adhoc_wordlist_to_conversionlist(query_string, candidates) + conversions = _adhoc_wordlist_to_conversionlist(candidates) session_set_conversions(session, conversions) return conversions @@ -158,15 +158,16 @@ # return candidates end - def _adhoc_wordlist_to_conversionlist (reading, wordlist) + def _adhoc_wordlist_to_conversionlist (wordlist) conversion_list = [] wordlist.length.times { | index | + word = wordlist[index] + reading = word.to_text_pron() segment = PrimeSegment.new(reading) segment.set_candidates(wordlist, index) - score = wordlist[index].score - conversion = PrimeConversion.new( [segment], score ) - conversion_list.push(conversion) + + conversion_list.push( PrimeConversion.new( [segment], word.score ) ) } return PrimeConversionList.new(conversion_list) end @@ -185,7 +186,7 @@ candidates = PrimeWordList::merge_with_label(@context, 
words_compact) end - conversion = PrimeConversion(candidates) + conversions = PrimeConversionList.new( PrimeConversion.new(candidates) ) session_set_candidates(session, candidates) return candidates @@ -247,6 +248,15 @@ ## ## convertion methods ## + + ## This is a wrapper for convert_*. This converts query to + ## a PrimeConvertionList insted of PrimeWordList and returns it. + def convert (query) + wordlist = search(query) + return _adhoc_wordlist_to_conversionlist( wordlist ) + end + private :convert + def convert_prefix (composer, context) # 「よ→予測」 expansion = composer.edit_get_expansion() @@ -282,19 +292,19 @@ def convert_compact (composer, context) words_prefix = convert_prefix(composer, context) -# words_japanese = convert_japanese_uniclause(composer, context) - # words_japanese = search_japanese(string) - # words_japanese = search_japanese_prefix(string) -# words_compact = PrimeWordList::merge(words_prefix, words_japanese)[0,1] - words_compact = PrimeWordList::merge(words_prefix)[0,1] - - ## Ruby 1.6 does not keep the class PrimeWordList word[0,1] if the - ## value of word is [], and the class of the result of word[0,1] - ## becomes Array which is a super class of PrimewordList. - if words_compact.empty? then - words_compact = PrimeWordList.new() + + ## If the result of search_prefix is empty, this method stops the following + ## search_japanese_uniclause for the quickness. + if words_prefix.empty? then + ## Ruby 1.6 does not keep the class PrimeWordList word[0,1] if the + ## value of word is [], and the class of the result of word[0,1] + ## becomes Array which is a super class of PrimewordList. 
+ return PrimeWordList.new() end + words_japanese = convert_japanese_uniclause(composer, context) + words_compact = PrimeWordList::merge(words_prefix, words_japanese)[0,1] + if words_compact.length > 0 then predict_with_multi_clauses!(words_compact) @@ -305,9 +315,9 @@ return words_compact end - def convert_japanese_process_segments_list (composer) + def convert_japanese_process_segments_list (composer, threshold = 4) string = composer.edit_get_surface_string() - (depth, segments_list) = guess_clauses_internal(string) + (depth, segments_list) = guess_clauses_internal(string, 1, nil, threshold) conversions = [] min_length = depth @@ -318,8 +328,9 @@ min_length = segments.length elsif segments.length == min_length then prime_segments = [] - segments.each { | (reading, pos, adjunct, pos_adjunct, engines) | - prime_segment = PrimeSegment.new(reading, pos, adjunct, pos_adjunct) + segments.each { | (base, pos, adjunct, pos_adjunct, engines) | + prime_segment = + PrimeSegment.new(base + adjunct, base, pos, adjunct, pos_adjunct) prime_segments.push(prime_segment) } conversions.push(prime_segments) @@ -351,31 +362,34 @@ return score end + ## This returns a PrimeConversionList. 
def convert_japanese (composer, context) - rest = nil - segments_list = convert_japanese_process_segments_list(composer) - conversions = PrimeConversionList.new() segments_list.each { | segments | if segments.length == 1 then segment = segments[0] - reading = segment.reading - query = PrimeQuery.new( [reading], segment.pos ) + query = PrimeQuery.new( [segment.base], segment.pos ) words = search(query) words.length.times { | index | + word = words[index] + word.conjugation = segment.adjunct + word.conjugation_pos = segment.pos_adjunct + new_segment = segment.dup() new_segment.set_candidates(words, index) - score = words[index].score - conversion = PrimeConversion.new( [new_segment], score ) - conversions.push(conversion) + conversions.push( PrimeConversion.new( [new_segment], word.score ) ) } else segments.each { | segment | query = PrimeQuery.new( [segment.reading], segment.pos ) words = search(query) + words.each { | word | + word.conjugation = segment.adjunct + word.conjugation_pos = segment.pos_adjunct + } index = (segment.pos == nil) ? -1 : 0 segment.set_candidates(words, index) } @@ -383,13 +397,26 @@ conversions.push( PrimeConversion.new(segments, score) ) end } -# conversions = PrimeConversionList.new(conversions) - puts "----" - puts conversions.to_text_debug() - puts "----" return conversions end + ## This returns a PrimeWordList. + def convert_japanese_uniclause (composer, context) + segments_list = convert_japanese_process_segments_list(composer, 1) + words = PrimeWordList.new() + segments_list.each { | segments | + segment = segments[0] # The lengh of segments must be 1. 
+ query = PrimeQuery.new( [segment.base], segment.pos ) + words = search(query) + + words.each { | word | + word.conjugation = segment.adjunct + word.conjugation_pos = segment.pos_adjunct + } + } + return words + end + class PrimeSession def initialize () @composer = initialize_composer() @@ -516,20 +543,26 @@ end end + + class PrimeSegment - attr_reader :reading, :pos - def initialize (reading, pos = nil, adjunct = "", pos_adjunct = nil) - @reading = reading - @pos = pos - @adjunct = adjunct - @pos_adjunct = pos_adjunct + attr_reader :reading, :base, :pos, :adjunct, :pos_adjunct + def initialize (reading, + base = nil, pos = nil, adjunct = "", pos_adjunct = nil) + @reading = reading + + ## The following data is a query guideline. + @base = base + @pos = pos + @adjunct = adjunct + @pos_adjunct = pos_adjunct @candidates = PrimeWordList.new() @candidate_index = -1 end def dup () - segment = PrimeSegment.new(@reading, @pos, @adjunct, @pos_adjunct) + segment = PrimeSegment.new(@reading, @base, @pos, @adjunct, @pos_adjunct) segment.set_candidates(@candidates, @candidate_index) return segment end @@ -561,11 +594,9 @@ def to_text_data () if @candidate_index == -1 then - text = @reading + @adjunct + text = @reading else - text = \ - [ @candidates[@candidate_index].to_text_literal() + @adjunct, - @candidates[@candidate_index].to_text_data() ].join("\t") + text = @candidates[@candidate_index].to_text2() end return text end Index: prime/lib/prime.rb diff -u prime/lib/prime.rb:1.7.4.10 prime/lib/prime.rb:1.7.4.11 --- prime/lib/prime.rb:1.7.4.10 Fri Dec 17 23:11:32 2004 +++ prime/lib/prime.rb Sat Dec 18 17:07:10 2004 @@ -1,5 +1,5 @@ # prime/prime.rb -# $Id: prime.rb,v 1.7.4.10 2004/12/17 14:11:32 komatsu Exp $ +# $Id: prime.rb,v 1.7.4.11 2004/12/18 08:07:10 komatsu Exp $ # # Copyright (C) 2002, 2003, 2004 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. 
@@ -305,23 +305,19 @@ def search_compact (string) words_prefix = search_prefix(string) - ## FIXME: The argument of words_japanese 'string' should be expanded. - ## FIXME: sonnnak そんな感じ 10137 (by prefix) - ## FIXME: sonnnaka そんなか 10188 (by japanese) - ## FIXME: sonnnakan そんな感じ 10137 (by prefix) - ## FIXME: <komat****@taiya*****> (2004-06-26) - words_japanese = search_japanese_uniclause(string) -# words_japanese = search_japanese(string) -# words_japanese = search_japanese_prefix(string) - words_compact = PrimeWordList::merge(words_prefix, words_japanese)[0,1] - ## Ruby 1.6 does not keep the class PrimeWordList word[0,1] if the - ## value of word is [], and the class of the result of word[0,1] - ## becomes Array which is a super class of PrimewordList. - if words_compact.empty? then - words_compact = PrimeWordList.new() + ## If the result of search_prefix is empty, this method stops the following + ## search_japanese_uniclause for the quickness. + if words_prefix.empty? then + ## Ruby 1.6 does not keep the class PrimeWordList word[0,1] if the + ## value of word is [], and the class of the result of word[0,1] + ## becomes Array which is a super class of PrimewordList. + return PrimeWordList.new() end + words_japanese = search_japanese_uniclause(string) + words_compact = PrimeWordList::merge(words_prefix, words_japanese)[0,1] + if words_compact.length > 0 then predict_with_multi_clauses!(words_compact) @@ -457,8 +453,9 @@ end ## This is for the PRIME2 protocol. 
- def to_text_data + def to_text2 data_list = [ + to_text_literal(), ( "form=#{@data['annotation']}" if @data.has_key?('annotation') ), ( "usage=#{@data['usage']}" if @data.has_key?('usage') ), ( "comment=#{@data['usage']}" if @data.has_key?('comment') ), Index: prime/lib/prime-japanese.rb diff -u prime/lib/prime-japanese.rb:1.4.4.3 prime/lib/prime-japanese.rb:1.4.4.4 --- prime/lib/prime-japanese.rb:1.4.4.3 Mon Sep 27 20:07:51 2004 +++ prime/lib/prime-japanese.rb Sat Dec 18 17:07:10 2004 @@ -1,5 +1,5 @@ # prime/prime-japnese.rb: Japanese module for PRIME. -# $Id: prime-japanese.rb,v 1.4.4.3 2004/09/27 11:07:51 komatsu Exp $ +# $Id: prime-japanese.rb,v 1.4.4.4 2004/12/18 08:07:10 komatsu Exp $ # # Copyright (C) 2004 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -248,7 +248,7 @@ results = search_raw(string) rest = nil - clauses_list = process_clauses(string) + clauses_list = process_clauses(string, 1) clauses_list.each {|clauses| if clauses.length == 1 then ## FIXME: It's an adhoc routine. @@ -296,8 +296,8 @@ ## FIXME: 文節区切りが同じものは統合したい。 ## FIXME: <komat****@taiya*****> (2004-01-24) - def process_clauses (string) - (depth, clauses_list) = guess_clauses_internal(string) + def process_clauses (string, threshold = 4) + (depth, clauses_list) = guess_clauses_internal(string, 1, nil, threshold) processed_clauses_list = [] clauses_list.each {|clauses| if clauses.length <= depth then