Hiroyuki Komatsu
komat****@users*****
2004年 12月 9日 (木) 21:24:16 JST
Index: prime/lib/taiyaki.rb diff -u prime/lib/taiyaki.rb:1.6 prime/lib/taiyaki.rb:1.7 --- prime/lib/taiyaki.rb:1.6 Fri Mar 26 02:19:49 2004 +++ prime/lib/taiyaki.rb Thu Dec 9 21:24:16 2004 @@ -1,5 +1,5 @@ # taiyaki.rb: Ruby Libraries by Hiroyuki Komatsu -# $Id: taiyaki.rb,v 1.6 2004/03/25 17:19:49 komatsu Exp $ +# $Id: taiyaki.rb,v 1.7 2004/12/09 12:24:16 komatsu Exp $ # # Copyright (C) 2002 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -8,6 +8,9 @@ # You can redistribute it and/or modify it under the terms of # the GNU General Public License version 2. +$KCODE = 'e' +require 'jcode' + begin Kernel::require('progressbar') rescue LoadError @@ -130,15 +133,26 @@ # 'ぁ-ん' HIRAGANA_SJIS = "\202\237-\202\361" HIRAGANA_EUC = "\244\241-\244\363" + + # う゛ + HIRAGANA_VU_SJIS = "\x82\xA4\x81J" + HIRAGANA_VU_EUC = "\xA4\xA6\xA1\xAB" + # 'ァ-ン' KATAKANA_SJIS = "\203 @ -\203\223" KATAKANA_EUC = "\245\241-\245\363" + # ヴ + KATAKANA_VU_SJIS = "\x83\x94" + KATAKANA_VU_EUC = "\xA5\xF4" + def hiragana! case $KCODE[0] when ?s, ?S + self.gsub!(KATAKANA_VU_SJIS, HIRAGANA_VU_SJIS) return self.tr!(KATAKANA_SJIS, HIRAGANA_SJIS) when ?e, ?E + self.gsub!(KATAKANA_VU_EUC, HIRAGANA_VU_EUC) return self.tr!(KATAKANA_EUC, HIRAGANA_EUC) else return nil @@ -152,8 +166,10 @@ def katakana! case $KCODE[0] when ?s, ?S + self.gsub!(HIRAGANA_VU_SJIS, KATAKANA_VU_SJIS) return self.tr!(HIRAGANA_SJIS, KATAKANA_SJIS) when ?e, ?E + self.gsub!(HIRAGANA_VU_EUC, KATAKANA_VU_EUC) return self.tr!(HIRAGANA_EUC, KATAKANA_EUC) else return self @@ -164,6 +180,14 @@ return ((string = self.dup).katakana! or string) end +# def halfwidth +# +# end + + def chars + return self.split(//) + end + def increase increasing_list = get_increasing_list() if block_given? then @@ -175,6 +199,17 @@ end end + def separate_increase + increasing_list = get_increasing_separated_pair_list() + if block_given? then + increasing_list.each {| (head, tail) | + yield(head, tail) + } + else + return increasing_list + end + end + def decrease decreasing_list = get_increasing_list().reverse() if block_given? then @@ -186,16 +221,32 @@ end end + def separate (index) + string_chars = self.chars() + if index < 0 then + index += string_chars.length + end + return [string_chars[0 , index].join(), string_chars[index .. -1].join()] + end + private def get_increasing_list () increasing_string = "" increasing_list = [] - self.split(//).each {|char| + self.chars.each {|char| increasing_string += char increasing_list.push(increasing_string) } return increasing_list end + + def get_increasing_separated_pair_list () + increasing_list = [] + self.chars.length.times { | index | + increasing_list.push( self.separate(index + 1) ) + } + return increasing_list + end end @@ -230,14 +281,18 @@ class Dir def Dir::ensure (path) - if File::directory?(path) then + path = File::expand_path(path) + if File::directory?(path) then return true else if File::exist?(path) then return false else upper_dir = File::join(path.split(File::Separator)[0..-2]) - if Dir::ensure(upper_dir) && File::writable_real?(upper_dir) then + ## If path does not have any directory separator like "dir" + ## not "./dir", the value of upper_dir becames an empty string "". + if upper_dir == "" or + ( Dir::ensure(upper_dir) && File::writable_real?(upper_dir) ) then Dir::mkdir(path) return true end @@ -255,6 +310,16 @@ self[key] = [list_item] end end + + def set (key, value) + if self.has_key?(key) then + unless self[key].member?(value) then + self[key].push(value) + end + else + self[key] = [value] + end + end end module Debug Index: prime/lib/session.rb diff -u prime/lib/session.rb:1.5 prime/lib/session.rb:1.6 --- prime/lib/session.rb:1.5 Tue May 4 07:55:34 2004 +++ prime/lib/session.rb Thu Dec 9 21:24:16 2004 @@ -1,5 +1,5 @@ # session.rb -# $Id: session.rb,v 1.5 2004/05/03 22:55:34 komatsu Exp $ +# $Id: session.rb,v 1.6 2004/12/09 12:24:16 komatsu Exp $ # # Copyright (C) 2001 Satoru Takabayashi <sator****@namaz*****> # Copyright (C) 2002, 2003, 2004 Hiroyuki Komatsu <komat****@taiya*****> @@ -10,6 +10,8 @@ # the GNU General Public License version 2. # +require 'suikyo/suikyo-composer' + class Command attr_reader :name, :args, :nargs, :min_nargs, :description @@ -74,8 +76,11 @@ end def help +# commands = @command_table.values.sort {|a, b| +# (a.nargs <=> b.nargs).nonzero? || a.name.to_s <=> b.name.to_s +# } commands = @command_table.values.sort {|a, b| - (a.nargs <=> b.nargs).nonzero? || a.name.to_s <=> b.name.to_s + (a.name.to_s <=> b.name.to_s).nonzero? || a.nargs <=> b.nargs } help = "" commands.each {|c| @@ -147,6 +152,8 @@ "look up PATTERN with hybrid matching", 0) add_command(:lookup_prefix, [:PATTERN], "look up PATTERN with prefix matching", 0) + add_command(:lookup_prefix_ex, [:PATTERN], + "look up PATTERN with prefix matching", 0) add_command(:lookup_exact, [:PATTERN], "look up PATTERN with exact matching", 0) add_command(:lookup_expansion, [:PATTERN], @@ -238,6 +245,9 @@ def lookup_prefix (pattern = "") return reply_successful(@prime.lookup_prefix(pattern).to_text) end + def lookup_prefix_ex (pattern = "") + return reply_successful(@prime.lookup_prefix_ex(pattern).to_text) + end def lookup_exact (pattern = "") return reply_successful(@prime.lookup_exact(pattern).to_text) end @@ -254,6 +264,185 @@ end end +class SessionPrime2 < SessionPrime + def initialize (prime, version) + super(prime, version) + @sessions = {} + @session_no = 0 + + add_command(:session_start, [], + "start a session and return the session id.") + add_command(:session_end, [:SESSION], + "close the session specified with the session id.") + add_command(:edit_insert, [:SESSION, :STRING], + "insert this string into the preediting string.") + add_command(:edit_delete, [:SESSION], + "delete a character from the preediting string.") + add_command(:edit_backspace, [:SESSION], + "delete a character backward from the preediting string.") + add_command(:edit_erase, [:SESSION], + "erase the preediting string.") + add_command(:edit_undo, [:SESSION], + "undo the preediting string.") + add_command(:edit_cursor_right, [:SESSION], + "move the cursor right") + add_command(:edit_cursor_left, [:SESSION], + "move the cursor left") + add_command(:edit_cursor_right_edge, [:SESSION], + "move the cursor the end of the preediting string.") + add_command(:edit_cursor_left_edge, [:SESSION], + "move the cursor the beginning of the preediting string.") + add_command(:edit_get_preedition, [:SESSION], + "return a list fo the preediting string [left, cursor, right]") + add_command(:edit_get_query_string, [:SESSION], + "return a query string for lookup functions. (temporal)") + + add_command(:edit_set_mode, [:SESSION, :MODE], + "set display mode of the preedition.\n" + + " MODE = [default, katakana, half_katakana, \n" + + " wide_ascii, raw]") + +# add_command(:convert_start, [:SESSION], +# "convert the preedition string.") +# add_command(:predict, [:METHOD], +# "predict candidate words with the method", 0) + end + +# def predict (method = nil) +# end + + ## + ## Conversion methods +# def convert_start (session) +# composer = session_get_composer(session) +# composer. + + + ## + ## Session methods + ## + def session_start () + ## FIXME: Revise the following code. + ## FIXME: (2004-12-06) <Hiro> + suikyo = PrimeTypeConv::initialize_suikyo() + composer = SuikyoComposer.new(suikyo.table) + composer.set_reverse_table(PRIME_ENV['suikyo_reverse_tables']) + + ## Setting hybrid_typing. + if PRIME_ENV['hybrid_typing'].nil? then + if PRIME_ENV['typing_method'] == 'romaji' then + composer.hybrid_typing = true + end + else + composer.hybrid_typing = PRIME_ENV['hybrid_typing'] + end + + @session_no += 1 + @sessions[@session_no.to_s] = composer + + return reply_successful( @session_no.to_s ) + end + + def session_end (session) + @sessions[session] = nil + return reply_successful() + end + + ## + ## Composition methods + ## + def edit_insert (session, string) + composer = session_get_composer(session) + composer.edit_insert(string) + return reply_with_preediting_string(composer) + end + def edit_delete (session) + composer = session_get_composer(session) + composer.edit_delete() + return reply_with_preediting_string(composer) + end + def edit_backspace (session) + composer = session_get_composer(session) + composer.edit_backspace() + return reply_with_preediting_string(composer) + end + def edit_erase (session) + composer = session_get_composer(session) + composer.edit_erase() + if PRIME_ENV['hybrid_typing'] then + composer.set_mode_hybrid() + end + return reply_with_preediting_string(composer) + end + def edit_undo (session) + composer = session_get_composer(session) + composer.undo() + return reply_with_preediting_string(composer) + end + def edit_cursor_right (session) + composer = session_get_composer(session) + composer.cursor_right() + return reply_with_preediting_string(composer) + end + def edit_cursor_left (session) + composer = session_get_composer(session) + composer.cursor_left() + return reply_with_preediting_string(composer) + end + def edit_cursor_right_edge (session) + composer = session_get_composer(session) + composer.cursor_right_edge() + return reply_with_preediting_string(composer) + end + def edit_cursor_left_edge (session) + composer = session_get_composer(session) + composer.cursor_left_edge() + return reply_with_preediting_string(composer) + end + def edit_get_preedition (session) + composer = session_get_composer(session) + return reply_with_preediting_string(composer) + end + def edit_get_query_string (session) + composer = session_get_composer(session) + return reply_successful( composer.edit_get_query_string() ) + end + + def edit_set_mode (session, mode) + composer = session_get_composer(session) + case mode + when "hybrid" then # Original + composer.set_mode_hybrid() + when "default" then # F6 + composer.set_mode_default() + when "katakana" then # F7 + composer.set_mode_katakana() + when "half_katakana" then # F8 + composer.set_mode_half_katakana() + when "wide_ascii" then # F9 + composer.set_mode_wide_ascii() + when "raw" then # F10 + composer.set_mode_raw() + else + error_message = "Unknown mode. Valid modes are: \n" + + "[default, katakana, half_katakana, wide_ascii, raw]" + return reply_unsuccessful(error_message) + end + return reply_with_preediting_string(composer) + end + + private + def reply_with_preediting_string (composer) + result = composer.edit_get_preediting_string().join("\t") + return reply_successful(result) + end + + def session_get_composer (session) + composer = @sessions[session] + return composer + end +end + # ---- class SessionSKK < SessionCore @@ -310,6 +499,21 @@ end end + +# POBox Protocol +# ---- +# Close "0" "<none>" +# GetWords "1<query>" "1/<word1>/<word2>/.../\n" or +# "0\n" (error) or +# "4\n" (no word) +# GetVersion "2" "<major>.<minor> " +# GetHostName "3" "<hostname>:<port> " +# SetContext "4<context>" "1" +# RegisterWord "5<word>\t<pattern>" "1" +# DeleteWord "6<word>" "1" +# SaveDict "7" "1" +# SelectWord "8<number>" "1" + class SessionPOBox < SessionSKK def lookup (pattern) retur****@prime*****(pattern) Index: prime/lib/prime.rb diff -u prime/lib/prime.rb:1.13 prime/lib/prime.rb:1.14 --- prime/lib/prime.rb:1.13 Sun Aug 29 21:49:28 2004 +++ prime/lib/prime.rb Thu Dec 9 21:24:16 2004 @@ -1,5 +1,5 @@ # prime/prime.rb -# $Id: prime.rb,v 1.13 2004/08/29 12:49:28 komatsu Exp $ +# $Id: prime.rb,v 1.14 2004/12/09 12:24:16 komatsu Exp $ # # Copyright (C) 2002, 2003, 2004 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -56,7 +56,7 @@ case PRIME_ENV['typing_method'] when 'tcode' then if PRIME_ENV.has_key?('style_mask_pending_chars') == false then - PRIME_ENV['style_mask_pending_chars'] == true + PRIME_ENV['style_mask_pending_chars'] = true end end end @@ -220,6 +220,14 @@ return PrimeWordList::merge_with_label(@context, words_prefix) end + def lookup_prefix_ex (string) + words_prefix = search_prefix(string) + results_expansion = lookup_expansion(string) + return PrimeWordList::merge_with_label(@context, + words_prefix, + results_expansion) + end + def lookup_exact (string) words_exact = search_exact(string) return PrimeWordList::merge_with_label(@context, words_exact) @@ -393,18 +401,17 @@ end end +## PrimeWord contains data of a word generated by a user's query. class PrimeWord attr_reader :pron, :literal, :pos, :index - attr_accessor :category, :annotation, :data, :score, + attr_accessor :data, :score, :conjugation, :conjugation_pos, :rest, :prefix - def initialize(pron, literal, pos, score, - category = nil, annotation = nil, *data) + def initialize(pron, literal, pos, score, data = {}) + @pron = (pron or "") @literal = (literal or "") @pos = (pos or "未知語") @score = score.to_i - @category = category - @annotation = annotation @data = data @conjugation = "" @rest = "" @@ -414,7 +421,7 @@ end def values - return [@pron, @literal, @pos, @score, @category, @annotation, @data] + return [@pron, @literal, @pos, @score, @data] end def label () @@ -428,16 +435,21 @@ return (@pron + @conjugation + @rest) end + ## This returns a string data of this word for sending to clients. def to_text - return [to_text_pron(), - to_text_literal(), - "priority=#{@score}", - "part=#{@pos}", - "base=#{@literal}", - "basekey=#{@pron}", - ("conjugation=#{@conjugation}" unles****@conju*****?), - ("suffix=#{@rest}" unles****@rest*****?) - ].compact.join("\t") + data_list = [ + to_text_pron(), + to_text_literal(), + "priority=#{@score}", + "part=#{@pos}", + "base=#{@literal}", + "basekey=#{@pron}", + ( "conjugation=#{@conjugation}" unles****@conju*****?() ), + ( "suffix=#{@rest}" unles****@rest*****?() ), + ( "annotation=#{@data['annotation']}" if****@data*****_key?('annotation') ), + ( "usage=#{@data['usage']}" if****@data*****_key?('usage') ), + ] + return data_list.compact.join("\t") end end @@ -453,8 +465,13 @@ word0 = mark[word.index] ## FIXME: Make this merging more intelligence. ## FIXME: <komat****@taiya*****> (2004-01-24) - word0.category = (word0.category or word.category) - word0.annotation = (word0.annotation or word.annotation) + word.data.keys().each { | key | + if word0.data.has_key?(key) then + # Do nothing yet + else + word0.data[key] = word.data[key] + end + } else mark[word.index] = word merged.push(word) @@ -463,84 +480,56 @@ return merged end - def PrimeWordList::merge (*words) - ## merge should be sorted by inter-engine score. - ## FIXME: Isn't it necessary to sort here? - ## FIXME: <komat****@taiya*****> (2004-01-24) + def PrimeWordList::sort (*words) result = words.flatten.compact.sort {|word1, word2| (word2.score != word1.score) ? (word2.score <=> word1.score) : - (word1.pron.length <=> word2.pron.length) + (word1.pron.length <=> word2.pron.length) } + return result + end - mark = {} - merged = PrimeWordList.new - result.each {|word| - if mark[word.index] then - word0 = mark[word.index] - ## FIXME: Make this merging more intelligence. - ## FIXME: <komat****@taiya*****> (2004-01-24) - word0.category = (word0.category or word.category) - word0.annotation = (word0.annotation or word.annotation) - else - mark[word.index] = word - merged.push(word) - end - } - return merged + def PrimeWordList::merge (*words) + return PrimeWordList::merge_internal(:index, nil, words) end ## FIXME: Change the name of method. ## FIXME: <komat****@taiya*****> (2004-01-26) def PrimeWordList::merge_with_label (context, *words) - ## merge should be sorted by inter-engine score. - ## FIXME: Isn't it necessary to sort here? - ## FIXME: <komat****@taiya*****> (2004-01-24) - result = words.flatten.compact.sort {|word1, word2| - (word2.score != word1.score) ? (word2.score <=> word1.score) : - (word1.pron.length <=> word2.pron.length) - } - - mark = {} - merged = PrimeWordList.new - result.each {|word| - if word.literal != "" then - if mark[word.label()] then - word0 = mark[word.label()] - ## FIXME: Make this merging more intelligence. - ## FIXME: <komat****@taiya*****> (2004-01-24) - word0.category = (word0.category or word.category) - word0.annotation = (word0.annotation or word.annotation) - else - mark[word.label()] = word - word.prefix = Prime::get_prefix(context, word.literal) - merged.push(word) - end - end - } - return merged + return PrimeWordList::merge_internal(:label, context, words) end def PrimeWordList::merge_by_literal (context, *words) + return PrimeWordList::merge_internal(:literal, context, words) + end + + def PrimeWordList::merge_internal (get_key_function, context, words) ## merge should be sorted by inter-engine score. ## FIXME: Isn't it necessary to sort here? ## FIXME: <komat****@taiya*****> (2004-01-24) result = words.flatten.compact.sort {|word1, word2| (word1.score != word2.score) ? (word2.score <=> word1.score) : - (word2.pron.length <=> word1.pron.length) + (word2.pron.length <=> word1.pron.length) } mark = {} - merged = PrimeWordList.new + merged = PrimeWordList.new() result.each {|word| if word.literal != "" then - if mark[word.literal] then - word0 = mark[word.literal] + word_key = word.send(get_key_function) + if mark[word_key] then + word0 = mark[word_key] + ## FIXME: Make this merging more intelligence. ## FIXME: <komat****@taiya*****> (2004-01-24) - word0.category = (word0.category or word.category) - word0.annotation = (word0.annotation or word.annotation) + word.data.keys().each { | key | + if word0.data.has_key?(key) then + # Do nothing yet + else + word0.data[key] = word.data[key] + end + } else - mark[word.literal] = word + mark[word_key] = word word.prefix = Prime::get_prefix(context, word.literal) merged.push(word) end Index: prime/lib/prime-config.rb.in diff -u prime/lib/prime-config.rb.in:1.3 prime/lib/prime-config.rb.in:1.4 --- prime/lib/prime-config.rb.in:1.3 Sun Aug 29 21:49:28 2004 +++ prime/lib/prime-config.rb.in Thu Dec 9 21:24:16 2004 @@ -1,5 +1,5 @@ # prime-config.rb: Setting of global variables and default variables for PRIME. -# $Id: prime-config.rb.in,v 1.3 2004/08/29 12:49:28 komatsu Exp $ +# $Id: prime-config.rb.in,v 1.4 2004/12/09 12:24:16 komatsu Exp $ # # Copyright (C) 2003 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -38,6 +38,11 @@ ## Suikyo: romaji-kana conversion library. PRIME_TYPING_METHOD_DEFAULT = 'romaji' PRIME_ENV['typing_method'] = PRIME_TYPING_METHOD_DEFAULT + +## If true, PRIME considers the validation of the preedition. +## ex). "あっplえ" => "apple". The available values are [nil, true, false]. +PRIME_ENV['hybrid_typing'] = nil + #PRIME_ENV['suikyo_tables'] = ["romaji"] #PRIME_ENV['suikyo_reverse_tables'] = ["romaji_reverse"] PRIME_ENV['suikyo_use_cache'] = true Index: prime/lib/Makefile.am diff -u prime/lib/Makefile.am:1.2 prime/lib/Makefile.am:1.3 --- prime/lib/Makefile.am:1.2 Fri Mar 26 02:19:49 2004 +++ prime/lib/Makefile.am Thu Dec 9 21:24:16 2004 @@ -1,5 +1,5 @@ # Makefile.am: Template of Automake for prime/lib. -# $Id: Makefile.am,v 1.2 2004/03/25 17:19:49 komatsu Exp $ +# $Id: Makefile.am,v 1.3 2004/12/09 12:24:16 komatsu Exp $ # # Copyright (C) 2003 Hiroyuki Komatsu <komat****@taiya*****> # All rights reserved. @@ -9,10 +9,10 @@ # the GNU General Public License version 2. SUBDIRS = engine grammar makedict -EXTRA_DIST = prime.rb prime-japanese.rb prime-mixed.rb session.rb server.rb \ - taiyaki.rb prime-config.rb.in +RUBY_FILES = prime.rb prime-japanese.rb prime-mixed.rb session.rb server.rb \ + taiyaki.rb + +EXTRA_DIST = $(RUBY_FILES) prime-config.rb.in primelibdir = $(rubydir)/prime -primelib_DATA = prime.rb prime-japanese.rb prime-mixed.rb \ - session.rb server.rb taiyaki.rb \ - prime-config.rb +primelib_DATA = $(RUBY_FILES) prime-config.rb