null+****@clear*****
null+****@clear*****
Thu Jan 19 21:51:57 JST 2012
# Derived from the following commit notification (metadata kept verbatim):
#   Author:       SHIMADA Koji
#   Date:         2012-01-19 21:51:57 +0900 (Thu, 19 Jan 2012)
#   New Revision: c2b70c48404a185ba3ced0f4125c3e7bc0c92f88
#   Log:          Add gene95 dictionary importer
#   Added files:  lib/logaling/external_glossaries/gene95.rb
#
# Copyright (C) 2011 Koji SHIMADA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

require 'open-uri'
require 'zlib'

module Logaling
  # Importer for the GENE95 dictionary: downloads the gzip-compressed data
  # and converts it into CSV rows of [source term, target text].
  class Gene95 < ExternalGlossary
    description 'GENE95 Dictionary (http://www.namazu.org/~tsuchiya/sdic/data/gene.html)'
    source_language 'en'
    target_language 'ja'

    # Downloads the dictionary, transcodes it from CP932 to UTF-8 and emits
    # one CSV row per pair of consecutive lines (first line: source entry,
    # second line: target text). The first pair of lines is skipped.
    #
    # Prints progress messages to standard output.
    #
    # Returns the result of CSV.generate, i.e. the CSV text built in +buffer+.
    def convert
      buffer = ""
      CSV.generate(buffer) do |csv|
        puts "downloading gene95 dictionary..."
        # URI#open comes from open-uri and works on every Ruby version;
        # the bare Kernel#open(url) form stopped fetching URLs in Ruby 3.0.
        download = URI.parse('http://www.namazu.org/~tsuchiya/sdic/data/gene95.tar.gz').open
        # GzipReader.open expects a file name; wrap takes an IO object and
        # closes both the reader and the underlying download when the block ends.
        Zlib::GzipReader.wrap(download) do |gz|
          puts "importing gene95 dictionary..."
          # NOTE(review): the payload is a .tar.gz but is read here as a plain
          # gzip stream, so tar framing bytes end up in the line data; skipping
          # the first pair below presumably hides the leading header - confirm.
          decoded = gz.readlines.map do |line|
            # Drop bytes that are invalid or unmappable instead of aborting
            # the whole import on a single malformed character.
            line.encode("UTF-8", "CP932", invalid: :replace, undef: :replace, replace: '').strip
          end
          decoded.each_slice(2).drop(1).each do |source, target|
            # NOTE(review): whitespace in this archived copy looks collapsed;
            # the intended delimiter may be wider than one space - confirm.
            term = source.split(" ").map(&:strip).first
            # A trailing odd line or a blank source line yields no usable row.
            next if term.nil? || term.empty? || target.nil?
            csv << [term, target]
          end
        end
      end
    end
  end
end