Sutou Kouhei 2019-06-18 12:05:04 +0900 (Tue, 18 Jun 2019) Revision: 3380cd1f66edf248a520e17e3d0d9c48d3b47025 https://github.com/ranguba/chupa-text-decomposer-abiword/commit/3380cd1f66edf248a520e17e3d0d9c48d3b47025 Message: Convert to text directly Modified files: chupa-text-decomposer-abiword.gemspec lib/chupa-text/decomposers/abiword.rb test/test-abw.rb test/test-doc.rb test/test-docx.rb test/test-odt.rb test/test-rtf.rb test/test-zabw.rb Modified: chupa-text-decomposer-abiword.gemspec (+0 -1) =================================================================== --- chupa-text-decomposer-abiword.gemspec 2019-06-13 16:29:03 +0900 (aeaabac) +++ chupa-text-decomposer-abiword.gemspec 2019-06-18 12:05:04 +0900 (f7c2d69) @@ -40,7 +40,6 @@ Gem::Specification.new do |spec| spec.files += Dir.glob("test/**/*") spec.add_runtime_dependency("chupa-text") - spec.add_runtime_dependency("chupa-text-decomposer-pdf") spec.add_development_dependency("bundler") spec.add_development_dependency("rake") Modified: lib/chupa-text/decomposers/abiword.rb (+24 -33) =================================================================== --- lib/chupa-text/decomposers/abiword.rb 2019-06-13 16:29:03 +0900 (1735752) +++ lib/chupa-text/decomposers/abiword.rb 2019-06-18 12:05:04 +0900 (eaf8d2e) @@ -66,30 +66,9 @@ module ChupaText end def decompose(data) - pdf_data = convert_to_pdf(data) - return if pdf_data.nil? - yield(pdf_data) - end - - private - def find_command - candidates = [ - @options[:abiword], - ENV["ABIWORD"], - "abiword", - ] - candidates.each do |candidate| - next if candidate.nil? - command = ExternalCommand.new(candidate) - return command if command.exist? 
- end - nil - end - - def convert_to_pdf(data) - create_tempfiles(data) do |pdf, stdout, stderr| - succeeded = @command.run("--to", "pdf", - "--to-name", pdf.path, + create_tempfiles(data) do |text, stdout, stderr| + succeeded = @command.run("--to", "text", + "--to-name", text.path, data.path.to_s, { data: data, @@ -107,26 +86,38 @@ module ChupaText "error: <#{stderr.read}>", ].join("\n") end - return nil + return end - normalized_pdf_uri = data.uri.to_s.gsub(/\.[^.]+\z/, ".pdf") - File.open(pdf.path, "rb") do |pdf_input| - VirtualFileData.new(normalized_pdf_uri, - pdf_input, - source_data: data) + File.open(text.path) do |text_input| + yield(TextData.new(text_input.read, source_data: data)) end end end + private + def find_command + candidates = [ + @options[:abiword], + ENV["ABIWORD"], + "abiword", + ] + candidates.each do |candidate| + next if candidate.nil? + command = ExternalCommand.new(candidate) + return command if command.exist? + end + nil + end + def create_tempfiles(data) basename = File.basename(data.path) - pdf = Tempfile.new([basename, ".pdf"]) + text = Tempfile.new([basename, ".txt"]) stdout = Tempfile.new([basename, ".stdout.log"]) stderr = Tempfile.new([basename, ".stderr.log"]) begin - yield(pdf, stdout, stderr) + yield(text, stdout, stderr) ensure - pdf.close! + text.close! stdout.close! stderr.close! 
end Modified: test/test-abw.rb (+3 -2) =================================================================== --- test/test-abw.rb 2019-06-13 16:29:03 +0900 (ace5ca1) +++ test/test-abw.rb 2019-06-18 12:05:04 +0900 (f0c7d12) @@ -58,7 +58,7 @@ class TestAbw < Test::Unit::TestCase sub_test_case("one page") do def test_body - assert_equal(["Page1\n"], decompose.collect(&:body)) + assert_equal(["Page1"], decompose.collect(&:body)) end private @@ -69,8 +69,9 @@ class TestAbw < Test::Unit::TestCase sub_test_case("multi pages") do def test_body - assert_equal([<<-BODY], decompose.collect(&:body)) + assert_equal([<<-BODY.chomp], decompose.collect(&:body)) Page1 +\f Page2 BODY end Modified: test/test-doc.rb (+1 -0) =================================================================== --- test/test-doc.rb 2019-06-13 16:29:03 +0900 (24d8407) +++ test/test-doc.rb 2019-06-18 12:05:04 +0900 (613494f) @@ -71,6 +71,7 @@ class TestDoc < Test::Unit::TestCase def test_body assert_equal([<<-BODY], decompose.collect(&:body)) Page1 +\f Page2 BODY end Modified: test/test-docx.rb (+3 -3) =================================================================== --- test/test-docx.rb 2019-06-13 16:29:03 +0900 (a4f2196) +++ test/test-docx.rb 2019-06-18 12:05:04 +0900 (353ec18) @@ -58,7 +58,7 @@ class TestDocx < Test::Unit::TestCase sub_test_case("one page") do def test_body - assert_equal(["Page1\n"], decompose.collect(&:body)) + assert_equal(["Page1"], decompose.collect(&:body)) end private @@ -69,9 +69,9 @@ class TestDocx < Test::Unit::TestCase sub_test_case("multi pages") do def test_body - assert_equal([<<-BODY], decompose.collect(&:body)) + assert_equal([<<-BODY.chomp], decompose.collect(&:body)) Page1 -Page2 +\fPage2 BODY end Modified: test/test-odt.rb (+3 -2) =================================================================== --- test/test-odt.rb 2019-06-13 16:29:03 +0900 (515a9a6) +++ test/test-odt.rb 2019-06-18 12:05:04 +0900 (816d378) @@ -58,7 +58,7 @@ class TestOdt < Test::Unit::TestCase 
sub_test_case("one page") do def test_body - assert_equal(["Page1\n"], decompose.collect(&:body)) + assert_equal(["Page1"], decompose.collect(&:body)) end private @@ -69,8 +69,9 @@ class TestOdt < Test::Unit::TestCase sub_test_case("multi pages") do def test_body - assert_equal([<<-BODY], decompose.collect(&:body)) + assert_equal([<<-BODY.chomp], decompose.collect(&:body)) Page1 +\f Page2 BODY end Modified: test/test-rtf.rb (+2 -2) =================================================================== --- test/test-rtf.rb 2019-06-13 16:29:03 +0900 (8d8793f) +++ test/test-rtf.rb 2019-06-18 12:05:04 +0900 (7d701f8) @@ -58,7 +58,7 @@ class TestRtf < Test::Unit::TestCase sub_test_case("one page") do def test_body - assert_equal(["Page1\n"], decompose.collect(&:body)) + assert_equal(["Page1"], decompose.collect(&:body)) end private @@ -69,7 +69,7 @@ class TestRtf < Test::Unit::TestCase sub_test_case("multi pages") do def test_body - assert_equal([<<-BODY], decompose.collect(&:body)) + assert_equal([<<-BODY.chomp], decompose.collect(&:body)) Page1 Page2 BODY Modified: test/test-zabw.rb (+3 -2) =================================================================== --- test/test-zabw.rb 2019-06-13 16:29:03 +0900 (193866c) +++ test/test-zabw.rb 2019-06-18 12:05:04 +0900 (6aa52ad) @@ -45,7 +45,7 @@ class TestZabw < Test::Unit::TestCase sub_test_case("one page") do def test_body - assert_equal(["Page1\n"], decompose.collect(&:body)) + assert_equal(["Page1"], decompose.collect(&:body)) end private @@ -56,8 +56,9 @@ class TestZabw < Test::Unit::TestCase sub_test_case("multi pages") do def test_body - assert_equal([<<-BODY], decompose.collect(&:body)) + assert_equal([<<-BODY.chomp], decompose.collect(&:body)) Page1 +\f Page2 BODY end -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190618/bb231ea8/attachment-0001.html>