[Groonga-commit] ranguba/chupa-text-decomposer-abiword at 3380cd1 [master] Convert to text directly

Back to archive index
Sutou Kouhei null+****@clear*****
Tue Jun 18 12:05:04 JST 2019


Sutou Kouhei	2019-06-18 12:05:04 +0900 (Tue, 18 Jun 2019)

  Revision: 3380cd1f66edf248a520e17e3d0d9c48d3b47025
  https://github.com/ranguba/chupa-text-decomposer-abiword/commit/3380cd1f66edf248a520e17e3d0d9c48d3b47025

  Message:
    Convert to text directly

  Modified files:
    chupa-text-decomposer-abiword.gemspec
    lib/chupa-text/decomposers/abiword.rb
    test/test-abw.rb
    test/test-doc.rb
    test/test-docx.rb
    test/test-odt.rb
    test/test-rtf.rb
    test/test-zabw.rb

  Modified: chupa-text-decomposer-abiword.gemspec (+0 -1)
===================================================================
--- chupa-text-decomposer-abiword.gemspec    2019-06-13 16:29:03 +0900 (aeaabac)
+++ chupa-text-decomposer-abiword.gemspec    2019-06-18 12:05:04 +0900 (f7c2d69)
@@ -40,7 +40,6 @@ Gem::Specification.new do |spec|
   spec.files += Dir.glob("test/**/*")
 
   spec.add_runtime_dependency("chupa-text")
-  spec.add_runtime_dependency("chupa-text-decomposer-pdf")
 
   spec.add_development_dependency("bundler")
   spec.add_development_dependency("rake")

  Modified: lib/chupa-text/decomposers/abiword.rb (+24 -33)
===================================================================
--- lib/chupa-text/decomposers/abiword.rb    2019-06-13 16:29:03 +0900 (1735752)
+++ lib/chupa-text/decomposers/abiword.rb    2019-06-18 12:05:04 +0900 (eaf8d2e)
@@ -66,30 +66,9 @@ module ChupaText
       end
 
       def decompose(data)
-        pdf_data = convert_to_pdf(data)
-        return if pdf_data.nil?
-        yield(pdf_data)
-      end
-
-      private
-      def find_command
-        candidates = [
-          @options[:abiword],
-          ENV["ABIWORD"],
-          "abiword",
-        ]
-        candidates.each do |candidate|
-          next if candidate.nil?
-          command = ExternalCommand.new(candidate)
-          return command if command.exist?
-        end
-        nil
-      end
-
-      def convert_to_pdf(data)
-        create_tempfiles(data) do |pdf, stdout, stderr|
-          succeeded = @command.run("--to", "pdf",
-                                   "--to-name", pdf.path,
+        create_tempfiles(data) do |text, stdout, stderr|
+          succeeded = @command.run("--to", "text",
+                                   "--to-name", text.path,
                                    data.path.to_s,
                                    {
                                      data: data,
@@ -107,26 +86,38 @@ module ChupaText
                 "error: <#{stderr.read}>",
               ].join("\n")
             end
-            return nil
+            return
           end
-          normalized_pdf_uri = data.uri.to_s.gsub(/\.[^.]+\z/, ".pdf")
-          File.open(pdf.path, "rb") do |pdf_input|
-            VirtualFileData.new(normalized_pdf_uri,
-                                pdf_input,
-                                source_data: data)
+          File.open(text.path) do |text_input|
+            yield(TextData.new(text_input.read, source_data: data))
           end
         end
       end
 
+      private
+      def find_command
+        candidates = [
+          @options[:abiword],
+          ENV["ABIWORD"],
+          "abiword",
+        ]
+        candidates.each do |candidate|
+          next if candidate.nil?
+          command = ExternalCommand.new(candidate)
+          return command if command.exist?
+        end
+        nil
+      end
+
       def create_tempfiles(data)
         basename = File.basename(data.path)
-        pdf = Tempfile.new([basename, ".pdf"])
+        text = Tempfile.new([basename, ".txt"])
         stdout = Tempfile.new([basename, ".stdout.log"])
         stderr = Tempfile.new([basename, ".stderr.log"])
         begin
-          yield(pdf, stdout, stderr)
+          yield(text, stdout, stderr)
         ensure
-          pdf.close!
+          text.close!
           stdout.close!
           stderr.close!
         end

  Modified: test/test-abw.rb (+3 -2)
===================================================================
--- test/test-abw.rb    2019-06-13 16:29:03 +0900 (ace5ca1)
+++ test/test-abw.rb    2019-06-18 12:05:04 +0900 (f0c7d12)
@@ -58,7 +58,7 @@ class TestAbw < Test::Unit::TestCase
 
     sub_test_case("one page") do
       def test_body
-        assert_equal(["Page1\n"], decompose.collect(&:body))
+        assert_equal(["Page1"], decompose.collect(&:body))
       end
 
       private
@@ -69,8 +69,9 @@ class TestAbw < Test::Unit::TestCase
 
     sub_test_case("multi pages") do
       def test_body
-        assert_equal([<<-BODY], decompose.collect(&:body))
+        assert_equal([<<-BODY.chomp], decompose.collect(&:body))
 Page1
+\f
 Page2
         BODY
       end

  Modified: test/test-doc.rb (+1 -0)
===================================================================
--- test/test-doc.rb    2019-06-13 16:29:03 +0900 (24d8407)
+++ test/test-doc.rb    2019-06-18 12:05:04 +0900 (613494f)
@@ -71,6 +71,7 @@ class TestDoc < Test::Unit::TestCase
       def test_body
         assert_equal([<<-BODY], decompose.collect(&:body))
 Page1
+\f
 Page2
         BODY
       end

  Modified: test/test-docx.rb (+3 -3)
===================================================================
--- test/test-docx.rb    2019-06-13 16:29:03 +0900 (a4f2196)
+++ test/test-docx.rb    2019-06-18 12:05:04 +0900 (353ec18)
@@ -58,7 +58,7 @@ class TestDocx < Test::Unit::TestCase
 
     sub_test_case("one page") do
       def test_body
-        assert_equal(["Page1\n"], decompose.collect(&:body))
+        assert_equal(["Page1"], decompose.collect(&:body))
       end
 
       private
@@ -69,9 +69,9 @@ class TestDocx < Test::Unit::TestCase
 
     sub_test_case("multi pages") do
       def test_body
-        assert_equal([<<-BODY], decompose.collect(&:body))
+        assert_equal([<<-BODY.chomp], decompose.collect(&:body))
 Page1
-Page2
+\fPage2
         BODY
       end
 

  Modified: test/test-odt.rb (+3 -2)
===================================================================
--- test/test-odt.rb    2019-06-13 16:29:03 +0900 (515a9a6)
+++ test/test-odt.rb    2019-06-18 12:05:04 +0900 (816d378)
@@ -58,7 +58,7 @@ class TestOdt < Test::Unit::TestCase
 
     sub_test_case("one page") do
       def test_body
-        assert_equal(["Page1\n"], decompose.collect(&:body))
+        assert_equal(["Page1"], decompose.collect(&:body))
       end
 
       private
@@ -69,8 +69,9 @@ class TestOdt < Test::Unit::TestCase
 
     sub_test_case("multi pages") do
       def test_body
-        assert_equal([<<-BODY], decompose.collect(&:body))
+        assert_equal([<<-BODY.chomp], decompose.collect(&:body))
 Page1
+\f
 Page2
         BODY
       end

  Modified: test/test-rtf.rb (+2 -2)
===================================================================
--- test/test-rtf.rb    2019-06-13 16:29:03 +0900 (8d8793f)
+++ test/test-rtf.rb    2019-06-18 12:05:04 +0900 (7d701f8)
@@ -58,7 +58,7 @@ class TestRtf < Test::Unit::TestCase
 
     sub_test_case("one page") do
       def test_body
-        assert_equal(["Page1\n"], decompose.collect(&:body))
+        assert_equal(["Page1"], decompose.collect(&:body))
       end
 
       private
@@ -69,7 +69,7 @@ class TestRtf < Test::Unit::TestCase
 
     sub_test_case("multi pages") do
       def test_body
-        assert_equal([<<-BODY], decompose.collect(&:body))
+        assert_equal([<<-BODY.chomp], decompose.collect(&:body))
 Page1
 Page2
         BODY

  Modified: test/test-zabw.rb (+3 -2)
===================================================================
--- test/test-zabw.rb    2019-06-13 16:29:03 +0900 (193866c)
+++ test/test-zabw.rb    2019-06-18 12:05:04 +0900 (6aa52ad)
@@ -45,7 +45,7 @@ class TestZabw < Test::Unit::TestCase
 
     sub_test_case("one page") do
       def test_body
-        assert_equal(["Page1\n"], decompose.collect(&:body))
+        assert_equal(["Page1"], decompose.collect(&:body))
       end
 
       private
@@ -56,8 +56,9 @@ class TestZabw < Test::Unit::TestCase
 
     sub_test_case("multi pages") do
       def test_body
-        assert_equal([<<-BODY], decompose.collect(&:body))
+        assert_equal([<<-BODY.chomp], decompose.collect(&:body))
 Page1
+\f
 Page2
         BODY
       end
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190618/bb231ea8/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index