[Groonga-commit] groonga/groonga at a321b6c [master] highlight_html: support similar search

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Mar 17 14:00:24 JST 2017


Kouhei Sutou	2017-03-17 14:00:24 +0900 (Fri, 17 Mar 2017)

  New Revision: a321b6cf5a255e4dfaa8679143e21e577a5d7f49
  https://github.com/groonga/groonga/commit/a321b6cf5a255e4dfaa8679143e21e577a5d7f49

  Message:
    highlight_html: support similar search

  Added files:
    test/command/suite/select/function/highlight_html/similar_search.expected
    test/command/suite/select/function/highlight_html/similar_search.test
  Modified files:
    lib/expr.c
    lib/proc/proc_highlight.c

  Modified: lib/expr.c (+51 -3)
===================================================================
--- lib/expr.c    2017-03-17 14:43:00 +0900 (1d6e319)
+++ lib/expr.c    2017-03-17 14:00:24 +0900 (ee60806)
@@ -27,6 +27,7 @@
 #include "grn_scanner.h"
 #include "grn_util.h"
 #include "grn_report.h"
+#include "grn_token_cursor.h"
 #include "grn_mrb.h"
 #include "mrb/mrb_expr.h"
 
@@ -8722,9 +8723,56 @@ grn_expr_get_keywords(grn_ctx *ctx, grn_obj *expr, grn_obj *keywords)
           butp = 1 - butp;
         }
       } else {
-        if (si->op == GRN_OP_MATCH && si->query) {
-          if (butp == (si->logical_op == GRN_OP_AND_NOT)) {
-            GRN_PTR_PUT(ctx, keywords, si->query);
+        if (butp == (si->logical_op == GRN_OP_AND_NOT) &&
+            si->query) {
+          switch (si->op) {
+          case GRN_OP_MATCH :
+            if (keywords->header.type == GRN_PVECTOR) {
+              GRN_PTR_PUT(ctx, keywords, si->query);
+            } else {
+              grn_vector_add_element(ctx,
+                                     keywords,
+                                     GRN_TEXT_VALUE(si->query),
+                                     GRN_TEXT_LEN(si->query),
+                                     0,
+                                     GRN_DB_TEXT);
+            }
+            break;
+          case GRN_OP_SIMILAR :
+            if (keywords->header.type == GRN_VECTOR &&
+                GRN_BULK_VSIZE(&(si->index)) > 0) {
+              grn_token_cursor *token_cursor;
+              unsigned int token_flags = 0;
+              grn_obj *index = GRN_PTR_VALUE(&(si->index));
+              grn_obj *lexicon;
+
+              lexicon = grn_ctx_at(ctx, index->header.domain);
+              token_cursor = grn_token_cursor_open(ctx,
+                                                   lexicon,
+                                                   GRN_TEXT_VALUE(si->query),
+                                                   GRN_TEXT_LEN(si->query),
+                                                   GRN_TOKENIZE_GET,
+                                                   token_flags);
+              if (token_cursor) {
+                while (token_cursor->status != GRN_TOKEN_CURSOR_DONE) {
+                  grn_id token_id;
+                  token_id = grn_token_cursor_next(ctx, token_cursor);
+                  if (token_id == GRN_ID_NIL) {
+                    continue;
+                  }
+                  grn_vector_add_element(ctx,
+                                         keywords,
+                                         token_cursor->curr,
+                                         token_cursor->curr_size,
+                                         0,
+                                         GRN_DB_TEXT);
+                }
+                grn_token_cursor_close(ctx, token_cursor);
+              }
+            }
+            break;
+          default :
+            break;
           }
         }
         if (si->flags & SCAN_PUSH) {

  Modified: lib/proc/proc_highlight.c (+15 -8)
===================================================================
--- lib/proc/proc_highlight.c    2017-03-17 14:43:00 +0900 (670b29a)
+++ lib/proc/proc_highlight.c    2017-03-17 14:00:24 +0900 (7d81518)
@@ -421,19 +421,26 @@ func_highlight_html_create_keywords_table(grn_ctx *ctx, grn_obj *expression)
   if (condition) {
     size_t i, n_keywords;
     grn_obj current_keywords;
-    GRN_PTR_INIT(&current_keywords, GRN_OBJ_VECTOR, GRN_ID_NIL);
+    GRN_TEXT_INIT(&current_keywords, GRN_OBJ_VECTOR);
     grn_expr_get_keywords(ctx, condition, &current_keywords);
 
-    n_keywords = GRN_BULK_VSIZE(&current_keywords) / sizeof(grn_obj *);
+    n_keywords = grn_vector_size(ctx, &current_keywords);
     for (i = 0; i < n_keywords; i++) {
-      grn_obj *keyword;
-      keyword = GRN_PTR_VALUE_AT(&current_keywords, i);
-      grn_table_add(ctx, keywords,
-                    GRN_TEXT_VALUE(keyword),
-                    GRN_TEXT_LEN(keyword),
+      const char *keyword;
+      unsigned int keyword_size;
+      keyword_size = grn_vector_get_element(ctx,
+                                            &current_keywords,
+                                            i,
+                                            &keyword,
+                                            NULL,
+                                            NULL);
+      grn_table_add(ctx,
+                    keywords,
+                    keyword,
+                    keyword_size,
                     NULL);
     }
-    grn_obj_unlink(ctx, &current_keywords);
+    GRN_OBJ_FIN(ctx, &current_keywords);
   }
 
   return keywords;

  Added: test/command/suite/select/function/highlight_html/similar_search.expected (+47 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/highlight_html/similar_search.expected    2017-03-17 14:00:24 +0900 (7633b78)
@@ -0,0 +1,47 @@
+plugin_register token_filters/stop_word
+[[0,0.0,0.0],true]
+table_create Entries TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Entries body COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto   --token_filters TokenFilterStopWord
+[[0,0.0,0.0],true]
+column_create Terms document_index COLUMN_INDEX|WITH_POSITION Entries body
+[[0,0.0,0.0],true]
+column_create Terms is_stop_word COLUMN_SCALAR Bool
+[[0,0.0,0.0],true]
+load --table Terms
+[
+{"_key": "is", "is_stop_word": true},
+{"_key": ".",  "is_stop_word": true}
+]
+[[0,0.0,0.0],2]
+load --table Entries
+[
+{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."}
+]
+[[0,0.0,0.0],1]
+select Entries   --filter 'body *S "Groonga is fast full text search engine. There are SQL interfaces by Mroonga and PGroonga and Ruby interface by Rroonga."'   --output_columns 'highlight_html(body)'
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        1
+      ],
+      [
+        [
+          "highlight_html",
+          null
+        ]
+      ],
+      [
+        "<span class=\"keyword\">Mroonga</span> is a MySQL storage <span class=\"keyword\">engine</span> based on <span class=\"keyword\">Groonga</span>. &lt;b&gt;<span class=\"keyword\">Rroonga</span>&lt;/b&gt; is a <span class=\"keyword\">Ruby</span> binding of <span class=\"keyword\">Groonga</span>."
+      ]
+    ]
+  ]
+]

  Added: test/command/suite/select/function/highlight_html/similar_search.test (+26 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/highlight_html/similar_search.test    2017-03-17 14:00:24 +0900 (68e7563)
@@ -0,0 +1,26 @@
+plugin_register token_filters/stop_word
+
+table_create Entries TABLE_NO_KEY
+column_create Entries body COLUMN_SCALAR ShortText
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto \
+  --token_filters TokenFilterStopWord
+column_create Terms document_index COLUMN_INDEX|WITH_POSITION Entries body
+column_create Terms is_stop_word COLUMN_SCALAR Bool
+
+load --table Terms
+[
+{"_key": "is", "is_stop_word": true},
+{"_key": ".",  "is_stop_word": true}
+]
+
+load --table Entries
+[
+{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."}
+]
+
+select Entries \
+  --filter 'body *S "Groonga is fast full text search engine. There are SQL interfaces by Mroonga and PGroonga and Ruby interface by Rroonga."' \
+  --output_columns 'highlight_html(body)'
-------------- next part --------------
HTMLの添付ファイルを保管しました...
다운로드 



More information about the Groonga-commit mailing list
Back to archive index