[Groonga-commit] groonga/groonga at 7f46e81 [master] regexp: make match target text in vector normalized

Back to archive index

Kouhei Sutou null+****@clear*****
Tue Sep 8 11:08:31 JST 2015


Kouhei Sutou	2015-09-08 11:08:31 +0900 (Tue, 08 Sep 2015)

  New Revision: 7f46e815f4771e4a7a2d670bd60070a7105711cc
  https://github.com/groonga/groonga/commit/7f46e815f4771e4a7a2d670bd60070a7105711cc

  Message:
    regexp: make match target text in vector normalized
    
    It's also an incompatible change.

  Copied files:
    test/command/suite/select/filter/no_index/regexp/normalized_vector_text.expected
      (from test/command/suite/select/filter/no_index/regexp/vector_text.expected)
    test/command/suite/select/filter/no_index/regexp/normalized_vector_text.test
      (from test/command/suite/select/filter/no_index/regexp/vector_text.test)
  Modified files:
    lib/operator.c
    test/command/suite/select/filter/no_index/regexp/vector_text.expected
    test/command/suite/select/filter/no_index/regexp/vector_text.test

  Modified: lib/operator.c (+23 -2)
===================================================================
--- lib/operator.c    2015-09-08 10:59:50 +0900 (30076f2)
+++ lib/operator.c    2015-09-08 11:08:31 +0900 (459becd)
@@ -1022,6 +1022,7 @@ static grn_bool
 exec_regexp_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *pattern)
 {
 #ifdef GRN_SUPPORT_REGEXP
+  grn_obj *normalizer = NULL;
   grn_bool matched = GRN_FALSE;
   unsigned int i, size;
   OnigRegex regex;
@@ -1036,18 +1037,38 @@ exec_regexp_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *pattern)
     return GRN_FALSE;
   }
 
+  normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
   for (i = 0; i < size; i++) {
     const char *content;
     unsigned int content_size;
     grn_id domain_id;
+    grn_obj *norm_content;
+    const char *norm_content_raw;
+    unsigned int norm_content_raw_length_in_bytes;
 
     content_size = grn_vector_get_element(ctx, vector, i,
                                           &content, NULL, &domain_id);
-    if (regexp_is_match(ctx, regex, content, content_size)) {
-      matched = GRN_TRUE;
+    if (content_size == 0) {
+      continue;
+    }
+
+    norm_content = grn_string_open(ctx, content, content_size, normalizer, 0);
+    grn_string_get_normalized(ctx, norm_content,
+                              &norm_content_raw,
+                              &norm_content_raw_length_in_bytes,
+                              NULL);
+
+    matched = regexp_is_match(ctx, regex,
+                              norm_content_raw,
+                              norm_content_raw_length_in_bytes);
+
+    grn_obj_unlink(ctx, norm_content);
+
+    if (matched) {
       break;
     }
   }
+  grn_obj_unlink(ctx, normalizer);
 
   onig_free(regex);
 

  Copied: test/command/suite/select/filter/no_index/regexp/normalized_vector_text.expected (+1 -1) 92%
===================================================================
--- test/command/suite/select/filter/no_index/regexp/vector_text.expected    2015-09-08 10:59:50 +0900 (deaa379)
+++ test/command/suite/select/filter/no_index/regexp/normalized_vector_text.expected    2015-09-08 11:08:31 +0900 (c7273ff)
@@ -8,7 +8,7 @@ load --table Memos
 {"tags": ["Groonga", "PGroonga", "Mroonga"]}
 ]
 [[0,0.0,0.0],2]
-select Memos --filter 'tags @~ "\\\\APGr"'
+select Memos --filter 'tags @~ "\\\\Apgr"'
 [
   [
     0,

  Copied: test/command/suite/select/filter/no_index/regexp/normalized_vector_text.test (+1 -1) 81%
===================================================================
--- test/command/suite/select/filter/no_index/regexp/vector_text.test    2015-09-08 10:59:50 +0900 (114aff1)
+++ test/command/suite/select/filter/no_index/regexp/normalized_vector_text.test    2015-09-08 11:08:31 +0900 (f3fa4a9)
@@ -7,4 +7,4 @@ load --table Memos
 {"tags": ["Groonga", "PGroonga", "Mroonga"]}
 ]
 
-select Memos --filter 'tags @~ "\\\\APGr"'
+select Memos --filter 'tags @~ "\\\\Apgr"'

  Modified: test/command/suite/select/filter/no_index/regexp/vector_text.expected (+6 -6)
===================================================================
--- test/command/suite/select/filter/no_index/regexp/vector_text.expected    2015-09-08 10:59:50 +0900 (deaa379)
+++ test/command/suite/select/filter/no_index/regexp/vector_text.expected    2015-09-08 11:08:31 +0900 (d8308dd)
@@ -4,11 +4,11 @@ column_create Memos tags COLUMN_VECTOR Text
 [[0,0.0,0.0],true]
 load --table Memos
 [
-{"tags": ["Groonga", "Rroonga",  "Mroonga"]},
-{"tags": ["Groonga", "PGroonga", "Mroonga"]}
+{"tags": ["groonga", "rroonga",  "mroonga"]},
+{"tags": ["groonga", "pgroonga", "mroonga"]}
 ]
 [[0,0.0,0.0],2]
-select Memos --filter 'tags @~ "\\\\APGr"'
+select Memos --filter 'tags @~ "\\\\Apgr"'
 [
   [
     0,
@@ -33,9 +33,9 @@ select Memos --filter 'tags @~ "\\\\APGr"'
       [
         2,
         [
-          "Groonga",
-          "PGroonga",
-          "Mroonga"
+          "groonga",
+          "pgroonga",
+          "mroonga"
         ]
       ]
     ]

  Modified: test/command/suite/select/filter/no_index/regexp/vector_text.test (+3 -3)
===================================================================
--- test/command/suite/select/filter/no_index/regexp/vector_text.test    2015-09-08 10:59:50 +0900 (114aff1)
+++ test/command/suite/select/filter/no_index/regexp/vector_text.test    2015-09-08 11:08:31 +0900 (027693b)
@@ -3,8 +3,8 @@ column_create Memos tags COLUMN_VECTOR Text
 
 load --table Memos
 [
-{"tags": ["Groonga", "Rroonga",  "Mroonga"]},
-{"tags": ["Groonga", "PGroonga", "Mroonga"]}
+{"tags": ["groonga", "rroonga",  "mroonga"]},
+{"tags": ["groonga", "pgroonga", "mroonga"]}
 ]
 
-select Memos --filter 'tags @~ "\\\\APGr"'
+select Memos --filter 'tags @~ "\\\\Apgr"'
-------------- next part --------------
HTMLの添付ファイルを保管しました...
다운로드 



More information about the Groonga-commit mailing list
Back to archive index