Kouhei Sutou
null+****@clear*****
Tue Sep 8 11:08:31 JST 2015
Kouhei Sutou 2015-09-08 11:08:31 +0900 (Tue, 08 Sep 2015) New Revision: 7f46e815f4771e4a7a2d670bd60070a7105711cc https://github.com/groonga/groonga/commit/7f46e815f4771e4a7a2d670bd60070a7105711cc Message: regexp: make match target text in vector normalized It's also incompatible change. Copied files: test/command/suite/select/filter/no_index/regexp/normalized_vector_text.expected (from test/command/suite/select/filter/no_index/regexp/vector_text.expected) test/command/suite/select/filter/no_index/regexp/normalized_vector_text.test (from test/command/suite/select/filter/no_index/regexp/vector_text.test) Modified files: lib/operator.c test/command/suite/select/filter/no_index/regexp/vector_text.expected test/command/suite/select/filter/no_index/regexp/vector_text.test Modified: lib/operator.c (+23 -2) =================================================================== --- lib/operator.c 2015-09-08 10:59:50 +0900 (30076f2) +++ lib/operator.c 2015-09-08 11:08:31 +0900 (459becd) @@ -1022,6 +1022,7 @@ static grn_bool exec_regexp_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *pattern) { #ifdef GRN_SUPPORT_REGEXP + grn_obj *normalizer = NULL; grn_bool matched = GRN_FALSE; unsigned int i, size; OnigRegex regex; @@ -1036,18 +1037,38 @@ exec_regexp_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *pattern) return GRN_FALSE; } + normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); for (i = 0; i < size; i++) { const char *content; unsigned int content_size; grn_id domain_id; + grn_obj *norm_content; + const char *norm_content_raw; + unsigned int norm_content_raw_length_in_bytes; content_size = grn_vector_get_element(ctx, vector, i, &content, NULL, &domain_id); - if (regexp_is_match(ctx, regex, content, content_size)) { - matched = GRN_TRUE; + if (content_size == 0) { + continue; + } + + norm_content = grn_string_open(ctx, content, content_size, normalizer, 0); + grn_string_get_normalized(ctx, norm_content, + &norm_content_raw, + &norm_content_raw_length_in_bytes, 
+ NULL); + + matched = regexp_is_match(ctx, regex, + norm_content_raw, + norm_content_raw_length_in_bytes); + + grn_obj_unlink(ctx, norm_content); + + if (matched) { break; } } + grn_obj_unlink(ctx, normalizer); onig_free(regex); Copied: test/command/suite/select/filter/no_index/regexp/normalized_vector_text.expected (+1 -1) 92% =================================================================== --- test/command/suite/select/filter/no_index/regexp/vector_text.expected 2015-09-08 10:59:50 +0900 (deaa379) +++ test/command/suite/select/filter/no_index/regexp/normalized_vector_text.expected 2015-09-08 11:08:31 +0900 (c7273ff) @@ -8,7 +8,7 @@ load --table Memos {"tags": ["Groonga", "PGroonga", "Mroonga"]} ] [[0,0.0,0.0],2] -select Memos --filter 'tags @~ "\\\\APGr"' +select Memos --filter 'tags @~ "\\\\Apgr"' [ [ 0, Copied: test/command/suite/select/filter/no_index/regexp/normalized_vector_text.test (+1 -1) 81% =================================================================== --- test/command/suite/select/filter/no_index/regexp/vector_text.test 2015-09-08 10:59:50 +0900 (114aff1) +++ test/command/suite/select/filter/no_index/regexp/normalized_vector_text.test 2015-09-08 11:08:31 +0900 (f3fa4a9) @@ -7,4 +7,4 @@ load --table Memos {"tags": ["Groonga", "PGroonga", "Mroonga"]} ] -select Memos --filter 'tags @~ "\\\\APGr"' +select Memos --filter 'tags @~ "\\\\Apgr"' Modified: test/command/suite/select/filter/no_index/regexp/vector_text.expected (+6 -6) =================================================================== --- test/command/suite/select/filter/no_index/regexp/vector_text.expected 2015-09-08 10:59:50 +0900 (deaa379) +++ test/command/suite/select/filter/no_index/regexp/vector_text.expected 2015-09-08 11:08:31 +0900 (d8308dd) @@ -4,11 +4,11 @@ column_create Memos tags COLUMN_VECTOR Text [[0,0.0,0.0],true] load --table Memos [ -{"tags": ["Groonga", "Rroonga", "Mroonga"]}, -{"tags": ["Groonga", "PGroonga", "Mroonga"]} +{"tags": ["groonga", "rroonga", "mroonga"]}, 
+{"tags": ["groonga", "pgroonga", "mroonga"]} ] [[0,0.0,0.0],2] -select Memos --filter 'tags @~ "\\\\APGr"' +select Memos --filter 'tags @~ "\\\\Apgr"' [ [ 0, @@ -33,9 +33,9 @@ select Memos --filter 'tags @~ "\\\\APGr"' [ 2, [ - "Groonga", - "PGroonga", - "Mroonga" + "groonga", + "pgroonga", + "mroonga" ] ] ] Modified: test/command/suite/select/filter/no_index/regexp/vector_text.test (+3 -3) =================================================================== --- test/command/suite/select/filter/no_index/regexp/vector_text.test 2015-09-08 10:59:50 +0900 (114aff1) +++ test/command/suite/select/filter/no_index/regexp/vector_text.test 2015-09-08 11:08:31 +0900 (027693b) @@ -3,8 +3,8 @@ column_create Memos tags COLUMN_VECTOR Text load --table Memos [ -{"tags": ["Groonga", "Rroonga", "Mroonga"]}, -{"tags": ["Groonga", "PGroonga", "Mroonga"]} +{"tags": ["groonga", "rroonga", "mroonga"]}, +{"tags": ["groonga", "pgroonga", "mroonga"]} ] -select Memos --filter 'tags @~ "\\\\APGr"' +select Memos --filter 'tags @~ "\\\\Apgr"' -------------- next part -------------- HTMLの添付ファイルを保管しました... 다운로드