Kouhei Sutou
null+****@clear*****
Fri Mar 17 14:00:24 JST 2017
Kouhei Sutou 2017-03-17 14:00:24 +0900 (Fri, 17 Mar 2017) New Revision: a321b6cf5a255e4dfaa8679143e21e577a5d7f49 https://github.com/groonga/groonga/commit/a321b6cf5a255e4dfaa8679143e21e577a5d7f49 Message: highlight_html: support similar search Added files: test/command/suite/select/function/highlight_html/similar_search.expected test/command/suite/select/function/highlight_html/similar_search.test Modified files: lib/expr.c lib/proc/proc_highlight.c Modified: lib/expr.c (+51 -3) =================================================================== --- lib/expr.c 2017-03-17 14:43:00 +0900 (1d6e319) +++ lib/expr.c 2017-03-17 14:00:24 +0900 (ee60806) @@ -27,6 +27,7 @@ #include "grn_scanner.h" #include "grn_util.h" #include "grn_report.h" +#include "grn_token_cursor.h" #include "grn_mrb.h" #include "mrb/mrb_expr.h" @@ -8722,9 +8723,56 @@ grn_expr_get_keywords(grn_ctx *ctx, grn_obj *expr, grn_obj *keywords) butp = 1 - butp; } } else { - if (si->op == GRN_OP_MATCH && si->query) { - if (butp == (si->logical_op == GRN_OP_AND_NOT)) { - GRN_PTR_PUT(ctx, keywords, si->query); + if (butp == (si->logical_op == GRN_OP_AND_NOT) && + si->query) { + switch (si->op) { + case GRN_OP_MATCH : + if (keywords->header.type == GRN_PVECTOR) { + GRN_PTR_PUT(ctx, keywords, si->query); + } else { + grn_vector_add_element(ctx, + keywords, + GRN_TEXT_VALUE(si->query), + GRN_TEXT_LEN(si->query), + 0, + GRN_DB_TEXT); + } + break; + case GRN_OP_SIMILAR : + if (keywords->header.type == GRN_VECTOR && + GRN_BULK_VSIZE(&(si->index)) > 0) { + grn_token_cursor *token_cursor; + unsigned int token_flags = 0; + grn_obj *index = GRN_PTR_VALUE(&(si->index)); + grn_obj *lexicon; + + lexicon = grn_ctx_at(ctx, index->header.domain); + token_cursor = grn_token_cursor_open(ctx, + lexicon, + GRN_TEXT_VALUE(si->query), + GRN_TEXT_LEN(si->query), + GRN_TOKENIZE_GET, + token_flags); + if (token_cursor) { + while (token_cursor->status != GRN_TOKEN_CURSOR_DONE) { + grn_id token_id; + token_id = grn_token_cursor_next(ctx, 
token_cursor); + if (token_id == GRN_ID_NIL) { + continue; + } + grn_vector_add_element(ctx, + keywords, + token_cursor->curr, + token_cursor->curr_size, + 0, + GRN_DB_TEXT); + } + grn_token_cursor_close(ctx, token_cursor); + } + } + break; + default : + break; } } if (si->flags & SCAN_PUSH) { Modified: lib/proc/proc_highlight.c (+15 -8) =================================================================== --- lib/proc/proc_highlight.c 2017-03-17 14:43:00 +0900 (670b29a) +++ lib/proc/proc_highlight.c 2017-03-17 14:00:24 +0900 (7d81518) @@ -421,19 +421,26 @@ func_highlight_html_create_keywords_table(grn_ctx *ctx, grn_obj *expression) if (condition) { size_t i, n_keywords; grn_obj current_keywords; - GRN_PTR_INIT(&current_keywords, GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_TEXT_INIT(&current_keywords, GRN_OBJ_VECTOR); grn_expr_get_keywords(ctx, condition, &current_keywords); - n_keywords = GRN_BULK_VSIZE(&current_keywords) / sizeof(grn_obj *); + n_keywords = grn_vector_size(ctx, &current_keywords); for (i = 0; i < n_keywords; i++) { - grn_obj *keyword; - keyword = GRN_PTR_VALUE_AT(&current_keywords, i); - grn_table_add(ctx, keywords, - GRN_TEXT_VALUE(keyword), - GRN_TEXT_LEN(keyword), + const char *keyword; + unsigned int keyword_size; + keyword_size = grn_vector_get_element(ctx, + &current_keywords, + i, + &keyword, + NULL, + NULL); + grn_table_add(ctx, + keywords, + keyword, + keyword_size, NULL); } - grn_obj_unlink(ctx, &current_keywords); + GRN_OBJ_FIN(ctx, &current_keywords); } return keywords; Added: test/command/suite/select/function/highlight_html/similar_search.expected (+47 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_html/similar_search.expected 2017-03-17 14:00:24 +0900 (7633b78) @@ -0,0 +1,47 @@ +plugin_register token_filters/stop_word +[[0,0.0,0.0],true] +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY 
ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord +[[0,0.0,0.0],true] +column_create Terms document_index COLUMN_INDEX|WITH_POSITION Entries body +[[0,0.0,0.0],true] +column_create Terms is_stop_word COLUMN_SCALAR Bool +[[0,0.0,0.0],true] +load --table Terms +[ +{"_key": "is", "is_stop_word": true}, +{"_key": ".", "is_stop_word": true} +] +[[0,0.0,0.0],2] +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] +[[0,0.0,0.0],1] +select Entries --filter 'body *S "Groonga is fast full text search engine. There are SQL interfaces by Mroonga and PGroonga and Ruby interface by Rroonga."' --output_columns 'highlight_html(body)' +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "highlight_html", + null + ] + ], + [ + "<span class=\"keyword\">Mroonga</span> is a MySQL storage <span class=\"keyword\">engine</span> based on <span class=\"keyword\">Groonga</span>. <b><span class=\"keyword\">Rroonga</span></b> is a <span class=\"keyword\">Ruby</span> binding of <span class=\"keyword\">Groonga</span>." 
+ ] + ] + ] +] Added: test/command/suite/select/function/highlight_html/similar_search.test (+26 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_html/similar_search.test 2017-03-17 14:00:24 +0900 (68e7563) @@ -0,0 +1,26 @@ +plugin_register token_filters/stop_word + +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigram \ + --normalizer NormalizerAuto \ + --token_filters TokenFilterStopWord +column_create Terms document_index COLUMN_INDEX|WITH_POSITION Entries body +column_create Terms is_stop_word COLUMN_SCALAR Bool + +load --table Terms +[ +{"_key": "is", "is_stop_word": true}, +{"_key": ".", "is_stop_word": true} +] + +load --table Entries +[ +{"body": "Mroonga is a MySQL storage engine based on Groonga. <b>Rroonga</b> is a Ruby binding of Groonga."} +] + +select Entries \ + --filter 'body *S "Groonga is fast full text search engine. There are SQL interfaces by Mroonga and PGroonga and Ruby interface by Rroonga."' \ + --output_columns 'highlight_html(body)' -------------- next part -------------- HTMLの添付ファイルを保管しました...다운로드