Kouhei Sutou 2019-03-20 15:03:54 +0900 (Wed, 20 Mar 2019) Revision: 34a13c9ebe3ff6f9ceebb5542c6717d4a35629d1 https://github.com/groonga/groonga/commit/34a13c9ebe3ff6f9ceebb5542c6717d4a35629d1 Message: index_column_diff: improve performance Copied files: test/command/suite/index_column_diff/missings/with_section.expected (from test/command/suite/index_column_diff/missing/with_section.expected) test/command/suite/index_column_diff/missings/with_section.test (from test/command/suite/index_column_diff/missing/with_section.test) test/command/suite/index_column_diff/missings/without_section.expected (from test/command/suite/index_column_diff/missing/without_section.expected) test/command/suite/index_column_diff/missings/without_section.test (from test/command/suite/index_column_diff/missing/without_section.test) Modified files: lib/index_column.c Renamed files: test/command/suite/index_column_diff/remains/with_section.expected (from test/command/suite/index_column_diff/missing/with_section.expected) test/command/suite/index_column_diff/remains/with_section.test (from test/command/suite/index_column_diff/missing/with_section.test) test/command/suite/index_column_diff/remains/without_section.expected (from test/command/suite/index_column_diff/missing/without_section.expected) test/command/suite/index_column_diff/remains/without_section.test (from test/command/suite/index_column_diff/missing/without_section.test) Modified: lib/index_column.c (+234 -184) =================================================================== --- lib/index_column.c 2019-03-20 08:49:48 +0900 (673f310cb) +++ lib/index_column.c 2019-03-20 15:03:54 +0900 (550de0e7f) @@ -236,6 +236,7 @@ grn_index_column_rebuild(grn_ctx *ctx, grn_obj *index_column) GRN_API_RETURN(ctx->rc); } +static const char *postings_column_name = "postings"; static const char *remains_column_name = "remains"; static const char *missings_column_name = "missings"; #define GRN_INDEX_COLUMN_DIFF_CACHE_SIZE 256 @@ -251,12 +252,19 @@ typedef struct { size_t n_posting_elements; grn_obj *source_table; grn_obj source_columns; - grn_obj *tokens; + grn_obj *diff; + grn_obj *postings; grn_obj *remains; grn_obj *missings; struct { + grn_id token_id; + grn_id diff_id; + grn_posting posting; + grn_bool is_new_diff; + } current; + struct { grn_obj value; - grn_obj missings; + grn_obj postings; } buffers; struct { unsigned int n_records; @@ -269,9 +277,9 @@ typedef struct { } progress; struct { grn_id token_id; - grn_obj buffer1; - grn_obj buffer2; - grn_obj *postings; + grn_id diff_id; + grn_obj postings; + size_t offset; } cache[GRN_INDEX_COLUMN_DIFF_CACHE_SIZE]; } grn_index_column_diff_data; @@ -281,13 +289,13 @@ grn_index_column_diff_data_init(grn_ctx *ctx, { GRN_PTR_INIT(&(data->source_columns), GRN_OBJ_VECTOR, GRN_ID_NIL); GRN_VOID_INIT(&(data->buffers.value)); - GRN_UINT32_INIT(&(data->buffers.missings), GRN_OBJ_VECTOR); + GRN_UINT32_INIT(&(data->buffers.postings), GRN_OBJ_VECTOR); for (size_t i = 0; i < GRN_INDEX_COLUMN_DIFF_CACHE_SIZE; i++) { data->cache[i].token_id = GRN_ID_NIL; - GRN_UINT32_INIT(&(data->cache[i].buffer1), GRN_OBJ_VECTOR); - GRN_UINT32_INIT(&(data->cache[i].buffer2), GRN_OBJ_VECTOR); - data->cache[i].postings = NULL; + data->cache[i].diff_id = GRN_ID_NIL; + GRN_UINT32_INIT(&(data->cache[i].postings), GRN_OBJ_VECTOR); + data->cache[i].offset = 0; } } @@ -307,106 +315,13 @@ grn_index_column_diff_data_fin(grn_ctx *ctx, } GRN_OBJ_FIN(ctx, &(data->buffers.value)); - GRN_OBJ_FIN(ctx, &(data->buffers.missings)); + GRN_OBJ_FIN(ctx, &(data->buffers.postings)); for (size_t i = 0; i < GRN_INDEX_COLUMN_DIFF_CACHE_SIZE; i++) { if (data->cache[i].token_id == GRN_ID_NIL) { continue; } - GRN_OBJ_FIN(ctx, &(data->cache[i].buffer1)); - GRN_OBJ_FIN(ctx, &(data->cache[i].buffer2)); - } -} - -static size_t -grn_index_column_diff_cache_compute_key(grn_id token_id) -{ - return token_id % GRN_INDEX_COLUMN_DIFF_CACHE_SIZE; -} - -static grn_obj * -grn_index_column_diff_cache_allocate(grn_ctx *ctx, - grn_index_column_diff_data *data, - grn_id token_id) -{ - const size_t cache_key = grn_index_column_diff_cache_compute_key(token_id); - const grn_id cached_token_id = data->cache[cache_key].token_id; - if (cached_token_id != GRN_ID_NIL) { - grn_obj_set_value(ctx, - data->remains, - token_id, - data->cache[cache_key].postings, - GRN_OBJ_SET); - } - data->cache[cache_key].token_id = token_id; - data->cache[cache_key].postings = &(data->cache[cache_key].buffer1); - return data->cache[cache_key].postings; -} - -static grn_obj * -grn_index_column_diff_cache_get(grn_ctx *ctx, - grn_index_column_diff_data *data, - grn_id token_id) -{ - const size_t cache_key = grn_index_column_diff_cache_compute_key(token_id); - const grn_id cached_token_id = data->cache[cache_key].token_id; - if (cached_token_id == token_id) { - return data->cache[cache_key].postings; - } else { - grn_obj *postings = - grn_index_column_diff_cache_allocate(ctx, data, token_id); - GRN_BULK_REWIND(postings); - grn_obj_get_value(ctx, data->remains, token_id, postings); - return postings; - } -} - -static void -grn_index_column_diff_cache_remove_posting(grn_ctx *ctx, - grn_index_column_diff_data *data, - grn_id token_id, - int64_t nth_posting) -{ - const size_t cache_key = grn_index_column_diff_cache_compute_key(token_id); - grn_obj *postings = data->cache[cache_key].postings; - grn_obj *new_postings = NULL; - if (postings == &(data->cache[cache_key].buffer1)) { - new_postings = &(data->cache[cache_key].buffer2); - } else { - new_postings = &(data->cache[cache_key].buffer1); - } - - GRN_BULK_REWIND(new_postings); - const size_t n_posting_elements = data->n_posting_elements; - const size_t posting_size = sizeof(uint32_t) * n_posting_elements; - grn_bulk_write(ctx, - new_postings, - GRN_BULK_HEAD(postings), - posting_size * nth_posting); - const size_t n_postings = - GRN_UINT32_VECTOR_SIZE(postings) / n_posting_elements; - grn_bulk_write(ctx, - new_postings, - GRN_BULK_HEAD(postings) + - (posting_size * (nth_posting + 1)), - posting_size * (n_postings - nth_posting - 1)); - data->cache[cache_key].postings = new_postings; -} - -static void -grn_index_column_diff_cache_flush(grn_ctx *ctx, - grn_index_column_diff_data *data) -{ - for (size_t i = 0; i < GRN_INDEX_COLUMN_DIFF_CACHE_SIZE; i++) { - const grn_id token_id = data->cache[i].token_id; - if (token_id == GRN_ID_NIL) { - continue; - } - grn_obj_set_value(ctx, - data->remains, - token_id, - data->cache[i].postings, - GRN_OBJ_SET); + GRN_OBJ_FIN(ctx, &(data->cache[i].postings)); } } @@ -523,19 +438,131 @@ grn_index_column_diff_progress(grn_ctx *ctx, } } +static size_t +grn_index_column_diff_cache_compute_key(grn_id token_id) +{ + return token_id % GRN_INDEX_COLUMN_DIFF_CACHE_SIZE; +} + +static void +grn_index_column_diff_cache_flush(grn_ctx *ctx, + grn_index_column_diff_data *data, + size_t cache_key) +{ + const grn_id cached_token_id = data->cache[cache_key].token_id; + if (cached_token_id == GRN_ID_NIL) { + return; + } + + grn_obj *new_postings = &(data->buffers.postings); + GRN_BULK_REWIND(new_postings); + grn_obj *postings = &(data->cache[cache_key].postings); + const size_t n_posting_elements = data->n_posting_elements; + const size_t offset = data->cache[cache_key].offset; + const uint32_t *postings_raw = + ((uint32_t *)GRN_BULK_HEAD(postings)) + + (n_posting_elements * offset); + const size_t postings_raw_size = + sizeof(uint32_t) * + (GRN_UINT32_VECTOR_SIZE(postings) - (n_posting_elements * offset)); + grn_bulk_write(ctx, + new_postings, + (const char *)postings_raw, + postings_raw_size); + grn_obj_set_value(ctx, + data->postings, + data->cache[cache_key].diff_id, + new_postings, + GRN_OBJ_SET); + + data->cache[cache_key].token_id = GRN_ID_NIL; + data->cache[cache_key].diff_id = GRN_ID_NIL; + data->cache[cache_key].offset = 0; + GRN_BULK_REWIND(&(data->cache[cache_key].postings)); +} + +static void +grn_index_column_diff_cache_flush_all(grn_ctx *ctx, + grn_index_column_diff_data *data) +{ + for (size_t i = 0; i < GRN_INDEX_COLUMN_DIFF_CACHE_SIZE; i++) { + grn_index_column_diff_cache_flush(ctx, data, i); + } +} + static grn_obj * +grn_index_column_diff_cache_allocate(grn_ctx *ctx, + grn_index_column_diff_data *data) +{ + const grn_id token_id = data->current.token_id; + const size_t cache_key = grn_index_column_diff_cache_compute_key(token_id); + grn_index_column_diff_cache_flush(ctx, data, cache_key); + data->cache[cache_key].token_id = token_id; + data->cache[cache_key].diff_id = data->current.diff_id; + return &(data->cache[cache_key].postings); +} + +static const uint32_t * +grn_index_column_diff_cache_get(grn_ctx *ctx, + grn_index_column_diff_data *data, + size_t *n_postings) +{ + const grn_id token_id = data->current.token_id; + const size_t cache_key = grn_index_column_diff_cache_compute_key(token_id); + const grn_id cached_token_id = data->cache[cache_key].token_id; + grn_obj *postings; + if (cached_token_id == token_id) { + postings = &(data->cache[cache_key].postings); + } else { + postings = grn_index_column_diff_cache_allocate(ctx, data); + const grn_id diff_id = data->current.diff_id; + grn_obj_get_value(ctx, data->postings, diff_id, postings); + } + const size_t n_posting_elements = data->n_posting_elements; + const size_t offset = data->cache[cache_key].offset; + *n_postings = + (GRN_UINT32_VECTOR_SIZE(postings) / n_posting_elements) - + offset; + return ((uint32_t *)GRN_BULK_HEAD(postings)) + (n_posting_elements * offset); +} + +static void +grn_index_column_diff_cache_remove_posting(grn_ctx *ctx, + grn_index_column_diff_data *data, + size_t nth_posting) +{ + const grn_id token_id = data->current.token_id; + const size_t cache_key = grn_index_column_diff_cache_compute_key(token_id); + const size_t n_posting_elements = data->n_posting_elements; + size_t n_postings = 0; + const uint32_t *postings = + grn_index_column_diff_cache_get(ctx, data, &n_postings); + + if (nth_posting > 0) { + grn_obj *remains = &(data->buffers.postings); + GRN_BULK_REWIND(remains); + const size_t posting_size = sizeof(uint32_t) * n_posting_elements; + grn_bulk_write(ctx, + remains, + (const char *)postings, + posting_size * nth_posting); + const grn_id diff_id = data->current.diff_id; + grn_obj_set_value(ctx, data->remains, diff_id, remains, GRN_OBJ_APPEND); + } + data->cache[cache_key].offset += nth_posting + 1; +} + +static const uint32_t * grn_index_column_diff_get_postings(grn_ctx *ctx, grn_index_column_diff_data *data, - grn_id token_id) + size_t *n_postings) { - int added = 0; - grn_table_add(ctx, data->tokens, &token_id, sizeof(grn_id), &added); - if (!added) { - return grn_index_column_diff_cache_get(ctx, data, token_id); + if (!data->current.is_new_diff) { + return grn_index_column_diff_cache_get(ctx, data, n_postings); } - grn_obj *postings = grn_index_column_diff_cache_allocate(ctx, data, token_id); - GRN_BULK_REWIND(postings); + grn_obj *postings = grn_index_column_diff_cache_allocate(ctx, data); + const grn_id token_id = data->current.token_id; const unsigned int ii_cursor_flags = 0; grn_ii_cursor *ii_cursor = grn_ii_cursor_open(ctx, data->ii, @@ -569,23 +596,21 @@ grn_index_column_diff_get_postings(grn_ctx *ctx, } grn_ii_cursor_close(ctx, ii_cursor); } - return postings; + *n_postings = GRN_UINT32_VECTOR_SIZE(postings) / data->n_posting_elements; + return (const uint32_t *)(GRN_BULK_HEAD(postings)); } static int grn_index_column_diff_compare_posting(grn_ctx *ctx, grn_index_column_diff_data *data, - grn_obj *postings, - size_t nth_posting, - const grn_posting *current_posting) + const uint32_t *posting) { const grn_bool with_section = data->index.with_section; const grn_bool with_position = data->index.with_position; - const size_t n_posting_elements = data->n_posting_elements; - - size_t i = nth_posting * n_posting_elements; + const grn_posting *current_posting = &(data->current.posting); - const grn_id record_id = GRN_UINT32_VALUE_AT(postings, i); + size_t i = 0; + const grn_id record_id = posting[i]; if (record_id < current_posting->rid) { return -1; } else if (record_id > current_posting->rid) { @@ -594,7 +619,7 @@ grn_index_column_diff_compare_posting(grn_ctx *ctx, if (with_section) { i++; - const uint32_t section_id = GRN_UINT32_VALUE_AT(postings, i); + const uint32_t section_id = posting[i]; if (section_id < current_posting->sid) { return -1; } else if (section_id > current_posting->sid) { @@ -604,7 +629,7 @@ grn_index_column_diff_compare_posting(grn_ctx *ctx, if (with_position) { i++; - const uint32_t position = GRN_UINT32_VALUE_AT(postings, i); + const uint32_t position = posting[i]; if (position < current_posting->pos) { return -1; } else if (position > current_posting->pos) { @@ -617,27 +642,20 @@ grn_index_column_diff_compare_posting(grn_ctx *ctx, static int64_t grn_index_column_diff_find_posting(grn_ctx *ctx, - grn_index_column_diff_data *data, - grn_obj *postings, - const grn_posting *current_posting) + grn_index_column_diff_data *data) { + size_t n_postings = 0; + const uint32_t *postings = + grn_index_column_diff_get_postings(ctx, data, &n_postings); const size_t n_posting_elements = data->n_posting_elements; - int64_t min = 0; - int64_t max = (GRN_UINT32_VECTOR_SIZE(postings) / n_posting_elements) - 1; - while (min <= max) { - const int64_t middle = min + ((max - min) / 2); + for (size_t i = 0; i < n_postings; i++) { + const uint32_t *posting = postings + (n_posting_elements * i); const int compared = - grn_index_column_diff_compare_posting(ctx, - data, - postings, - middle, - current_posting); + grn_index_column_diff_compare_posting(ctx, data, posting); if (compared == 0) { - return middle; - } else if (compared < 0) { - min = middle + 1; - } else { - max = middle - 1; + return i; + } else if (compared > 0) { + return -1; } } return -1; @@ -650,7 +668,7 @@ grn_index_column_diff_compute(grn_ctx *ctx, grn_obj *source_columns = &(data->source_columns); const size_t n_source_columns = GRN_PTR_VECTOR_SIZE(source_columns); grn_obj *value = &(data->buffers.value); - grn_obj *missings = &(data->buffers.missings); + grn_obj *postings = &(data->buffers.postings); const grn_bool with_section = data->index.with_section; const grn_bool with_position = data->index.with_position; @@ -663,11 +681,11 @@ grn_index_column_diff_compute(grn_ctx *ctx, GRN_CURSOR_BY_ID) { grn_index_column_diff_progress(ctx, data); for (size_t i = 0; i < n_source_columns; i++) { - grn_posting current_posting = {0}; - current_posting.rid = id; - current_posting.sid = i + 1; grn_obj *source = GRN_PTR_VALUE_AT(source_columns, i); + data->current.posting.rid = id; + data->current.posting.sid = i + 1; + GRN_BULK_REWIND(value); grn_obj_get_value(ctx, source, id, value); @@ -689,40 +707,46 @@ grn_index_column_diff_compute(grn_ctx *ctx, while (grn_token_cursor_get_status(ctx, token_cursor) == GRN_TOKEN_CURSOR_DOING) { - const grn_id token_id = grn_token_cursor_next(ctx, token_cursor); - if (token_id == GRN_ID_NIL) { + data->current.token_id = grn_token_cursor_next(ctx, token_cursor); + if (data->current.token_id == GRN_ID_NIL) { continue; } - grn_token *token = grn_token_cursor_get_token(ctx, token_cursor); - current_posting.pos = grn_token_get_position(ctx, token); + if (with_position) { + grn_token *token = grn_token_cursor_get_token(ctx, token_cursor); + data->current.posting.pos = grn_token_get_position(ctx, token); + } - grn_obj *postings = - grn_index_column_diff_get_postings(ctx, data, token_id); + { + int added = 0; + data->current.diff_id = + grn_table_add(ctx, + data->diff, + &(data->current.token_id), sizeof(grn_id), + &added); + data->current.is_new_diff = (added != 0); + } - int64_t nth_posting = - grn_index_column_diff_find_posting(ctx, - data, - postings, - ¤t_posting); + const int64_t nth_posting = + grn_index_column_diff_find_posting(ctx, data); if (nth_posting >= 0) { grn_index_column_diff_cache_remove_posting(ctx, data, - token_id, - nth_posting); + (size_t)nth_posting); } else { - GRN_BULK_REWIND(missings); - GRN_UINT32_PUT(ctx, missings, current_posting.rid); + const grn_posting *posting = &(data->current.posting); + GRN_BULK_REWIND(postings); + GRN_UINT32_PUT(ctx, postings, posting->rid); if (with_section) { - GRN_UINT32_PUT(ctx, missings, current_posting.sid); + GRN_UINT32_PUT(ctx, postings, posting->sid); } if (with_position) { - GRN_UINT32_PUT(ctx, missings, current_posting.pos); + GRN_UINT32_PUT(ctx, postings, posting->pos); } grn_obj_set_value(ctx, data->missings, - token_id, - missings, + data->current.diff_id, + postings, GRN_OBJ_APPEND); } } @@ -730,23 +754,31 @@ grn_index_column_diff_compute(grn_ctx *ctx, } } GRN_TABLE_EACH_END(ctx, cursor); - grn_index_column_diff_cache_flush(ctx, data); + grn_index_column_diff_cache_flush_all(ctx, data); { - GRN_TABLE_EACH_BEGIN(ctx, data->tokens, cursor, id) { - GRN_BULK_REWIND(missings); - grn_obj_get_value(ctx, data->remains, id, missings); - if (GRN_UINT32_VECTOR_SIZE(missings) > 0) { + GRN_TABLE_EACH_BEGIN(ctx, data->diff, cursor, id) { + GRN_BULK_REWIND(postings); + grn_obj_get_value(ctx, data->postings, id, postings); + if (GRN_UINT32_VECTOR_SIZE(postings) > 0) { + grn_obj_set_value(ctx, data->remains, id, postings, GRN_OBJ_APPEND); continue; } - GRN_BULK_REWIND(missings); - grn_obj_get_value(ctx, data->missings, id, missings); - if (GRN_UINT32_VECTOR_SIZE(missings) > 0) { + GRN_BULK_REWIND(postings); + grn_obj_get_value(ctx, data->remains, id, postings); + if (GRN_UINT32_VECTOR_SIZE(postings) > 0) { + continue; + } + GRN_BULK_REWIND(postings); + grn_obj_get_value(ctx, data->missings, id, postings); + if (GRN_UINT32_VECTOR_SIZE(postings) > 0) { continue; } grn_table_cursor_delete(ctx, cursor); } GRN_TABLE_EACH_END(ctx, cursor); } + + grn_obj_close(ctx, data->postings); } grn_rc @@ -815,25 +847,43 @@ grn_index_column_diff(grn_ctx *ctx, data.lexicon = grn_ctx_at(ctx, index_column->header.domain); - data.tokens = grn_table_create(ctx, - NULL, 0, - NULL, - GRN_TABLE_HASH_KEY, - data.lexicon, - NULL); - if (!data.tokens) { + data.diff = grn_table_create(ctx, + NULL, 0, + NULL, + GRN_TABLE_HASH_KEY, + data.lexicon, + NULL); + if (!data.diff) { + char message[GRN_CTX_MSGSIZE]; + grn_strcpy(message, GRN_CTX_MSGSIZE, ctx->errbuf); + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_size = grn_obj_name(ctx, index_column, name, sizeof(name)); + ERR(GRN_INVALID_ARGUMENT, + "[index-column][diff] failed to create diff table: <%.*s>: %s", + name_size, name, + message); + goto exit; + } + data.postings = grn_column_create(ctx, + data.diff, + postings_column_name, + strlen(postings_column_name), + NULL, + GRN_OBJ_COLUMN_VECTOR, + grn_ctx_at(ctx, GRN_DB_UINT32)); + if (!data.postings) { char message[GRN_CTX_MSGSIZE]; grn_strcpy(message, GRN_CTX_MSGSIZE, ctx->errbuf); char name[GRN_TABLE_MAX_KEY_SIZE]; int name_size = grn_obj_name(ctx, index_column, name, sizeof(name)); ERR(GRN_INVALID_ARGUMENT, - "[index-column][diff] failed to create token table: <%.*s>: %s", + "[index-column][diff] failed to create postings column: <%.*s>: %s", name_size, name, message); goto exit; } data.remains = grn_column_create(ctx, - data.tokens, + data.diff, remains_column_name, strlen(remains_column_name), NULL, @@ -845,13 +895,13 @@ grn_index_column_diff(grn_ctx *ctx, char name[GRN_TABLE_MAX_KEY_SIZE]; int name_size = grn_obj_name(ctx, index_column, name, sizeof(name)); ERR(GRN_INVALID_ARGUMENT, - "[index-column][diff] failed to create reamins column: <%.*s>: %s", + "[index-column][diff] failed to create remains column: <%.*s>: %s", name_size, name, message); goto exit; } data.missings = grn_column_create(ctx, - data.tokens, + data.diff, missings_column_name, strlen(missings_column_name), NULL, @@ -870,12 +920,12 @@ grn_index_column_diff(grn_ctx *ctx, } grn_index_column_diff_compute(ctx, &data); - *diff = data.tokens; - data.tokens = NULL; + *diff = data.diff; + data.diff = NULL; exit : - if (data.tokens) { - grn_obj_close(ctx, data.tokens); + if (data.diff) { + grn_obj_close(ctx, data.diff); } grn_index_column_diff_data_fin(ctx, &data); Copied: test/command/suite/index_column_diff/missings/with_section.expected (+0 -0) 100% =================================================================== Copied: test/command/suite/index_column_diff/missings/with_section.test (+0 -0) 100% =================================================================== Copied: test/command/suite/index_column_diff/missings/without_section.expected (+0 -0) 100% =================================================================== Copied: test/command/suite/index_column_diff/missings/without_section.test (+0 -0) 100% =================================================================== Renamed: test/command/suite/index_column_diff/remains/with_section.expected (+34 -84) 57% =================================================================== --- test/command/suite/index_column_diff/missing/with_section.expected 2019-03-20 08:49:48 +0900 (dace34582) +++ test/command/suite/index_column_diff/remains/with_section.expected 2019-03-20 15:03:54 +0900 (d29dfa9e8) @@ -4,10 +4,18 @@ column_create Data value1 COLUMN_SCALAR ShortText [[0,0.0,0.0],true] column_create Data value2 COLUMN_SCALAR ShortText [[0,0.0,0.0],true] -table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenNgram --normalizer NormalizerNFKC100 +table_create Patterns TABLE_PAT_KEY ShortText --normalizer NormalizerNFKC100 +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer "TokenTable('table', 'Patterns')" --normalizer NormalizerNFKC100 [[0,0.0,0.0],true] column_create Terms data_values_index COLUMN_INDEX|WITH_POSITION|WITH_SECTION Data value1,value2 [[0,0.0,0.0],true] +load --table Patterns +[ +{"_key": "World"}, +{"_key": "Groonga"} +] +[[0,0.0,0.0],2] load --table Data [ {"value1": "Hello World", @@ -16,14 +24,22 @@ load --table Data "value2": "Good-by Groonga"} ] [[0,0.0,0.0],2] -truncate Terms.data_values_index +truncate Patterns [[0,0.0,0.0],true] load --table Data [ -{"value1": "Morning World", - "value2": "Good night World"}, -{"value1": "Morning Groonga", - "value2": "Good night Groonga"} +{"_id": 1, + "value1": "Hello Groonga", + "value2": "Good-by Groonga"}, +{"_id": 2, + "value1": "Hello World", + "value2": "Good-by World"} +] +[[0,0.0,0.0],2] +load --table Patterns +[ +{"_key": "World"}, +{"_key": "Groonga"} ] [[0,0.0,0.0],2] index_column_diff Terms data_values_index @@ -36,62 +52,29 @@ index_column_diff Terms data_values_index [ { "token": { - "id": 1, - "value": "hello" + "id": 2, + "value": "groonga" }, "remains": [ - - ], - "missings": [ { - "record_id": 1, + "record_id": 2, "section_id": 1, "position": 0 }, { "record_id": 2, - "section_id": 1, + "section_id": 2, "position": 0 } - ] - }, - { - "token": { - "id": 2, - "value": "world" - }, - "remains": [ - ], "missings": [ { "record_id": 1, "section_id": 1, - "position": 1 - }, - { - "record_id": 1, - "section_id": 2, - "position": 3 - } - ] - }, - { - "token": { - "id": 3, - "value": "good" - }, - "remains": [ - - ], - "missings": [ - { - "record_id": 1, - "section_id": 2, "position": 0 }, { - "record_id": 2, + "record_id": 1, "section_id": 2, "position": 0 } @@ -99,64 +82,31 @@ index_column_diff Terms data_values_index }, { "token": { - "id": 4, - "value": "-" + "id": 1, + "value": "world" }, "remains": [ - - ], - "missings": [ { "record_id": 1, - "section_id": 2, - "position": 1 + "section_id": 1, + "position": 0 }, { - "record_id": 2, - "section_id": 2, - "position": 1 - } - ] - }, - { - "token": { - "id": 5, - "value": "by" - }, - "remains": [ - - ], - "missings": [ - { "record_id": 1, "section_id": 2, - "position": 2 - }, - { - "record_id": 2, - "section_id": 2, - "position": 2 + "position": 0 } - ] - }, - { - "token": { - "id": 6, - "value": "groonga" - }, - "remains": [ - ], "missings": [ { "record_id": 2, "section_id": 1, - "position": 1 + "position": 0 }, { "record_id": 2, "section_id": 2, - "position": 3 + "position": 0 } ] } Renamed: test/command/suite/index_column_diff/remains/with_section.test (+23 -6) 54% =================================================================== --- test/command/suite/index_column_diff/missing/with_section.test 2019-03-20 08:49:48 +0900 (39d0ea54c) +++ test/command/suite/index_column_diff/remains/with_section.test 2019-03-20 15:03:54 +0900 (2b617bd89) @@ -2,13 +2,22 @@ table_create Data TABLE_NO_KEY column_create Data value1 COLUMN_SCALAR ShortText column_create Data value2 COLUMN_SCALAR ShortText +table_create Patterns TABLE_PAT_KEY ShortText \ + --normalizer NormalizerNFKC100 + table_create Terms TABLE_PAT_KEY ShortText \ - --default_tokenizer TokenNgram \ + --default_tokenizer "TokenTable('table', 'Patterns')" \ --normalizer NormalizerNFKC100 column_create Terms data_values_index \ COLUMN_INDEX|WITH_POSITION|WITH_SECTION \ Data value1,value2 +load --table Patterns +[ +{"_key": "World"}, +{"_key": "Groonga"} +] + load --table Data [ {"value1": "Hello World", @@ -17,14 +26,22 @@ load --table Data "value2": "Good-by Groonga"} ] -truncate Terms.data_values_index +truncate Patterns load --table Data [ -{"value1": "Morning World", - "value2": "Good night World"}, -{"value1": "Morning Groonga", - "value2": "Good night Groonga"} +{"_id": 1, + "value1": "Hello Groonga", + "value2": "Good-by Groonga"}, +{"_id": 2, + "value1": "Hello World", + "value2": "Good-by World"} +] + +load --table Patterns +[ +{"_key": "World"}, +{"_key": "Groonga"} ] index_column_diff Terms data_values_index Renamed: test/command/suite/index_column_diff/remains/without_section.expected (+27 -26) 68% =================================================================== --- test/command/suite/index_column_diff/missing/without_section.expected 2019-03-20 08:49:48 +0900 (47e54d6b4) +++ test/command/suite/index_column_diff/remains/without_section.expected 2019-03-20 15:03:54 +0900 (210756488) @@ -2,22 +2,36 @@ table_create Data TABLE_NO_KEY [[0,0.0,0.0],true] column_create Data value COLUMN_SCALAR ShortText [[0,0.0,0.0],true] -table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenNgram --normalizer NormalizerNFKC100 +table_create Patterns TABLE_PAT_KEY ShortText --normalizer NormalizerNFKC100 +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer "TokenTable('table', 'Patterns')" --normalizer NormalizerNFKC100 [[0,0.0,0.0],true] column_create Terms data_value_index COLUMN_INDEX|WITH_POSITION Data value [[0,0.0,0.0],true] +load --table Patterns +[ +{"_key": "World"}, +{"_key": "Groonga"} +] +[[0,0.0,0.0],2] load --table Data [ {"value": "Hello World"}, {"value": "Hello Groonga"} ] [[0,0.0,0.0],2] -truncate Terms.data_value_index +truncate Patterns [[0,0.0,0.0],true] load --table Data [ -{"value": "Good-by World"}, -{"value": "Good-by Groonga"} +{"_id": 1, "value": "Hello Groonga"}, +{"_id": 2, "value": "Hello World"} +] +[[0,0.0,0.0],2] +load --table Patterns +[ +{"_key": "World"}, +{"_key": "Groonga"} ] [[0,0.0,0.0],2] index_column_diff Terms data_value_index @@ -30,50 +44,37 @@ index_column_diff Terms data_value_index [ { "token": { - "id": 1, - "value": "hello" + "id": 2, + "value": "groonga" }, "remains": [ - + { + "record_id": 2, + "position": 0 + } ], "missings": [ { "record_id": 1, "position": 0 - }, - { - "record_id": 2, - "position": 0 } ] }, { "token": { - "id": 2, + "id": 1, "value": "world" }, "remains": [ - - ], - "missings": [ { "record_id": 1, - "position": 1 + "position": 0 } - ] - }, - { - "token": { - "id": 3, - "value": "groonga" - }, - "remains": [ - ], "missings": [ { "record_id": 2, - "position": 1 + "position": 0 } ] } Renamed: test/command/suite/index_column_diff/remains/without_section.test (+19 -4) 50% =================================================================== --- test/command/suite/index_column_diff/missing/without_section.test 2019-03-20 08:49:48 +0900 (5429f610a) +++ test/command/suite/index_column_diff/remains/without_section.test 2019-03-20 15:03:54 +0900 (ba9d8b754) @@ -1,23 +1,38 @@ table_create Data TABLE_NO_KEY column_create Data value COLUMN_SCALAR ShortText +table_create Patterns TABLE_PAT_KEY ShortText \ + --normalizer NormalizerNFKC100 + table_create Terms TABLE_PAT_KEY ShortText \ - --default_tokenizer TokenNgram \ + --default_tokenizer "TokenTable('table', 'Patterns')" \ --normalizer NormalizerNFKC100 column_create Terms data_value_index COLUMN_INDEX|WITH_POSITION Data value +load --table Patterns +[ +{"_key": "World"}, +{"_key": "Groonga"} +] + load --table Data [ {"value": "Hello World"}, {"value": "Hello Groonga"} ] -truncate Terms.data_value_index +truncate Patterns load --table Data [ -{"value": "Good-by World"}, -{"value": "Good-by Groonga"} +{"_id": 1, "value": "Hello Groonga"}, +{"_id": 2, "value": "Hello World"} +] + +load --table Patterns +[ +{"_key": "World"}, +{"_key": "Groonga"} ] index_column_diff Terms data_value_index -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190320/69f22869/attachment-0001.html>