[Groonga-commit] groonga/groonga at dd67962 [ii-improve-performance-on-buffer-new-worst-case] ii: reduce existing buffer search time

Back to archive index

Kouhei Sutou null+****@clear*****
Tue Dec 27 13:33:12 JST 2016


Kouhei Sutou	2016-12-27 13:33:12 +0900 (Tue, 27 Dec 2016)

  New Revision: dd679629787d760032fbb13c36fd614662ace1be
  https://github.com/groonga/groonga/commit/dd679629787d760032fbb13c36fd614662ace1be

  Message:
    ii: reduce existing buffer search time
    
    If cursor for searching existing buffer doesn't find any existing buffer
    and has many records (= the worst case), many times are spent. It means
    that index construction time is slow for the case.
    
    This change limits the number of records for searching existing buffer.
    
    The following case is the worst case:
    
        table_create a TABLE_NO_KEY
        column_create a x COLUMN_SCALAR ShortText
        table_create b TABLE_PAT_KEY ShortText
        column_create b index COLUMN_INDEX a x
    
        load --table a
        [
        {"x": "00000000"},
        {"x": "00000001"},
        ...
        {"x": "00486036"},
        {"x": "00000000"},
        {"x": "00000001"},
        ...
        {"x": "00413962"}
        ]

  Modified files:
    lib/ii.c

  Modified: lib/ii.c (+12 -3)
===================================================================
--- lib/ii.c    2016-12-27 13:23:00 +0900 (aded74c)
+++ lib/ii.c    2016-12-27 13:33:12 +0900 (db76e88)
@@ -4018,6 +4018,8 @@ buffer_open_by_tid(grn_ctx *ctx,
   return pseg;
 }
 
+#define EXISTING_BUFFER_FIND_LIMIT 10000
+
 inline static uint32_t
 buffer_new(grn_ctx *ctx, grn_ii *ii, int size, uint32_t *pos,
            buffer_term **bt, buffer_rec **br, buffer **bp, grn_id id, grn_hash *h)
@@ -4031,6 +4033,7 @@ buffer_new(grn_ctx *ctx, grn_ii *ii, int size, uint32_t *pos,
   int key_size = grn_table_get_key(ctx, ii->lexicon, id, key,
                                    GRN_TABLE_MAX_KEY_SIZE);
   uint32_t lseg = NOT_ASSIGNED, pseg = NOT_ASSIGNED;
+  int cursor_limit = -1;
   grn_table_cursor *tc = NULL;
   if (S_SEGMENT - sizeof(buffer_header) < size + sizeof(buffer_term)) {
     DEFINE_NAME(ii);
@@ -4042,16 +4045,22 @@ buffer_new(grn_ctx *ctx, grn_ii *ii, int size, uint32_t *pos,
          (size_t)(S_SEGMENT - sizeof(buffer_header)));
     return NOT_ASSIGNED;
   }
+  if (ii->next_buffer_candidate_tid != GRN_ID_NIL) {
+    cursor_limit = EXISTING_BUFFER_FIND_LIMIT;
+  }
   if (ii->lexicon->header.type == GRN_TABLE_PAT_KEY) {
     if (ii->lexicon->header.flags & GRN_OBJ_KEY_VAR_SIZE) {
-      tc = grn_table_cursor_open(ctx, ii->lexicon, key, key_size, NULL, 0, 0, -1,
+      tc = grn_table_cursor_open(ctx, ii->lexicon, key, key_size, NULL, 0,
+                                 0, cursor_limit,
                                  GRN_CURSOR_ASCENDING|GRN_CURSOR_GT);
     } else {
-      tc = grn_table_cursor_open(ctx, ii->lexicon, NULL, 0, key, key_size, 0, -1,
+      tc = grn_table_cursor_open(ctx, ii->lexicon, NULL, 0, key, key_size,
+                                 0, cursor_limit,
                                  GRN_CURSOR_PREFIX);
     }
   } else {
-    tc = grn_table_cursor_open(ctx, ii->lexicon, NULL, 0, NULL, 0, 0, -1,
+    tc = grn_table_cursor_open(ctx, ii->lexicon, NULL, 0, NULL, 0,
+                               0, cursor_limit,
                                GRN_CURSOR_ASCENDING);
   }
   if (tc) {
-------------- next part --------------
HTML����������������������������...
다운로드 



More information about the Groonga-commit mailing list
Back to archive index