Kouhei Sutou
null+****@clear*****
Sat Apr 4 20:42:16 JST 2015
Kouhei Sutou 2015-04-04 20:42:16 +0900 (Sat, 04 Apr 2015) New Revision: 1e870f31a57dce5114e375df4da7ed78f71b41e8 https://github.com/groonga/groonga/commit/1e870f31a57dce5114e375df4da7ed78f71b41e8 Message: hash: increase max key size (0x1000) 4096 -> (0xffff) 65535 Modified files: lib/db.c lib/grn_hash.h lib/hash.c lib/proc.c test/unit/core/test-hash-cursor.c test/unit/core/test-hash-sort.c test/unit/core/test-hash.c Modified: lib/db.c (+2 -2) =================================================================== --- lib/db.c 2015-04-03 17:37:10 +0900 (960fdad) +++ lib/db.c 2015-04-04 20:42:16 +0900 (9ed0864) @@ -7744,7 +7744,7 @@ grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value) switch (DB_OBJ(obj)->header.type) { case GRN_TABLE_HASH_KEY : ((grn_hash *)obj)->tokenizer = value; - ((grn_hash *)obj)->header->tokenizer = grn_obj_id(ctx, value); + ((grn_hash *)obj)->header.common->tokenizer = grn_obj_id(ctx, value); rc = GRN_SUCCESS; break; case GRN_TABLE_PAT_KEY : @@ -7765,7 +7765,7 @@ grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value) switch (DB_OBJ(obj)->header.type) { case GRN_TABLE_HASH_KEY : ((grn_hash *)obj)->normalizer = value; - ((grn_hash *)obj)->header->normalizer = grn_obj_id(ctx, value); + ((grn_hash *)obj)->header.common->normalizer = grn_obj_id(ctx, value); rc = GRN_SUCCESS; break; case GRN_TABLE_PAT_KEY : Modified: lib/grn_hash.h (+45 -21) =================================================================== --- lib/grn_hash.h 2015-04-03 17:37:10 +0900 (44f5bcb) +++ lib/grn_hash.h 2015-04-04 20:42:16 +0900 (d50836d) @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009-2012 Brazil +/* Copyright(C) 2009-2015 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -187,7 +187,15 @@ GRN_API grn_id grn_table_queue_tail(grn_table_queue *queue); /**** grn_hash ****/ #define GRN_HASH_TINY (0x01<<6) -#define 
GRN_HASH_MAX_KEY_SIZE GRN_TABLE_MAX_KEY_SIZE +#define GRN_HASH_MAX_KEY_SIZE_NORMAL GRN_TABLE_MAX_KEY_SIZE +#define GRN_HASH_MAX_KEY_SIZE_LARGE (0xffff) + +#define GRN_HASH_IS_LARGE_KEY(hash)\ + ((hash)->key_size > GRN_HASH_MAX_KEY_SIZE_NORMAL) + +typedef struct _grn_hash_header_common grn_hash_header_common; +typedef struct _grn_hash_header_normal grn_hash_header_normal; +typedef struct _grn_hash_header_large grn_hash_header_large; struct _grn_hash { grn_db_obj obj; @@ -205,7 +213,11 @@ struct _grn_hash { /* For grn_io_hash. */ grn_io *io; - struct grn_hash_header *header; + union { + grn_hash_header_common *common; + grn_hash_header_normal *normal; + grn_hash_header_large *large; + } header; uint32_t *lock; // uint32_t nref; // unsigned int max_n_subrecs; @@ -230,24 +242,36 @@ struct _grn_hash { grn_tiny_bitmap bitmap; }; -/* Header of grn_io_hash. */ -struct grn_hash_header { - uint32_t flags; - grn_encoding encoding; - uint32_t key_size; - uint32_t value_size; - grn_id tokenizer; - uint32_t curr_rec; - int32_t curr_key; - uint32_t idx_offset; - uint32_t entry_size; - uint32_t max_offset; - uint32_t n_entries; - uint32_t n_garbages; - uint32_t lock; - grn_id normalizer; - uint32_t reserved[15]; - grn_id garbages[GRN_HASH_MAX_KEY_SIZE]; +#define GRN_HASH_HEADER_COMMON_FIELDS\ + uint32_t flags;\ + grn_encoding encoding;\ + uint32_t key_size;\ + uint32_t value_size;\ + grn_id tokenizer;\ + uint32_t curr_rec;\ + int32_t curr_key;\ + uint32_t idx_offset;\ + uint32_t entry_size;\ + uint32_t max_offset;\ + uint32_t n_entries;\ + uint32_t n_garbages;\ + uint32_t lock;\ + grn_id normalizer;\ + uint32_t reserved[15] + +struct _grn_hash_header_common { + GRN_HASH_HEADER_COMMON_FIELDS; +}; + +struct _grn_hash_header_normal { + GRN_HASH_HEADER_COMMON_FIELDS; + grn_id garbages[GRN_HASH_MAX_KEY_SIZE_NORMAL]; + grn_table_queue queue; +}; + +struct _grn_hash_header_large { + GRN_HASH_HEADER_COMMON_FIELDS; + grn_id garbages[GRN_HASH_MAX_KEY_SIZE_LARGE]; grn_table_queue queue; }; 
Modified: lib/hash.c (+61 -27) =================================================================== --- lib/hash.c 2015-04-03 17:37:10 +0900 (22491c5) +++ lib/hash.c 2015-04-04 20:42:16 +0900 (bfdc695) @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2009-2012 Brazil + Copyright(C) 2009-2015 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -1167,7 +1167,11 @@ grn_array_unblock(grn_ctx *ctx, grn_array *array) /* grn_hash : hash table */ #define GRN_HASH_MAX_SEGMENT 0x400 -#define GRN_HASH_HEADER_SIZE 0x9000 +#define GRN_HASH_HEADER_SIZE_NORMAL 0x9000 +#define GRN_HASH_HEADER_SIZE_LARGE\ + (GRN_HASH_HEADER_SIZE_NORMAL +\ + (sizeof(grn_id) *\ + (GRN_HASH_MAX_KEY_SIZE_LARGE - GRN_HASH_MAX_KEY_SIZE_NORMAL))) #define GRN_HASH_SEGMENT_SIZE 0x400000 #define W_OF_KEY_IN_A_SEGMENT 22 #define IDX_MASK_IN_A_SEGMENT 0xfffff @@ -1298,7 +1302,7 @@ inline static grn_id * grn_hash_idx_at(grn_ctx *ctx, grn_hash *hash, grn_id id) { if (grn_hash_is_io_hash(hash)) { - id = (id & *hash->max_offset) + hash->header->idx_offset; + id = (id & *hash->max_offset) + hash->header.common->idx_offset; return grn_io_hash_idx_at(ctx, hash, id); } else { return hash->index + (id & *hash->max_offset); @@ -1380,15 +1384,18 @@ grn_io_hash_entry_put_key(grn_ctx *ctx, grn_hash *hash, key_offset = entry->key.offset; } else { uint32_t segment_id; + grn_hash_header_common *header; + + header = hash->header.common; if (key_size >= GRN_HASH_SEGMENT_SIZE) { return GRN_INVALID_ARGUMENT; } - key_offset = hash->header->curr_key; + key_offset = header->curr_key; segment_id = (key_offset + key_size) >> W_OF_KEY_IN_A_SEGMENT; if ((key_offset >> W_OF_KEY_IN_A_SEGMENT) != segment_id) { - key_offset = hash->header->curr_key = segment_id << W_OF_KEY_IN_A_SEGMENT; + key_offset = header->curr_key = segment_id << W_OF_KEY_IN_A_SEGMENT; } - hash->header->curr_key += key_size; + header->curr_key += key_size; entry->key.offset = 
key_offset; } @@ -1537,7 +1544,8 @@ grn_io_hash_calculate_entry_size(uint32_t key_size, uint32_t value_size, } static grn_io * -grn_io_hash_create_io(grn_ctx *ctx, const char *path, uint32_t entry_size) +grn_io_hash_create_io(grn_ctx *ctx, const char *path, + uint32_t header_size, uint32_t entry_size) { uint32_t w_of_element = 0; grn_io_array_spec array_spec[4]; @@ -1555,7 +1563,7 @@ grn_io_hash_create_io(grn_ctx *ctx, const char *path, uint32_t entry_size) array_spec[GRN_HASH_INDEX_SEGMENT].max_n_segments = 1U << (30 - (22 - 2)); array_spec[GRN_HASH_BITMAP_SEGMENT].w_of_element = 0; array_spec[GRN_HASH_BITMAP_SEGMENT].max_n_segments = 1U << (30 - (22 + 3)); - return grn_io_create_with_array(ctx, path, GRN_HASH_HEADER_SIZE, + return grn_io_create_with_array(ctx, path, header_size, GRN_HASH_SEGMENT_SIZE, grn_io_auto, 4, array_spec); } @@ -1566,12 +1574,17 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path, grn_encoding encoding, uint32_t init_size) { grn_io *io; - struct grn_hash_header *header; - uint32_t entry_size, max_offset; + grn_hash_header_common *header; + uint32_t header_size, entry_size, max_offset; + if (key_size <= GRN_HASH_MAX_KEY_SIZE_NORMAL) { + header_size = GRN_HASH_HEADER_SIZE_NORMAL; + } else { + header_size = GRN_HASH_HEADER_SIZE_LARGE; + } entry_size = grn_io_hash_calculate_entry_size(key_size, value_size, flags); - io = grn_io_hash_create_io(ctx, path, entry_size); + io = grn_io_hash_create_io(ctx, path, header_size, entry_size); if (!io) { return GRN_NO_MEMORY_AVAILABLE; } @@ -1587,6 +1600,8 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path, encoding = ctx->encoding; } + hash->key_size = key_size; + header = grn_io_header(io); header->flags = flags; header->encoding = encoding; @@ -1610,11 +1625,18 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path, header->normalizer = GRN_ID_NIL; } GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); - grn_table_queue_init(ctx, &header->queue); + 
{ + grn_table_queue *queue; + if (GRN_HASH_IS_LARGE_KEY(hash)) { + queue = &(((grn_hash_header_large *)(header))->queue); + } else { + queue = &(((grn_hash_header_normal *)(header))->queue); + } + grn_table_queue_init(ctx, queue); + } hash->obj.header.flags = header->flags; hash->ctx = ctx; - hash->key_size = key_size; hash->encoding = encoding; hash->value_size = value_size; hash->entry_size = entry_size; @@ -1622,7 +1644,7 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path, hash->n_entries = &header->n_entries; hash->max_offset = &header->max_offset; hash->io = io; - hash->header = header; + hash->header.common = header; hash->lock = &header->lock; hash->tokenizer = NULL; return GRN_SUCCESS; @@ -1711,7 +1733,7 @@ grn_hash_create(grn_ctx *ctx, const char *path, uint32_t key_size, uint32_t valu if (!ctx) { return NULL; } - if (key_size > GRN_HASH_MAX_KEY_SIZE) { + if (key_size > GRN_HASH_MAX_KEY_SIZE_LARGE) { return NULL; } hash = (grn_hash *)GRN_MALLOC(sizeof(grn_hash)); @@ -1732,7 +1754,7 @@ grn_hash_open(grn_ctx *ctx, const char *path) if (ctx) { grn_io * const io = grn_io_open(ctx, path, grn_io_auto); if (io) { - struct grn_hash_header * const header = grn_io_header(io); + grn_hash_header_common * const header = grn_io_header(io); if (grn_io_get_type(io) == GRN_TABLE_HASH_KEY) { grn_hash * const hash = (grn_hash *)GRN_MALLOC(sizeof(grn_hash)); if (hash) { @@ -1747,7 +1769,7 @@ grn_hash_open(grn_ctx *ctx, const char *path) hash->n_entries = &header->n_entries; hash->max_offset = &header->max_offset; hash->io = io; - hash->header = header; + hash->header.common = header; hash->lock = &header->lock; hash->tokenizer = grn_ctx_at(ctx, header->tokenizer); if (header->flags & GRN_OBJ_KEY_NORMALIZE) { @@ -1911,7 +1933,7 @@ grn_hash_reset(grn_ctx *ctx, grn_hash *hash, uint32_t expected_n_entries) if (grn_hash_is_io_hash(hash)) { uint32_t i; - src_offset = hash->header->idx_offset; + src_offset = hash->header.common->idx_offset; dest_offset = 
MAX_INDEX_SIZE - src_offset; for (i = 0; i < new_index_size; i += (IDX_MASK_IN_A_SEGMENT + 1)) { /* @@ -1979,7 +2001,7 @@ grn_hash_reset(grn_ctx *ctx, grn_hash *hash, uint32_t expected_n_entries) } if (grn_hash_is_io_hash(hash)) { - hash->header->idx_offset = dest_offset; + hash->header.common->idx_offset = dest_offset; } else { grn_id * const old_index = hash->index; hash->index = new_index; @@ -2038,15 +2060,22 @@ grn_io_hash_add(grn_ctx *ctx, grn_hash *hash, uint32_t hash_value, { grn_id entry_id; grn_hash_entry *entry; - struct grn_hash_header * const header = hash->header; + grn_hash_header_common * const header = hash->header.common; + grn_id *garbages; - entry_id = header->garbages[key_size - 1]; + if (GRN_HASH_IS_LARGE_KEY(hash)) { + garbages = hash->header.large->garbages; + } else { + garbages = hash->header.normal->garbages; + } + + entry_id = garbages[key_size - 1]; if (entry_id) { entry = grn_io_hash_entry_at(ctx, hash, entry_id, GRN_TABLE_ADD); if (!entry) { return GRN_ID_NIL; } - header->garbages[key_size - 1] = *(grn_id *)entry; + garbages[key_size - 1] = *(grn_id *)entry; if (hash->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) { /* keep entry->io_entry's hash_value, flag, key_size and key. 
*/ memset(entry->io_entry.value, 0, header->value_size); @@ -2436,9 +2465,14 @@ grn_hash_set_value(grn_ctx *ctx, grn_hash *hash, grn_id id, *ep = GARBAGE;\ if (grn_hash_is_io_hash(hash)) {\ uint32_t size = key_size - 1;\ - struct grn_hash_header *hh = hash->header;\ - ee->key = hh->garbages[size];\ - hh->garbages[size] = e;\ + grn_id *garbages;\ + if (GRN_HASH_IS_LARGE_KEY(hash)) {\ + garbages = hash->header.large->garbages;\ + } else {\ + garbages = hash->header.normal->garbages;\ + }\ + ee->key = garbages[size];\ + garbages[size] = e;\ grn_io_array_bit_off(ctx, hash->io, GRN_HASH_BITMAP_SEGMENT, e);\ } else {\ ee->key = hash->garbages;\ @@ -2537,7 +2571,7 @@ grn_hash_cursor_close(grn_ctx *ctx, grn_hash_cursor *c) } #define HASH_CURR_MAX(hash) \ - ((grn_hash_is_io_hash(hash)) ? (hash)->header->curr_rec : (hash)->a.max) + ((grn_hash_is_io_hash(hash)) ? (hash)->header.common->curr_rec : (hash)->a.max) grn_hash_cursor * grn_hash_cursor_open(grn_ctx *ctx, grn_hash *hash, @@ -3061,7 +3095,7 @@ void grn_hash_check(grn_ctx *ctx, grn_hash *hash) { char buf[8]; - struct grn_hash_header *h = hash->header; + grn_hash_header_common *h = hash->header.common; GRN_OUTPUT_ARRAY_OPEN("RESULT", 1); GRN_OUTPUT_MAP_OPEN("SUMMARY", 25); GRN_OUTPUT_CSTR("flags"); Modified: lib/proc.c (+1 -1) =================================================================== --- lib/proc.c 2015-04-03 17:37:10 +0900 (10ce09f) +++ lib/proc.c 2015-04-04 20:42:16 +0900 (1142f53) @@ -2659,7 +2659,7 @@ dump_plugins(grn_ctx *ctx, grn_obj *outbuf) return; } - processed_paths = grn_hash_create(ctx, NULL, GRN_HASH_MAX_KEY_SIZE, 0, + processed_paths = grn_hash_create(ctx, NULL, GRN_TABLE_MAX_KEY_SIZE, 0, GRN_OBJ_TABLE_HASH_KEY | GRN_OBJ_KEY_VAR_SIZE); if (!processed_paths) { Modified: test/unit/core/test-hash-cursor.c (+2 -2) =================================================================== --- test/unit/core/test-hash-cursor.c 2015-04-03 17:37:10 +0900 (af49367) +++ test/unit/core/test-hash-cursor.c 2015-04-04 
20:42:16 +0900 (0fc8928) @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2; coding: utf-8 -*- */ /* - Copyright (C) 2008-2012 Kouhei Sutou <kou ＠ clear-code.com> + Copyright (C) 2008-2015 Kouhei Sutou <kou ＠ clear-code.com> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -53,7 +53,7 @@ cut_setup(void) keys_and_values = NULL; grn_test_hash_factory_set_flags(factory, GRN_OBJ_KEY_VAR_SIZE); - grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE); + grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE_NORMAL); sample_value = NULL; } Modified: test/unit/core/test-hash-sort.c (+5 -4) =================================================================== --- test/unit/core/test-hash-sort.c 2015-04-03 17:37:10 +0900 (35cb548) +++ test/unit/core/test-hash-sort.c 2015-04-04 20:42:16 +0900 (95d3734) @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2; coding: utf-8 -*- */ /* - Copyright (C) 2008-2009 Kouhei Sutou <kou ＠ cozmixng.org> + Copyright (C) 2008-2015 Kouhei Sutou <kou ＠ cozmixng.org> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -156,12 +156,13 @@ retrieve_all_keys (grn_array *array, grn_id n_entries) while (id != GRN_ID_NIL) { grn_id *hash_id; void *array_value; - gchar key[GRN_HASH_MAX_KEY_SIZE]; + gchar key[GRN_HASH_MAX_KEY_SIZE_NORMAL]; int size; grn_array_cursor_get_value(context, cursor, &array_value); hash_id = array_value; - size = grn_hash_get_key(context, hash, *hash_id, key, GRN_HASH_MAX_KEY_SIZE); + size = grn_hash_get_key(context, hash, *hash_id, + key, GRN_HASH_MAX_KEY_SIZE_NORMAL); key[size] = '\0'; keys = g_list_append(keys, g_strdup(key)); id = grn_array_cursor_next(context, cursor); @@ -656,7 +657,7 @@ test_sort_by_variable_size_key(gconstpointer data) const GList *node; int n_entries; - grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE); +
grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE_NORMAL); grn_test_hash_factory_add_flags(factory, GRN_OBJ_KEY_VAR_SIZE); if (test_data->set_parameters) Modified: test/unit/core/test-hash.c (+2 -2) =================================================================== --- test/unit/core/test-hash.c 2015-04-03 17:37:10 +0900 (f7392ca) +++ test/unit/core/test-hash.c 2015-04-04 20:42:16 +0900 (4eefb54) @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2; coding: utf-8 -*- */ /* - Copyright (C) 2008-2012 Kouhei Sutou <kou ＠ clear-code.com> + Copyright (C) 2008-2015 Kouhei Sutou <kou ＠ clear-code.com> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -96,7 +96,7 @@ set_value_size_to_zero(void) static void set_variable_size(void) { - grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE); + grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE_NORMAL); grn_test_hash_factory_add_flags(factory, GRN_OBJ_KEY_VAR_SIZE); } -------------- next part -------------- HTMLの添付ファイルを保管しました...다운로드