susumu.yata
null+****@clear*****
Sun Jan 20 15:18:02 JST 2013
susumu.yata 2013-01-20 15:18:02 +0900 (Sun, 20 Jan 2013) New Revision: 2ee513e9f8429149c7af559e9fa3a2d77856871a https://github.com/groonga/grnxx/commit/2ee513e9f8429149c7af559e9fa3a2d77856871a Log: Add components of insert(). (DoubleArray) Modified files: lib/alpha/double_array.cpp lib/alpha/double_array.hpp Modified: lib/alpha/double_array.cpp (+75 -9) =================================================================== --- lib/alpha/double_array.cpp 2013-01-20 12:40:11 +0900 (ca6d4f5) +++ lib/alpha/double_array.cpp 2013-01-20 15:18:02 +0900 (c68db51) @@ -33,8 +33,13 @@ DoubleArrayHeader::DoubleArrayHeader() entries_block_id_(io::BLOCK_INVALID_ID), keys_block_id_(io::BLOCK_INVALID_ID), root_node_id_(0), + total_key_length_(0), + next_key_id_(0), + max_key_id_(0), + num_keys_(0), num_chunks_(0), num_phantoms_(0), + num_zombies_(0), leaders_(), inter_process_mutex_() { for (uint32_t i = 0; i < DOUBLE_ARRAY_MAX_CHUNK_LEVEL; ++i) { @@ -42,7 +47,7 @@ DoubleArrayHeader::DoubleArrayHeader() } } -DoubleArrayKey::DoubleArrayKey(uint64_t id, const char *address, +DoubleArrayKey::DoubleArrayKey(uint64_t id, const void *address, uint64_t length) : id_low_(static_cast<uint32_t>(id)), id_high_(static_cast<uint8_t>(id >> 32)), @@ -93,7 +98,7 @@ std::unique_ptr<DoubleArrayImpl> DoubleArrayImpl::open(io::Pool pool, } bool DoubleArrayImpl::search(const uint8_t *ptr, uint64_t length, - uint64_t *key_offset) { + uint64_t *key_pos) { uint64_t node_id = root_node_id(); uint64_t query_pos = 0; if (!search_leaf(ptr, length, node_id, query_pos)) { @@ -106,9 +111,9 @@ bool DoubleArrayImpl::search(const uint8_t *ptr, uint64_t length, } if (node.key_length() == length) { - if (get_key(node.key_offset()).equals_to(ptr, length, query_pos)) { - if (key_offset) { - *key_offset = node.key_offset(); + if (get_key(node.key_pos()).equals_to(ptr, length, query_pos)) { + if (key_pos) { + *key_pos = node.key_pos(); } return true; } @@ -116,6 +121,46 @@ bool DoubleArrayImpl::search(const uint8_t *ptr, uint64_t length, return false; } +bool DoubleArrayImpl::insert(const uint8_t *ptr, uint64_t length, + uint64_t *key_pos) { + // TODO +// GRN_DAT_THROW_IF(STATUS_ERROR, (status_flags() & CHANGING_MASK) != 0); +// StatusFlagManager status_flag_manager(header_, INSERTING_FLAG); + +// GRN_DAT_DEBUG_THROW_IF(!ptr && (length != 0)); + + uint64_t node_id = root_node_id(); + uint64_t query_pos = 0; + + // TODO +// search_leaf(ptr, length, node_id, query_pos); +// if (!insert_leaf(ptr, length, node_id, query_pos)) { +// if (key_pos) { +// *key_pos = nodes_[node_id].key_pos(); +// } +// return false; +// } + + const uint64_t new_key_id = header_->next_key_id(); + const uint64_t new_key_pos = append_key(ptr, length, new_key_id); + + header_->set_total_key_length(header_->total_key_length() + length); + header_->set_num_keys(header_->num_keys() + 1); + if (new_key_id > header_->max_key_id()) { + header_->set_max_key_id(new_key_id); + header_->set_next_key_id(new_key_id + 1); + } else { + header_->set_next_key_id(entries_[new_key_id].next()); + } + + entries_[new_key_id].set_key(new_key_pos, length); + nodes_[node_id].set_key(new_key_pos, length); + if (key_pos) { + *key_pos = new_key_pos; + } + return true; +} + DoubleArrayImpl::DoubleArrayImpl() : pool_(), block_info_(nullptr), @@ -201,9 +246,30 @@ bool DoubleArrayImpl::search_leaf(const uint8_t *ptr, uint64_t length, return nodes_[next].is_leaf(); } +uint64_t DoubleArrayImpl::append_key(const uint8_t *ptr, uint64_t length, + uint64_t key_id) { + // TODO +// GRN_DAT_THROW_IF(SIZE_ERROR, key_id > max_num_keys()); + + uint64_t key_pos = header_->next_key_pos(); + const uint64_t key_size = DoubleArrayKey::estimate_size(length); + + // TODO +// GRN_DAT_THROW_IF(SIZE_ERROR, key_size > (key_buf_size() - key_pos)); + const uint64_t size_left_in_page = + (~key_pos + 1) % DOUBLE_ARRAY_KEYS_PAGE_SIZE; + if (size_left_in_page < key_size) { + key_pos += size_left_in_page; + } + new (&keys_[key_pos]) DoubleArrayKey(key_id, ptr, length); + + header_->set_next_key_pos(key_pos + key_size); + return key_pos; +} + void DoubleArrayImpl::reserve_node(uint64_t node_id) { if (node_id >= header_->num_nodes()) { -// reserve_block(node_id / DOUBLE_ARRAY_BLOCK_SIZE); + reserve_chunk(node_id / DOUBLE_ARRAY_CHUNK_SIZE); } DoubleArrayNode &node = nodes_[node_id]; @@ -231,15 +297,15 @@ void DoubleArrayImpl::reserve_node(uint64_t node_id) { const uint64_t threshold = uint64_t(1) << ((DOUBLE_ARRAY_MAX_CHUNK_LEVEL - chunk.level() - 1) * 2); if (chunk.num_phantoms() == threshold) { -// update_chunk_level(chunk_id, chunk.level() + 1); + update_chunk_level(chunk_id, chunk.level() + 1); } } chunk.set_num_phantoms(chunk.num_phantoms() - 1); node.set_is_phantom(false); -// GRN_DAT_DEBUG_THROW_IF(node.offset() != INVALID_OFFSET); -// GRN_DAT_DEBUG_THROW_IF(node.label() != INVALID_LABEL); +// GRN_DAT_DEBUG_THROW_IF(node.offset() != DOUBLE_ARRAY_INVALID_OFFSET); +// GRN_DAT_DEBUG_THROW_IF(node.label() != DOUBLE_ARRAY_INVALID_LABEL); header_->set_num_phantoms(header_->num_phantoms() - 1); } Modified: lib/alpha/double_array.hpp (+78 -26) =================================================================== --- lib/alpha/double_array.hpp 2013-01-20 12:40:11 +0900 (a8b5041) +++ lib/alpha/double_array.hpp 2013-01-20 15:18:02 +0900 (aa7dc40) @@ -23,6 +23,7 @@ namespace grnxx { namespace alpha { +// FIXME: To be removed when moved to grnxx::db. using namespace grnxx::db; extern struct DoubleArrayCreate {} DOUBLE_ARRAY_CREATE; @@ -51,6 +52,9 @@ constexpr uint64_t DOUBLE_ARRAY_MAX_CHUNK_LEVEL = 5; // the linked list is empty and there exists no leader. constexpr uint64_t DOUBLE_ARRAY_INVALID_LEADER = 0x7FFFFFFF; +constexpr uint64_t DOUBLE_ARRAY_KEYS_PAGE_SIZE = + VECTOR_DEFAULT_PAGE_SIZE; + class DoubleArrayString { public: DoubleArrayString() : ptr_(nullptr), length_(0) {} @@ -160,6 +164,21 @@ class DoubleArrayHeader { uint64_t root_node_id() const { return root_node_id_; } + uint64_t total_key_length() const { + return total_key_length_; + } + uint64_t next_key_id() const { + return next_key_id_; + } + uint64_t next_key_pos() const { + return next_key_pos_; + } + uint64_t max_key_id() const { + return max_key_id_; + } + uint64_t num_keys() const { + return num_keys_; + } uint64_t num_chunks() const { return num_chunks_; } @@ -169,8 +188,10 @@ class DoubleArrayHeader { uint64_t num_phantoms() const { return num_phantoms_; } + uint64_t num_zombies() const { + return num_zombies_; + } uint64_t ith_leader(uint64_t i) const { -// GRN_DAT_DEBUG_THROW_IF(i > DOUBLE_ARRAY_MAX_CHUNK_LEVEL); return leaders_[i]; } @@ -189,15 +210,31 @@ class DoubleArrayHeader { void set_root_node_id(uint64_t value) { root_node_id_ = value; } + void set_total_key_length(uint64_t value) { + total_key_length_ = value; + } + void set_next_key_id(uint64_t value) { + next_key_id_ = value; + } + void set_next_key_pos(uint64_t value) { + next_key_pos_ = value; + } + void set_max_key_id(uint64_t value) { + max_key_id_ = value; + } + void set_num_keys(uint64_t value) { + num_keys_ = value; + } void set_num_chunks(uint64_t value) { num_chunks_ = value; } void set_num_phantoms(uint64_t value) { num_phantoms_ = value; } + void set_num_zombies(uint64_t value) { + num_zombies_ = value; + } void set_ith_leader(uint64_t i, uint64_t x) { -// GRN_DAT_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL); -// GRN_DAT_DEBUG_THROW_IF((x != INVALID_LEADER) && (x >= num_blocks())); leaders_[i] = x; } @@ -211,8 +248,14 @@ class DoubleArrayHeader { uint32_t entries_block_id_; uint32_t keys_block_id_; uint64_t root_node_id_; + uint64_t total_key_length_; + uint64_t next_key_id_; + uint64_t next_key_pos_; + uint64_t max_key_id_; + uint64_t num_keys_; uint64_t num_chunks_; uint64_t num_phantoms_; + uint64_t num_zombies_; uint64_t leaders_[DOUBLE_ARRAY_MAX_CHUNK_LEVEL + 1]; Mutex inter_process_mutex_; }; @@ -300,24 +343,31 @@ class DoubleArrayNode { qword_ = (qword_ & ~LABEL_MASK) | value; } - // A leaf node stores the offset and the length of its associated key. - uint64_t key_offset() const { + // A leaf node stores the start position and the length of the associated + // key. + uint64_t key_pos() const { // 40 bits. - return (qword_ >> KEY_OFFSET_SHIFT) & KEY_OFFSET_MASK; + return (qword_ >> KEY_POS_SHIFT) & KEY_POS_MASK; } uint64_t key_length() const { // 12 bits. return (qword_ >> KEY_LENGTH_SHIFT) & KEY_LENGTH_MASK; } - void set_key_offset(uint64_t value) { - qword_ = (qword_ & ~(KEY_OFFSET_MASK << KEY_OFFSET_SHIFT)) | - (value << KEY_OFFSET_SHIFT); - } - void set_key_length(uint64_t value) { - qword_ = (qword_ & ~(KEY_LENGTH_MASK << KEY_LENGTH_SHIFT)) | - (value << KEY_LENGTH_SHIFT); + void set_key(uint64_t key_pos, uint64_t key_length) { + qword_ = (qword_ & ~((KEY_POS_MASK << KEY_POS_SHIFT) | + (KEY_LENGTH_MASK << KEY_LENGTH_SHIFT))) | + (key_pos << KEY_POS_SHIFT) | (key_length << KEY_LENGTH_SHIFT); } + // FIXME: may not be required. +// void set_key_pos(uint64_t value) { +// qword_ = (qword_ & ~(KEY_POS_MASK << KEY_POS_SHIFT)) | +// (value << KEY_POS_SHIFT); +// } +// void set_key_length(uint64_t value) { +// qword_ = (qword_ & ~(KEY_LENGTH_MASK << KEY_LENGTH_SHIFT)) | +// (value << KEY_LENGTH_SHIFT); +// } // A non-phantom and non-leaf node stores the offset to its children, // the label of its next sibling, and the label of its first child. @@ -362,8 +412,8 @@ class DoubleArrayNode { static constexpr uint64_t LABEL_MASK = 0xFF; - static constexpr uint64_t KEY_OFFSET_MASK = (uint64_t(1) << 40) - 1; - static constexpr uint8_t KEY_OFFSET_SHIFT = 8; + static constexpr uint64_t KEY_POS_MASK = (uint64_t(1) << 40) - 1; + static constexpr uint8_t KEY_POS_SHIFT = 8; static constexpr uint64_t KEY_LENGTH_MASK = (uint64_t(1) << 12) - 1; static constexpr uint8_t KEY_LENGTH_SHIFT = 48; @@ -452,15 +502,15 @@ class DoubleArrayEntry { } // A valid entry stores the offset and the length of its associated key. - uint64_t key_offset() const { - return qword_ & OFFSET_MASK; + uint64_t key_pos() const { + return qword_ & POS_MASK; } uint64_t key_length() const { return qword_ >> 48; } - void set_key(uint64_t offset, uint64_t length) { - qword_ = IS_VALID_FLAG | offset | (length << 48); + void set_key(uint64_t pos, uint64_t length) { + qword_ = IS_VALID_FLAG | pos | (length << 48); } // An invalid entry stores the index of the next invalid entry. @@ -476,14 +526,14 @@ class DoubleArrayEntry { uint64_t qword_; // 11 (= 64 - (1 + 40 + 12)) bits are not used. - static constexpr uint64_t OFFSET_MASK = uint64_t(1) << 40; + static constexpr uint64_t POS_MASK = uint64_t(1) << 40; static constexpr uint64_t IS_VALID_FLAG = uint64_t(1) << 47; }; // TODO class DoubleArrayKey { public: - DoubleArrayKey(uint64_t id, const char *address, uint64_t length); + DoubleArrayKey(uint64_t id, const void *address, uint64_t length); explicit operator bool() const { // FIXME: Magic number. @@ -532,18 +582,18 @@ class DoubleArrayImpl { uint32_t block_id); bool search(const uint8_t *ptr, uint64_t length, - uint64_t *key_offset = nullptr); + uint64_t *key_pos = nullptr); // TODO bool insert(const uint8_t *ptr, uint64_t length, - uint64_t *key_offset = nullptr); + uint64_t *key_pos = nullptr); - const DoubleArrayKey &get_key(uint64_t key_offset) { - return *reinterpret_cast<const DoubleArrayKey *>(&keys_[key_offset]); + const DoubleArrayKey &get_key(uint64_t key_pos) { + return *reinterpret_cast<const DoubleArrayKey *>(&keys_[key_pos]); } const DoubleArrayKey &ith_key(uint64_t key_id) { if (entries_[key_id]) { - return get_key(entries_[key_id].key_offset()); + return get_key(entries_[key_id].key_pos()); } return DoubleArrayKey::invalid_key(); } @@ -576,6 +626,8 @@ class DoubleArrayImpl { bool search_leaf(const uint8_t *ptr, uint64_t length, uint64_t &node_id, uint64_t &query_pos); + uint64_t append_key(const uint8_t *ptr, uint64_t length, uint64_t key_id); + void reserve_node(uint64_t node_id); void reserve_chunk(uint64_t chunk_id); -------------- next part -------------- HTML����������������������������...다운로드