susumu.yata
null+****@clear*****
Tue Dec 16 10:43:57 JST 2014
susumu.yata 2014-11-20 17:47:04 +0900 (Thu, 20 Nov 2014) New Revision: e856f1a2126fd7e998bf51c4e8b4bacd4deae464 https://github.com/groonga/grnxx/commit/e856f1a2126fd7e998bf51c4e8b4bacd4deae464 Message: Enable Sorter for Bool. (#112) Added files: lib/grnxx/impl/sorter.cpp lib/grnxx/impl/sorter.hpp Copied files: lib/grnxx/sorter-old.cpp (from lib/grnxx/sorter.cpp) Modified files: include/grnxx/Makefile.am include/grnxx/sorter.hpp lib/grnxx/Makefile.am lib/grnxx/impl/Makefile.am lib/grnxx/sorter.cpp Modified: include/grnxx/Makefile.am (+1 -0) =================================================================== --- include/grnxx/Makefile.am 2014-11-19 17:19:44 +0900 (b1cc5d8) +++ include/grnxx/Makefile.am 2014-11-20 17:47:04 +0900 (eb8daa7) @@ -13,6 +13,7 @@ pkginclude_HEADERS = \ features.hpp \ index.hpp \ library.hpp \ + sorter.hpp \ string.hpp \ table.hpp Modified: include/grnxx/sorter.hpp (+46 -43) =================================================================== --- include/grnxx/sorter.hpp 2014-11-19 17:19:44 +0900 (c2bd287) +++ include/grnxx/sorter.hpp 2014-11-20 17:47:04 +0900 (335f378) @@ -1,79 +1,82 @@ #ifndef GRNXX_SORTER_HPP #define GRNXX_SORTER_HPP -#include "grnxx/types.hpp" +#include <limits> +#include <memory> + +#include "grnxx/array.hpp" +#include "grnxx/data_types.hpp" +#include "grnxx/expression.hpp" +#include "grnxx/table.hpp" namespace grnxx { -namespace sorter { -class Node; +enum SorterOrderType { + // The natural order (the ascending order in most cases). + SORTER_REGULAR_ORDER, + // The reverse order (the descending order in most cases). + SORTER_REVERSE_ORDER +}; -} // namespace sorter +struct SorterOrder { + std::unique_ptr<Expression> expression; + SorterOrderType type; +}; -class Sorter { - public: - using Node = sorter::Node; +struct SorterOptions { + // The first "offset" records are skipped. + size_t offset; - ~Sorter(); + // At most "limit" records are sorted. + size_t limit; - // Return the associated table. - const Table *table() const { - return table_; - } + SorterOptions() + : offset(0), + limit(std::numeric_limits<size_t>::max()) {} +}; + +class Sorter { + public: + Sorter() = default; + virtual ~Sorter() = default; // Create an object for sorting records. // - // On success, returns a poitner to the sorter. - // On failure, returns nullptr and stores error information into "*error" if - // "error" != nullptr. - static unique_ptr<Sorter> create( - Error *error, - Array<SortOrder> &&orders, + // On success, returns the sorter. + // On failure, throws an exception. + static std::unique_ptr<Sorter> create( + Array<SorterOrder> &&orders, const SorterOptions &options = SorterOptions()); + // Return the associated table. + virtual const Table *table() const = 0; + // Set the target record set. // // Aborts sorting the old record set and starts sorting the new record set. // - // On success, returns true. - // On failure, returns false and stores error information into "*error" if - // "error" != nullptr. - bool reset(Error *error, Array<Record> *records); + // On failure, throws an exception. + virtual void reset(Array<Record> *records) = 0; // Progress sorting. // - // On success, returns true. - // On failure, returns false and stores error information into "*error" if - // "error" != nullptr. - bool progress(Error *error); + // On failure, throws an exception. + virtual void progress() = 0; // Finish sorting. // // Assumes that all the records are ready. // Leaves only the result records if offset and limit are specified. // - // On success, returns true. - // On failure, returns false and stores error information into "*error" if - // "error" != nullptr. - bool finish(Error *error); + // On failure, throws an exception. + virtual void finish() = 0; // Sort records. // // Calls reset() and finish() to sort records. // - // On success, returns true. - // On failure, returns false and stores error information into "*error" if - // "error" != nullptr. - bool sort(Error *error, Array<Record> *records); - - private: - const Table *table_; - unique_ptr<Node> head_; - Array<Record> *records_; - Int offset_; - Int limit_; - - Sorter(); + // On failure, throws an exception. + virtual void sort(Array<Record> *records) = 0; }; } // namespace grnxx Modified: lib/grnxx/Makefile.am (+2 -2) =================================================================== --- lib/grnxx/Makefile.am 2014-11-19 17:19:44 +0900 (9ff3252) +++ lib/grnxx/Makefile.am 2014-11-20 17:47:04 +0900 (e4062c3) @@ -13,12 +13,12 @@ libgrnxx_la_SOURCES = \ db.cpp \ expression.cpp \ library.cpp \ + sorter.cpp \ string.cpp # index.cpp \ # merger.cpp \ -# pipeline.cpp \ -# sorter.cpp +# pipeline.cpp libgrnxx_includedir = ${includedir}/grnxx libgrnxx_include_HEADERS = \ Modified: lib/grnxx/impl/Makefile.am (+2 -0) =================================================================== --- lib/grnxx/impl/Makefile.am 2014-11-19 17:19:44 +0900 (789fca8) +++ lib/grnxx/impl/Makefile.am 2014-11-20 17:47:04 +0900 (fc6cb2d) @@ -11,6 +11,7 @@ libgrnxx_impl_la_LDFLAGS = @AM_LTLDFLAGS@ libgrnxx_impl_la_SOURCES = \ db.cpp \ expression.cpp \ + sorter.cpp \ table.cpp libgrnxx_impl_includedir = ${includedir}/grnxx/impl @@ -20,4 +21,5 @@ libgrnxx_impl_include_HEADERS = \ db.hpp \ expression.hpp \ index.hpp \ + sorter.hpp \ table.hpp Added: lib/grnxx/impl/sorter.cpp (+581 -0) 100644 =================================================================== --- /dev/null +++ lib/grnxx/impl/sorter.cpp 2014-11-20 17:47:04 +0900 (962e26a) @@ -0,0 +1,581 @@ +#include "grnxx/impl/sorter.hpp" + +namespace grnxx { +namespace impl { +namespace sorter { + +// -- Node -- + +class Node { + public: + // Create a node for sorting records in "order". + // + // On success, returns the node. + // On failure, throws an exception. + static Node *create(SorterOrder &&order); + + explicit Node(SorterOrder &&order) + : order_(std::move(order)), + next_(nullptr) {} + virtual ~Node() = default; + + // Set the next node. + void set_next(Node *next) { + next_ = next; + } + + // Sort records in [begin, end). + // + // On success, returns true. + // On failure, throws an exception. + virtual void sort(ArrayRef<Record> ref, size_t begin, size_t end) = 0; + + protected: + SorterOrder order_; + Node *next_; +}; + +// --- TypedNode --- + +template <typename T> +class TypedNode : public Node { + public: + using Value = T; + + explicit TypedNode(SorterOrder &&order) + : Node(std::move(order)), + values_() {} + virtual ~TypedNode() = default; + + protected: + Array<Value> values_; +}; + +// ---- SeparatorNode ---- + +class SeparatorNode : public TypedNode<Bool> { + public: + explicit SeparatorNode(SorterOrder &&order) + : TypedNode<Bool>(std::move(order)), + prior_value_((order.type == SORTER_REGULAR_ORDER) ? + Bool::false_value() : Bool::true_value()) {} + ~SeparatorNode() {} + + void sort(ArrayRef<Record> records, size_t begin, size_t end); + + private: + uint8_t prior_value_; +}; + +void SeparatorNode::sort(ArrayRef<Record> records, size_t begin, size_t end) { + // Sort "records" as follows: + // - Prior values: [0, posterior_offset) + // - Posterior values: [posterior_offset, na_offset) + // - N/A: [na_offset, records.size()) + order_.expression->evaluate(records, &values_); + size_t posterior_offset = 0; + size_t na_offset = records.size(); + for (size_t i = 0; i < na_offset; ) { + if (values_[i].is_na()) { + std::swap(records[i], records[na_offset - 1]); + values_[i] = values_[na_offset - 1]; + --na_offset; + } else { + if (values_[i].value() == prior_value_) { + std::swap(records[posterior_offset], records[i]); + ++posterior_offset; + } + ++i; + } + } + + // Apply the next sort condition for blocks with 2 or more records. + if (next_) { + if ((posterior_offset >= 2) && + (posterior_offset > begin)) { + next_->sort(records.ref(0, posterior_offset), + begin, + (end < posterior_offset) ? end : posterior_offset); + } + if (((na_offset - posterior_offset) >= 2) && + (na_offset > begin) && + (posterior_offset < end)) { + next_->sort(records.ref(posterior_offset, na_offset - posterior_offset), + (begin < posterior_offset) ? 0 : (begin - posterior_offset), + ((end < na_offset) ? end : na_offset) - posterior_offset); + } + if (((records.size() - na_offset) >= 2) && + (na_offset < end)) { + next_->sort(records.ref(na_offset), + (begin < na_offset) ? 0 : (begin - na_offset), + end - na_offset); + } + } +} + +//// ----- RegularIsPrior ----- + +//struct RegularIsPrior { +// bool operator()(Bool arg) const { +// return !arg; +// } +//}; + +//// ----- ReverseIsPrior ----- + +//struct ReverseIsPrior { +// bool operator()(Bool arg) const { +// return arg; +// } +//}; + +//// ---- QuickSortNode ---- + +//template <typename T> +//struct Equal { +// bool operator()(T lhs, T rhs) const { +// return lhs == rhs; +// } +//}; + +//template <> +//struct Equal<Float> { +// bool operator()(Float lhs, Float rhs) const { +// return (lhs == rhs) || (std::isnan(lhs) && std::isnan(rhs)); +// } +//}; + +//template <typename T> +//class QuickSortNode : public TypedNode<typename T::Value> { +// public: +// using Comparer = T; +// using Value = typename Comparer::Value; + +// explicit QuickSortNode(SorterOrder &&order) +// : TypedNode<Value>(std::move(order)), +// comparer_() {} +// ~QuickSortNode() {} + +// bool sort(Error *error, ArrayRef<Record> records, Int begin, Int end); + +// private: +// Comparer comparer_; + +// // Sort records with ternary quick sort. +// // +// // Switches to insertion sort when the sorting range becomes small enough. +// // +// // On success, returns true. +// // On failure, returns false and stores error information into "*error" if +// // "error" != nullptr. +// bool quick_sort(Error *error, +// ArrayRef<Record> records, +// Value *values, +// Int begin, +// Int end); + +// // Sort records with insertion sort. +// // +// // Insertion sort should be used when there few records. +// // +// // On success, returns true. +// // On failure, returns false and stores error information into "*error" if +// // "error" != nullptr. +// bool insertion_sort(Error *error, ArrayRef<Record> records, Value *values); + +// // Choose the pivot and move it to the front. +// void move_pivot_first(ArrayRef<Record> records, Value *values); +//}; + +//template <typename T> +//bool QuickSortNode<T>::sort(Error *error, +// ArrayRef<Record> records, +// Int begin, +// Int end) { +// if (!this->order_.expression->evaluate(error, records, &this->values_)) { +// return false; +// } +// return quick_sort(error, records, this->values_.data(), begin, end); +//} + +//template <typename T> +//bool QuickSortNode<T>::quick_sort(Error *error, +// ArrayRef<Record> records, +// Value *values, +// Int begin, +// Int end) { +// // Use ternary quick sort if there are enough records. +// // +// // TODO: Currently, the threshold is 16. +// // This value should be optimized and replaced with a named constant. +// while (records.size() >= 16) { +// move_pivot_first(records, values); +// const Value pivot = values[0]; +// Int left = 1; +// Int right = records.size(); +// Int pivot_left = 1; +// Int pivot_right = records.size(); +// for ( ; ; ) { +// // Move entries based on comparison against the pivot. +// // Prior entries are moved to left. +// // Less prior entries are moved to right. +// // Entries which equal to the pivot are moved to the edges. +// while (left < right) { +// if (comparer_(pivot, values[left])) { +// break; +// } else if (Equal<Value>()(pivot, values[left])) { +// std::swap(values[left], values[pivot_left]); +// records.swap(left, pivot_left); +// ++pivot_left; +// } +// ++left; +// } +// while (left < right) { +// --right; +// if (comparer_(values[right], pivot)) { +// break; +// } else if (Equal<Value>()(values[right], pivot)) { +// --pivot_right; +// std::swap(values[right], values[pivot_right]); +// records.swap(right, pivot_right); +// } +// } +// if (left >= right) { +// break; +// } +// std::swap(values[left], values[right]); +// records.swap(left, right); +// ++left; +// } + +// // Move left pivot-equivalent entries to the left side of the boundary. +// while (pivot_left > 0) { +// --pivot_left; +// --left; +// std::swap(values[pivot_left], values[left]); +// records.swap(pivot_left, left); +// } +// // Move right pivot-equivalent entries to the right side of the boundary. +// while (pivot_right < records.size()) { +// std::swap(values[pivot_right], values[right]); +// records.swap(pivot_right, right); +// ++pivot_right; +// ++right; +// } + +// // Apply the next sort condition to the pivot-equivalent records. +// if (this->next_) { +// if (((right - left) >= 2) && (begin < right) && (end > left)) { +// Int next_begin = (begin < left) ? 0 : (begin - left); +// Int next_end = ((end > right) ? right : end) - left; +// if (!this->next_->sort(error, records.ref(left, right - left), +// next_begin, next_end)) { +// return false; +// } +// } +// } + +// // There are the left group and the right group. +// // A recursive call is used for sorting the smaller group. +// // The recursion depth is less than log_2(n) where n is the number of +// // records. +// // The next loop of the current call is used for sorting the larger group. +// if (left < (records.size() - right)) { +// if ((begin < left) && (left >= 2)) { +// Int next_end = (end < left) ? end : left; +// if (!quick_sort(error, records.ref(0, left), values, +// begin, next_end)) { +// return false; +// } +// } +// if (end <= right) { +// return true; +// } +// records = records.ref(right); +// values += right; +// begin -= right; +// if (begin < 0) { +// begin = 0; +// } +// end -= right; +// } else { +// if ((end > right) && ((records.size() - right) >= 2)) { +// Int next_begin = (begin < right) ? 0 : (begin - right); +// Int next_end = end - right; +// if (!quick_sort(error, records.ref(right), +// values + right, next_begin, next_end)) { +// return false; +// } +// } +// if (begin >= left) { +// return true; +// } +// records = records.ref(0, left); +// if (end > left) { +// end = left; +// } +// } +// } + +// if (records.size() >= 2) { +// return insertion_sort(error, records, values); +// } +// return true; +//} + +//template <typename T> +//bool QuickSortNode<T>::insertion_sort(Error *error, +// ArrayRef<Record> records, +// Value *values) { +// for (Int i = 1; i < records.size(); ++i) { +// for (Int j = i; j > 0; --j) { +// if (comparer_(values[j], values[j - 1])) { +// std::swap(values[j], values[j - 1]); +// records.swap(j, j - 1); +// } else { +// break; +// } +// } +// } + +// // Apply the next sorting if there are records having the same value. +// if (this->next_) { +// Int begin = 0; +// for (Int i = 1; i < records.size(); ++i) { +// if (!Equal<Value>()(values[i], values[begin])) { +// if ((i - begin) >= 2) { +// if (!this->next_->sort(error, records.ref(begin, i - begin), +// 0, i - begin)) { +// return false; +// } +// } +// begin = i; +// } +// } +// if ((records.size() - begin) >= 2) { +// if (!this->next_->sort(error, records.ref(begin), +// 0, records.size() - begin)) { +// return false; +// } +// } +// } +// return true; +//} + +//template <typename T> +//void QuickSortNode<T>::move_pivot_first(ArrayRef<Record> records, +// Value *values) { +// // Choose the median from values[1], values[1 / size], and values[size - 2]. +// // The reason why not using values[0] and values[size - 1] is to avoid the +// // worst case which occurs when the records are sorted in reverse order. +// Int first = 1; +// Int middle = records.size() / 2; +// Int last = records.size() - 2; +// if (comparer_(values[first], values[middle])) { +// // first < middle. +// if (comparer_(values[middle], values[last])) { +// // first < middle < last. +// std::swap(values[0], values[middle]); +// records.swap(0, middle); +// } else if (comparer_(values[first], values[last])) { +// // first < last < middle. +// std::swap(values[0], values[last]); +// records.swap(0, last); +// } else { +// // last < first < middle. +// std::swap(values[0], values[first]); +// records.swap(0, first); +// } +// } else if (comparer_(values[last], values[middle])) { +// // last < middle < first. +// std::swap(values[0], values[middle]); +// records.swap(0, middle); +// } else if (comparer_(values[last], values[first])) { +// // middle < last < first. +// std::swap(values[0], values[last]); +// records.swap(0, last); +// } else { +// // middle < first < last. +// std::swap(values[0], values[first]); +// records.swap(0, first); +// } +//} + +//// ----- RegularComparer ----- + +//template <typename T> +//struct RegularComparer { +// using Value = T; +// bool operator()(Value lhs, Value rhs) const { +// return lhs < rhs; +// } +//}; + +//template <> +//struct RegularComparer<Float> { +// using Value = Float; +// bool operator()(Value lhs, Value rhs) const { +// // Numbers are prior to NaN. +// if (std::isnan(lhs)) { +// return false; +// } else if (std::isnan(rhs)) { +// return true; +// } +// return lhs < rhs; +// } +//}; + +//template <> +//struct RegularComparer<GeoPoint> { +// using Value = GeoPoint; +// bool operator()(Value lhs, Value rhs) const { +// if (lhs.latitude() != rhs.latitude()) { +// return lhs.latitude() < rhs.latitude(); +// } +// return lhs.longitude() < rhs.longitude(); +// } +//}; + +//// ----- ReverseComparer ----- + +//template <typename T> +//struct ReverseComparer { +// using Value = T; +// bool operator()(Value lhs, Value rhs) const { +// return RegularComparer<T>()(rhs, lhs); +// } +//}; + +Node *Node::create(SorterOrder &&order) try { + switch (order.expression->data_type()) { + case BOOL_DATA: { + return new SeparatorNode(std::move(order)); + } +// case INT_DATA: { +// if (order.type == REGULAR_ORDER) { +// node.reset(new (nothrow) QuickSortNode<RegularComparer<Int>>( +// std::move(order))); +// } else { +// node.reset(new (nothrow) QuickSortNode<ReverseComparer<Int>>( +// std::move(order))); +// } +// break; +// } +// case FLOAT_DATA: { +// if (order.type == REGULAR_ORDER) { +// node.reset(new (nothrow) QuickSortNode<RegularComparer<Float>>( +// std::move(order))); +// } else { +// node.reset(new (nothrow) QuickSortNode<ReverseComparer<Float>>( +// std::move(order))); +// } +// break; +// } +// case GEO_POINT_DATA: { +// if (order.type == REGULAR_ORDER) { +// node.reset(new (nothrow) QuickSortNode<RegularComparer<GeoPoint>>( +// std::move(order))); +// } else { +// node.reset(new (nothrow) QuickSortNode<ReverseComparer<GeoPoint>>( +// std::move(order))); +// } +// break; +// } +// case TEXT_DATA: { +// if (order.type == REGULAR_ORDER) { +// node.reset(new (nothrow) QuickSortNode<RegularComparer<Text>>( +// std::move(order))); +// } else { +// node.reset(new (nothrow) QuickSortNode<ReverseComparer<Text>>( +// std::move(order))); +// } +// break; +// } + default: { + throw "Invalid data type"; // TODO + } + } +} catch (const std::bad_alloc &) { + throw "Memory allocation failed"; // TODO +} + +} // namespace sorter + +using namespace sorter; + +Sorter::~Sorter() {} + +void Sorter::reset(Array<Record> *records) { + records_ = records; +} + +void Sorter::progress() { + // TODO: Incremental sorting is not supported yet. +} + +void Sorter::finish() { + if (!records_) { + throw "No target"; // TODO + } + if ((offset_ >= records_->size()) || (limit_ <= 0)) { + records_->clear(); + return; + } + size_t begin = offset_; + size_t end; + if (limit_ <= (records_->size() - offset_)) { + end = offset_ + limit_; + } else { + end = records_->size(); + } + if (records_->size() <= 1) { + return; + } + nodes_[0]->sort(records_->ref(), begin, end); + for (size_t i = begin, j = 0; i < end; ++i, ++j) { + (*records_)[j] = (*records_)[i]; + } + records_->resize(end - begin); +} + +void Sorter::sort(Array<Record> *records) { + reset(records); + finish(); +} + + +Sorter::Sorter(Array<SorterOrder> &&orders, const SorterOptions &options) + : table_(nullptr), + nodes_(), + records_(nullptr), + offset_(options.offset), + limit_(options.limit) { + // A sorter requires one or more orders. + // Also, expressions must be valid and associated tables must be the same. + if (orders.size() == 0) { + throw "No order"; // TODO + } + for (size_t i = 0; i < orders.size(); ++i) { + if (!orders[i].expression) { + throw "Missing expression"; // TODO + } + } + table_ = static_cast<const Table *>(orders[0].expression->table()); + for (size_t i = 1; i < orders.size(); ++i) { + if (orders[i].expression->table() != table_) { + throw "Table conflict"; // TODO + } + } + + nodes_.resize(orders.size()); + for (size_t i = 0; i < orders.size(); ++i) { + nodes_[i].reset(Node::create(std::move(orders[i]))); + } + for (size_t i = 1; i < orders.size(); ++i) { + nodes_[i - 1]->set_next(nodes_[i].get()); + } + orders.clear(); +} + +} // namespace impl +} // namespace grnxx Added: lib/grnxx/impl/sorter.hpp (+49 -0) 100644 =================================================================== --- /dev/null +++ lib/grnxx/impl/sorter.hpp 2014-11-20 17:47:04 +0900 (96734ec) @@ -0,0 +1,49 @@ +#ifndef GRNXX_IMPL_SORTER_HPP +#define GRNXX_IMPL_SORTER_HPP + +#include "grnxx/impl/table.hpp" +#include "grnxx/sorter.hpp" + +namespace grnxx { +namespace impl { +namespace sorter { + +class Node; + +} // namespace sorter + +using SorterInterface = grnxx::Sorter; + +class Sorter : public SorterInterface { + public: + using Node = sorter::Node; + + // -- Public API (grnxx/sorter.hpp) -- + + ~Sorter(); + + const Table *table() const { + return table_; + } + void reset(Array<Record> *records); + void progress(); + void finish(); + void sort(Array<Record> *records); + + // -- Internal API -- + + // On failure, throws an exception. + Sorter(Array<SorterOrder> &&orders, const SorterOptions &options); + + private: + const Table *table_; + Array<std::unique_ptr<Node>> nodes_; + Array<Record> *records_; + size_t offset_; + size_t limit_; +}; + +} // namespace impl +} // namespace grnxx + +#endif // GRNXX_IMPL_SORTER_HPP Copied: lib/grnxx/sorter-old.cpp (+0 -0) 100% =================================================================== Modified: lib/grnxx/sorter.cpp (+9 -614) =================================================================== --- lib/grnxx/sorter.cpp 2014-11-19 17:19:44 +0900 (2e79c57) +++ lib/grnxx/sorter.cpp 2014-11-20 17:47:04 +0900 (bc6365c) @@ -1,623 +1,18 @@ #include "grnxx/sorter.hpp" -#include <cmath> +#include <new> -#include "grnxx/expression.hpp" +#include "grnxx/impl/sorter.hpp" namespace grnxx { -namespace sorter { -// -- Node -- - -class Node { - public: - static unique_ptr<Node> create(Error *error, SortOrder &&order); - - explicit Node(SortOrder &&order) : order_(std::move(order)), next_() {} - virtual ~Node() {} - - // Set the next node. - void set_next(unique_ptr<Node> &&next) { - next_ = std::move(next); - } - - // Sort records in [begin, end). - // - // On success, returns true. - // On failure, returns false and stores error information into "*error" if - // "error" != nullptr. - virtual bool sort(Error *error, - ArrayRef<Record> ref, - Int begin, - Int end) = 0; - - protected: - SortOrder order_; - unique_ptr<Node> next_; -}; - -// --- TypedNode --- - -template <typename T> -class TypedNode : public Node { - public: - using Value = T; - - explicit TypedNode(SortOrder &&order) - : Node(std::move(order)), - values_() {} - virtual ~TypedNode() {} - - protected: - Array<Value> values_; -}; - -// ---- SeparatorNode ---- - -template <typename T> -class SeparatorNode : public TypedNode<Bool> { - public: - using IsPrior = T; - - explicit SeparatorNode(SortOrder &&order) - : TypedNode<Bool>(std::move(order)), - is_prior_() {} - ~SeparatorNode() {} - - bool sort(Error *error, ArrayRef<Record> records, Int begin, Int end); - - private: - IsPrior is_prior_; -}; - -template <typename T> -bool SeparatorNode<T>::sort(Error *error, - ArrayRef<Record> records, - Int begin, - Int end) { - if (!order_.expression->evaluate(error, records, &values_)) { - return false; - } - - // Move prior entries to left and others to right. - Int left = 0; - Int right = records.size(); - while (left < right) { - if (is_prior_(values_[left])) { - ++left; - } else { - // Note that values_[left] will not be used again. - --right; - values_.set(left, values_[right]); - records.swap(left, right); - } - } - - // Now "left" equals to "right" and it points to the boundary. - // The left group is [0, left) and the right group is [left, records.size()). - if (this->next_) { - // Apply the next sort condition if blocks contain 2 or more records. - if ((left >= 2) && (begin < left)) { - if (!this->next_->sort(error, records.ref(0, left), - begin, (end < left) ? end : left)) { - return false; - } - } - if (((records.size() - left) >= 2) && (end > left)) { - if (begin < left) { - begin = 0; - } else { - begin -= left; - } - end -= left; - if (!this->next_->sort(error, records.ref(left), begin, end)) { - return false; - } - } - } - - return true; -} - -// ----- RegularIsPrior ----- - -struct RegularIsPrior { - bool operator()(Bool arg) const { - return !arg; - } -}; - -// ----- ReverseIsPrior ----- - -struct ReverseIsPrior { - bool operator()(Bool arg) const { - return arg; - } -}; - -// ---- QuickSortNode ---- - -template <typename T> -struct Equal { - bool operator()(T lhs, T rhs) const { - return lhs == rhs; - } -}; - -template <> -struct Equal<Float> { - bool operator()(Float lhs, Float rhs) const { - return (lhs == rhs) || (std::isnan(lhs) && std::isnan(rhs)); - } -}; - -template <typename T> -class QuickSortNode : public TypedNode<typename T::Value> { - public: - using Comparer = T; - using Value = typename Comparer::Value; - - explicit QuickSortNode(SortOrder &&order) - : TypedNode<Value>(std::move(order)), - comparer_() {} - ~QuickSortNode() {} - - bool sort(Error *error, ArrayRef<Record> records, Int begin, Int end); - - private: - Comparer comparer_; - - // Sort records with ternary quick sort. - // - // Switches to insertion sort when the sorting range becomes small enough. - // - // On success, returns true. - // On failure, returns false and stores error information into "*error" if - // "error" != nullptr. - bool quick_sort(Error *error, - ArrayRef<Record> records, - Value *values, - Int begin, - Int end); - - // Sort records with insertion sort. - // - // Insertion sort should be used when there few records. - // - // On success, returns true. - // On failure, returns false and stores error information into "*error" if - // "error" != nullptr. - bool insertion_sort(Error *error, ArrayRef<Record> records, Value *values); - - // Choose the pivot and move it to the front. - void move_pivot_first(ArrayRef<Record> records, Value *values); -}; - -template <typename T> -bool QuickSortNode<T>::sort(Error *error, - ArrayRef<Record> records, - Int begin, - Int end) { - if (!this->order_.expression->evaluate(error, records, &this->values_)) { - return false; - } - return quick_sort(error, records, this->values_.data(), begin, end); +std::unique_ptr<Sorter> Sorter::create( + Array<SorterOrder> &&orders, + const SorterOptions &options) try { + return std::unique_ptr<Sorter>( + new impl::Sorter(std::move(orders), options)); +} catch (const std::bad_alloc &) { + throw "Memory allocation failed"; // TODO } -template <typename T> -bool QuickSortNode<T>::quick_sort(Error *error, - ArrayRef<Record> records, - Value *values, - Int begin, - Int end) { - // Use ternary quick sort if there are enough records. - // - // TODO: Currently, the threshold is 16. - // This value should be optimized and replaced with a named constant. - while (records.size() >= 16) { - move_pivot_first(records, values); - const Value pivot = values[0]; - Int left = 1; - Int right = records.size(); - Int pivot_left = 1; - Int pivot_right = records.size(); - for ( ; ; ) { - // Move entries based on comparison against the pivot. - // Prior entries are moved to left. - // Less prior entries are moved to right. - // Entries which equal to the pivot are moved to the edges. - while (left < right) { - if (comparer_(pivot, values[left])) { - break; - } else if (Equal<Value>()(pivot, values[left])) { - std::swap(values[left], values[pivot_left]); - records.swap(left, pivot_left); - ++pivot_left; - } - ++left; - } - while (left < right) { - --right; - if (comparer_(values[right], pivot)) { - break; - } else if (Equal<Value>()(values[right], pivot)) { - --pivot_right; - std::swap(values[right], values[pivot_right]); - records.swap(right, pivot_right); - } - } - if (left >= right) { - break; - } - std::swap(values[left], values[right]); - records.swap(left, right); - ++left; - } - - // Move left pivot-equivalent entries to the left side of the boundary. - while (pivot_left > 0) { - --pivot_left; - --left; - std::swap(values[pivot_left], values[left]); - records.swap(pivot_left, left); - } - // Move right pivot-equivalent entries to the right side of the boundary. - while (pivot_right < records.size()) { - std::swap(values[pivot_right], values[right]); - records.swap(pivot_right, right); - ++pivot_right; - ++right; - } - - // Apply the next sort condition to the pivot-equivalent records. - if (this->next_) { - if (((right - left) >= 2) && (begin < right) && (end > left)) { - Int next_begin = (begin < left) ? 0 : (begin - left); - Int next_end = ((end > right) ? right : end) - left; - if (!this->next_->sort(error, records.ref(left, right - left), - next_begin, next_end)) { - return false; - } - } - } - - // There are the left group and the right group. - // A recursive call is used for sorting the smaller group. - // The recursion depth is less than log_2(n) where n is the number of - // records. - // The next loop of the current call is used for sorting the larger group. - if (left < (records.size() - right)) { - if ((begin < left) && (left >= 2)) { - Int next_end = (end < left) ? end : left; - if (!quick_sort(error, records.ref(0, left), values, - begin, next_end)) { - return false; - } - } - if (end <= right) { - return true; - } - records = records.ref(right); - values += right; - begin -= right; - if (begin < 0) { - begin = 0; - } - end -= right; - } else { - if ((end > right) && ((records.size() - right) >= 2)) { - Int next_begin = (begin < right) ? 0 : (begin - right); - Int next_end = end - right; - if (!quick_sort(error, records.ref(right), - values + right, next_begin, next_end)) { - return false; - } - } - if (begin >= left) { - return true; - } - records = records.ref(0, left); - if (end > left) { - end = left; - } - } - } - - if (records.size() >= 2) { - return insertion_sort(error, records, values); - } - return true; -} - -template <typename T> -bool QuickSortNode<T>::insertion_sort(Error *error, - ArrayRef<Record> records, - Value *values) { - for (Int i = 1; i < records.size(); ++i) { - for (Int j = i; j > 0; --j) { - if (comparer_(values[j], values[j - 1])) { - std::swap(values[j], values[j - 1]); - records.swap(j, j - 1); - } else { - break; - } - } - } - - // Apply the next sorting if there are records having the same value. - if (this->next_) { - Int begin = 0; - for (Int i = 1; i < records.size(); ++i) { - if (!Equal<Value>()(values[i], values[begin])) { - if ((i - begin) >= 2) { - if (!this->next_->sort(error, records.ref(begin, i - begin), - 0, i - begin)) { - return false; - } - } - begin = i; - } - } - if ((records.size() - begin) >= 2) { - if (!this->next_->sort(error, records.ref(begin), - 0, records.size() - begin)) { - return false; - } - } - } - return true; -} - -template <typename T> -void QuickSortNode<T>::move_pivot_first(ArrayRef<Record> records, - Value *values) { - // Choose the median from values[1], values[1 / size], and values[size - 2]. - // The reason why not using values[0] and values[size - 1] is to avoid the - // worst case which occurs when the records are sorted in reverse order. - Int first = 1; - Int middle = records.size() / 2; - Int last = records.size() - 2; - if (comparer_(values[first], values[middle])) { - // first < middle. - if (comparer_(values[middle], values[last])) { - // first < middle < last. - std::swap(values[0], values[middle]); - records.swap(0, middle); - } else if (comparer_(values[first], values[last])) { - // first < last < middle. - std::swap(values[0], values[last]); - records.swap(0, last); - } else { - // last < first < middle. - std::swap(values[0], values[first]); - records.swap(0, first); - } - } else if (comparer_(values[last], values[middle])) { - // last < middle < first. - std::swap(values[0], values[middle]); - records.swap(0, middle); - } else if (comparer_(values[last], values[first])) { - // middle < last < first. - std::swap(values[0], values[last]); - records.swap(0, last); - } else { - // middle < first < last. - std::swap(values[0], values[first]); - records.swap(0, first); - } -} - -// ----- RegularComparer ----- - -template <typename T> -struct RegularComparer { - using Value = T; - bool operator()(Value lhs, Value rhs) const { - return lhs < rhs; - } -}; - -template <> -struct RegularComparer<Float> { - using Value = Float; - bool operator()(Value lhs, Value rhs) const { - // Numbers are prior to NaN. - if (std::isnan(lhs)) { - return false; - } else if (std::isnan(rhs)) { - return true; - } - return lhs < rhs; - } -}; - -template <> -struct RegularComparer<GeoPoint> { - using Value = GeoPoint; - bool operator()(Value lhs, Value rhs) const { - if (lhs.latitude() != rhs.latitude()) { - return lhs.latitude() < rhs.latitude(); - } - return lhs.longitude() < rhs.longitude(); - } -}; - -// ----- ReverseComparer ----- - -template <typename T> -struct ReverseComparer { - using Value = T; - bool operator()(Value lhs, Value rhs) const { - return RegularComparer<T>()(rhs, lhs); - } -}; - -} // namespace sorter - -using namespace sorter; - -unique_ptr<Node> Node::create(Error *error, SortOrder &&order) { - unique_ptr<Node> node; - switch (order.expression->data_type()) { - case BOOL_DATA: { - if (order.type == REGULAR_ORDER) { - node.reset(new (nothrow) SeparatorNode<RegularIsPrior>( - std::move(order))); - } else { - node.reset(new (nothrow) SeparatorNode<ReverseIsPrior>( - std::move(order))); - } - break; - } - case INT_DATA: { - if (order.type == REGULAR_ORDER) { - node.reset(new (nothrow) QuickSortNode<RegularComparer<Int>>( - std::move(order))); - } else { - node.reset(new (nothrow) QuickSortNode<ReverseComparer<Int>>( - std::move(order))); - } - break; - } - case FLOAT_DATA: { - if (order.type == REGULAR_ORDER) { - node.reset(new (nothrow) QuickSortNode<RegularComparer<Float>>( - std::move(order))); - } else { - node.reset(new (nothrow) QuickSortNode<ReverseComparer<Float>>( - std::move(order))); - } - break; - } - case GEO_POINT_DATA: { - if (order.type == REGULAR_ORDER) { - node.reset(new (nothrow) QuickSortNode<RegularComparer<GeoPoint>>( - std::move(order))); - } else { - node.reset(new (nothrow) QuickSortNode<ReverseComparer<GeoPoint>>( - std::move(order))); - } - break; - } - case TEXT_DATA: { - if (order.type == REGULAR_ORDER) { - node.reset(new (nothrow) QuickSortNode<RegularComparer<Text>>( - std::move(order))); - } else { - node.reset(new (nothrow) QuickSortNode<ReverseComparer<Text>>( - std::move(order))); - } - break; - } - default: { - GRNXX_ERROR_SET(error, NOT_SUPPORTED_YET, "Not supported yet"); - return nullptr; - } - } - if (!node) { - GRNXX_ERROR_SET(error, NO_MEMORY, "Memory allocation failed"); - } - return node; -} - -Sorter::~Sorter() {} - -unique_ptr<Sorter> Sorter::create( - Error *error, - Array<SortOrder> &&orders, - const SorterOptions &options) { - // Sorting require at least one sort order. - // Also, expressions must be valid and associated tables must be same. - if (orders.size() == 0) { - GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Empty sort order"); - return nullptr; - } - for (Int i = 0; i < orders.size(); ++i) { - if (!orders[i].expression) { - GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Empty sort order"); - return nullptr; - } - } - const Table *table = orders[0].expression->table(); - for (Int i = 1; i < orders.size(); ++i) { - if (orders[i].expression->table() != table) { - GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Table conflict"); - return nullptr; - } - } - - if ((options.offset < 0) || (options.limit < 0)) { - GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Invalid argument"); - return nullptr; - } - unique_ptr<Sorter> sorter(new (nothrow) Sorter); - if (!sorter) { - GRNXX_ERROR_SET(error, NO_MEMORY, "Memory allocation failed"); - return nullptr; - } - sorter->table_ = table; - for (Int i = orders.size() - 1; i >= 0; --i) { - unique_ptr<Node> node(Node::create(error, std::move(orders[i]))); - if (!node) { - return nullptr; - } - node->set_next(std::move(sorter->head_)); - sorter->head_ = std::move(node); - } - orders.clear(); - sorter->offset_ = options.offset; - sorter->limit_ = options.limit; - return sorter; -} - -bool Sorter::reset(Error *, Array<Record> *records) { - records_ = records; - return true; -} - -bool Sorter::progress(Error *) { - // TODO: Incremental sorting is not supported yet. - return true; -} - -bool Sorter::finish(Error *error) { - if (!records_) { - // Nothing to do. - return true; - } - if ((offset_ >= records_->size()) || (limit_ <= 0)) { - records_->clear(); - return true; - } - Int begin = offset_; - Int end; - if (limit_ <= (records_->size() - offset_)) { - end = offset_ + limit_; - } else { - end = records_->size(); - } - if (records_->size() <= 1) { - return true; - } - if (!head_->sort(error, records_->ref(), begin, end)) { - return false; - } - for (Int i = begin, j = 0; i < end; ++i, ++j) { - records_->set(j, records_->get(i)); - } - records_->resize(nullptr, end - begin); - return true; -} - -bool Sorter::sort(Error *error, Array<Record> *records) { - return reset(error, records) && finish(error); -} - -Sorter::Sorter() - : table_(nullptr), - head_(), - records_(nullptr), - offset_(0), - limit_(0) {} - } // namespace grnxx -------------- next part -------------- HTML����������������������������... 다운로드