[Groonga-commit] groonga/grnxx at 3703cba [master] Simplify and enable benchmark for Sorter.

Back to archive index

susumu.yata null+****@clear*****
Wed Dec 3 18:40:28 JST 2014


susumu.yata	2014-12-03 18:40:28 +0900 (Wed, 03 Dec 2014)

  New Revision: 3703cbabbd080d83c6d019e1a47e61bbd644e330
  https://github.com/groonga/grnxx/commit/3703cbabbd080d83c6d019e1a47e61bbd644e330

  Message:
    Simplify and enable benchmark for Sorter.

  Modified files:
    benchmark/Makefile.am
    benchmark/benchmark_sorter.cpp

  Modified: benchmark/Makefile.am (+5 -5)
===================================================================
--- benchmark/Makefile.am    2014-12-03 15:13:03 +0900 (f1e3aaa)
+++ benchmark/Makefile.am    2014-12-03 18:40:28 +0900 (389b1e0)
@@ -1,9 +1,9 @@
 noinst_PROGRAMS =		\
 	benchmark_filter_and	\
-	benchmark_filter_or
+	benchmark_filter_or	\
+	benchmark_sorter
 
-#	benchmark_adjuster	\
-#	benchmark_sorter
+#	benchmark_adjuster
 
 benchmark_filter_and_SOURCES = benchmark_filter_and.cpp
 benchmark_filter_and_LDADD = $(top_srcdir)/lib/grnxx/libgrnxx.la
@@ -14,5 +14,5 @@ benchmark_filter_or_LDADD = $(top_srcdir)/lib/grnxx/libgrnxx.la
 #benchmark_adjuster_SOURCES = benchmark_adjuster.cpp
 #benchmark_adjuster_LDADD = $(top_srcdir)/lib/grnxx/libgrnxx.la
 
-#benchmark_sorter_SOURCES = benchmark_sorter.cpp
-#benchmark_sorter_LDADD = $(top_srcdir)/lib/grnxx/libgrnxx.la
+benchmark_sorter_SOURCES = benchmark_sorter.cpp
+benchmark_sorter_LDADD = $(top_srcdir)/lib/grnxx/libgrnxx.la

  Modified: benchmark/benchmark_sorter.cpp (+219 -566)
===================================================================
--- benchmark/benchmark_sorter.cpp    2014-12-03 15:13:03 +0900 (f89e3a3)
+++ benchmark/benchmark_sorter.cpp    2014-12-03 18:40:28 +0900 (dca8475)
@@ -15,20 +15,21 @@
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
-#include <time.h>
-
 #include <algorithm>
 #include <cassert>
+#include <cstdio>
+#include <ctime>
 #include <iostream>
 #include <random>
 
-#include "grnxx/column.hpp"
-#include "grnxx/cursor.hpp"
 #include "grnxx/db.hpp"
-#include "grnxx/expression.hpp"
-#include "grnxx/table.hpp"
 #include "grnxx/sorter.hpp"
 
+namespace {
+
+constexpr size_t SIZE = 2000000;
+constexpr size_t LOOP = 5;
+
 class Timer {
  public:
   Timer() : base_(now()) {}
@@ -47,618 +48,270 @@ class Timer {
   double base_;
 };
 
-grnxx::Array<grnxx::Record> create_records(grnxx::Table *table) {
-  auto cursor = table->create_cursor();
-  grnxx::Array<grnxx::Record> records;
-  size_t count = cursor->read_all(&records);
-  assert(count == table->num_rows());
-  return records;
-}
+void create_bool_columns(grnxx::Table *table) {
+  grnxx::Column *columns[3];
+  columns[0] = table->create_column("Bool1", grnxx::BOOL_DATA);
+  columns[1] = table->create_column("Bool2", grnxx::BOOL_DATA);
+  columns[2] = table->create_column("Bool3", grnxx::BOOL_DATA);
 
-void benchmark_row_id() {
-  constexpr size_t NUM_ROWS = 1 << 21;
-  auto db = grnxx::open_db("");
-  auto table = db->create_table("Table");
   std::mt19937_64 rng;
-  for (size_t i = 0; i < NUM_ROWS; ++i) {
-    table->insert_row();
-  }
-
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      std::vector<grnxx::Int> row_ids(NUM_ROWS);
-      for (size_t i = 0; i < NUM_ROWS; ++i) {
-        row_ids[i] = grnxx::Int(i);
-      }
-      std::shuffle(row_ids.begin(), row_ids.end(), rng);
-      grnxx::Array<grnxx::Record> records;
-      records.resize(NUM_ROWS);
-      for (size_t i = 0; i < NUM_ROWS; ++i) {
-        records[i].row_id = row_ids[i];
-        records[i].score = grnxx::Float(0.0);
-      }
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_row_id();
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
+  for (size_t i = 0; i < SIZE; ++i) {
+    grnxx::Int row_id(i);
+    columns[0]->set(row_id, grnxx::Bool((rng() % 4) != 0));
+    columns[1]->set(row_id, grnxx::Bool((rng() % 2) != 0));
+    if ((rng() % 4) != 0) {
+      columns[2]->set(row_id, grnxx::Bool((rng() % 2) != 0));
     }
-    std::cout << "RowID" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
   }
 }
 
-void benchmark_score() {
-  constexpr size_t NUM_ROWS = 1 << 21;
-  auto db = grnxx::open_db("");
-  auto table = db->create_table("Table");
-  std::mt19937_64 rng;
-  for (size_t i = 0; i < NUM_ROWS; ++i) {
-    table->insert_row();
-  }
+void create_int_columns(grnxx::Table *table) {
+  grnxx::Column *columns[3];
+  columns[0] = table->create_column("Int1", grnxx::INT_DATA);
+  columns[1] = table->create_column("Int2", grnxx::INT_DATA);
+  columns[2] = table->create_column("Int3", grnxx::INT_DATA);
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      for (size_t i = 0; i < NUM_ROWS; ++i) {
-        if ((rng() % 4) != 0) {
-          records[i].score = grnxx::Float(1.0 * (rng() % 256) / 255);
-        }
-      }
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_score();
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
+  std::mt19937_64 rng;
+  for (size_t i = 0; i < SIZE; ++i) {
+    grnxx::Int row_id(i);
+    if ((rng() % 256) != 0) {
+      columns[0]->set(row_id, grnxx::Int(rng() % 256));
     }
-    std::cout << "Score_1" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
-
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      for (size_t i = 0; i < NUM_ROWS; ++i) {
-        if ((rng() % 4) != 0) {
-          records[i].score = grnxx::Float(1.0 * (rng() % 65536) / 65536);
-        }
-      }
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_score();
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
+    if ((rng() % 65536) != 0) {
+      columns[1]->set(row_id, grnxx::Int(rng() % 65536));
     }
-    std::cout << "Score_2" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
-
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      for (size_t i = 0; i < NUM_ROWS; ++i) {
-        if ((rng() % 4) != 0) {
-          records[i].score = grnxx::Float(1.0 * rng() / rng.max());
-        }
-      }
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_score();
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
-    }
-    std::cout << "Score_3" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
+    columns[2]->set(row_id, grnxx::Int(rng()));
   }
 }
 
-void benchmark_bool() {
-  constexpr size_t NUM_ROWS = 1 << 21;
-  auto db = grnxx::open_db("");
-  auto table = db->create_table("Table");
-  auto bool_1 = table->create_column("Bool_1", grnxx::BOOL_DATA);
-  auto bool_2 = table->create_column("Bool_2", grnxx::BOOL_DATA);
-  auto bool_3 = table->create_column("Bool_3", grnxx::BOOL_DATA);
+void create_float_columns(grnxx::Table *table) {
+  grnxx::Column *columns[3];
+  columns[0] = table->create_column("Float1", grnxx::FLOAT_DATA);
+  columns[1] = table->create_column("Float2", grnxx::FLOAT_DATA);
+  columns[2] = table->create_column("Float3", grnxx::FLOAT_DATA);
+
   std::mt19937_64 rng;
-  for (size_t i = 0; i < NUM_ROWS; ++i) {
-    grnxx::Int row_id = table->insert_row();
-    bool_1->set(row_id, grnxx::Bool((rng() % 4) != 0));
-    bool_2->set(row_id, grnxx::Bool((rng() % 2) != 0));
-    if ((rng() % 4) != 0) {
-      bool_3->set(row_id, grnxx::Bool((rng() % 2) != 0));
+  for (size_t i = 0; i < SIZE; ++i) {
+    grnxx::Int row_id(i);
+    if ((rng() % 256) != 0) {
+      columns[0]->set(row_id, grnxx::Float((rng() % 256) / 256.0));
     }
-  }
-
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_column("Bool_1");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
+    if ((rng() % 65536) != 0) {
+      columns[1]->set(row_id, grnxx::Float((rng() % 65536) / 65536.0));
     }
-    std::cout << "Bool_1" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
+    columns[2]->set(row_id, grnxx::Float(1.0 * rng() / rng.max()));
   }
+}
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_column("Bool_2");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
-    }
-    std::cout << "Bool_2" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
+void create_text_columns(grnxx::Table *table) {
+  grnxx::Column *columns[3];
+  columns[0] = table->create_column("Text1", grnxx::TEXT_DATA);
+  columns[1] = table->create_column("Text2", grnxx::TEXT_DATA);
+  columns[2] = table->create_column("Text3", grnxx::TEXT_DATA);
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_column("Bool_3");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
-    }
-    std::cout << "Bool_3" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
+  std::mt19937_64 rng;
+  char buf[16];
+  for (size_t i = 0; i < SIZE; ++i) {
+    grnxx::Int row_id(i);
+    std::sprintf(buf, "%02d", static_cast<int>(rng() % 100));
+    columns[0]->set(row_id, grnxx::Text(buf));
+    std::sprintf(buf, "%04d", static_cast<int>(rng() % 10000));
+    columns[1]->set(row_id, grnxx::Text(buf));
+    std::sprintf(buf, "%06d", static_cast<int>(rng() % 1000000));
+    columns[2]->set(row_id, grnxx::Text(buf));
   }
 }
 
-void benchmark_int() {
-  constexpr size_t NUM_ROWS = 1 << 21;
-  auto db = grnxx::open_db("");
+grnxx::Table *create_table(grnxx::DB *db) {
   auto table = db->create_table("Table");
-  auto int_1 = table->create_column("Int_1", grnxx::INT_DATA);
-  auto int_2 = table->create_column("Int_2", grnxx::INT_DATA);
-  auto int_3 = table->create_column("Int_3", grnxx::INT_DATA);
-  std::mt19937_64 rng;
-  for (size_t i = 0; i < NUM_ROWS; ++i) {
-    grnxx::Int row_id = table->insert_row();
-    if ((rng() % 4) != 0) {
-      int_1->set(row_id, grnxx::Int(rng() % 256));
-    }
-    if ((rng() % 4) != 0) {
-      int_2->set(row_id, grnxx::Int(rng() % 65536));
-    }
-    if ((rng() % 4) != 0) {
-      int_3->set(row_id, grnxx::Int(rng()));
-    }
+  for (size_t i = 0; i < SIZE; ++i) {
+    table->insert_row();
   }
+  create_bool_columns(table);
+  create_int_columns(table);
+  create_float_columns(table);
+  create_text_columns(table);
+  return table;
+}
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_column("Int_1");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
-    }
-    std::cout << "Int_1" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
+grnxx::Array<grnxx::Record> create_records(grnxx::Table *table) {
+  grnxx::Array<grnxx::Record> records;
+  auto cursor = table->create_cursor();
+  assert(cursor->read_all(&records) == SIZE);
+  return records;
+}
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_column("Int_2");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
-    }
-    std::cout << "Int_2" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
+void benchmark_row_id(grnxx::Table *table) {
+  std::cout << __PRETTY_FUNCTION__ << std::endl;
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_column("Int_3");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
+  double min_elapsed = std::numeric_limits<double>::max();
+  std::mt19937_64 rng;
+  for (size_t i = 0; i < LOOP; ++i) {
+    std::vector<grnxx::Int> row_ids(SIZE);
+    for (size_t i = 0; i < SIZE; ++i) {
+      row_ids[i] = grnxx::Int(i);
     }
-    std::cout << "Int_3" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
+    std::shuffle(row_ids.begin(), row_ids.end(), rng);
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(2);
-      expression_builder->push_column("Int_1");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      expression_builder->push_column("Int_2");
-      orders[1].expression = std::move(expression_builder->release());
-      orders[1].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
+    grnxx::Array<grnxx::Record> records;
+    records.resize(SIZE);
+    for (size_t j = 0; j < SIZE; ++j) {
+      records[j].row_id = row_ids[j];
+      records[j].score = grnxx::Float(0.0);
     }
-    std::cout << "Int_1, Int_2" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(2);
-      expression_builder->push_column("Int_1");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      expression_builder->push_column("Int_3");
-      orders[1].expression = std::move(expression_builder->release());
-      orders[1].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
+    Timer timer;
+    auto expression_builder = grnxx::ExpressionBuilder::create(table);
+    grnxx::Array<grnxx::SorterOrder> orders;
+    orders.resize(1);
+    expression_builder->push_row_id();
+    orders[0].expression = std::move(expression_builder->release());
+    orders[0].type = grnxx::SORTER_REGULAR_ORDER;
+    auto sorter = grnxx::Sorter::create(std::move(orders));
+    sorter->sort(&records);
+    double elapsed = timer.elapsed();
+    if (elapsed < min_elapsed) {
+      min_elapsed = elapsed;
     }
-    std::cout << "Int_1, Int_3" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
   }
+  std::cout << "min. elapsed [s] = " << min_elapsed << std::endl;
+}
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(2);
-      expression_builder->push_column("Int_2");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      expression_builder->push_column("Int_3");
-      orders[1].expression = std::move(expression_builder->release());
-      orders[1].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
+void benchmark_score(grnxx::Table *table, const char *column_name) {
+  double min_elapsed = std::numeric_limits<double>::max();
+  for (size_t i = 0; i < LOOP; ++i) {
+    auto records = create_records(table);
+    auto expression_builder = grnxx::ExpressionBuilder::create(table);
+    expression_builder->push_column(column_name);
+    expression_builder->release()->adjust(&records);
+
+    Timer timer;
+    grnxx::Array<grnxx::SorterOrder> orders;
+    orders.resize(1);
+    expression_builder->push_score();
+    orders[0].expression = std::move(expression_builder->release());
+    orders[0].type = grnxx::SORTER_REGULAR_ORDER;
+    auto sorter = grnxx::Sorter::create(std::move(orders));
+    sorter->sort(&records);
+    double elapsed = timer.elapsed();
+    if (elapsed < min_elapsed) {
+      min_elapsed = elapsed;
     }
-    std::cout << "Int_2, Int_3" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
   }
+  std::cout << column_name << ": "
+            << "RowID: min. elapsed [s] = " << min_elapsed << std::endl;
+}
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(3);
-      expression_builder->push_column("Int_1");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      expression_builder->push_column("Int_2");
-      orders[1].expression = std::move(expression_builder->release());
-      orders[1].type = grnxx::SORTER_REGULAR_ORDER;
-      expression_builder->push_column("Int_3");
-      orders[2].expression = std::move(expression_builder->release());
-      orders[2].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
-    }
-    std::cout << "Int_1, Int_2, Int_3" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
+void benchmark_score(grnxx::Table *table) {
+  std::cout << __PRETTY_FUNCTION__ << std::endl;
+
+  benchmark_score(table, "Float1");
+  benchmark_score(table, "Float2");
+  benchmark_score(table, "Float3");
 }
 
-void benchmark_float() {
-  constexpr size_t NUM_ROWS = 1 << 21;
-  auto db = grnxx::open_db("");
-  auto table = db->create_table("Table");
-  auto float_1 = table->create_column("Float_1", grnxx::FLOAT_DATA);
-  auto float_2 = table->create_column("Float_2", grnxx::FLOAT_DATA);
-  auto float_3 = table->create_column("Float_3", grnxx::FLOAT_DATA);
-  std::mt19937_64 rng;
-  for (size_t i = 0; i < NUM_ROWS; ++i) {
-    grnxx::Int row_id = table->insert_row();
-    if ((rng() % 4) != 0) {
-      float_1->set(row_id, grnxx::Float(1.0 * (rng() % 256) / 255));
+void benchmark_columns(grnxx::Table *table, const char *column_names) {
+  // Parse "column_names" as comma-separated column names.
+  grnxx::Array<grnxx::String> column_name_array;
+  grnxx::String string(column_names);
+  while (!string.is_empty()) {
+    size_t delim_pos = 0;
+    while (delim_pos < string.size()) {
+      if (string[delim_pos] == ',') {
+        break;
+      }
+      ++delim_pos;
     }
-    if ((rng() % 4) != 0) {
-      float_2->set(row_id, grnxx::Float(1.0 * (rng() % 65536) / 65535));
-    }
-    if ((rng() % 4) != 0) {
-      float_3->set(row_id, grnxx::Float(1.0 * rng() / rng.max()));
+    column_name_array.push_back(string.substring(0, delim_pos));
+    if (delim_pos == string.size()) {
+      break;
     }
+    string = string.substring(delim_pos + 1);
   }
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_column("Float_1");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
+  double min_elapsed = std::numeric_limits<double>::max();
+  for (size_t i = 0; i < LOOP; ++i) {
+    grnxx::Array<grnxx::Record> records = create_records(table);
+
+    Timer timer;
+    auto expression_builder = grnxx::ExpressionBuilder::create(table);
+    grnxx::Array<grnxx::SorterOrder> orders;
+    orders.resize(column_name_array.size());
+    for (size_t j = 0; j < orders.size(); ++j) {
+      expression_builder->push_column(column_name_array[j]);
+      orders[j].expression = std::move(expression_builder->release());
+      orders[j].type = grnxx::SORTER_REGULAR_ORDER;
     }
-    std::cout << "Float_1" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
-
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_column("Float_2");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
+    auto sorter = grnxx::Sorter::create(std::move(orders));
+    sorter->sort(&records);
+    double elapsed = timer.elapsed();
+    if (elapsed < min_elapsed) {
+      min_elapsed = elapsed;
     }
-    std::cout << "Float_2" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
   }
+  std::cout << column_names << ": "
+            << "min. elapsed [s] = " << min_elapsed << std::endl;
+}
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(1);
-      expression_builder->push_column("Float_3");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
-    }
-    std::cout << "Float_3" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
+void benchmark_bool(grnxx::Table *table) {
+  std::cout << __PRETTY_FUNCTION__ << std::endl;
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(2);
-      expression_builder->push_column("Float_1");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      expression_builder->push_column("Float_2");
-      orders[1].expression = std::move(expression_builder->release());
-      orders[1].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
-    }
-    std::cout << "Float_1, Float_2" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
+  benchmark_columns(table, "Bool1");
+  benchmark_columns(table, "Bool2");
+  benchmark_columns(table, "Bool3");
+  benchmark_columns(table, "Bool1,Bool2");
+  benchmark_columns(table, "Bool1,Bool3");
+  benchmark_columns(table, "Bool2,Bool3");
+  benchmark_columns(table, "Bool1,Bool2,Bool3");
+}
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(2);
-      expression_builder->push_column("Float_1");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      expression_builder->push_column("Float_3");
-      orders[1].expression = std::move(expression_builder->release());
-      orders[1].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
-    }
-    std::cout << "Float_1, Float_3" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
+void benchmark_int(grnxx::Table *table) {
+  std::cout << __PRETTY_FUNCTION__ << std::endl;
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(2);
-      expression_builder->push_column("Float_2");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      expression_builder->push_column("Float_3");
-      orders[1].expression = std::move(expression_builder->release());
-      orders[1].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
-    }
-    std::cout << "Float_2, Float_3" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
+  benchmark_columns(table, "Int1");
+  benchmark_columns(table, "Int2");
+  benchmark_columns(table, "Int3");
+  benchmark_columns(table, "Int1,Int2");
+  benchmark_columns(table, "Int1,Int3");
+  benchmark_columns(table, "Int2,Int3");
+  benchmark_columns(table, "Int1,Int2,Int3");
+}
 
-  {
-    double best_elapsed = std::numeric_limits<double>::max();
-    for (int i = 0; i < 5; ++i) {
-      grnxx::Array<grnxx::Record> records = create_records(table);
-      Timer timer;
-      auto expression_builder = grnxx::ExpressionBuilder::create(table);
-      grnxx::Array<grnxx::SorterOrder> orders;
-      orders.resize(3);
-      expression_builder->push_column("Float_1");
-      orders[0].expression = std::move(expression_builder->release());
-      orders[0].type = grnxx::SORTER_REGULAR_ORDER;
-      expression_builder->push_column("Float_2");
-      orders[1].expression = std::move(expression_builder->release());
-      orders[1].type = grnxx::SORTER_REGULAR_ORDER;
-      expression_builder->push_column("Float_3");
-      orders[2].expression = std::move(expression_builder->release());
-      orders[2].type = grnxx::SORTER_REGULAR_ORDER;
-      auto sorter = grnxx::Sorter::create(std::move(orders));
-      sorter->sort(&records);
-      double elapsed = timer.elapsed();
-      if (elapsed < best_elapsed) {
-        best_elapsed = elapsed;
-      }
-    }
-    std::cout << "Float_1, Float_2, Float_3" << std::endl;
-    std::cout << "best elapsed [s] = " << best_elapsed << std::endl;
-  }
+void benchmark_float(grnxx::Table *table) {
+  std::cout << __PRETTY_FUNCTION__ << std::endl;
+
+  benchmark_columns(table, "Float1");
+  benchmark_columns(table, "Float2");
+  benchmark_columns(table, "Float3");
+  benchmark_columns(table, "Float1,Float2");
+  benchmark_columns(table, "Float1,Float3");
+  benchmark_columns(table, "Float2,Float3");
+  benchmark_columns(table, "Float1,Float2,Float3");
 }
 
+void benchmark_text(grnxx::Table *table) {
+  std::cout << __PRETTY_FUNCTION__ << std::endl;
+
+  benchmark_columns(table, "Text1");
+  benchmark_columns(table, "Text2");
+  benchmark_columns(table, "Text3");
+  benchmark_columns(table, "Text1,Text2");
+  benchmark_columns(table, "Text1,Text3");
+  benchmark_columns(table, "Text2,Text3");
+  benchmark_columns(table, "Text1,Text2,Text3");
+}
+
+}  // namespace
+
 int main() {
-  benchmark_row_id();
-  benchmark_score();
-  benchmark_bool();
-  benchmark_int();
-  benchmark_float();
+  auto db = grnxx::open_db("");
+  auto table = create_table(db.get());
+
+  benchmark_row_id(table);
+  benchmark_score(table);
+  benchmark_bool(table);
+  benchmark_int(table);
+  benchmark_float(table);
+  benchmark_text(table);
   return 0;
 }
-------------- next part --------------
HTMLの添付ファイルを保管しました...
다운로드 



More information about the Groonga-commit mailing list
Back to archive index