[Groonga-commit] groonga/groonga at 2d8ecfe [master] Support weight in UVECTOR

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Jun 20 15:50:40 JST 2014


Kouhei Sutou	2014-06-20 15:50:40 +0900 (Fri, 20 Jun 2014)

  New Revision: 2d8ecfeabbf7ad929c8a1090903a0b2a5167d95c
  https://github.com/groonga/groonga/commit/2d8ecfeabbf7ad929c8a1090903a0b2a5167d95c

  Merged 58a9743: Merge pull request #176 from groonga/support-weight-uvector

  Message:
    Support weight in UVECTOR
    
    If uvector->header.impl_flags has GRN_OBJ_WITH_WEIGHT flag, the
    uvector has weight information.
    
    If an uvector has weight information, it uses the following format:
    
        |grn_id1|weight1|grn_id2|weight2|...|
         uint    uint    uint    uint
    
    Uvector without weight uses the following format:
    
        |grn_id1|grn_id2|...|
         uint    uint

  Modified files:
    include/groonga.h
    lib/db.c
    lib/ii.c

  Modified: include/groonga.h (+13 -0)
===================================================================
--- include/groonga.h    2014-06-16 17:20:41 +0900 (0a1e264)
+++ include/groonga.h    2014-06-20 15:50:40 +0900 (1f10ac3)
@@ -896,6 +896,19 @@ GRN_API unsigned int grn_vector_get_element(grn_ctx *ctx, grn_obj *vector,
                                             unsigned int *weight, grn_id *domain);
 
 /*-------------------------------------------------------------
+ * grn_uvector
+*/
+
+GRN_API unsigned int grn_uvector_size(grn_ctx *ctx, grn_obj *uvector);
+
+GRN_API grn_rc grn_uvector_add_element(grn_ctx *ctx, grn_obj *vector,
+                                       grn_id id, unsigned int weight);
+
+GRN_API grn_id grn_uvector_get_element(grn_ctx *ctx, grn_obj *uvector,
+                                       unsigned int offset,
+                                       unsigned int *weight);
+
+/*-------------------------------------------------------------
  * API for hook
  */
 

  Modified: lib/db.c (+207 -24)
===================================================================
--- lib/db.c    2014-06-16 17:20:41 +0900 (2f81fda)
+++ lib/db.c    2014-06-20 15:50:40 +0900 (d89a3c8)
@@ -1,5 +1,5 @@
 /* -*- c-basic-offset: 2 -*- */
-/* Copyright(C) 2009-2013 Brazil
+/* Copyright(C) 2009-2014 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -32,6 +32,13 @@
 #include <string.h>
 #include <float.h>
 
+typedef struct {
+  grn_id id;
+  unsigned int weight;
+} weight_uvector_entry;
+
+#define IS_WEIGHT_UVECTOR(obj) ((obj)->header.impl_flags & GRN_OBJ_WITH_WEIGHT)
+
 #define NEXT_ADDR(p) (((byte *)(p)) + sizeof(*(p)))
 
 #define GRN_TABLE_GROUPED (0x01<<0)
@@ -3821,6 +3828,20 @@ grn_vector_delimit(grn_ctx *ctx, grn_obj *vector)
 }
 */
 
+static unsigned int
+grn_uvector_size_internal(grn_ctx *ctx, grn_obj *uvector)
+{
+  unsigned int size;
+
+  if (IS_WEIGHT_UVECTOR(uvector)) {
+    size = GRN_BULK_VSIZE(uvector) / sizeof(weight_uvector_entry);
+  } else {
+    size = GRN_BULK_VSIZE(uvector) / sizeof(grn_id);
+  }
+
+  return size;
+}
+
 unsigned int
 grn_vector_size(grn_ctx *ctx, grn_obj *vector)
 {
@@ -3835,7 +3856,7 @@ grn_vector_size(grn_ctx *ctx, grn_obj *vector)
     size = GRN_BULK_VSIZE(vector);
     break;
   case GRN_UVECTOR :
-    size = GRN_BULK_VSIZE(vector) / sizeof(grn_id);
+    size = grn_uvector_size_internal(ctx, vector);
     break;
   case GRN_VECTOR :
     size = vector->u.v.n_sections;
@@ -4063,6 +4084,101 @@ grn_vector_to_sections(grn_ctx *ctx, grn_obj *vector, grn_obj *sections)
 }
 */
 
+/**** uvector ****/
+
+unsigned int
+grn_uvector_size(grn_ctx *ctx, grn_obj *uvector)
+{
+  unsigned int size;
+
+  if (!uvector) {
+    ERR(GRN_INVALID_ARGUMENT, "uvector must not be NULL");
+    return 0;
+  }
+
+  if (uvector->header.type != GRN_UVECTOR) {
+    grn_obj type_name;
+    GRN_TEXT_INIT(&type_name, 0);
+    grn_inspect_type(ctx, &type_name, uvector->header.type);
+    ERR(GRN_INVALID_ARGUMENT, "must be GRN_UVECTOR: %.*s",
+        (int)GRN_TEXT_LEN(&type_name), GRN_TEXT_VALUE(&type_name));
+    GRN_OBJ_FIN(ctx, &type_name);
+    return 0;
+  }
+
+  GRN_API_ENTER;
+  size = grn_uvector_size_internal(ctx, uvector);
+  GRN_API_RETURN(size);
+}
+
+
+grn_rc
+grn_uvector_add_element(grn_ctx *ctx, grn_obj *uvector,
+                        grn_id id, unsigned int weight)
+{
+  GRN_API_ENTER;
+  if (!uvector) {
+    ERR(GRN_INVALID_ARGUMENT, "uvector is null");
+    goto exit;
+  }
+  if (IS_WEIGHT_UVECTOR(uvector)) {
+    weight_uvector_entry entry;
+    entry.id = id;
+    entry.weight = weight;
+    grn_bulk_write(ctx, uvector,
+                   (const char *)&entry, sizeof(weight_uvector_entry));
+  } else {
+    grn_bulk_write(ctx, uvector,
+                   (const char *)&id, sizeof(grn_id));
+  }
+exit :
+  GRN_API_RETURN(ctx->rc);
+}
+
+grn_id
+grn_uvector_get_element(grn_ctx *ctx, grn_obj *uvector,
+                        unsigned int offset, unsigned int *weight)
+{
+  grn_id id = GRN_ID_NIL;
+
+  GRN_API_ENTER;
+  if (!uvector || uvector->header.type != GRN_UVECTOR) {
+    ERR(GRN_INVALID_ARGUMENT, "invalid uvector");
+    goto exit;
+  }
+
+  if (IS_WEIGHT_UVECTOR(uvector)) {
+    const weight_uvector_entry *entry;
+    const weight_uvector_entry *entries_start;
+    const weight_uvector_entry *entries_end;
+
+    entries_start = (const weight_uvector_entry *)GRN_BULK_HEAD(uvector);
+    entries_end = (const weight_uvector_entry *)GRN_BULK_CURR(uvector);
+    if (offset > entries_end - entries_start) {
+      ERR(GRN_RANGE_ERROR, "offset out of range");
+      goto exit;
+    }
+
+    entry = entries_start + offset;
+    id = entry->id;
+    if (weight) { *weight = entry->weight; }
+  } else {
+    const grn_id *ids_start;
+    const grn_id *ids_end;
+
+    ids_start = (const grn_id *)GRN_BULK_HEAD(uvector);
+    ids_end = (const grn_id *)GRN_BULK_CURR(uvector);
+    if (offset > ids_end - ids_start) {
+      ERR(GRN_RANGE_ERROR, "offset out of range");
+      goto exit;
+    }
+    id = ids_start[offset];
+    if (weight) { *weight = 0; }
+  }
+exit :
+  GRN_API_RETURN(id);
+}
+
 /**** accessor ****/
 
 static grn_accessor *
@@ -5484,6 +5600,56 @@ grn_obj_set_value_column_var_size_scalar(grn_ctx *ctx, grn_obj *obj, grn_id id,
 }
 
 static grn_rc
+grn_obj_set_value_column_var_size_vector_uvector(grn_ctx *ctx, grn_obj *column,
+                                                 grn_id id, grn_obj *value,
+                                                 int flags)
+{
+  grn_rc rc = GRN_INVALID_ARGUMENT;
+  grn_obj uvector;
+  grn_obj_flags uvector_flags = 0;
+  grn_bool need_convert = GRN_FALSE;
+  void *raw_value;
+  unsigned int size;
+
+  if (column->header.flags & GRN_OBJ_WITH_WEIGHT) {
+    if (!IS_WEIGHT_UVECTOR(value)) {
+      need_convert = GRN_TRUE;
+    }
+  } else {
+    if (IS_WEIGHT_UVECTOR(value)) {
+      need_convert = GRN_TRUE;
+      uvector_flags = GRN_OBJ_WITH_WEIGHT;
+    }
+  }
+
+  if (need_convert) {
+    unsigned int i, n;
+    GRN_VALUE_FIX_SIZE_INIT(&uvector, GRN_OBJ_VECTOR, value->header.domain);
+    uvector.header.impl_flags |= uvector_flags;
+    n = grn_uvector_size(ctx, value);
+    for (i = 0; i < n; i++) {
+      grn_id id;
+      unsigned int weight = 0;
+      id = grn_uvector_get_element(ctx, value, i, NULL);
+      grn_uvector_add_element(ctx, &uvector, id, weight);
+    }
+    raw_value = GRN_BULK_HEAD(&uvector);
+    size = GRN_BULK_VSIZE(&uvector);
+  } else {
+    raw_value = GRN_BULK_HEAD(value);
+    size = GRN_BULK_VSIZE(value);
+  }
+
+  rc = grn_ja_put(ctx, (grn_ja *)column, id, raw_value, size, flags, NULL);
+
+  if (need_convert) {
+    GRN_OBJ_FIN(ctx, &uvector);
+  }
+
+  return rc;
+}
+
+static grn_rc
 grn_obj_set_value_column_var_size_vector(grn_ctx *ctx, grn_obj *obj, grn_id id,
                                          grn_obj *value, int flags)
 {
@@ -5497,6 +5663,13 @@ grn_obj_set_value_column_var_size_vector(grn_ctx *ctx, grn_obj *obj, grn_id id,
     return rc;
   }
 
+  if (value->header.type == GRN_UVECTOR) {
+    rc = grn_obj_set_value_column_var_size_vector_uvector(ctx, obj,
+                                                          id, value,
+                                                          flags);
+    return rc;
+  }
+
   if (GRN_OBJ_TABLEP(lexicon)) {
     grn_obj buf;
     GRN_TEXT_INIT(&buf, 0);
@@ -5561,9 +5734,6 @@ grn_obj_set_value_column_var_size_vector(grn_ctx *ctx, grn_obj *obj, grn_id id,
       rc = grn_ja_put(ctx, (grn_ja *)obj, id,
                       GRN_BULK_HEAD(&buf), GRN_BULK_VSIZE(&buf), flags, NULL);
       break;
-    case GRN_UVECTOR :
-      rc = grn_ja_put(ctx, (grn_ja *)obj, id, v, s, flags, NULL);
-      break;
     default :
       ERR(GRN_INVALID_ARGUMENT, "vector, uvector or bulk required");
       break;
@@ -5583,9 +5753,6 @@ grn_obj_set_value_column_var_size_vector(grn_ctx *ctx, grn_obj *obj, grn_id id,
         grn_obj_close(ctx, &v);
       }
       break;
-    case GRN_UVECTOR :
-      rc = grn_ja_put(ctx, (grn_ja *)obj, id, v, s, flags, NULL);
-      break;
     case GRN_VECTOR :
       rc = grn_ja_putv(ctx, (grn_ja *)obj, id, value, 0);
       break;
@@ -5795,6 +5962,35 @@ grn_obj_get_value_column_index(grn_ctx *ctx, grn_obj *index_column,
   value->header.domain = GRN_DB_UINT32;
 }
 
+static grn_obj *
+grn_obj_get_value_column_vector(grn_ctx *ctx, grn_obj *obj,
+                                grn_id id, grn_obj *value)
+{
+  grn_obj *lexicon;
+
+  lexicon = grn_ctx_at(ctx, DB_OBJ(obj)->range);
+  if (lexicon && !GRN_OBJ_TABLEP(lexicon) &&
+      (lexicon->header.flags & GRN_OBJ_KEY_VAR_SIZE)) {
+    grn_obj v_;
+    grn_obj_ensure_vector(ctx, value);
+    GRN_TEXT_INIT(&v_, 0);
+    grn_ja_get_value(ctx, (grn_ja *)obj, id, &v_);
+    grn_vector_decode(ctx, value, GRN_TEXT_VALUE(&v_), GRN_TEXT_LEN(&v_));
+    GRN_OBJ_FIN(ctx, &v_);
+  } else {
+    grn_obj_ensure_bulk(ctx, value);
+    grn_ja_get_value(ctx, (grn_ja *)obj, id, value);
+    value->header.type = GRN_UVECTOR;
+    if (obj->header.flags & GRN_OBJ_WITH_WEIGHT) {
+      value->header.impl_flags |= GRN_OBJ_WITH_WEIGHT;
+    } else {
+      value->header.impl_flags &= ~GRN_OBJ_WITH_WEIGHT;
+    }
+  }
+
+  return value;
+}
+
 grn_obj *
 grn_obj_get_value(grn_ctx *ctx, grn_obj *obj, grn_id id, grn_obj *value)
 {
@@ -5886,22 +6082,7 @@ grn_obj_get_value(grn_ctx *ctx, grn_obj *obj, grn_id id, grn_obj *value)
   case GRN_COLUMN_VAR_SIZE :
     switch (obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) {
     case GRN_OBJ_COLUMN_VECTOR :
-      {
-        grn_obj *lexicon = grn_ctx_at(ctx, DB_OBJ(obj)->range);
-        if (lexicon && !GRN_OBJ_TABLEP(lexicon) &&
-            (lexicon->header.flags & GRN_OBJ_KEY_VAR_SIZE)) {
-          grn_obj v_;
-          grn_obj_ensure_vector(ctx, value);
-          GRN_TEXT_INIT(&v_, 0);
-          grn_ja_get_value(ctx, (grn_ja *)obj, id, &v_);
-          grn_vector_decode(ctx, value, GRN_TEXT_VALUE(&v_), GRN_TEXT_LEN(&v_));
-          GRN_OBJ_FIN(ctx, &v_);
-        } else {
-          grn_obj_ensure_bulk(ctx, value);
-          grn_ja_get_value(ctx, (grn_ja *)obj, id, value);
-          value->header.type = GRN_UVECTOR;
-        }
-      }
+      grn_obj_get_value_column_vector(ctx, obj, id, value);
       break;
     case GRN_OBJ_COLUMN_SCALAR :
       grn_obj_ensure_bulk(ctx, value);
@@ -7918,6 +8099,7 @@ grn_obj_ensure_vector(grn_ctx *ctx, grn_obj *obj)
 {
   if (obj->header.type != GRN_VECTOR) { grn_bulk_fin(ctx, obj); }
   obj->header.type = GRN_VECTOR;
+  obj->header.impl_flags &= ~GRN_OBJ_WITH_WEIGHT;
 }
 
 static void
@@ -7925,6 +8107,7 @@ grn_obj_ensure_bulk(grn_ctx *ctx, grn_obj *obj)
 {
   if (obj->header.type == GRN_VECTOR) { VECTOR_CLEAR(ctx, obj); }
   obj->header.type = GRN_BULK;
+  obj->header.impl_flags &= ~GRN_OBJ_WITH_WEIGHT;
 }
 
 grn_rc

  Modified: lib/ii.c (+36 -8)
===================================================================
--- lib/ii.c    2014-06-16 17:20:41 +0900 (9a94f17)
+++ lib/ii.c    2014-06-20 15:50:40 +0900 (775af9c)
@@ -4903,13 +4903,17 @@ static grn_rc
 grn_uvector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
                      grn_obj *in, grn_obj *out)
 {
-  int j;
+  int i, n;
   grn_ii_updspec **u;
   grn_hash *h = (grn_hash *)out;
-  const grn_id *rp = (const grn_id *)GRN_BULK_HEAD(in);
-  const grn_id *re = (const grn_id *)GRN_BULK_CURR(in);
-  for (j = 0; rp < re; j++, rp++) {
-    if (!grn_hash_add(ctx, h, rp, sizeof(grn_id), (void **) &u, NULL)) {
+
+  n = grn_vector_size(ctx, in);
+  for (i = 0; i < n; i++) {
+    grn_id id;
+    unsigned int weight;
+
+    id = grn_uvector_get_element(ctx, in, i, &weight);
+    if (!grn_hash_add(ctx, h, &id, sizeof(grn_id), (void **)&u, NULL)) {
       break;
     }
     if (!*u) {
@@ -4918,7 +4922,7 @@ grn_uvector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
         return GRN_NO_MEMORY_AVAILABLE;
       }
     }
-    if (grn_ii_updspec_add(ctx, *u, j, 0)) {
+    if (grn_ii_updspec_add(ctx, *u, i, weight)) {
       GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!");
       return GRN_NO_MEMORY_AVAILABLE;
     }
@@ -4985,7 +4989,19 @@ grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
         GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on grn_ii_update failed !");
         rc = GRN_NO_MEMORY_AVAILABLE;
       } else {
-        rc = grn_uvector2updspecs(ctx, ii, rid, section, new_, new);
+        if (new_->header.type == GRN_UVECTOR) {
+          rc = grn_uvector2updspecs(ctx, ii, rid, section, new_, new);
+        } else {
+          grn_obj uvector;
+          unsigned int weight = 0;
+          GRN_VALUE_FIX_SIZE_INIT(&uvector, GRN_OBJ_VECTOR, new_->header.domain);
+          if (new_->header.impl_flags & GRN_OBJ_WITH_WEIGHT) {
+            uvector.header.impl_flags |= GRN_OBJ_WITH_WEIGHT;
+          }
+          grn_uvector_add_element(ctx, &uvector, GRN_RECORD_VALUE(new_), weight);
+          rc = grn_uvector2updspecs(ctx, ii, rid, section, &uvector, new);
+          GRN_OBJ_FIN(ctx, &uvector);
+        }
       }
       if (new_ != newvalue) { grn_obj_close(ctx, new_); }
       if (rc) { goto exit; }
@@ -5067,7 +5083,19 @@ grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
         GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create(ctx, NULL, old) on grn_ii_update failed!");
         rc = GRN_NO_MEMORY_AVAILABLE;
       } else {
-        rc = grn_uvector2updspecs(ctx, ii, rid, section, old_, old);
+        if (old_->header.type == GRN_UVECTOR) {
+          rc = grn_uvector2updspecs(ctx, ii, rid, section, old_, old);
+        } else {
+          grn_obj uvector;
+          unsigned int weight = 0;
+          GRN_VALUE_FIX_SIZE_INIT(&uvector, GRN_OBJ_VECTOR, old_->header.domain);
+          if (old_->header.impl_flags & GRN_OBJ_WITH_WEIGHT) {
+            uvector.header.impl_flags |= GRN_OBJ_WITH_WEIGHT;
+          }
+          grn_uvector_add_element(ctx, &uvector, GRN_RECORD_VALUE(old_), weight);
+          rc = grn_uvector2updspecs(ctx, ii, rid, section, &uvector, old);
+          GRN_OBJ_FIN(ctx, &uvector);
+        }
       }
       if (old_ != oldvalue) { grn_obj_close(ctx, old_); }
       if (rc) { goto exit; }
-------------- next part --------------
HTML����������������������������...
다운로드 



More information about the Groonga-commit mailing list
Back to archive index