Kouhei Sutou
null+****@clear*****
Thu Nov 22 15:12:12 JST 2012
Kouhei Sutou 2012-11-22 15:12:12 +0900 (Thu, 22 Nov 2012) New Revision: afe65fb5245e1de6b8a4e848d569f865cad02300 https://github.com/groonga/groonga/commit/afe65fb5245e1de6b8a4e848d569f865cad02300 Log: doc: add about snippet_html function Added files: doc/source/example/reference/functions/snippet_html/usage.log doc/source/example/reference/functions/snippet_html/usage_basic.log doc/source/example/reference/functions/snippet_html/usage_setup.log doc/source/example/reference/functions/snippet_html/usage_string_literal.log doc/source/reference/functions/snippet_html.txt Modified files: doc/files.am Modified: doc/files.am (+12 -0) =================================================================== --- doc/files.am 2012-11-22 14:41:00 +0900 (5657ed4) +++ doc/files.am 2012-11-22 15:12:12 +0900 (cf08257) @@ -73,6 +73,10 @@ absolute_source_files = \ $(top_srcdir)/doc/source/example/reference/functions/geo_distance_location_sphere.log \ $(top_srcdir)/doc/source/example/reference/functions/geo_distance_setup_distance.log \ $(top_srcdir)/doc/source/example/reference/functions/geo_distance_setup_location.log \ + $(top_srcdir)/doc/source/example/reference/functions/snippet_html/usage.log \ + $(top_srcdir)/doc/source/example/reference/functions/snippet_html/usage_basic.log \ + $(top_srcdir)/doc/source/example/reference/functions/snippet_html/usage_setup.log \ + $(top_srcdir)/doc/source/example/reference/functions/snippet_html/usage_string_literal.log \ $(top_srcdir)/doc/source/example/reference/grn_expr/query_syntax/setup.log \ $(top_srcdir)/doc/source/example/reference/grn_expr/query_syntax/simple_equal.log \ $(top_srcdir)/doc/source/example/reference/grn_expr/query_syntax/simple_full_text_search.log \ @@ -341,6 +345,7 @@ absolute_source_files = \ $(top_srcdir)/doc/source/reference/functions/geo_in_rectangle.txt \ $(top_srcdir)/doc/source/reference/functions/now.txt \ $(top_srcdir)/doc/source/reference/functions/rand.txt \ + 
$(top_srcdir)/doc/source/reference/functions/snippet_html.txt \ $(top_srcdir)/doc/source/reference/grn_expr.txt \ $(top_srcdir)/doc/source/reference/grn_expr/query_syntax.txt \ $(top_srcdir)/doc/source/reference/grn_expr/script_syntax.txt \ @@ -471,6 +476,10 @@ source_files_relative_from_doc_dir = \ source/example/reference/functions/geo_distance_location_sphere.log \ source/example/reference/functions/geo_distance_setup_distance.log \ source/example/reference/functions/geo_distance_setup_location.log \ + source/example/reference/functions/snippet_html/usage.log \ + source/example/reference/functions/snippet_html/usage_basic.log \ + source/example/reference/functions/snippet_html/usage_setup.log \ + source/example/reference/functions/snippet_html/usage_string_literal.log \ source/example/reference/grn_expr/query_syntax/setup.log \ source/example/reference/grn_expr/query_syntax/simple_equal.log \ source/example/reference/grn_expr/query_syntax/simple_full_text_search.log \ @@ -739,6 +748,7 @@ source_files_relative_from_doc_dir = \ source/reference/functions/geo_in_rectangle.txt \ source/reference/functions/now.txt \ source/reference/functions/rand.txt \ + source/reference/functions/snippet_html.txt \ source/reference/grn_expr.txt \ source/reference/grn_expr/query_syntax.txt \ source/reference/grn_expr/script_syntax.txt \ @@ -955,6 +965,7 @@ html_files_relative_from_locale_dir = \ html/_sources/reference/functions/geo_in_rectangle.txt \ html/_sources/reference/functions/now.txt \ html/_sources/reference/functions/rand.txt \ + html/_sources/reference/functions/snippet_html.txt \ html/_sources/reference/grn_expr.txt \ html/_sources/reference/grn_expr/query_syntax.txt \ html/_sources/reference/grn_expr/script_syntax.txt \ @@ -1104,6 +1115,7 @@ html_files_relative_from_locale_dir = \ html/reference/functions/geo_in_rectangle.html \ html/reference/functions/now.html \ html/reference/functions/rand.html \ + html/reference/functions/snippet_html.html \ 
html/reference/grn_expr.html \ html/reference/grn_expr/query_syntax.html \ html/reference/grn_expr/script_syntax.html \ Added: doc/source/example/reference/functions/snippet_html/usage.log (+28 -0) 100644 =================================================================== --- /dev/null +++ doc/source/example/reference/functions/snippet_html/usage.log 2012-11-22 15:12:12 +0900 (62b7501) @@ -0,0 +1,28 @@ +Execution example:: + + select Documents --output_columns 'snippet_html("Groonga is very fast fulltext search engine.")' --command_version 2 --match_columns content --query "fast performance" + # [ + # [ + # 0, + # 1337566253.89858, + # 0.000355720520019531 + # ], + # [ + # [ + # [ + # 1 + # ], + # [ + # [ + # "snippet_html", + # "null" + # ] + # ], + # [ + # [ + # "Groonga is very <span class=\"keyword\">fast</span> fulltext search engine." + # ] + # ] + # ] + # ] + # ] Added: doc/source/example/reference/functions/snippet_html/usage_basic.log (+29 -0) 100644 =================================================================== --- /dev/null +++ doc/source/example/reference/functions/snippet_html/usage_basic.log 2012-11-22 15:12:12 +0900 (423d3fe) @@ -0,0 +1,29 @@ +Execution example:: + + select Documents --output_columns "snippet_html(content)" --command_version 2 --match_columns content --query "fast performance" + # [ + # [ + # 0, + # 1337566253.89858, + # 0.000355720520019531 + # ], + # [ + # [ + # [ + # 1 + # ], + # [ + # [ + # "snippet_html", + # "null" + # ] + # ], + # [ + # [ + # "Groonga is a <span class=\"keyword\">fast</span> and accurate full text search engine based on inverted index. One of the characteristics of groonga is that a newly registered document instantly appears in search results. Also, gro", + # "onga allows updates without read locks. These characteristics result in superior <span class=\"keyword\">performance</span> on real-time applications." 
+ # ] + # ] + # ] + # ] + # ] Added: doc/source/example/reference/functions/snippet_html/usage_setup.log (+17 -0) 100644 =================================================================== --- /dev/null +++ doc/source/example/reference/functions/snippet_html/usage_setup.log 2012-11-22 15:12:12 +0900 (d696b2b) @@ -0,0 +1,17 @@ +Execution example:: + + table_create Documents TABLE_NO_KEY + # [[0, 1337566253.89858, 0.000355720520019531], true] + column_create Documents content COLUMN_SCALAR Text + # [[0, 1337566253.89858, 0.000355720520019531], true] + table_create Terms TABLE_PAT_KEY|KEY_NORMALIZE ShortText --default_tokenizer TokenBigram + # [[0, 1337566253.89858, 0.000355720520019531], true] + column_create Terms documents_content_index COLUMN_INDEX|WITH_POSITION Documents content + # [[0, 1337566253.89858, 0.000355720520019531], true] + load --table Documents + [ + ["content"], + ["Groonga is a fast and accurate full text search engine based on inverted index. One of the characteristics of groonga is that a newly registered document instantly appears in search results. Also, groonga allows updates without read locks. These characteristics result in superior performance on real-time applications."], + ["Groonga is also a column-oriented database management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and PostgreSQL, column-oriented systems are more suited for aggregate queries. 
Due to this advantage, groonga can cover weakness of row-oriented systems."] + ] + # [[0, 1337566253.89858, 0.000355720520019531], 2] Added: doc/source/example/reference/functions/snippet_html/usage_string_literal.log (+28 -0) 100644 =================================================================== --- /dev/null +++ doc/source/example/reference/functions/snippet_html/usage_string_literal.log 2012-11-22 15:12:12 +0900 (62b7501) @@ -0,0 +1,28 @@ +Execution example:: + + select Documents --output_columns 'snippet_html("Groonga is very fast fulltext search engine.")' --command_version 2 --match_columns content --query "fast performance" + # [ + # [ + # 0, + # 1337566253.89858, + # 0.000355720520019531 + # ], + # [ + # [ + # [ + # 1 + # ], + # [ + # [ + # "snippet_html", + # "null" + # ] + # ], + # [ + # [ + # "Groonga is very <span class=\"keyword\">fast</span> fulltext search engine." + # ] + # ] + # ] + # ] + # ] Added: doc/source/reference/functions/snippet_html.txt (+120 -0) 100644 =================================================================== --- /dev/null +++ doc/source/reference/functions/snippet_html.txt 2012-11-22 15:12:12 +0900 (859fccd) @@ -0,0 +1,120 @@ +.. -*- rst -*- + +.. highlightlang:: none + +.. groonga-command +.. database: functions_snippet_html + +snippet_html +============ + +.. caution:: + + This feature is experimental. API will be changed. + +Summary +------- + +``snippet_html`` generates snippets (``KWIC``. ``KeyWord In Context``) +of search keywords. The snippets are prepared for embedding +HTML. Special characters such as ``<`` and ``>`` are escaped as +``&lt;`` and ``&gt;``. Keyword is surrounded with ``<span +class="keyword">`` and ``</span>``. For example, a snippet of ``I am a +groonga user. <3`` for keyword ``groonga`` is ``I am a <span +class="keyword">groonga</span> user. &lt;3``. 
+ +Syntax +------ + +``snippet_html`` has only one parameter:: + + snippet_html(column) + +``snippet_html`` has many parameters internally but they can't be +specified for now. You will be able to customize those parameters soon. + +Usage +----- + +Here are a schema definition and sample data to show usage. + +.. groonga-command +.. include:: ../../example/reference/functions/snippet_html/usage_setup.log +.. table_create Documents TABLE_NO_KEY +.. column_create Documents content COLUMN_SCALAR Text +.. table_create Terms TABLE_PAT_KEY|KEY_NORMALIZE ShortText --default_tokenizer TokenBigram +.. column_create Terms documents_content_index COLUMN_INDEX|WITH_POSITION Documents content +.. load --table Documents +.. [ +.. ["content"], +.. ["Groonga is a fast and accurate full text search engine based on inverted index. One of the characteristics of groonga is that a newly registered document instantly appears in search results. Also, groonga allows updates without read locks. These characteristics result in superior performance on real-time applications."], +.. ["Groonga is also a column-oriented database management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and PostgreSQL, column-oriented systems are more suited for aggregate queries. Due to this advantage, groonga can cover weakness of row-oriented systems."] +.. ] + +``snippet_html`` can be used in only ``--output_columns`` in +:doc:`/reference/commands/select`. + +You need to specify ``--command_version 2`` argument explicitly +because function call in ``--output_columns`` is experimental feature +in groonga 2.0.9. It will be enabled by default soon. + +You also need to specify ``--query`` and/or ``--filter``. Keywords are +extracted from ``--query`` and ``--filter`` arguments. + +The following example uses ``--query "fast performance"``. In this +case, ``fast`` and ``performance`` are used as keywords. + +.. groonga-command +.. 
include:: ../../example/reference/functions/snippet_html/usage_basic.log +.. select Documents --output_columns "snippet_html(content)" --command_version 2 --match_columns content --query "fast performance" + +``--query "fast performance"`` matches to only the first record's +content. ``snippet_html(content)`` extracts two text parts that +include the keywords ``fast`` or ``performance`` and surrounds the +keywords with ``<span class="keyword">`` and ``</span>``. + +The max number of text parts is 3. If there are 4 or more text parts +that include the keywords, only the leading 3 parts are used. + +The max size of a text part is 200 bytes. The unit is bytes not +characters. The size doesn't include inserted ``<span class="keyword">`` +and ``</span>``. + +Both the max number of text parts and the max size of a text part +aren't customizable. + +You can specify string literal instead of column. + +.. groonga-command +.. include:: ../../example/reference/functions/snippet_html/usage_string_literal.log +.. select Documents --output_columns 'snippet_html("Groonga is very fast fulltext search engine.")' --command_version 2 --match_columns content --query "fast performance" + +Return value +------------ + +``snippet_html`` returns an array of strings. An element of array is a +snippet:: + + [SNIPPET1, SNIPPET2, SNIPPET3] + +A snippet includes one or more keywords. The max byte size of a +snippet except ``<span class="keyword">`` and ``</span>`` is 200 +bytes. The unit isn't the number of characters. + +The array size is larger than or equal to 1 and less than or equal +to 3. The max size 3 will be customizable soon. + +TODO +---- + +* Make the max number of text parts customizable. +* Make the max size of a text part customizable. +* Make keywords customizable. +* Make tag that surrounds a keyword customizable. +* Make normalization customizable. +* Support options by object literal. 
+ +See also +-------- + +* :doc:`/reference/commands/select` -------------- next part -------------- HTML����������������������������... 다운로드