Hiroyuki Ikezoe
ikezo****@users*****
Tue Dec 5 14:09:35 JST 2006
Index: kazehakase/module/search/kz-search-hyper-estraier.c
diff -u kazehakase/module/search/kz-search-hyper-estraier.c:1.8 kazehakase/module/search/kz-search-hyper-estraier.c:1.9
--- kazehakase/module/search/kz-search-hyper-estraier.c:1.8	Mon Dec 4 11:50:53 2006
+++ kazehakase/module/search/kz-search-hyper-estraier.c	Tue Dec 5 14:09:35 2006
@@ -21,6 +21,8 @@
 #include <ctype.h>
 #include <glib/gi18n.h>
 #include <estraier.h>
+#include <glib.h>
+#include <glib/gstdio.h>
 
 #include "kazehakase.h"
 #include "utils/utils.h"
@@ -33,7 +35,7 @@
 #define HYPER_ESTRAIERRAIER_URI "http://hyperestraier.sourceforge.net/"
 #define DTD "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
 #define HEAD "<head>\n" \
-	" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" \
+	" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>" \
 	" <title>Full-text search in history</title>\n" \
 	" <link rel=\"stylesheet\" type=\"text/css\" href=\"history-search:?css=search-result.css\">\n" \
 	"</head>\n"
@@ -219,7 +221,7 @@
 		int ecode, ret;
 		ret = est_db_close(priv->db, &ecode);
 		if (!ret)
-			g_warning("error: %s", est_err_msg(ecode));
+			g_warning("db close error: %s", est_err_msg(ecode));
 	}
 	if (priv->dbname)
 	{
@@ -299,7 +301,7 @@
 
 	g_string_append_printf(html, "<h1>Search results for %s</h1>", text);
 
-	dirname = g_strconcat(g_get_home_dir(), HISTORY_DIR, NULL);
+	dirname = g_build_filename(g_get_home_dir(), HISTORY_DIR, NULL);
 	len = strlen(dirname);
 	g_free(dirname);
 
@@ -349,9 +351,7 @@
 
 	ret = est_db_close(db, &ecode);
 	if (!ret)
-	{
-		g_warning("error: %s", est_err_msg(ecode));
-	}
+		g_warning("db close error: %s", est_err_msg(ecode));
 
 	g_string_append_printf(html, FOOTER, HYPER_ESTRAIERRAIER_URI, _EST_VERSION);
 	g_string_append(html, "</body></html>");
@@ -368,14 +368,54 @@
 	return create_search_result_html(search, text);
 }
 
+static gchar *
+get_document_encoding (const gchar *contents)
+{
+	gchar *encoding = NULL;
+	gchar *p;
+
+	if (!contents) return NULL;
+
+	p = (gchar*)contents;
+	while ((p = strstr(p, "<meta ")))
+	{
+		gchar *end;
+		p = strstr(p, "http-equiv=\"");
+		if (!p) break;
+
+		p+=12;
+		end = strchr(p, '"');
+		if (!end) break;
+
+		if (g_ascii_strncasecmp(p, "content-type", end - p))
+			continue;
+
+		p = end;
+		/* negligent */
+		p = strstr(p, "charset=");
+		if (!p) break;
+		p+=8;
+		end = strchr(p, ';');
+		if (!end)
+			end = strchr(p, '"');
+		if (!end) break;
+
+		encoding = g_strndup(p, end - p);
+		g_warning("%s", encoding);
+		break;
+	}
+
+	return encoding;
+}
+
 gboolean
 register_document (KzSearch *search, const gchar *uri, const gchar *encoding, const gchar *title, const gchar *contents, GTime mtime)
 {
 	ESTDB *db;
 	ESTDOC *doc;
 	int ret, ecode, success;
-	gchar *time_str;
-	gchar *filename;
+	gchar *time_str, *filename, *text = NULL;
+	gchar *utf8 = NULL;
 	KzSearchHyperEstraierPrivate *priv = KZ_SEARCH_HYPER_ESTRAIER_GET_PRIVATE(search);
 
 	db = est_db_open(priv->dbname, ESTDBWRITER | ESTDBCREAT, &ecode);
@@ -391,34 +431,46 @@
 	est_doc_add_attr(doc, ESTDATTRURI, filename);
 	g_free(filename);
 
-	est_doc_add_attr(doc, ESTDATTRTITLE, title);
+	if (title)
+		est_doc_add_attr(doc, ESTDATTRTITLE, title);
 
 	time_str = cbdatestrwww(mtime, 0);
 	est_doc_add_attr(doc, ESTDATTRMDATE, time_str);
 	g_free(time_str);
 
-	if (contents)
+	if (!encoding)
+		encoding = get_document_encoding(contents);
+
+	if (encoding && strcmp(encoding, "UTF-8"))
+	{
+		utf8 = g_convert(contents, -1,
+				 "UTF-8", encoding,
+				 NULL, NULL,
+				 NULL);
+	}
+	if (utf8)
+	{
+		text = html_to_text(utf8);
+		g_free(utf8);
+	}
+	else
+		text = html_to_text(contents);
+
+	if (text)
 	{
-#warning FIXME! remove javascript code.
-		gchar *raw_text;
-		raw_text = remove_tag(contents, strlen(contents));
-		est_doc_add_text(doc, raw_text);
-		g_free(raw_text);
+		est_doc_add_text(doc, text);
+		g_free(text);
 	}
 
 	success = est_db_put_doc(db, doc, ESTPDCLEAN);
 	if (!success)
-	{
 		g_warning("register error: %s", est_err_msg(ecode));
-	}
 
 	est_doc_delete(doc);
 
 	ret = est_db_close(db, &ecode);
 	if (!ret)
-	{
 		g_warning("db close error: %s", est_err_msg(ecode));
-	}
 
 	return (success != 0) ? TRUE : FALSE;
 }
@@ -444,9 +496,7 @@
 
 	ret = est_db_close(db, &ecode);
 	if (!ret)
-	{
 		g_warning("error: %s", est_err_msg(ecode));
-	}
 
 	return (success != 0) ? TRUE : FALSE;
 }
@@ -483,9 +533,7 @@
 
 	ret = est_db_close(db, &ecode);
 	if (!ret)
-	{
 		g_warning("error: %s", est_err_msg(ecode));
-	}
 #warning FIXME! return GSource or something.
 	return 0;
 }
@@ -498,39 +546,52 @@
 }
 
 static void
+register_documents_in_path (KzSearch *search, const gchar *path)
+{
+	GDir *gd;
+	const gchar *file;
+
+	gd = g_dir_open(path, 0, NULL);
+	if (!gd)
+		return;
+
+	while ((file = g_dir_read_name (gd)))
+	{
+		gchar *new_path = g_build_filename (path, file, NULL);
+		if (g_file_test(new_path, G_FILE_TEST_IS_DIR))
+		{
+			register_documents_in_path(search, new_path);
+		}
+		else
+		{
+			gchar *uri, *contents;
+			GTime mtime;
+			struct stat st;
+
+			g_stat(new_path, &st);
+			mtime = st.st_mtime;
+			uri = g_strdup_printf("file://%s", new_path);
+			g_file_get_contents(new_path, &contents, NULL, NULL);
+
+			kz_search_register_document(search, uri, NULL, NULL, contents, mtime);
+
+			g_free(uri);
+			g_free(contents);
+		}
+		g_free(new_path);
+	}
+	g_dir_close (gd);
+}
+
+static void
 make_index (KzSearch *search)
 {
-	const gchar *estgather = "estcmd gather -sd ";
-	gchar *command;
-	gint argc;
-	gchar **argv = NULL;
-	GSpawnFlags flags;
-	GPid pid;
-
-	command = g_strconcat(estgather,
-			      g_get_home_dir(),
-			      HISTORY_INDEX" ",
-			      g_get_home_dir(),
-			      HISTORY_DIR,
-			      NULL);
-
-	g_shell_parse_argv(command,
-			   &argc,
-			   &argv,
-			   NULL);
-	flags = G_SPAWN_SEARCH_PATH |
-		G_SPAWN_STDOUT_TO_DEV_NULL;
-
-	g_spawn_async(NULL,
-		      argv,
-		      NULL,
-		      flags,
-		      NULL,
-		      NULL,
-		      &pid,
-		      NULL);
-	g_strfreev(argv);
-	g_free(command);
+	gchar *path;
+
+	path = g_build_filename(g_get_home_dir(), HISTORY_DIR, NULL);
+	register_documents_in_path(search, path);
+
+	g_free(path);
 }
 
 static gboolean