Seiji Zenitani
zenit****@users*****
2005年 11月 30日 (水) 00:23:51 JST
Index: CarbonEmacsPackage/GPL/htmlize.el diff -u CarbonEmacsPackage/GPL/htmlize.el:1.5 CarbonEmacsPackage/GPL/htmlize.el:1.6 --- CarbonEmacsPackage/GPL/htmlize.el:1.5 Mon Oct 31 23:13:51 2005 +++ CarbonEmacsPackage/GPL/htmlize.el Wed Nov 30 00:23:51 2005 @@ -1,6 +1,6 @@ ;; htmlize.el -- Convert buffer text and decorations to HTML. -;; Copyright (C) 1997,1998,1999,2000,2001,2002,2003 Hrvoje Niksic +;; Copyright (C) 1997,1998,1999,2000,2001,2002,2003,2005 Hrvoje Niksic ;; Author: Hrvoje Niksic <hniks****@xemac*****> ;; Keywords: hypermedia, extensions @@ -70,8 +70,8 @@ ;; Thanks go to the multitudes of people who have sent reports and ;; contributed comments, suggestions, and fixes. They include Ron -;; Gut, Bob Weiner, Toni Drabik, Peter Breton, Thomas Vogels and many -;; others. +;; Gut, Bob Weiner, Toni Drabik, Peter Breton, Thomas Vogels, Juri +;; Linkov, and many others. ;; User quotes: "You sir, are a sick, sick, _sick_ person. :)" ;; -- Bill Perry, author of Emacs/W3 @@ -93,7 +93,7 @@ ;; `cl' is loaded. (load "cl-extra"))) -(defconst htmlize-version "1.16") +(defconst htmlize-version "1.29") ;; Incantations to make custom stuff work without customize, e.g. on ;; XEmacs 19.14 or GNU Emacs 19.34. @@ -144,7 +144,7 @@ :type 'boolean :group 'htmlize) -(defcustom htmlize-hyperlink-style "\ +(defcustom htmlize-hyperlink-style " a { color: inherit; background-color: inherit; @@ -159,6 +159,14 @@ :type 'string :group 'htmlize) +(defcustom htmlize-replace-form-feeds t + "*Non-nil means replace form feed characters in source code with <hr />. +If this is a string, it additionally specifies the replacement to use. +If you need more elaborate processing, set this to nil and use +htmlize-after-hook." + :type 'boolean + :group 'htmlize) + (defcustom htmlize-html-charset nil "*The charset declared by the resulting HTML documents. When non-nil, causes htmlize to insert the following in the HEAD section @@ -401,7 +409,7 @@ (defun htmlize-protect-string (string) "HTML-protect string, escaping HTML metacharacters and I18N chars." ;; Only protecting strings that actually contain unsafe or non-ASCII - ;; chars removes a lot of unnecessary consing. + ;; chars removes a lot of unnecessary funcalls and consing. (if (not (string-match "[^\r\n\t -%'-;=?-~]" string)) string (mapconcat (lambda (char) @@ -423,8 +431,8 @@ (setf (gethash char htmlize-extended-character-cache) (format "&#%d;" char))) ((and (fboundp 'encode-char) - ;; Have to check: encode-char fails for Arabic - ;; and possibly other chars. + ;; Must check if encode-char works for CHAR; + ;; it fails for Arabic and possibly elsewhere. (encode-char char 'ucs)) (setf (gethash char htmlize-extended-character-cache) (format "&#%d;" (encode-char char 'ucs)))) @@ -435,20 +443,56 @@ (char-to-string char))))) string ""))) +(defconst htmlize-ellipsis "...") +(put-text-property 0 (length htmlize-ellipsis) 'htmlize-ellipsis t htmlize-ellipsis) + (defun htmlize-buffer-substring-no-invisible (beg end) ;; Like buffer-substring-no-properties, but don't copy invisible - ;; parts of the region. + ;; parts of the region. Where buffer-substring-no-properties + ;; mandates an ellipsis to be shown, htmlize-ellipsis is inserted. (let ((pos beg) - visible-list invisible next-change) + visible-list invisible show next-change) ;; Iterate over the changes in the `invisible' property and filter ;; out the portions where it's non-nil, i.e. where the text is ;; invisible. (while (< pos end) (setq invisible (get-char-property pos 'invisible) next-change (htmlize-next-change pos 'invisible end)) - (unless invisible - (push (buffer-substring-no-properties pos next-change) - visible-list)) + (if (not (listp buffer-invisibility-spec)) + ;; If buffer-invisibility-spec is not a list, then all + ;; characters with non-nil `invisible' property are visible. + (setq show (not invisible)) + ;; Otherwise, the value of a non-nil `invisible' property can be: + ;; 1. a symbol -- make the text invisible if it matches + ;; buffer-invisibility-spec. + ;; 2. a list of symbols -- make the text invisible if + ;; any symbol in the list matches + ;; buffer-invisibility-spec. + ;; If the match of buffer-invisibility-spec has a non-nil + ;; CDR, replace the invisible text with an ellipsis. + (let (match) + (if (symbolp invisible) + (setq match (member* invisible buffer-invisibility-spec + :key (lambda (i) + (if (symbolp i) i (car i))))) + (setq match (block nil + (dolist (elem invisible) + (let ((m (member* + elem buffer-invisibility-spec + :key (lambda (i) + (if (symbolp i) i (car i)))))) + (when m (return m)))) + nil))) + (setq show (cond ((null match) t) + ((and (cdr-safe (car match)) + ;; Conflate successive ellipses. + (not (eq show htmlize-ellipsis))) + htmlize-ellipsis) + (t nil))))) + (cond ((eq show t) + (push (buffer-substring-no-properties pos next-change) visible-list)) + ((stringp show) + (push show visible-list))) (setq pos next-change)) (if (= (length visible-list) 1) ;; If VISIBLE-LIST consists of only one element, return it @@ -457,6 +501,15 @@ (car visible-list) (apply #'concat (nreverse visible-list))))) +(defun htmlize-trim-ellipsis (text) + ;; Remove htmlize-ellipses ("...") from the beginning of TEXT if it + ;; starts with it. It checks for the special property of the + ;; ellipsis so it doesn't work on ordinary text that begins with + ;; "...". + (if (get-text-property 0 'htmlize-ellipsis text) + (substring text (length htmlize-ellipsis)) + text)) + (defconst htmlize-tab-spaces ;; A table of strings with spaces. (aref htmlize-tab-spaces 5) is ;; like (make-string 5 ?\ ), except it doesn't cons. @@ -542,6 +595,16 @@ ;; <http://www.mail-archive.com/bbdb-****@xemac*****/> ;; <hniks****@xemac*****> ;; <xalan-dev-sc.10148567319.hacuhiucknfgmpfnjcpg-john=doe.c****@xml*****> + +(defun htmlize-defang-local-variables () + ;; Juri Linkov reports that an HTML-ized "Local variables" can lead + ;; visiting the HTML to fail with "Local variables list is not + ;; properly terminated". He suggested changing the phrase to + ;; syntactically equivalent HTML that Emacs doesn't recognize. + (goto-char (point-min)) + (while (search-forward "Local Variables:" nil t) + (replace-match "Local Variables:" nil t))) + ;;; Color handling. @@ -633,11 +696,25 @@ ;; return "unspecified-fg" or "unspecified-bg". If the face is ;; `default' and the color is unspecified, look up the color in ;; frame parameters. - (let ((color (if fg (face-foreground face) (face-background face)))) + (let* ((function (if fg #'face-foreground #'face-background)) + color) + (if (>= emacs-major-version 22) + ;; For GNU Emacs 22+ set INHERIT to get the inherited values. + (setq color (funcall function face nil t)) + (setq color (funcall function face)) + ;; For GNU Emacs 21 (which has `face-attribute'): if the color + ;; is nil, recursively check for the face's parent. + (when (and (null color) + (fboundp 'face-attribute) + (face-attribute face :inherit) + (not (eq (face-attribute face :inherit) 'unspecified))) + (setq color (htmlize-face-color-internal + (face-attribute face :inherit) fg)))) (when (and (eq face 'default) (null color)) (setq color (cdr (assq (if fg 'foreground-color 'background-color) (frame-parameters))))) - (when (or (equal color "unspecified-fg") + (when (or (eq color 'unspecified) + (equal color "unspecified-fg") (equal color "unspecified-bg")) (setq color nil)) (when (and (eq face 'default) @@ -680,7 +757,7 @@ ;; returns nil. ) ((string-match "\\`#" color) - ;; The color is alredy in #rrggbb format. + ;; The color is already in #rrggbb format. (setq rgb-string color)) ((and htmlize-use-rgb-txt htmlize-color-rgb-hash) @@ -724,7 +801,7 @@ italicp ; whether face is italic underlinep ; whether face is underlined overlinep ; whether face is overlined - strikep ; whether face is striked through + strikep ; whether face is struck through css-name ; CSS name of face ) @@ -776,9 +853,12 @@ (setf (htmlize-fstruct-underlinep fstruct) (face-underline-p face)))) ((fboundp 'face-attribute) - ;; GNU Emacs 21. + ;; GNU Emacs 21 and further. (dolist (attr '(:weight :slant :underline :overline :strike-through)) - (let ((value (face-attribute face attr))) + (let ((value (if (>= emacs-major-version 22) + ;; Use the INHERIT arg in GNU Emacs 22. + (face-attribute face attr nil t) + (face-attribute face attr)))) (when (and value (not (eq value 'unspecified))) (htmlize-face-emacs21-attr fstruct attr value))))) (t @@ -849,8 +929,7 @@ (defun htmlize-face-list-p (face-prop) "Return non-nil if FACE-PROP is a list of faces, nil otherwise." ;; If not for attrlists, this would return (listp face-prop). This - ;; way we have to be more careful because some an attrlist is also a - ;; list! + ;; way we have to be more careful because attrlist is also a list! (cond ((eq face-prop nil) ;; FACE-PROP being nil means empty list (no face), so return t. @@ -900,6 +979,12 @@ (push new-name css-names))))) face-map)) +(defun htmlize-unstringify-face (face) + "If FACE is a string, return it interned, otherwise return it unchanged." + (if (stringp face) + (intern face) + face)) + (defun htmlize-faces-in-buffer () "Return a list of faces used in the current buffer. Under XEmacs, this returns the set of faces specified by the extents @@ -931,16 +1016,20 @@ next (or (next-single-property-change pos 'face) (point-max))) ;; FACE-PROP can be a face/attrlist or a list thereof. (setq faces (if (htmlize-face-list-p face-prop) - (union face-prop faces :test 'equal) - (adjoin face-prop faces :test 'equal))) + (nunion (mapcar #'htmlize-unstringify-face face-prop) + faces :test 'equal) + (adjoin (htmlize-unstringify-face face-prop) + faces :test 'equal))) (setq pos next))) ;; Faces used by overlays. (dolist (overlay (overlays-in (point-min) (point-max))) (let ((face-prop (overlay-get overlay 'face))) ;; FACE-PROP can be a face/attrlist or a list thereof. (setq faces (if (htmlize-face-list-p face-prop) - (union face-prop faces :test 'equal) - (adjoin face-prop faces :test 'equal)))))) + (nunion (mapcar #'htmlize-unstringify-face face-prop) + faces :test 'equal) + (adjoin (htmlize-unstringify-face face-prop) + faces :test 'equal)))))) faces)) ;; htmlize-faces-at-point returns the faces in use at point. The @@ -953,23 +1042,31 @@ (cond (htmlize-running-xemacs (defun htmlize-faces-at-point () - (let (extent list face-prop) + (let (extent extent-list face-list face-prop) (while (setq extent (extent-at (point) nil 'face extent)) + (push extent extent-list)) + ;; extent-list is in reverse display order, meaning that + ;; smallest ones come last. That is the order we want, + ;; except it can be overridden by the `priority' property. + (setq extent-list (stable-sort extent-list #'< + :key #'extent-priority)) + (dolist (extent extent-list) (setq face-prop (extent-face extent)) - (setq list (if (listp face-prop) - (nconc (reverse face-prop) list) - (cons face-prop list)))) - ;; No need to reverse the list: PUSH has already - ;; constructed it in the reverse display order. - list))) + ;; extent's face-list is in reverse order from what we + ;; want, but the `nreverse' below will take care of it. + (setq face-list (if (listp face-prop) + (append face-prop face-list) + (cons face-prop face-list)))) + (nreverse face-list)))) (t (defun htmlize-faces-at-point () (let (all-faces) ;; Faces from text properties. (let ((face-prop (get-text-property (point) 'face))) (setq all-faces (if (htmlize-face-list-p face-prop) - (reverse face-prop) - (list face-prop)))) + (nreverse (mapcar #'htmlize-unstringify-face + face-prop)) + (list (htmlize-unstringify-face face-prop))))) ;; Faces from overlays. (let ((overlays ;; Collect overlays at point that specify `face'. @@ -988,17 +1085,34 @@ :key (lambda (o) (- (overlay-end o) (overlay-start o))))) + ;; Overlay priorities, if present, override the above + ;; established order. Larger overlay priority takes + ;; precedence and therefore comes later in the list. + (setq overlays (stable-sort + overlays + ;; Reorder (stably) by acending... + #'< + ;; ...overlay priority. + :key (lambda (o) + (or (overlay-get o 'priority) 0)))) (dolist (overlay overlays) (setq face-prop (overlay-get overlay 'face)) (setq list (if (htmlize-face-list-p face-prop) - (nconc (reverse face-prop) list) - (cons face-prop list)))) - (setq all-faces (nconc all-faces list))))))) + (nconc (nreverse (mapcar + #'htmlize-unstringify-face + face-prop)) + list) + (cons (htmlize-unstringify-face face-prop) list)))) + ;; Under "Merging Faces" the manual explicitly states + ;; that faces specified by overlays take precedence over + ;; faces specified by text properties. + (setq all-faces (nconc all-faces list))) + all-faces)))) ;; htmlize supports generating HTML in two several fundamentally ;; different ways, one with the use of CSS and nested <span> tags, and ;; the other with the use of the old <font> tags. Rather than adding -;; a bunch of if's to many places, we take a semi-OO approach. +;; a bunch of ifs to many places, we take a semi-OO approach. ;; `htmlize-buffer-1' calls a number of "methods", which indirect to ;; the functions that depend on `htmlize-output-type'. The currently ;; used methods are `doctype', `insert-head', `body-tag', and @@ -1108,19 +1222,19 @@ ;; To make generated HTML legal, htmlize.el used to specify the SGML ;; declaration of "HTML Pro" DTD here. HTML Pro aka Silmaril DTD - ;; was a project whose goal was to produce a DTD that would + ;; was a project whose goal was to produce a GPL'ed DTD that would ;; encompass all the incompatible HTML extensions procured by ;; Netscape, MSIE, and other players in the field. Apparently the ;; project got abandoned, the last available version being "Draft 0 ;; Revision 11" from January 1997, as documented at - ;; <http://validator.w3.org/sgml-lib/pro/html/dtds/htmlpro.html>. + ;; <http://imbolc.ucc.ie/~pflynn/articles/htmlpro.html>. - ;; Since by now (2001) HTML Pro is remembered by none but the most + ;; Since by now (2005) HTML Pro is remembered by none but the most ;; die-hard early-web-days nostalgics and used by not even them, ;; there is no use in specifying it. So we return the standard HTML ;; 4.0 declaration, which makes generated HTML technically illegal. ;; If you have a problem with that, use the `css' generation engine - ;; which I believe creates fully conformant HTML. + ;; which I believe creates fully conforming HTML. "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">" @@ -1222,7 +1336,7 @@ ;; Declare variables used in loop body outside the loop ;; because it's faster to establish `let' bindings only ;; once. - next-change text face-list fstruct-list) + next-change text face-list fstruct-list trailing-ellipsis) ;; This loop traverses and reads the source buffer, appending ;; the resulting HTML to HTMLBUF with `princ'. This method is ;; fast because: 1) it doesn't require examining the text @@ -1242,6 +1356,13 @@ ;; untabify it and escape the HTML metacharacters. (setq text (htmlize-buffer-substring-no-invisible (point) next-change)) + (when trailing-ellipsis + (setq text (htmlize-trim-ellipsis text))) + ;; If TEXT ends up empty, don't change trailing-ellipsis. + (when (> (length text) 0) + (setq trailing-ellipsis + (get-text-property (1- (length text)) + 'htmlize-ellipsis text))) (setq text (htmlize-untabify text (current-column))) (setq text (htmlize-protect-string text)) ;; Don't bother writing anything if there's no text (this @@ -1252,11 +1373,24 @@ (funcall insert-text-method text fstruct-list htmlbuf)) (goto-char next-change))) - ;; Insert the epilog. + ;; Insert the epilog and post-process the buffer. (with-current-buffer htmlbuf (insert "</pre>\n </body>\n</html>\n") (when htmlize-generate-hyperlinks (htmlize-make-hyperlinks)) + (htmlize-defang-local-variables) + (when htmlize-replace-form-feeds + ;; Change each "^L\n" to "\n<hr/>". + (goto-char (point-min)) + (let ((source + ;; ^L has already been escaped, so search for that. + (htmlize-protect-string "\^L\n")) + (replacement + (concat "\n" (if (stringp htmlize-replace-form-feeds) + htmlize-replace-form-feeds + "<hr />")))) + (while (search-forward source nil t) + (replace-match replacement t t)))) (goto-char (point-min)) (when htmlize-html-major-mode ;; What sucks about this is that the minor modes, most notably @@ -1268,6 +1402,19 @@ ;; Utility functions. +(defmacro htmlize-with-fontify-message (&rest body) + ;; When forcing fontification of large buffers in + ;; htmlize-ensure-fontified, inform the user that he is waiting for + ;; font-lock, not for htmlize to finish. + `(progn + (if (> (buffer-size) 65536) + (message "Forcing fontification of %s..." + (buffer-name (current-buffer)))) + , @ body + (if (> (buffer-size) 65536) + (message "Forcing fontification of %s...done" + (buffer-name (current-buffer)))))) + (defun htmlize-ensure-fontified () ;; If font-lock is being used, ensure that the "support" modes ;; actually fontify the buffer. If font-lock is not in use, we @@ -1279,15 +1426,18 @@ (cond ((and (boundp 'jit-lock-mode) (symbol-value 'jit-lock-mode)) - (jit-lock-fontify-now (point-min) (point-max))) + (htmlize-with-fontify-message + (jit-lock-fontify-now (point-min) (point-max)))) ((and (boundp 'lazy-lock-mode) (symbol-value 'lazy-lock-mode)) - (lazy-lock-fontify-region (point-min) (point-max))) + (htmlize-with-fontify-message + (lazy-lock-fontify-region (point-min) (point-max)))) ((and (boundp 'lazy-shot-mode) (symbol-value 'lazy-shot-mode)) - ;; lazy-shot is amazing in that it must *refontify* the region, - ;; even if the whole buffer has already been fontified. <sigh> - (lazy-shot-fontify-region (point-min) (point-max))) + (htmlize-with-fontify-message + ;; lazy-shot is amazing in that it must *refontify* the region, + ;; even if the whole buffer has already been fontified. <sigh> + (lazy-shot-fontify-region (point-min) (point-max)))) ;; There's also fast-lock, but we don't need to handle specially, ;; I think. fast-lock doesn't really defer fontification, it ;; just saves it to an external cache so it's not done twice.