Revision | 6197bd74b8437987a5c656a7c733e70ad95bf9a5 (tree) |
---|---|
Time | 2011-01-25 00:21:28 |
Author | henoheno <henoheno> |
Committer | henoheno |
$Id: spam.php,v 1.221 2011/01/24 14:51:50 henoheno Exp $
$Id: spam_pickup.php,v 1.71 2009/01/04 08:56:07 henoheno Exp $
$Id: spam_util.php,v 1.4 2011/01/24 14:51:50 henoheno Exp $
$Id: spam.ini.php,v 1.202 2010/12/15 16:07:38 henoheno Exp $
@@ -1,6 +1,6 @@ | ||
1 | 1 | <?php |
2 | 2 | // PukiWiki - Yet another WikiWikiWeb clone. |
3 | -// $Id: pukiwiki.php,v 1.21 2007/08/26 15:17:28 henoheno Exp $ | |
3 | +// $Id: pukiwiki.php,v 1.22 2011/01/24 15:21:28 henoheno Exp $ | |
4 | 4 | // |
5 | 5 | // PukiWiki 1.4.* |
6 | 6 | // Copyright (C) 2002-2007 by PukiWiki Developers Team |
@@ -103,7 +103,6 @@ if ($spam && $method != 'GET') { | ||
103 | 103 | |
104 | 104 | if ($_spam) { |
105 | 105 | require(LIB_DIR . 'spam.php'); |
106 | - require(LIB_DIR . 'spam_pickup.php'); | |
107 | 106 | |
108 | 107 | if (isset($spam['method'][$_plugin])) { |
109 | 108 | $_method = & $spam['method'][$_plugin]; |
@@ -1,313 +1,22 @@ | ||
1 | 1 | <?php |
2 | -// $Id: spam.php,v 1.33 2008/12/28 08:33:05 henoheno Exp $ | |
3 | -// Copyright (C) 2006-2007 PukiWiki Developers Team | |
2 | +// $Id: spam.php,v 1.34 2011/01/24 15:19:36 henoheno Exp $ | |
3 | +// Copyright (C) 2006-2009, 2011 PukiWiki Developers Team | |
4 | 4 | // License: GPL v2 or (at your option) any later version |
5 | 5 | // |
6 | 6 | // Functions for Concept-work of spam-uri metrics |
7 | 7 | // |
8 | 8 | // (PHP 4 >= 4.3.0): preg_match_all(PREG_OFFSET_CAPTURE): $method['uri_XXX'] related feature |
9 | 9 | |
10 | -if (! defined('SPAM_INI_FILE')) define('SPAM_INI_FILE', 'spam.ini.php'); | |
11 | -if (! defined('DOMAIN_INI_FILE')) define('DOMAIN_INI_FILE', 'domain.ini.php'); | |
12 | - | |
13 | -// --------------------- | |
14 | -// Compat etc | |
15 | - | |
16 | -// (PHP 4 >= 4.2.0): var_export(): mail-reporting and dump related | |
17 | -if (! function_exists('var_export')) { | |
18 | - function var_export() { | |
19 | - return 'var_export() is not found on this server' . "\n"; | |
20 | - } | |
21 | -} | |
22 | - | |
23 | -// (PHP 4 >= 4.2.0): preg_grep() enables invert option | |
24 | -function preg_grep_invert($pattern = '//', $input = array()) | |
25 | -{ | |
26 | - static $invert; | |
27 | - if (! isset($invert)) $invert = defined('PREG_GREP_INVERT'); | |
28 | - | |
29 | - if ($invert) { | |
30 | - return preg_grep($pattern, $input, PREG_GREP_INVERT); | |
31 | - } else { | |
32 | - $result = preg_grep($pattern, $input); | |
33 | - if ($result) { | |
34 | - return array_diff($input, preg_grep($pattern, $input)); | |
35 | - } else { | |
36 | - return $input; | |
37 | - } | |
38 | - } | |
39 | -} | |
40 | - | |
41 | - | |
42 | -// --------------------- | |
43 | -// Utilities | |
44 | - | |
45 | -// Very roughly, shrink the lines of var_export() | |
46 | -// NOTE: If the same data exists, it must be corrupted. | |
47 | -function var_export_shrink($expression, $return = FALSE, $ignore_numeric_keys = FALSE) | |
48 | -{ | |
49 | - $result = var_export($expression, TRUE); | |
50 | - | |
51 | - $result = preg_replace( | |
52 | - // Remove a newline and spaces | |
53 | - '# => \n *array \(#', ' => array (', | |
54 | - $result | |
55 | - ); | |
56 | - | |
57 | - if ($ignore_numeric_keys) { | |
58 | - $result =preg_replace( | |
59 | - // Remove numeric keys | |
60 | - '#^( *)[0-9]+ => #m', '$1', | |
61 | - $result | |
62 | - ); | |
63 | - } | |
64 | - | |
65 | - if ($return) { | |
66 | - return $result; | |
67 | - } else { | |
68 | - echo $result; | |
69 | - return NULL; | |
70 | - } | |
71 | -} | |
72 | - | |
73 | -// Data structure: Create an array they _refer_only_one_ value | |
74 | -function one_value_array($num = 0, $value = NULL) | |
75 | -{ | |
76 | - $num = max(0, intval($num)); | |
77 | - $array = array(); | |
78 | - | |
79 | - for ($i = 0; $i < $num; $i++) { | |
80 | - $array[] = & $value; | |
81 | - } | |
82 | - | |
83 | - return $array; | |
84 | -} | |
85 | - | |
86 | -// Reverse $string with specified delimiter | |
87 | -function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = NULL) | |
88 | -{ | |
89 | - $to_null = ($to_delim === NULL); | |
90 | - | |
91 | - if (! is_string($from_delim) || (! $to_null && ! is_string($to_delim))) { | |
92 | - return FALSE; | |
93 | - } | |
94 | - if (is_array($string)) { | |
95 | - // Map, Recurse | |
96 | - $count = count($string); | |
97 | - $from = one_value_array($count, $from_delim); | |
98 | - if ($to_null) { | |
99 | - // Note: array_map() vanishes all keys | |
100 | - return array_map('delimiter_reverse', $string, $from); | |
101 | - } else { | |
102 | - $to = one_value_array($count, $to_delim); | |
103 | - // Note: array_map() vanishes all keys | |
104 | - return array_map('delimiter_reverse', $string, $from, $to); | |
105 | - } | |
106 | - } | |
107 | - if (! is_string($string)) { | |
108 | - return FALSE; | |
109 | - } | |
110 | - | |
111 | - // Returns com.example.bar.foo | |
112 | - if ($to_null) $to_delim = & $from_delim; | |
113 | - return implode($to_delim, array_reverse(explode($from_delim, $string))); | |
114 | -} | |
115 | - | |
116 | -// ksort() by domain | |
117 | -function ksort_by_domain(& $array) | |
118 | -{ | |
119 | - $sort = array(); | |
120 | - foreach(array_keys($array) as $key) { | |
121 | - $reversed = delimiter_reverse($key); | |
122 | - if ($reversed !== FALSE) { | |
123 | - $sort[$reversed] = $key; | |
124 | - } | |
125 | - } | |
126 | - ksort($sort, SORT_STRING); | |
127 | - | |
128 | - $result = array(); | |
129 | - foreach($sort as $key) { | |
130 | - $result[$key] = & $array[$key]; | |
131 | - } | |
132 | - | |
133 | - $array = $result; | |
134 | -} | |
135 | - | |
136 | -// Roughly strings(1) using PCRE | |
137 | -// This function is useful to: | |
138 | -// * Reduce the size of data, from removing unprintable binary data | |
139 | -// * Detect _bare_strings_ from binary data | |
140 | -// References: | |
141 | -// http://www.freebsd.org/cgi/man.cgi?query=strings (Man-page of GNU strings) | |
142 | -// http://www.pcre.org/pcre.txt | |
143 | -// Note: mb_ereg_replace() is one of mbstring extension's functions | |
144 | -// and need to init its encoding. | |
145 | -function strings($binary = '', $min_len = 4, $ignore_space = FALSE, $multibyte = FALSE) | |
146 | -{ | |
147 | - // String only | |
148 | - $binary = (is_array($binary) || $binary === TRUE) ? '' : strval($binary); | |
149 | - | |
150 | - $regex = $ignore_space ? | |
151 | - '[^[:graph:] \t\n]+' : // Remove "\0" etc, and readable spaces | |
152 | - '[^[:graph:][:space:]]+'; // Preserve readable spaces if possible | |
153 | - | |
154 | - $binary = $multibyte ? | |
155 | - mb_ereg_replace($regex, "\n", $binary) : | |
156 | - preg_replace('/' . $regex . '/s', "\n", $binary); | |
157 | - | |
158 | - if ($ignore_space) { | |
159 | - $binary = preg_replace( | |
160 | - array( | |
161 | - '/[ \t]{2,}/', | |
162 | - '/^[ \t]/m', | |
163 | - '/[ \t]$/m', | |
164 | - ), | |
165 | - array( | |
166 | - ' ', | |
167 | - '', | |
168 | - '' | |
169 | - ), | |
170 | - $binary); | |
171 | - } | |
172 | - | |
173 | - if ($min_len > 1) { | |
174 | - // The last character seems "\n" or not | |
175 | - $br = (! empty($binary) && $binary[strlen($binary) - 1] == "\n") ? "\n" : ''; | |
176 | - | |
177 | - $min_len = min(1024, intval($min_len)); | |
178 | - $regex = '/^.{' . $min_len . ',}/S'; | |
179 | - $binary = implode("\n", preg_grep($regex, explode("\n", $binary))) . $br; | |
180 | - } | |
181 | - | |
182 | - return $binary; | |
183 | -} | |
184 | - | |
185 | - | |
186 | -// --------------------- | |
187 | -// Utilities: Arrays | |
188 | - | |
189 | -// Count leaves (A leaf = value that is not an array, or an empty array) | |
190 | -function array_count_leaves($array = array(), $count_empty = FALSE) | |
191 | -{ | |
192 | - if (! is_array($array) || (empty($array) && $count_empty)) return 1; | |
193 | - | |
194 | - // Recurse | |
195 | - $count = 0; | |
196 | - foreach ($array as $part) { | |
197 | - $count += array_count_leaves($part, $count_empty); | |
198 | - } | |
199 | - return $count; | |
200 | -} | |
201 | - | |
202 | -// Merge two leaves | |
203 | -// Similar to PHP array_merge_leaves(), except strictly preserving keys as string | |
204 | -function array_merge_leaves($array1, $array2, $sort_keys = TRUE) | |
205 | -{ | |
206 | - // Array(s) only | |
207 | - $is_array1 = is_array($array1); | |
208 | - $is_array2 = is_array($array2); | |
209 | - if ($is_array1) { | |
210 | - if ($is_array2) { | |
211 | - ; // Pass | |
212 | - } else { | |
213 | - return $array1; | |
214 | - } | |
215 | - } else if ($is_array2) { | |
216 | - return $array2; | |
217 | - } else { | |
218 | - return $array2; // Not array ($array1 is overwritten) | |
219 | - } | |
220 | - | |
221 | - $keys_all = array_merge(array_keys($array1), array_keys($array2)); | |
222 | - if ($sort_keys) sort($keys_all, SORT_STRING); | |
223 | - | |
224 | - $result = array(); | |
225 | - foreach($keys_all as $key) { | |
226 | - $isset1 = isset($array1[$key]); | |
227 | - $isset2 = isset($array2[$key]); | |
228 | - if ($isset1 && $isset2) { | |
229 | - // Recurse | |
230 | - $result[$key] = array_merge_leaves($array1[$key], $array2[$key], $sort_keys); | |
231 | - } else if ($isset1) { | |
232 | - $result[$key] = & $array1[$key]; | |
233 | - } else { | |
234 | - $result[$key] = & $array2[$key]; | |
235 | - } | |
236 | - } | |
237 | - return $result; | |
238 | -} | |
239 | - | |
240 | -// An array-leaves to a flat array | |
241 | -function array_flat_leaves($array, $unique = TRUE) | |
242 | -{ | |
243 | - if (! is_array($array)) return $array; | |
244 | - | |
245 | - $tmp = array(); | |
246 | - foreach(array_keys($array) as $key) { | |
247 | - if (is_array($array[$key])) { | |
248 | - // Recurse | |
249 | - foreach(array_flat_leaves($array[$key]) as $_value) { | |
250 | - $tmp[] = $_value; | |
251 | - } | |
252 | - } else { | |
253 | - $tmp[] = & $array[$key]; | |
254 | - } | |
255 | - } | |
256 | - | |
257 | - return $unique ? array_values(array_unique($tmp)) : $tmp; | |
258 | -} | |
259 | - | |
260 | -// $array['something'] => $array['wanted'] | |
261 | -function array_rename_keys(& $array, $keys = array('from' => 'to'), $force = FALSE, $default = '') | |
262 | -{ | |
263 | - if (! is_array($array) || ! is_array($keys)) return FALSE; | |
264 | 10 | |
265 | - // Nondestructive test | |
266 | - if (! $force) { | |
267 | - foreach(array_keys($keys) as $from) { | |
268 | - if (! isset($array[$from])) { | |
269 | - return FALSE; | |
270 | - } | |
271 | - } | |
272 | - } | |
273 | - | |
274 | - foreach($keys as $from => $to) { | |
275 | - if ($from === $to) continue; | |
276 | - if (! $force || isset($array[$from])) { | |
277 | - $array[$to] = & $array[$from]; | |
278 | - unset($array[$from]); | |
279 | - } else { | |
280 | - $array[$to] = $default; | |
281 | - } | |
282 | - } | |
283 | - | |
284 | - return TRUE; | |
285 | -} | |
286 | - | |
287 | -// Remove redundant values from array() | |
288 | -function array_unique_recursive($array = array()) | |
289 | -{ | |
290 | - if (! is_array($array)) return $array; | |
11 | +if (! defined('LIB_DIR')) define('LIB_DIR', './'); | |
12 | +require(LIB_DIR . 'spam_pickup.php'); | |
13 | +require(LIB_DIR . 'spam_util.php'); | |
291 | 14 | |
292 | - $tmp = array(); | |
293 | - foreach($array as $key => $value){ | |
294 | - if (is_array($value)) { | |
295 | - $array[$key] = array_unique_recursive($value); | |
296 | - } else { | |
297 | - if (isset($tmp[$value])) { | |
298 | - unset($array[$key]); | |
299 | - } else { | |
300 | - $tmp[$value] = TRUE; | |
301 | - } | |
302 | - } | |
303 | - } | |
304 | - | |
305 | - return $array; | |
306 | -} | |
15 | +if (! defined('SPAM_INI_FILE')) define('SPAM_INI_FILE', 'spam.ini.php'); | |
307 | 16 | |
308 | 17 | |
309 | 18 | // --------------------- |
310 | -// Part One : Checker | |
19 | +// Regex | |
311 | 20 | |
312 | 21 | // Rough implementation of globbing |
313 | 22 | // |
@@ -355,51 +64,32 @@ function generate_host_regex($string = '', $divider = '/') | ||
355 | 64 | { |
356 | 65 | if (! is_string($string)) return ''; |
357 | 66 | |
358 | - if (mb_strpos($string, '.') === FALSE) { | |
359 | - // localhost | |
67 | + if (mb_strpos($string, '.') === FALSE || is_ip($string)) { | |
68 | + // "localhost", IPv4, etc | |
360 | 69 | return generate_glob_regex($string, $divider); |
361 | 70 | } |
362 | 71 | |
363 | - if (is_ip($string)) { | |
364 | - // IPv4 | |
365 | - return generate_glob_regex($string, $divider); | |
72 | + // FQDN or something | |
73 | + $part = explode('.', $string, 2); | |
74 | + if ($part[0] == '') { | |
75 | + // ".example.org" | |
76 | + $part[0] = '(?:.*\.)?'; | |
77 | + } else if ($part[0] == '*') { | |
78 | + // "*.example.org" | |
79 | + $part[0] = '.*\.'; | |
366 | 80 | } else { |
367 | - // FQDN or something | |
368 | - $part = explode('.', $string, 2); | |
369 | - if ($part[0] == '') { | |
370 | - // .example.org | |
371 | - $part[0] = '(?:.*\.)?'; | |
372 | - } else if ($part[0] == '*') { | |
373 | - // *.example.org | |
374 | - $part[0] = '.*\.'; | |
375 | - } else { | |
376 | - // example.org, etc | |
377 | - return generate_glob_regex($string, $divider); | |
378 | - } | |
379 | - $part[1] = generate_glob_regex($part[1], $divider); | |
380 | - return implode('', $part); | |
81 | + // example.org, etc | |
82 | + return generate_glob_regex($string, $divider); | |
381 | 83 | } |
382 | -} | |
383 | 84 | |
384 | -// Rough hostname checker | |
385 | -// TODO: Strict digit, 0x, CIDR, '999.999.999.999', ':', '::G' | |
386 | -function is_ip($string = '') | |
387 | -{ | |
388 | - if (! is_string($string)) return FALSE; | |
85 | + $part[1] = generate_glob_regex($part[1], $divider); | |
389 | 86 | |
390 | - if (strpos($string, ':') !== FALSE) { | |
391 | - return 6; // Seems IPv6 | |
392 | - } | |
87 | + return implode('', $part); | |
88 | +} | |
393 | 89 | |
394 | - if (preg_match('/^' . | |
395 | - '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' . | |
396 | - '(?:[0-9]{1,3}\.){1,3}' . '$/', | |
397 | - $string)) { | |
398 | - return 4; // Seems IPv4(dot-decimal) | |
399 | - } | |
400 | 90 | |
401 | - return FALSE; // Seems not IP | |
402 | -} | |
91 | +// --------------------- | |
92 | +// Load | |
403 | 93 | |
404 | 94 | // Load SPAM_INI_FILE and return parsed one |
405 | 95 | function get_blocklist($list = '') |
@@ -470,7 +160,10 @@ function get_blocklist_add(& $array, $key = 0, $value = '*.example.org/path/to/f | ||
470 | 160 | if (is_string($key)) { |
471 | 161 | $array[$key] = & $value; // Treat $value as a regex for FQDN(host)s |
472 | 162 | } else { |
473 | - $array[$value] = '#^' . generate_host_regex($value, '#') . '$#i'; | |
163 | + $regex = generate_host_regex($value, '#'); | |
164 | + if (! empty($regex)) { | |
165 | + $array[$value] = '#^' . $regex . '$#i'; | |
166 | + } | |
474 | 167 | } |
475 | 168 | } |
476 | 169 |
@@ -651,30 +344,37 @@ function check_uri_spam($target = '', $method = array()) | ||
651 | 344 | // ---------------------------------------- |
652 | 345 | // Area measure |
653 | 346 | |
654 | - // Area: There's HTML anchor tag | |
655 | - if ((! $asap || ! $is_spam) && isset($method['area_anchor'])) { | |
656 | - $key = 'area_anchor'; | |
657 | - $_asap = isset($method['asap']) ? array('asap' => TRUE) : array(); | |
658 | - $result = area_pickup($target, array($key => TRUE) + $_asap); | |
659 | - if ($result) { | |
660 | - $sum[$key] = $result[$key]; | |
661 | - if (isset($method[$key]) && $sum[$key] > $method[$key]) { | |
662 | - $is_spam[$key] = TRUE; | |
663 | - } | |
347 | + if (! $asap || ! $is_spam) { | |
348 | + | |
349 | + // Method pickup | |
350 | + $_method = array(); | |
351 | + foreach(array( | |
352 | + 'area_anchor', // There's HTML anchor tag | |
353 | + 'area_bbcode', // There's 'BBCode' linking tag | |
354 | + ) as $key) { | |
355 | + if (isset($method[$key])) $_method[$key] = TRUE; | |
664 | 356 | } |
665 | - } | |
666 | 357 | |
667 | - // Area: There's 'BBCode' linking tag | |
668 | - if ((! $asap || ! $is_spam) && isset($method['area_bbcode'])) { | |
669 | - $key = 'area_bbcode'; | |
670 | - $_asap = isset($method['asap']) ? array('asap' => TRUE) : array(); | |
671 | - $result = area_pickup($target, array($key => TRUE) + $_asap); | |
672 | - if ($result) { | |
673 | - $sum[$key] = $result[$key]; | |
674 | - if (isset($method[$key]) && $sum[$key] > $method[$key]) { | |
675 | - $is_spam[$key] = TRUE; | |
358 | + if ($_method) { | |
359 | + $_asap = isset($method['asap']) ? array('asap' => TRUE) : array(); | |
360 | + $_result = area_pickup($target, $_method + $_asap); | |
361 | + $_asap = NULL; | |
362 | + } else { | |
363 | + $_result = FALSE; | |
364 | + } | |
365 | + | |
366 | + if ($_result) { | |
367 | + foreach(array_keys($_method) as $key) { | |
368 | + if (isset($_result[$key])) { | |
369 | + $sum[$key] = $_result[$key]; | |
370 | + if (isset($method[$key]) && $sum[$key] > $method[$key]) { | |
371 | + $is_spam[$key] = TRUE; | |
372 | + } | |
373 | + } | |
676 | 374 | } |
677 | 375 | } |
376 | + | |
377 | + unset($_asap, $_method, $_result); | |
678 | 378 | } |
679 | 379 | |
680 | 380 | // Return if ... |
@@ -683,22 +383,30 @@ function check_uri_spam($target = '', $method = array()) | ||
683 | 383 | // ---------------------------------------- |
684 | 384 | // URI: Pickup |
685 | 385 | |
686 | - $pickups = uri_pickup_normalize(spam_uri_pickup($target, $method)); | |
386 | + $pickups = spam_uri_pickup($target, $method); | |
387 | + | |
388 | + | |
389 | + // Return if ... | |
390 | + if (empty($pickups)) return $progress; | |
391 | + | |
392 | + // Normalize all | |
393 | + $pickups = uri_pickup_normalize($pickups); | |
394 | + | |
395 | + // ---------------------------------------- | |
396 | + // Pickup some part of URI | |
397 | + | |
687 | 398 | $hosts = array(); |
688 | 399 | foreach ($pickups as $key => $pickup) { |
689 | 400 | $hosts[$key] = & $pickup['host']; |
690 | 401 | } |
691 | 402 | |
692 | - // Return if ... | |
693 | - if (empty($pickups)) return $progress; | |
694 | - | |
695 | 403 | // ---------------------------------------- |
696 | 404 | // URI: Bad host <pre-filter> (Separate good/bad hosts from $hosts) |
697 | 405 | |
698 | 406 | if ((! $asap || ! $is_spam) && isset($method['badhost'])) { |
699 | 407 | $list = get_blocklist('pre'); |
700 | 408 | $blocked = blocklist_distiller($hosts, array_keys($list), $asap); |
701 | - foreach($list as $key=>$type){ | |
409 | + foreach($list as $key => $type){ | |
702 | 410 | if (! $type) unset($blocked[$key]); // Ignore goodhost etc |
703 | 411 | } |
704 | 412 | unset($list); |
@@ -918,9 +626,9 @@ function summarize_detail_newtral($progress = array()) | ||
918 | 626 | $subs = array(); |
919 | 627 | foreach(array_keys($trie[$key]) as $sub) { |
920 | 628 | if ($sub == '') { |
921 | - $subs[] = $key; | |
629 | + $subs[] = $key; // 'example.com' | |
922 | 630 | } else { |
923 | - $subs[] = $sub . '.' . $key; | |
631 | + $subs[] = $sub . '. '; // 'A.foo.bar. ' | |
924 | 632 | } |
925 | 633 | } |
926 | 634 | $result[] = ' \'' . $key . '\' => \'' . implode(', ', $subs) . '\','; |
@@ -934,59 +642,6 @@ function summarize_detail_newtral($progress = array()) | ||
934 | 642 | } |
935 | 643 | |
936 | 644 | |
937 | -// Check responsibility-root of the FQDN | |
938 | -// 'foo.bar.example.com' => 'example.com' (.com has the last whois for it) | |
939 | -// 'foo.bar.example.au' => 'example.au' (.au has the last whois for it) | |
940 | -// 'foo.bar.example.edu.au' => 'example.edu.au' (.edu.au has the last whois for it) | |
941 | -// 'foo.bar.example.act.edu.au' => 'example.act.edu.au' (.act.edu.au has the last whois for it) | |
942 | -function whois_responsibility($fqdn = 'foo.bar.example.com', $parent = FALSE, $implicit = TRUE) | |
943 | -{ | |
944 | - static $domain; | |
945 | - | |
946 | - if ($fqdn === NULL) { | |
947 | - $domain = NULL; // Unset | |
948 | - return ''; | |
949 | - } | |
950 | - if (! is_string($fqdn)) return ''; | |
951 | - | |
952 | - if (is_ip($fqdn)) return $fqdn; | |
953 | - | |
954 | - if (! isset($domain)) { | |
955 | - $domain = array(); | |
956 | - if (file_exists(DOMAIN_INI_FILE)) { | |
957 | - include(DOMAIN_INI_FILE); // Set | |
958 | - } | |
959 | - } | |
960 | - | |
961 | - $result = array(); | |
962 | - $dcursor = & $domain; | |
963 | - $array = array_reverse(explode('.', $fqdn)); | |
964 | - $i = 0; | |
965 | - while(TRUE) { | |
966 | - if (! isset($array[$i])) break; | |
967 | - $acursor = $array[$i]; | |
968 | - if (is_array($dcursor) && isset($dcursor[$acursor])) { | |
969 | - $result[] = & $array[$i]; | |
970 | - $dcursor = & $dcursor[$acursor]; | |
971 | - } else { | |
972 | - if (! $parent && isset($acursor)) { | |
973 | - $result[] = & $array[$i]; // Whois servers must know this subdomain | |
974 | - } | |
975 | - break; | |
976 | - } | |
977 | - ++$i; | |
978 | - } | |
979 | - | |
980 | - // Implicit responsibility: Top-Level-Domains must not be yours | |
981 | - // 'bar.foo.something' => 'foo.something' | |
982 | - if ($implicit && count($result) == 1 && count($array) > 1) { | |
983 | - $result[] = & $array[1]; | |
984 | - } | |
985 | - | |
986 | - return $result ? implode('.', array_reverse($result)) : ''; | |
987 | -} | |
988 | - | |
989 | - | |
990 | 645 | // --------------------- |
991 | 646 | // Exit |
992 | 647 |
@@ -1009,7 +664,7 @@ function spam_exit($mode = '', $data = array()) | ||
1009 | 664 | break; |
1010 | 665 | case 'dump': |
1011 | 666 | echo('<pre>' . "\n"); |
1012 | - echo htmlspecialchars(var_export($data, TRUE)); | |
667 | + echo htmlsc(var_export($data, TRUE)); | |
1013 | 668 | echo('</pre>' . "\n"); |
1014 | 669 | break; |
1015 | 670 | }; |
@@ -1,10 +1,14 @@ | ||
1 | 1 | <?php |
2 | -// $Id: spam_pickup.php,v 1.5 2007/10/20 04:44:08 henoheno Exp $ | |
3 | -// Copyright (C) 2006-2007 PukiWiki Developers Team | |
2 | +// $Id: spam_pickup.php,v 1.6 2011/01/24 15:19:36 henoheno Exp $ | |
3 | +// Copyright (C) 2006-2009 PukiWiki Developers Team | |
4 | 4 | // License: GPL v2 or (at your option) any later version |
5 | 5 | // |
6 | 6 | // Functions for Concept-work of spam-uri metrics |
7 | 7 | // |
8 | +// (PHP 4 >= 4.3.0): preg_match_all(PREG_OFFSET_CAPTURE): $method['uri_XXX'] related feature | |
9 | +// | |
10 | + | |
11 | +if (! defined('DOMAIN_INI_FILE')) define('DOMAIN_INI_FILE', 'domain.ini.php'); | |
8 | 12 | |
9 | 13 | // --------------------- |
10 | 14 | // URI pickup |
@@ -14,18 +18,18 @@ | ||
14 | 18 | // [OK] http://nasty.example.org:80/foo/xxx#nasty_string/bar |
15 | 19 | // [OK] ftp://nasty.example.org:80/dfsdfs |
16 | 20 | // [OK] ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm (from RFC3986) |
21 | +// Not available for: IDN(ignored) | |
17 | 22 | function uri_pickup($string = '') |
18 | 23 | { |
19 | 24 | if (! is_string($string)) return array(); |
20 | 25 | |
21 | - // Not available for: IDN(ignored) | |
22 | 26 | $array = array(); |
23 | 27 | preg_match_all( |
24 | 28 | // scheme://userinfo@host:port/path/or/pathinfo/maybefile.and?query=string#fragment |
25 | 29 | // Refer RFC3986 (Regex below is not strict) |
26 | 30 | '#(\b[a-z][a-z0-9.+-]{1,8}):[/\\\]+' . // 1: Scheme |
27 | 31 | '(?:' . |
28 | - '([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username) | |
32 | + '([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username and/or password) | |
29 | 33 | '@)?' . |
30 | 34 | '(' . |
31 | 35 | // 3: Host |
@@ -34,7 +38,7 @@ function uri_pickup($string = '') | ||
34 | 38 | '[a-z0-9_-][a-z0-9_.-]+[a-z0-9_-]' . // hostname(FQDN) : foo.example.org |
35 | 39 | ')' . |
36 | 40 | '(?::([0-9]*))?' . // 4: Port |
37 | - '((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' . // 5: Directory path or path-info | |
41 | + '((?:/+[^\s<>"\'\[\]/\#?]+)*/+)?' . // 5: Directory path | |
38 | 42 | '([^\s<>"\'\[\]\#?]+)?' . // 6: File? |
39 | 43 | '(?:\?([^\s<>"\'\[\]\#]+))?' . // 7: Query string |
40 | 44 | '(?:\#([a-z0-9._~%!$&\'()*+,;=:@-]*))?' . // 8: Fragment |
@@ -42,18 +46,18 @@ function uri_pickup($string = '') | ||
42 | 46 | $string, $array, PREG_SET_ORDER | PREG_OFFSET_CAPTURE |
43 | 47 | ); |
44 | 48 | |
45 | - // Format the $array | |
49 | + // Reformat the $array | |
46 | 50 | static $parts = array( |
47 | 51 | 1 => 'scheme', 2 => 'userinfo', 3 => 'host', 4 => 'port', |
48 | 52 | 5 => 'path', 6 => 'file', 7 => 'query', 8 => 'fragment' |
49 | 53 | ); |
50 | - $default = array(''); | |
54 | + $default = array(0 => '', 1 => -1); | |
51 | 55 | foreach(array_keys($array) as $uri) { |
52 | 56 | $_uri = & $array[$uri]; |
53 | 57 | array_rename_keys($_uri, $parts, TRUE, $default); |
54 | 58 | $offset = $_uri['scheme'][1]; // Scheme's offset = URI's offset |
55 | 59 | foreach(array_keys($_uri) as $part) { |
56 | - $_uri[$part] = & $_uri[$part][0]; // Remove offsets | |
60 | + $_uri[$part] = $_uri[$part][0]; // Remove offsets | |
57 | 61 | } |
58 | 62 | } |
59 | 63 |
@@ -86,27 +90,43 @@ function uri_pickup_implode($uri = array()) | ||
86 | 90 | $tmp[] = & $uri['scheme']; |
87 | 91 | $tmp[] = '://'; |
88 | 92 | } |
93 | + | |
89 | 94 | if (isset($uri['userinfo']) && $uri['userinfo'] !== '') { |
90 | 95 | $tmp[] = & $uri['userinfo']; |
91 | 96 | $tmp[] = '@'; |
97 | + } else if (isset($uri['user']) || isset($uri['pass'])) { | |
98 | + if (isset($uri['user']) && $uri['user'] !== '') { | |
99 | + $tmp[] = & $uri['user']; | |
100 | + } | |
101 | + $tmp[] = ':'; | |
102 | + if (isset($uri['pass']) && $uri['pass'] !== '') { | |
103 | + $tmp[] = & $uri['pass']; | |
104 | + } | |
105 | + $tmp[] = '@'; | |
92 | 106 | } |
107 | + | |
93 | 108 | if (isset($uri['host']) && $uri['host'] !== '') { |
94 | 109 | $tmp[] = & $uri['host']; |
95 | 110 | } |
111 | + | |
96 | 112 | if (isset($uri['port']) && $uri['port'] !== '') { |
97 | 113 | $tmp[] = ':'; |
98 | 114 | $tmp[] = & $uri['port']; |
99 | 115 | } |
116 | + | |
100 | 117 | if (isset($uri['path']) && $uri['path'] !== '') { |
101 | 118 | $tmp[] = & $uri['path']; |
102 | 119 | } |
120 | + | |
103 | 121 | if (isset($uri['file']) && $uri['file'] !== '') { |
104 | 122 | $tmp[] = & $uri['file']; |
105 | 123 | } |
124 | + | |
106 | 125 | if (isset($uri['query']) && $uri['query'] !== '') { |
107 | 126 | $tmp[] = '?'; |
108 | 127 | $tmp[] = & $uri['query']; |
109 | 128 | } |
129 | + | |
110 | 130 | if (isset($uri['fragment']) && $uri['fragment'] !== '') { |
111 | 131 | $tmp[] = '#'; |
112 | 132 | $tmp[] = & $uri['fragment']; |
@@ -115,12 +135,13 @@ function uri_pickup_implode($uri = array()) | ||
115 | 135 | return implode('', $tmp); |
116 | 136 | } |
117 | 137 | |
138 | + | |
118 | 139 | // --------------------- |
119 | 140 | // URI normalization |
120 | 141 | |
121 | 142 | // Normalize an array of URI arrays |
122 | 143 | // NOTE: Give me the uri_pickup() results |
123 | -function uri_pickup_normalize(& $pickups, $destructive = TRUE) | |
144 | +function uri_pickup_normalize(& $pickups, $destructive = TRUE, $pathfile = FALSE) | |
124 | 145 | { |
125 | 146 | if (! is_array($pickups)) return $pickups; |
126 | 147 |
@@ -145,6 +166,28 @@ function uri_pickup_normalize(& $pickups, $destructive = TRUE) | ||
145 | 166 | } |
146 | 167 | } |
147 | 168 | |
169 | + if ($pathfile) { | |
170 | + return uri_pickup_normalize_pathfile($pickups); | |
171 | + } else { | |
172 | + return $pickups; | |
173 | + } | |
174 | +} | |
175 | + | |
176 | +// Normalize: 'path' + 'file' = 'path' (Similar structure using PHP's "parse_url()" function) | |
177 | +// NOTE: In some case, 'file' DOES NOT mean _filename_. | |
178 | +// [EXAMPLE] http://example.com/path/to/directory-accidentally-not-ended-with-slash | |
179 | +function uri_pickup_normalize_pathfile(& $pickups) | |
180 | +{ | |
181 | + if (! is_array($pickups)) return $pickups; | |
182 | + | |
183 | + foreach (array_keys($pickups) as $key) { | |
184 | + $_key = & $pickups[$key]; | |
185 | + if (isset($_key['path'], $_key['file'])) { | |
186 | + $_key['path'] = $_key['path'] . $_key['file']; | |
187 | + unset($_key['file']); | |
188 | + } | |
189 | + } | |
190 | + | |
148 | 191 | return $pickups; |
149 | 192 | } |
150 | 193 |
@@ -188,13 +231,14 @@ function scheme_normalize($scheme = '', $abbrevs_harmfull = TRUE) | ||
188 | 231 | // www.foo.bar => foo.bar |
189 | 232 | // www.10.20 => www.10.20 (Invalid hostname) |
190 | 233 | // NOTE: |
191 | -// 'www' is mostly used as traditional hostname of WWW server. | |
192 | -// 'www.foo.bar' may be identical with 'foo.bar'. | |
234 | +// 'www' is basically traditional hostname for WWW server. | |
235 | +// In these case, 'www.foo.bar' MAY be identical with 'foo.bar'. | |
193 | 236 | function host_normalize($host = '') |
194 | 237 | { |
195 | 238 | if (! is_string($host)) return ''; |
196 | 239 | |
197 | 240 | $host = strtolower($host); |
241 | + | |
198 | 242 | $matches = array(); |
199 | 243 | if (preg_match('/^www\.(.+\.[a-z]+)$/', $host, $matches)) { |
200 | 244 | return $matches[1]; |
@@ -791,10 +835,83 @@ function spam_uri_pickup($string = '', $method = array()) | ||
791 | 835 | } |
792 | 836 | |
793 | 837 | // Remove 'offset's for area_measure() |
794 | - foreach(array_keys($array) as $key) | |
838 | + foreach(array_keys($array) as $key) { | |
795 | 839 | unset($array[$key]['area']['offset']); |
840 | + } | |
796 | 841 | |
797 | 842 | return $array; |
798 | 843 | } |
799 | 844 | |
845 | +// Rough hostname checker | |
846 | +// TODO: Strict digit, 0x, CIDR, '999.999.999.999', ':', '::G' | |
847 | +function is_ip($string = '') | |
848 | +{ | |
849 | + if (! is_string($string)) return FALSE; | |
850 | + | |
851 | + if (strpos($string, ':') !== FALSE) { | |
852 | + return 6; // Seems IPv6 | |
853 | + } | |
854 | + | |
855 | + if (preg_match('/^' . | |
856 | + '(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' . | |
857 | + '(?:[0-9]{1,3}\.){1,3}' . '$/', | |
858 | + $string)) { | |
859 | + return 4; // Seems IPv4(dot-decimal) | |
860 | + } | |
861 | + | |
862 | + return FALSE; // Seems not IP | |
863 | +} | |
864 | + | |
865 | +// Check responsibility-root of the FQDN | |
866 | +// 'foo.bar.example.com' => 'example.com' (.com has the last whois for it) | |
867 | +// 'foo.bar.example.au' => 'example.au' (.au has the last whois for it) | |
868 | +// 'foo.bar.example.edu.au' => 'example.edu.au' (.edu.au has the last whois for it) | |
869 | +// 'foo.bar.example.act.edu.au' => 'example.act.edu.au' (.act.edu.au has the last whois for it) | |
870 | +function whois_responsibility($fqdn = 'foo.bar.example.com', $parent = FALSE, $implicit = TRUE) | |
871 | +{ | |
872 | + static $domain; | |
873 | + | |
874 | + if ($fqdn === NULL) { | |
875 | + $domain = NULL; // Unset | |
876 | + return ''; | |
877 | + } | |
878 | + if (! is_string($fqdn)) return ''; | |
879 | + | |
880 | + if (is_ip($fqdn)) return $fqdn; | |
881 | + | |
882 | + if (! isset($domain)) { | |
883 | + $domain = array(); | |
884 | + if (file_exists(DOMAIN_INI_FILE)) { | |
885 | + include(DOMAIN_INI_FILE); // Set | |
886 | + } | |
887 | + } | |
888 | + | |
889 | + $result = array(); | |
890 | + $dcursor = & $domain; | |
891 | + $array = array_reverse(explode('.', $fqdn)); | |
892 | + $i = 0; | |
893 | + while(TRUE) { | |
894 | + if (! isset($array[$i])) break; | |
895 | + $acursor = $array[$i]; | |
896 | + if (is_array($dcursor) && isset($dcursor[$acursor])) { | |
897 | + $result[] = & $array[$i]; | |
898 | + $dcursor = & $dcursor[$acursor]; | |
899 | + } else { | |
900 | + if (! $parent && isset($acursor)) { | |
901 | + $result[] = & $array[$i]; // Whois servers must know this subdomain | |
902 | + } | |
903 | + break; | |
904 | + } | |
905 | + ++$i; | |
906 | + } | |
907 | + | |
908 | + // Implicit responsibility: Top-Level-Domains must not be yours | |
909 | + // 'bar.foo.something' => 'foo.something' | |
910 | + if ($implicit && count($result) == 1 && count($array) > 1) { | |
911 | + $result[] = & $array[1]; | |
912 | + } | |
913 | + | |
914 | + return $result ? implode('.', array_reverse($result)) : ''; | |
915 | +} | |
916 | + | |
800 | 917 | ?> |
@@ -0,0 +1,316 @@ | ||
1 | +<?php | |
2 | +// $Id: spam_util.php,v 1.1 2011/01/24 15:19:36 henoheno Exp $ | |
3 | +// Copyright (C) 2006-2009, 2011 PukiWiki Developers Team | |
4 | +// License: GPL v2 or (at your option) any later version | |
5 | +// | |
6 | +// Functions for Concept-work of spam-uri metrics | |
7 | + | |
8 | + | |
9 | +// --------------------- | |
10 | +// Compat etc | |
11 | + | |
// (PHP 4 >= 4.2.0): var_export(): mail-reporting and dump related
// Where var_export() does not exist, define a stub that ignores its
// arguments and always returns a fixed notice
if (! function_exists('var_export')) {
	function var_export()
	{
		$notice = 'var_export() is not found on this server';
		return $notice . "\n";
	}
}
18 | + | |
// preg_grep() with the invert option: returns the entries of $input that
// do NOT match $pattern, with their keys preserved
// (PHP 4 >= 4.2.0): preg_grep() enables invert option. Where the constant
// PREG_GREP_INVERT is not defined, the inversion is emulated.
// $pattern : PCRE pattern
// $input   : Array to filter
function preg_grep_invert($pattern = '//', $input = array())
{
	static $invert;
	if (! isset($invert)) $invert = defined('PREG_GREP_INVERT');

	if ($invert) {
		return preg_grep($pattern, $input, PREG_GREP_INVERT);
	}

	// Emulation: run preg_grep() only once, and reuse the result
	$matched = preg_grep($pattern, $input);
	if (! $matched) {
		// Nothing matched (or preg_grep() failed)
		return $input;
	}
	return array_diff($input, $matched);
}
36 | + | |
37 | + | |
38 | +// --------------------- | |
39 | +// Utilities | |
40 | + | |
41 | + | |
// Interface with PukiWiki: define htmlsc() only if nobody did it yet
if (! function_exists('htmlsc')) {
	// Charset used as the default of htmlsc(), unless already defined
	defined('CONTENT_CHARSET') or define('CONTENT_CHARSET', 'ISO-8859-1');

	// Sugar of htmlspecialchars() with default settings
	// (ENT_QUOTES, and CONTENT_CHARSET as the charset)
	function htmlsc($string = '', $flags = ENT_QUOTES, $charset = CONTENT_CHARSET)
	{
		$escaped = htmlspecialchars($string, $flags, $charset);
		return $escaped;
	}
}
52 | + | |
// Very roughly, shrink the lines of var_export()
// $expression          : Value to dump
// $return              : If TRUE, return the text. If not, echo it and return NULL
// $ignore_numeric_keys : If TRUE, remove 'NNN => ' from the beginning of lines
// NOTE: This is plain text replacement on the dump. String values that
//       contain the same sequences will be rewritten (corrupted) too.
function var_export_shrink($expression, $return = FALSE, $ignore_numeric_keys = FALSE)
{
	// Join "key => (newline, spaces) array (" into one line
	$patterns     = array('# => \n *array \(#');
	$replacements = array(' => array (');

	if ($ignore_numeric_keys) {
		// Remove numeric keys, keeping the indent
		$patterns[]     = '#^( *)[0-9]+ => #m';
		$replacements[] = '$1';
	}

	// Patterns are applied one by one, in this order
	$dump = preg_replace($patterns, $replacements, var_export($expression, TRUE));

	if (! $return) {
		echo $dump;
		return NULL;
	}
	return $dump;
}
80 | + | |
// Data structure: Create an array whose elements all _refer_ to _only_one_ value
// $num   : Number of elements (made an integer; less than 0 means 0)
// $value : The value shared by every element
// NOTE: Elements are references to the same value,
//       so writing to one element changes all of them
function one_value_array($num = 0, $value = NULL)
{
	$shared    = array();
	$remaining = max(0, intval($num));

	while ($remaining-- > 0) {
		$shared[] = & $value;
	}

	return $shared;
}
93 | + | |
// Reverse the order of the parts of $string, separated by a delimiter
//   'foo.bar.example.com' => 'com.example.bar.foo'
// $string     : A string, or an array of them (mapped recursively)
// $from_delim : Delimiter to split $string with
// $to_delim   : Delimiter to join the result with. NULL means $from_delim
// Return      : The reversed string, or an array of results for an array.
//               FALSE if a delimiter or $string has a wrong type
// NOTE: For an array, all keys vanish (the result is re-indexed)
function delimiter_reverse($string = 'foo.bar.example.com', $from_delim = '.', $to_delim = NULL)
{
	$same_delim = ($to_delim === NULL);

	if (! is_string($from_delim)) return FALSE;
	if (! $same_delim && ! is_string($to_delim)) return FALSE;

	if (is_array($string)) {
		// Map, Recurse
		$reversed = array();
		foreach ($string as $item) {
			$reversed[] = delimiter_reverse($item, $from_delim, $to_delim);
		}
		return $reversed;
	}

	if (! is_string($string)) return FALSE;

	// Returns com.example.bar.foo
	$glue  = $same_delim ? $from_delim : $to_delim;
	$parts = explode($from_delim, $string);
	return implode($glue, array_reverse($parts));
}
123 | + | |
// ksort() by domain
// Sort $array (in place) by its keys, comparing them label by label from
// the right: 'example.com' < 'a.example.com' < 'www.example.org'
// $array : Array to sort, keyed by domain names
// NOTE: Integer keys (PHP also turns numeric-string keys into integers)
//       are compared as strings. They used to be dropped from the array.
function ksort_by_domain(& $array)
{
	$sort = array();
	foreach (array_keys($array) as $key) {
		// Reversed key ('a.example.com' => 'com.example.a') => Original key
		$reversed = implode('.', array_reverse(explode('.', strval($key))));
		$sort[$reversed] = $key;
	}
	ksort($sort, SORT_STRING);

	// Rebuild in the sorted order. References keep the elements as they are
	$result = array();
	foreach ($sort as $key) {
		$result[$key] = & $array[$key];
	}

	$array = $result;
}
143 | + | |
// Roughly strings(1) using PCRE
// This function is useful to:
//   * Reduce the size of data, by removing unprintable binary data
//   * Detect _bare_strings_ from binary data
// References:
//   http://www.freebsd.org/cgi/man.cgi?query=strings (Man-page of GNU strings)
//   http://www.pcre.org/pcre.txt
// Note: mb_ereg_replace() is one of mbstring extension's functions
//       and needs its encoding to be initialized
//
// $binary       : Data to scan. An array or TRUE is treated as ''
// $min_len      : Lines shorter than this are dropped (at most 1024).
//                 1 or less means no filtering
// $ignore_space : If TRUE, only space, tab and newline survive as spaces,
//                 then runs of space/tab are squeezed and trimmed per line
// $multibyte    : If TRUE, use mb_ereg_replace() instead of preg_replace()
// Return        : Printable parts, separated by newlines
function strings($binary = '', $min_len = 4, $ignore_space = FALSE, $multibyte = FALSE)
{
	// String only
	if (is_array($binary) || $binary === TRUE) {
		$text = '';
	} else {
		$text = strval($binary);
	}

	// Runs of characters to throw away
	if ($ignore_space) {
		$unprintable = '[^[:graph:] \t\n]+';
	} else {
		$unprintable = '[^[:graph:][:space:]]+';
	}

	// Each run becomes a line break
	if ($multibyte) {
		$text = mb_ereg_replace($unprintable, "\n", $text);
	} else {
		$text = preg_replace('/' . $unprintable . '/s', "\n", $text);
	}

	if ($ignore_space) {
		// Squeeze spaces/tabs, then trim one at each end of lines
		$squeeze = array('/[ \t]{2,}/', '/^[ \t]/m', '/[ \t]$/m');
		$with    = array(' ', '', '');
		$text = preg_replace($squeeze, $with, $text);
	}

	if ($min_len > 1) {
		// Remember whether the last character is "\n", to put it back
		$tail = '';
		if (! empty($text) && substr($text, -1) == "\n") $tail = "\n";

		$limit = min(1024, intval($min_len));
		$lines = preg_grep('/^.{' . $limit . ',}/S', explode("\n", $text));
		$text  = implode("\n", $lines) . $tail;
	}

	return $text;
}
192 | + | |
193 | + | |
194 | +// --------------------- | |
195 | +// Utilities: Arrays | |
196 | + | |
// Count leaves
// A leaf = a value that is not an array. An empty array is counted
// as a leaf too, but only if $count_empty is TRUE
// $array       : Value to inspect (a non-array counts as one leaf)
// $count_empty : Count empty arrays or not
function array_count_leaves($array = array(), $count_empty = FALSE)
{
	if (! is_array($array)) return 1;
	if ($count_empty && empty($array)) return 1;

	// Recurse
	$leaves = 0;
	foreach (array_keys($array) as $key) {
		$leaves += array_count_leaves($array[$key], $count_empty);
	}

	return $leaves;
}
209 | + | |
// Merge two leaves
// Similar to PHP array_merge_recursive(), except strictly preserving keys as string
// $array1, $array2 : Values to merge
//   Both are arrays : Merged key by key (recursively)
//   Only one is an array : That array is returned
//   Neither is an array  : $array2 is returned ($array1 is overwritten)
// $sort_keys : If TRUE, keys of the result are sorted as strings.
//              If not, keys of $array1 come first, then the rest of $array2
// NOTE: A key holding NULL is treated as 'not set' (isset() is used)
function array_merge_leaves($array1, $array2, $sort_keys = TRUE)
{
	// Array(s) only
	if (! is_array($array2)) {
		return is_array($array1) ? $array1 : $array2;
	}
	if (! is_array($array1)) return $array2;

	// Every key once: keys of $array1, then keys only $array2 has.
	// Listing shared keys twice made each shared subtree merge twice
	// per level.
	$keys_all = array_keys($array1 + $array2);
	if ($sort_keys) sort($keys_all, SORT_STRING);

	$result = array();
	foreach ($keys_all as $key) {
		$isset1 = isset($array1[$key]);
		$isset2 = isset($array2[$key]);
		if ($isset1 && $isset2) {
			// Recurse
			$result[$key] = array_merge_leaves($array1[$key], $array2[$key], $sort_keys);
		} else if ($isset1) {
			$result[$key] = $array1[$key];
		} else {
			// Only $array2 has it, or the key holds NULL everywhere
			$result[$key] = $isset2 ? $array2[$key] : NULL;
		}
	}

	return $result;
}
247 | + | |
// An array-leaves to a flat array
// Collect every non-array value of a (nested) array into one list,
// in the order they appear. All keys vanish.
// $array  : Array to flatten. A non-array is returned as is
// $unique : If TRUE, duplicated values are removed by array_unique().
//           If not, every leaf is kept, including those inside nested arrays
function array_flat_leaves($array, $unique = TRUE)
{
	if (! is_array($array)) return $array;

	$flat = array();
	foreach ($array as $value) {
		if (is_array($value)) {
			// Recurse, without removing duplicates at this depth:
			// that is done once, at the end, and only if wanted
			foreach (array_flat_leaves($value, FALSE) as $leaf) {
				$flat[] = $leaf;
			}
		} else {
			$flat[] = $value;
		}
	}

	return $unique ? array_values(array_unique($flat)) : $flat;
}
267 | + | |
// $array['something'] => $array['wanted']
// Rename keys of $array, in place
// $array   : Array to modify
// $keys    : array('from' => 'to', ...)
// $force   : If FALSE, nothing is changed (and FALSE is returned) unless
//            every 'from' key is set. If TRUE, a 'to' key whose 'from'
//            is not set gets $default instead
// $default : Value for $force
// Return   : TRUE, or FALSE if nothing was done
// NOTE: isset() is used, so a key holding NULL is treated as 'not set'
function array_rename_keys(& $array, $keys = array('from' => 'to'), $force = FALSE, $default = '')
{
	if (! is_array($array) || ! is_array($keys)) return FALSE;

	// Nondestructive test
	if (! $force) {
		foreach ($keys as $from => $to) {
			if (! isset($array[$from])) return FALSE;
		}
	}

	foreach ($keys as $from => $to) {
		if ($from === $to) continue;

		if ($force && ! isset($array[$from])) {
			// Nothing to move
			$array[$to] = $default;
			continue;
		}

		// Move
		$array[$to] = & $array[$from];
		unset($array[$from]);
	}

	return TRUE;
}
294 | + | |
// Remove redundant values from array()
// For $array and every array nested in it, only the first one of
// duplicated non-array values is kept. Keys are preserved.
// Each array is examined on its own: the same value may remain in
// different (nested) arrays
// $array : Array to examine. A non-array is returned as is
// NOTE: Values are compared as array keys: 1 and '1' are the same.
//       Floats are compared by their string form, because a float
//       array key is cut down to an integer (1.2 and 1.7 would be the same)
function array_unique_recursive($array = array())
{
	if (! is_array($array)) return $array;

	$seen = array();
	foreach ($array as $key => $value) {
		if (is_array($value)) {
			// Recurse
			$array[$key] = array_unique_recursive($value);
			continue;
		}

		$mark = is_float($value) ? strval($value) : $value;
		if (isset($seen[$mark])) {
			unset($array[$key]);
		} else {
			$seen[$mark] = TRUE;
		}
	}

	return $array;
}
315 | + | |
316 | +?> |
@@ -1,5 +1,5 @@ | ||
1 | 1 | <?php |
2 | -// $Id: spam.ini.php,v 1.93 2010/09/04 13:36:25 henoheno Exp $ | |
2 | +// $Id: spam.ini.php,v 1.94 2011/01/24 15:19:36 henoheno Exp $ | |
3 | 3 | // Spam-related setting |
4 | 4 | |
5 | 5 | // NOTE FOR ADMINISTRATORS: |
@@ -83,7 +83,7 @@ $blocklist['official/dev'] = array( | ||
83 | 83 | |
84 | 84 | $blocklist['A-1'] = array( |
85 | 85 | |
86 | - // A-1: General redirection services -- by HTML meta, HTML frame, JavaScript, | |
86 | + // A-1: General redirection or masking services -- by HTML meta, HTML frame, JavaScript, | |
87 | 87 | // web-based proxy, DNS subdomains, etc |
88 | 88 | // http://en.wikipedia.org/wiki/URL_redirection |
89 | 89 | // |
@@ -468,6 +468,9 @@ $blocklist['A-1'] = array( | ||
468 | 468 | 'gzurl.com', |
469 | 469 | 'url.grillsportverein.de', |
470 | 470 | 'Harudake.net' => array('*.hyu.jp'), |
471 | + 'hatena.ne.jp related' => array( | |
472 | + 'htn.to', // 2010-09 59.106.108.106 (hatena.ne.jp is 59.106.108.106) | |
473 | + ), | |
471 | 474 | 'Hattinger Linux User Group' => array('short.hatlug.de'), |
472 | 475 | 'Hexten.net' => array('lyxus.net'), |
473 | 476 | 'here.is', |
@@ -1111,7 +1114,7 @@ $blocklist['A-1'] = array( | ||
1111 | 1114 | 'trimurl.com', |
1112 | 1115 | //'ttu.cc', // Seems closed |
1113 | 1116 | 'turl.jp', |
1114 | - 'Twitter' => array( | |
1117 | + 'Twitter.com' => array( | |
1115 | 1118 | 't.co', // by (cofounders at cointernet.co) |
1116 | 1119 | 'twt.tl', |
1117 | 1120 | ), |
@@ -1238,6 +1241,7 @@ $blocklist['A-1'] = array( | ||
1238 | 1241 | 'useurl.us', // by Edward Beauchamp (mail at ebvk.com) |
1239 | 1242 | 'utun.jp', |
1240 | 1243 | 'uxxy.com', |
1244 | + 'uzo.in', // 2010-09 redirects, and subdomain | |
1241 | 1245 | '*.v27.net', |
1242 | 1246 | 'V3.com by FortuneCity.com' => array( // http://www.v3.com/sub-domain-list.shtml |
1243 | 1247 | '*.all.at', |
@@ -7089,76 +7093,103 @@ $blocklist['C'] = array( | ||
7089 | 7093 | ), |
7090 | 7094 | '.onlinecasinoinformation.com', // 2010/08 66.96.147.105 |
7091 | 7095 | '.onlinecasinoresources.com', // 2010/08 74.220.215.62 |
7096 | + | |
7097 | + 'moshenhm at gmail.com' => array( // by Nahum, Moshe (moshenhm at gmail.com) | |
7098 | + | |
7099 | + // 2010/08 65.254.248.143 | |
7100 | + '.odinhosting.com', // seems no link today | |
7101 | + '.nycdivers.com', // link to mainalpha.com | |
7102 | + '.robertlhines.com', // link to mainalpha.com, etc | |
7103 | + '.shadowsonmyshift.com', // link to mainalpha.com, etc | |
7104 | + | |
7105 | + // 2010/08 69.89.31.187 | |
7106 | + '.bcsliding.com', // | |
7107 | + '.msthirteen.com', // link to mainalpha.com | |
7108 | + // 2010/08 74.53.239.27 | |
7109 | + '.thetravelerscafe.com', // cheap tickets | |
7110 | + | |
7111 | + // 2010/08 74.81.92.55 | |
7112 | + '.sonicparthenon.com', // link to mainalpha.com | |
7113 | + '.staroftheevening.com', // link to mainalpha.com | |
7114 | + '.sanjosecosmeticdental.com', | |
7115 | + | |
7116 | + // 2010/08 173.45.103.74 | |
7117 | + '.digitalexperts.com', | |
7118 | + | |
7119 | + // 2010/08 173.236.48.82 | |
7120 | + '.sunshinetesting.com', // link to mainalpha.com | |
7121 | + '.sports-and-concert-tickets.com', // seems no link today | |
7122 | + | |
7123 | + // 2010/08 174.120.82.124 | |
7124 | + '.blueysretreat.com', // seems no link today | |
7125 | + '.lamborghinidenveronline.com', // seems no link today | |
7126 | + '.buckandbb.net', // link to kqzyfj.com | |
7127 | + | |
7128 | + // 2010/08 174.132.149.98 | |
7129 | + '.2008-national-n-scale-convention.com', // link to mainalpha.com | |
7130 | + '.creativejuicecompetition.com', // seems no link today | |
7131 | + | |
7132 | + // 2010/08 216.119.132.2 | |
7133 | + '.kennybrown.net', | |
7134 | + | |
7135 | + // 2010/08 no address today | |
7136 | + '.bestblackdatingonline.com', | |
7137 | + ), | |
7138 | + | |
7139 | + 'info at dvishnu.com' => array( // by Vishnu Prasath (info at dvishnu.com) | |
7140 | + | |
7141 | + // 2010/08 69.89.31.187 | |
7142 | + '.dinuzzollc.com', // link to mainalpha.com etc | |
7143 | + '.laruesbackdoor.com', // link to mainalpha.com etc | |
7144 | + '.okrenters.com', // link to mainalpha.com | |
7145 | + '.pandaitaid.com', // link to mainalpha.com, etc | |
7146 | + '.vicariouscollection.com', // by Vishnu Prasath (info at dvishnu.com) | |
7147 | + '.middlefingerproductions.net', // link to mainalpha.com | |
7148 | + | |
7149 | + // 2010/08 74.81.92.55 | |
7150 | + '.ecoxfinancial.com', // link to mainalpha.com etc | |
7151 | + '.fightingspirit-comics.com', // link to mainalpha.com etc | |
7152 | + '.learntoplaythedobro.com', // link to mainalpha.com etc | |
7153 | + '.montcalm4hfair.com', // link to mainalpha.com etc | |
7154 | + '.oaads.com', // link to mainalpha.com etc | |
7155 | + '.pabloblum.com', // link to mainalpha.com | |
7156 | + '.renaissancequartet.com', // link to mainalpha.com | |
7157 | + '.sbi-limited.com', // link to mainalpha.com | |
7158 | + '.showeroffire.com', // link to mainalpha.com | |
7159 | + '.soccerfestcolumbus.com', // link to mainalpha.com | |
7160 | + | |
7161 | + // 2010/08 173.236.48.82 | |
7162 | + '.anniedguesthouse.com', // link to mainalpha.com | |
7163 | + '.finnfest2009.com', // link to mainalpha.com | |
7164 | + '.hietalasoldworldmeats.com', // link to mainalpha.com etc | |
7165 | + '.splendoreimport.com', // link to mainalpha.com | |
7166 | + | |
7167 | + // 2010/08 174.120.82.124 | |
7168 | + '.jenurbanandthebox.com', // link to mainalpha.com | |
7169 | + | |
7170 | + // 2010/08 174.132.149.98 | |
7171 | + '.segwaybykar.com', // link to mainalpha.com | |
7172 | + ), | |
7173 | + | |
7092 | 7174 | 'mainalpha.com related' => array( |
7093 | 7175 | |
7094 | 7176 | // 2010/08 65.254.248.143 |
7095 | 7177 | '.archivecdbooksus.com', // by (offpista at gmail.com), "sports betting" |
7096 | 7178 | '.highrollersonlinecasinos.com', // casinos |
7097 | 7179 | '.onlinecasinocenter.com', // casinos |
7098 | - '.odinhosting.com', // by Nahum, Moshe (moshenhm at gmail.com), seems no link today | |
7099 | - '.nycdivers.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com | |
7100 | - '.robertlhines.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com, etc | |
7101 | - '.shadowsonmyshift.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com, etc | |
7102 | 7180 | |
7103 | - // 2010/08 69.89.31.187 | |
7104 | - '.bcsliding.com', // by Nahum, Moshe (moshenhm at gmail.com) | |
7105 | - '.dinuzzollc.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc | |
7106 | - '.laruesbackdoor.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc | |
7107 | - '.msthirteen.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com | |
7108 | - '.okrenters.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.co | |
7109 | - '.pandaitaid.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com, etc | |
7110 | - '.vicariouscollection.com', // by Vishnu Prasath (info at dvishnu.com) | |
7111 | - '.middlefingerproductions.net', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com | |
7112 | - | |
7113 | - // 2010/08 74.53.239.27 | |
7114 | - '.thetravelerscafe.com', // by Nahum, Moshe (moshenhm at gmail.com), cheap tickets | |
7115 | 7181 | |
7116 | 7182 | // 2010/08 74.81.92.55 |
7117 | 7183 | '.2ndrose.com', // by Victor Zrovanov (victor3239 at gmail.com), link to mainalpha.com |
7118 | - '.ecoxfinancial.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc | |
7119 | 7184 | '.edgewatertowers.com', // by Victor Frankl (victor3239 at gmail.com), link to mainalpha.com |
7120 | - '.fightingspirit-comics.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc | |
7121 | - '.learntoplaythedobro.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc | |
7122 | - '.montcalm4hfair.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc | |
7123 | - '.oaads.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc | |
7124 | - '.pabloblum.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com | |
7125 | - '.renaissancequartet.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com | |
7126 | - '.sbi-limited.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com | |
7127 | - '.showeroffire.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com | |
7128 | - '.soccerfestcolumbus.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com | |
7129 | - '.sonicparthenon.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com | |
7130 | - '.staroftheevening.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com | |
7131 | 7185 | |
7132 | - // 2010/08 173.45.103.74 | |
7133 | - '.digitalexperts.com', // by Nahum, Moshe (moshenhm at gmail.com) | |
7134 | - | |
7135 | 7186 | // 2010/08 173.236.48.82 |
7136 | - '.anniedguesthouse.com', // by Vishnu Prasath (info dvishnu.com), link to mainalpha.com | |
7137 | 7187 | '.ciprogram.com', // link to mainalpha.com |
7138 | - '.finnfest2009.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com | |
7139 | - '.hietalasoldworldmeats.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com etc | |
7140 | - '.splendoreimport.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com | |
7141 | - '.sunshinetesting.com', // by Nahum, Moshe (moshenhm at gmail.com), link to mainalpha.com | |
7142 | - '.sports-and-concert-tickets.com', // by Nahum, Moshe (moshenhm at gmail.com), seems no link today | |
7143 | - | |
7144 | - // 2010/08 174.120.82.124 | |
7145 | - '.blueysretreat.com', // by Nahum, Moshe (moshenhm at gmail.com), seems no link today | |
7146 | - '.jenurbanandthebox.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com | |
7147 | - '.lamborghinidenveronline.com', // by Nahum, Moshe (moshenhm at gmail.com), seems no link today | |
7148 | - '.buckandbb.net', // by Nahum, Moshe (moshenhm at gmail.com), link to kqzyfj.com | |
7149 | 7188 | |
7150 | 7189 | // 2010/08 174.132.149.98 |
7151 | - '.2008-national-n-scale-convention.com', // by Moshe Nahum (moshenhm at gmail.com), link to mainalpha.com | |
7152 | 7190 | '.msgulfcoastbnbs.com', // by Michael John (cpajourney at yahoo.com), link to mainalpha.com etc |
7153 | - '.segwaybykar.com', // by Vishnu Prasath (info at dvishnu.com), link to mainalpha.com | |
7154 | - '.creativejuicecompetition.com', // by Nahum, Moshe (moshenhm at gmail.com), seems no link today | |
7155 | - | |
7156 | - // 2010/08 216.119.132.2 | |
7157 | - '.kennybrown.net', // by Moshe Nahum (moshenhm at gmail.com), | |
7158 | - | |
7159 | - // 2010/08 no address today | |
7160 | - '.bestblackdatingonline.com', // by Nahum, Moshe (moshenhm at gmail.com) | |
7161 | 7191 | ), |
7192 | + | |
7162 | 7193 | 'kouvald at gmail.com' => array( // by Vlad Kouvaldin (kouvald at gmail.com) |
7163 | 7194 | |
7164 | 7195 | // 2010/08 91.205.156.73 |
@@ -7222,7 +7253,7 @@ $blocklist['C'] = array( | ||
7222 | 7253 | '.analsextube247.com', // 2010/08 99.192.176.58 |
7223 | 7254 | '.eroticandy.com', // 2010/08 99.192.176.54 |
7224 | 7255 | ), |
7225 | - 'nikiforov501@mail.ru' => array( | |
7256 | + 'nikiforov501 at mail.ru' => array( | |
7226 | 7257 | // 2010/09 91.205.156.74 |
7227 | 7258 | '.211park.com', |
7228 | 7259 | '.atlantajuniorthrashers.com', |
@@ -7261,12 +7292,15 @@ $blocklist['D'] = array( | ||
7261 | 7292 | |
7262 | 7293 | $blocklist['E'] = array( |
7263 | 7294 | // E: Sample setting of |
7264 | - // Promoters | |
7265 | - // (Affiliates, Hypes, Catalog retailers, Multi-level marketings, Resellers, | |
7295 | + // Promotions | |
7296 | + // (Phenomena with Affiliates, Hypes, Catalog retailers, Multi-level marketings, Resellers, | |
7266 | 7297 | // Ads, Business promotions, SEO, etc) |
7267 | 7298 | // |
7268 | - // They often promotes near you using blog article, mail-magazines, tools(search engines, blogs, etc), etc. | |
7299 | + // Promotions near you using blog article, mail-magazines, tools(search engines, blogs, etc), etc. | |
7269 | 7300 | // Sometimes they may promote each other |
7301 | + // | |
7302 | + // Please notify us about this list with reason: | |
7303 | + // http://pukiwiki.sourceforge.jp/dev/?BugTrack2/342 | |
7270 | 7304 | |
7271 | 7305 | '15-Mail.com related' => array( |
7272 | 7306 | '.15-mail.com', // 202.218.109.45(*.netassist.jp) by yukiyo yamamoto (sunkusu5268 at m4.ktplan.ne.jp) |