• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

news4 - RSS aggregation system


Commit MetaInfo

Revision: 108d48fa43fc49064b94b1d5ed7236fa89bd9781 (tree)
Time: 2012-10-03 05:24:10
Author: hylom <hylom@hylo...>
Committer: hylom

Log Message

implement some filters

Change Summary

Incremental Difference

--- a/fetcher.py
+++ b/fetcher.py
@@ -38,19 +38,30 @@ class FeedFetcher(object):
3838 return None
3939 return entry
4040
41+ def _apply_filters(self, filters, entries):
42+ for f in filters:
43+ entry_filter = self._get_filter(f)
44+ entries = [entry_filter(x) for x in entries]
45+ # remove entry which is None
46+ entries = [x for x in entries if x]
47+ return entries
48+
49+ def _apply_pre_filters(self, entries):
50+ return self._apply_filters(config['pre_filters'], entries)
51+
52+ def _apply_post_filters(self, entries):
53+ return self._apply_filters(config['post_filters'], entries)
54+
4155 def get_entries(self):
4256 'get entries'
4357 entries = self._fetch()
44- entries = [self._embeded_filter(x) for x in entries]
45- entries = [x for x in entries if x]
58+ entries = self._apply_pre_filters(entries)
4659
4760 if 'filter' in self._feed:
4861 filters = self._feed.get('filter', None)
49- for filter in filters:
50- entry_filter = self._get_filter(filter)
51- entries = [entry_filter(x) for x in entries]
52- # remove entry which is None
53- entries = [x for x in entries if x]
62+ entries = self._apply_filters(filters, entries)
63+
64+ entries = self._apply_post_filters(entries)
5465 return entries
5566
5667 def _get_filter(self, filter_name):
@@ -65,8 +76,16 @@ class FeedFetcher(object):
6576 globals(),
6677 locals(),
6778 [filter_name,])
68- mod = mods.__getattribute__(filter_name)
79+ try:
80+ mod = mods.__getattribute__(filter_name)
81+ except AttributeError:
82+ raise FilterError(filter_name)
6983
7084 # return module's entry_filter function
7185 return mod.entry_filter
7286
87+class FilterError(Exception):
88+ def __init__(self, value):
89+ self.value = value
90+ def __str__(self):
91+ return 'filter "' + self.value + '" is not found.'
--- /dev/null
+++ b/filters/cleanup.py
@@ -0,0 +1,27 @@
1+# filter for cleaning up entry bodies (removes empty tags and <br> tags)
2+# -*- coding: utf-8 -*-
3+
4+import re
5+
6+re_blank = re.compile(r'<\s*(\w+)[^>]*>\s*</\s*\1\s*>')
7+re_br = re.compile(r'<\s*br\s*/?>')
8+
9+def _replace_all(rex, text):
10+ m = rex.search(text)
11+ while(m):
12+ text = rex.sub('', text)
13+ m = rex.search(text)
14+ return text
15+
16+def entry_filter(entry):
17+ body = entry['body']
18+
19+ # 空のタグを削除
20+ body = _replace_all(re_blank, body)
21+
22+ # brタグを削除
23+ body = _replace_all(re_br, body)
24+
25+ entry['body'] = body
26+ return entry
27+
--- /dev/null
+++ b/filters/gizmodo.py
@@ -0,0 +1,30 @@
1+# filter for slashdot.jp
2+# -*- coding: utf-8 -*-
3+
4+import re
5+
6+re_read_all = re.compile(ur'''<p>\s*<a href=['"][^'"]+['"]>\s*すべて読む\s*</a>.*?</p>''')
7+re_related = re.compile(ur'''<p>\s*関連ストーリー:.*?</p>''')
8+re_topics = re.compile(ur'''<a href="http://slashdot.jp/stories/\w+">(.*?)</a>''')
9+
10+def entry_filter(entry):
11+ # すべて読む、関連ストーリーを削除
12+ body = entry['body']
13+ topics = []
14+ m = re_read_all.search(body)
15+ if m:
16+ s = m.group(0)
17+ itr = re_topics.findall(s)
18+ for items in itr:
19+ topics.append(items)
20+
21+ body = re_read_all.sub('', body)
22+ body = re_related.sub('', body)
23+ if 'tags' in entry:
24+ entry['tags'].extend(topics)
25+ else:
26+ entry['tags'] = topics
27+ entry['body'] = body
28+
29+ return entry
30+
--- /dev/null
+++ b/filters/pr_block.py
@@ -0,0 +1,12 @@
1+# PR filter
2+# -*- coding: utf-8 -*-
3+
4+import re
5+
6+def entry_filter(entry):
7+ if re.search(u'^(PR|AD)(:|:)', entry['title']):
8+ print 'delete PR entry - %s' % entry['title']
9+ return None
10+ return entry
11+
12+
--- a/templates/index.tmpl.html
+++ b/templates/index.tmpl.html
@@ -64,13 +64,13 @@
6464 <span>タグ:</span>
6565 % for tag in entry.tags:
6666 <span>${tag} </span>
67+ % endfor
6768 % if 'images' in entry:
6869 <span>画像:</span>
6970 % for imgurl in entry.images:
7071 <span><a href="${imgurl}">*</a></span>
7172 % endfor
7273 % endif
73- % endfor
7474 </div>
7575 </div>
7676 </div>