Make the aggregator write its log to the database instead of just
authorMagnus Hagander <magnus@hagander.net>
Sat, 31 Jan 2009 13:34:42 +0000 (13:34 +0000)
committerMagnus Hagander <magnus@hagander.net>
Sat, 31 Jan 2009 13:34:42 +0000 (13:34 +0000)
a flat file.
Add ability to view this data on a per-blog basis in /register/.
Set up a cronjob to mail summary logs to planet@postgresql.org.

aggregator.py
logmailer.py [new file with mode: 0755]
planet.ini.sample
planetadmin/register/models.py
planetadmin/register/templates/aggregatorlog.html [new file with mode: 0644]
planetadmin/register/templates/index.html
planetadmin/register/urls.py
planetadmin/register/views.py
schema.sql

index de01befeb278ac099018624e2dd9f7bed102e65c..83f90026a0bda48834a1725319fc5c90863748f4 100755 (executable)
@@ -1,10 +1,11 @@
 #!/usr/bin/env python
+# vim: ai ts=4 sts=4 sw=4
 """PostgreSQL Planet Aggregator
 
 This file contains the functions to suck down RSS/Atom feeds 
 (using feedparser) and store the results in a PostgreSQL database.
 
-Copyright (C) 2008 PostgreSQL Global Development Group
+Copyright (C) 2008-2009 PostgreSQL Global Development Group
 """
 
 import psycopg2
@@ -25,24 +26,33 @@ class Aggregator:
                feeds.execute('SELECT id,feedurl,name,lastget,authorfilter FROM planet.feeds')
                for feed in feeds.fetchall():
                        try:
-                               self.ParseFeed(feed)
+                               n = self.ParseFeed(feed)
+                               if n > 0:
+                                       c = self.db.cursor()
+                                       c.execute("INSERT INTO planet.aggregatorlog (feed, success, info) VALUES (%(feed)s, 't', %(info)s)", {
+                                               'feed': feed[0],
+                                               'info': 'Fetched %s posts.' % n,
+                                       })
                        except Exception, e:
                                print "Exception when parsing feed '%s': %s" % (feed[1], e)
                                self.db.rollback()
+                               c = self.db.cursor()
+                               c.execute("INSERT INTO planet.aggregatorlog (feed, success, info) VALUES (%(feed)s, 'f', %(info)s)", {
+                                       'feed': feed[0],
+                                       'info': 'Error: "%s"' % e,
+                               })
                        self.db.commit()
 
        def ParseFeed(self, feedinfo):
-               #print "Loading feed %s" % (feedinfo[1])
+               numadded = 0
                parsestart = datetime.datetime.now()
                feed = feedparser.parse(feedinfo[1], modified=feedinfo[3].timetuple())
 
                if feed.status == 304:
                        # not changed
-                       return
+                       return 0
                if feed.status != 200:
-                       # not ok!
-                       print "Feed %s status %s" % (feedinfo[1], feed.status)
-                       return
+                       raise Exception('Feed returned status %s' % feed.status)
 
                self.authorfilter = feedinfo[4]
 
@@ -61,6 +71,7 @@ class Aggregator:
                        if txt == '' and entry.has_key('summary'):
                                txt = entry.summary
                        if txt == '':
+                               # Not a critical error, we just ignore empty posts
                                print "Failed to get text for entry at %s" % entry.link
                                continue
 
@@ -68,9 +79,11 @@ class Aggregator:
                                guidisperma = entry.guidislink
                        else:
                                guidisperma = True
-                       self.StoreEntry(feedinfo[0], entry.id, entry.date, entry.link, guidisperma, entry.title, txt)
-               self.db.cursor().execute("UPDATE planet.feeds SET lastget=COALESCE((SELECT max(dat) FROM planet.posts WHERE planet.posts.feed=planet.feeds.id),'2000-01-01') WHERE planet.feeds.id=%(feed)s", {'feed': feedinfo[0]})
-               #self.db.cursor().execute('UPDATE planet.feeds SET lastget=%(lg)s WHERE id=%(feed)s', {'lg':parsestart, 'feed': feedinfo[0]})
+                       if self.StoreEntry(feedinfo[0], entry.id, entry.date, entry.link, guidisperma, entry.title, txt) > 0:
+                               numadded += 1
+               if numadded > 0:
+                       self.db.cursor().execute("UPDATE planet.feeds SET lastget=COALESCE((SELECT max(dat) FROM planet.posts WHERE planet.posts.feed=planet.feeds.id),'2000-01-01') WHERE planet.feeds.id=%(feed)s", {'feed': feedinfo[0]})
+               return numadded
 
        def matches_filter(self, entry):
                # For now, we only match against self.authorfilter. In the future,
@@ -90,7 +103,7 @@ class Aggregator:
                c = self.db.cursor()
                c.execute("SELECT id FROM planet.posts WHERE feed=%(feed)s AND guid=%(guid)s", {'feed':feedid, 'guid':guid})
                if c.rowcount > 0:
-                       return
+                       return 0
                print "Store entry %s from feed %s" % (guid, feedid)
                c.execute("INSERT INTO planet.posts (feed,guid,link,guidisperma,dat,title,txt) VALUES (%(feed)s,%(guid)s,%(link)s,%(guidisperma)s,%(date)s,%(title)s,%(txt)s)",
                        {'feed': feedid,
@@ -101,6 +114,7 @@ class Aggregator:
                         'title': title,
                         'txt': txt})
                self.stored += 1
+               return 1
 
 if __name__=="__main__":
        c = ConfigParser.ConfigParser()
diff --git a/logmailer.py b/logmailer.py
new file mode 100755 (executable)
index 0000000..7a74b36
--- /dev/null
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# vim: ai ts=4 sts=4 sw=4
+"""PostgreSQL Planet Aggregator
+
+This file contains the functions to email a report of failed fetches
+by reading the aggregator log table in the database.
+
+Current version just sends a single summary report. A future enhancement
+could be to send reports directly to individual blog owners.
+
+Copyright (C) 2009 PostgreSQL Global Development Group
+"""
+
+import psycopg2
+import smtplib
+import email.Message
+import ConfigParser
+
+class LogChecker(object):
+       def __init__(self, cfg, db):
+               self.cfg = cfg
+               self.db = db
+               
+       def Check(self):
+               c = self.db.cursor()
+               c.execute("""SELECT ts,name,info FROM planet.aggregatorlog
+       INNER JOIN planet.feeds ON feed=feeds.id 
+       WHERE success='f' AND ts > CURRENT_TIMESTAMP-'24 hours'::interval
+       ORDER BY name,ts""")
+               if c.rowcount > 0:
+                       s = """
+One or more of the blogs fetched in the past 24 hours caused an error
+as listed below.
+
+"""
+                       last = ""
+                       for r in c:
+                               if not last == r[1]:
+                                       last = r[1]
+                                       s += "\n"
+                               s += "%s  %-20s  %s\n" % (r[0].strftime("%Y%m%d %H:%M:%S"), r[1][:20], r[2])
+                       
+                       s += "\n\n"
+
+                       toAddr = self.cfg.get('notify','mailto')
+                       fromAddr = self.cfg.get('notify','mailfrom')
+                       
+                       msg = email.Message.Message()
+                       msg['To'] = toAddr
+                       msg['From'] = fromAddr
+                       msg['Subject'] = 'Planet PostgreSQL error summary'
+                       msg.set_payload(s)
+                       
+                       
+                       smtp = smtplib.SMTP('127.0.0.1')
+                       smtp.sendmail(fromAddr, toAddr, msg.as_string())
+                       smtp.quit()
+               
+                       
+if __name__=="__main__":
+       c = ConfigParser.ConfigParser()
+       c.read('planet.ini')
+       LogChecker(c, psycopg2.connect(c.get('planet','db'))).Check()
+
index 60cabc4faaf1360e7a843031215ec7bdd023abaa..f63dc677319c6aa9943b34a8769a0926c2fbb003 100644 (file)
@@ -8,3 +8,6 @@ server=localhost
 listname=planet-subscribers
 password=yeahthatssecret
 
+[notify]
+mailfrom=webmaster@postgresql.org
+mailto=planet@postgresql.org
index 85e0be4903ca8e0289efcdc81afe0fcfb8e7e87a..50858d562259a4d4e368427f34f231342a32b6a3 100644 (file)
@@ -63,3 +63,14 @@ class AuditEntry(models.Model):
        class Meta:
                db_table = 'planetadmin\".\"auditlog'
                ordering = ['logtime']
+               
+class AggregatorLog(models.Model):
+       ts = models.DateTimeField()
+       feed = models.ForeignKey(Blog, db_column='feed')
+       success = models.BooleanField()
+       info = models.TextField()
+       
+       class Meta:
+               db_table = 'planet\".\"aggregatorlog'
+               ordering = ['-ts']
+
diff --git a/planetadmin/register/templates/aggregatorlog.html b/planetadmin/register/templates/aggregatorlog.html
new file mode 100644 (file)
index 0000000..b3892b7
--- /dev/null
@@ -0,0 +1,26 @@
+{% extends "regbase.html" %}
+{%block regcontent %}
+<p>
+This is a log of the most recent activity on your feed. Note that only
+operations that had some effect are logged. The normal fetches that are
+made every 15 minutes don't log anything unless they found new posts or
+some error occurred.
+</p>
+<p>
+Return to <a href="../..">blog list</a>.
+</p>
+<table border="1" cellspacing="0" cellpadding="1">
+<tr>
+ <th>Time</th>
+ <th>Status</th>
+ <th>Info</th>
+</tr>
+{% for entry in entries %}
+<tr valign="top">
+ <td>{{entry.ts}}</td>
+ <td>{{entry.success|yesno:"Success,Failure"}}</td>
+ <td>{{entry.info}}</td>
+</tr>
+{%endfor%}
+</table>
+{% endblock %}
index 37cce3c95bc510ae9760445fda33be7c10cc802b..3f290a2c7d71c2d0f5ceac15af6f376c6029ed11 100644 (file)
@@ -46,7 +46,8 @@ OPTIONAL Author Filter:
 {%endif%}
  </td>
 
- <td>{% if blog.approved  or user.is_superuser%}
+ <td><a href="log/{{blog.id}}/">View log</a><br/>
+{% if blog.approved  or user.is_superuser%}
   <a href="blogposts/{{blog.id}}/">Posts</a><br/>
 {%else%}
 Not approved yet.<br/>
index 6e5f05a7480caa1d9d4f88f9c3586f5802ebe270..1f48f6ee3449e11c83ce7f6e7a562f64769f2f79 100644 (file)
@@ -17,6 +17,7 @@ urlpatterns = patterns('',
     (r'^modify/(\d+)/$', 'planetadmin.register.views.modify'),
     (r'^modifyauthorfilter/(\d+)/$', 'planetadmin.register.views.modifyauthorfilter'),
 
+    (r'^log/(\d+)/$','planetadmin.register.views.logview'),
     (r'^blogposts/(\d+)/$', 'planetadmin.register.views.blogposts'),
     (r'^blogposts/(\d+)/hide/(\d+)/$', 'planetadmin.register.views.blogpost_hide'),
     (r'^blogposts/(\d+)/unhide/(\d+)/$', 'planetadmin.register.views.blogpost_unhide'),
index f8bc95fb550d1a3fb8da6025fab87e7e36947a44..1ec8537b0e66f8a08976062629e3b46c673cd900 100644 (file)
@@ -211,6 +211,18 @@ def detach(request, id):
        AuditEntry(request.user.username, 'Detached blog %s from %s' % (blog.feedurl, olduid)).save()
        return HttpResponseRedirect('../..')
 
+@login_required
+def logview(request, id):
+       blog = get_object_or_404(Blog, id=id)
+       if not blog.userid == request.user.username and not request.user.is_superuser:
+               return HttpResponse("You can't view the log for somebody elses blog!")
+               
+       logentries = AggregatorLog.objects.filter(feed=blog)[:50]
+       
+       return render_to_response('aggregatorlog.html', {
+               'entries': logentries,
+       }, context_instance=RequestContext(request))
+
 @login_required
 @transaction.commit_on_success
 def blogposts(request, id):
index 9443afa51649de4c0631cfaf1cbbb9b576eb45d3..2ffcf36a3e11c8820932775a6a1ab09b93974ee5 100644 (file)
@@ -21,6 +21,19 @@ SET default_tablespace = '';
 
 SET default_with_oids = false;
 
+--
+-- Name: aggregatorlog; Type: TABLE; Schema: planet; Owner: -; Tablespace: 
+--
+
+CREATE TABLE aggregatorlog (
+    id integer NOT NULL,
+    ts timestamp with time zone DEFAULT now() NOT NULL,
+    feed integer NOT NULL,
+    success boolean NOT NULL,
+    info text NOT NULL
+);
+
+
 --
 -- Name: feeds; Type: TABLE; Schema: planet; Owner: -; Tablespace: 
 --
@@ -54,6 +67,24 @@ CREATE TABLE posts (
 );
 
 
+--
+-- Name: aggregatorlog_id_seq; Type: SEQUENCE; Schema: planet; Owner: -
+--
+
+CREATE SEQUENCE aggregatorlog_id_seq
+    INCREMENT BY 1
+    NO MAXVALUE
+    NO MINVALUE
+    CACHE 1;
+
+
+--
+-- Name: aggregatorlog_id_seq; Type: SEQUENCE OWNED BY; Schema: planet; Owner: -
+--
+
+ALTER SEQUENCE aggregatorlog_id_seq OWNED BY aggregatorlog.id;
+
+
 --
 -- Name: feeds_id_seq; Type: SEQUENCE; Schema: planet; Owner: -
 --
@@ -90,6 +121,13 @@ CREATE SEQUENCE posts_id_seq
 ALTER SEQUENCE posts_id_seq OWNED BY posts.id;
 
 
+--
+-- Name: id; Type: DEFAULT; Schema: planet; Owner: -
+--
+
+ALTER TABLE aggregatorlog ALTER COLUMN id SET DEFAULT nextval('aggregatorlog_id_seq'::regclass);
+
+
 --
 -- Name: id; Type: DEFAULT; Schema: planet; Owner: -
 --
@@ -104,6 +142,14 @@ ALTER TABLE feeds ALTER COLUMN id SET DEFAULT nextval('feeds_id_seq'::regclass);
 ALTER TABLE posts ALTER COLUMN id SET DEFAULT nextval('posts_id_seq'::regclass);
 
 
+--
+-- Name: aggregatorlog_pkey; Type: CONSTRAINT; Schema: planet; Owner: -; Tablespace: 
+--
+
+ALTER TABLE ONLY aggregatorlog
+    ADD CONSTRAINT aggregatorlog_pkey PRIMARY KEY (id);
+
+
 --
 -- Name: feeds_pkey; Type: CONSTRAINT; Schema: planet; Owner: -; Tablespace: 
 --
@@ -120,6 +166,20 @@ ALTER TABLE ONLY posts
     ADD CONSTRAINT posts_pkey PRIMARY KEY (id);
 
 
+--
+-- Name: aggregatorlog_feed_idx; Type: INDEX; Schema: planet; Owner: -; Tablespace: 
+--
+
+CREATE INDEX aggregatorlog_feed_idx ON aggregatorlog USING btree (feed);
+
+
+--
+-- Name: aggregatorlog_feed_ts_idx; Type: INDEX; Schema: planet; Owner: -; Tablespace: 
+--
+
+CREATE INDEX aggregatorlog_feed_ts_idx ON aggregatorlog USING btree (feed, ts);
+
+
 --
 -- Name: feeds_feddurl; Type: INDEX; Schema: planet; Owner: -; Tablespace: 
 --
@@ -134,6 +194,14 @@ CREATE INDEX feeds_feddurl ON feeds USING btree (feedurl);
 CREATE INDEX feeds_name ON feeds USING btree (name);
 
 
+--
+-- Name: aggregatorlog_feed_fkey; Type: FK CONSTRAINT; Schema: planet; Owner: -
+--
+
+ALTER TABLE ONLY aggregatorlog
+    ADD CONSTRAINT aggregatorlog_feed_fkey FOREIGN KEY (feed) REFERENCES feeds(id);
+
+
 --
 -- Name: posts_feed_fkey; Type: FK CONSTRAINT; Schema: planet; Owner: -
 --