From 532adfa201b9b6315bd36970428c3088252fef7d Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Sun, 30 Aug 2009 14:08:19 +0200 Subject: [PATCH] Drive twitter posts from the database instead of the RSS feed, since we have better information there. Store short URLs as generated by tr.im, in case we want them for something later. Generate short URLs using authenticated access to tr.im (if available), so it's possible to get statistics etc. --- hamnadmin/register/models.py | 2 + planet.ini.sample | 9 ++ planet_run.sh | 2 +- posttotwitter.py | 184 +++++++++++++++++++++-------------- schema.sql | 4 +- 5 files changed, 124 insertions(+), 77 deletions(-) diff --git a/hamnadmin/register/models.py b/hamnadmin/register/models.py index 0af0c22..6f43e8a 100644 --- a/hamnadmin/register/models.py +++ b/hamnadmin/register/models.py @@ -49,6 +49,8 @@ class Post(models.Model): title = models.CharField(max_length=255) guidisperma = models.BooleanField() hidden = models.BooleanField() + twittered = models.BooleanField() + shortlink = models.TextField() def __unicode__(self): return self.title diff --git a/planet.ini.sample b/planet.ini.sample index 42dff94..17b1779 100644 --- a/planet.ini.sample +++ b/planet.ini.sample @@ -12,3 +12,12 @@ password=yeahthatssecret mailfrom=webmaster@postgresql.org mailto=planet@postgresql.org minerrors=3 + +[twitter] +account=planetpostgres +password=topsecret + +[tr.im] +account=planetpostgres +password=topsecret + diff --git a/planet_run.sh b/planet_run.sh index 7296aa5..6d9f3e2 100755 --- a/planet_run.sh +++ b/planet_run.sh @@ -6,5 +6,5 @@ cd /home/planetpg/planet date >> planet.log python aggregator.py >> planet.log 2>&1 python generator.py >>planet.log 2>&1 -python posttotwitter.py tweets.dat >>planet.log 2>&1 +python posttotwitter.py >>planet.log 2>&1 echo Done `date` >> planet.log diff --git a/posttotwitter.py b/posttotwitter.py index 172f2c7..70b258f 100755 --- a/posttotwitter.py +++ b/posttotwitter.py @@ -1,83 +1,117 @@ #!/usr/bin/env python -# python rss reader -> twitter post -import feedparser, pickle, os, sys, twitter, urllib, simplejson as json - -class RSS2Twitter: - def __init__(self, filename, url, username, passwd): - self.filename=filename - self.url=url - self.username=username - self.passwd=passwd - self.twApi=twitter.Api(username=self.username, password=self.passwd) - - if os.path.exists(self.filename): - self.itemsDB = pickle.load(file(filename, 'r+b')) - else: - self.itemsDB = {} - - def getLatestFeedItems(self, items = 10): - feed=feedparser.parse(self.url); - it=feed["items"] - it_ret=it[0:items] - return it_ret - - def twitIt(self, items): - oldItems=pItems=0 - items.sort(reverse=True) - for it in items: - if self.itemPublished(it) == None: - trim = json.loads(self.trim(it["link"])) - txt=it["title"] +" "+trim["url"] - # print txt - try: - status = self.twApi.PostUpdate(txt) - except IOError, e: - raise e - pItems=pItems+1 - # print "Total items: ", len(items) - # print "published: ",pItems - # print "old stuff: ",len(items) - pItems - - def itemPublished (self, item): - if self.itemsDB.has_key(item["link"]) == True: - return True - else: - self.itemsDB[item["link"]]=item["title"] - pickle.dump(self.itemsDB, file(self.filename, 'w+b')) - return None - - def trim(self, url): +# Post links to articles on twitter + +import psycopg2 +import twitter +import urllib +import simplejson as json +import ConfigParser + + +class PostToTwitter: + def __init__(self, cfg): + self.username=cfg.get('twitter','account') + self.passwd=cfg.get('twitter','password') + + if cfg.has_option('tr.im','account'): + self.trimuser = cfg.get('tr.im','account') + self.trimpassword = cfg.get('tr.im','password') + + self.db = psycopg2.connect(c.get('planet','db')) + + # Only set up the connection to twitter when we know we're going to + # post something. + self._twitter = None + + @property + def twitter(self): + if not self._twitter: + self._twitter=twitter.Api(username=self.username, password=self.passwd) + return self._twitter + + + def Run(self): + c = self.db.cursor() + c.execute("""SELECT posts.id, posts.title, posts.link, posts.shortlink, feeds.name + FROM planet.posts INNER JOIN planet.feeds ON planet.posts.feed=planet.feeds.id + WHERE approved AND NOT (twittered OR hidden) ORDER BY dat""") + for post in c.fetchall(): + if post[3] and len(post[3])>1: + short = post[3] + else: + # No short-link exists, so create one. We need the short-link + # to twitter, and we store it separately in the database + # in case it's needed. + try: + short = self.shortlink(post[2]) + except Exception, e: + print "Failed to shorten URL %s: %s" % (post[2], e) + continue + + c.execute("UPDATE planet.posts SET shortlink=%(short)s WHERE id=%(id)s", { + 'short': short, + 'id': post[0], + }) + self.db.commit() + + # Set up the string to twitter + msg = "%s: %s %s" % ( + post[4], + self.trimpost(post[1],len(post[4])+len(short)+3), + short, + ) + + # Now post it to twitter + try: + status = self.twitter.PostUpdate(msg) + except Exception, e: + print "Error posting to twitter: %s" % e + # We'll just try again with the next one + continue + + # Flag this item as posted + c.execute("UPDATE planet.posts SET twittered='t' WHERE id=%(id)s", { 'id': post[0] }) + self.db.commit() + + print "Twittered: %s" % msg + + + # Trim a post to the length required by twitter, so we don't fail to post + # if a title is really long. Assume other parts of the string to be + # posted are characters. + def trimpost(self, txt, otherlen): + if len(txt) + otherlen < 140: + return txt + return "%s..." % (txt[:(140-otherlen-3)]) + + + # Trim an URL using http://tr.im + def shortlink(self, url): try: - data = urllib.urlencode(dict(url=url, source="RSS2Twit")) - encodedurl="http://tr.im/api/trim_url.json?"+data + if self.trimuser: + data = urllib.urlencode(dict(url=url, username=self.trimuser, password=self.trimpassword)) + else: + data = urllib.urlencode(dict(url=url, )) + encodedurl="http://api.tr.im/v1/trim_url.json?"+data instream=urllib.urlopen(encodedurl) ret=instream.read() instream.close() - if len(ret)==0: - return url - return ret - except IOError, e: - raise "urllib error." + except Exception, e: + raise "Failed in call to tr.im API: %s" % e + + if len(ret)==0: + raise "tr.im returned blank!" - def tiny(self, url): try: - data = urllib.urlencode(dict(url=url, source="RSS2Twit")) - encodedurl="http://www.tinyurl.com/api-create.php?"+data - instream=urllib.urlopen(encodedurl) - ret=instream.read() - instream.close() - if len(ret)==0: - return url - return ret - except IOError, e: - raise "urllib error." - -if __name__ == "__main__": - # run it like python rss2twitter.py oi.dat (oi.dat is the posted item db) - # update username and passwd with your twitter account data, surrounding them with quotes. - url="http://planet.postgresql.org/rss20_short.xml" - ## Third and fourth args are username and password for twitter - r2t=RSS2Twitter(sys.argv[1], url, '', '') - its=r2t.getLatestFeedItems() - r2t.twitIt(its) + trim = json.loads(ret) + return trim['url'] + except Exception, e: + raise "Failed to JSON parse tr.im response: %s" % e + + +if __name__=="__main__": + c = ConfigParser.ConfigParser() + c.read('planet.ini') + PostToTwitter(c).Run() + diff --git a/schema.sql b/schema.sql index 2ffcf36..beedb82 100644 --- a/schema.sql +++ b/schema.sql @@ -63,7 +63,9 @@ CREATE TABLE posts ( dat timestamp with time zone NOT NULL, title text NOT NULL, guidisperma boolean NOT NULL, - hidden boolean DEFAULT false NOT NULL + hidden boolean DEFAULT false NOT NULL, + twittered boolean DEFAULT false NOT NULL, + shortlink text ); -- 2.39.5