# One-off migration script: rewrite links in blog posts that still point at
# the old weblog.infoworld.com/udell namespace so they point at the archived
# copies under http://jonudell.net/udell/ instead.
#
# Written for Python 2; the print calls below use the single-argument
# parenthesised form, which prints the same text on Python 2 and Python 3.

import pyblog  # (http://code.google.com/p/python-blogger/)
import re, pickle, traceback
from pyblog import *

wp = MetaWeblog('http://blog.jonudell.net/xmlrpc.php', '{USER}', '{PASS}',
                appkey='{KEY}')

posts = wp.get_recent_posts(530)  # a number larger than the total

# metabydate.pickle was created in a previous step, from the old archive.
# e.g.:
# { '2006/11/01' :
#     {'title' : '...'},
#     {'etc' : '...'},
# }
# NOTE(review): pickle.load executes arbitrary code from the file; this is
# only acceptable because the file is produced locally by the earlier step.
with open('metabydate.pickle', 'rb') as f:  # closed as soon as it is loaded
    metabydate = pickle.load(f)

# Old-namespace permalinks.  Some urls end like 2006/01/04.html#a527, others
# just like 2006/01/05.html; the optional fragment group covers both forms in
# a single pass (the greedy optional group consumes the fragment when present).
OLD_URL_RE = re.compile(
    r'(http://weblog\.infoworld\.com/udell/\d+/\d+/\d+\.html(?:#a\d+)?)')


def make_fname(title, date):
    """Build the archive file name for a post.

    title -- post title; newlines are dropped, runs of '&', '/' and ':'
             become '-', any remaining character that is not a word
             character, whitespace or '-' is removed, whitespace runs become
             '-', repeated '-' are collapsed, and the result is lower-cased.
    date  -- date string such as '2006/12/04'; its '/' become '-'.

    Returns e.g. '2006-12-04-hunting-the-elusive-search-strategy.html'.
    """
    fname = title.replace('\n', '')
    fname = re.sub(r'[&\/:]+', '-', fname)
    fname = re.sub(r'[^\w\s\-]+', '', fname)
    fname = re.sub(r'\s+', '-', fname)
    fname = re.sub(r'\-+', '-', fname)
    fname = fname.lower() + '.html'
    fname = date.replace('/', '-') + '-' + fname
    return fname


def make_new_url(fname):
    """Return the URL in the new namespace for archive file name `fname`."""
    return 'http://jonudell.net/udell/%s' % fname


def subrepl(matchobj):
    """re.sub callback: map a matched old-namespace URL to its new URL.

    The date embedded in the old URL (group 1 of `matchobj`) is looked up in
    the module-level `metabydate` table to recover the post title, from which
    the new file name is built.  Each rewrite is printed as it is made.

    Raises IndexError if the URL contains no y/m/d date and KeyError if the
    date (or its 'title') is missing from `metabydate`.
    """
    oldurl = matchobj.group(1)
    date = re.findall(r'(\d+/\d+/\d+)', oldurl)[0]
    title = metabydate[date]['title']
    fname = make_fname(title, date)
    newurl = make_new_url(fname)
    print("from %s\n to %s\n" % (oldurl, newurl))
    return newurl


# First collect edit targets: posts whose body mentions the old namespace.
# (The dots in this pattern are unescaped, exactly as before, so the set of
# selected posts -- and therefore what targets[2:] skips -- is unchanged.)
targets = [post for post in posts
           if re.search('weblog.infoworld.com/udell', post['description'])]

# Skipping the most recent 2, which really do refer to the old namespace.
for post in targets[2:]:
    desc = post['description']
    post_id = post['postid']  # renamed from `id` to avoid shadowing the builtin
    print("%s %s %s" % (post_id, post['dateCreated'], post['title']))
    try:
        post['description'] = OLD_URL_RE.sub(subrepl, desc)
        wp.edit_post(post_id, post)
    except Exception:
        # Best effort: report the failure and carry on with the next post.
        # Deliberately not a bare `except:` so that Ctrl-C / SystemExit can
        # still stop the run.
        traceback.print_exc()

# sample output
# 50 20070206T15:06:17 Search strategies, part 2
# from http://weblog.infoworld.com/udell/2006/12/04.html#a1571
#  to http://jonudell.net/udell/2006-12-04-hunting-the-elusive-search-strategy.html