#!/usr/local/bin/python
from formatter import NullFormatter
from htmllib import HTMLParser
import re, os, sys, urllib
class FrazzPageParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self, NullFormatter())
def run(self, url):
self.url = url
self.title = ""
self.inDateCell = False
self.inDateFont = False
self.feed(urllib.urlopen(url).read())
self.close()
def start_td(self, attrs):
if (("bgcolor", "#FFFFFF") in attrs and
("colspan", "3") in attrs and
("align", "RIGHT")) in attrs:
self.inDateCell = True
def end_td(self):
if self.inDateCell:
self.inDateCell = False
def start_font(self, attrs):
if self.inDateCell and ("class", "strong") in attrs:
self.inDateFont = True
self.save_bgn()
def end_font(self):
if self.inDateFont:
self.title = self.save_end()
self.inDateFont = False
def do_img(self, attrs):
if ("alt", "Today's Comic") in attrs:
print ''
print '%s' % self.title
print '' % self.url
print 'tag:comics.com,frazz:%s' % attrs[0][1]
print '<img src="http://www.comics.com%s">' % attrs[0][1]
print ''
class FrazzTocParser(HTMLParser):
    """Walk the Frazz index page and print an entry for every linked comic.

    Every ``<a href=...>`` found inside a ``<td class="calendar">`` cell is
    handed to a FrazzPageParser, which fetches that page and prints its
    entry.  The index page itself is processed last.

    NOTE(review): several of the printed strings are empty, which looks like
    markup that was stripped from this copy of the script -- confirm against
    the original before relying on the exact output format.
    """

    def __init__(self):
        """Create the page parser and print the leading output lines."""
        HTMLParser.__init__(self, NullFormatter())
        self.pageParser = FrazzPageParser()
        # Set here as well as in run() so the tag handlers are safe to call
        # on a freshly constructed parser.
        self.inCalendarCell = False
        print('')
        print('')
        print('Frazz')
        print('')
        print('Jeff Mallett')

    def run(self):
        """Fetch and parse the index page, then print the closing line."""
        self.inCalendarCell = False
        response = urllib.urlopen('http://www.comics.com/comics/frazz/')
        try:
            page = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        # Feeding the page triggers start_a(), which processes each linked
        # calendar page while the index is still being parsed.
        self.feed(page)
        self.close()
        # Finally process the index page itself as a comic page.
        self.pageParser.run('http://www.comics.com/comics/frazz/')
        print('')

    def start_td(self, attrs):
        """Enter a calendar cell."""
        if ("class", "calendar") in attrs:
            self.inCalendarCell = True

    def end_td(self):
        """Leave the calendar cell (a no-op for every other cell)."""
        if self.inCalendarCell:
            self.inCalendarCell = False

    def start_a(self, attrs):
        """Process the page a calendar-cell link points to.

        Anchors outside a calendar cell, and anchors without an href
        attribute, are ignored.
        """
        if not self.inCalendarCell:
            return
        # Look the href up by name instead of assuming it is the first
        # attribute; the positional lookup raised IndexError on bare anchors.
        for name, value in attrs:
            if name == "href":
                self.pageParser.run("http://www.comics.com" + value)
                return
# Script entry point.  Constructing FrazzTocParser prints the leading output
# lines; run() then fetches the Frazz index page, hands every link found in a
# calendar cell (and finally the index page itself) to the page parser, and
# prints the closing line.
parser = FrazzTocParser()
parser.run()