gotr: fix line break handling (fixes #20)

This commit is contained in:
Kjell Braden
2012-10-15 13:06:28 +02:00
parent 54f9be6b79
commit 511b37dd91

View File

@@ -628,6 +628,11 @@ class HTMLStripper(HTMLParser):
def handle_data(self, data):
    """Append a run of text handed over by the parser to ``stripped_data``."""
    self.stripped_data = self.stripped_data + data
def handle_starttag(self, tag, attrs):
    """Append a newline for a ``br`` start tag; add nothing for other tags.

    ``attrs`` is accepted to match the handler signature but is not used.
    """
    if tag != 'br':
        return
    # A line break is the only tag that contributes to the stripped text.
    self.stripped_data = self.stripped_data + '\n'
def handle_entityref(self, name):
    """Append the character for a named entity reference (e.g. ``amp``).

    ``name`` is the entity name without the surrounding ``&`` and ``;``.
    A name that is missing from ``name2codepoint`` is appended in its
    literal ``&name;`` form instead of raising ``KeyError``.
    """
    codepoint = name2codepoint.get(name)
    if codepoint is None:
        # Unknown entity: keep it visible as written rather than failing.
        self.stripped_data += '&%s;' % name
        return
    self.stripped_data += unichr(codepoint)
@@ -637,9 +642,14 @@ class HTMLStripper(HTMLParser):
else:
c = unichr(int(name))
self.stripped_data += c
def unknown_decl(self, data):
    """Append the payload of a CDATA declaration to ``stripped_data``.

    Only declaration text starting with ``CDATA[`` is handled; the text
    after that prefix is appended. Any other declaration is ignored.
    """
    prefix = 'CDATA['
    if data.startswith(prefix):
        # Accumulate into stripped_data only, like the other handlers;
        # the former extra write to self.data used a different attribute.
        self.stripped_data += data[len(prefix):]
def feed(self, data):
    """Remove every newline character from ``data`` and pass the result to
    ``HTMLParser.feed``."""
    without_newlines = ''.join(data.split('\n'))
    HTMLParser.feed(self, without_newlines)
def escape(s):
'''Replace special characters "&", "<" and ">" to HTML-safe sequences.
@@ -648,6 +658,7 @@ def escape(s):
s = s.replace("&", "&amp;") # Must be done first!
s = s.replace("<", "&lt;")
s = s.replace(">", "&gt;")
s = s.replace("\n", "<br/>")
return s
## TODO: