Use a regex to extract the page title
This commit is contained in:
parent
925e95e847
commit
aeaa93d82e
30
plugin.py
30
plugin.py
@@ -28,29 +28,10 @@
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from html.parser import HTMLParser
|
|
||||||
import supybot.callbacks as callbacks
|
import supybot.callbacks as callbacks
|
||||||
from supybot import ircmsgs
|
from supybot import ircmsgs
|
||||||
|
|
||||||
|
|
||||||
class TitleParser(HTMLParser):
|
|
||||||
def __init__(self):
|
|
||||||
super().__init__()
|
|
||||||
self.in_title = False
|
|
||||||
self.title = None
|
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
|
||||||
if tag.lower() == "title":
|
|
||||||
self.in_title = True
|
|
||||||
|
|
||||||
def handle_endtag(self, tag):
|
|
||||||
if tag.lower() == "title":
|
|
||||||
self.in_title = False
|
|
||||||
|
|
||||||
def handle_data(self, data):
|
|
||||||
if self.in_title:
|
|
||||||
self.title = data.strip()
|
|
||||||
|
|
||||||
class wfg(callbacks.Plugin):
|
class wfg(callbacks.Plugin):
|
||||||
"""This plugin contains random, vaguely WFG-related commands of questionable utility."""
|
"""This plugin contains random, vaguely WFG-related commands of questionable utility."""
|
||||||
def __init__(self, irc):
|
def __init__(self, irc):
|
||||||
@@ -58,6 +39,7 @@ def __init__(self, irc):
|
|||||||
self.__parent.__init__(irc)
|
self.__parent.__init__(irc)
|
||||||
|
|
||||||
self.ticket_pattern = re.compile(r"(?:^|\W)#(\d+)")
|
self.ticket_pattern = re.compile(r"(?:^|\W)#(\d+)")
|
||||||
|
self.title_pattern = re.compile(r"\s-\s0ad\s-\s*Wildfire Games\s*$")
|
||||||
|
|
||||||
def doPrivmsg(self, irc, msg):
|
def doPrivmsg(self, irc, msg):
|
||||||
channel = msg.args[0]
|
channel = msg.args[0]
|
||||||
@@ -80,14 +62,14 @@ def doPrivmsg(self, irc, msg):
|
|||||||
try:
|
try:
|
||||||
response = urllib.request.urlopen(url)
|
response = urllib.request.urlopen(url)
|
||||||
html = response.read().decode()
|
html = response.read().decode()
|
||||||
parser = TitleParser()
|
|
||||||
parser.feed(html)
|
|
||||||
title = parser.title if parser.title else "No title found"
|
|
||||||
# Clean up the title if necessary
|
|
||||||
title = re.sub(r'\s-\s0ad\s-\s*Wildfire Games\s*$', '', title)
|
|
||||||
except Exception:
|
except Exception:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
title_match = re.search(r"<title>(.*)</title>", html)
|
||||||
|
title = title_match[1].strip() if title_match else "No title found"
|
||||||
|
# Clean up the title if necessary
|
||||||
|
title = self.title_pattern.sub('', title)
|
||||||
|
|
||||||
response = f'{title} – {response.url}'
|
response = f'{title} – {response.url}'
|
||||||
irc.queueMsg(ircmsgs.privmsg(channel, response))
|
irc.queueMsg(ircmsgs.privmsg(channel, response))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user