Use a regex to extract the page title

2024-09-09 06:58:28 +02:00 · 2024-09-09 06:58:28 +02:00 · aeaa93d82e
commit aeaa93d82e
parent 925e95e847
1 changed files with 6 additions and 24 deletions
--- a/plugin.py
+++ b/plugin.py
@@ -28,29 +28,10 @@
 import re
 import urllib.request
 from html.parser import HTMLParser
 import supybot.callbacks as callbacks
 from supybot import ircmsgs
 class TitleParser(HTMLParser):
    """Extract the text of the first ``<title>`` element from an HTML document.

    Feed markup with ``feed()`` and then read ``title``: it is ``None`` when
    no title text was seen, otherwise the whitespace-stripped title text.

    Only the first ``<title>`` element is captured, so later ``<title>``
    elements (for example inside an inline ``<svg>``) cannot overwrite the
    page title. Text delivered in several chunks is concatenated rather than
    replaced, so a title split across ``feed()`` calls or by nested markup is
    kept whole.
    """
    def __init__(self):
        super().__init__()
        # True while the parser is between <title> and </title>.
        self.in_title = False
        # Stripped title text collected so far; None until text is seen.
        self.title = None
        # Raw text chunks received inside the first <title> element.
        self._chunks = []
        # Set once the first <title> element has been closed.
        self._finished = False
    def handle_starttag(self, tag, attrs):
        """Start capturing text when the first ``<title>`` tag opens."""
        if tag.lower() == "title" and not self._finished:
            self.in_title = True
    def handle_endtag(self, tag):
        """Stop capturing when the open ``<title>`` closes, for good."""
        if tag.lower() == "title" and self.in_title:
            self.in_title = False
            self._finished = True
    def handle_data(self, data):
        """Append text found inside the title and refresh ``title``."""
        if self.in_title:
            self._chunks.append(data)
            # Strip only the joined text so inner spacing between chunks
            # is preserved.
            self.title = "".join(self._chunks).strip()
 class wfg(callbacks.Plugin):
    """This plugin contains random, vaguely WFG-related commands of questionable utility."""
    def __init__(self, irc):
@@ -58,6 +39,7 @@ def __init__(self, irc):
        self.__parent.__init__(irc)
        self.ticket_pattern = re.compile(r"(?:^|\W)#(\d+)")
        self.title_pattern = re.compile(r"\s-\s0ad\s-\s*Wildfire Games\s*$")
    def doPrivmsg(self, irc, msg):
        channel = msg.args[0]
@@ -80,14 +62,14 @@ def doPrivmsg(self, irc, msg):
        try:
            response = urllib.request.urlopen(url)
            html = response.read().decode()
            parser = TitleParser()
            parser.feed(html)
            title = parser.title if parser.title else "No title found"
            # Clean up the title if necessary
            title = re.sub(r'\s-\s0ad\s-\s*Wildfire Games\s*$', '', title)
        except Exception:
            return
        title_match = re.search(r"<title>(.*)</title>", html)
        title = title_match[1].strip() if title_match else "No title found"
        # Clean up the title if necessary
        title = self.title_pattern.sub('', title)
        response = f'{title} – {response.url}'
        irc.queueMsg(ircmsgs.privmsg(channel, response))