Use a regex to extract the page title
All checks were successful
Run Limnoria Plugin Tests / test (pull_request) Successful in 7s

This commit is contained in:
Dunedan 2024-09-09 06:58:28 +02:00
parent 4766cea580
commit e9ecfacb4f
Signed by untrusted user: Dunedan
GPG Key ID: 885B16854284E0B2

View File

@ -28,29 +28,10 @@
import re
import urllib.request
from html.parser import HTMLParser
import supybot.callbacks as callbacks
from supybot import ircmsgs
class TitleParser(HTMLParser):
    """Extract the text of the first ``<title>`` element of an HTML document.

    After ``feed()`` has been called, ``title`` holds the stripped text of
    the first ``<title>`` element, or ``None`` if no title text was seen.
    """

    def __init__(self):
        super().__init__()
        # True while the parser is between <title> and </title>.
        self.in_title = False
        # Stripped title text collected so far; None until any text is seen.
        self.title = None
        # Raw text fragments of the title; the parser may deliver the text
        # of a single element in several handle_data() calls.
        self._title_chunks = []
        # Set once the first <title> element has been closed, so later
        # <title> elements (e.g. inside inline SVG) cannot replace it.
        self._title_done = False

    def handle_starttag(self, tag, attrs):
        """Start collecting text when the first ``<title>`` tag opens."""
        if tag.lower() == "title" and not self._title_done:
            self.in_title = True

    def handle_endtag(self, tag):
        """Stop collecting text when the open ``<title>`` element closes."""
        # Only react to a closing tag that matches an open title, so a stray
        # </title> ahead of the real one does not lock the parser.
        if tag.lower() == "title" and self.in_title:
            self.in_title = False
            self._title_done = True

    def handle_data(self, data):
        """Accumulate text fragments that belong to the title element."""
        if self.in_title:
            self._title_chunks.append(data)
            # Re-join on every fragment so ``title`` is also usable when the
            # document ends without a closing </title> tag.
            self.title = "".join(self._title_chunks).strip()
class wfg(callbacks.Plugin):
"""This plugin contains random, vaguely WFG-related commands of questionable utility."""
def __init__(self, irc):
@ -78,14 +59,14 @@ def doPrivmsg(self, irc, msg):
try:
response = urllib.request.urlopen(url)
html = response.read().decode()
parser = TitleParser()
parser.feed(html)
title = parser.title if parser.title else "No title found"
# Clean up the title if necessary
title = re.sub(r'\s-\s0ad\s-\s*Wildfire Games\s*$', '', title)
except Exception:
return
title_match = re.search(r"<title>(.*)</title>", html)
title = title_match[1].strip() if title_match else "No title found"
# Clean up the title if necessary
title = re.sub(r'\s-\s0ad\s-\s*Wildfire Games\s*$', '', title)
response = f'{title}{response.url}'
irc.queueMsg(ircmsgs.privmsg(channel, response))