From fc701ff151b8826aac23aab77493718d1836c492 Mon Sep 17 00:00:00 2001 From: David Phillips Date: Tue, 18 Apr 2017 20:59:50 +1200 Subject: Butcher URL titles in from original idalius bot --- idalius.pl | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'idalius.pl') diff --git a/idalius.pl b/idalius.pl index 7b5395b..6bd9bfd 100755 --- a/idalius.pl +++ b/idalius.pl @@ -8,6 +8,8 @@ use POE::Kernel; use POE::Component::IRC; use POE::Component::IRC::Plugin::NickServID; use config_file; +use HTTP::Tiny; +use HTML::HeadParser; my $config_file = "bot.conf"; my %config = config_file::parse_config($config_file); @@ -16,6 +18,8 @@ $| = 1; my $current_nick = $config{nick}; ++$config{url_len}; + # New PoCo-IRC object my $irc = POE::Component::IRC->spawn( UseSSL => $config{usessl}, @@ -110,6 +114,44 @@ sub irc_public { my $me = $irc->nick_name; + # Parse urls in all other regular messages + if ($what =~ /(https?:\/\/[^ ]+)/i) + { + my $url = $1; + my $response = HTTP::Tiny->new((timeout => 5))->head($url); + if (!$response->{success}) { + print "Something broke: $response->{content}\n"; + return; + } + + if (!$response->{headers}->{"content-type"} =~ m,text/html ?,) { + print("Not html, giving up now"); + return; + } + + $response = HTTP::Tiny->new((timeout => 5))->get($url); + if (!$response->{success}) { + print "Something broke: $response->{content}\n"; + return; + } + + my $html = $response->{content}; + + my $parser = HTML::HeadParser->new; + $parser->parse($html); + + # get title and unpack from utf8 (assumption) + my $title = $parser->header("title"); + return unless $title; + + my $shorturl = $url; + $shorturl = (substr $url, 0, $config{url_len}) . "…" if length ($url) > $config{url_len}; + + print "Title: $title ($url)\n"; + $irc->yield(privmsg => $channel => "$title ($shorturl)"); + } + + my $gathered = ""; my @expressions = (keys %{$config{triggers}}); my $expression = join '|', @expressions; -- cgit v1.1