diff options
author | David Phillips <david@sighup.nz> | 2017-04-18 20:59:50 +1200 |
---|---|---|
committer | David Phillips <david@sighup.nz> | 2017-04-18 20:59:50 +1200 |
commit | fc701ff151b8826aac23aab77493718d1836c492 (patch) | |
tree | 7b2c095a0ccc81adbbc4fa186915166301fce448 | |
parent | e94d7c1272607205f379a6252c93011b7527a3ab (diff) | |
download | idalius-fc701ff151b8826aac23aab77493718d1836c492.tar.xz |
Butcher URL titles in from original idalius bot
-rwxr-xr-x | idalius.pl | 42 |
1 files changed, 42 insertions, 0 deletions
@@ -8,6 +8,8 @@ use POE::Kernel; use POE::Component::IRC; use POE::Component::IRC::Plugin::NickServID; use config_file; +use HTTP::Tiny; +use HTML::HeadParser; my $config_file = "bot.conf"; my %config = config_file::parse_config($config_file); @@ -16,6 +18,8 @@ $| = 1; my $current_nick = $config{nick}; ++$config{url_len}; + # New PoCo-IRC object my $irc = POE::Component::IRC->spawn( UseSSL => $config{usessl}, @@ -110,6 +114,44 @@ sub irc_public { my $me = $irc->nick_name; + # Parse urls in all other regular messages + if ($what =~ /(https?:\/\/[^ ]+)/i) + { + my $url = $1; + my $response = HTTP::Tiny->new((timeout => 5))->head($url); + if (!$response->{success}) { + print "Something broke: $response->{content}\n"; + return; + } + + if (!$response->{headers}->{"content-type"} =~ m,text/html ?,) { + print("Not html, giving up now"); + return; + } + + $response = HTTP::Tiny->new((timeout => 5))->get($url); + if (!$response->{success}) { + print "Something broke: $response->{content}\n"; + return; + } + + my $html = $response->{content}; + + my $parser = HTML::HeadParser->new; + $parser->parse($html); + + # get title and unpack from utf8 (assumption) + my $title = $parser->header("title"); + return unless $title; + + my $shorturl = $url; + $shorturl = (substr $url, 0, $config{url_len}) . "β¦" if length ($url) > $config{url_len}; + + print "Title: $title ($url)\n"; + $irc->yield(privmsg => $channel => "$title ($shorturl)"); + } + + my $gathered = ""; my @expressions = (keys %{$config{triggers}}); my $expression = join '|', @expressions; |