From 5390f36c84b0876b9c94dc49fa8e043a5d0d51a4 Mon Sep 17 00:00:00 2001
From: David Phillips
Date: Tue, 9 May 2017 10:14:09 +1200
Subject: URL Titles: Don't use a HEAD request first

This upset some websites like reddit which are prissy about >1 request
every two seconds.
---
 idalius.pl | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

(limited to 'idalius.pl')

diff --git a/idalius.pl b/idalius.pl
index c0782ff..a885ca5 100755
--- a/idalius.pl
+++ b/idalius.pl
@@ -67,20 +67,17 @@ sub drop_priv {
 sub url_get_title {
 	my $url = $_[0];
 
-	my $response = HTTP::Tiny->new((timeout => 5))->head($url);
+	my $http = HTTP::Tiny->new((default_headers => {accept => 'text/html'}, timeout => 5, agent => "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0"));
+
+	my $response = $http->get($url);
+
 	if (!$response->{success}) {
 		print "Something broke: $response->{content}\n";
 		return;
 	}
 
-	if (!$response->{headers}->{"content-type"} =~ m,text/html ?,) {
-		print("Not html, giving up now");
-		return;
-	}
-
-	$response = HTTP::Tiny->new((timeout => 5))->get($url);
-	if (!$response->{success}) {
-		print "Something broke: $response->{content}\n";
+	if (!($response->{headers}->{"content-type"} =~ m,text/html ?,)) {
+		print("Not html, giving up now\n");
 		return;
 	}
 
@@ -158,8 +155,10 @@ sub irc_public {
 
 	if ($config{url_on} and $what =~ /(https?:\/\/[^ ]+)/i) {
 		my $title = url_get_title($1);
-		print "Title: $title\n";
-		$irc->yield(privmsg => $channel => $title);
+		if ($title) {
+			print "Title: $title\n";
+			$irc->yield(privmsg => $channel => $title);
+		}
 	}
 
 	my $gathered = "";
-- 
cgit v1.1