diff options
Diffstat (limited to 'Plugin')
-rw-r--r-- | Plugin/Timezone.pm | 5 | ||||
-rw-r--r-- | Plugin/Titillate.pm (renamed from Plugin/Tittilate.pm) | 2 | ||||
-rw-r--r-- | Plugin/URL_Title.pm | 41 |
3 files changed, 34 insertions, 14 deletions
diff --git a/Plugin/Timezone.pm b/Plugin/Timezone.pm index eaea907..60a0f34 100644 --- a/Plugin/Timezone.pm +++ b/Plugin/Timezone.pm @@ -30,9 +30,10 @@ sub time { my $nick = $arguments[0]; if (grep {$_ eq $nick} @known_zones) { - my $d = DateTime->now(); + my $d = DateTime->now(); $d->set_time_zone($config{timezone}->{$nick}); - return "$requester: $nick\'s clock reads $d"; + my $timestr = $d->strftime("%Y-%m-%d %H:%M %Z"); + return "$requester: $nick\'s clock reads $timestr"; } else { return "$requester: I don't know what timezone $nick is in"; } diff --git a/Plugin/Tittilate.pm b/Plugin/Titillate.pm index b2c2286..f969df7 100644 --- a/Plugin/Tittilate.pm +++ b/Plugin/Titillate.pm @@ -1,6 +1,6 @@ #!/usr/bin/env perl -package Plugin::Tittilate; +package Plugin::Titillate; use strict; use warnings; diff --git a/Plugin/URL_Title.pm b/Plugin/URL_Title.pm index 8248560..73fa498 100644 --- a/Plugin/URL_Title.pm +++ b/Plugin/URL_Title.pm @@ -5,7 +5,7 @@ package Plugin::URL_Title; use strict; use warnings; use HTTP::Tiny; -use HTML::HeadParser; +use HTML::Parser; use utf8; my %config; @@ -18,6 +18,17 @@ sub configure { return $self; } +my $title; + +sub start_handler +{ + return if shift ne "title"; + my $self = shift; + $self->handler(text => sub { $title = shift; }, "dtext"); + $self->handler(end => sub { shift->eof if shift eq "title"; }, + "tagname,self"); +} + sub message { my ($self, $logger, $me, $who, $where, $raw_what, $what, $irc) = @_; @@ -25,12 +36,15 @@ sub message return if ($config{url_on} == 0); - if ($what =~ /(https?:\/\/[^ ]+)/i) { + # Drawn from RFC 3986§2 + if ($what =~ /(https?:\/\/[a-z0-9\-\._~:\/\?#\[\]@\!\$&'()\*\+,;=]+)/i) { $url = $1; } return unless $url; - my $http = HTTP::Tiny->new((default_headers => {'Range' => "bytes=0-65536", 'Accept' => 'text/html'}, timeout => 3)); + # FIXME add more XML-based formats that we can theoretically extract titles from + # FIXME factor out accepted formats and response match into accepted formats array + my 
$http = HTTP::Tiny->new((default_headers => {'Range' => "bytes=0-65536", 'Accept' => 'text/html, image/svg+xml'}, timeout => 3)); my $response = $http->get($url); @@ -39,29 +53,34 @@ sub message return; } - if (!($response->{headers}->{"content-type"} =~ m,text/html ?,)) { - $logger->("Not html, giving up now"); + if (!($response->{headers}->{"content-type"} =~ m,(text/html|image/svg\+xml),)) { + $logger->("I don't think I can parse titles from $response->{headers}->{'content-type'} - stopping here"); return; } my $html = $response->{content}; utf8::decode($html); - my $parser = HTML::HeadParser->new; - $parser->parse($html); + $title = ""; + my $p = HTML::Parser->new(api_version => 3); + $p->handler( start => \&start_handler, "tagname,self"); + $p->parse($html); + die "Error: $!\n" if $!; + + $title =~ s/\s+/ /g; + $title =~ s/(^\s+|\s+$)//g; - # get title and unpack from utf8 (assumption) - my $title = $parser->header("title"); utf8::upgrade($title); return unless $title; my $shorturl = $url; - $shorturl = (substr $url, 0, $config{url_len}) . "…" if length ($url) > $config{url_len}; - # remove http(s):// to avoid triggering other poorly configured bots $shorturl =~ s,^https?://,,g; $shorturl =~ s,/$,,g; + # truncate URL without http(s):// to configured length if needed + $shorturl = (substr $shorturl, 0, $config{url_len}) . "…" if length ($shorturl) > $config{url_len}; + my $composed_title = "$title ($shorturl)"; return $composed_title; } |