aboutsummaryrefslogtreecommitdiff
path: root/Plugin
diff options
context:
space:
mode:
Diffstat (limited to 'Plugin')
-rw-r--r--Plugin/Timezone.pm5
-rw-r--r--Plugin/Titillate.pm (renamed from Plugin/Tittilate.pm)2
-rw-r--r--Plugin/URL_Title.pm41
3 files changed, 34 insertions, 14 deletions
diff --git a/Plugin/Timezone.pm b/Plugin/Timezone.pm
index eaea907..60a0f34 100644
--- a/Plugin/Timezone.pm
+++ b/Plugin/Timezone.pm
@@ -30,9 +30,10 @@ sub time {
my $nick = $arguments[0];
if (grep {$_ eq $nick} @known_zones) {
- my $d = DateTime->now();
+ my $d = DateTime->now();
$d->set_time_zone($config{timezone}->{$nick});
- return "$requester: $nick\'s clock reads $d";
+ my $timestr = $d->strftime("%Y-%m-%d %H:%M %Z");
+ return "$requester: $nick\'s clock reads $timestr";
} else {
return "$requester: I don't know what timezone $nick is in";
}
diff --git a/Plugin/Tittilate.pm b/Plugin/Titillate.pm
index b2c2286..f969df7 100644
--- a/Plugin/Tittilate.pm
+++ b/Plugin/Titillate.pm
@@ -1,6 +1,6 @@
#!/usr/bin/env perl
-package Plugin::Tittilate;
+package Plugin::Titillate;
use strict;
use warnings;
diff --git a/Plugin/URL_Title.pm b/Plugin/URL_Title.pm
index 8248560..73fa498 100644
--- a/Plugin/URL_Title.pm
+++ b/Plugin/URL_Title.pm
@@ -5,7 +5,7 @@ package Plugin::URL_Title;
use strict;
use warnings;
use HTTP::Tiny;
-use HTML::HeadParser;
+use HTML::Parser;
use utf8;
my %config;
@@ -18,6 +18,17 @@ sub configure {
return $self;
}
+my $title;
+
+# Start-tag handler for HTML::Parser, registered with argspec "tagname,self".
+# Ignores every start tag except <title>. On <title> it installs a text
+# handler that collects the decoded text into the file-level $title, and an
+# end handler that stops parsing (eof) once the closing </title> is seen.
+sub start_handler
+{
+ return if shift ne "title";
+ my $self = shift;
+ # Append rather than assign: the parser may deliver one run of text as
+ # several text events, and assigning would keep only the last piece.
+ # The caller resets $title to "" before each parse.
+ $self->handler(text => sub { $title .= shift; }, "dtext");
+ $self->handler(end => sub { shift->eof if shift eq "title"; },
+ "tagname,self");
+}
+
sub message
{
my ($self, $logger, $me, $who, $where, $raw_what, $what, $irc) = @_;
@@ -25,12 +36,15 @@ sub message
return if ($config{url_on} == 0);
- if ($what =~ /(https?:\/\/[^ ]+)/i) {
+ # Drawn from RFC 3986 §2
+ if ($what =~ /(https?:\/\/[a-z0-9\-\._~:\/\?#\[\]@\!\$&'()\*\+,;=]+)/i) {
$url = $1;
}
return unless $url;
- my $http = HTTP::Tiny->new((default_headers => {'Range' => "bytes=0-65536", 'Accept' => 'text/html'}, timeout => 3));
+ # FIXME add more XML-based formats that we can theoretically extract titles from
+ # FIXME factor out accepted formats and response match into accepted formats array
+ my $http = HTTP::Tiny->new((default_headers => {'Range' => "bytes=0-65536", 'Accept' => 'text/html, image/svg+xml'}, timeout => 3));
my $response = $http->get($url);
@@ -39,29 +53,34 @@ sub message
return;
}
- if (!($response->{headers}->{"content-type"} =~ m,text/html ?,)) {
- $logger->("Not html, giving up now");
+ if (!($response->{headers}->{"content-type"} =~ m,(text/html|image/svg\+xml),)) {
+ $logger->("I don't think I can parse titles from $response->{headers}->{'content-type'} - stopping here");
return;
}
my $html = $response->{content};
utf8::decode($html);
- my $parser = HTML::HeadParser->new;
- $parser->parse($html);
+ $title = "";
+ my $p = HTML::Parser->new(api_version => 3);
+ $p->handler( start => \&start_handler, "tagname,self");
+ $p->parse($html);
+ die "Error: $!\n" if $!;
+
+ $title =~ s/\s+/ /g;
+ $title =~ s/(^\s+|\s+$)//g;
- # get title and unpack from utf8 (assumption)
- my $title = $parser->header("title");
utf8::upgrade($title);
return unless $title;
my $shorturl = $url;
- $shorturl = (substr $url, 0, $config{url_len}) . "…" if length ($url) > $config{url_len};
-
# remove http(s):// to avoid triggering other poorly configured bots
$shorturl =~ s,^https?://,,g;
$shorturl =~ s,/$,,g;
+ # truncate URL without http(s):// to configured length if needed
+ $shorturl = (substr $shorturl, 0, $config{url_len}) . "…" if length ($shorturl) > $config{url_len};
+
my $composed_title = "$title ($shorturl)";
return $composed_title;
}