From f53e5933315a316bb94d8d27cebbb95554aca6ba Mon Sep 17 00:00:00 2001 From: David Phillips Date: Tue, 10 Apr 2018 23:53:18 +1200 Subject: Decode HTML body before passing to the head parser From the HTML::HeadParser docs: > Note that the HTML::HeadParser might get confused if raw undecoded UTF-8 is > passed to the parse() method. Make sure the strings are properly decoded > before passing them on. This explains some hard-to-trace bugs with character mangling. --- Plugin/URL_Title.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/Plugin/URL_Title.pm b/Plugin/URL_Title.pm index 495df8e..8248560 100644 --- a/Plugin/URL_Title.pm +++ b/Plugin/URL_Title.pm @@ -45,6 +45,7 @@ sub message } my $html = $response->{content}; + utf8::decode($html); my $parser = HTML::HeadParser->new; $parser->parse($html); -- cgit v1.1