#!/usr/bin/perl

# Release history
# 
# 2009-07-07 - Beta 0.3.1.4
#    IGNOREUSER config option to ignore everything from a user - links and commands, both
#
# 2009-07-07 - Beta 0.3.1.3
#    LWP::UserAgent object set to a MAXGETSIZE which limits the most it can fetch per HTTP GET
#
# 2009-06-23 - Beta 0.3.1.2
#    !google improved to show link and description in a cleaner fashion
#    Can now take a config file as the first argument
# 
# 2009-06-22 - Beta 0.3.1.1
#    !google will now search google
#    Case insensitive triggers
#    !version
#    !about / !trigger take arguements to get specific value / description
#    Custom triggers can be added via the config file - can be modified and reloaded on the fly
#
# 2009-06-22 - Beta 0.3.0
#    Added deNaughty to remove bad words -> add a per-channel deNaughty switch?
#    Added alt_nicks in case one fails
#    Added the URL host name to the printed response
#    !about
#    !triggers
#    !quit -> Owner only
#    !reload -> doesn't affect the stuff that happens on startup like nick, channels and nickserv
#    Reads a hash from a config file and evals to get the settings
#    Sends off a /msg nickserv identify NSPASSWD
#    TIMETOLIVE can be used to set how long before the same URL can be repeated
#
# 2009-06-21 - Beta 0.2.0
#    Added a URL_LIMIT to prevent flooding - max URLs titled per line
#
#  2009-06-21 - Beta 0.1
#    Connects to multiple channels
#    Recognizes http(?:s)?:// and www. urls
#    Reports HTTP status on non-200
#    Reports content type for non-"^text/html.*"
#    Reports title or lack of
#    No config file support yet
#    No extra triggers

# Feature list - working on it a little bit at a time
#   Reads a config file in hash-style
#   - list of channels to listen for
#   - per-channel choice to print titles to channel or PM to owner
#   - specify one owner
#   - specify a list of people that can operate (ie send commands)
#   - specify a URL-regex (to overwrite a default)
#   A trigger-hash with regex's that are triggers and associated function calls
#   !reload conf file trigger
#   !shutup/noshutup triggers - per channel
#   !about trigger
#   bad-word filtering to prevent kicks
#   URL-limit per line to prevent spam floods
#   Limit to the same URL no more than once every $REMEMBER seconds
#   identify to nickserv

package Titler;

use Bot::BasicBot;
@ISA = 'Bot::BasicBot';

use LWP::UserAgent;
use HTML::TokeParser;
use Data::Dumper;

use warnings;
use strict;

my %about = (
	Author     => 'Isaac Good',
	License    => 'MIT/X11',
	Version    => 'Titler - 2009-06-23 - Beta 0.3.1.4',
);

my %config;
my $CONFIG_FILE = $ARGV[0] || 'config.pl';

my %triggers = (
	about    => [ \&cmdAbout, 'List info about the bot. Say !about <tag> for just one entry.' ],
	quit     => [ \&cmdQuit, 'For the owner only.' ],
	triggers => [ \&cmdShowTriggers, 'Lists triggers. Do !trigger <trigger> for details.' ],
	reload   => [ \&loadConfig, 'Reloads the config file. Owner only.' ],
	google   => [ \&googleSearch, 'Search Google!' ],
	version  => [ sub { shift(@_)->say( { channel => shift(@_)->{'channel'}, body => " Version: $about{'Version'}" } ) }, 'Prints the version.' ],
);

my $SELF;

# --------- Bot::BasicBot callback methods we overwrite

# The main function - where we parse incoming messages
sub said 
{
	my ($self, $args) = @_;
	return unless( $args->{'body'} );

	printf "%-10s | %10s | %s\n", $args->{'channel'}, $args->{'who'}, $args->{'body'} if ( $config{'PRINTCHAT'} );

	# Stop here on IGNOREUSER
	return if ( $config{'IGNOREUSER'} and grep { lc($_) eq lc($args->{'who'}) } @{ $config{'IGNOREUSER'} } );

	my $count = 0;
	my $regex = $config{'REGEX'};
	for my $url ( $args->{'body'} =~ /$regex/g ) 
	{
		next if ( $self->isRecentUrl( $url ) );
		$count++;
		my $response = getUrl($url);
		my $message;
		if ( $response->code() != 200 ) 
		{
			$message = "Failed to fetch document: " . $response->status_line();
		}
		elsif ( $response->header('content-type') !~ qr{^text/html} )
		{
			$message = "Not an HTML document. Content Type: " . $response->header('content-type');
		}	
		elsif ( not defined $response->title() ) {
			$message = "No title found.";
		}
		else
		{
			$message = "Title: " . $response->title();
		}

		$message = "$message (at " . $response->request->uri()->host() . ")";
		$message = deNaughty($message);
		$self->say( { channel => $args->{'channel'}, body => $message } );

		last if ( $count >= $config{'URL_LIMIT'} );
	}

	my $trigger;
	# Built in triggers
	for $trigger ( keys %triggers )
	{
		&{ $triggers{$trigger}->[0] }($self, $args) if ( $args->{'body'} =~ /^!$trigger/i );
	}

	# Triggers from the config file
	for $trigger ( keys %{ $config{'TRIGGERS'} } )
	{
		&{ $config{'TRIGGERS'}->{$trigger}->[0] }($self, $args) if ( $args->{'body'} =~ /^!$trigger/i );
	}


	return 0;
}

sub connected
{
	$SELF->say( { channel => 'nickserv', body => 'identify ' . $config{'NSPASSWD'} } ) if ( $config{'NSPASSWD'} );
}

# --------- Helper functions

sub isRecentUrl
{
	my ($self, $url) = @_;

	return unless( $config{'TIMETOLIVE'} );

	$self->{'seen'} = [ grep { $_->{'when'} + $config{'TIMETOLIVE'} > time } @{ $self->{'seen'} } ];
	return 1 if ( grep { $_->{'url'} eq $url } @{ $self->{'seen'} } );
	push ( @{ $self->{'seen'} }, { when => time, url => $url } );
	return;
}

sub getUrl
{
	my ($url) = @_;
	# Prepend http:// if needed
	$url = "http://$url" unless ( $url =~ qr(^http://) );
	my $response = $SELF->{'ua'}->get($url);
	return $response;
}

sub cmsg
{
	my ( $self, $nick, $str ) = @_;
	$self->say( { channel => 'msg', who => $nick, body => $str } );
}

sub loadConfig 
{
	my ($self, $args) = @_;
	return if ( defined $args and exists $args->{'who'} and $args->{'who'} ne $config{'OWNER'} );
	open my $FH, "<", $CONFIG_FILE;
	my $file = join(" ", <$FH>); 
	%config = eval ( $file );
}

sub deNaughty
{
	my ($msg) = @_;
	my $replace = $config{'CENSOR'};
	open my $FH, "<", $config{'BADWORDS'} || return $msg;
	while (my $line = <$FH>)
	{
		chomp $line;
		# Expand the mIRC '*' to the perl '.*'
		$line =~ s/[*]/.*/g;
		$msg =~ s/$line/$replace/ig;
	}
	close $FH;
	return $msg;
}


# --------- cmds that get !triggered
sub cmdAbout
{
	my ($self, $args) = @_;
	my ($trig, $spec) = split(/\s/, $args->{'body'});
	$spec = lc($spec);
	$spec =~ s/^(.)/uc($1)/e;
	if ($spec and exists $about{$spec})
	{
		$self->say( { channel => $args->{'channel'}, body => " $spec: $about{$spec}" } );
	}
	else
	{
		$self->say( { channel => $args->{'channel'}, body => " $_: $about{$_}" } ) for ( keys %about );
	}
}

sub cmdQuit
{
	my ($self, $args) = @_;
	if ( $args->{'who'} eq $config{'OWNER'} ) 
	{
		exit;
	} else {
		$self->say( { channel => $args->{'channel'}, body => $args->{'who'} . ": Shoo!" } );
	}
}

sub cmdShowTriggers
{
	my ($self, $args) = @_;
	my ($trig, $spec) = split(/\s/, $args->{'body'});
	$spec = lc($spec);
	if ($spec and exists $triggers{$spec})
	{
		$self->say( { channel => $args->{'channel'}, body => " $spec: $triggers{$spec}->[1]" } );
	}
	elsif ($spec and exists $config{'TRIGGERS'}->{$spec})
	{
		$self->say( { channel => $args->{'channel'}, body => " $spec: $config{'TRIGGERS'}->{$spec}->[1]" } );
	}
	else
	{
		$self->say( { channel => $args->{'channel'}, body => "Default triggers are: " . join(", ", sort keys %triggers ) } );
		$self->say( { channel => $args->{'channel'}, body => "Extra triggers are: " . join(", ", sort keys %{ $config{'TRIGGERS'} } ) } );
	}
}

sub googleSearch
{
	my ($self, $args) = @_;
	my $term = $args->{'body'};

	# Remove the first word
	my @arr = split(/\s/, $term);
	$term = join('+', @arr[1..$#arr]) || return;

	# URL encode
	$term =~ s/\%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg;

	# Search and parse
	my $response = $self->{'ua'}->get("http://www.google.com/search?q=$term&hl=en&num=1");
	$response = $response->decoded_content();

	my $p = HTML::TokeParser->new( \$response );
	my $t;

	while( $t = $p->get_tag('h2') )
	{
		last if ( $p->get_trimmed_text() eq 'Search Results' );
	}

	$t = $p->get_tag('a');
	my $url =  $t->[1]->{'href'};
	my $title = $p->get_phrase();
	$p->get_tag('div');
	my $description = $p->get_trimmed_text('b');
	$self->say( { channel => $args->{'channel'}, body => "[GOOG] $title ( $url )" } );
	$self->say( { channel => $args->{'channel'}, body => "Desc: $description" } );
}

# --------- Create and start the Bot
loadConfig();
$about{'Owner'} = $config{'OWNER'};

$SELF = Titler->new(
	server    => $config{'SERVER'}  || die,
	channels  => $config{'CHAN'}    || die,
	nick      => $config{'NICK'}    || die,
	alt_nicks => $config{'ANICK'}   || die,
	username  => $config{'NAME'}    || die,
	port      => $config{'PORT'}    || die,
);

$SELF->{'ua'} = LWP::UserAgent->new(
	agent    => $config{'USERAGENT' } || die,
	timeout  => $config{'UATIMEOUT' } || die,
	max_size => $config{'MAXGETSIZE'} || die,
);

$SELF->{'seen'} = [];

$SELF->run();

