#!/usr/bin/perl -w
# dre,v 1.29 2007/03/23
# vi:ts=4:sw=4:ai:cindent
# Dave Holland <dave@biff.org.uk>
# Released under the GPL.

use strict;
use XML::RSS;
use HTML::Entities qw(:DEFAULT encode_entities_numeric);
use HTML::FormatText::WithLinks;
use LWP::UserAgent;
use LWP::Simple;
use DB_File;
use Data::Dumper qw(Dumper);
use Getopt::Std;

my $debug=0;
my $quiet=0;
my $dosend=1;
my $dofetch=0;
our($opt_d, $opt_q, $opt_n, $opt_a, $opt_h, $opt_f);
getopts('dqnahf');
if($opt_d) { $debug++; }
if($opt_q) { $quiet++; }
if($opt_n) { $dosend=0; }
if($opt_a) { $dosend=2; }
if($opt_f) { $dofetch=1; }
if($opt_h) {
	print STDERR <<EOT;
usage: dre [-h][-d][-q][-n][-a][-f]
  -h  this help text
  -d  print debugging information
  -q  run quietly (e.g. for use with cron)
  -n  don't send any email (for populating the seen file)
  -a  send mail for all entries, even those previously seen
  -f  fetch referenced mp3 file
EOT
	exit(0);
}

binmode STDOUT, ":utf8";
$|=1;

my $dir=$ENV{"HOME"}."/.dre/";

my $feedlist=$dir."feeds";
my $seenfile=$dir."seen";
my $sendmail="/usr/sbin/sendmail -t";
my $from='nobody@localhost'; # foo@dom.ain part only

tie my %seen, 'DB_File', $seenfile;

my @feeds=();
open(F,$feedlist) or die "$0: couldn't open feed list: $!\n";
while(<F>) {
	chomp;
	next if /^#/;
	next if /^\s*$/;
	push @feeds, $_;
}
close(F);

my $to="";
open(T,$dir."recipient") or die "$0: couldn't open recipient file: $!\n";
chomp($to=<T>);
close(T);
if(! $to =~ m/\S+@\S+\.\S+/) {
	die "$0: recipient '$to' doesn't look like an email address\n";
}

my $ua = LWP::UserAgent->new(timeout => 30, env_proxy => 1);

foreach my $feed (@feeds) {
	$debug && print ">>> fetching ", $feed, "\n";
	my $response = $ua->get($feed);
	if(!$response->is_success) {
		print STDERR "problem fetching $feed: ".$response->status_line."\n";
		next;
	}
	my $rss = XML::RSS->new;
	#$debug && print $response->content."\n";

	# eww. high-bit literal characters aren't valid XML
	my $content=$response->content;
	$content=~s/^﻿//; # icky hack for Nildram status feed
	eval { $rss->parse(encode_entities_numeric($content, "\200-\377")); };
	if($@) {
		print STDERR "problem parsing $feed: $@\n" unless $quiet;
		next;
	}
	my $title = ${$rss->channel}{'title'};
	$title=$feed unless $title;
	$title =~ s/^\s+//;
	#$debug && print Dumper($rss)."\n";

	my @items = @{$rss->{'items'}};
	$debug && print ">>> feed is \"$title\" containing ", $#items+1, " items\n";

	foreach my $item (@items) {
		my $ititle = decode_entities($item->{'title'} || "(untitled)");
		$ititle =~ s/^\s+//;

		$debug && print ">>> * title: ", $ititle, "\n";
		#$debug && print Dumper($item)."\n";
		my $guid="";
		my $link="";
		if(defined($item->{'link'})) {
			$link = $item->{'link'};
		}
		# we prefer the guid, but the link should be acceptable
		if(!defined($item->{'guid'})) {
			if(!defined($item->{'link'})) {
				if($item->{'enclosure'}->{'url'}) {
					# ugh, a last resort
					$guid=$item->{'enclosure'}->{'url'};
				} else {
					$debug && print ">>> * no link, guid or enclosure url\n";
				}
			} else {
				$guid=$item->{'link'};
			}
		} else {
			$guid=$item->{'guid'};
		}
		# Now cope with non-permalink guids.
		# This is thoroughly unpleasant. I hate RSS.
		if(($content =~ m/guid\s+isPermaLink="false"/i) &&
				($link ne "")) { $guid=$link; }

		if($guid ne "") {
			$debug && print ">>> * guid: $guid";
			$debug && print ", link: $link";
			$debug && print "\n";
			my $enclosure = "";
			my $fetched = 0;
			if(&seen($guid)) {
				$debug && print ">>> ...already seen\n";
			}
			if((!&seen($guid) || ($dosend==2)) && ($dofetch==1)) {
				if(($item->{'enclosure'}->{'url'}) &&
						($item->{'enclosure'}->{'url'} =~ m/mp3$/i)) {
					$enclosure = $item->{'enclosure'}->{'url'};
					$debug && print ">>> * want to fetch: ", $enclosure, "\n";
					if(! -d $dir."downloads") { mkdir($dir."downloads"); }
					my $filename = $item->{'enclosure'}->{'url'};
					$filename =~ s#^.*/##;
					if(! -d $dir."downloads") {
						mkdir($dir."downloads");
					}
					my $response = getstore($enclosure,
							$dir."downloads/".$filename);

					$fetched = 0;
					$debug && print $response, "\n";
					if($response != RC_OK) {
						print STDERR "problem fetching enclosure $enclosure: ".$response."\n";
					} else {
						$fetched = 1;
						$enclosure = $filename;
					}
				}
			}
			if(!&seen($guid) || ($dosend==2)) {
				&send_mail($item, $title, $link,
						$feed,$enclosure,$fetched);
			}
			$seen{$guid}++;
		}
	}
}

untie %seen;
exit(0);

#######################################

sub seen {
	my $url = shift;
	if(exists($seen{$url})) {
		return(1);
	} else {
		return(0);
	}
}

sub send_mail {
	my $item = shift;
	my $title = shift;
	my $ititle;
	my $link = shift;
	my $feedurl = shift;
	my $enclosure = shift;
	my $fetched = shift;
	if($dosend==0) {
		$debug && print ">>> would send mail, but -n given\n";
		return;
	}
	$debug && print ">>> sending mail\n";
	open(MAIL, "| $sendmail") or die "error sending mail: $!\n";
	binmode MAIL, ":utf8";
#open(MAIL, ">/dev/tty") or die "open: $!\n";
	print MAIL "From: $from ($title)\nTo: $to\n";
	$ititle = decode_entities($item->{'title'} || "(untitled)");
	$ititle =~ s/^\s+//;
	chomp $ititle;
	print MAIL "Subject: ", $ititle, "\n";
	print MAIL 'X-sent-by: dre,v 1.29 2007/03/23'."\n";
	print MAIL 'X-feed-URL: ', $feedurl, "\n";
	print MAIL "Content-Type: text/plain; charset=utf-8\n";
	print MAIL "\n";
	my $ft = HTML::FormatText::WithLinks->new(before_link => '', after_link => '[%n]', leftmargin => 1);
	if(defined($item->{'description'})) {
		print MAIL decode_entities($ft->parse($item->{'description'}));
	}
	print MAIL "link: $link\n";
	if($enclosure ne "") {
		print MAIL "enclosure: $enclosure (", ($fetched ? "" : "not "),
			  "fetched)\n";
	}
	close(MAIL) or die "error closing sendmail: $!\n";

}
