#!/usr/bin/perl -w # dre,v 1.29 2007/03/23 # vi:ts=4:sw=4:ai:cindent # Dave Holland # Released under the GPL. use strict; use XML::RSS; use HTML::Entities qw(:DEFAULT encode_entities_numeric); use HTML::FormatText::WithLinks; use LWP::UserAgent; use LWP::Simple; use DB_File; use Data::Dumper qw(Dumper); use Getopt::Std; my $debug=0; my $quiet=0; my $dosend=1; my $dofetch=0; our($opt_d, $opt_q, $opt_n, $opt_a, $opt_h, $opt_f); getopts('dqnahf'); if($opt_d) { $debug++; } if($opt_q) { $quiet++; } if($opt_n) { $dosend=0; } if($opt_a) { $dosend=2; } if($opt_f) { $dofetch=1; } if($opt_h) { print STDERR <) { chomp; next if /^#/; next if /^\s*$/; push @feeds, $_; } close(F); my $to=""; open(T,$dir."recipient") or die "$0: couldn't open recipient file: $!\n"; chomp($to=); close(T); if(! $to =~ m/\S+@\S+\.\S+/) { die "$0: recipient '$to' doesn't look like an email address\n"; } my $ua = LWP::UserAgent->new(timeout => 30, env_proxy => 1); foreach my $feed (@feeds) { $debug && print ">>> fetching ", $feed, "\n"; my $response = $ua->get($feed); if(!$response->is_success) { print STDERR "problem fetching $feed: ".$response->status_line."\n"; next; } my $rss = XML::RSS->new; #$debug && print $response->content."\n"; # eww. high-bit literal characters aren't valid XML my $content=$response->content; $content=~s/^//; # icky hack for Nildram status feed eval { $rss->parse(encode_entities_numeric($content, "\200-\377")); }; if($@) { print STDERR "problem parsing $feed: $@\n" unless $quiet; next; } my $title = ${$rss->channel}{'title'}; $title=$feed unless $title; $title =~ s/^\s+//; #$debug && print Dumper($rss)."\n"; my @items = @{$rss->{'items'}}; $debug && print ">>> feed is \"$title\" containing ", $#items+1, " items\n"; foreach my $item (@items) { my $ititle = decode_entities($item->{'title'} || "(untitled)"); $ititle =~ s/^\s+//; $debug && print ">>> * title: ", $ititle, "\n"; #$debug && print Dumper($item)."\n"; my $guid=""; my $link=""; if(defined($item->{'link'})) { $link = $item->{'link'}; } # we prefer the guid, but the link should be acceptable if(!defined($item->{'guid'})) { if(!defined($item->{'link'})) { if($item->{'enclosure'}->{'url'}) { # ugh, a last resort $guid=$item->{'enclosure'}->{'url'}; } else { $debug && print ">>> * no link, guid or enclosure url\n"; } } else { $guid=$item->{'link'}; } } else { $guid=$item->{'guid'}; } # Now cope with non-permalink guids. # This is thoroughly unpleasant. I hate RSS. if(($content =~ m/guid\s+isPermaLink="false"/i) && ($link ne "")) { $guid=$link; } if($guid ne "") { $debug && print ">>> * guid: $guid"; $debug && print ", link: $link"; $debug && print "\n"; my $enclosure = ""; my $fetched = 0; if(&seen($guid)) { $debug && print ">>> ...already seen\n"; } if((!&seen($guid) || ($dosend==2)) && ($dofetch==1)) { if(($item->{'enclosure'}->{'url'}) && ($item->{'enclosure'}->{'url'} =~ m/mp3$/i)) { $enclosure = $item->{'enclosure'}->{'url'}; $debug && print ">>> * want to fetch: ", $enclosure, "\n"; if(! -d $dir."downloads") { mkdir($dir."downloads"); } my $filename = $item->{'enclosure'}->{'url'}; $filename =~ s#^.*/##; if(! -d $dir."downloads") { mkdir($dir."downloads"); } my $response = getstore($enclosure, $dir."downloads/".$filename); $fetched = 0; $debug && print $response, "\n"; if($response != RC_OK) { print STDERR "problem fetching enclosure $enclosure: ".$response."\n"; } else { $fetched = 1; $enclosure = $filename; } } } if(!&seen($guid) || ($dosend==2)) { &send_mail($item, $title, $link, $feed,$enclosure,$fetched); } $seen{$guid}++; } } } untie %seen; exit(0); ####################################### sub seen { my $url = shift; if(exists($seen{$url})) { return(1); } else { return(0); } } sub send_mail { my $item = shift; my $title = shift; my $ititle; my $link = shift; my $feedurl = shift; my $enclosure = shift; my $fetched = shift; if($dosend==0) { $debug && print ">>> would send mail, but -n given\n"; return; } $debug && print ">>> sending mail\n"; open(MAIL, "| $sendmail") or die "error sending mail: $!\n"; binmode MAIL, ":utf8"; #open(MAIL, ">/dev/tty") or die "open: $!\n"; print MAIL "From: $from ($title)\nTo: $to\n"; $ititle = decode_entities($item->{'title'} || "(untitled)"); $ititle =~ s/^\s+//; chomp $ititle; print MAIL "Subject: ", $ititle, "\n"; print MAIL 'X-sent-by: dre,v 1.29 2007/03/23'."\n"; print MAIL 'X-feed-URL: ', $feedurl, "\n"; print MAIL "Content-Type: text/plain; charset=utf-8\n"; print MAIL "\n"; my $ft = HTML::FormatText::WithLinks->new(before_link => '', after_link => '[%n]', leftmargin => 1); if(defined($item->{'description'})) { print MAIL decode_entities($ft->parse($item->{'description'})); } print MAIL "link: $link\n"; if($enclosure ne "") { print MAIL "enclosure: $enclosure (", ($fetched ? "" : "not "), "fetched)\n"; } close(MAIL) or die "error closing sendmail: $!\n"; }