#! /usr/bin/perl

# Warnings are switched on through $^W rather than a pragma.
BEGIN{$^W=1}
use strict;

# Instapundit's RSS stories
#   1) are truncated
#   2) have all HTML markup removed
#
# This just restores the full description to the .rss feed, and should leave the rest of it alone.
#
# The story is only read the first time it shows up on the official .rss, to save bandwidth
# (actually, doing it otherwise would use a huge amount of bandwidth, but would catch any story updates)

use lib '/home/interiot/src/pl/modules/';
use lib '/home/interiot/perllib/lib/perl5/site_perl/5.6.1/i686-linux/';
use lib '/home/jerry/perllib/lib/site_perl/5.005/i386-linux/';
use LWP::Simple;
use Storable;
use XML::RSS;

#########################################
# Load the persistent data
#########################################
# $persistent is a hash ref keyed by story link (see the `exists` test in the
# loop below).  It is read back from a Storable file when one exists and
# starts out empty otherwise.
# NOTE(review): the stored values are presumably the previously fetched
# descriptions -- confirm against the code that fills the hash.
my $persistent;
my $persistent_filename = "/home/interiot/www/rss/feeds/instapundit.db";
if (-e $persistent_filename) {
    $persistent = Storable::retrieve($persistent_filename);
} else {
    $persistent = {};
}

#########################################
# Load the official RSS feed
#########################################
# NOTE(review): LWP::Simple::get reports failure only by returning undef, so
# the $! interpolated below may be unrelated to the actual fetch error.
my $page = LWP::Simple::get("http://www.instapundit.com/index.xml")
    or die "Unable to fetch instapundit's index.xml: $!\n\t";
print "Fetched ", length($page), " bytes from Instapundit\n";

my $rss_obj = XML::RSS->new(version => '1.0');

# The parse runs with warnings and strictures disabled, scoped to this block.
# The trailing `1` lets a failure be told apart from a successful parse;
# the eval used to discard the error entirely.
my $parsed_ok = eval {
    local $^W=0;
    no strict;
    $rss_obj->parse($page);
    1;
};
# Report the failure but keep going, exactly as the script did before; the
# only change is that the problem is no longer silent.
warn "Unable to parse instapundit's index.xml: $@\n" unless $parsed_ok;

#########################################
# Use a previously-fetched description
# where possible, or a newly-fetched
# description where necessary.
#########################################
# %links_seen counts every link present in the current feed.
my %links_seen;
foreach my $item (@{ $rss_obj->{items} }) {
    my $link = $item->{link};
    $links_seen{$link}++;

    # Only stories with no entry in the persistent data are downloaded.
    if (! exists $persistent->{$link}) {
        my $link_page = LWP::Simple::get($link)
            or die "Unable to fetch $link: $!\n\t";
        print "Fetched ", length($link_page), " bytes from $link\n";

        # The pattern opened on the next line continues on the following lines.
        if ($link_page =~ m#

\s*(.*?)\s+