#! /usr/bin/env perl

BEGIN { $^W = 1 }
use strict;
use warnings;

# Builds an RSS feed for the Penny Arcade comic.
#
# Each story is an <img> tag for that day's pennyarcade comic.  If you click either
# on the title or the image, it will take you to the news article for that comic.
#
# The official pennyarcade .rss doesn't include the comic at all.  And other
# third-party pennyarcade.rss's don't include a link to the news article that I can see.
#
# What the script does on every run:
#   1. loads the previously seen comics from a Storable file,
#   2. scrapes the site's "view" page for the current comic, headline and news link,
#   3. keeps only the newest $MAX_ENTRIES comics,
#   4. rewrites the .rss file and stores the updated data again.
#
# NOTE(review): the copy of this file that was reviewed had all of its
# "<tag ...>" sequences stripped (inside regexes and heredocs alike).  Every
# span that had to be reconstructed is marked with NOTE(review) below; please
# compare those spans against the original file before deploying.

use lib '/home/interiot/src/pl/modules/';

use Date::Parse;
use Data::Dumper;
use HTML::Entities;
use LWP::Simple 'get';
use POSIX;
use Storable;
use Time::Local;
use Time::Zone;

my $SITE_URL     = 'http://www.penny-arcade.com/';
my $MAX_ENTRIES  = 25;    # only keep the latest 25 comics
my $rss_filename = "/home/interiot/www/rss/feeds/pennyarcade.rss";

# Load the persistent data: { epoch_of_comic_day => { COMIC_IMG, NEWS_URL, FIRST_SEEN, HEADLINE } }
my $persistent_filename = "/home/interiot/www/rss/feeds/pennyarcade.db";
my $persistent = -e $persistent_filename
    ? Storable::retrieve($persistent_filename)
    : {};

##################################
# Grab a new comic, if one exists
##################################
my $view_url    = $SITE_URL . 'view.php3';
my $latest_page = LWP::Simple::get($view_url);

# LWP::Simple::get() returns undef on failure and does not set $!, so the
# message no longer appends a misleading errno string.
die "Unable to retrieve $view_url webpage\n"
    unless defined $latest_page && length $latest_page;

# The news text can be posted long after the comic, so a missing link is not
# an error; the feed falls back to the site's front page in that case.
my $latest_news_url;
if ($latest_page =~ m#href="(news\.php3\?date=\d\d\d\d-\d\d-\d\d)"#i) {
    $latest_news_url = $SITE_URL . $1;
}

# NOTE(review): RECONSTRUCTED.  The original image pattern and the statements
# that derived $latest_img_url and $perl_date were lost.  This version assumes
# the comic image lives at images/YYYY/YYYYMMDD*.ext and keys the comic by
# local midnight of that date (the rest of the script sorts keys numerically
# and feeds them to localtime(), so an epoch value is required) -- confirm.
my ($img_path, $year, $month, $day) =
    $latest_page =~ m#<img[^>]*\ssrc="(images/\d\d\d\d/(\d\d\d\d)(\d\d)(\d\d)[^"]*)"#si
    or die "PennyArcade parser failed to find necessary info. Page format has changed.";
my $latest_img_url = $SITE_URL . $img_path;
my $perl_date      = timelocal(0, 0, 0, $day, $month - 1, $year);

# Matches any run of tags (and &nbsp; padding) between the strip-top cell and
# the headline text.
# NOTE(review): RECONSTRUCTED.  Only the tail "]*))*\s*>\s*(?: )*\s*)*" of this
# pattern survived; the head and the "&nbsp;" are a best guess -- confirm.
my $arbitrary_tags = '(?:\s*<[^>"]*(?:(?:"[^"]*"[^>"]*))*\s*>\s*(?:&nbsp;)*\s*)*';

# NOTE(review): the element name in front of [^>]*background=... was lost;
# \w+ accepts whichever element carries that background attribute.
my ($headline) =
    $latest_page =~ m#<\w+[^>]*background="bluegrey/images/strip/striptop.gif"[^>]*>$arbitrary_tags([^<]+)#si
    or die "PennyArcade parser failed to find necessary info. Page format has changed.";

# The capture runs up to the next tag, so it can carry layout whitespace.
$headline =~ s/\A\s+//;
$headline =~ s/\s+\z//;

## Add a new entry if necessary, but generally update the persistent data
## with the (possibly) new information
my $entry = $persistent->{$perl_date} ||= {};
$entry->{COMIC_IMG}    = $latest_img_url;
$entry->{NEWS_URL}     = $latest_news_url if $latest_news_url;
$entry->{FIRST_SEEN} ||= time();
$entry->{HEADLINE}     = $headline;

#print Dumper($persistent); exit(1);

###################################
# Retire old comics
#       only keep the latest 25
###################################
my @chrono_sorted = sort { $b <=> $a } keys %$persistent;    # the latest ones show up first

# Guarded because splice() warns when the offset is past the end of the array.
if (@chrono_sorted > $MAX_ENTRIES) {
    my @expired = splice(@chrono_sorted, $MAX_ENTRIES);
    delete @{$persistent}{@expired};
}

###################################
# Write out the .RSS file
###################################
my $timezone = format_tz_offset(Time::Zone::tz_local_offset());

open my $rss_fh, '>', $rss_filename
    or die "Unable to write to pennyarcade.rss: $!";

# NOTE(review): RECONSTRUCTED markup.  Only the text content of this heredoc
# survived; the element names and the RSS version are a best guess -- confirm.
print {$rss_fh} <<"EOF";
<?xml version="1.0"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel>
<title>Newcum's PennyArcade Feed</title>
<link>http://www.penny-arcade.com/</link>
<description>David Newcum's RSS Feed of PennyArcade.com.  Contact rss_feeds\@paperlined.org for change requests.</description>
<language>en-us</language>
EOF

foreach my $entry_date (@chrono_sorted) {
    my $data = $persistent->{$entry_date};

    my $displayable_date = POSIX::strftime("%D", localtime($entry_date));
    my $rss_date = POSIX::strftime("%Y-%m-%dT%H:%M", localtime($data->{FIRST_SEEN}))
                 . $timezone;

    my $item_title   = xml_escape($data->{HEADLINE} || "Penny Arcade");
    my $img_news_url = $data->{NEWS_URL} || "http://www.penny-arcade.com/";

    # The comic itself is delivered as HTML inside the item, so the snippet
    # has to be escaped as a whole before it goes into the XML.
    my $comic_html = xml_escape(
        qq{<a href="$img_news_url"><img src="$data->{COMIC_IMG}" border="0" /></a>}
    );
    my $item_link = xml_escape($img_news_url);

    # NOTE(review): RECONSTRUCTED markup, same caveat as the channel header.
    print {$rss_fh} <<"EOF";
<item>
<title>$item_title ($displayable_date)</title>
<link>$item_link</link>
<description>$comic_html</description>
<dc:creator>Gabe and Tycho</dc:creator>
<dc:date>$rss_date</dc:date>
</item>
EOF
}

# NOTE(review): RECONSTRUCTED closing tags (the original printed a string
# whose markup was stripped, leaving only "\n").
print {$rss_fh} "</channel>\n</rss>\n";

# Buffered write errors (disk full, ...) only surface at close time.
close $rss_fh
    or die "Unable to write to pennyarcade.rss: $!";

# Write the updated persistent perl data out
Storable::nstore($persistent, $persistent_filename)
    or die "Unable to write to $persistent_filename: $!";

# Formats a UTC offset given in seconds as "+hh:mm" / "-hh:mm".
# The sign is derived from the offset itself and both fields are zero-padded,
# so offsets such as -1800 ("-00:30") or 19800 ("+05:30") come out right.
sub format_tz_offset {
    my ($offset_seconds) = @_;

    my $sign          = $offset_seconds < 0 ? '-' : '+';
    my $total_minutes = int(abs($offset_seconds) / 60);

    return sprintf('%s%02d:%02d', $sign, int($total_minutes / 60), $total_minutes % 60);
}

# Escapes text for inclusion in XML.  Numeric character references are used
# because HTML's named entities (&eacute; etc.) are not defined in plain XML.
sub xml_escape {
    my ($text) = @_;
    return HTML::Entities::encode_entities_numeric($text);
}