#! /usr/bin/env perl
BEGIN{$^W=1} use strict;
# Each story is an <img> tag for that day's pennyarcade comic. If you click either
# on the title or the image, it will take you to the news article for that comic.
#
# The official pennyarcade .rss doesn't include the comic at all. And other
# third-party pennyarcade.rss's don't include a link to the news article that I can see.
use lib '/home/interiot/src/pl/modules/';
use Date::Parse;
use Data::Dumper;
use HTML::Entities;
use LWP::Simple 'get';
use POSIX;
use Storable;
use Time::Local;
use Time::Zone;
# Load the persistent data
#
# $persistent is a hash ref keyed by comic date; each value is a hash ref with the
# COMIC_IMG, NEWS_URL, FIRST_SEEN and HEADLINE fields filled in further down.
# A missing db file just means this is the first run, so start with an empty hash.
my $persistent;
my $persistent_filename = "/home/interiot/www/rss/feeds/pennyarcade.db";
if (-e $persistent_filename) {
    $persistent = Storable::retrieve($persistent_filename);
    # retrieve() returns undef on I/O errors. Carrying on would silently start from
    # an empty history and then overwrite the db at the end of the run, so stop here.
    die "Unable to read $persistent_filename: $!"
        unless (ref($persistent) eq 'HASH');
} else {
    $persistent = {};
}
##################################
# Grab a new comic, if one exists
##################################
# Fetch the "current comic" page. LWP::Simple::get() reports failure by returning
# undef and does not set $!, so the error message names only the URL.
my $latest_page = LWP::Simple::get('http://www.penny-arcade.com/view.php3')
    or die "Unable to retrieve http://www.penny-arcade.com/view.php3 webpage\n";

# Absolute URL of the news post that goes with the comic. It stays undef when the
# page carries no such link: apparently the text can be posted long after the
# comic, so a missing link is tolerated instead of treated as a parser failure.
my $latest_news_url;
if ($latest_page =~ m#href="(news\.php3\?date=\d\d\d\d-\d\d-\d\d)"#i) {
    $latest_news_url = "http://www.penny-arcade.com/$1";
}
# NOTE(review): the statements below look truncated -- the markup that used to be
# inside the patterns appears to have been stripped, and the first "if" runs
# straight into the tail of what looks like a quoted pattern fragment. Later code
# uses $perl_date, $latest_img_url and $arbitrary_tags, but their declarations are
# not visible in this text; presumably they lived here -- confirm against an
# intact copy before relying on this section.
# What is still readable: a case-insensitive, dot-matches-newline pattern looks for
# an element with background="bluegrey/images/strip/striptop.gif", skips
# $arbitrary_tags, and captures the text up to the next "<"; that capture becomes
# $headline. The script dies if the pattern does not match.
if ($latest_page !~ m#]*))*\s*>\s*(?: )*\s*)*';
if ($latest_page !~ m#
]*background="bluegrey/images/strip/striptop.gif"[^>]*>$arbitrary_tags([^<]+)#si) {
die "PennyArcade parser failed to find necessary info. Page format has changed.";
}
my $headline = $1;
## Make sure this comic has a record, then refresh that record with whatever was
## just scraped (some of it may be new since the previous run).
unless (exists $persistent->{$perl_date}) {
    $persistent->{$perl_date} = {};
}
# The image URL and headline are always overwritten with the latest values.
@{ $persistent->{$perl_date} }{qw(COMIC_IMG HEADLINE)} = ($latest_img_url, $headline);
# The news link is only recorded once the site actually has one.
if ($latest_news_url) {
    $persistent->{$perl_date}{NEWS_URL} = $latest_news_url;
}
# Stamp the entry the first time it is seen; later runs leave the stamp alone.
$persistent->{$perl_date}{FIRST_SEEN} ||= time();
#print Dumper($persistent); exit(1);
###################################
# Retire old comics
# (only the newest 25 entries are kept)
###################################
# Keys sorted numerically, newest first.
my @chrono_sorted = sort { $b <=> $a } keys %{$persistent};
# Drop the 25 newest keys from the list; whatever is left is due for removal.
splice(@chrono_sorted, 0, 25);
# Remove all leftover entries in one go with a hash-slice delete.
delete @{$persistent}{@chrono_sorted};
# (this pruning code is untested as of yet)
###################################
# Write out the .RSS file
###################################
# Writes a feed header followed by one item per stored comic, newest first, to
# pennyarcade.rss. Dies if the file cannot be opened or cannot be flushed on close.
#
# NOTE(review): the heredoc text below contains no XML element tags; it looks as if
# the markup was stripped from this copy of the script. The text is kept exactly as
# found -- restore the tags from an intact copy rather than guessing at them.
@chrono_sorted = sort { $b <=> $a } keys(%$persistent);    # the latest ones show up first

# Zone designator appended to each item's timestamp, in the "+hh:mm" / "-hh:mm"
# form: the sign is always present and hours are always two digits. The sign is
# taken from the raw offset, so offsets between -1h and 0 keep their minus sign.
# NOTE(review): the offset is looked up once with no time argument, while each
# item's timestamp comes from its own FIRST_SEEN; entries first seen on the other
# side of a DST change would presumably get the wrong offset -- confirm.
my $tz_offset = Time::Zone::tz_local_offset();
my $tz_abs    = abs($tz_offset);
my $timezone  = sprintf("%s%02d:%02d",
                        ($tz_offset < 0 ? '-' : '+'),
                        int($tz_abs / 3600),
                        int($tz_abs / 60) % 60);

open(my $rss_fh, '>', '/home/interiot/www/rss/feeds/pennyarcade.rss')
    or die "Unable to write to pennyarcade.rss: $!";
print {$rss_fh} <<"EOF";
Newcum's PennyArcade Feed
http://www.penny-arcade.com/
David Newcum's RSS Feed of PennyArcade.com. Contact rss_feeds\@paperlined.org for change requests.en-us
EOF
foreach my $perl_date (@chrono_sorted) {
    my $data = $persistent->{$perl_date};
    # Human-readable comic date shown next to the headline.
    my $displayable_date = POSIX::strftime("%D", localtime($perl_date));
    # Item timestamp: when this script first saw the comic, local time plus zone.
    my $rss_date = POSIX::strftime("%Y-%m-%dT%H:%M", localtime($data->{FIRST_SEEN})) . $timezone;
    my $headline = encode_entities($data->{HEADLINE} || "Penny Arcade");
    # Link to the news post when one is known, otherwise to the site front page.
    my $img_news_url = $data->{NEWS_URL} || "http://www.penny-arcade.com/";
    print {$rss_fh} <<"EOF";
$headline ($displayable_date)
$img_news_url
<a href="$img_news_url"><img src="$data->{COMIC_IMG}" border="0" /></a>Gabe and Tycho$rss_date
EOF
}
print {$rss_fh} "\n";
# Buffered write errors (e.g. a full disk) only surface at close, so check it.
close($rss_fh) or die "Unable to finish writing pennyarcade.rss: $!";
# Save the (possibly updated and pruned) comic history back to the Storable db so
# the next run can pick it up; nstore() writes it in network byte order.
unless (Storable::nstore($persistent, $persistent_filename)) {
    die "Unable to write to $persistent_filename: $!";
}