http://paperlined.org/rss/feeds/dilbert.gen.pl

#! /usr/bin/env perl
BEGIN{$^W=1}  use strict;

use LWP::Simple 'get';
use POSIX;
use Storable;

use lib '/home/interiot/src/pl/modules/';
use Time::Zone;



# Restore the state saved by the previous run.  The structure is a hash ref
# keyed by comic date; each value is a hash with URL, DATETIME and IMG entries.
# When no database file exists yet, start from an empty hash.
my $persistent_filename = "/home/interiot/www/rss/feeds/dilbert.db";
my $persistent = (-e $persistent_filename)
	? Storable::retrieve($persistent_filename)
	: {};


##################################
# Grab a new comic, if one exists
##################################
# Everything below is scraped out of this single archive page.
my $latest_page = LWP::Simple::get('http://dilbert.com/comics/dilbert/archive/');
die "Unable to retrieve dilbert webpage" unless $latest_page;

# The date stamp is taken from the archive link that wraps the arrow_right.gif
# image.  Dots in file names are escaped so they only match a literal period
# (an unescaped "." in a pattern matches any character).  The list assignment
# yields the number of captures, so a failed match falls through to die.
my ($cur_date) = $latest_page =~ m#<a HREF="/comics/dilbert/archive/dilbert-(20\d+)\.html"><img SRC="/comics/dilbert/images/arrow_right\.gif"#i
	or die "Dilbert parser failed to find necessary info.  Page format has changed.";

if (! exists $persistent->{$cur_date}) {
	# First time this date is seen: locate the comic's image file on the page.
	my ($img_file) = $latest_page =~ m#<img SRC="/comics/dilbert/archive/images/(dilbert\d+\.(?:gif|jpg))"#i
		or die "Dilbert parser failed to find necessary info.  Page format has changed.";

	# DATETIME is the moment this script first noticed the comic, not a
	# publication time reported by the site.
	$persistent->{$cur_date} = {
		URL      => "http://dilbert.com/comics/dilbert/archive/dilbert-$cur_date.html",
		DATETIME => time(),
		IMG      => "http://dilbert.com/comics/dilbert/archive/images/$img_file",
	};
}


###################################
# Retire old comics
#    once the img is no longer stored on the server
##################################
# Sanity check first: if the page has no archive <option> entries at all, the
# layout has changed, and pruning against it would wipe every stored comic.
if ($latest_page !~ m#<option VALUE="/comics/dilbert/archive/dilbert-\d+\.html">#i) {
	die "Dilbert parser failed to find necessary info.  Page format has changed.";
}

# A stored comic is kept only while the page still lists an <option> for its
# date.  The key is quoted with \Q..\E and the dot before "html" is escaped so
# that both are matched literally rather than as regex syntax.
my @retired = grep {
	$latest_page !~ m#<option VALUE="/comics/dilbert/archive/dilbert-\Q$_\E\.html">#i
} keys %$persistent;
delete @{$persistent}{@retired};
	# NOTE: this retirement logic has not been exercised against a live page yet

###################################
# Write out the .RSS file
###################################

# Render a UTC offset given in seconds as a W3CDTF zone designator:
# "+hh:mm" or "-hh:mm", sign always present and hours zero-padded
# (e.g. -18000 => "-05:00", 3600 => "+01:00", -1800 => "-00:30").
sub _w3c_zone_designator {
	my ($offset_seconds) = @_;
	my $sign          = $offset_seconds < 0 ? '-' : '+';
	my $total_minutes = int(abs($offset_seconds) / 60);
	return sprintf("%s%02d:%02d", $sign, int($total_minutes / 60), $total_minutes % 60);
}

# Three-argument open on a lexical handle: the mode cannot be influenced by the
# file name, and the handle does not leak into the package namespace.
my $rss_filename = "/home/interiot/www/rss/feeds/dilbert.rss";
open(my $rss, '>', $rss_filename)	or die "Unable to write to dilbert.rss: $!";
print {$rss} <<"EOF";
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns="http://purl.org/rss/1.0/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
>
	<channel rdf:about="http://paperlined.org/rss/feeds/dilbert.rss">
		<title>Newcum's Dilbert Feed</title>
		<link>http://dilbert.com/comics/dilbert/archive/</link>
		<description>David Newcum's RSS Feed of Dilbert.com.  Contact rss_feeds\@paperlined.org for change requests.</description>
		<language>en-us</language>
	</channel>
EOF

# One <item> per stored comic, newest date key first.
foreach my $date (reverse sort keys %$persistent) {
	my $data = $persistent->{$date};
	my @seen_at = localtime($data->{DATETIME});
	my $displayable_date = POSIX::strftime("%D", @seen_at);

	# The offset is looked up for the item's own timestamp rather than for
	# "now", so the zone stays right when the two fall on different sides of a
	# daylight-saving change.
	my $rss_date = POSIX::strftime("%Y-%m-%dT%H:%M", @seen_at)
		. _w3c_zone_designator(Time::Zone::tz_local_offset($data->{DATETIME}));

	# The <img> markup is entity-escaped so it travels as text inside <description>.
	print {$rss} <<"EOF";
	<item rdf:about="$data->{URL}">
		<title>Dilbert for $displayable_date</title>
		<link>$data->{URL}</link>
		<description>&lt;img src="$data->{IMG}" /&gt;</description>
		<dc:creator>Scott Adams</dc:creator>
		<dc:date>$rss_date</dc:date>
	</item>
EOF
}
print {$rss} "</rdf:RDF>\n";

# Buffered write errors (e.g. a full disk) only surface at close time.
close($rss)	or die "Unable to finish writing dilbert.rss: $!";


# Save the (possibly changed) state back to disk so the next run can pick it up
unless (Storable::nstore($persistent, $persistent_filename)) {
	die "Unable to write to $persistent_filename: $!";
}

Generated by GNU enscript 1.6.4.