# http://paperlined.org/rss/feeds/kottke.gen.pl
#! /usr/bin/env perl
BEGIN{$^W=1} use strict;
use Data::Dumper;
use File::Basename;
use LWP::Simple;
use HTML::Entities;
#############################################
# Load the main page, parse posts
#############################################
# The site reportedly supports neither ETag nor Last-Modified, so the whole
# page (roughly 70k) has to be downloaded again on every run.
# LWP::Simple::get returns undef on failure, which triggers the die below.
my $main_page = get('http://www.kottke.org/index.html')
    or die "Unable to fetch kottke main page.\n\t";
print "Fetched bytes ", length($main_page), " from kottke\n";

# Collect one hashref per post, with keys: link, title, description.
# Each post on the page is expected to look like
#   <div class="post"> <h2><a href="LINK">TITLE</a></h2> BODY <div class="meta">
# /s lets BODY span several lines, /i ignores tag case, and /g in the while
# condition walks through every post on the page in order.
my @stories;
while ($main_page =~ m{<div class="post">\s*<h2><a href="(.*?)">(.*?)</a></h2>\s*(.*?)<div class="meta">}gis) {
    my ($link, $title, $body) = ($1, $2, $3);

    # Root-relative src attributes would not resolve from inside a feed
    # reader, so anchor them to the site.
    $body =~ s{src="/}{src="http://www.kottke.org/}gi;

    # Title and body are escaped so they can sit inside XML elements.
    # encode_entities_numeric is used instead of encode_entities because the
    # latter emits named HTML entities (&eacute;, &nbsp;, ...) for high-bit
    # characters, and those names are not defined in an RSS/XML document;
    # numeric character references are always valid.
    # The link is stored exactly as it appeared in the href attribute.
    push @stories, {
        link        => $link,
        title       => HTML::Entities::encode_entities_numeric($title),
        description => HTML::Entities::encode_entities_numeric($body),
    };
}
#############################################
# Output RSS file
#############################################
# Write the feed next to this script (directory part of $0) rather than into
# whatever the current working directory happens to be.
my $zero_dir = dirname($0);
my $rss_path = "$zero_dir/kottke.rss";

# Three-argument open with a lexical handle: the path can never be
# misread as an open mode, and the handle does not leak as a package global.
open my $rss, '>', $rss_path
    or die "Unable to write to kottke.rss: $!";

# RSS 1.0 requires the channel to carry an <items> table of contents whose
# rdf:li resources match the rdf:about of each <item> written further down.
my $toc = join '', map { qq{<rdf:li resource="$_->{link}" />\n} } @stories;

# Channel header.  The \@ keeps the e-mail address from being interpolated
# as an array inside the double-quoted heredoc.
print {$rss} <<"EOF";
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
>
<channel rdf:about="http://paperlined.org/rss/feeds/kottke.rss">
<title>Newcum's Kottke Feed</title>
<link>http://www.kottke.org/index.html</link>
<description>David Newcum's RSS Feed of kottke.org. Contact rss_feeds\@paperlined.org for change requests.</description>
<language>en-us</language>
<items>
<rdf:Seq>
${toc}</rdf:Seq>
</items>
</channel>
EOF

# One <item> per scraped post, in page order.
foreach my $story (@stories) {
    print {$rss} <<"EOF";
<item rdf:about="$story->{link}">
<title>$story->{title}</title>
<link>$story->{link}</link>
<description>$story->{description}</description>
</item>
EOF
}

print {$rss} "</rdf:RDF>\n";

# Buffered write errors (disk full, quota, ...) only surface at close time,
# so the close has to be checked to know the feed was really written.
close $rss
    or die "Unable to finish writing kottke.rss: $!";
# Generated by GNU enscript 1.6.4.