# Source: http://paperlined.org/rss/feeds/fark_photoshop.gen.pl
#! /usr/bin/env perl
BEGIN{$^W=1} use strict;
use Data::Dumper;
use File::Basename;
use HTML::Entities;
use LWP::Simple;
#use Time::ParseDate;
use lib dirname($0); # look in the same directory that this script resides in
use FeedHelpers; # Common code, available here: http://paperlined.org/rss/feeds/FeedHelpers.pm
# Name of this feed: the script path with any leading directory part and the
# trailing ".gen.pl" suffix removed.
my $feed = $0;
$feed =~ s#(^.*/|\.gen\.pl$)##g;
# Load the persistent data, kept in "$feed.db" (tie_storable is provided by
# FeedHelpers; its storage details are not visible from this file).
my $persistent = PersistentHash::tie_storable("$feed.db");
#############################################
# Load the main page, parse posts
#############################################
# Fetch the Fark front page.  conditional_http_simple_get comes from
# FeedHelpers and is handed the persistent store (presumably so it can issue a
# conditional request -- confirm in FeedHelpers).  A false result aborts the run.
my $main_page = conditional_http_simple_get('http://www.fark.com/index.html', $persistent)
    or die "Unable to fetch Fark main page.\n\t";

# Image shown for a story when no picture could be found in its comment thread.
my $placeholder_img = "http://paperlined.org/rss/feeds/img/question-mark.png";

# One hashref per photoshop story, with keys: link, date, title, description.
my @stories;

# A photoshop story is recognised by its topic icon.  The text between the
# icon's cell and the closing </td> is taken as the title, and the first href
# that follows is taken as the story link.
while ($main_page =~ m#<img src="http://img.fark.com/images/2001/topics/photoshop.gif".*?align=left>(.*?)</td>.*?<a href="(.*?)"#gis) {
    my ($title, $link) = ($1, $2);

    # Only fetch the story page the first time this link is seen; afterwards
    # the image URL is served from the persistent store.
    if (! exists $persistent->{STORY_IMG}{$link}) {
        print "Fetching $link...\n";
        my $story_html = get($link);

        if (defined $story_html) {
            # Walk the comment bodies in page order and keep the first <img>
            # source found in any of them.
            my $first_img;
            while ($story_html =~ m#<div class="ctext">(.*?)</div>#gsi) {
                my $post_text = $1;
                if ($post_text =~ m#<img[^>]*src=["']?(\S+?)["']?(?:\s|/\s*>|>)#si) {
                    $first_img = $1;
                    last;
                }
            }
            $persistent->{STORY_IMG}{$link}{IMG} = $first_img || $placeholder_img;
        }
        else {
            # LWP::Simple::get returns undef on failure.  Do not cache anything
            # in that case, so the story is retried on the next run instead of
            # being stuck with the placeholder forever.
            warn "Unable to fetch $link; using placeholder image for this run\n";
        }
    }

    # Read the cached record without autovivifying an entry for links whose
    # fetch failed.
    my $cached = exists $persistent->{STORY_IMG}{$link}
        ? $persistent->{STORY_IMG}{$link}
        : {};
    my $img = $cached->{IMG} || $placeholder_img;

    # NOTE: date extraction (formerly via Time::ParseDate on the first post's
    # "cdate" cell) is disabled, so DATE is never stored and 'date' is undef.
    my $story = {};
    $story->{link}        = $link;
    $story->{date}        = $cached->{DATE};
    $story->{title}       = HTML::Entities::encode_entities($title);
    # The description is an HTML fragment; it is entity-encoded so it can be
    # embedded as text inside the RSS <description> element.
    $story->{description} = HTML::Entities::encode_entities("<a href='$link\&mode=voteresults'><img src='$img' style='border:none' /></a>");
    push(@stories, $story);
}
#############################################
# Output RSS file
#############################################
# Write the collected stories as an RSS 1.0 (RDF) document named "$feed.rss"
# in the directory this script lives in.
my $zero_dir = dirname $0;
my $rss_path = "$zero_dir/$feed.rss";

# Three-argument open with a lexical handle: the path can never be parsed as
# part of the mode, and the handle does not leak into the package namespace.
open my $rss, '>', $rss_path or die "Unable to write to $feed.rss: $!";

# NOTE(review): the RSS 1.0 spec also asks for an <items> table of contents
# inside <channel>; it is not emitted here, matching the existing output.
print {$rss} <<"EOF";
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
>
<channel rdf:about="http://paperlined.org/rss/feeds/$feed.rss">
<title>Newcum's Fark-Photoshop Feed</title>
<link>http://www.fark.com/</link>
<description>David Newcum's RSS Feed of Fark photoshop. Contact rss_feeds\@paperlined.org for change requests.</description>
<language>en-us</language>
</channel>
EOF

foreach my $story (@stories) {
    # Title and description were entity-encoded when the story was built, but
    # the link was stored raw.  Escape the XML-special characters here so a
    # URL containing '&' (or a quote) cannot produce a malformed document.
    my $link_xml = HTML::Entities::encode_entities($story->{link}, '<>&"');

    #my $date = rss_localtime($story->{date});
    print {$rss} <<"EOF";
<item rdf:about="$link_xml">
<title>$story->{title}</title>
<link>$link_xml</link>
<description>$story->{description}</description>
</item>
EOF
    #<dc:date>$date</dc:date>
}

print {$rss} "</rdf:RDF>\n";

# Buffered write errors (e.g. a full disk) only surface at close time.
close $rss or die "Unable to finish writing $feed.rss: $!";
# Generated by GNU enscript 1.6.4.