http://paperlined.org/rss/feeds/fark_photoshop.gen.pl

#! /usr/bin/env perl
BEGIN{$^W=1}  use strict;

use Data::Dumper;
use File::Basename;
use HTML::Entities;
use LWP::Simple;
#use Time::ParseDate;

use lib dirname($0);            # look in the same directory that this script resides in
use FeedHelpers;                # Common code, available here:  http://paperlined.org/rss/feeds/FeedHelpers.pm

# Work out the feed's base name from the path this script was invoked as:
# first drop everything up to and including the last "/", then drop a
# trailing ".gen.pl".
my $feed = $0;
$feed =~ s#^.*/##;
$feed =~ s#\.gen\.pl$##;

# Load the persistent data, kept under "<feed>.db"
my $persistent = PersistentHash::tie_storable("$feed.db");


#############################################
# Load the main page, parse posts
#############################################
# Image used for a story when no entry image could be determined.
my $placeholder_img = "http://paperlined.org/rss/feeds/img/question-mark.png";

# Fetch the Fark front page.  $persistent is handed to the helper as well
# (presumably so it can remember conditional-GET state between runs -- see
# FeedHelpers.pm to confirm).
my $main_page = conditional_http_simple_get('http://www.fark.com/index.html', $persistent)
	or die "Unable to fetch Fark main page.\n\t";

# One hashref per photoshop story found on the front page, with the keys
# link, date, title and description.  title and description are already
# entity-encoded; link is the raw href text taken from the page.
my @stories;

# Each match starts at a photoshop topic icon.  $1 is the text between
# "align=left>" and the next "</td>" (used as the headline); $2 is the first
# href that follows it (used as the story link).
while ($main_page =~ m#<img src="http://img.fark.com/images/2001/topics/photoshop.gif".*?align=left>(.*?)</td>.*?<a href="(.*?)"#gis) {
	my ($title, $link) = ($1, $2);

	my ($img, $date);
	if (exists $persistent->{STORY_IMG}{$link}) {
		# Looked up on an earlier run: reuse what was stored then.
		$img  = $persistent->{STORY_IMG}{$link}{IMG} || $placeholder_img;
		$date = $persistent->{STORY_IMG}{$link}{DATE};
	} else {
		print "Fetching $link...\n";
		my $thread_html = get($link);		# LWP::Simple's get() returns undef on failure

		if (defined $thread_html) {
			# Use the first <img> found inside any <div class="ctext"> block.
			my $found;
			while ($thread_html =~ m#<div class="ctext">(.*?)</div>#gsi) {
				my $post_text = $1;
				if ($post_text =~ m#<img[^>]*src=["']?(\S+?)["']?(\s|/\s*>|>)#si) {
					$found = $1;
					last;
				}
			}

			# Remember the result (or the placeholder, if the page had no
			# usable image) so this thread is not fetched again.
			$img = $found || $placeholder_img;
			$persistent->{STORY_IMG}{$link}{IMG} = $img;
		} else {
			# The fetch itself failed.  Show the placeholder for this run but
			# do NOT store it, so the thread is tried again next time instead
			# of being stuck with the placeholder forever.  Nothing under
			# STORY_IMG is touched here, which keeps the exists() test above
			# false for this link on the next run.
			warn "Unable to fetch $link; will retry on the next run.\n";
			$img = $placeholder_img;
		}
		# $date stays undef: nothing in this script records a DATE for new entries.
	}

	my $story = {
		link        => $link,
		date        => $date,
		title       => HTML::Entities::encode_entities($title),
		# HTML snippet (image linking to the vote results), entity-encoded so
		# it can be embedded as text in the feed's <description>.
		description => HTML::Entities::encode_entities("<a href='$link\&mode=voteresults'><img src='$img' style='border:none' /></a>"),
	};

	push(@stories, $story);
}


#############################################
# Output RSS file
#############################################
# Escape text for use in XML character data or a double-quoted attribute
# value.  An "&" that already starts one of XML's predefined entity
# references or a numeric character reference is left alone, so text that is
# already escaped is not escaped a second time.
my $xml_escape = sub {
	my ($text) = @_;
	return '' unless defined $text;
	$text =~ s/&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)/&amp;/g;
	$text =~ s/</&lt;/g;
	$text =~ s/"/&quot;/g;
	return $text;
};

# The feed is written next to this script, as "<feed>.rss".
my $zero_dir = dirname $0;
open(my $rss_fh, '>', "$zero_dir/$feed.rss")	or die "Unable to write to $feed.rss: $!";

# RSS 1.0 (RDF) preamble and channel description
print {$rss_fh} <<"EOF";
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns="http://purl.org/rss/1.0/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
>
        <channel rdf:about="http://paperlined.org/rss/feeds/$feed.rss">
                <title>Newcum's Fark-Photoshop Feed</title>
                <link>http://www.fark.com/</link>
                <description>David Newcum's RSS Feed of Fark photoshop.  Contact rss_feeds\@paperlined.org for change requests.</description>
                <language>en-us</language>
        </channel>
EOF

# One <item> per story collected above
foreach my $story (@stories)
{
	# The link is raw text scraped from the page, so it must be escaped before
	# going into an attribute and an element.  title and description were
	# entity-encoded when the story was built and are emitted unchanged.
	my $link = $xml_escape->($story->{link});

	#my $date = rss_localtime($story->{date});
	print {$rss_fh} <<"EOF";
        <item rdf:about="$link">
                <title>$story->{title}</title>
                <link>$link</link>
                <description>$story->{description}</description>
        </item>
EOF
		#<dc:date>$date</dc:date>
}
print {$rss_fh} "</rdf:RDF>\n";

# Buffered write errors (disk full, etc.) only show up at close time.
close($rss_fh)	or die "Unable to finish writing $feed.rss: $!";

Generated by GNU enscript 1.6.4.