#! /usr/bin/env perl BEGIN{$^W=1} use strict; use Data::Dumper; use File::Basename; use HTML::Entities; use LWP::Simple; #use Time::ParseDate; use lib dirname($0); # look in the same directory that this script resides in use FeedHelpers; # Common code, available here: http://paperlined.org/rss/feeds/FeedHelpers.pm (my $feed = $0) =~ s#(^.*/|\.gen\.pl$)##g; # Load the persistent data my $persistent = PersistentHash::tie_storable("$feed.db"); ############################################# # Load the main page, parse posts ############################################# my $main_page = conditional_http_simple_get('http://www.fark.com/index.html', $persistent) or die "Unable to fetch Fark main page.\n\t"; my @stories; while ($main_page =~ m#(.*?).*?{STORY_IMG}{$link}) { print "Fetching $link...\n"; my $story = get($link); #my $firstprost; #($firstprost) = ($story =~ m#(.*?
# -----------------------------------------------------------------------------
# Review notes for this script (documentation only; no code was changed).
#
# Purpose, as far as the visible text shows:
#   * $feed is derived from $0 by stripping the leading directory and a
#     trailing ".gen.pl"; it names both the "$feed.db" persistent store and
#     the "$feed.rss" output file.
#   * The Fark index page is fetched with conditional_http_simple_get(); the
#     script dies if that call returns a false value.
#   * Each match in the index page yields a $link.  The story page is fetched
#     with LWP::Simple's get(), and the first image "src" found in a post is
#     stored in $persistent->{STORY_IMG}{$link}{IMG}.  If none is found, the
#     question-mark.png placeholder URL is stored instead.
#   * Link, date, entity-encoded title and description are collected in
#     @stories and then printed to the RSS filehandle, written to
#     "$zero_dir/$feed.rss" (the directory of $0).
#
# NOTE(review): this copy of the file looks damaged -- please restore it from
# the original before running or refactoring:
#   * All code on physical line 1 follows the "#!" and is therefore read by
#     Perl as a single comment; the original line breaks appear to be lost.
#   * The regex patterns and here-documents seem to have had their markup
#     removed: the main-page match stops at "m#(.*?).*?" and resumes at
#     "{STORY_IMG}{$link}) {", the image pattern starts at "]*src=", and the
#     RSS here-documents contain no element tags.  The missing text cannot be
#     recovered from what is visible here.
#   * The remaining physical line breaks fall inside regex patterns, some of
#     them in code that was commented out.
#
# NOTE(review): things to confirm once the original text is restored:
#   * "open RSS, \">...\"" is a two-argument open on a bareword handle, and no
#     close of RSS is visible; consider a lexical handle, three-argument open
#     and a checked close.
#   * $story is assigned the return value of get($link) and later used as a
#     hash reference ($story->{link}, ...); the declaration that makes this
#     valid is presumably in the missing text -- verify.
#   * $persistent->{STORY_IMG}{$link}{DATE} is only assigned in commented-out
#     code, so $story->{date} is presumably undefined for new stories -- verify.
#   * PersistentHash::tie_storable and conditional_http_simple_get are not
#     defined in this file; presumably they come from FeedHelpers -- confirm.
# -----------------------------------------------------------------------------
.*?
)#si); #if ($firstprost =~ m#
.*{STORY_IMG}{$link}{IMG} = $1; #} else { #$persistent->{STORY_IMG}{$link}{IMG} = "http://paperlined.org/rss/feeds/img/question-mark.png"; #} #if ($firstprost !~ m#
#) { #die "HTML format likely changed"; #} #$persistent->{STORY_IMG}{$link}{DATE} = parsedate($1); #if (! $persistent->{STORY_IMG}{$link}{DATE}) { #die "Unable to parse date " . $persistent->{STORY_IMG}{$link}{DATE} . "\n\t"; #} # while ($story =~ m#
(.*?)
#gsi) { my $post_text = $1; if ($post_text =~ m#]*src=["']?(\S+?)["']?(\s|/\s*>|>)#si) { $persistent->{STORY_IMG}{$link}{IMG} = $1; last; } } $persistent->{STORY_IMG}{$link}{IMG} = "http://paperlined.org/rss/feeds/img/question-mark.png" unless $persistent->{STORY_IMG}{$link}{IMG}; } $img = $persistent->{STORY_IMG}{$link}{IMG}; $story->{link} = $link; $story->{date} = $persistent->{STORY_IMG}{$link}{DATE}; $story->{title} = HTML::Entities::encode_entities($title); $story->{description} = HTML::Entities::encode_entities(""); push(@stories, $story); } ############################################# # Output RSS file ############################################# my $zero_dir = dirname $0; open RSS, ">$zero_dir/$feed.rss" or die "Unable to write to $feed.rss: $!"; print RSS <<"EOF"; Newcum's Fark-Photoshop Feed http://www.fark.com/ David Newcum's RSS Feed of Fark photoshop. Contact rss_feeds\@paperlined.org for change requests. en-us EOF foreach my $story (@stories) { #my $date = rss_localtime($story->{date}); print RSS <<"EOF"; $story->{title} $story->{link} $story->{description} EOF #$date } print RSS "\n";
(.*?)