#!/usr/bin/perl # an HTML screen-scraper for http://www.propublica.org/site/author/julia_angwin/ # that pulls the list of her latest articles off # for help in constructing http://paperlined.org/external/Dragnet_Nation__Julia_Angwin.html use strict; use warnings; use LWP::Simple; use Data::Dumper; #use Devel::Comments; # uncomment this during development to enable the ### debugging statements my $html = get "http://www.propublica.org/site/author/julia_angwin"; $html =~ s/^.*?(
)/$1/si; $html =~ s/.*//si; my @articles = split /class="article-excerpt"/, $html; #print join("\n"x12, @articles), "\n"; exit; foreach my $article (@articles) { my ($link) = ($article =~ /()/si); my ($date) = ($article =~ /
  • $date $link\n"; }
# ---------------------------------------------------------------------------
# Review notes for the script above.
#
# What the visible statements do, in order:
#   1. Download the author page into $html with LWP::Simple's get().
#   2. Trim $html with two s///si substitutions.
#   3. Split what is left on the literal text class="article-excerpt",
#      giving one chunk per element of @articles.
#   4. For each chunk, capture $link and $date with a regex match each.
#   5. The final string interpolates $date and $link, separated by a bullet;
#      presumably it is the tail of a print statement -- TODO confirm.
#
# NOTE(review): this copy looks damaged, and should be restored from the
# original before it is run or refactored:
#   - All statements up to the first substitution sit on the same physical
#     line as the "#!" line, so as laid out here Perl treats them as comment
#     text rather than code.
#   - The pattern bodies appear to have lost their contents (presumably
#     markup inside the literals was stripped -- TODO confirm). As written,
#     the first substitution's capture group holds only a line break,
#     s/.*//si would delete the entire page, /()/si can only capture an
#     empty string, and the $date match is cut off before its closing
#     delimiter.
#
# NOTE(review): the result of get() is used without a check. LWP::Simple
# documents that get() returns undef when the request fails, in which case
# the substitutions would run against an undefined value and emit warnings.
# ---------------------------------------------------------------------------