#!/usr/bin/perl

use lib '/home/interiot/cpan/lib/';
use Data::Dumper;
#use SOAP::Lite +trace; BEGIN { open(STDERR, '>>soap.trace'); }
#use SOAP::Lite +trace => [qw(transport)];
use SOAP::Lite;
#use SOAP::Data;

# Read the API key (passed as the first search argument further down) from
# the first line of ~/.google.key.  Three-arg open with a lexical handle
# keeps the path from being interpreted as an open mode and names the file in
# the error message.
my $google_key_file = "$ENV{HOME}/.google.key";
my $google_key = do {
    open my $key_fh, '<', $google_key_file
        or die "Cannot open $google_key_file: $!";
    my $first_line = <$key_fh>;     # scalar context: first line only
    close $key_fh;
    defined $first_line ? $first_line : '';
};
chomp $google_key;

# An empty key file would otherwise send an undefined key to the service.
die "No key found on the first line of $google_key_file" if $google_key eq '';


# SOAP client stub built from the Google Search WSDL.  lots_of_results()
# reads this file-level lexical each time it issues a request.
my $service = SOAP::Lite->service('http://api.google.com/GoogleSearch.wsdl');

# Positional arguments passed through to doGoogleSearch, in call order.
my @search_args = (
    $google_key,
    "site:southbendtribune.com roseland snyder OR council OR dorothy",  # query
    0,      # first (lots_of_results overwrites this for every request)
    10,     # count
    0,      # filter (hide similar results)
    "",     # restrict to a specific country or the like
            #   http://code.google.com/apis/soapsearch/reference.html#2_4
    0,      # enable SafeSearch filtering
    "",     # language restrict
    "",     # input encoding
    "",     # output encoding
);

# Upper bound on the number of results collected across all requests.
my $max_results = 1000;

my @results = lots_of_results(@search_args, $max_results);

# Reduce the search hits to a list of canonical article URLs:
#   1. drop the "www." host prefix,
#   2. keep only /pbcs.dll/article links,
#   3. cut everything after the third path component following "AID=".
my @urls;
for my $hit (@results) {
    my $url = $hit->{URL};

    $url =~ s#/www\.southbendtribune#/southbendtribune#si;
    next unless $url =~ m#/pbcs\.dll/article#i;
    $url =~ s#(AID=/[^/]*/[^/]*/[^/]*).*#$1#si;

    push @urls, $url;
}

# Write the URLs, sorted and one per line, to list.txt in the current
# directory.  Every I/O step is checked: buffered write errors (disk full,
# quota) may only surface at close.
my $list_file = 'list.txt';
open my $list_fh, '>', $list_file
    or die "Cannot open $list_file for writing: $!";
print {$list_fh} join("\n", sort @urls), "\n"
    or die "Cannot write to $list_file: $!";
close $list_fh
    or die "Cannot close $list_file: $!";

# lots_of_results(@search_args, $max_count)
#
# Calls doGoogleSearch on the file-level $service repeatedly, one page at a
# time, and returns the concatenated resultElements of all pages as a flat
# list.
#
# Arguments:
#   @search_args - positional arguments for doGoogleSearch.  Index 2 (start
#                  offset) is overwritten for every request; index 3
#                  (results per request) is used as the page size.
#   $max_count   - last argument: upper bound on the number of results
#                  requested in total.
#
# Paging stops when $max_count results have been requested, when a page comes
# back shorter than requested (taken to mean the end of the result set), or
# when a call yields no result structure at all.
sub lots_of_results {
    my @args      = @_;
    my $max_count = pop @args;

    # 10 is the page size the loop has always assumed (NOTE(review): the
    # Google SOAP Search reference gives it as the per-query maximum --
    # confirm).  A smaller caller-supplied count is honoured so that the
    # stride and the short-page test agree with what is actually requested;
    # anything else falls back to 10.
    my $page_limit = 10;
    my $page_size  = $args[3];
    if (   !defined $page_size
        || $page_size !~ /^\d+$/
        || $page_size < 1
        || $page_size > $page_limit)
    {
        $page_size = $page_limit;
    }

    my @all_results;

    for (my $start = 0; $start < $max_count; $start += $page_size) {
        # Never ask for more than is still missing, so the total cannot
        # overshoot $max_count when it is not a multiple of the page size.
        my $remaining = $max_count - $start;
        my $wanted    = $remaining < $page_size ? $remaining : $page_size;

        $args[2] = $start;
        $args[3] = $wanted;
        my $response = $service->doGoogleSearch(@args);

        # A failed call yields no result structure; treat it as the end
        # instead of dereferencing an undefined value.
        my $page = $response ? $response->{resultElements} : undef;
        last unless $page;

        push @all_results, @{$page};

        # stop if we don't receive the number of results we asked for
        # (probably indicating we've hit the end)
        last if @{$page} < $wanted;
    }

    return @all_results;
}

