http://paperlined.org/dev/src/pl/digg/identify_group_diggers.pl

#!/usr/bin/perl

# Given the URLs of several Digg stories, this will try to identify commanlities among users who
# initially dugg it up.
#
# To use, just pipe the URLs on STDIN...  (eg. you can record them to a file first, if you want...)

# TODO:
#   - this probably won't work once a story gets many diggers...  I need to make it always go back
#     to the first page or two, and record only those usernames.

    use strict;
    use warnings;

    use LWP::Simple;
    use List::Util qw[sum];
    use Data::Dumper;


my @urls = split ' ', do {local $/=undef; <>};
#print Dumper \@urls;
my %url_key;

my %users;
foreach my $url (@urls) {
    $url .= "/who" unless ($url =~ /\/who$/i);

    my $url_key = chr(ord('A') + scalar(keys(%url_key)));
    $url_key{$url} = $url_key;

    print "Fetching $url\n";
    #my $html = LWP::Simple::get($url);
    my $html = `w3m -dump_source '$url' | gzip -dcf -`;
    #my $html = "";
    if (!$html) {
        print "Unable to fetch $url\n";
        next;
    }

    # trim off everything but the list we're interested in
    $html =~ s#^.*?<ul class=["']?user-list["']?>##si;
    $html =~ s#</ul>.*##si;
    #print $html; exit;

    my @users = ($html =~ m#>([^<]+)</a>#g);
    @users = reverse @users;

    for (my $ctr=0; $ctr<@users; $ctr++) {
        my $user = $users[$ctr];
        $users{$user} ||= [];

        push(@{$users{$user}}, [$url, $ctr+1]);
    }
}

# figure out the sort order
my %user_score;
while (my ($user, $data) = each %users) {
    my $cnt = scalar(@$data);
    my $avg_place = sum(map {$_->[1]} @$data) / $cnt;

    # make it so that users with a higher count are sorted first, but when two users have the same
    # number of diggs, that the one that dugg earlier gets sorted first
    $user_score{$user} = $cnt + 1 / ($avg_place+1);
}
my @user_order = sort {$user_score{$b} <=> $user_score{$a}} keys %users;


##### Output the report #####
print "Stories apparently involved in this spam group-digging:\n";
foreach my $url (sort {$url_key{$a} cmp $url_key{$b}} keys %url_key) {
    print "$url_key{$url}) $url\n";
}

print "\nUsers apparently involved in doing the initial group-digging of these stories:\n";
foreach my $user (@user_order) {
    print "---- user $user ----\n";
    foreach my $listref (sort {$url_key{$a->[0]} cmp $url_key{$b->[0]}} @{$users{$user}}) {
        my $position = suffix_number($listref->[1]) . " digger";
        $position = "submitter" if ($listref->[1] == 1);
        print "Story " . $url_key{$listref->[0]} . ": $position.\n";
    }
    print "\n";
}


our @letter_suffixes;
BEGIN {
@letter_suffixes = qw[
    th
    st
    nd
    rd
    th
    th
    th
    th
    th
    th
];
}

# given a number, like '22', add the suffix to turn it into '22nd'
sub suffix_number {
    my $num = scalar(shift);

    my $rightnum = substr($num, -1);

    if ($num >10 && $num<20) {      # "teens" always end in "th"  (it's "2nd", "12th", "22nd", "32nd", ....  teens are the odd-man-out
        return $num . "th";
    } else {
        return $num . $letter_suffixes[$rightnum];
    }
}

Generated by GNU enscript 1.6.4.