http://paperlined.org/dev/src/pl/wowtcg/fetch_regionals_statistics.pl
#!/usr/bin/perl
# wow.tcgplayer.com keeps track of many regionals decks. This script fetches all those decks, and
# calculates how frequently each specific card gets used in those regionals.
# Additionally, cards are weighted, because some cards can only be used by one
# hero, others can be used by ~50% of the heros (eg. allies), and others can be
# used by everyone (eg. quests), so even if a card is ALWAYS used when
# possible, it can still get a much lower rating than a lower-quality card that
# has the possibility of being used by a larger number of heros.
# The weighting simply assumes that every hero can use 4 cards of every card that's legal for its deck.
# This is an oversimplification (since decks limit themselves to ~60 cards),
# but it's sufficient.
# Some odd output of this (>100%) is a result of:
# - http://wow.tcgplayer.com/db/deck.asp?ID=1579 is marked as using "Dismantle", eg. a Rogue card in a Warlock deck
# - the same deck is marked as using "Coup de Grace", another Rogue card in a Warlock deck
use strict;
use warnings;
use lib '/home/interiot/cpan/lib/';
use LWP::Simple;
#use Storable;
use Tie::Storable;
use List::Util qw[min max];
use Data::Dumper;
my $debug = 1;
#my $tcgcardid_map_filename = ".tcgplayer_map";
my @deck_ids;
### Load the file-based data
open FIN, "<hero_validity.txt" or die;
my %hero_validity;
my $cur_hero_listref;
while (<FIN>) {
s/[\n\r]*$//; # fancy chomp
if (s/^----HERO---- //) { # we came across a new section... start accumulating under a new hero
$cur_hero_listref = $hero_validity{$_} = [];
} else {
push(@$cur_hero_listref, $_);
}
}
#die Dumper \%hero_validity;
close FIN;
open FIN, "<card_names.txt" or die;
my %card_names = map {s/[\n\r]*$//; split /\t/, $_, 2} <FIN>;
close FIN;
#die Dumper \%card_names;
### Fetch all the regional deck IDs
foreach my $class (qw[ Warrior Druid Hunter Mage Paladin Priest Rogue Shaman Warlock ]) {
#foreach my $class (qw[ Hunter ]) {
print "Fetching deck_ids for class=$class\n" if $debug;
my $html = get_wowtcgplayercom("http://wow.tcgplayer.com/db/Decks.asp?Location=Regionals&HeroClass=$class") or die;
$html =~ s/privacy\.asp.*//si; # remove the bottom part of the page, in particular the "Latest Decks" box, which gives us false positives
push(@deck_ids, ($html =~ m#/db/deck\.asp\?id=(\d+)#gi));
}
#print Dumper(\@deck_ids); exit;
#@deck_ids = qw[1318 1128 1099]; # DEBUG... Graccus decks
### Fetch all the cards in each deck
my %cards_used;
my %max_cards_possible; # For each given card, if the card was legal for 5 decks (eg. of all the decks we scanned, 5 of the heros could legally use this card), then
# this is 5 * 4 = 20 total possible for that card.
tie my %tcgplayer_cardid_map => 'Tie::Storable', '.tcgplayer_map';
tie my %regional_decks => 'Tie::Storable', '.regional_decks';
foreach my $deckid (@deck_ids) {
#foreach my $deckid (@deck_ids[0..1]) {
print "Fetching deck_id=$deckid\n" if $debug;
my $html = ($regional_decks{$deckid} ||= get_wowtcgplayercom("http://wow.tcgplayer.com/db/deck.asp?ID=$deckid"));
$html =~ s#alt="Return to Search Page".*##si; # remove bottom half of page, just focus on the part with cards
if ($html =~ m#<b>Hero:</b>\s*<a\s+href="card\.asp\?ID=(\d+)#i) {
my $hero_id = fetch_tcg_cardid_cached($1);
next if (!$hero_id || !$card_names{$hero_id});
print " hero = ", $card_names{$hero_id}, "\n";
#next unless ($hero_id eq 'AZEROTH 4/361'); # DEBUG: Add temporarily, only to check certain heroes individually
while ($html =~ m#(\d)\s*<A\s*HREF="card\.asp\?id=(\d+)">#gi) {
my $qty = $1;
my $wow_cardid = fetch_tcg_cardid_cached($2);
next if ($deckid == 1579 && ($2 == 223 || $2 == 229)); # ignore rogue cards in a warlock deck (Dismantle and Coup de Grace in http://wow.tcgplayer.com/db/deck.asp?ID=1579)
$cards_used{$wow_cardid} += $qty;
}
# increment %max_cards_possible for all of this hero's legal cards
foreach my $card (@{$hero_validity{$hero_id}}) {
$max_cards_possible{$card} += 4;
}
}
}
#print Dumper \%cards_used;
## Output the final stats
my %tcgplayer_cardid_reverse_map = map {$tcgplayer_cardid_map{$_} => $_} keys %tcgplayer_cardid_map;
sub sortfilter {$_ = shift; my ($outof)=s#(/\d+)$##; s/(\d+)$//; $_ .= sprintf('%04d%s', $1, $outof); $_}
#my @real_cardids = sort {sortfilter($a) cmp sortfilter($b)} map {$tcgplayer_cardid_map{$_}} keys %cards_used;
my @real_cardids = keys %card_names;
open FOUT, ">regionals_statistics.txt" or die;
my $maxmax = max(values %max_cards_possible);
foreach my $real_cardid (@real_cardids) {
my $num = $cards_used{$real_cardid} || 0;
my $max_num = $max_cards_possible{$real_cardid} || 0;
#my $factor = $max_num ? ($max_num / $maxmax) : 1;
#$num /= $factor;
#my $line = sprintf "%4d of %4d %-40s %s\n", $num, $max_num, $real_cardid, $card_names{$real_cardid};
my $line = sprintf "%7.2f%% %4d of %4d %-40s %s\n", 100 * $num / ($max_num || 1), $num, $max_num, $real_cardid, $card_names{$real_cardid};
print FOUT $line;
print $line;
}
sub fetch_tcg_cardid_cached {
my $tcgplayer_cardid = shift;
$tcgplayer_cardid_map{$tcgplayer_cardid} && return $tcgplayer_cardid_map{$tcgplayer_cardid};
if ($tcgplayer_cardid_map{$tcgplayer_cardid} = fetch_tcg_cardid($tcgplayer_cardid)) {
print " Fetched tcgplayer_cardid=$tcgplayer_cardid, ", $card_names{$tcgplayer_cardid_map{$tcgplayer_cardid}}, "\n";
} else {
print " Fetched tcgplayer_cardid=$tcgplayer_cardid, resulted in NULL\n";
}
return $tcgplayer_cardid_map{$tcgplayer_cardid};
}
# given a TCG cardid, returns an official card ID (eg. "AZEROTH 32")
sub fetch_tcg_cardid {
my $tcgplayer_cardid = shift;
my $html = get_wowtcgplayercom("http://wow.tcgplayer.com/db/card.asp?id=$tcgplayer_cardid");
if (!$html) {
print STDERR "Unable to fetch TCGPlayer cardID $tcgplayer_cardid\n"; return;
}
if ($html =~ /Card #(?:<[^>]*>|\s*)*(\d+)/) {
my $cardnum = $1;
if ($html =~ /<b>Set(?:<[^>]*>|\s*)*([^<>]+)/) {
my $set = $1;
#print "TCG-ID $tcgplayer_cardid = $set $cardnum\n";
if ($set =~ /Heroes of Azeroth/i) {
return "AZEROTH $cardnum/361";
}
if ($set =~ /Onyxia's Lair Treasure/i) {
return "ONYXIA $cardnum/33";
}
}
}
}
# for some reason, wow.tcgplayer.com stopped resolving during development... hack around that
# (though this hack should be removed as soon as its DNS starts working again)
sub get_wowtcgplayercom {
my $url = shift; # present the same interface as LWP::Simple::get()
my $hack = 0;
$hack++ if ($url =~ s/wow\.tcgplayer\.com/208.109.49.16/);
### based on LWP::Simple::get_old
LWP::Simple::_init_ua() unless $LWP::Simple::ua;
my $request = HTTP::Request->new(GET => $url);
$request->header(host => "wow.tcgplayer.com") if $hack; # the server won't work unless it has its Host: value set... something that would normally be a bit hard to do without a functioning DNS
my $response = $LWP::Simple::ua->request($request);
return $response->content if $response->is_success;
return undef;
}
Generated by GNU enscript 1.6.4.