http://paperlined.org/dev/src/pl/porn/cgi/tgp_condom/bak/URLMetadata.pm
package URLMetadata;
use CGI::Util;
use URI;
use URI::QueryParam;
use Data::Dumper;
use strict;
use warnings;
# in some cases, doing site-specific redirector-stripping isn't enough, because there are actually
# two or more redirectors chained together!! Strip these off.
#
# TODO: move this to a more common module for making URLs canonical?
sub strip_extra_redirectors {
my $url = shift;
if ($url =~ m#refer\.ccbill\.com/cgi-bin/clicks\.cgi# && $url =~ s#^.*?HTML=##) {
$url =~ s#=.*##;
return strip_extra_redirectors(CGI::Util::unescape($url));
} elsif ($url =~ m#bertonicash\.com/tgp\.php#) {
$url =~ s#^.*?HTML=##;
$url =~ s#=.*##;
return strip_extra_redirectors(CGI::Util::unescape($url));
}
return $url;
}
BEGIN {
my %strip_all_queries = map {$_,1} qw[
g.wasteland.com
galleries.extremebound.com
];
# bdsmtraffic.com
# fhg.masteracash.com
# galleries.extremebound.com
# promo.limitedaudience.com
# bondageforte.org
# bondagenation.com
# ];
my %strip_one_param = (
"promo.gordcash.com" => "a",
"fhg.masteracash.com" => "id",
"kinky-katinka.com" => "id",
"promo.limitedaudience.com" => "a",
"bondageforte.org" => "aid",
"bondagenation.com" => "PA",
"bongum-video.com" => "ID",
"fetishshots.com" => "id",
"ratherextreme.com" => "id",
"roperookie.com" => "nats",
"trashygallery.com" => "nats",
"platinumfetish.com" => "ccbill",
"pics.maniacpass.com" => "id",
"bdsmtraffic.com" => "webmaster",
"pornaccess.com" => "adv_id",
"freemeatwhores.com" => "aff",
"maxcash.com" => "webm_id",
"assspankingvideos.com" => "id",
"redassteens.com" => "r",
"fistinglessons.net" => "r",
"fistinglessons.com" => "r",
"misspain.com" => "r",
"miss-krista.com" => "id",
"femdomchronicles.com" => "id",
"bbraxaporn.com" => "r",
"sadoslaves.com" => "r",
"maniacpass.com" => "pt",
"rubberdollies.com" => "id",
"shinyangels.com" => "id",
"slutgalz.com" => "nats",
"spookycash.com" => "id",
"straponangels.com" => "id",
);
my @always_strip = qw[
nats
];
# TODO: move this to a more common module for making URLs canonical?
sub strip_affiliate_id {
my $url = shift;
my $uri = URI->new($url);
my $host = $uri->host;
foreach my $always (@always_strip) {
$uri->query_param_delete( $always );
}
my $h = $host;
while ($h =~ m#\.#s) {
if ($strip_all_queries{$h}) {
$url =~ s#\?.*##s;
return $url;
}
if ($strip_one_param{$h}) {
# delete this one parameter
$uri->query_param_delete( $strip_one_param{$h} );
return $uri->as_string;
}
$h =~ s#^[^\.]*\.##s;
}
return $url;
}
}
BEGIN {
my %strip_final_file = (
'bdsmtraffic.com' => '^/galleries/[^/]*/index\.php$',
'g.wasteland.com' => '^/[^/]*/index\.html$',
'galleries.extremebound.com' => '^/[^/]*/index\.html$',
'galleries.knotnice.com' => '^(?:/[^/]*){3}/index\.html$',
'kinky-katinka.com' => '^(?:/[^/]*){2}/index\.php$',
'bongum-video.com' => '^(?:/[^/]*){2}/index\.php$',
'christinacaptured.com' => '^(?:/[^/]*){3}/index\.html$',
'fetishshots.com' => '^(?:/[^/]*){2}/index\.php$',
'ratherextreme.com' => '^(?:/[^/]*){2}/index\.php$',
'platinumfetish.com' => '^(?:/[^/]*){4}/index\.php$',
'freemeatwhores.com' => '^(?:/[^/]*){3}/index\.php$',
'bdsmtraffic.com' => '^(?:/[^/]*){2,3}/index\.php$',
'galleries.bucks2go.com' => '^(?:/[^/]*){3}/index\.html$',
'assspankingvideos.com' => '^(?:/[^/]*){2}/index\.php$',
'latexangelic.com' => '^(?:/[^/]*){2}/fcindex\.html$',
'orientalrope.com' => '/index\d+\.html$',
);
# Don't put sites here if they always force-redirect back to the longer version of the name.
my %strip_domain_prefix = map {$_,1} qw[
www.bondageforte.org
www.bondagenation.com
www.bondageorgies.com
www.bongum-video.com
www.damseldiary.com
www.fetishshots.com
www.fragilehostage.com
www.natalisbondagesluts.com
www.platinumfetish.com
www.rarebdsm.com
www.ratherextreme.com
www.roperookie.com
www.wildpornreviews.com
www.freemeatwhores.com
www.latexbyanna.com
www.shinyangels.com
www.straponangels.com
www.xxx69.net
];
# the site force-redirects to the longer one anyway, might as well go with that...
my %add_domain_prefix = (
'christinacaptured.net' => 'www.christinacaptured.net',
);
my %other_modification = (
# not sure what the extra commas are for, but they don't seem to be required...
'bondagebank.com' => sub {s#(gallhit\.php)\?\d+,(\d+),\d+,\d+,\d+$#$1?1,$2#si},
);
# TODO: move this to a more common module for making URLs canonical?
sub make_canonical {
my $url = shift;
my $uri = URI->new($url)->canonical;
my $h = $uri->host;
my $p = $uri->path;
#print "path: $p\n"; exit;
while ($h =~ m#\.#s) {
if ($strip_final_file{$h} && $p =~ m#$strip_final_file{$h}#si) {
$p =~ s#/[^/]+$#/#si;
$uri->path($p);
}
if ($strip_domain_prefix{$h}) {
$h =~ s#^[^\.]*\.##si;
$uri->host($h);
next;
}
if ($add_domain_prefix{$h}) {
$uri->host($add_domain_prefix{$h});
}
if ($other_modification{$h}) {
local $_ = $uri->as_string;
$other_modification{$h}->();
$uri = URI->new($_);
}
$h =~ s#^[^\.]*\.##s;
}
return $uri->as_string;
}
}
BEGIN {
my %domain_hosted_for_another = (
# NOTE: these are LIST-refs, not HASH-refs, though they end up being functionaly
# equivalent to hashrefs
'fhg.masteracash.com' => [
qr#^/slavesinlove/#i => 'slavesinlove.com',
],
'bdsmi.com' => [
qr#^/hosted/painfiles/#i => 'thepainfiles.com',
],
'fragilehostage.com' => [
qr#galleries/ssm/#i => 'societysm.com',
qr#/galleries/ps/#i => 'perfectslave.com',
],
'bondagebank.com' => [
qr#\?100080,#i => 'chantasbitches.com',
],
'bdsmtraffic.com' => [ # actually, this is an obfuscated redirector
qr#^/kk/#i => 'kinky-katinka.com',
qr#/as\d#i => 'amateurslave.com',
qr#/ss\d#i => 'slavesluts.com',
],
'platinumfetish.com' => [
qr#^/amateurbondagevideos/#i => 'amateurbondagevideos.com',
qr#/purespanking/#i => 'purespanking.com',
qr#/painfreaks/#i => 'painfreaks.com',
qr#/amateursmothering/#i => 'amateursmothering.com',
],
'ddfcash.com' => [
qr#^(?:/PROMO/|/affiliate/freepages/)hot/#i => 'houseoftaboo.com',
],
'galleryhost.com' => [
qr#/her1stanal/#i => 'her1stanal.com',
],
'natalisbondagesluts.com' => [
qr#^/galleries/ss/#i => 'smotheredslave.com',
qr#^/galleries/bo/#i => 'bondageorgasms.com',
],
'freemeatwhores.com' => [
qr#^/g/Gagging-Whores\b#i => 'gaggingwhores.com',
],
'damseldiary.com' => [
qr#^/free/ssm/#i => 'societysm.com',
qr#^/free/bo/#i => 'bondageorgasms.com',
qr#^/free/ps/#i => 'perfectslave.com',
],
'spankbuxx.com' => [
qr#^/hosted/SSG/#i => 'spankedschoolgirl.com',
qr#^/hosted/spvids/#i => 'spankingvids.com',
qr#^/hosted/sol/#i => 'spankingonline.com',
],
'fhg.fuckingfree.net' => [
qr#^/fhg/violentanime/#i => 'violentanime.com',
qr#^/fhg/violentcomix/#i => 'violentcomix.com',
],
'massivegalleries.com' => [
qr#^/vids/rr/#i => 'rarebondage.com',
],
'bondageorgies.com' => [
qr#^/pjwarchive/#i => 'pjwarchive.com',
],
'galleries.do-bill.com' => [
qr#^/bdsmartwork/#i => 'bdsmartwork.com',
],
'fetishshots.com' => [
qr#^/fdl/#i => 'femdomloft.com',
qr#^/tt/#i => 'tightlytied.com',
qr#^/tw/#i => 'tramplingworld.com',
qr#^/nd/#i => 'nylondivas.com',
qr#^/ld/#i => 'lesbiandommes.com',
qr#^/hhd/#i => 'highheeldivas.com',
],
'rarebdsm.com' => [
qr#^/galleries/ball_gagged/#i => 'ball-gagged.com',
qr#^/galleries/shackled/#i => 'shackledmaidens.com',
],
'galleries.fetishsphere.com' => [
qr#^/hustler-taboo/#i => 'hustlerstaboo.com',
qr#^/infernal-restraints/#i => 'infernalrestraints.com',
qr#^/hot/#i => 'houseoftaboo.com',
qr#^/hard-tied/#i => 'hardtied.com',
qr#^/rope-affairs/#i => 'ropeaffairs.com',
qr#^/charlotte/#i => 'charlottefetish.com',
qr#^/sweetties/#i => 'sweetties.com',
qr#^/satin-steel/#i => 'satinsteel.com',
qr#^/bloodangels/#i => 'bloodangels.com',
qr#^/bondage-maidens/#i => 'bondagemaidens.com',
qr#^/pupett/#i => 'pupett.com',
qr#^/maniac-pass/#i => 'maniacpass.com',
qr#^/alterpic/#i => 'alterpic.com',
],
'spookycash.com' => [
qr#^/hgma/#i => 'michelle-aston.com',
qr#^/hgsc13/#i => 'scar13.com',
qr#^/hgeb/#i => 'eroticbpm.net',
],
'fetishgera.com' => [
qr#^/mega\d+#i => 'megapenetrations.com',
qr#^/gap\d+#i => 'clubgape.deluxepass.com',
],
'bdsmbook.com' => [
qr#/gsg/#i => 'girlspanksgirl.com',
qr#/ffg/#i => 'femdomfetishgirls.com',
],
'eropain.com' => [
qr#^/\w/\d+shp-#i => 'shockingpain.com',
qr#^/\w/\d+ep-#i => 'extrapain.com',
qr#^/\w/\d+-#i => 'painriser.com',
],
'bondagepoint.com' => [
qr#^/(?:maledom|bdsm)/#i => 'societysm.com',
qr#^/(?:domination)/#i => 'uckingdungeon.com',
],
# just markers for the fact that the thumbnails from these places need to be sorted out
'bucks2go.com' => [ ],
'adult-empire.com' => [ ],
'pornaccess.com' => [ ],
'risegalleries.com' => [ ],
);
# same as above... ostensibly an array, but acts like a hash
my @universal_pattern = (
qr#ricksavage#i => 'ricksavage.com',
qr#Hell-Fire-Sex#i => 'hellfiresex.com',
qr#specialexercises#i => 'specialexercises.com',
qr#\bmeatholes\b#i => 'meatholes.com',
qr#amateurbondagevideos#i => 'amateurbondagevideos.com',
qr#cumbots#i => 'cumbots.com',
);
my %strip_domain_prefix = map {$_,1} qw[
sadoslaves.com
gagfuck.com
frenchfisting.com
hogtied.com
maxxandmore.com
meninpain.com
devicebondage.com
miss-krista.com
sexandsubmission.com
bondagehere.com
fuckingmachines.com
knotnice.com
limitedaudience.com
thetrainingofo.com
wasteland.com
waterbondage.com
whippedass.com
wiredpussy.com
extremebound.com
sadisticbondage.com
naturebondage.com
hellfiresex.com
femdomchronicles.com
brutaldildos.com
maniacpass.com
bossbitches2.com
];
my %entire_domain_is_for_another = (
'fistinglessons.com' => 'fistinglessons.net',
'maxhardcoreporn.com' => 'maxhardcoreporn.net',
'bondagenation.com' => 'fetishnation.com',
'fhgp.maxcash.com' => 'fetishhotel.com',
'bound-and-gagged.org' => 'extremebound.com',
'medical-fetish-videos.com' => 'doctortushy.com',
'promo.gordcash.com' => 'nakedgord.com',
'lady-sonia-galleries.com' => 'lady-sonia.com',
'orientalrope.com' => 'thepainfiles.com',
'aerobdsm.com' => 'societysm.com',
'chantacash.com' => 'chantasbitches.com',
);
my %per_page_key = (
'promo.bucks2go.com' => {
getkey => sub {(split m#/#, $_)[1]}, # the first in the sequence of nubmers
domains => {
'slavecontest.com' => {map{$_,1}qw[ 113 247 255 ]},
'shockers.com' => {map{$_,1}qw[ 267 268 ]},
},
},
'bondagebank.com' => {
getkey => sub {(split m#[,]#)[1]},
domains => {
'chantasbitches.com' => {map{$_,1}qw[ 71 88 560 566 568 574 580 ]},
'fuckedandbound.com' => {map{$_,1}qw[ 119 133 286 542 554 558 562 564 572 576 578 582 ]},
},
},
);
sub domain_hosted_for_another {
my $url = shift;
#print Dumper \%per_page_key; exit;
my $uri = URI->new($url);
my $h = $uri->host;
my $p = $uri->path_query;
my $matched_hosting_house = 0;
#print "path: $p\n"; exit;
while ($h =~ m#\.#s) {
if ($domain_hosted_for_another{$h}) {
for (my $ctr=0; $ctr<@{$domain_hosted_for_another{$h}}; $ctr+=2) {
my $pattern = $domain_hosted_for_another{$h}[$ctr];
my $hosted_for = $domain_hosted_for_another{$h}[$ctr+1];
if (ref($pattern)) {
return $hosted_for if ($p =~ $pattern);
}
}
$matched_hosting_house++;
}
if ($strip_domain_prefix{$h}) {
return $h;
}
if ($entire_domain_is_for_another{$h}) {
return $entire_domain_is_for_another{$h};
}
if ($per_page_key{$h}) {
local $_ = $p;
my $key = $per_page_key{$h}{getkey}->($p);
#print "Got key: $key from $url\n";
foreach my $checking_domain (keys %{$per_page_key{$h}{domains}}) {
my $matching_keys = $per_page_key{$h}{domains}{$checking_domain};
return $checking_domain if ($matching_keys->{$key});
}
#while (my ($var, $val) = each %{$per_page_key{$h}{domains}}) {
# return $var if ($val->{$key});
# print "key($key) didn't match against: ", Dumper $val;
#}
print "key($key) didn't match against anything in '$h'.<br>\n";
}
if ($h eq 'tgp.pornaccess.com' && $p =~ m#^/galleries/([^/]*)/#i) {
return "$1.pornaccess.com"; # hard coded, just because this doesn't seem to be an infrequently used site
}
$h =~ s#^[^\.]*\.##s;
}
for (my $ctr=0; $ctr<@universal_pattern; $ctr+=2) {
my $pattern = $universal_pattern[$ctr];
my $hosted_for = $universal_pattern[$ctr+1];
#print "Matching '$p' against '$pattern'\n";
if (ref($pattern)) {
return $hosted_for if ($p =~ $pattern);
}
}
return ($uri->host . "(hosting house, unknown) ") if ($matched_hosting_house);
return $uri->host;
}
}
BEGIN {
my %most_domains_URLs_bad = map{$_,1} (
"rathbondage.com", # original owner seems to have sold it to someone else, who just uses it for SEO junk now
"pimpcash.com", # most links I've seen have been dead...
);
sub is_mostly_bad_domain {
my $h = URI->new(shift)->host;
while ($h =~ m#\.#s) {
return 1 if $most_domains_URLs_bad{$h};
$h =~ s#^[^\.]*\.##s;
}
}
}
BEGIN {
my $kink_com = qr#^/g/\d+/\d+/v/#i;
my %domain_movie_urls = (
'vipspanking.com' => [
qr#^/wm/mg/#i,
],
'promo.waterbondage.com' => [$kink_com],
'promo.sexandsubmission.com' => [$kink_com],
'promo.waterbondage.com' => [$kink_com],
'promo.whippedass.com' => [$kink_com],
'promo.wiredpussy.com' => [$kink_com],
'promo.ultimatesurrender.com' => [$kink_com],
'promo.hogtied.com' => [$kink_com],
'promo.fuckingmachines.com' => [$kink_com],
'bdsmtraffic.com' => [
qr#^/galleries/movies/#i,
],
'electricityplay.com' => [
qr#^/gallery/[^/]*/mgp#i,
],
'mgp.bossbitches2.com' => [ qr#.# ],
);
sub is_TGP_movie_page {
my $uri = URI->new(shift);
my $h = $uri->host;
my $p = $uri->path_query;
while ($h =~ m#\.#s) {
if ($domain_movie_urls{$h}) {
foreach my $regexp (@{$domain_movie_urls{$h}}) {
return 1 if ($p =~ $regexp);
}
}
$h =~ s#^[^\.]*\.##s;
}
}
}
1;
Generated by GNU enscript 1.6.4.