package URLMetadata;
# Functions that operate using site-specific information.
use Carp;
use CGI::Util;
use URI;
use URI::QueryParam;
use Data::Dumper;
use strict;
use warnings;
# in some cases, doing site-specific redirector-stripping isn't enough, because there are actually
# two or more redirectors chained together!! Strip these off.
#
# TODO: move this to a more common module for making URLs canonical?
sub strip_extra_redirectors {
my $url = shift;
if ($url =~ m#refer\.ccbill\.com/cgi-bin/clicks\.cgi# && $url =~ s#^.*?HTML=##) {
$url =~ s#=.*##;
return strip_extra_redirectors(CGI::Util::unescape($url));
} elsif ($url =~ m#bertonicash\.com/tgp\.php#) {
$url =~ s#^.*?HTML=##;
$url =~ s#=.*##;
return strip_extra_redirectors(CGI::Util::unescape($url));
}
return $url;
}
BEGIN {
my %strip_all_queries = map {$_,1} qw[
g.wasteland.com
galleries.extremebound.com
];
# bdsmtraffic.com
# fhg.masteracash.com
# galleries.extremebound.com
# promo.limitedaudience.com
# bondageforte.org
# bondagenation.com
# ];
my %strip_one_param = (
"promo.gordcash.com" => "a",
"fhg.masteracash.com" => "id",
"kinky-katinka.com" => "id",
"promo.limitedaudience.com" => "a",
"bondageforte.org" => "aid",
"bondagenation.com" => "PA",
"bongum-video.com" => "ID",
"fetishshots.com" => "id",
"ratherextreme.com" => "id",
"roperookie.com" => "nats",
"trashygallery.com" => "nats",
"platinumfetish.com" => "ccbill",
"pics.maniacpass.com" => "id",
"bdsmtraffic.com" => "webmaster",
"pornaccess.com" => "adv_id",
"freemeatwhores.com" => "aff",
"maxcash.com" => "webm_id",
"assspankingvideos.com" => "id",
"redassteens.com" => "r",
"fistinglessons.net" => "r",
"fistinglessons.com" => "r",
"misspain.com" => "r",
"miss-krista.com" => "id",
"femdomchronicles.com" => "id",
"bbraxaporn.com" => "r",
"sadoslaves.com" => "r",
"maniacpass.com" => "pt",
"rubberdollies.com" => "id",
"shinyangels.com" => "id",
"slutgalz.com" => "nats",
"spookycash.com" => "id",
"straponangels.com" => "id",
"shockingbdsm.com" => "id",
"ebony-bondage.org" => "aff",
"gothicsluts.com" => "id",
"ddfcash.com" => "nats",
"susanwayland.com" => "PA",
"silversdollars.com" => "affcode",
"cherrysweetpics.com" => "ID",
"exclusivebdsm.com" => "id",
"freewhorelinks.com" => "aff",
"meatgalleries.com" => "aff",
"freewhoregalleries.com" => "aff",
);
my @always_strip = qw[
nats
];
# TODO: move this to a more common module for making URLs canonical?
sub strip_affiliate_id {
my $url = shift;
my $uri = URI->new($url);
ref($uri) eq "URI::http" or confess "Not http: $url\n\t";
my $host = $uri->host;
foreach my $always (@always_strip) {
$uri->query_param_delete( $always );
}
my $h = $host;
while ($h =~ m#\.#s) {
if ($strip_all_queries{$h}) {
$url =~ s#\?.*##s;
return $url;
}
if ($strip_one_param{$h}) {
# delete this one parameter
$uri->query_param_delete( $strip_one_param{$h} );
return $uri->as_string;
}
$h =~ s#^[^\.]*\.##s;
}
return $url;
}
}
BEGIN {
my %strip_final_file = (
'bdsmtraffic.com' => '^/galleries/[^/]*/index\.php$',
'g.wasteland.com' => '^/[^/]*/index\.html$',
'galleries.extremebound.com' => '^/[^/]*/index\.html$',
'galleries.knotnice.com' => '^(?:/[^/]*){3}/index\.html$',
'kinky-katinka.com' => '^(?:/[^/]*){2}/index\.php$',
'bongum-video.com' => '^(?:/[^/]*){2}/index\.php$',
'christinacaptured.com' => '^(?:/[^/]*){3}/index\.html$',
'fetishshots.com' => '^(?:/[^/]*){2}/index\.php$',
'ratherextreme.com' => '^(?:/[^/]*){2}/index\.php$',
'platinumfetish.com' => '^(?:/[^/]*){4}/index\.php$',
'freemeatwhores.com' => '^(?:/[^/]*){3}/index\.php$',
'bdsmtraffic.com' => '^(?:/[^/]*){2,3}/index\.php$',
'galleries.bucks2go.com' => '^(?:/[^/]*){3}/index\.html$',
'assspankingvideos.com' => '^(?:/[^/]*){2}/index\.php$',
'latexangelic.com' => '^(?:/[^/]*){2}/fcindex\.html$',
'orientalrope.com' => '/index\d+\.html$',
'shockingbdsm.com' => '^(?:/[^/]*){2}/index\.php$',
'susanwayland.com' => '^(?:/[^/]*){2}/index\.php$',
'bdsmdirect.com' => '^/[^/]*/index\d+\.html$',
'galleries.xrblogger.com' => '.',
'checksexmovie.com' => '.',
'bondagehotporn.com' => '/index.html$',
'tusya.com' => '/index.html$',
);
# Don't put sites here if they always force-redirect back to the longer version of the name.
my %strip_domain_prefix = map {$_,1} qw[
www.bondageforte.org
www.bondagenation.com
www.bondageorgies.com
www.bongum-video.com
www.damseldiary.com
www.fetishshots.com
www.fragilehostage.com
www.natalisbondagesluts.com
www.platinumfetish.com
www.rarebdsm.com
www.ratherextreme.com
www.roperookie.com
www.wildpornreviews.com
www.freemeatwhores.com
www.latexbyanna.com
www.shinyangels.com
www.straponangels.com
www.xxx69.net
www.shockingbdsm.com
www.evildesire.com
www.susanwayland.com
www.bondagehotporn.com
www.kinky-katinka.com
];
# the site force-redirects to the longer one anyway, might as well go with that...
my %add_domain_prefix = (
'christinacaptured.net' => 'www.christinacaptured.net',
);
my $kink_com = sub {s#http://([^/]*)/([gm])/[^/]+/(\d+)/\w/\d+/h.*#http://$1/$2/1/$3/m/1/h#};
my %other_modification = (
# not sure what the extra commas are for, but they don't seem to be required...
'bondagebank.com' => sub {s#(gallhit\.php)\?\d+,(\d+),(\d+,)*\d+$#$1?1,$2#si},
'evildesire.com' => sub {s#^(http://evildesire\.com/hosted/\d+/).*#$1#},
'promo.devicebondage.com' => $kink_com,
'promo.fuckingmachines.com' => $kink_com,
'promo.hogtied.com' => $kink_com,
'promo.meninpain.com' => $kink_com,
'promo.sexandsubmission.com' => $kink_com,
'promo.thetrainingofo.com' => $kink_com,
'promo.ultimatesurrender.com' => $kink_com,
'promo.waterbondage.com' => $kink_com,
'promo.whippedass.com' => $kink_com,
'promo.wiredpussy.com' => $kink_com,
'promo.tsseduction.com' => $kink_com,
'bondageforte.org' => sub {s#/bondageforte\.org/#/bondageforte.com/#s;},
'galleries.adult-empire.com' => sub {s#(adult-empire.com/\d+/\d+)/\d+/#$1/1/#},
# XXXCumCash sites
'natalisbondagesluts.com' => sub {s#(natalisbondagesluts\.com/galleries/[^/]*/[^/]*/[^/]*)/\d+$#$1/1#},
'damseldiary.com' => sub {s#(damseldiary\.com/free/[^/]*/[^/]*/[^/]*)/\d+$#$1/1#},
'promo.bucks2go.com' => sub {s#(promo\.bucks2go\.com/[^/]*)/\d+/\d+#$1/1/1#},
);
# TODO: move this to a more common module for making URLs canonical?
sub make_canonical {
my $url = shift;
my $uri = URI->new($url)->canonical;
my $h = $uri->host;
my $p = $uri->path;
#print "path: $p\n"; exit;
while ($h =~ m#\.#s) {
if ($strip_final_file{$h} && $p =~ m#$strip_final_file{$h}#si) {
$p =~ s#/[^/]+$#/#si;
$uri->path($p);
}
if ($strip_domain_prefix{$h}) {
$h =~ s#^[^\.]*\.##si;
$uri->host($h);
next;
}
if ($add_domain_prefix{$h}) {
$uri->host($add_domain_prefix{$h});
}
if ($other_modification{$h}) {
local $_ = $uri->as_string;
$other_modification{$h}->();
$uri = URI->new($_);
}
$h =~ s#^[^\.]*\.##s;
}
return $uri->as_string;
}
}
BEGIN {
my %domain_hosted_for_another = (
# NOTE: these are LIST-refs, not HASH-refs, though they end up being functionaly
# equivalent to hashrefs
'fhg.masteracash.com' => [
qr#^/slavesinlove/#i => 'slavesinlove.com',
],
'bdsmi.com' => [
qr#^/hosted/painfiles/#i => 'thepainfiles.com',
],
'fragilehostage.com' => [
qr#galleries/ssm/#i => 'societysm.com',
qr#/galleries/ps/#i => 'perfectslave.com',
],
'bdsmtraffic.com' => [ # actually, this is an obfuscated redirector
qr#^/kk/#i => 'kinky-katinka.com',
qr#/as\d#i => 'amateurslave.com',
qr#/ss\d#i => 'slavesluts.com',
],
'ddfcash.com' => [
qr#^(?:/PROMO/|/affiliate/freepages/)hot/#i => 'houseoftaboo.com',
],
'galleryhost.com' => [
qr#/her1stanal/#i => 'her1stanal.com',
],
'natalisbondagesluts.com' => [
qr#^/galleries/ss/#i => 'smotheredslave.com',
qr#^/galleries/bo/#i => 'bondageorgasms.com',
],
'freemeatwhores.com' => [
qr#^//?(?:g|galleries)/Gagging-Whores\b#i => 'gaggingwhores.com',
],
'damseldiary.com' => [
qr#^/free/ssm/#i => 'societysm.com',
qr#^/free/bo/#i => 'bondageorgasms.com',
qr#^/free/ps/#i => 'perfectslave.com',
],
'spankbuxx.com' => [
qr#^/hosted/SSG/#i => 'spankedschoolgirl.com',
qr#^/hosted/spvids/#i => 'spankingvids.com',
qr#^/hosted/sol/#i => 'spankingonline.com',
],
'fhg.fuckingfree.net' => [
qr#^/fhg/violentanime/#i => 'violentanime.com',
qr#^/fhg/violentcomix/#i => 'violentcomix.com',
],
'massivegalleries.com' => [
qr#^/vids/rr/#i => 'rarebondage.com',
],
'bondageorgies.com' => [
qr#^/pjwarchive/#i => 'pjwarchive.com',
],
'galleries.do-bill.com' => [
qr#^/bdsmartwork/#i => 'bdsmartwork.com',
],
'fetishshots.com' => [
qr#^/fdl/#i => 'femdomloft.com',
qr#^/tt/#i => 'tightlytied.com',
qr#^/tw/#i => 'tramplingworld.com',
qr#^/nd/#i => 'nylondivas.com',
qr#^/ld/#i => 'lesbiandommes.com',
qr#^/hhd/#i => 'highheeldivas.com',
],
'spookycash.com' => [
qr#^/hgma/#i => 'michelle-aston.com',
qr#^/hgsc13/#i => 'scar13.com',
qr#^/hgeb/#i => 'eroticbpm.net',
],
'bdsmbook.com' => [
qr#/gsg/#i => 'girlspanksgirl.com',
qr#/ffg/#i => 'femdomfetishgirls.com',
qr#/sas/#i => 'sexandsubmission.com',
qr#/ir/#i => 'infernalrestraints.com',
qr#/pg/#i => 'paingate.com',
],
'eropain.com' => [
qr#^/\w/(?:\w-)?\d+shp-#i => 'shockingpain.com',
qr#^/\w/(?:\w-)?\d+ep-#i => 'extrapain.com',
qr#^/\w/(?:\w-)?\d+-#i => 'painriser.com',
],
'bondagepoint.com' => [
qr#^/(?:maledom|bdsm)/#i => 'societysm.com',
qr#^/(?:domination)/#i => 'fuckingdungeon.com',
],
'femdomfreeimages.com' => [
qr#^/aetgp/SpSvr\d+#i => 'spankingserver.com',
],
'carnaldungeon.com' => [
qr#^/galleries/fd/#i => 'fuckingdungeon.com',
qr#^/galleries/str/#i => 'strictrestraint.com',
],
'cuckoldrix.com' => [
qr#^/host50/#i => 'lady-sonia.com',
],
'gothdream.net' => [
qr#^/fh[gs]/f\d+#i => 'guysgetfucked.com',
qr#^/fh[gs]/h\d+#i => 'girlfriendhandjobs.com',
qr#^/fh[gs]/z\d+#i => 'zoliboy.com',
],
'spankingmoney.com' => [
qr#/rs/#i => 'realspankings.com',
],
'bondagehotporn.com' => [
qr#^/bdsm_sex[/_]#i => 'sexandsubmission.com',
qr#^/sex_and_submission/#i => 'sexandsubmission.com',
qr#^/fucked_bound/#i => 'fuckedandbound.com',
qr#^/machines?_#i => 'fuckingmachines.com',
qr#^/fuck_machine_#i => 'fuckingmachines.com',
qr#^/chantas_bitches/#i => 'chantasbitches.com',
],
# just markers for the fact that the thumbnails from these places need to be sorted out
'bucks2go.com' => [ ],
'adult-empire.com' => [ ],
'pornaccess.com' => [ ],
'risegalleries.com' => [ ],
'sexcataclysm.com' => [ ],
'majesticfuck.com' => [ ],
'facesittingheaven.com' => [ ],
'spankingdollars.com' => [ ], # dedicated to "Girls Boarding School" or not?
'silversdollars.com' => [ ],
'porncabinet.com' => [ ],
'femdomfetishsites.com' => [ ],
'painwoman.com' => [ ], # paingate.com + whippedwomen.com?
);
# same as above... ostensibly an array, but acts like a hash
my @universal_pattern = (
qr#ricksavage#i => 'ricksavage.com',
qr#Hell-Fire-Sex#i => 'hellfiresex.com',
qr#specialexercises#i => 'specialexercises.com',
qr#\bmeatholes\b#i => 'meatholes.com',
qr#amateurbondagevideos#i => 'amateurbondagevideos.com',
qr#cumbots#i => 'cumbots.com',
);
my %strip_domain_prefix = map {$_,1} qw[
sadoslaves.com
gagfuck.com
frenchfisting.com
hogtied.com
maxxandmore.com
meninpain.com
devicebondage.com
miss-krista.com
sexandsubmission.com
bondagehere.com
fuckingmachines.com
knotnice.com
limitedaudience.com
thetrainingofo.com
wasteland.com
waterbondage.com
whippedass.com
wiredpussy.com
tsseduction.com
extremebound.com
sadisticbondage.com
naturebondage.com
hellfiresex.com
femdomchronicles.com
brutaldildos.com
bossbitches2.com
ebony-bondage.org
ultimatesurrender.com
bizarreadultclub.com
exclusivebdsm.com
kshara.com
meatinsertions.com
];
my %entire_domain_is_for_another = (
'fistinglessons.com' => 'fistinglessons.net',
'maxhardcoreporn.com' => 'maxhardcoreporn.net',
'bondagenation.com' => 'fetishnation.com',
'fhgp.maxcash.com' => 'fetishhotel.com',
'bound-and-gagged.org' => 'extremebound.com',
'medical-fetish-videos.com' => 'doctortushy.com',
'promo.gordcash.com' => 'nakedgord.com',
'lady-sonia-galleries.com' => 'lady-sonia.com',
'orientalrope.com' => 'thepainfiles.com',
'aerobdsm.com' => 'societysm.com',
'chantacash.com' => 'chantasbitches.com',
'bdsmresorts.com' => 'perfectslave.com',
'eroticshadows.com' => 'thepainfiles.com',
'smfiles.com' => 'thepainfiles.com',
'bdsmdirect.com' => 'thepainfiles.com',
'devianterotica.com' => 'xxxsuicide.com',
'abbysrealm.com' => 'abbys-realm.com',
);
my %per_domain_key = (
'pussypizza.com' => {
getkey => sub {if (m#^/([a-z]+)\d+#i) {return $1}},
domains => {
dd => 'deviantdavid.com',
mb => 'meanbitches.com',
md => 'meandungeon.com',
},
},
'femdomworld.com' => {
getkey => sub {if ($_[1] =~ /^([^.]+)\.femdomworld\.com$/i) {return $1} },
domains => {
russianmistress => 'russian-mistress.com',
underfeet => 'under-feet.com',
},
},
'richporn.com' => {
getkey => sub {(split m#/#)[1]},
domains => {
'backdoorlesbians' => 'backdoorlesbians.com',
},
},
'bdsmengine.com' => {
getkey => sub {(split m#/#)[1]},
domains => {
'male_domination' => 'societysm.com',
},
},
'wanklinks.com' => {
getkey => sub {if (m#^/([a-z]+)[0-9_]#i) {return $1}},
domains => {
'tyrannized' => 'tyrannized.com',
},
},
'galleries.xrblogger.com' => {
getkey => sub {if (m#^/([a-z]+)[0-9]#i) {return $1}},
domains => {
tgpc => 'spankingfear.com',
tgp => 'spankingfear.com',
},
},
'tampapartygirl.com' => {
getkey => sub {if (m#^/galleries/([^/]+)/#i) {return $1}},
domains => {
'electricityplay' => 'electricityplay.com',
},
},
'deviantteens.com' => {
getkey => sub {if (m#^/galleries/(?:overbucks|xxxcumcash)/([^/]+)/#i) {return $1}},
domains => {
'goldenrain' => 'thegoldenrain.com',
'societysm' => 'societysm.com',
},
},
'eroticaxxx.com' => {
getkey => sub {if (m#^/gal/([a-z]+)[0-9]#i) {return $1}},
domains => {
nwl => 'wasteland.com',
},
},
'agebucks.com' => {
getkey => sub {if (m#^/content/fhg/([^/]+)/#i) {return $1}},
domains => {
'painart.com' => 'painart.com',
},
},
'agebucks.com' => {
getkey => sub {if (m#^/content/fhg/([^/]+)/#i) {return $1}},
domains => {
'painart.com' => 'painart.com',
},
},
'tusya.com' => {
getkey => sub {(split m#/#, $_)[1]},
domains => {
mature => 'myfriendshotmom.com',
bondage => 'thetrainingofo.com',
},
},
'erotiscore.com' => {
getkey => sub {(split m#/#, $_)[1]},
domains => {
kumimonster => 'kumimonster.com',
},
},
'galleries.fetishsphere.com' => {
getkey => sub {(split m#/#, $_)[1]},
domains => {
'alterpic' => 'alterpic.com',
'bloodangels' => 'bloodangels.com',
'bondage-maidens' => 'bondagemaidens.com',
'bound-by-bhowani' => 'boundbybhowani.com',
'charlotte' => 'charlottefetish.com',
'chilly-girls' => 'chillygirls.com',
'gwen-media' => 'gwenmedia.com',
'hard-tied' => 'hardtied.com',
'hell-fire' => 'hellfiresex.com',
'hot' => 'houseoftaboo.com',
'hustler-taboo' => 'hustlerstaboo.com',
'infernal-restraints' => 'infernalrestraints.com',
'kinky-katinka' => 'kinky-katinka.com',
'maniac-pass' => 'maniacpass.com',
'pupett' => 'pupett.com',
'rope-affairs' => 'ropeaffairs.com',
'rope-lover' => 'ropelover.com',
'saffys-secrets' => 'saffyssecrets.com',
'satin-steel' => 'satinsteel.com',
'sweetties' => 'sweetties.com',
'shadow-slaves' => 'shadowslaves.com',
},
},
'rarebdsm.com' => {
getkey => sub {(split m#/#, $_)[2]},
domains => {
'ashley_renee' => 'ashleyrenee.com',
'ball_gagged' => 'ball-gagged.com',
'shackled' => 'shackledmaidens.com',
'painfiles' => 'thepainfiles.com',
},
},
'fetishgera.com' => {
getkey => sub {if (m#^/([a-z]+)#i) {return $1}},
domains => {
'mega' => 'megapenetrations.com',
'gubba' => 'skinvideo.com',
'tlove' => 'teens-love-oldmen.com',
'gap' => 'clubgape.deluxepass.com',
'vag' => 'vaginalsucker.com',
'pissact' => 'pissinginaction.net',
},
},
'spankingarts.com' => {
getkey => sub {(split m#/#, $_)[1]},
domains => {
'manp' => 'meninpain.com',
'whip' => 'whippedass.com',
'shame' => 'spankingshame.com',
'redasst' => 'redassteens.net',
'redassm' => 'redassmodels.com',
'cutie' => 'spanked-cutie.com',
'smb' => 'spankmybottom.com',
'gb' => 'girls-boarding-school.com',
}
},
'kinky-photos.com' => {
getkey => sub {(split m#/#, $_)[1]},
domains => {
'straponsissies' => 'straponsissies.com',
'ladiesfuckgents' => 'ladiesfuckgents.com',
'tyrannized' => 'tyrannized.com',
'lady-sonia' => 'lady-sonia.com',
}
},
'mainbdsm.com' => {
getkey => sub {if (m#^/\w/(?:\w-)?\d+([a-z]+)#i) {return $1}},
domains => {
'pmaid' => 'painmaiden.net',
'maid' => 'painmaiden.net',
'pain' => 'thepainfiles.com',
'painf' => 'thepainfiles.com',
'sad' => 'sadoslaves.com',
'rotvid' => 'nichepass.com',
'elec' => 'nichepass.com',
'rick' => 'ricksavage.com',
}
},
'1stbbw.com' => {
getkey => sub {if (m#^/[^/]*/(?:\w-)?\d+([a-z]+)#i) {return $1}},
domains => {
'strap' => 'straponfuckers.com',
},
},
'bdsmpain.info' => {
getkey => sub {if (m#^/[^/]*/(?:\w-)?\d+([a-z]+)#i) {return $1}},
domains => {
'ep' => 'extrapain.com',
},
},
'whipsandwelts.com' => {
getkey => sub {if (m#^/([a-z]+)#i) {return $1}},
domains => {
su => 'slutspanking.com',
},
},
'spankpain.com' => {
getkey => sub {(split m#/#, $_)[3]},
domains => {
'spse' => 'spankingserver.com',
}
},
'femdom-universe.com' => {
#getkey => sub {m#^/galleries/# and return (split m#/#, $_)[2]},
getkey => sub {if (m#^/galleries/([^/]*/[^/]*)/#i) {return $1}},
domains => {
'lezdom/lesbiandommes' => 'lesbiandommes.com',
'lezdom/bloodangels' => 'bloodangels.com',
},
},
'facialrama.com' => {
getkey => sub {m#^/amp/# and return (split m#/#, $_)[2]},
domains => {
'AsianRopes' => 'asianropes.com',
},
},
'cream-me.com' => {
getkey => sub {if (m#^/rageca/([^_/]+)_#i) {return $1}},
domains => {
'spanker' => 'spankedandabused.com',
},
},
'spankingbookmarks.com' => {
getkey => sub {m#^/gals/# and return (split m#/#, $_)[2]},
domains => {
'hard' => 'hardcaning.com',
},
},
'hollywoodsweeties.com' => {
getkey => sub {if (m#^/Gal/CSub/([^/0-9]+?)Gal#i) {return $1}},
domains => {
'BBROS' => 'bangbros1.com',
'BKSM' => 'babesfight.com',
},
},
'painforall.com' => {
getkey => sub {m#^/gals/as/# and return (split m#/#, $_)[3]},
domains => {
'mixt' => 'missxtreme.com',
},
},
'freewhoregalleries.com' => {
#getkey => sub {m#^//?(?:g|galleries)/([^/]*)/# and return $1},
getkey => sub {m#^/g/# and return (split m#/#, $_)[2]},
domains => {
'Gagging-Whores' => 'gaggingwhores.com',
},
},
'msrcash.com' => {
getkey => sub {(split m#/#, $_)[1]},
domains => {
'barukakane' => 'barukakane.com',
},
},
'marialovesporn.com' => {
getkey => sub {(split m#/#, $_)[1]},
domains => {
'squirtbukkake' => 'squirtbukkake.com',
'gggsexbox' => 'gggsexbox.com',
},
},
# google site: doesn't work here, unfortunately, so some of this might be a blind guess
# (eg. what's the first parameter?)
# http://www.bastinda.com/bondage/fuckedandbound291107/bondage_3493812518.html
'bastinda.com' => {
getkey => sub {m#^/[^/]*/([a-z]+)#s && return $1},
domains => {
'violentcomix' => 'violentcomix.com',
'fuckedandbound' => 'fuckedandbound.com',
},
},
'bdsmvault.com' => {
getkey => sub {m#^/images/([a-z])\d+([a-z])\d# && return "$1$2"},
domains => {
bo => 'bondageorgasms.com',
sm => 'societysm.com',
fd => 'fuckingdungeon.com',
ps => 'perfectslave.com',
},
},
'gigolomovies.com' => {
getkey => sub {(split m#/#, $_)[1]},
domains => {
'young_dirty_sluts' => 'youngdirtysluts.com',
'dirty_anal_girls' => 'dirtyanalgirls.com',
},
},
'platinumfetish.com' => {
getkey => sub {m#^/hosted_galleries\d*/([^/]+)/# && return "$1"},
domains => {
'amateurbondagevideos' => 'amateurbondagevideos.com',
'amateursmothering' => 'amateursmothering.com',
'femdominas' => 'femdominas.com',
'painfreaks' => 'painfreaks.com',
'painvixens' => 'painvixens.com',
'purespanking' => 'purespanking.com',
'ricksavage' => 'ricksavage.com',
'tokyoslaves' => 'tokyoslaves.com',
},
},
'painwoman.com' => {
getkey => sub {(split m#/#, $_)[1]},
domains => {
'paingate' => 'paingate.com',
},
},
);
my %per_page_key = (
'promo.bucks2go.com' => {
getkey => sub {(split m#/#, $_)[1]}, # the first in the sequence of nubmers
domains => {
'slavecontest.com' => {map{$_,1}qw[ 74 113 246 247 255 ]},
'shockers.com' => {map{$_,1}qw[ 151 240 267 268 ]},
},
},
'bondagebank.com' => {
getkey => sub {(split m#[,]#)[1]},
domains => {
'chantasbitches.com' => {map{$_,1}qw[ 25 39 41 60 71 77 80 82 87 88 219 268 273 461 485 489 552 556 560 566 568 574 580 ]},
'fuckedandbound.com' => {map{$_,1}qw[ 119 129 133 134 258 282 286 298 328 521 542 554 558 562 564 572 573 576 578 582 586 ]},
'captivemale.com' => {map{$_,1}qw[ 602 610 ]},
},
},
'darksidebondage.com' => {
getkey => sub {(split m#/#, $_)[2]},
domains => {
'wasteland.com' => {map{$_,1}qw[ painslut ]},
},
},
'fetishhotel.ws' => {
getkey => sub {(split m#/#, $_)[1]},
domains => {
'sadisticbondage.com' => {map{$_,1}qw[ fetish0400 ]},
'sadisticsluts.com' => {map{$_,1}qw[ fetish0682 fetish0688 ]},
},
},
'checksexmovie.com' => {
getkey => sub {if (m#^/galls/([^/]+)/#i) {return $1}},
domains => {
'bangingmachines.com' => {map{$_,1}qw[ d05nov17 ]},
},
},
'fetishmpg.com' => {
getkey => sub {(split m#/#, $_)[1]},
domains => {
'latexxxgirls.com' => {map{$_,1}qw[
06fetish12ow
08fetish56ye
08fetish57rd
08fetish58xq
08fetish60ko
08fetish62lg
08fetish63rq
]},
},
},
);
sub domain_hosted_for_another {
my $url = shift;
#print Dumper \%per_page_key; exit;
my $uri = URI->new($url);
my $h = $uri->host;
my $p = $uri->path_query;
my $matched_hosting_house = 0;
#print "path: $p\n"; exit;
while ($h =~ m#\.#s) {
if ($domain_hosted_for_another{$h}) {
for (my $ctr=0; $ctr<@{$domain_hosted_for_another{$h}}; $ctr+=2) {
my $pattern = $domain_hosted_for_another{$h}[$ctr];
my $hosted_for = $domain_hosted_for_another{$h}[$ctr+1];
if (ref($pattern)) {
return $hosted_for if ($p =~ $pattern);
}
}
$matched_hosting_house++;
}
if ($strip_domain_prefix{$h}) {
return $h;
}
if ($entire_domain_is_for_another{$h}) {
return $entire_domain_is_for_another{$h};
}
if ($per_domain_key{$h}) {
local $_ = $p;
my $key = $per_domain_key{$h}{getkey}->($p, $uri->host);
if (!defined($key) || length($key)==0) {
print "Unable to parse key from '", $uri->as_string, "'.
\n";
} elsif (exists $per_domain_key{$h}{domains}{$key}) {
return $per_domain_key{$h}{domains}{$key};
} else {
print "key($key) didn't match against anything in '$h'.
\n";
}
$matched_hosting_house++;
}
if ($per_page_key{$h}) {
local $_ = $p;
my $key = $per_page_key{$h}{getkey}->($p);
#print "Got key: $key from $url\n";
foreach my $checking_domain (keys %{$per_page_key{$h}{domains}}) {
my $matching_keys = $per_page_key{$h}{domains}{$checking_domain};
return $checking_domain if ($matching_keys->{$key});
}
#while (my ($var, $val) = each %{$per_page_key{$h}{domains}}) {
# return $var if ($val->{$key});
# print "key($key) didn't match against: ", Dumper $val;
#}
print "key($key) didn't match against anything in '$h'.
\n";
$matched_hosting_house++;
}
# hard-coded patterns (special cases that are more complex)
if ($h eq 'tgp.pornaccess.com' && $p =~ m#^/galleries/([^/]*)/#i) {
return "$1.pornaccess.com"; # hard coded, just because this doesn't seem to be an infrequently used site
}
if ($h =~ m#^(pics|free|galls)\.maniacpass\.com$#i && $p =~ m#^/([^/]*)/#i) { # similar to the above
return "$1.maniacpass.com";
}
if ($h eq 'galleries.adult-empire.com' && $p =~ m#^/(\d+)/#i) {
return "sites.adult-empire.com/$1/";
}
$h =~ s#^[^\.]*\.##s;
}
for (my $ctr=0; $ctr<@universal_pattern; $ctr+=2) {
my $pattern = $universal_pattern[$ctr];
my $hosted_for = $universal_pattern[$ctr+1];
#print "Matching '$p' against '$pattern'\n";
if (ref($pattern)) {
return $hosted_for if ($p =~ $pattern);
}
}
return ($uri->host . "(hosting house, unknown) ") if ($matched_hosting_house);
return $uri->host;
}
}
BEGIN {
my %most_domains_URLs_bad = map{$_,1} (
"rathbondage.com", # original owner seems to have sold it to someone else, who just uses it for SEO junk now
"pimpcash.com", # most links I've seen have been dead...
);
sub is_mostly_bad_domain {
my $h = URI->new(shift)->host;
while ($h =~ m#\.#s) {
return 1 if $most_domains_URLs_bad{$h};
$h =~ s#^[^\.]*\.##s;
}
}
}
BEGIN {
my $kink_com = qr#^/[gm]/\d+/\d+/[vm]/#i;
my %domain_movie_urls = (
'vipspanking.com' => [
qr#^/wm/mg/#i,
],
'promo.devicebondage.com' => [$kink_com],
'promo.fuckingmachines.com' => [$kink_com],
'promo.hogtied.com' => [$kink_com],
'promo.meninpain.com' => [$kink_com],
'promo.sexandsubmission.com' => [$kink_com],
'promo.thetrainingofo.com' => [$kink_com],
'promo.ultimatesurrender.com' => [$kink_com],
'promo.waterbondage.com' => [$kink_com],
'promo.whippedass.com' => [$kink_com],
'promo.wiredpussy.com' => [$kink_com],
'promo.tsseduction.com' => [$kink_com],
# XXXCumCash sites: 1=image, 2=hybrid, 3=movie (though hybrid has 3 fewer movies, so I don't know that we want to always force it to that)
'natalisbondagesluts.com' => [ qr#^/galleries/[^/]*/[^/]*/[23]/# ],
'damseldiary.com' => [ qr#^/free/[^/]*]/[^/]*/[23]/# ], # this doesn't seem to be working...?
'bdsmtraffic.com' => [
qr#^/galleries/movies/#i,
],
'electricityplay.com' => [
qr#^/gallery/[^/]*/mgp#i,
],
'mgp.bossbitches2.com' => [ qr#.# ],
'movies.meatinsertions.com' => [ qr#.# ],
'spankingdollars.com' => [ qr#^/mg/# ],
'vipspanking.com' => [ qr#^/wm/mg/# ],
'badfemales.com' => [ qr#^/wm/mg/# ],
'herfirstpunishment.com' => [ qr#^/wm/mg/# ],
'fhg.masteracash.com' => [ qr#^/[^/]*/video/# ],
'platinumfetish.com' => [ qr#^/hosted[^/]*/[^/]*/video/# ],
'galleries.knotnice.com' => [ qr#^/h/\d+/mgp# ],
'bdsmtraffic.com' => [ qr#^/[^/]*/movies/# ],
'latexangel.com' => [ qr#^/galleries/free/[^/]*/fhm/#si ],
);
sub is_TGP_movie_page {
my $uri = URI->new(shift);
my $h = $uri->host;
my $p = $uri->path_query;
while ($h =~ m#\.#s) {
if ($domain_movie_urls{$h}) {
foreach my $regexp (@{$domain_movie_urls{$h}}) {
return 1 if ($p =~ $regexp);
}
}
$h =~ s#^[^\.]*\.##s;
}
}
}
1;