http://paperlined.org/dev/src/pl/porn/cgi/tgp_condom/URLMetadata.pm

package URLMetadata;

# Functions that operate using site-specific information.

    use Carp;
    use CGI::Util;
    use URI;
    use URI::QueryParam;
    use Data::Dumper;


    use strict;
    use warnings;

# in some cases, doing site-specific redirector-stripping isn't enough, because there are actually
# two or more redirectors chained together!!  Strip these off.
#
# TODO: move this to a more common module for making URLs canonical?
sub strip_extra_redirectors {
    my $url = shift;

    if ($url =~ m#refer\.ccbill\.com/cgi-bin/clicks\.cgi# && $url =~ s#^.*?HTML=##) {
        $url =~ s#=.*##;
        return strip_extra_redirectors(CGI::Util::unescape($url));
    } elsif ($url =~ m#bertonicash\.com/tgp\.php#) {
        $url =~ s#^.*?HTML=##;
        $url =~ s#=.*##;
        return strip_extra_redirectors(CGI::Util::unescape($url));
    }

    return $url;
}

BEGIN {
    my %strip_all_queries = map {$_,1} qw[
        g.wasteland.com
        galleries.extremebound.com
    ];
#        bdsmtraffic.com
#        fhg.masteracash.com
#        galleries.extremebound.com
#        promo.limitedaudience.com
#        bondageforte.org
#        bondagenation.com
#    ];
    my %strip_one_param = (
        "promo.gordcash.com" => "a",
        "fhg.masteracash.com" => "id",
        "kinky-katinka.com" => "id",
        "promo.limitedaudience.com" => "a",
        "bondageforte.org" => "aid",
        "bondagenation.com" => "PA",
        "bongum-video.com" => "ID",
        "fetishshots.com" => "id",
        "ratherextreme.com" => "id",
        "roperookie.com" => "nats",
        "trashygallery.com" => "nats",
        "platinumfetish.com" => "ccbill",
        "pics.maniacpass.com" => "id",
        "bdsmtraffic.com" => "webmaster",
        "pornaccess.com" => "adv_id",
        "freemeatwhores.com" => "aff",
        "maxcash.com" => "webm_id",
        "assspankingvideos.com" => "id",
        "redassteens.com" => "r",
        "fistinglessons.net" => "r",
        "fistinglessons.com" => "r",
        "misspain.com" => "r",
        "miss-krista.com" => "id",
        "femdomchronicles.com" => "id",
        "bbraxaporn.com" => "r",
        "sadoslaves.com" => "r",
        "maniacpass.com" => "pt",
        "rubberdollies.com" => "id",
        "shinyangels.com" => "id",
        "slutgalz.com" => "nats",
        "spookycash.com" => "id",
        "straponangels.com" => "id",
        "shockingbdsm.com" => "id",
        "ebony-bondage.org" => "aff",
        "gothicsluts.com" => "id",
        "ddfcash.com" => "nats",
        "susanwayland.com" => "PA",
        "silversdollars.com" => "affcode",
        "cherrysweetpics.com" => "ID",
        "exclusivebdsm.com" => "id",
        "freewhorelinks.com" => "aff",
        "meatgalleries.com" => "aff",
        "freewhoregalleries.com" => "aff",
    );
    my @always_strip = qw[
        nats
    ];
# TODO: move this to a more common module for making URLs canonical?
sub strip_affiliate_id {
    my $url = shift;

    my $uri = URI->new($url);
    ref($uri) eq "URI::http" or confess "Not http: $url\n\t";
    my $host = $uri->host;

    foreach my $always (@always_strip) {
        $uri->query_param_delete( $always );
    }

    my $h = $host;
    while ($h =~ m#\.#s) {
        if ($strip_all_queries{$h}) {
            $url =~ s#\?.*##s;
            return $url;
        }
        if ($strip_one_param{$h}) {
            # delete this one parameter
            $uri->query_param_delete( $strip_one_param{$h} );
            return $uri->as_string;
        }
        $h =~ s#^[^\.]*\.##s;
    }

    return $url;
}
}

BEGIN {
    my %strip_final_file = (
        'bdsmtraffic.com' => '^/galleries/[^/]*/index\.php$',
        'g.wasteland.com' => '^/[^/]*/index\.html$',
        'galleries.extremebound.com' => '^/[^/]*/index\.html$',
        'galleries.knotnice.com' => '^(?:/[^/]*){3}/index\.html$',
        'kinky-katinka.com' => '^(?:/[^/]*){2}/index\.php$',
        'bongum-video.com' => '^(?:/[^/]*){2}/index\.php$',
        'christinacaptured.com' => '^(?:/[^/]*){3}/index\.html$',
        'fetishshots.com' => '^(?:/[^/]*){2}/index\.php$',
        'ratherextreme.com' => '^(?:/[^/]*){2}/index\.php$',
        'platinumfetish.com' => '^(?:/[^/]*){4}/index\.php$',
        'freemeatwhores.com' => '^(?:/[^/]*){3}/index\.php$',
        'bdsmtraffic.com' => '^(?:/[^/]*){2,3}/index\.php$',
        'galleries.bucks2go.com' => '^(?:/[^/]*){3}/index\.html$',
        'assspankingvideos.com' => '^(?:/[^/]*){2}/index\.php$',
        'latexangelic.com' => '^(?:/[^/]*){2}/fcindex\.html$',
        'orientalrope.com' => '/index\d+\.html$',
        'shockingbdsm.com' => '^(?:/[^/]*){2}/index\.php$',
        'susanwayland.com' => '^(?:/[^/]*){2}/index\.php$',
        'bdsmdirect.com' => '^/[^/]*/index\d+\.html$',
        'galleries.xrblogger.com' => '.',
        'checksexmovie.com' => '.',
        'bondagehotporn.com' => '/index.html$',
        'tusya.com' => '/index.html$',
    );

    # Don't put sites here if they always force-redirect back to the longer version of the name.
    my %strip_domain_prefix = map {$_,1} qw[
        www.bondageforte.org
        www.bondagenation.com
        www.bondageorgies.com
        www.bongum-video.com
        www.damseldiary.com
        www.fetishshots.com
        www.fragilehostage.com
        www.natalisbondagesluts.com
        www.platinumfetish.com
        www.rarebdsm.com
        www.ratherextreme.com
        www.roperookie.com
        www.wildpornreviews.com
        www.freemeatwhores.com
        www.latexbyanna.com
        www.shinyangels.com
        www.straponangels.com
        www.xxx69.net
        www.shockingbdsm.com
        www.evildesire.com
        www.susanwayland.com
        www.bondagehotporn.com
        www.kinky-katinka.com
    ];

    # the site force-redirects to the longer one anyway, might as well go with that...
    my %add_domain_prefix = (
        'christinacaptured.net' => 'www.christinacaptured.net',
    );

    my $kink_com = sub {s#http://([^/]*)/([gm])/[^/]+/(\d+)/\w/\d+/h.*#http://$1/$2/1/$3/m/1/h#};
    my %other_modification = (
                    # not sure what the extra commas are for, but they don't seem to be required...
        'bondagebank.com' => sub {s#(gallhit\.php)\?\d+,(\d+),(\d+,)*\d+$#$1?1,$2#si},
        'evildesire.com' => sub {s#^(http://evildesire\.com/hosted/\d+/).*#$1#},

        'promo.devicebondage.com' => $kink_com,
        'promo.fuckingmachines.com' => $kink_com,
        'promo.hogtied.com' => $kink_com,
        'promo.meninpain.com' => $kink_com,
        'promo.sexandsubmission.com' => $kink_com,
        'promo.thetrainingofo.com' => $kink_com,
        'promo.ultimatesurrender.com' => $kink_com,
        'promo.waterbondage.com' => $kink_com,
        'promo.whippedass.com' => $kink_com,
        'promo.wiredpussy.com' => $kink_com,
        'promo.tsseduction.com' => $kink_com,

        'bondageforte.org' => sub {s#/bondageforte\.org/#/bondageforte.com/#s;},

        'galleries.adult-empire.com' => sub {s#(adult-empire.com/\d+/\d+)/\d+/#$1/1/#},

        # XXXCumCash sites
        'natalisbondagesluts.com' => sub {s#(natalisbondagesluts\.com/galleries/[^/]*/[^/]*/[^/]*)/\d+$#$1/1#},
        'damseldiary.com' => sub {s#(damseldiary\.com/free/[^/]*/[^/]*/[^/]*)/\d+$#$1/1#},

        'promo.bucks2go.com' => sub {s#(promo\.bucks2go\.com/[^/]*)/\d+/\d+#$1/1/1#},

    );
# TODO: move this to a more common module for making URLs canonical?
sub make_canonical {
    my $url = shift;

    my $uri = URI->new($url)->canonical;
    my $h = $uri->host;
    my $p = $uri->path;
    #print "path: $p\n"; exit;
    while ($h =~ m#\.#s) {
        if ($strip_final_file{$h} && $p =~ m#$strip_final_file{$h}#si) {
            $p =~ s#/[^/]+$#/#si;
            $uri->path($p);
        }
        if ($strip_domain_prefix{$h}) {
            $h =~ s#^[^\.]*\.##si;
            $uri->host($h);
            next;
        }
        if ($add_domain_prefix{$h}) {
            $uri->host($add_domain_prefix{$h});
        }
        if ($other_modification{$h}) {
            local $_ = $uri->as_string;
            $other_modification{$h}->();
            $uri = URI->new($_);
        }
        $h =~ s#^[^\.]*\.##s;
    }

    return $uri->as_string;
}
}


BEGIN {
    my %domain_hosted_for_another = (
                # NOTE: these are LIST-refs, not HASH-refs, though they end up being functionaly
                # equivalent to hashrefs
        'fhg.masteracash.com' => [
            qr#^/slavesinlove/#i => 'slavesinlove.com',
        ],

        'bdsmi.com' => [
            qr#^/hosted/painfiles/#i => 'thepainfiles.com',
        ],

        'fragilehostage.com' => [
            qr#galleries/ssm/#i => 'societysm.com',
            qr#/galleries/ps/#i => 'perfectslave.com',
        ],

        'bdsmtraffic.com' => [          # actually, this is an obfuscated redirector
            qr#^/kk/#i => 'kinky-katinka.com',
            qr#/as\d#i => 'amateurslave.com',
            qr#/ss\d#i => 'slavesluts.com',
        ],

        'ddfcash.com' => [
            qr#^(?:/PROMO/|/affiliate/freepages/)hot/#i => 'houseoftaboo.com',
        ],

        'galleryhost.com' => [
            qr#/her1stanal/#i => 'her1stanal.com',
        ],

        'natalisbondagesluts.com' => [
            qr#^/galleries/ss/#i => 'smotheredslave.com',
            qr#^/galleries/bo/#i => 'bondageorgasms.com',
        ],

        'freemeatwhores.com' => [
            qr#^//?(?:g|galleries)/Gagging-Whores\b#i => 'gaggingwhores.com',
        ],

        'damseldiary.com' => [
            qr#^/free/ssm/#i => 'societysm.com',
            qr#^/free/bo/#i => 'bondageorgasms.com',
            qr#^/free/ps/#i => 'perfectslave.com',
        ],

        'spankbuxx.com' => [
            qr#^/hosted/SSG/#i => 'spankedschoolgirl.com',
            qr#^/hosted/spvids/#i => 'spankingvids.com',
            qr#^/hosted/sol/#i => 'spankingonline.com',
        ],

        'fhg.fuckingfree.net' => [
            qr#^/fhg/violentanime/#i => 'violentanime.com',
            qr#^/fhg/violentcomix/#i => 'violentcomix.com',
        ],

        'massivegalleries.com' => [
            qr#^/vids/rr/#i => 'rarebondage.com',
        ],

        'bondageorgies.com' => [
            qr#^/pjwarchive/#i => 'pjwarchive.com',
        ],

        'galleries.do-bill.com' => [
            qr#^/bdsmartwork/#i => 'bdsmartwork.com',
        ],

        'fetishshots.com' => [
            qr#^/fdl/#i => 'femdomloft.com',
            qr#^/tt/#i => 'tightlytied.com',
            qr#^/tw/#i => 'tramplingworld.com',
            qr#^/nd/#i => 'nylondivas.com',
            qr#^/ld/#i => 'lesbiandommes.com',
            qr#^/hhd/#i => 'highheeldivas.com',
        ],

        'spookycash.com' => [
            qr#^/hgma/#i => 'michelle-aston.com',
            qr#^/hgsc13/#i => 'scar13.com',
            qr#^/hgeb/#i => 'eroticbpm.net',
        ],


        'bdsmbook.com' => [
            qr#/gsg/#i => 'girlspanksgirl.com',
            qr#/ffg/#i => 'femdomfetishgirls.com',
            qr#/sas/#i => 'sexandsubmission.com',
            qr#/ir/#i  => 'infernalrestraints.com',
            qr#/pg/#i  => 'paingate.com',
        ],

        'eropain.com' => [
            qr#^/\w/(?:\w-)?\d+shp-#i => 'shockingpain.com',
            qr#^/\w/(?:\w-)?\d+ep-#i => 'extrapain.com',
            qr#^/\w/(?:\w-)?\d+-#i => 'painriser.com',
        ],

        'bondagepoint.com' => [
            qr#^/(?:maledom|bdsm)/#i => 'societysm.com',
            qr#^/(?:domination)/#i => 'fuckingdungeon.com',
        ],

        'femdomfreeimages.com' => [
            qr#^/aetgp/SpSvr\d+#i => 'spankingserver.com',
        ],

        'carnaldungeon.com' => [
            qr#^/galleries/fd/#i => 'fuckingdungeon.com',
            qr#^/galleries/str/#i => 'strictrestraint.com',
        ],

        'cuckoldrix.com' => [
            qr#^/host50/#i => 'lady-sonia.com',
        ],

        'gothdream.net' => [
            qr#^/fh[gs]/f\d+#i => 'guysgetfucked.com',
            qr#^/fh[gs]/h\d+#i => 'girlfriendhandjobs.com',
            qr#^/fh[gs]/z\d+#i => 'zoliboy.com',
        ],


        'spankingmoney.com' => [
            qr#/rs/#i => 'realspankings.com',
        ],

        'bondagehotporn.com' => [
            qr#^/bdsm_sex[/_]#i => 'sexandsubmission.com',
            qr#^/sex_and_submission/#i => 'sexandsubmission.com',
            qr#^/fucked_bound/#i => 'fuckedandbound.com',
            qr#^/machines?_#i => 'fuckingmachines.com',
            qr#^/fuck_machine_#i => 'fuckingmachines.com',
            qr#^/chantas_bitches/#i => 'chantasbitches.com',
        ],

        # just markers for the fact that the thumbnails from these places need to be sorted out
        'bucks2go.com' => [ ],
        'adult-empire.com' => [ ],
        'pornaccess.com' => [ ],
        'risegalleries.com' => [ ],
        'sexcataclysm.com' => [ ],
        'majesticfuck.com' => [ ],
        'facesittingheaven.com' => [ ],
        'spankingdollars.com' => [ ],           # dedicated to "Girls Boarding School" or not?
        'silversdollars.com' => [ ],
        'porncabinet.com' => [ ],
        'femdomfetishsites.com' => [ ],
        'painwoman.com' => [ ],                 # paingate.com + whippedwomen.com?
    );

        # same as above...  ostensibly an array, but acts like a hash
    my @universal_pattern = (
        qr#ricksavage#i => 'ricksavage.com',
        qr#Hell-Fire-Sex#i => 'hellfiresex.com',
        qr#specialexercises#i => 'specialexercises.com',
        qr#\bmeatholes\b#i => 'meatholes.com',
        qr#amateurbondagevideos#i => 'amateurbondagevideos.com',
        qr#cumbots#i => 'cumbots.com',
    );
    
    my %strip_domain_prefix = map {$_,1} qw[
        sadoslaves.com
        gagfuck.com
        frenchfisting.com
        hogtied.com
        maxxandmore.com
        meninpain.com
        devicebondage.com
        miss-krista.com
        sexandsubmission.com
        bondagehere.com
        fuckingmachines.com
        knotnice.com
        limitedaudience.com
        thetrainingofo.com
        wasteland.com
        waterbondage.com
        whippedass.com
        wiredpussy.com
        tsseduction.com
        extremebound.com
        sadisticbondage.com
        naturebondage.com
        hellfiresex.com
        femdomchronicles.com
        brutaldildos.com
        bossbitches2.com
        ebony-bondage.org
        ultimatesurrender.com
        bizarreadultclub.com
        exclusivebdsm.com
        kshara.com
        meatinsertions.com
    ];

    my %entire_domain_is_for_another = (
        'fistinglessons.com' => 'fistinglessons.net',
        'maxhardcoreporn.com' => 'maxhardcoreporn.net',
        'bondagenation.com' => 'fetishnation.com',
        'fhgp.maxcash.com' => 'fetishhotel.com',
        'bound-and-gagged.org' => 'extremebound.com',
        'medical-fetish-videos.com' => 'doctortushy.com',
        'promo.gordcash.com' => 'nakedgord.com',
        'lady-sonia-galleries.com' => 'lady-sonia.com',
        'orientalrope.com' => 'thepainfiles.com',
        'aerobdsm.com' => 'societysm.com',
        'chantacash.com' => 'chantasbitches.com',
        'bdsmresorts.com' => 'perfectslave.com',
        'eroticshadows.com' => 'thepainfiles.com',
        'smfiles.com' => 'thepainfiles.com',
        'bdsmdirect.com' => 'thepainfiles.com',
        'devianterotica.com' => 'xxxsuicide.com',
        'abbysrealm.com' => 'abbys-realm.com',
    );

    my %per_domain_key = (
        'pussypizza.com' => {
            getkey => sub {if (m#^/([a-z]+)\d+#i) {return $1}},
            domains => {
                dd => 'deviantdavid.com',
                mb => 'meanbitches.com',
                md => 'meandungeon.com',
            },
        },

        'femdomworld.com' => {
            getkey => sub {if ($_[1] =~ /^([^.]+)\.femdomworld\.com$/i) {return $1} },
            domains => {
                russianmistress => 'russian-mistress.com',
                underfeet => 'under-feet.com',
            },
        },

        'richporn.com' => {
            getkey => sub {(split m#/#)[1]},
            domains => {
                'backdoorlesbians' => 'backdoorlesbians.com',
            },
        },

        'bdsmengine.com' => {
            getkey => sub {(split m#/#)[1]},
            domains => {
                'male_domination' => 'societysm.com',
            },
        },

        'wanklinks.com' => {
            getkey => sub {if (m#^/([a-z]+)[0-9_]#i) {return $1}},
            domains => {
                'tyrannized' => 'tyrannized.com',
            },
        },

        'galleries.xrblogger.com' => {
            getkey => sub {if (m#^/([a-z]+)[0-9]#i) {return $1}},
            domains => {
                tgpc => 'spankingfear.com',
                tgp  => 'spankingfear.com',
            },
        },

        'tampapartygirl.com' => {
            getkey => sub {if (m#^/galleries/([^/]+)/#i) {return $1}},
            domains => {
                'electricityplay' => 'electricityplay.com',
            },
        },

        'deviantteens.com' => {
            getkey => sub {if (m#^/galleries/(?:overbucks|xxxcumcash)/([^/]+)/#i) {return $1}},
            domains => {
                'goldenrain' => 'thegoldenrain.com',
                'societysm' => 'societysm.com',
            },
        },

        'eroticaxxx.com' => {
            getkey => sub {if (m#^/gal/([a-z]+)[0-9]#i) {return $1}},
            domains => {
                nwl => 'wasteland.com',
            },
        },

        'agebucks.com' => {
            getkey => sub {if (m#^/content/fhg/([^/]+)/#i) {return $1}},
            domains => {
                'painart.com' => 'painart.com',
            },
        },

        'agebucks.com' => {
            getkey => sub {if (m#^/content/fhg/([^/]+)/#i) {return $1}},
            domains => {
                'painart.com' => 'painart.com',
            },
        },

        'tusya.com' => {
            getkey => sub {(split m#/#, $_)[1]},
            domains => {
                mature => 'myfriendshotmom.com',
                bondage => 'thetrainingofo.com',
            },
        },

        'erotiscore.com' => {
            getkey => sub {(split m#/#, $_)[1]},
            domains => {
                kumimonster => 'kumimonster.com',
            },
        },

        'galleries.fetishsphere.com' => {
            getkey => sub {(split m#/#, $_)[1]},
            domains => {
                'alterpic' => 'alterpic.com',
                'bloodangels' => 'bloodangels.com',
                'bondage-maidens' => 'bondagemaidens.com',
                'bound-by-bhowani' => 'boundbybhowani.com',
                'charlotte' => 'charlottefetish.com',
                'chilly-girls' => 'chillygirls.com',
                'gwen-media' => 'gwenmedia.com',
                'hard-tied' => 'hardtied.com',
                'hell-fire' => 'hellfiresex.com',
                'hot' => 'houseoftaboo.com',
                'hustler-taboo' => 'hustlerstaboo.com',
                'infernal-restraints' => 'infernalrestraints.com',
                'kinky-katinka' => 'kinky-katinka.com',
                'maniac-pass' => 'maniacpass.com',
                'pupett' => 'pupett.com',
                'rope-affairs' => 'ropeaffairs.com',
                'rope-lover' => 'ropelover.com',
                'saffys-secrets' => 'saffyssecrets.com',
                'satin-steel' => 'satinsteel.com',
                'sweetties' => 'sweetties.com',
                'shadow-slaves' => 'shadowslaves.com',
            },
        },

        'rarebdsm.com' => {
            getkey => sub {(split m#/#, $_)[2]},
            domains => {
                'ashley_renee' => 'ashleyrenee.com',
                'ball_gagged' => 'ball-gagged.com',
                'shackled' => 'shackledmaidens.com',
                'painfiles' => 'thepainfiles.com',
            },
        },


        'fetishgera.com' => {
            getkey => sub {if (m#^/([a-z]+)#i) {return $1}},
            domains => {
                'mega' => 'megapenetrations.com',
                'gubba' => 'skinvideo.com',
                'tlove' => 'teens-love-oldmen.com',
                'gap' => 'clubgape.deluxepass.com',
                'vag' => 'vaginalsucker.com',
                'pissact' => 'pissinginaction.net',
            },
        },

        'spankingarts.com' => {
            getkey => sub {(split m#/#, $_)[1]},
            domains => {
                'manp' => 'meninpain.com',
                'whip' => 'whippedass.com',
                'shame' => 'spankingshame.com',
                'redasst' => 'redassteens.net',
                'redassm' => 'redassmodels.com',
                'cutie' => 'spanked-cutie.com',
                'smb' => 'spankmybottom.com',
                'gb' => 'girls-boarding-school.com',
            }
        },

        'kinky-photos.com' => {
            getkey => sub {(split m#/#, $_)[1]},
            domains => {
                'straponsissies'  => 'straponsissies.com',
                'ladiesfuckgents' => 'ladiesfuckgents.com',
                'tyrannized' => 'tyrannized.com',
                'lady-sonia' => 'lady-sonia.com',
            }
        },

        'mainbdsm.com' => {
            getkey => sub {if (m#^/\w/(?:\w-)?\d+([a-z]+)#i) {return $1}},
            domains => {
                'pmaid'  => 'painmaiden.net',
                'maid'   => 'painmaiden.net',
                'pain'   => 'thepainfiles.com',
                'painf'  => 'thepainfiles.com',
                'sad'    => 'sadoslaves.com',
                'rotvid' => 'nichepass.com',
                'elec'   => 'nichepass.com',
                'rick'   => 'ricksavage.com',
            }
        },

        '1stbbw.com' => {
            getkey => sub {if (m#^/[^/]*/(?:\w-)?\d+([a-z]+)#i) {return $1}},
            domains => {
                'strap' => 'straponfuckers.com',
            },
        },

        'bdsmpain.info' => {
            getkey => sub {if (m#^/[^/]*/(?:\w-)?\d+([a-z]+)#i) {return $1}},
            domains => {
                'ep' => 'extrapain.com',
            },
        },

        'whipsandwelts.com' => {
            getkey => sub {if (m#^/([a-z]+)#i) {return $1}},
            domains => {
                su => 'slutspanking.com',
            },
        },

        'spankpain.com' => {
            getkey => sub {(split m#/#, $_)[3]},
            domains => {
                'spse' => 'spankingserver.com',
            }
        },

        'femdom-universe.com' => {
            #getkey => sub {m#^/galleries/# and return (split m#/#, $_)[2]},
            getkey => sub {if (m#^/galleries/([^/]*/[^/]*)/#i) {return $1}},
            domains => {
                'lezdom/lesbiandommes' => 'lesbiandommes.com',
                'lezdom/bloodangels' => 'bloodangels.com',
            },
        },

        'facialrama.com' => {
            getkey => sub {m#^/amp/# and return (split m#/#, $_)[2]},
            domains => {
                'AsianRopes' => 'asianropes.com',
            },
        },

        'cream-me.com' => {
            getkey => sub {if (m#^/rageca/([^_/]+)_#i) {return $1}},
            domains => {
                'spanker' => 'spankedandabused.com',
            },
        },

        'spankingbookmarks.com' => {
            getkey => sub {m#^/gals/# and return (split m#/#, $_)[2]},
            domains => {
                'hard' => 'hardcaning.com',
            },
        },

        'hollywoodsweeties.com' => {
            getkey => sub {if (m#^/Gal/CSub/([^/0-9]+?)Gal#i) {return $1}},
            domains => {
                'BBROS' => 'bangbros1.com',
                'BKSM' => 'babesfight.com',
            },
        },

        'painforall.com' => {
            getkey => sub {m#^/gals/as/# and return (split m#/#, $_)[3]},
            domains => {
                'mixt' => 'missxtreme.com',
            },
        },

        'freewhoregalleries.com' => {
            #getkey => sub {m#^//?(?:g|galleries)/([^/]*)/# and return $1},
            getkey => sub {m#^/g/# and return (split m#/#, $_)[2]},
            domains => {
                'Gagging-Whores' => 'gaggingwhores.com',
            },
        },

        'msrcash.com' => {
            getkey => sub {(split m#/#, $_)[1]},
            domains => {
                'barukakane' => 'barukakane.com',
            },
        },

        'marialovesporn.com' => {
            getkey => sub {(split m#/#, $_)[1]},
            domains => {
                'squirtbukkake' => 'squirtbukkake.com',
                'gggsexbox' => 'gggsexbox.com',
            },
        },

        # google site: doesn't work here, unfortunately, so some of this might be a blind guess
        #       (eg. what's the first parameter?)
        #               http://www.bastinda.com/bondage/fuckedandbound291107/bondage_3493812518.html
        'bastinda.com' => {
            getkey => sub {m#^/[^/]*/([a-z]+)#s && return $1},
            domains => {
                'violentcomix' => 'violentcomix.com',
                'fuckedandbound' => 'fuckedandbound.com',
            },
        },

        'bdsmvault.com' => {
            getkey => sub {m#^/images/([a-z])\d+([a-z])\d# && return "$1$2"},
            domains => {
                bo => 'bondageorgasms.com',
                sm => 'societysm.com',
                fd => 'fuckingdungeon.com',
                ps => 'perfectslave.com',
            },
        },

        'gigolomovies.com' => {
            getkey => sub {(split m#/#, $_)[1]},
            domains => {
                'young_dirty_sluts' => 'youngdirtysluts.com',
                'dirty_anal_girls' => 'dirtyanalgirls.com',
            },
        },

        'platinumfetish.com' => {
            getkey => sub {m#^/hosted_galleries\d*/([^/]+)/# && return "$1"},
            domains => {
                'amateurbondagevideos' => 'amateurbondagevideos.com',
                'amateursmothering' => 'amateursmothering.com',
                'femdominas' => 'femdominas.com',
                'painfreaks' => 'painfreaks.com',
                'painvixens' => 'painvixens.com',
                'purespanking' => 'purespanking.com',
                'ricksavage' => 'ricksavage.com',
                'tokyoslaves' => 'tokyoslaves.com',
            },
        },

        'painwoman.com' => {
            getkey => sub {(split m#/#, $_)[1]},
            domains => {
                'paingate' => 'paingate.com',
            },
        },



    );


    my %per_page_key = (
        'promo.bucks2go.com' => {
            getkey => sub {(split m#/#, $_)[1]},        # the first in the sequence of nubmers
            domains => {
                'slavecontest.com' => {map{$_,1}qw[  74 113 246 247 255 ]},
                'shockers.com' => {map{$_,1}qw[  151 240 267 268  ]},
            },
        },

        'bondagebank.com' => {
            getkey => sub {(split m#[,]#)[1]},
            domains => {
                'chantasbitches.com' => {map{$_,1}qw[  25 39 41 60 71 77 80 82 87 88 219 268 273 461 485 489 552 556 560 566 568 574 580  ]},
                'fuckedandbound.com' => {map{$_,1}qw[  119 129 133 134 258 282 286 298 328 521 542 554 558 562 564 572 573 576 578 582 586  ]},
                'captivemale.com' => {map{$_,1}qw[ 602 610 ]},
            },
        },

        'darksidebondage.com' => {
            getkey => sub {(split m#/#, $_)[2]},
            domains => {
                'wasteland.com' => {map{$_,1}qw[ painslut ]},
            },
        },

        'fetishhotel.ws' => {
            getkey => sub {(split m#/#, $_)[1]},
            domains => {
                'sadisticbondage.com' => {map{$_,1}qw[ fetish0400 ]},
                'sadisticsluts.com' => {map{$_,1}qw[ fetish0682 fetish0688 ]},
            },
        },

        'checksexmovie.com' => {
            getkey => sub {if (m#^/galls/([^/]+)/#i) {return $1}},
            domains => {
                'bangingmachines.com' => {map{$_,1}qw[ d05nov17 ]},
            },
        },

        'fetishmpg.com' => {
            getkey => sub {(split m#/#, $_)[1]},
            domains => {
                'latexxxgirls.com' => {map{$_,1}qw[
                        06fetish12ow
                        08fetish56ye
                        08fetish57rd
                        08fetish58xq
                        08fetish60ko
                        08fetish62lg
                        08fetish63rq
                ]},
            },
        },

    );

sub domain_hosted_for_another {
    my $url = shift;

    #print Dumper \%per_page_key; exit;

    my $uri = URI->new($url);
    my $h = $uri->host;
    my $p = $uri->path_query;

    my $matched_hosting_house = 0;

    #print "path: $p\n"; exit;
    while ($h =~ m#\.#s) {
        if ($domain_hosted_for_another{$h}) {
            for (my $ctr=0; $ctr<@{$domain_hosted_for_another{$h}}; $ctr+=2) {
                my $pattern    = $domain_hosted_for_another{$h}[$ctr];
                my $hosted_for = $domain_hosted_for_another{$h}[$ctr+1];

                if (ref($pattern)) {
                    return $hosted_for if ($p =~ $pattern);
                }
            }

            $matched_hosting_house++;
        }
        if ($strip_domain_prefix{$h}) {
            return $h;
        }
        if ($entire_domain_is_for_another{$h}) {
            return $entire_domain_is_for_another{$h};
        }
        if ($per_domain_key{$h}) {
            local $_ = $p;
            my $key = $per_domain_key{$h}{getkey}->($p, $uri->host);
            if (!defined($key) || length($key)==0) {
                print "Unable to parse key from '", $uri->as_string, "'.<br>\n";
            } elsif (exists $per_domain_key{$h}{domains}{$key}) {
                return $per_domain_key{$h}{domains}{$key};
            } else {
                print "key($key) didn't match against anything in '$h'.<br>\n";
            }
            $matched_hosting_house++;
        }
        if ($per_page_key{$h}) {
            local $_ = $p;
            my $key = $per_page_key{$h}{getkey}->($p);
            #print "Got key: $key    from $url\n";
            foreach my $checking_domain (keys %{$per_page_key{$h}{domains}}) {
                my $matching_keys = $per_page_key{$h}{domains}{$checking_domain};
                return $checking_domain if ($matching_keys->{$key});
            }
            #while (my ($var, $val) = each %{$per_page_key{$h}{domains}}) {
            #    return $var if ($val->{$key});
            #    print "key($key) didn't match against: ", Dumper $val;
            #}
            print "key($key) didn't match against anything in '$h'.<br>\n";
            $matched_hosting_house++;
        }

        # hard-coded patterns (special cases that are more complex)
        if ($h eq 'tgp.pornaccess.com' && $p =~ m#^/galleries/([^/]*)/#i) {
            return "$1.pornaccess.com";     # hard coded, just because this doesn't seem to be an infrequently used site
        }
        if ($h =~ m#^(pics|free|galls)\.maniacpass\.com$#i && $p =~ m#^/([^/]*)/#i) {      # similar to the above
            return "$1.maniacpass.com";
        }
        if ($h eq 'galleries.adult-empire.com' && $p =~ m#^/(\d+)/#i) {
            return "sites.adult-empire.com/$1/";
        }
        $h =~ s#^[^\.]*\.##s;
    }

    for (my $ctr=0; $ctr<@universal_pattern; $ctr+=2) {
        my $pattern    = $universal_pattern[$ctr];
        my $hosted_for = $universal_pattern[$ctr+1];

        #print "Matching '$p' against '$pattern'\n";

        if (ref($pattern)) {
            return $hosted_for if ($p =~ $pattern);
        }
    }

    return ($uri->host . "(hosting house, unknown) ") if ($matched_hosting_house);

    return $uri->host;
}
}


BEGIN {
    my %most_domains_URLs_bad = map{$_,1} (
        "rathbondage.com",      # original owner seems to have sold it to someone else, who just uses it for SEO junk now
        "pimpcash.com",         # most links I've seen have been dead...
    );
    sub is_mostly_bad_domain {
        my $h = URI->new(shift)->host;
        while ($h =~ m#\.#s) {
            return 1 if $most_domains_URLs_bad{$h};
            $h =~ s#^[^\.]*\.##s;
        }
    }
}


BEGIN {
    my $kink_com = qr#^/[gm]/\d+/\d+/[vm]/#i;
    my %domain_movie_urls = (
        'vipspanking.com' => [
            qr#^/wm/mg/#i,
        ],

        'promo.devicebondage.com' => [$kink_com],
        'promo.fuckingmachines.com' => [$kink_com],
        'promo.hogtied.com' => [$kink_com],
        'promo.meninpain.com' => [$kink_com],
        'promo.sexandsubmission.com' => [$kink_com],
        'promo.thetrainingofo.com' => [$kink_com],
        'promo.ultimatesurrender.com' => [$kink_com],
        'promo.waterbondage.com' => [$kink_com],
        'promo.whippedass.com' => [$kink_com],
        'promo.wiredpussy.com' => [$kink_com],
        'promo.tsseduction.com' => [$kink_com],

        # XXXCumCash sites: 1=image, 2=hybrid, 3=movie  (though hybrid has 3 fewer movies, so I don't know that we want to always force it to that)
        'natalisbondagesluts.com' => [ qr#^/galleries/[^/]*/[^/]*/[23]/# ],
        'damseldiary.com' => [ qr#^/free/[^/]*]/[^/]*/[23]/# ],         # this doesn't seem to be working...?

        'bdsmtraffic.com' => [
            qr#^/galleries/movies/#i,
        ],
        'electricityplay.com' => [
            qr#^/gallery/[^/]*/mgp#i,
        ],

        'mgp.bossbitches2.com' => [ qr#.# ],
        'movies.meatinsertions.com' => [ qr#.# ],

        'spankingdollars.com' => [ qr#^/mg/# ],
        'vipspanking.com' => [ qr#^/wm/mg/# ],
        'badfemales.com'  => [ qr#^/wm/mg/# ],
        'herfirstpunishment.com'  => [ qr#^/wm/mg/# ],

        'fhg.masteracash.com' => [ qr#^/[^/]*/video/# ],
        'platinumfetish.com' => [ qr#^/hosted[^/]*/[^/]*/video/# ],
        'galleries.knotnice.com' => [ qr#^/h/\d+/mgp# ],
        'bdsmtraffic.com' => [ qr#^/[^/]*/movies/# ],

        'latexangel.com' => [ qr#^/galleries/free/[^/]*/fhm/#si ],

    );
sub is_TGP_movie_page {
    my $uri = URI->new(shift);
    my $h = $uri->host;
    my $p = $uri->path_query;

    while ($h =~ m#\.#s) {
        if ($domain_movie_urls{$h}) {
            foreach my $regexp (@{$domain_movie_urls{$h}}) {
                return 1 if ($p =~ $regexp);
            }
        }

        $h =~ s#^[^\.]*\.##s;
    }
}
}



1;

Generated by GNU enscript 1.6.4.