#!/usr/bin/perl

# Just makes a list of all the filenames included under each .tar.gz file.
# It's probably useful for grepping.
#
# Designed to work with CPAN::Mini.
#
# Beware -- the output is pretty large.  If you run:
#     ./dump_filenames.pl | gzip > filename_dump.txt.gz
# you will still get an 11mb file.

use strict;
use warnings;

use Archive::Tar ();        # Perl core
use CGI::Tiny ();
use File::Basename ();      # Perl core
use Pod::Parser ();         # Perl core
use Text::LineNumber ();
use Data::Dumper;           # Perl core

# the location of your CPAN::Mini mirror
my $mirror_location = '/home/newcum/minicpan/mirror/';
-d $mirror_location
    or die "\$mirror_location is set incorrectly ($mirror_location)\n";

$Archive::Tar::WARN = 0;    # quiet warning messages

# All paths below (the find root and the tarball names) are relative to the
# mirror, so a failed chdir must be fatal rather than silently scanning the
# wrong directory.
chdir($mirror_location)
    or die "Cannot chdir to $mirror_location: $!\n";

# Collect every .tar.gz (regular file or symlink) under authors/.
# List-form pipe open: no shell is involved.
my @archives;
open(my $find_fh, '-|', 'find', 'authors/', '-type', 'f', '-o', '-type', 'l')
    or die $!;
while (my $path = <$find_fh>) {
    chomp $path;
    push(@archives, $path) if $path =~ /\.tar\.gz$/i;
}

# Closing a pipe handle also reaps the child; a false return means either an
# I/O error ($! set) or a non-zero exit status from find ($? set).  Keep going
# with whatever was collected, but tell the user.
close($find_fh)
    or warn($! ? "Error closing pipe from find: $!\n"
               : "find exited with status $?\n");

# Returns the final path component of an archive path, i.e. the
# distribution's tarball name with the author directories stripped.
sub distribution_name {
    my ($path) = @_;
    $path =~ s#^.*/##;
    return $path;
}

# sort by distribution name -- without this, the results end up being sorted
# by author name.  The case-folded key is computed once per archive instead of
# on every comparison.
my %sort_key_for = map { $_ => lc(distribution_name($_)) } @archives;
@archives = sort { $sort_key_for{$a} cmp $sort_key_for{$b} } @archives;

foreach my $archive (@archives) {
    process_archive($archive);
}

# Prints one line per file contained in the given tarball, in the form
# "<tarball path>/<path inside the archive minus its leading directory>".
#
# Parameters:
#   $tarball - path of the .tar.gz file, relative to the current directory
#
# Tarballs that cannot be read are reported on STDERR and skipped.
sub process_archive {
    my ($tarball) = @_;

    # tarballs that we get hung up on for one reason or another
    return if $tarball =~ m#/Lingua-StanfordCoreNLP-#;

    # new() returns false when the archive cannot be read; without this check
    # the method call below would die and abort the whole run.
    my $tar = Archive::Tar->new($tarball);
    unless ($tar) {
        warn "Skipping unreadable archive $tarball: "
            . Archive::Tar->error . "\n";
        return;
    }

    foreach my $filename ($tar->list_files()) {
        #next unless (filename_filter($filename));

        # drop the leading "Dist-Name-1.23/" directory component
        (my $filename_sans_package = $filename) =~ s#^[^/]*/##s;
        print "$tarball/$filename_sans_package\n";
    }

    return;
}