http://paperlined.org/dev/src/pl/git_characterize_files/file_survey.pl
#!/usr/bin/perl
# use the program 'file' to try to classify files in ./target/
use strict;
use warnings;
use Data::Dumper;
#use Devel::Comments; # uncomment this during development to enable the ### debugging statements
# Everything below works relative to ./target/.  Stop right away if it cannot
# be entered, rather than silently surveying the current directory instead.
chdir 'target' or die "Cannot chdir to 'target': $!\n";

# Maps each description printed by `file -b` to the list of paths that got it.
my %filetypes;

open my $pin, '-|', 'find', '.', '-type', 'f' or die "Cannot run find: $!\n";
while (my $path = <$pin>) {
    chomp $path;

    # Skip every path that has a component named exactly '.git'.
    next if $path =~ m{ (?: \A | / ) [.]git (?: / | \z ) }x;

    my $filetype = readpipe_args('file', '-b', $path);
    chomp $filetype;
    push @{ $filetypes{$filetype} }, $path;
}

# Closing the pipe is what reaps find; report a failure but still print
# whatever was collected.
close $pin
    or warn 'find did not finish cleanly: ', ($! || "wait status $?"), "\n";
#print Dumper \%filetypes;

# Number of files seen for each type.
my %counts = map { $_ => scalar @{ $filetypes{$_} } } keys %filetypes;
#print Dumper \%counts;

# Most common type first; equal counts fall back to alphabetical order so that
# both reports below list the types in the same, repeatable order.
my @sort = sort {
    $counts{$b} <=> $counts{$a}
        || $a cmp $b
} keys %counts;

# Report 1: the files belonging to each type.
foreach my $filetype (@sort) {
    print "================ $filetype ================\n";
    print join("\n", sort @{ $filetypes{$filetype} }), "\n\n";
}

# Report 2: how many files of each type, followed by the grand total.
print "\n";
print "================ file type count ================\n";
my $total = 0;
foreach my $filetype (@sort) {
    printf "%-5d %s\n", $counts{$filetype}, $filetype;
    $total += $counts{$filetype};
}
printf "%-5d total\n", $total;
# Like qx// or readpipe(), but takes the command as an explicitly delimited
# argument list, so there is no need to worry about escaping quotes.
#
# Arguments: the program to run, followed by its arguments.
# Returns:   in list context, the lines the command wrote to its stdout;
#            in scalar context, the same output joined into one string.
# Dies if no command is given or if the command cannot be started.
# As with qx//, the command's wait status is left in $? for the caller.
sub readpipe_args {
    my @command = @_;

    # Refuse an empty command instead of handing open() a bare '-|' mode.
    die "readpipe_args: no command given\n" unless @command;

    open(my $pipe, '-|', @command)
        or die "Cannot run '$command[0]': $!\n";
    my @output = <$pipe>;

    # close() on a pipe handle waits for the child and stores its status in $?.
    # No waitpid() afterwards: the child is already reaped, so the call would
    # only overwrite $? with -1.  The return value is deliberately ignored so a
    # non-zero exit from the command is left for the caller to inspect via $?.
    close $pipe;

    return wantarray ? @output : join('', @output);
}
Generated by GNU enscript 1.6.4.