http://paperlined.org/dev/src/pl/git_characterize_files/sizes.pl
#!/usr/bin/perl
# characterize the file sizes of files within ./target/
use strict;
use warnings;
use Data::Dumper;
#use Devel::Comments; # uncomment this during development to enable the ### debugging statements
# Main script body: list every regular file below ./target/ (ignoring anything
# inside a .git directory), group the files by their rounded size, and print
# one line per group -- the group's size label and the number of files in it.

# Without this check a missing ./target/ would silently make the script
# characterize the current directory instead.
chdir "target" or die "Unable to chdir to 'target': $!\n";

# Maps a rounded size (as returned by round_size) to an arrayref of the paths
# that fall into that group.
my %sizes;

# List-form piped open: find is run directly, with no shell involved.
open my $pin, '-|', 'find', '.', '-type', 'f' or die "Unable to run find: $!\n";
while (my $path = <$pin>) {
    chomp $path;

    # Skip any path that has a component named exactly ".git".
    next if grep { $_ eq '.git' } split '/', $path;

    # -s gives undef when the file can't be stat'ed (for instance, it was
    # removed after find listed it). Report it instead of counting it.
    my $bytes = -s $path;
    if (!defined $bytes) {
        warn "Unable to stat '$path': $!\n";
        next;
    }

    push @{ $sizes{ round_size($bytes) } }, $path;
}

# Closing a piped handle waits for find and reports its exit status. A
# non-zero exit (e.g. an unreadable subdirectory) is only warned about, so
# the files that were found still get reported.
if (!close $pin) {
    warn $!
        ? "Error closing pipe from find: $!\n"
        : "find exited with status " . ($? >> 8) . "\n";
}

# Print the groups from smallest to largest size.
foreach my $size (sort { $a <=> $b } keys %sizes) {
    printf "%12s %7d\n", commify($size), scalar(@{ $sizes{$size} });
}
# Round a size down to a "1" or a "5" followed by zeros (1, 5, 10, 50, 100,
# ...), so that the width of each group grows along with the number itself.
# This works on the decimal digits of the number rather than calling log():
# the digit count picks the power of ten, the leading digit picks 1 versus 5.
#
# Takes:   a size (a non-negative integer is expected).
# Returns: the rounded size as a string of digits. Zero, empty, undefined and
#          negative sizes all yield "0".
sub round_size {
    my $size = shift;

    # Zero has no leading digit to round down to. Give it a group of its own
    # rather than letting it fall into the "1" group via the "< 5" test below.
    return '0' unless $size;

    # Truncate to a plain integer string so the digit handling below is safe.
    $size = sprintf("%d", $size);
    return '0' if $size <= 0;

    my $leftmost = substr($size, 0, 1);
    $leftmost = ($leftmost < 5) ? 1 : 5;

    # Keep the number of digits, replacing everything after the first with 0.
    return $leftmost . "0" x (length($size) - 1);
}
# Insert thousands separators into a number, e.g. 1234567 becomes "1,234,567".
# Digits to the right of a decimal point are left untouched.
sub commify {
    # Work on the reversed string, so that groups of three digits can be
    # matched starting from the units end of the number.
    my $backwards = reverse $_[0];

    # Put a comma after each group of three digits that is followed by another
    # digit, except where the remaining digits run up to a "." -- in the
    # reversed string those are the digits of the fractional part.
    $backwards =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;

    # Flip the string back; scalar forces a string reversal in any context.
    return scalar reverse $backwards;
}
Generated by GNU enscript 1.6.4.