http://paperlined.org/dev/src/pl/git_characterize_files/sizes.pl

#!/usr/bin/perl

# characterize the file sizes of files within ./target/

    use strict;
    use warnings;

    use Data::Dumper;
    #use Devel::Comments;           # uncomment this during development to enable the ### debugging statements


# Everything below works on paths relative to ./target/.  Abort if we cannot
# get there; otherwise we would silently characterize the wrong directory.
chdir "target"          or die "can't chdir to target: $!";


# Maps a rounded size bucket (as returned by round_size) to the list of
# file paths whose size falls into that bucket.
my %sizes;

# List-form pipe open: 'find' is run directly, without going through a shell.
open my $pin, '-|', 'find', '.', '-type', 'f'          or die "can't run find: $!";
while (my $path = <$pin>) {
    chomp $path;

    # Skip anything that lives inside a .git directory.
    next if (grep {$_ eq '.git'} split '/', $path);

    # -s yields undef when the file cannot be stat'd (e.g. it vanished after
    # find listed it).  An empty file yields a defined, false value.
    my $bytes = -s $path;
    if (!defined $bytes) {
        warn "can't stat $path: $!\n";
        next;
    }

    my $s = round_size($bytes);
    push(@{$sizes{$s}}, $path);
}

# Closing a pipe waits for the child; a false return means either an I/O
# error ($! set) or a non-zero exit status from find ($? set).  Only warn, so
# that the partial results gathered so far are still reported.
close $pin
    or warn($! ? "error closing find pipe: $!\n"
               : "find exited with status " . ($? >> 8) . "\n");

# One line per bucket, smallest bucket first: bucket size, then file count.
foreach my $size (sort {$a <=> $b} keys %sizes) {
    printf "%12s  %7d\n", commify($size), scalar(@{$sizes{$size}});
}


# Round a size down to a coarse bucket whose width grows with the number
# itself: the result is a 1 or a 5 followed by as many zeros as the input has
# digits after its first one (so the buckets are 1, 5, 10, 50, 100, 500, ...).
# This works on the decimal string rather than computing a logarithm.
#
#   round_size(4999)  => "1000"
#   round_size(5000)  => "5000"
#   round_size(0)     => "0"
#
# Takes a non-negative size; any fractional part is truncated by sprintf.
# Returns the bucket as a string of digits.
sub round_size {
    my $size = shift;
    $size = sprintf("%d", $size);

    # Zero has no leading significant digit.  Give it its own bucket rather
    # than reporting empty files as if they were 1 byte long.
    return "0" if $size == 0;

    my $leftmost = substr($size, 0, 1);
    $leftmost = ($leftmost < 5) ? 1 : 5;
    return $leftmost . "0" x (length($size) - 1);
}

# Insert a comma between each group of three digits in the integer part of a
# number, e.g. 1234567 => "1,234,567".  Digits after a decimal point are left
# alone.  Returns the formatted string.
sub commify {
    # Work on the reversed string so that digit groups can be counted off
    # from the least-significant end with a simple left-to-right scan.
    my $backwards = reverse $_[0];

    # After every three digits that are followed by another digit, add a
    # comma -- unless a '.' still lies ahead after a run of digits, which in
    # the reversed string means we are inside the fractional part.
    $backwards =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;

    my $formatted = reverse $backwards;
    return $formatted;
}

Generated by GNU enscript 1.6.4.