http://paperlined.org/dev/src/pl/bzip2/bzip2_magic.pl

#!/usr/bin/perl

    use strict;
    use warnings;

    use Data::Dumper;

# Scan a .bz2 file for the two 48-bit magic numbers of the bzip2 format and
# print the file offset of every occurrence, sorted by position.
#
# Usage: bzip2_magic.pl FILE.bz2
#
# NOTE: the search is byte-wise.  bzip2 packs its blocks as a bit stream, so
# (apart from the first block) a magic number is not guaranteed to start on a
# byte boundary; occurrences that are not byte-aligned are not detected here.

# Block-header magic (the BCD digits of pi) and end-of-stream magic (the BCD
# digits of sqrt(pi)).
my $start_magic = "\x31\x41\x59\x26\x53\x59";
my $end_magic   = "\x17\x72\x45\x38\x50\x90";

# find_magic($fh [, $chunk_len])
#
# Reads $fh to end-of-file in chunks of $chunk_len bytes (default 100 KiB)
# and returns a list of [ $type, $file_offset ] pairs, where $type is "s" for
# a start magic and "e" for an end magic.  Pairs are returned in discovery
# order, not sorted.  Progress messages are printed to STDOUT.
#
# Dies if $chunk_len is shorter than a magic number or if a read fails.
sub find_magic {
    my ($fh, $chunk_len) = @_;
    $chunk_len = 100 * 1024 unless defined $chunk_len;

    my $magic_len = length $start_magic;    # both magic numbers are six bytes long
    die "Chunk length must be at least $magic_len bytes\n"
        if $chunk_len < $magic_len;

    my @specs = (
        [ 's', 'Start', $start_magic ],
        [ 'e', 'End',   $end_magic ],
    );

    my @magic;
    my $prev_chunk = '';

    # File offset of the first byte of the current search window.
    my $window_tell = tell $fh;
    $window_tell = 0 if $window_tell < 0;   # tell() is unavailable on some handles

    while (1) {
        my $bytes_read = read $fh, my $chunk, $chunk_len;
        die "Read error: $!\n" unless defined $bytes_read;
        last if $bytes_read == 0;

        # Search the previous chunk plus the new one, so that a magic number
        # straddling the chunk boundary is still seen in one piece.
        my $window = $prev_chunk . $chunk;

        # The previous chunk has already been searched on its own.  Only
        # matches that reach into the new chunk are new, and the earliest of
        # those starts (magic_len - 1) bytes before the boundary.  Starting
        # any earlier would report the same occurrence twice.
        my $search_from = length($prev_chunk)
                        ? length($prev_chunk) - $magic_len
                        : -1;
        print "Searching from offset $search_from\n";

        for my $spec (@specs) {
            my ($tag, $label, $magic) = @{$spec};
            my $offset = $search_from;
            while (1) {
                $offset = index($window, $magic, $offset + 1);
                last if $offset < 0;
                print "$label found at offset $offset (tell currently $window_tell)\n";
                push @magic, [ $tag, $window_tell + $offset ];
            }
        }

        # Slide the window forward: the new chunk becomes the previous one.
        $window_tell += length $prev_chunk;
        $prev_chunk = $chunk;
    }

    return @magic;
}

# main($filename)
#
# Command-line entry point: scans the named file and prints one line per
# magic number found, ordered by file offset.
sub main {
    my ($filename) = @_;
    die "Please specify a .bz2 file.\n"
        unless defined $filename && length $filename;

    # Three-argument open, so the filename cannot be interpreted as a mode or
    # a pipe; binmode because the data is binary.
    open my $fin, '<', $filename
        or die "Cannot open '$filename': $!\n";
    binmode $fin;

    my @magic = find_magic($fin);

    close $fin
        or warn "Cannot close '$filename': $!\n";

    print "Sorting\n";
    @magic = sort { $a->[1] <=> $b->[1] } @magic;
    foreach my $m (@magic) {
        printf "%s %s\n",
            $m->[0] eq "s" ? "start" : "end  ",
            $m->[1];
    }
    return;
}

# Run only when executed as a script, so the subs can be loaded without
# side effects.
main(@ARGV) unless caller;

Generated by GNU enscript 1.6.4.