# Source: http://paperlined.org/dev/src/pl/bzip2/bzip2_magic.pl
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
# Scan a .bz2 file for the bzip2 start and end magic byte sequences and
# report the file offset of every occurrence.
#
# Usage: bzip2_magic.pl FILE.bz2
#
# The file is read in fixed-size chunks.  Each chunk is searched together
# with the chunk before it, so a magic sequence that straddles a chunk
# boundary is still seen.  The search resumes just past the last position
# that could already have matched inside the previous chunk, so no hit is
# recorded twice.
#
# NOTE: index() compares whole bytes, so only byte-aligned occurrences are
# found.
# NOTE(review): bzip2 packs its blocks at bit granularity, so headers that
# do not start on a byte boundary are presumably missed -- confirm against
# the format description if complete coverage is needed.
my $filename = shift or die "Please specify a .bz2 file.\n";

# Three-argument open with a lexical handle: the file name can never be
# interpreted as a mode or pipe.  ':raw' keeps the bytes untranslated.
open my $fin, '<:raw', $filename
    or die "Cannot open '$filename': $!\n";

my $start_magic = "\x31\x41\x59\x26\x53\x59";
my $end_magic   = "\x17\x72\x45\x38\x50\x90";
my $chunk_len   = 100 * 1024;

# Each entry: tag stored in @magic, byte pattern, word used in the
# progress message.
my @patterns = (
    [ "s", $start_magic, "Start" ],
    [ "e", $end_magic,   "End" ],
);

# Return the index just before the first window position that still has to
# be examined for a needle of $needle_len bytes, given that the first
# $prev_len bytes of the window were fully searched on the previous pass.
# Never lower than -1, so the first candidate index is never negative.
sub resume_offset {
    my ($prev_len, $needle_len) = @_;
    my $offset = $prev_len - $needle_len;
    return $offset < -1 ? -1 : $offset;
}

my @magic;               # [ tag, absolute file offset ] for every hit
my $prev_block  = "";    # chunk read on the previous pass
my $window_tell = 0;     # file offset of the first byte of $prev_block

CHUNK:
while (1) {
    my $this_block;
    my $bytes_read = read $fin, $this_block, $chunk_len;
    die "Error reading '$filename': $!\n" if !defined $bytes_read;
    last CHUNK if $bytes_read == 0;

    # Previous chunk + new chunk: a window of up to 2 * $chunk_len bytes.
    my $whole_block = $prev_block . $this_block;
    my $prev_len    = length $prev_block;

    print "Searching from offset ",
        resume_offset($prev_len, length $start_magic), "\n";

    for my $pattern (@patterns) {
        my ($tag, $needle, $label) = @{$pattern};

        # Hits lying wholly inside $prev_block were recorded on the
        # previous pass; start at the first position whose match would
        # reach into the newly read chunk.
        my $offset = resume_offset($prev_len, length $needle);
        while (1) {
            $offset = index($whole_block, $needle, $offset + 1);
            last if $offset < 0;
            print "$label found at offset $offset (tell currently $window_tell)\n";
            push @magic, [ $tag, $window_tell + $offset ];
        }
    }

    # Slide the window: the chunk just read becomes the "previous" one.
    $window_tell += $prev_len;
    $prev_block = $this_block;
}

close $fin or die "Error closing '$filename': $!\n";

print "Sorting\n";
@magic = sort { $a->[1] <=> $b->[1] } @magic;
foreach my $m (@magic) {
    # "end " carries a trailing space so both labels are the same width.
    printf "%s %s\n",
        $m->[0] eq "s" ? "start" : "end ",
        $m->[1];
}
# Generated by GNU enscript 1.6.4.