# Detect time-gaps in log entries.
#
# This returns a closure that does stream-processing.  The closure will
# trigger end-chunk and begin-chunk events at each time-gap.
# (see http://docstore.mik.ua/orelly/perl/advprog/ch04_04.htm#ch04-pgfId-945065 )
#
# The event-stream should be ordered by time, with the earlier events
# processed first.
#
# gap_detector_factory()
#     arguments:
#         $gap_seconds      What are the minimum seconds that need to pass in
#                           between events for us to consider it to be a
#                           "large enough" gap?  (A difference exactly equal
#                           to $gap_seconds counts as a gap.)
#         $cb_begin_chunk   The callback that will be called when a
#                           chunk-begin is detected
#         $cb_end_chunk     The callback that will be called when a chunk-end
#                           is detected
#     returns:
#         $gap_detector     A closure.
#
# $gap_detector->()         The caller should call this for each event seen.
#     arguments:
#         $event_data       Event data.  This is an opaque datastructure...
#                           we don't care what's inside it, we just pass it
#                           on directly.  False values (0, '', undef) are
#                           passed on like any other value.
#         $event_time       The time this event occurred (a number, in the
#                           same units as $gap_seconds)
#         $eof              At the end of the stream, this MUST be called an
#                           extra time, with $eof set to true.
#                           (when $eof is true, the $event_data and
#                           $event_time are ignored)
#     returns:
#         nothing
#     dies:
#         if it is called again after the $eof call
#
# $cb_begin_chunk->()       A callback that gets called whenever the beginning
#                           of a chunk is detected.
#     arguments:
#         $event_data       The $event_data that was seen at the beginning of
#                           this chunk.
#     returns:
#         return value ignored
#
# $cb_end_chunk->()         A callback that gets called whenever the end of a
#                           chunk is detected.
#     arguments:
#         $event_data       The $event_data that was seen at the end of this
#                           chunk.
#     returns:
#         return value ignored
sub gap_detector_factory {
    my ($gap_seconds, $cb_begin_chunk, $cb_end_chunk) = @_;

    my $last_time_seen;     # time of the most recent event, undef before the first one
    my $last_event_data;    # payload of the most recent event
    my $chunk_open = 0;     # true between a chunk-begin and its matching chunk-end
    my $after_eof  = 0;     # true once the $eof call has been processed

    return sub {
        my ($event_data, $event_time, $eof) = @_;

        die "gap_detector_factory()'s stream-processor was called after EOF"
            if $after_eof;

        if ($eof) {
            # Close the chunk that is still open, if any.  This is decided by
            # the explicit flag rather than by the truthiness of the payload,
            # because the payload is opaque and may legitimately be false.
            if ($chunk_open) {
                $cb_end_chunk->($last_event_data);
                $chunk_open = 0;
            }
            $after_eof = 1;
            return;
        }

        # The very first event always starts a chunk; after that, a new chunk
        # starts whenever the distance to the previous event is large enough.
        if (   !defined($last_time_seen)
            || ($event_time - $last_time_seen >= $gap_seconds))
        {
            $cb_end_chunk->($last_event_data) if $chunk_open;
            $cb_begin_chunk->($event_data);
            $chunk_open = 1;
        }

        $last_time_seen  = $event_time;
        $last_event_data = $event_data;

        # Bare return: "nothing" in both scalar and list context.
        return;
    };
}

# Manual smoke test for gap_detector_factory().
#
# Feeds ten events through a detector with a 15-second threshold and prints
# each chunk boundary via Dumper().  With the times below, the boundaries are:
#     begin step 1, end step 4, begin step 5, end step 9,
#     begin step 10, end step 10
sub test_gap_detector {
    my $gap_detector = gap_detector_factory(
        15,
        sub { print "Chunk-begin: ", Dumper($_[0]) },
        sub { print "Chunk-end: ",   Dumper($_[0]) },
    );

    my @events = (
        { step => 1,  time => 100 },
        { step => 2,  time => 105 },
        { step => 3,  time => 110 },
        { step => 4,  time => 120 },
        { step => 5,  time => 150 },
        { step => 6,  time => 155 },
        { step => 7,  time => 160 },
        { step => 8,  time => 165 },
        { step => 9,  time => 170 },
        { step => 10, time => 200 },
    );

    foreach my $event_data (@events) {
        $gap_detector->($event_data, $event_data->{time});
    }

    # Mandatory end-of-stream call so the final chunk gets closed.
    $gap_detector->(undef, undef, 1);
}