#!/usr/bin/env perl

use strict;
use warnings;

no warnings 'uninitialized';

# Maps each %placeholder that may appear in a format template to the regex
# fragment that replaces it.
#
# Every capturing fragment uses a named group whose name equals the
# placeholder name without the leading '%'. The field names given on the
# command line are looked up in %+ by exactly that name, so a mismatched
# group name makes the field silently come out empty.
#
# Most fragments are lazy (+? / *?) and rely on the literal text that follows
# them in the template (a space or a closing quote) to delimit the field.
my %FIELDS_RE = (
    # Everything up to the first ':' (the colon is consumed, not captured).
    '%logfile'   => '(?<logfile>[^:]*?):',
    '%host'      => '(?<host>\S+?)',
    # Bracketed timestamp of the form [dd/Mon/yyyy:hh:mm:ss +zzzz].
    '%date'      => '\[(?<date>\d{2}\/\w{3}\/\d{4}(?::\d{2}){3} [-+]\d{4})\]',
    '%request'   => '"(?<request>[^"]+)"',
    # The status code is expected inside double quotes.
    '%status'    => '"(?<status>\d+?)"',
    # Either a literal '-' or a run of digits.
    '%bytes'     => '(?<bytes>-|\d+)',
    '%referer'   => '"(?<referer>.+?)"',
    '%useragent' => '"(?<useragent>.*?)"',
    '%upstream_time' => '(?<upstream_time>[.\d]+?)',
    '%request_time'  => '(?<request_time>[.\d]+?)',
    '%size'      => '(?<size>\d+?)',
    # Placeholder for a token that must be present but is not captured.
    '%null'      => '\S+?',
    # The group must be named "cookies" (not "size") so that requesting the
    # "cookies" field returns the quoted cookie string.
    '%cookies'   => '"(?<cookies>.*?)"',
    '%scheme'    => '"(?<scheme>.*?)"',
    '%server'    => '"(?<server>.*?)"',
);

# Placeholders shared by every layout, from %host up to and including the
# %null that follows %cookies.
my @LEADING_FIELDS = qw(
    %host %null %null %date %upstream_time %request_time %size
    %request %status %bytes %referer %useragent %null %cookies %null
);

# Placeholders that close every layout.
my @TRAILING_FIELDS = qw(%scheme %server);

# Named log-line templates selectable from the command line.
#   format1 - the plain layout.
#   format2 - same as format1 with %logfile glued directly in front of %host.
#   format3 - %logfile followed by a space, plus two extra %null tokens
#             before %scheme.
my %FORMATS = (
    format1 => join(' ', @LEADING_FIELDS, @TRAILING_FIELDS),
    format2 => '%logfile' . join(' ', @LEADING_FIELDS, @TRAILING_FIELDS),
    format3 => join(' ', '%logfile', @LEADING_FIELDS, qw(%null %null), @TRAILING_FIELDS),
);

# With no arguments at all, print example invocations followed by every
# known format name and its template (sorted by name), then exit.
if (!@ARGV) {
    print <<"USAGE";
usage:
$0 format1 referer cookies
$0 format2 upstream_time request_time size

formats:
USAGE

    printf "%-10s %s\n", $_, $FORMATS{$_} for sort keys %FORMATS;

    exit;
}

# The first argument selects the template; every remaining argument is the
# name of a captured field to print, in output order.
my ($format_name, @filters) = @ARGV;

# Empty @ARGV so the diamond operator reads STDIN instead of treating the
# field names as input files.
@ARGV = ();

my $format_string = $FORMATS{$format_name};

if (!$format_string) {
    die "unknown format $format_name";
}

# Expand each %placeholder in the template into its regex fragment. The
# trailing \b stops a shorter name such as %request from matching the front
# of %request_time ('_' is a word character, so there is no boundary there).
for my $field_name (keys %FIELDS_RE) {
    my $field_re = $FIELDS_RE{$field_name};
    $format_string =~ s/$field_name\b/$field_re/g;
}

# Compile once; the pattern is matched case-insensitively and is not anchored.
my $re = qr/$format_string/i;

# Read input line by line; for each line that matches the compiled format,
# print the requested fields as one tab-separated row. Lines that do not
# match are skipped without any output.
while (my $line = <>) {
    chomp $line;

    if ($line =~ /$re/) {
        # %+ holds the named captures of the match just performed.
        my @columns = map { $+{$_} } @filters;
        print join("\t", @columns), "\n";
    }
}

