#!/usr/bin/perl

use strict;
use warnings;

use Carp qw/croak/;
use List::MoreUtils qw/firstidx firstval/;

use Yandex::YT;
use Yandex::ListUtils;
use Yandex::HashUtils;

# Forward declarations (with prototypes) of the helpers available to one-liners.
# Their bodies are not defined in this script: the real implementations are part
# of the program text produced by _generate_program().
# NOTE(review): presumably these stubs exist so that the string-eval syntax check
# in _generate_program() can parse calls such as `yield $r` or `iter {...}` the
# same way the generated program will - confirm.
sub yield($);
sub yields($;$$);
sub iter(&);

# Normalize the command line: every "--option=value" argument is split into the
# pair ("--option", "value") so that later code only has to deal with one form.
# The /s modifier lets the value contain newlines (e.g. multi-line perl code
# given as --mapper=...); without it everything after the first newline of the
# value was silently dropped.
my @ARGS = map {/^(--[\w-]+)=(.*)/s ? ($1, $2) : $_} @ARGV;

# Position of each argument inside @ARGS (for duplicates the last one wins).
my %args_idx = map {$ARGS[$_] => $_} 0..$#ARGS;

# Print the help text and exit when help is requested, when no arguments are
# given, or when the first argument is not a supported operation.
if (defined $args_idx{'--help'} || defined $args_idx{'-h'}
    || !@ARGS
    || $ARGS[0] !~ /^(?:map|reduce|map-reduce)$/
) {
    usage();
}

# Data format used by the generated program, and the YT format attributes that
# are prepended to it ("<attrs>format").
my $FORMAT = 'json';
my $DEFAULT_FORMAT_FLAGS = '<encode_utf8=false>';

if (defined (my $format_idx = $args_idx{'--format'}) ) {
    # The user supplied --format: validate it and remember its parts.
    my $format_with_flags = $ARGS[$format_idx+1];
    croak "--format requires a value" if !defined $format_with_flags;

    if ($format_with_flags =~ /^(\<[\;\[\]\w\=]+\>)?(dsv|json)$/) {
        $DEFAULT_FORMAT_FLAGS = $1 || '';
        $FORMAT               = $2;
    }
    else {
        # Report the value that was actually rejected, not the built-in default.
        croak "unsupported format: $format_with_flags";
    }

} else {
    # No --format given: pass the default one to yt explicitly.
    push @ARGS, '--format', $DEFAULT_FORMAT_FLAGS.$FORMAT;
}

# Temporary archives produced while packing the perl programs; they are removed
# in the END block.
my @CLEANUP_FILES;

if ($ARGS[0] =~ /^(map|reduce)$/) {
    # Single-stage operation: the perl code is the first inspected argument
    # that does not start with a dash.
    # NOTE(review): presumably range(1, N, 2) yields every second position
    # starting at 1, i.e. options are expected as "--name value" pairs - confirm.
    my $candidate_positions = range(1, $#ARGS+1, 2);
    my $cmd_idx = firstval {$ARGS[$_] !~ /^-/} @$candidate_positions;
    if (!defined $cmd_idx) {
        usage("Can't find perl code in arguments");
    }
    _pack_prog_fix_args($ARGS[0], $cmd_idx);
}
elsif ($ARGS[0] eq 'map-reduce') {
    # The reducer is mandatory; its code is the argument right after --reducer.
    my $reducer_idx = $args_idx{'--reducer'};
    usage("Can't find perl code for --reducer") unless $reducer_idx;
    _pack_prog_fix_args('reduce', $reducer_idx + 1);

    # The mapper and the reduce combiner are optional and packed only if present.
    for my $stage (['map', '--mapper'], ['combine', '--reduce-combiner']) {
        my ($cmd_type, $option) = @$stage;
        my $option_idx = $args_idx{$option};
        _pack_prog_fix_args($cmd_type, $option_idx + 1) if $option_idx;
    }
}

use Yandex::Shell;

# Echo the final command line (shell-quoted) and then run it.
print join " ", 'yt', map {yash_quote($_)} @ARGS;
print "\n";

# system() returns non-zero on any failure, but $! is only meaningful when the
# command could not be started at all ($? == -1). Otherwise the child's exit
# status or terminating signal is encoded in $?.
if (system('yt', @ARGS) != 0) {
    if ($? == -1) {
        croak "failed to execute yt: $!";
    }
    elsif ($? & 127) {
        croak sprintf("yt died with signal %d", $? & 127);
    }
    else {
        croak sprintf("yt exited with status %d", $? >> 8);
    }
}


# Turn the one-liner stored at $ARGS[$cmd_idx] into a packed program and patch
# @ARGS accordingly.
#   $cmd_type - 'map', 'reduce' or 'combine'
#   $cmd_idx  - position in @ARGS of the perl code for this stage
# The code is replaced by the command returned from Yandex::YT::_yt_prog_tar,
# the matching "...-local-file" option with the archive is appended to @ARGS,
# and the archive is registered in @CLEANUP_FILES.
sub _pack_prog_fix_args {
    my ($cmd_type, $cmd_idx) = @_;

    my $program = _generate_program($cmd_type, $ARGS[$cmd_idx]);
    my ($tar, $yt_cmd) = Yandex::YT::_yt_prog_tar(prog => $program);

    $ARGS[$cmd_idx] = $yt_cmd;

    # Plain map/reduce operations have a single --local-file option, while
    # map-reduce has a separate option for every stage.
    my $file_option;
    if ($ARGS[0] =~ /^(map|reduce)$/) {
        $file_option = "--local-file";
    }
    elsif ($cmd_type eq 'combine') {
        $file_option = "--reduce-combiner-local-file";
    }
    else {
        $file_option = "--$cmd_type-local-file";
    }

    push @ARGS, $file_option, $tar;
    push @CLEANUP_FILES, $tar;
}


# Build the text of a standalone perl program that wraps a user one-liner.
#   $cmd_type - 'map', 'reduce' or 'combine'
#   $cmd      - perl code of the one-liner
# For 'map' the code runs once per record ($r); otherwise it runs once per
# reduce group ($g) and the --reduce-by columns from @ARGS are passed to the
# streaming object.
# Returns the program source as a string.
# Croaks when --reduce-by is missing or malformed (reduce/combine only) or when
# the one-liner does not compile.
sub _generate_program {
    my ($cmd_type, $cmd) = @_;

    my $streaming_params = "format => '$FORMAT'";
    my $main;
    if ($cmd_type eq 'map') {
        $main = "while(my \$r = \$s->get) {local \$_ = \$s->{_line}; $cmd}";
    } else {
        # reduce | combine
        # Load Data::Dumper explicitly: this file never imports it, so the call
        # below would otherwise depend on some other module having loaded it.
        require Data::Dumper;

        # Collect the value following every --reduce-by option.
        my @reduceby = map {$ARGS[$_] eq '--reduce-by' ? $ARGS[$_+1] : ()} 0..$#ARGS-1;
        croak "Incorrect --reduce-by" if !@reduceby || grep {!/^\w+$/} @reduceby;
        $streaming_params .= ', reduceby => '.Data::Dumper->new([\@reduceby])->Terse(1)->Indent(0)->Dump();
        $main = "while(my \$g = \$s->get_group) {local \$_ = \$s->{_line}; $cmd}";
    }

    # Syntax check: compile (without running) the main loop inside a throw-away
    # sub so that errors in the one-liner are reported before anything is packed.
    { no strict; eval "sub ___test_$cmd_type {my (\$s,\$r,\$g); $main; }"; }
    croak $@ if $@;

    return "#!/usr/bin/perl

use List::Util qw/sum/;

use FindBin qw/\$Bin/;
use lib \$Bin;

\$Yandex::YT::DEFAULT_FORMAT = '$FORMAT';
\$Yandex::YT::DEFAULT_FORMAT_FLAGS = {'$FORMAT' => '$DEFAULT_FORMAT_FLAGS'};

use Yandex::YT;
use Yandex::ListUtils;
use Yandex::HashUtils;

my \$s = Yandex::YT::Streaming->new($streaming_params);
my \$r;
my \$g;
sub yield (\$) {
    \$s->yield(\$_[0]);
}
sub yields (\$;\$\$) {
    my \$h = shift;
    my \$kn = \$_[0] || 'key';
    my \$vn = \$_[1] || 'value';
    while(my (\$k, \$v) = each \%\$h) {
        \$s->yield(+{\$kn => \$k, \$vn => \$v});
    }
}
sub iter (&) {
    while(\$r = \$s->get) {
        local \$_ = \$s->{_line};
        \$_[0]->();
    }
}

$main
";
}


# Print the help text to STDOUT and terminate the script.
#   $msg - optional error message printed before the help text
# Exits with status 1 when a message is given and 0 otherwise; never returns.
sub usage {
    my ($msg) = @_;
    print "$msg\n\n" if defined $msg;
    print
q#Write perl one-liners for YT! (like perl -nae '')

yt-perl map '$S{$r->{id}}++; }{ yields \%S' --src 'tmp/1{uid}' --dst tmp/2
yt-perl map-reduce --mapper '$r->{uid} *= 100; yield $r' --reduce-by x --reducer 'iter {$g->{cnt}++}; yield $g' --src 'tmp/1{uid}' --dst tmp/2

What you can use in one-liner:
  $s - Yandex::YT::Streaming object (supports yield(), get(), get_all())
  yield($) ~ $s->yield
  yields({a=>1, b=>2}) ~ yield {key => 'a', value => 1}; yield {key => 'b', value => 2};
  yields({ip1 => 1, ip2=>2}, ip => 'cnt') ~ yield {ip => 'ip1', cnt => 1}; yield {ip => 'ip2', cnt => 2};
  iter {CODE}; - full analogue of "while(my $r = $s->get) {CODE;}"
  $r - current record (in map or iter only)
  $g - current reduce group (in reduce only)
  $_ - current line
#;
    exit defined $msg ? 1 : 0;
}


# Delete every temporary archive registered in @CLEANUP_FILES when the
# interpreter shuts down.
END {
    for my $tmp_file (@CLEANUP_FILES) {
        unlink $tmp_file;
    }
}
