package Utils::Funcs;
use strict;

use utf8;

# функции общего назначения: работа со строками, со структурами данных, алгоритмы, ...
# базовая библиотека, не использует Utils::Common

use List::Util qw(sum max);
use Storable qw(dclone);
use Encode qw();
use Digest::MD5 qw();
use JSON::XS qw();

use base qw(Exporter);
our @EXPORT_OK = (
    'hash2str',
    'str2hash',
    'expand_names',
    'convert_time_interval',
    'prm_list_filter',
    'correlation',  # коэффициент корреляции между двумя последовательностями
    'sort_keys_asc',    # Отсортировать ключи хэша по возрастанию значений
    'sort_keys_desc',   # Отсортировать ключи хэша по убыванию значений
    qw(encode_quotes decode_quotes),
    'is_rus',
    'is_translit',
    'ratio_str',
    'deep_diff',
    'patch_hash',
    'print_diff',  # разница двух массивов
    'encode_tsv', 'decode_tsv', 'parse_tskv_line',
    'get_data_md5',  # md5 для хэша или массива
    'content_type_is_html',
    'content_type_is_xhtml',
    'content_type_is_xml',
    'content_is_text',
    'get_urls_from_str',
    'get_python_options',  # запускает python скрипт ожидает json выхлоп в stdout, отдает хеш. Используем для получения опций из питона
    'is_offer_id_valid',
    'is_price_valid',
);


sub get_quantiles {
    my ($values, $rate) = @_;
    my $sorted_values = [ sort { $a <=> $b } @$values ];
    my $count = @$values;
    my $ind = ($count - 1) * $rate;
    my $result = $sorted_values->[$ind];
    return $result;
}

sub hash2str {
    my $hash = shift;
    return join(',', map { $_.':'.$hash->{$_} } keys %$hash);
}

sub str2hash {
    my $str = shift;
    my %h = map { split /:/, $_, 2 } split /,/, $str;
    return \%h;
}

# host0[12][fg] -> host01f, host02f, host01g, host02g
# host{08,09,10}[ih] -> host08i, host08h, host09i, host09h, host10i, host10h
# вложенные скобки не поддерживаются, корректность скобок не проверяется!
sub expand_names {
    my $item = shift;
    my @variants = ('');
    my @parts = ($item =~ m/([^\]\[\{\}]+|[\{\[][^\{\}\[\]]+[\]\}])/g);
    for my $part (@parts) {
        my @c;
        if ($part =~ /^\[/) {
            $part =~ s/(^\[|\]$)//g;
            @c = split //, $part;
        } elsif ($part =~ /^\{/) {
            $part =~ s/(^\{|\}$)//g;
            @c = split /,/, $part;
        } else {
            @c = ($part);
        }
        @variants = map { my $old = $_; map { $old . $_ } @c } @variants;
    }
    return @variants;
}

# конвертация времени в секундах в человекочитаемый формат
sub convert_time_interval {
    my $time = shift;

    my $days = int($time / 86400);
    $time -= ($days * 86400);
    my $hours = int($time / 3600);
    $time -= ($hours * 3600);
    my $minutes = int($time / 60);
    my $seconds = $time % 60;

    my @time_items;
    push @time_items, $days.'d' if $days;
    push @time_items, $hours.'h' if $hours;
    push @time_items, $minutes.'m' if $minutes;
    push @time_items, $seconds.'s';
    return join (' ', @time_items);
}

sub prm_list_filter {
    my ($list, $prm) = @_;

    return $list unless @$list;

    my %prm = %{ $prm || {} };

    #$proj->dd($list);
    if( $prm{'filter'} ){ #Накладываем фильтр
        my $flt = $prm{'filter'};

        sub prm_list_filter_hflt {
            my ($list, $flt, $ortype) = @_;

            ##Фильтры сравнения
            #my $spre = '[<>]|\sLIKE|\sREGEXP$';
            #my @fcmp = grep { /$spre/ } @fk;
            #@fk = grep { ! /$spre/ } @fk;

            #for my $l (@$list){
            #    if()
            #    push(@arr, $l) unless grep { $l->{$_} ne $flt->{$_} } @fk;
            #}

            my @fltrs = ();

            for my $fc ( keys %$flt ){
                if( ref($flt->{$fc}) eq 'HASH' ){
                    $list = prm_list_filter_hflt($list, $flt->{$fc}, 1);
                }elsif($fc =~ /^\s*(\S+)\s*((?:>|<)=?|LIKE|is NULL)/){
                    my ($fld, $op, $val) = ($1, $2, $flt->{$fc});
                    #$proj->dd([$fld, $op, $val]);
                    #Приводим фильтрацию SQL к перловым функциям
                    my $ffh = {
                        '>' => sub { return $_[0] gt $val },
                        '>=' => sub { return $_[0] ge $val },
                        '<' => sub { return $_[0] lt $val },
                        '<=' => sub { return $_[0] le $val },
                        'is NULL' => sub { return ! defined($_[0]) },
                        'LIKE' => sub { my $vv = $val; $vv =~ s/^\%|\%$//g; return $_[0] =~ /$vv/; },
                    };
                    push( @fltrs, [ $fld, ($ffh->{$op} || sub { return 1; }), ] );
                    #my $f = $ffh->{$op} || sub { return 1; }; #Выбираем функцию фильтрации
                    #@arr = grep { $f->($_->{$fld}) } @arr; #Непосредственно фильтрация
                }else{
                    push( @fltrs, [ 
                        $fc, 
                        sub { 
                            my @val = @_;
                            return ((grep { $val[0] eq $_ } @{$flt->{$fc}}) > 0)   if ref($flt->{$fc}) eq 'ARRAY';
                            return $val[0] eq $flt->{$fc};  # TODO fix for undef $val[0]:   return (defined $val[0]  ?  ($val[0] eq $flt->{$fc})  :  0)     или    $val[0] // ''  ?
                        } 
                    ] );
                }
            }

            sub prm_list_filter_hflt_fltr {
                my ( $el, $fltrs, $ortype ) = @_;

                if($ortype){
                    for my $f (@$fltrs){
                        if($f->[1]->( $el->{$f->[0]})){
                             return 1;
                        }
                    }
                    return 0;
                }else{
                    for my $f (@$fltrs){
                        unless($f->[1]->( $el->{$f->[0]})){
                             return 0;
                        }
                    }
                    return 1;
                }
            }

            $list = [ grep { prm_list_filter_hflt_fltr( $_, \@fltrs, $ortype ) } @$list ];
            return $list;
        }

        if((ref($flt) eq 'HASH')&&((keys %$flt) > 0)){
            $list = prm_list_filter_hflt($list, $flt);
        } elsif (ref($flt) eq 'ARRAY') {
            # TODO ?
        }
    }
    if( $prm{'group_by'} ){ #Группировка
        my @grflds = split(/\s*,\s*/, $prm{'group_by'});
        my $h = {};
        my $cntname = '';
        if($prm{'gfields'}){
            for my $f ( @{ $prm{'gfields'} } ){
                $cntname = $1 if $f =~ /^\s*count\(\s*\*\s*\)\s+(.+)/;     
            }
        }
        for my $el (@$list){
            my $k = join(' // ', map {$_ // ''}  @{$el}{@grflds});
            unless( $h->{$k} ){
                $h->{$k} = { %$el };
                $h->{$k}{$cntname} = 0;
            }
            $h->{$k}{$cntname}++ if $cntname;
        }
        $list = [ map {$h->{$_}} sort keys %$h ];
    }
    if( $prm{'order_by'} ){ #Сортировка
        my ($srtf, $desc) = ($prm{'order_by'} =~ /(\S+)\s?(desc)?/i);
        $desc = ($desc // 0) ? 1 : 0;
        my $numsort = (grep {($_->{$srtf} // '') !~ /^(\s*-?\d*[.,]?\d*\s*|\s*-*\s*|n\/a)$/} @$list) ? 0 : 1;  # Числа, пустые строки или '-'.
        my $is_percents = (grep {($_->{$srtf} // '') !~ /^(\s*-?\d*[,.]?\d*\s?%\s*|\s*-*\s*|\s*)$/} @$list) ? 0 : 1;   # Весь столбец - проценты. Допустимы пробелы, пустые строки и '-'.
        #$proj->dd([$srtf, $desc, $is_percents, $numsort, ]);
        if($numsort or $is_percents){
            $list = [ 
                (sort { $a->{$srtf} <=> $b->{$srtf} }  grep { ($_->{$srtf} // '') =~ /[0-9]/} @$list),  # Числа
                (grep { ($_->{$srtf} // '') =~ /^[^0-9]*$/} @$list),   # Пробелы и прочерки
            ];
            $list = [reverse @$list]  if $desc;
        } else {
            $list = [ sort { ($a->{$srtf} cmp $b->{$srtf}) * ($desc ? -1 : 1) } @$list ];
        }
    }
    if( $prm{'limit'} ){ #Отработка ограничения
        my $maxcc = @$list - 1;
        if( $prm{'limit'} =~ /\s*(\d+)\s*,\s*(\d+)/ ){
            my ($fr, $cnt) = ($1, $2);
            my $to = $fr+$cnt-1;
            $to = $maxcc if $to > $maxcc; 
            #$proj->dd([$fr, $to]);
            $list = [ @{$list}[$fr .. $to] ];
        }elsif( $prm{'limit'} =~ /\s*(\d+)/ ){
            my $cnt = $1;
            my $to = $cnt-1;
            $to = $maxcc if $to > $maxcc; 
            #$proj->dd([0, $to]);
            $list = [ @{$list}[0 .. $to] ];
        }
    }
    return $list;
}

sub correlation {
    my $x = shift;
    my $y = shift;

    return undef if scalar(@$x) != scalar(@$y);

    # считаем в один проход
    my $N = scalar(@$x);
    my $xy_sum = 0;
    my $x_sum = 0;
    my $y_sum = 0;
    my $x_sumsq = 0;
    my $y_sumsq = 0;
    for my $i (0..$N-1) {
        my $x_i = $x->[$i];
        my $y_i = $y->[$i];

        $xy_sum += $x_i * $y_i;
        $x_sum += $x_i;
        $x_sumsq += $x_i**2;
        $y_sum += $y_i;
        $y_sumsq += $y_i**2;
    }
    
    my $var_x = sqrt($N * $x_sumsq - $x_sum**2);
    my $var_y = sqrt($N * $y_sumsq - $y_sum**2);

    if (!$var_x && !$var_y) {
        # два константных ряда, считаем скоррелированными
        return 1;
    } elsif (!($var_x * $var_y)) {
        # константа и не-константа, считаем нескоррелированными
        return 0;
    } else {
        return ($N * $xy_sum - $x_sum * $y_sum) / $var_x / $var_y;
    }
}

# Отсортировать ключи хэша по возрастанию значений
# На входе - ссылка на хэш
sub sort_keys_asc {
    my $h = shift;
    return sort {
        $h->{$a} <=> $h->{$b}
        or  $a cmp $b
    } keys %$h;
}

# Отсортировать ключи хэша по убыванию значений
# На входе - ссылка на хэш
sub sort_keys_desc {
    my $h = shift;
    return sort {
        -($h->{$a} <=> $h->{$b})
        or  $a cmp $b
    } keys %$h;
}

# преобразовать кавычки в формат, понятный БК (~0)
sub encode_quotes {
    my $text = shift;
    return $text unless $text =~ /^\s*"(.*)"\s*$/;
    return $1.' ~0';
}
# преобразовать БК-шный формат в обычные кавычки
sub decode_quotes {
    my $text = shift;
    return $text unless $text =~ /^(.*)\s+~0\s*$/;
    return '"'.$1.'"';
}

sub is_rus {
    my $str = shift;
    return 1 if $str =~ m/[а-яё]/i;
    return 0;
}

# 2 of 12 (16.6%)
sub ratio_str {
    my $cnt = shift;
    my $tot = shift;
    return sprintf "%d of %d (%.1f %%)", $cnt, $tot, 100 * $cnt / ($tot || 1);
}

# Является ли пара слов (например, гипотезы контекстных синонимов) транслитом.
# На входе:
#   $phs - [$word1, $word2]     - ссылка на массив из двух слов
# Возвращает 1, если транслит; 0, если не транслит; undef, если недопустимые входные данные
sub is_translit {
    my ($phs) = @_;

    #return if grep { m/^[\s\-]*$/ } @$phs;
    #return if grep { m/ / } @$phs;

    my $ph_ru = (grep { m/[а-яё]/i } @$phs)[0];
    my $ph_en = (grep { not m/[а-яё]/i } @$phs)[0];
    return unless (defined $ph_ru  and  defined $ph_en);
    return unless (
            $ph_en =~ m/^[a-z\- 0-9]+$/i
        and $ph_ru =~ m/^[а-яё\- 0-9]+$/i
    );

    my @en2ru_letters = (
        [ a => 'аэеоея'], [ b => 'бв'], [ c => 'кцсч'], [ d => 'д'], [ e => 'аеэи'], [ f => 'ф'],
        [ g => 'гж'], [ h => 'хг'], [ i => 'ийеы'], [ j => 'джьй'], [ k => 'к'], [ l => 'л'],
        [ m => 'м'], [ n => 'н'], [ o => 'оау'], [ p => 'п'], [ q => 'к'], [ r => 'р'],
        [ s => 'сзш'], [ t => 'тц'], [ u => 'аую'], [ v => 'в'], [ w => 'ву'], [ x => 'к'],
        [ y => 'ийыу'], [ z => 'зцс'],
    );
    my %en2ru_letters = map { $_->[0] => { map { $_ => 1 } (split m//, $_->[1]) } }  @en2ru_letters;
    my @en2ru_letters_re = map { my @a=@{$_}; [map { my $ll=$_; qr[^([$ll])] } @a]}  @en2ru_letters;
    #my @en_ru_re = ( [qr/^sh/, qr/^ш/], [qr/^ch/, qr/^ш/], [qr/^ch/, qr/^ч/], [qr/^sch/, qr/^ш/], [qr/^x/, qr/^экс/], [qr/^i/, qr/^ай/] );
    my @en_ru_re = (
        [qr/^(sh)/, qr/^(ш|щ|с)/],  [qr/^(ch)/, qr/^(к|ш|ч)/],  [qr/^(tch|tsch)/, qr/^(ч)/],  [qr/^(sch)/, qr/^(ш|щ)/],  [qr/^(zh)/, qr/^(ж|чж)/],
        [qr/^(ch)/, qr/^(сш)/],  [qr/^(sc)/, qr/^(с)/],
        [qr/^(gh)/, qr/^(г)/],
        [qr/^(g?zh)/, qr/^(чж)/],
        [qr/^(kn|ng|gn)/, qr/^(н)/],

        [qr/^(j|g)/, qr/^(ж|дж|джи)/],  [qr/^(j)/, qr/^(джей)/],
        [qr/^(x)/, qr/^(экс|икс|кс|кз|х)/],
        [qr/^(j|g)/, qr/^(джи)/],  [qr/^(b)/, qr/^(би)/],  [qr/^(d)/, qr/^(ди)/],  [qr/^(q)/, qr/^(ку|кю|кью)/],    # TODO 
        # TODO
        #(map { my ($e,$r)=@{$_}; [qr/^($e)/, qr/^($rи)/] } ( ['b','б'], ['c','с'], ['d','д'], ['g','д?ж'], ['j','д?ж'], ['p','п'], ['t','т'], ['v','в'], )),
        #(map { my ($e,$r)=@{$_}; [qr/^($e)/, qr/^($rэ)/] } ( ['b','б'], ['c','ц'], ['d','д'], ['p','п'], )),
        #(map { my ($e,$r)=@{$_}; [qr/^($e)/, qr/^(э$r)/] } ( ['f','ф'], ['l','л'], ['m','м'], ['n','н'], ['r','р'], )),
        # g h j k q r w x y z

        [qr/^(ch)/, qr/^(к|х)/],  [qr/^(ck)/, qr/^(к)/],  [qr/^(tz|ts)/, qr/^(ц|тц)/],
        [qr/^(dh)/, qr/^(д)/],  [qr/^(th)/, qr/^(т|ф|с|з)/],  [qr/^(ph)/, qr/^(ф)/],
        [qr/^(kh)/, qr/^(х)/],  [qr/^(gh)/, qr/^(х)/],
        [qr/^(qu)/, qr/^(кв|к)/],
        [qr/^(cc)/, qr/^(ч)/],  [qr/^(ss)/, qr/^(ш)/],
        [qr/^(wh)/, qr/^(в|у)/],  [qr/^(woo)/, qr/^(у)/],
        [qr/^()/, qr/^(ль)/],
        [qr/^(ui)/, qr/^(и|ь?ю)/],  [qr/^([ui]e)/, qr/^(ь?е)/],
        [qr/^(a)/, qr/^(эй|ай|ей|ае)/],  [qr/^(eu)/, qr/^(о|е)/],  [qr/^(i|y)/, qr/^(ай|яй)/],  [qr/^(oo|ou|iu|ui)/, qr/^(у)/],  [qr/^(ae)/, qr/^(а|е|э)/],
        [qr/^(ou)/, qr/^(эй|ау)/],  [qr/^(ow)/, qr/^(ау)/],  [qr/^(au|ou|oa)/, qr/^(о|а)/],  [qr/^(oa)/, qr/^(оу)/],  [qr/^(oi)/, qr/^(уа)/],  [qr/^(o)/, qr/^(оу)/],
        [qr/^(i?ew)/, qr/^(ью)/],  [qr/^(ea)/, qr/^(а|и|е|э)/],  [qr/^(ea)/, qr/^(ей|эй|ея)/],  [qr/^(ay)/, qr/^(э)/],
        [qr/^(ee|ea|ie|io|eo|ey)/, qr/^(и)/],
        [qr/^(y)/, qr/^(ие)/],
        [qr/^(yu)/, qr/^(е)/],  # TODO - ?    hyundai хендай
        [qr/^(ia)/, qr/^(а|я|ья)/],
        [qr/^(ay)/, qr/^(ей|эй)/],
        [qr/^(ua)/, qr/^(а)/],
        [qr/^(oh)/, qr/^(о)/],
        [qr/^(au)/, qr/^(ав)/],  [qr/^(eu)/, qr/^(ев)/],
        [qr/^(i)/, qr/^([ае][ий]|ае|айе)/],
        [qr/^([yij]?u)/, qr/^(ь?ю)/],
        [qr/^(ue)/, qr/^(ю)/],
        [qr/^(you)/, qr/^(ю|я)/],
        [qr/^(yea)/, qr/^(е)/],
        [qr/^(eh|ah|ai)/, qr/^(е|э)/],
        [qr/^([yj])/, qr/^(ь)/],  [qr/^([yj]a)/, qr/^(а?я)/],  [qr/^([yj]o)/, qr/^(ё|е)/],  [qr/^([yj]e)/, qr/^(е)/],
        [qr/^(er)/, qr/^(э|е)/],    # TODO
        [qr/^(h)/, qr/^()/],

        [qr/^(igh)/, qr/^(а[ий])/],
        [qr/^(ght)/, qr/^(т)/],
        [qr/^(gn)/, qr/^(н)/],
        [qr/^(auto)/, qr/^(авто)/], [qr/^(euro)/, qr/^(евро)/],
        [qr/^(tion)/, qr/^(шн|шен|шион|цион)/],

        [qr/^()/, qr/^(ь|ъ)/],
        [qr/^(e)/, qr/^()/],    #TODO - ?
        [qr/^(-)/, qr/^()/],  [qr/^()/, qr/^(-)/],

        [qr/^(ional)$/, qr/^(иональный|ионал|нл)$/],
        [qr/^(ion)$/, qr/^(ия)$/],  [qr/^(tional)$/, qr/^(шнл)$/],
        [qr/^(burg)$/, qr/^(берг)$/],
        [qr/^(ault)$/, qr/^(о|а|)$/],
        [qr/^(mc)/, qr/^(мак)/],
        [qr/^(f+)$/, qr/^(в)$/],
        [qr/^(e|an)$/, qr/^(я)$/],
        [qr/^(k|g)$/, qr/^(к|г)$/],
        [qr/^(ie|ia|je|ja)$/, qr/^(и|я|ь)$/],
        [qr/^(es)$/, qr/^(ы)$/],
        [qr/^(i|y|e)$/, qr/^(ить|уть|еть|ать|ять)$/],
        [qr/^(ie|ye|aya|oe)$/, qr/^(ий|ый)$/],
        [qr/^(er|ed)$/, qr/^()$/],
        [qr/^(ok)$/, qr/^(ка)$/],
        [qr/^(ka)$/, qr/^(ок)$/],
        [qr/^(ah|om|as)$/, qr/^()$/],
        [qr/^(|e|a|o|u|i|y|t|s|r|ue)$/, qr/^(|е|ы|о|а|я|и|ый|ий|й|с|з|т|д)$/],
    );

    my @en_ru_re_all = (@en_ru_re, @en2ru_letters_re);

    my $phs_parts = [ map { lc } ($ph_en, $ph_ru) ];
    s/ё/е/gi   for @$phs_parts; # TODO

    return 1 if not grep { m/[^\s]/ } @$phs_parts;  # [ '', '' ]
    return 0 if grep { m/^\s*$/ } @$phs_parts;  # [ '', '...' ]

    my $is_translit = 0;
    my $phs_parts_all = [ [@$phs_parts] ];
    while (@$phs_parts_all) {
        #print "  ", join(", ", map { "(".$_->[0]." ".$_->[1].")" } @$phs_parts_all), "\n";
        my $phs_parts_all_new = [];
        for my $phs_parts (@$phs_parts_all) {
            for my $re_arr (@en_ru_re_all) {
                my $prfxs = [];
                if (defined ($prfxs->[0] = ($phs_parts->[0] =~ $re_arr->[0] )[0] )) {
                    if (defined ($prfxs->[1] = ($phs_parts->[1] =~ $re_arr->[1] )[0] )) {
                        my $phs_parts_new = [map { substr( $phs_parts->[$_], length($prfxs->[$_]) ) } (0, 1)];
                        #print Dumper($re_arr);
                        push @$phs_parts_all_new, [ @$phs_parts_new ];

                        for my $i ( 0, 1 ) {
                            my $prfx = $prfxs->[$i];
                            if ( $phs_parts_new->[$i] =~ qr/^($prfx)/ ) {    # Удвоенные буквы
                                my $phs_parts_new1 = [ @$phs_parts_new ];
                                $phs_parts_new1->[$i] = substr( $phs_parts_new1->[$i], length($prfx) );
                                push @$phs_parts_all_new, [ @$phs_parts_new1 ];
                            }
                        }
                    }
                }

                # TODO
                if (substr($phs_parts->[0], 0, 1) eq substr($phs_parts->[1], 0, 1)) {
                    my $phs_parts_new1 = [ map { substr($_, 1) } @$phs_parts ];
                    push @$phs_parts_all_new, [ @$phs_parts_new1 ];
                }
            }

            # uniq
            my %str2el = map { join("; ", @{$_})  => $_ } @$phs_parts_all_new;
            $phs_parts_all_new = [ values %str2el ];
        }

        last unless @$phs_parts_all_new;
        if (grep { $_->[0] eq $_->[1] } @$phs_parts_all_new) {
            $is_translit = 1;
            last;
        }
        $phs_parts_all = $phs_parts_all_new;
    }

    return $is_translit;
};

# считает diff двух структур
sub deep_diff {
    my ($old, $new) = @_;
    my $ref = ref($old);
    if (ref($new) ne $ref) {
        return 'type: '.($ref || 'SCALAR').' => '.(ref($new) || 'SCALAR');
    }
    if (!$ref) {
        if ($old ne $new) {
            return 'value: '.$old.' => '.$new;
        } else {
            return;
        }
    } elsif ($ref eq 'ARRAY') {
        my %diff;
        for my $i (0 .. max(scalar(@$old), scalar(@$new)) - 1) {
            my $diff;
            if ($i >= @$old) {
                $diff = 'gone';
            } elsif ($i >= @$new) {
                $diff = 'came';
            } else {
                $diff = deep_diff($old->[$i], $new->[$i]);
            }
            $diff{"index_$i"} = $diff if $diff;
        }
        if (keys %diff) {
            return \%diff;
        } else {
            return;
        }
    } elsif ($ref eq 'HASH') {
        my @diff;
        my @old_k = keys %$old;
        my @new_k = keys %$new;
        my %old_k = map { $_ => 1 } @old_k;
        my %new_k = map { $_ => 1 } @new_k;

        my %diff;
        $diff{$_} = 'gone' for grep { !$new_k{$_} } @old_k;
        $diff{$_} = 'came' for grep { !$old_k{$_} } @new_k;
        for my $k (grep { $new_k{$_} } @old_k) {
            my $diff = deep_diff($old->{$k}, $new->{$k});
            $diff{$k} = $diff if $diff;
        }
        if (keys %diff) {
            return \%diff;
        } else {
            return;
        }
    } else {
        die "Can't compare blessed objects!\n";
    }
}

# параметры:
#   $old  - хэш, который патчим
#   $patch - subj
#   clone_source =>  subj (default: 1)
#   clone_patch  =>  subj (default: 1)
#   add =>  не заменять значения, а инкрементировать
sub patch_hash {
    my $old = shift;
    my $patch = shift;

    my %params = (
        # default params
        clone_source => 1,
        clone_patch => 1,

        # given params
        @_,
    );

    my $add = $params{add};
    my $new = $params{clone_source} ? dclone($old) : $old;
    $patch = $params{clone_patch} ? dclone($patch) : $patch;

    
    # патчим рекурсивно
    while (my ($key, $val) = each %$patch) {
        $key =~ s/#{(.+)}$//;
        my $mode = $1 // 'default';
        
        if (
            ref($val) ne 'HASH'
            || $mode eq 'strict'
        ) {
            if ($add) {
                $new->{$key} += $val;
            } else {
                $new->{$key} = $val;
            }
        } else {
            if (!defined $new->{$key}) {
                $new->{$key} = $val;
            } elsif (ref($new->{$key}) ne 'HASH') {
                # спорная ситуация - нужно патчить хэш, а там не хэш
                $new->{$key} = $val;
            } else {
                patch_hash($new->{$key}, $val, %params, clone_source => 0, clone_patch => 0);
            }
        }
    }

    return $new;
}

# copy-paste from Utils::Sys, rename to prevent name colliding
sub _md5int {
    my ($str) = @_;
    my $str_bytes = Encode::encode('UTF-8', $str);
    my @a = unpack("N4", Digest::MD5::md5($str_bytes));
    return ($a[1] ^ $a[3]) << 32 | ($a[0] ^ $a[2]);
}

sub print_diff {
    my $o = shift;
    my $n = shift;
    my %par = @_;
    my %o = map { $_ => 1 } @$o;
    my %n = map { $_ => 1 } @$n;
    printf "common: %d\n", scalar(grep { $n{$_} } @$o);
    my @gone = grep { !$n{$_} } @$o;
    my @came = grep { !$o{$_} } @$n;
    if ($par{only_counts}) {
        printf "gone: %d\n", scalar(@gone);
        printf "came: %d\n", scalar(@came);
        return;
    }
    for my $gone (@gone) {
        printf "< %s\n", $gone;
    }
    for my $came (@came) {
        printf "> %s\n", $came;
    }
}

# quote special chars to write string to tsv file
sub encode_tsv {
    my $str = shift;
    my %D = ("\t" => "\\t", "\n" => "\\n", "\\" => "\\\\", "\0" => "\\0");
    $str =~ s/(.)/ $D{$1} || $1 /ge;
    return $str;
}

sub decode_tsv {
    my $str = shift;
    my %D = ("t" => "\t", "n" => "\n", "\\" => "\\", "0" => "\0");
    $str =~ s/\\(.)/ $D{$1} || $1 /ge;
    return $str;
}

sub parse_tskv_line {
    my $line = shift;
    chomp $line;
    my %kv = map { my ($k, $v) = split /=/, $_, 2; $k => decode_tsv($v) } split /\t/, $line;
    return \%kv;
}

my $json_encoder_for_md5 = JSON::XS->new->utf8(1)->canonical(1)->indent(0);
sub get_data_md5 {
    my $data = shift;
    my $data_json = $json_encoder_for_md5->encode($data);
    return Digest::MD5::md5_hex($data_json);
}

sub content_type_is_html {
    my $content_type = shift;
    return 0 if (!$content_type);
    return $content_type eq 'text/html' || content_type_is_xhtml($content_type);
}

sub content_type_is_xhtml {
    my $content_type = shift;
    return $content_type eq "application/xhtml+xml" ||
           $content_type eq "application/vnd.wap.xhtml+xml";
}

sub content_type_is_xml {
    my $content_type = shift;
    return 0 if (!$content_type);
    return 1 if $content_type eq "text/xml";
    return 1 if $content_type eq "application/xml";
    return 1 if $content_type =~ /\+xml$/;
    return 0;
}

sub content_is_text {
    my $content_type = shift;
    return 1 if content_type_is_html($content_type);
    return 1 if content_type_is_xml($content_type);
    return 1 if $content_type =~ m,^text/,;
    return 1 if $content_type eq "application/json";
    return 0;
}

sub get_urls_from_str {
    my $in_str = shift;
    my @arr = split(/\s*[,;]\s*(?=(?:https?:)?\/\/)/, $in_str);
    return @arr;
}

sub get_python_options {
    my $python_script_path = shift;
    my $py_bin_flag= shift;
    my $py_bin_str = $py_bin_flag ? "/usr/bin/python" : "";

    die "select python_script_path" unless $python_script_path;
    die "python_script_path not found: $python_script_path" if !-f $python_script_path;  # может не быть - при запуске без BMYT, например

    local $/;
    open my $fh, "$py_bin_str $python_script_path |"
        or die "Can't run [python $python_script_path]: $!";
    my $py_opts_json = <$fh>;
    close $fh
        or die "Error at [python $python_script_path]: $!";
    my $json_decoder = JSON::XS->new;
    my $py_opts = $json_decoder->decode($py_opts_json);
    return $py_opts;
}

sub is_offer_id_valid {
    my $offer_id = shift;
    # Just like market do: https://a.yandex-team.ru/arc/trunk/arcadia/market/idx/library/validators/validators.h?rev=r9165746#L83
    return $offer_id =~ /^[0-9a-zA-Zа-яА-Я\.,\/\\\(\)\[\]\-=_]+$/;
}

sub is_price_valid {
    my $price = shift;
    return $price =~ /^(\d+(\.\d+)?|\.\d+)$/ && $price !~ /^(0+(\.0+)?|\.0+)$/;
}

1;
