# -- coding: utf8 --

import copy

from bm.bmyt import BMYT


# Value passed as `process_count` when the BMYT client is created.
CATALOGIA_MAPPER_BMYT_PROCESS_COUNT = 4
# Divisor applied to the source table row count when job_count is auto-selected
# (i.e. the intended number of input rows per job).
CATALOGIA_MAPPER_LINES_PER_JOB = 10 ** 6
# Lower bound for the auto-selected job_count.
CATALOGIA_MAPPER_MIN_JOB_COUNT = 10


def _check_catalogia_mapper_params(src_table, dst_table, targets, text_field, banner_fields):
    """Validate run_catalogia_mapper arguments; raise Exception on any misuse."""
    if src_table == dst_table:
        raise Exception("src_table and dst_table can't be the same table")

    # exactly one of the two input modes must be selected
    if bool(text_field) == bool(banner_fields):
        raise Exception("Provide either text_field or banner_fields")

    known_targets = {'categories', 'flags', 'catids', 'directids', 'directcatids', 'mediagroups', 'mediaids', 'bmapi'}
    for target in targets:
        if target not in known_targets:
            raise Exception("Unknown target '{target}'".format(target=target))

    if 'bmapi' in targets:
        # the perl mapper handles 'bmapi' only in its text branch; in banner mode it would
        # die inside the running operation, so reject it before anything is launched
        if banner_fields:
            raise Exception("Target 'bmapi' is supported only in text mode (text_field)")

        # the perl mapper reads {result_field} and {methods} from the bmapi spec and dies if either is empty
        bmapi_spec = targets['bmapi']
        if not isinstance(bmapi_spec, dict) or not bmapi_spec.get('result_field') or not bmapi_spec.get('methods'):
            raise Exception("Target 'bmapi' must be a dict with 'result_field' and a non-empty 'methods' list")


def _add_result_columns(schema, targets):
    """Append one string column per distinct result field to `schema`, modifying it in place."""
    result_fields = []
    for target in targets:
        result_field = targets[target]
        if isinstance(result_field, dict):
            result_field = result_field["result_field"]
        # several targets may point at the same field; a column must be declared only once
        if result_field not in result_fields:
            result_fields.append(result_field)

    # The mapper overwrites a source column that has the same name as a result field with a
    # string value, so the source declaration is dropped to avoid a duplicated column name.
    # Slice assignment keeps the original list object.
    schema[:] = [field for field in schema if field['name'] not in result_fields]

    for result_field in result_fields:
        schema.append({
            'name': result_field,
            'type': 'string'
        })


def run_catalogia_mapper(
        src_table, dst_table, targets,
        text_field=None, banner_fields=None, src_schema=None, job_count=None, yt_client=None):
    """Run the catalogia perl mapper over `src_table` and write the result to `dst_table`.

    Every input row is copied to the output with one extra string field per target.

    Args:
        src_table: path of the input table; must differ from `dst_table`.
        dst_table: path of the output table.
        targets: dict mapping a target name ('categories', 'flags', 'catids', 'directids',
            'directcatids', 'mediagroups', 'mediaids', 'bmapi') to the name of the output
            field. For 'bmapi' the value is a dict with 'result_field' and 'methods';
            'bmapi' is available only together with `text_field`.
        text_field: name of the input field holding the text to process (text mode).
        banner_fields: dict mapping banner attribute names to input field names (banner mode).
            Exactly one of `text_field` / `banner_fields` must be given.
        src_schema: optional schema of the input table; when omitted (or empty) the schema
            is read from the `schema` attribute of `src_table`. The passed object is not modified.
        job_count: optional job count; when None it is derived from the table's row count.
        yt_client: optional client handed to BMYT; afterwards BMYT's own `yt_client` is used.

    Raises:
        Exception: if the arguments are inconsistent (see the messages in
            `_check_catalogia_mapper_params`).
    """

    print("Catalogia mapper from '{src}' to '{dst}': started".format(src=src_table, dst=dst_table))

    # validate input parameters
    print("Check params...")

    _check_catalogia_mapper_params(src_table, dst_table, targets, text_field, banner_fields)

    # create the bmyt client, download required resources
    print("Get catalogia...")

    bmyt = BMYT(process_count=CATALOGIA_MAPPER_BMYT_PROCESS_COUNT, yt_client=yt_client)
    yt_client = bmyt.yt_client

    # compose the output schema
    print("Prepare output table schema...")

    # deepcopy: the caller's schema must not be affected by the edits below
    schema = copy.deepcopy(src_schema) if src_schema else yt_client.get_attribute(src_table, "schema")

    if not schema:
        print("Input table '{table}' has empty schema".format(table=src_table))
        schema = None
    else:
        for field in schema:
            if 'sort_order' in field:
                print("As output tables of catalogia_mapper are unsorted, 'sort_order' will be removed from schema for field '{field}'".format(field=field['name']))
                del field['sort_order']

        _add_result_columns(schema, targets)

    # prepare the mapper
    print("Prepare mapper...")

    stash = {'targets': targets}

    if text_field:
        stash['mode'] = 'text'
        stash['text_field'] = text_field
    else:
        stash['mode'] = 'banner'
        stash['banner_fields'] = banner_fields

    mapper = {
        'dst_names': ['OUTPUT'],
        'dst_options': [{'unknown_as_string': True}],

        'stash': stash,

        'begin': """
            use BaseProject;
            use Utils::Sys qw/md5int/;

            my $proj = BaseProject->new({
                log_file => "/dev/null",
                load_dicts => 1,
                load_minicategs_light => 1,
                allow_lazy_dicts => 1,
                use_comptrie_subphraser => 1,
                use_sandbox_categories_suppression_dict => 1,
            });

            $proj->categs_tree->never_read_categs_cache(1);
            $proj->categs_tree->never_write_categs_cache(1);

            $self->{proj} = $proj;
        """,

        # fixme: the perl code below essentially duplicates scripts/standalone_yt_categorize_job.pl
        # eventually the "old" "Anton's mapper" will become a call to run_catalogia_mapper (see CATALOGIA-1158), and the copy-paste will go away
        # but that has not happened yet, so if you change something here - remember to change it there as well
        # breqwas@, 30.06.2018

        # todo: move into a separate file
        'mapper': """
            my %result = %$r;
            my $mode = $self->{stash}->{mode};
            my $targets = $self->{stash}->{targets};
            my $proj = $self->{proj};

            if ($mode eq 'text') {
                my $text_field = $self->{stash}->{text_field};
                my $phr = $proj->phrase($r->{$text_field});

                for my $target (keys %$targets) {
                    my $result_field = $targets->{$target};

                    if ($target eq 'categories') {
                        $result{$result_field} = join('/',
                            $phr->get_minicategs
                        );
                    } elsif ($target eq 'flags') {
                        $result{$result_field} = join(',',
                            $phr->get_banner_catalogia_flags
                        );
                    } elsif ($target eq 'directids') {
                        $result{$result_field} = join(',',
                            $phr->get_minicategs_videodirectgroups_directids
                        );
                    } elsif ($target eq 'directcatids') {
                        $result{$result_field} = join(',',
                            $phr->get_minicategs_directids
                        );
                    } elsif ($target eq 'catids') {
                        $result{$result_field} = join(',',
                            grep { $_ }
                            map { $phr->proj->categs_tree->get_minicateg_id($_) }
                            $phr->get_minicategs
                        );
                    } elsif ($target eq 'mediaids') {
                        $result{$result_field} = join(',',
                            grep { $_ }
                            map { md5int($_) }
                            $phr->get_mediagroups_for_categs([
                                $phr->get_minicategs
                            ])
                        );
                    } elsif ($target eq 'mediagroups') {
                        $result{$result_field} = join('/',
                            grep { $_ }
                            $phr->get_mediagroups_for_categs([
                                $phr->get_minicategs
                            ])
                        );
                    } elsif ($target eq 'bmapi') {
                        my $result_field = $targets->{$target}->{result_field};
                        my @methods = @{ $targets->{$target}->{methods} };

                        die "no no result_field provided for bmapi target" unless $result_field;
                        die "no methods provided for bmapi target" unless scalar @methods;

                        my $phl = $phr->proj->phrase_list([$phr]);
                        foreach my $cmd (@methods) {
                            if ((my $method = $phl->can($cmd)) && ($phl->can_be_externally_used($cmd, "yt"))) {
                                my $res_phl = $phl->$method();
                                my $res_text = join('\\n', map { $_->text } @$res_phl); # '\\n' is singlequoted on purpose, for correct output
                                                                                        # it also is supposed to have only one backslash (it's just a newline symbol), but oh well, escaping
                                $result{$result_field} = $res_text;
                            } else {
                                die "Unsupported bmapi method '$cmd'";
                            }
                        }
                    } else {
                        die "Unknown target '$target' for mode '$mode'";
                    }
                }
            } elsif ($mode eq 'banner') {
                my $banner_fields = $self->{stash}->{banner_fields};

                my %banner_data = map {
                    $_ => $r->{ $banner_fields->{$_} }
                } grep {
                    defined $r->{ $banner_fields->{$_} }
                } keys %$banner_fields;

                my $bnr = $proj->bf->lbanner(\%banner_data);

                for my $target (keys %$targets) {
                    my $result_field = $targets->{$target};

                    if ($target eq 'categories') {
                        $result{$result_field} = join('/',
                            $bnr->get_minicategs
                        );
                    } elsif ($target eq 'flags') {
                        $result{$result_field} = join(',',
                            $bnr->get_catalogia_flags
                        );
                    } elsif ($target eq 'directids') {
                        $result{$result_field} = join(',',
                            $bnr->get_minicategs_videodirectgroups_directids
                        );
                    } elsif ($target eq 'directcatids') {
                        $result{$result_field} = join(',',
                            $bnr->get_minicategs_directids
                        );
                    } elsif ($target eq 'catids') {
                        $result{$result_field} = join(',',
                            grep { $_ }
                            map { $bnr->proj->categs_tree->get_minicateg_id($_) }
                            $bnr->get_minicategs
                        );
                    } elsif ($target eq 'mediaids') {
                        $result{$result_field} = join(',',
                            grep { $_ }
                            map { md5int($_) }
                            $bnr->preprocess_title_body->get_mediagroups_for_categs([
                                $bnr->get_minicategs
                            ])
                        );
                    } elsif ($target eq 'mediagroups') {
                        $result{$result_field} = join('/',
                            grep { $_ }
                            $bnr->preprocess_title_body->get_mediagroups_for_categs([
                                $bnr->get_minicategs
                            ])
                        );
                    } else {
                        die "Unknown target '$target' for mode '$mode'";
                    }
                }

            } else {
                die "Unknown mode '$mode'";
            }

            yield(\%result => OUTPUT);
        """
    }

    # pick a suitable job_count
    # the constants were chosen empirically so that on a typical operation one job runs for 15-20 minutes
    if job_count is None:
        print("Choose good job_count...")
        row_count = yt_client.get_attribute(src_table, 'row_count')
        job_count = max(CATALOGIA_MAPPER_MIN_JOB_COUNT, row_count // CATALOGIA_MAPPER_LINES_PER_JOB)

    # run the mapper
    print("Run mapper...")

    bmyt.run_bm_map(mapper, src_table, dst_table, dst_schema=[schema], job_count=job_count)

    print("Catalogia mapper from '{src}' to '{dst}': done".format(src=src_table, dst=dst_table))
