#!/usr/bin/env python
# coding=utf-8


def generate_categs(bmyt_cl, src_syns_table,
                    dst_categs_table, dst_categs_regions_table, dst_syns_categs_table, dst_regions_table, lang):
    """Categorize the phrases of ``src_syns_table`` and write the derived tables.

    The work is done in two stages:

    1. A Perl mapper (run through ``bmyt_cl.run_bm_map``) reads the ``snorm``
       field of every input row, builds a phrase object for the selected
       language and yields one row with three string columns into a
       temporary table:

       * ``snorm``   - the text of the parsed phrase;
       * ``categs``  - the phrase's minicategory ids, then a literal ``_``,
         then the sorted minicategory ids found via the phrase's atoms,
         all joined by single spaces;
       * ``regions`` - sorted ``"<code> <snorm>"`` entries for atoms whose
         name starts with ``.region``, joined by ``" , "``.

    2. A YQL script (run through ``bmyt_cl.do_yql``) copies the temporary
       table, ordered by ``snorm``, into ``dst_categs_regions_table``;
       projects it into ``dst_categs_table`` (``snorm``, ``categs``) and
       ``dst_regions_table`` (``snorm``, ``regions``); and finally
       inner-joins ``src_syns_table`` with ``dst_categs_table`` on ``snorm``
       to write ``norm_freq`` and ``categs`` into ``dst_syns_categs_table``.
       Every destination is written ``WITH TRUNCATE``.

    Args:
        bmyt_cl: client object. This function uses its
            ``yt_client.Transaction()`` and ``yt_client.TempTable()``
            context managers and its ``run_bm_map`` / ``do_yql`` methods.
        src_syns_table: path of the input table; rows must carry ``snorm``
            (read by the mapper) and ``norm_freq`` (read by the final join).
        dst_categs_table: output path for the (``snorm``, ``categs``) table.
        dst_categs_regions_table: output path for the full mapper result.
        dst_syns_categs_table: output path for the (``norm_freq``,
            ``categs``) join result.
        dst_regions_table: output path for the (``snorm``, ``regions``) table.
        lang: language code handed to the Perl side. An empty string or
            ``None`` makes the mapper fall back to the project's default
            language.

    Returns:
        None. All results are written to the destination tables.

    Note:
        ``lang`` and the table paths are substituted verbatim into Perl and
        YQL source text, so they must come from a trusted caller.
    """
    # Treat a missing language as "use the default". Without this, None would
    # be rendered into the Perl source as the truthy string "None" (selecting
    # a non-existent language) and the title concatenation below would raise
    # TypeError only after the map stage had already been executed.
    if lang is None:
        lang = ''

    # Debugging aid: swap the ``with`` statement below for these two lines to
    # keep the intermediate table at a fixed path.
    # tmp_table = '//tmp/modest/generate_categs.tmp'
    # if 1 == 1:
    with bmyt_cl.yt_client.Transaction() as tx, \
            bmyt_cl.yt_client.TempTable() as tmp_table:

        bm_mapper = {
            # Perl executed before the mapper body; it stores the project
            # and the selected language object on $self for the mapper.
            'begin': """
                use BaseProject;
                my $lang = "%(lang)s" // '';

                # use CatalogiaMediaProject;
                # my $proj = CatalogiaMediaProject->new({
                #     no_form => 1,
                #     no_auth => 1,
                # });
                my $proj = BaseProject->new({
                    load_dicts => 1,
                    load_minicategs_light => 1,
                    use_comptrie_subphraser => 1,
                    use_sandbox_categories_suppression_dict => 1,
                    allow_lazy_dicts => 1,
                    load_languages => [ qw(ru en tr) ],
                });

                $proj->categs_tree->never_read_categs_cache(1);
                $proj->categs_tree->never_write_categs_cache(1);

                $self->{language} = $lang ? $proj->get_language($lang) : $proj->default_language;

                $self->{proj} = $proj;
            """ % ({"lang": lang}),
            # Perl mapper body; $r is the input row. Rows whose phrase object
            # is false produce no output.
            'mapper': '''
                my $phr = $self->{language}->phrase($r->{'snorm'});
                if ($phr) {

                    # atoms
                    my $h = $phr->search_atoms_snorm;
                    my $codes = {};
                    my %regions;
                    for my $snorm (keys %$h) {
                        for my $name (keys %{$h->{$snorm}}) {
                            my $code = $self->{proj}->categs_tree->get_minicateg_id($name) // '';
                            $codes->{$code}++ if $code;
                            if($name =~ /^\.region/) {
                                $regions{"$code $snorm"}++;
                            }
                        }
                    }
                    my $regions = join " , ", sort keys %regions;

                    # join categs and atoms using ' _ '
                    my @categs = grep{$_} map{$self->{proj}->categs_tree->get_minicateg_id($_)} $phr->get_minicategs;
                    my $categs = join(" ", @categs, "_", sort keys %$codes);

                    my $out_r;
                    $out_r->{'snorm'} = $phr->text;
                    $out_r->{'categs'} = $categs;
                    $out_r->{'regions'} = $regions;

                    yield($out_r => YT_TABLE_TMP);
                }
            ''',

            # Name used by the mapper's yield() for its single output table,
            # together with that table's column types.
            'dst_names': ['YT_TABLE_TMP'],
            'dst_fields': [{'snorm': str,
                            'categs': str,
                            'regions': str}]
        }

        # Stage 1: run the Perl mapper over the source table into tmp_table.
        bmyt_cl.run_bm_map(
            bm_mapper,
            src_syns_table,
            tmp_table,
        )

        # Stage 2: derive the destination tables. A COMMIT precedes every
        # statement that reads a table written earlier in the same script.
        yql_query = '''
        PRAGMA yt.ForceInferSchema;
        PRAGMA SimpleColumns;
        use hahn;

        INSERT INTO `{yt_table_categs_regions}` WITH TRUNCATE
        SELECT
            snorm,
            categs,
            regions
        FROM
            `{yt_tmp_table}`
        ORDER BY snorm;

        COMMIT;
        --------------------------------------------------

        INSERT INTO `{yt_table_categs}` WITH TRUNCATE
        SELECT
            snorm,
            categs
        FROM
            `{yt_table_categs_regions}`
        ORDER BY snorm;

        --------------------------------------------------

        INSERT INTO `{yt_table_regions}` WITH TRUNCATE
        SELECT
            snorm,
            regions
        FROM
            `{yt_table_categs_regions}`
        ORDER BY snorm;

        COMMIT;
        --------------------------------------------------

        INSERT INTO `{yt_table_syns_categs}` WITH TRUNCATE
        SELECT
            syns.norm_freq as norm_freq,
            categs.categs as categs
        FROM
            `{yt_table_syns}` as syns
        INNER JOIN
            `{yt_table_categs}` as categs
        USING (snorm);
        '''.format(yt_table_categs_regions=dst_categs_regions_table,
                   yt_tmp_table=tmp_table,
                   yt_table_syns=src_syns_table,
                   yt_table_categs=dst_categs_table,
                   yt_table_regions=dst_regions_table,
                   yt_table_syns_categs=dst_syns_categs_table)

        # The query is handed the id of the transaction opened above.
        bmyt_cl.do_yql(yql_query, title='generate categs ' + lang, transaction_id=tx.transaction_id)
