# -*- coding: utf-8 -*-

from yabs.tabtools import mr_do_map, mr_do_aggregate, mr_do_join, Join, StatAggregator, Grep, Cut, Mapper, Sum
from yabs.tabutils import TemporaryTableWithMeta
from yabs.logconfig import get_logs_regexp_time
from yabs.logger import info
from argparse import ArgumentParser
from mapreducelib import MapReduce

# Regexp template for the source log tables. It is handed to
# get_logs_regexp_time together with the start/finish arguments in main();
# the strftime-like placeholders inside the TIME group are presumably expanded
# there (TODO confirm). Raw string: `\d` must reach the regexp engine verbatim.
source_log_regexp = r'yabs-log/\d{6}/JoinedEFH(?P<TIME>%Y%m%d%H)'

# Allowed values of `ExperimentBits & ((1<<40)-(1<<28))` used to filter log
# records: 0xD0000000, 0x60000000, 0x70000000.
default_experiment_bits = [3489660928, 1610612736, 1879048192]

# Default input tables; each can be overridden through keyword arguments of
# the build_* functions.
default_broad_phrase_table = 'path/to/BroadPhrase'
default_phrase_price_table = 'path/to/PhrasePrice'
default_phrase_dict_table = 'path/to/PhraseDict'
default_banner_table = 'path/to/Banner'
default_orig_phrase_table = 'users/stys/broadmatch/OrigPhrase'


def build_orig_phrase_table(dst_table, **kwargs):
    """
    Build the table that links banners to their original phrases and phrase texts.

    Three steps: filter the phrase price table, join it with the phrase
    dictionary on PhraseID, then join the result with the banner table on
    GroupExportID.

    :param dst_table: Output table name
    :param kwargs: Optional overrides ``phrase_price_table``,
        ``phrase_dict_table`` and ``banner_table``; a missing or falsy value
        falls back to the corresponding module-level default
    """

    prices_src = kwargs.get('phrase_price_table') or default_phrase_price_table
    dict_src = kwargs.get('phrase_dict_table') or default_phrase_dict_table
    banners_src = kwargs.get('banner_table') or default_banner_table

    with TemporaryTableWithMeta() as filtered_prices, TemporaryTableWithMeta() as with_text:
        # step 1: keep ContextType == 1 records and only the fields needed for the joins
        price_mappers = [
            Grep('r.ContextType==1'),
            Cut(keys=['PhraseID'], fields=['GroupExportID', 'ContextType']),
        ]
        mr_do_map(
            mappers_list=price_mappers,
            src_tables=[prices_src],
            dst_tables=[filtered_prices.name],
        )

        # step 2: attach phrase dictionary records by PhraseID
        by_phrase = Join(
            first_table=filtered_prices.name,
            second_table=dict_src,
            keys=['PhraseID'],
        )
        mr_do_join(joiner=by_phrase, dst_tables=[with_text.name])

        # step 3: attach banners by GroupExportID, expose Data as Phrase
        # and write the final set of fields to the destination
        by_group = Join(
            first_table=with_text.name,
            second_table=banners_src,
            keys=['GroupExportID'],
        )
        finalize = [
            Mapper('r.Phrase = r.Data'),
            Cut(fields=['BannerID', 'ContextType', 'PhraseID', 'Phrase']),
        ]
        mr_do_join(joiner=by_group, postmap=finalize, dst_tables=[dst_table])


def build_slot_stat_table(src_tables, dst_table, **kwargs):
    """
    Build table of pairs OrigPhrase -> BroadPhrase with statistics

    Aggregates counters from the source logs per (TypeID, BannerID, PhraseID),
    joins the broadmatch phrase records by PhraseID and then joins the
    original phrases by BannerID.

    :param src_tables: List of source log tables for collecting statistics
    :param dst_table: Output table name
    :param kwargs: Optional overrides; a missing or falsy value falls back to
        the module-level default:
        ``orig_phrase_table`` - table of banners to original phrases,
        ``broad_phrase_table`` - table of broadmatch phrases,
        ``experiment_bits`` - collection of accepted masked ExperimentBits
        values (the legacy key ``default_experiment_bits`` is still honoured)
    """

    # table of banners to original phrases with text
    orig_phrase_table = kwargs.get('orig_phrase_table') or default_orig_phrase_table

    # table of broadmatching phrases to phrase text
    broad_phrase_table = kwargs.get('broad_phrase_table') or default_broad_phrase_table

    # experiment bits for log filtering; 'experiment_bits' matches the naming of
    # the other overrides, 'default_experiment_bits' is kept for old callers
    experiment_bits = (
        kwargs.get('experiment_bits')
        or kwargs.get('default_experiment_bits')
        or default_experiment_bits
    )

    with TemporaryTableWithMeta() as tmp:

        # aggregate statistics from logs for broadmatch phrases (ContextType==3)
        mr_do_aggregate(
            aggregator=StatAggregator(
                reducers=[
                    Sum('Show', 'Shows'),
                    Sum('Click', 'Clicks'),
                    Sum('VClick', 'VClicks'),
                    Sum('SClick', 'SClicks'),
                    Sum('LClick', 'LClicks'),
                    Sum('D120', 'D120'),
                    Sum('MaxDepth', 'Depth'),
                    Sum('MaxDuration', 'Duration'),
                    Sum('Cost', 'Cost'),
                    Sum('VCost', 'VCost')
                ],
                keys=['TypeID', 'BannerID', 'PhraseID']
            ),
            premap=[
                # The bits collection is wrapped in a 1-tuple so that a tuple of
                # bits is rendered as a single value instead of being unpacked
                # as separate format arguments (which raises TypeError).
                Grep('r.ContextType == 3 and r.FraudBits == 0 and r.PhraseID != 0 and r.PlaceID == 542 \
                    and (r.ExperimentBits & ((1<<40)-(1<<28))) in %s' % (experiment_bits,)),
                # Derive per-record counters; the click-quality fields are zeroed
                # when the record has no counter (HasCounter is falsy).
                Mapper('''
                    r.Show = int(r.CounterType==1)
                    r.Click = int(r.CounterType==2)
                    r.VClick = int(r.HasCounter)
                    r.SClick = int(r.MaxDepth > 1 and r.CounterType == 2) if r.HasCounter else 0
                    r.LClick = r.LClick if r.HasCounter else 0
                    r.MaxDepth = r.MaxDepth if r.HasCounter else 0
                    r.MaxDuration = r.MaxDuration if r.HasCounter else 0
                    r.D120 = int(r.MaxDuration > 120) if r.HasCounter else 0
                    r.Cost = r.EventCost
                    r.VCost = r.EventCost if r.HasCounter else 0
                '''),
                Cut(fields=['TypeID', 'BannerID', 'PhraseID', 'Show', 'Click', 'VClick', 'SClick', 'LClick',
                            'MaxDepth', 'MaxDuration', 'D120', 'Cost', 'VCost']),
            ],
            src_tables=src_tables,
            dst_tables=[tmp.name]
        )

        # join broadmatch phrases text
        with TemporaryTableWithMeta() as tmp_bm_phrase:
            # expose BroadPhraseID/Data under the names used as join key and text
            mr_do_map(
                mappers_list=[
                    Mapper('r.PhraseID = r.BroadPhraseID'),
                    Mapper('r.Phrase = r.Data')
                ],
                src_tables=[broad_phrase_table],
                dst_tables=[tmp_bm_phrase.name]
            )

            # NOTE(review): this join reads from and writes to the same table
            # (tmp) - presumably supported by the MapReduce backend; confirm.
            # PhraseID/Phrase are not listed in the Cut below, so those names
            # are taken from orig_phrase_table in the final join.
            mr_do_join(
                joiner=Join(
                    first_table=tmp.name,
                    second_table=tmp_bm_phrase.name,
                    keys=['PhraseID']
                ),
                postmap=[
                    Mapper('r.BroadPhrase = r.Phrase'),
                    Cut(keys=['BannerID'], fields=[
                        'TypeID', 'BroadPhraseID', 'BroadPhrase', 'NormType',
                        'Shows', 'Clicks', 'VClicks', 'SClicks', 'LClicks', 'D120', 'Depth', 'Duration', 'Cost', 'VCost'
                    ])
                ],
                dst_tables=[tmp.name]
            )

        # join original phrases
        mr_do_join(
            joiner=Join(
                first_table=tmp.name,
                second_table=orig_phrase_table,
                keys=['BannerID']
            ),
            postmap=[
                Mapper('r.OrigPhraseID = r.PhraseID; r.OrigPhrase = r.Phrase'),
                Cut(
                    keys=['BannerID'],
                    fields=[
                        'TypeID',
                        'BroadPhraseID', 'BroadPhrase', 'NormType',
                        'OrigPhraseID', 'OrigPhrase',
                        'Shows', 'Clicks', 'VClicks', 'SClicks', 'LClicks', 'D120', 'Depth', 'Duration', 'Cost', 'VCost'
                    ]
                )
            ],
            dst_tables=[dst_table]
        )


def main():
    """
    Command-line entry point.

    Parses the start/finish bounds of the log range, the optional original
    phrase table name and the destination table name; builds the original
    phrase table when a one-record sample of it comes back empty; then builds
    the slot statistics table from the matching log tables.
    """
    parser = ArgumentParser()
    parser.add_argument('-s', '--start', required=True)
    parser.add_argument('-f', '--finish', required=True)
    parser.add_argument('-p', '--orig-phrase-table')
    parser.add_argument('-d', '--dst-table', required=True)
    args = parser.parse_args()

    # check that the original phrase table exists: a sample of one record
    # is requested, and anything other than exactly one record triggers a build
    orig_phrase_table = args.orig_phrase_table or default_orig_phrase_table
    if 1 != len(list(MapReduce.getSample(orig_phrase_table, count=1))):
        info("Table orig_phrase_table is not found. Building from dbrestore ...")
        build_orig_phrase_table(orig_phrase_table)

    # build slot stat table; forward the resolved table name so that a table
    # given with --orig-phrase-table is the one actually joined
    src_tables = sorted(t['name'] for t in get_logs_regexp_time(source_log_regexp, args.start, args.finish))
    build_slot_stat_table(src_tables, args.dst_table, orig_phrase_table=orig_phrase_table)


if __name__ == '__main__':
    main()
