#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import datetime
import json
import logging
import subprocess

import yt.wrapper
from sandbox.projects.modadvert.common.ytutils import yt_connect


def run(**kwargs):
    """Run this file as a script in a child interpreter.

    ``kwargs`` is serialized to JSON and written to the child's stdin; the
    script entry point of this file reads it back with
    ``json.load(sys.stdin)``.  After the child finishes, its stdout is logged
    at INFO level and its stderr at ERROR level.

    :param kwargs: JSON-serializable arguments for the child process.
    :raises Exception: if the child exits with a non-zero return code.
    """
    p = subprocess.Popen(
        [sys.executable, __file__],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    # The pipes are binary, so the payload must be bytes on Python 3 (passing
    # text there raises TypeError).  json.dumps emits ASCII-only text by
    # default, so encoding it is safe on Python 2 as well.
    payload = json.dumps(kwargs).encode('utf-8')
    (out, err) = p.communicate(input=payload)
    for level, captured in ((logging.INFO, out), (logging.ERROR, err)):
        if not captured:
            continue
        if not isinstance(captured, str):
            # Python 3 returns bytes; decode so the log shows text, not b'...'.
            captured = captured.decode('utf-8', 'replace')
        logging.log(level, captured)
    if p.returncode:
        raise Exception("run exited with code: {:d}".format(p.returncode))


class CountReducer(object):
    """Reducer that emits the number of rows received for one key.

    ``dates`` is a sequence of datetimes indexed by ``key['index']``.
    """

    def __init__(self, dates):
        self.dates = dates

    def __call__(self, key, rows):
        """Yield a single row with the row count and the shifted date.

        :param key: mapping with an ``index`` entry selecting a date.
        :param rows: iterable of rows for this key; only its length is used.
        """
        total = 0
        for _ in rows:
            total += 1
        # The stored date is one minute after the indexed one.
        # NOTE(review): presumably this rounds second-resolution timestamps up
        # to the minute-resolution output format -- confirm.
        shifted = self.dates[key['index']] + datetime.timedelta(minutes=1)
        yield {
            'date': shifted.strftime('%Y-%m-%d %H:%M'),
            'banners_since_last_check': total,
        }


def mapper(row):
    """Yield ``{'index': <table index>}`` for rows whose type is 'banner'.

    Rows of any other type produce no output.
    """
    is_banner = row['type'] == 'banner'
    if not is_banner:
        return
    table_index = row['@table_index']
    yield {'index': table_index}


def get_last_row(yth, output_table):
    """Return the last row of ``output_table``.

    :param yth: YT client object providing ``row_count`` and ``read_table``.
    :param output_table: path of the table to read.
    :return: the final row, as produced by ``read_table(..., raw=False)``.
    :raises ValueError: if the table has no rows.
    """
    count = yth.row_count(output_table)
    if count <= 0:
        # count - 1 would be a negative start index; fail with a clear message.
        raise ValueError('table {} is empty, there is no last row'.format(output_table))
    table_path = yt.wrapper.TablePath(output_table, start_index=count - 1, end_index=count)
    # The builtin next() works on both Python 2.6+ and 3, unlike .next().
    return next(yth.read_table(table_path, raw=False))


def get_tables(yth, source_dir, date_lower_bound, date_format='%Y-%m-%dT%H:%M:%S'):
    """Find tables under ``source_dir`` whose path is at or after the bound.

    The bound is ``source_dir`` joined with ``date_lower_bound`` formatted by
    ``date_format``; paths are compared with it as plain values.

    :param yth: YT client object providing ``search``.
    :param source_dir: directory to search for table nodes.
    :param date_lower_bound: datetime giving the inclusive lower bound.
    :param date_format: strftime/strptime format of the table names.
    :return: ``(tables, dates)`` -- the sorted table paths and, in the same
        order, the datetimes parsed from each path with the ``source_dir``
        prefix removed.
    """
    lower_path = yt.wrapper.ypath_join(source_dir, datetime.datetime.strftime(date_lower_bound, date_format))

    def at_or_after_bound(candidate):
        return candidate >= lower_path

    found = list(yth.search(source_dir, node_type='table', path_filter=at_or_after_bound))
    found.sort()

    dir_prefix = yt.wrapper.ypath_join(source_dir, '')
    parsed = []
    for table in found:
        table_name = table.replace(dir_prefix, '')
        parsed.append(datetime.datetime.strptime(table_name, date_format))
    return found, parsed


def main(args):
    """Append banner counts and a running total to the output table.

    Reads the last row of ``args['output_table']`` to get the date and total
    already recorded, counts 'banner' rows in the tables that ``get_tables``
    returns for ``args['source_dir']`` and that date, and appends the reducer
    output rows, each extended with the running ``total_banners_count``.

    :param args: dict with keys ``yt_proxy_url``, ``yt_token``,
        ``source_dir`` and ``output_table``.
    """
    yth = yt_connect(
        args['yt_proxy_url'],
        args['yt_token']
    )
    last_row = get_last_row(yth, args['output_table'])
    last_date = datetime.datetime.strptime(last_row['date'], '%Y-%m-%d %H:%M')
    before_count = last_row['total_banners_count']

    tables, dates = get_tables(yth, args['source_dir'], last_date)
    logging.info(tables)
    if not tables:
        # Nothing new to count: there would be no rows to append, so do not
        # start a map-reduce operation over an empty input list.
        logging.info('no source tables found since %s, nothing to append', last_date)
        return

    with yth.TempTable() as temp_table:
        yth.run_map_reduce(
            mapper,
            CountReducer(dates),
            tables,
            temp_table,
            reduce_by=['index'],
            # NOTE(review): presumably this is what exposes '@table_index' in
            # each row for the mapper -- confirm against the YT format docs.
            format=yt.wrapper.YsonFormat(control_attributes_mode='row_fields')
        )
        yth.run_sort(temp_table, sort_by=['date'])
        rows = list(yth.read_table(temp_table))

        # Rows are in date order after the sort, so accumulate the total
        # starting from the value stored in the last output row.
        for row in rows:
            before_count += row['banners_since_last_check']
            row['total_banners_count'] = before_count

        table_path = yt.wrapper.TablePath(args['output_table'], append=True, sorted_by=['date'])
        yth.write_table(table_path, rows)


if __name__ == '__main__':
    # Script entry point: the arguments arrive as a JSON object on stdin.
    logging.basicConfig(
        format="%(asctime)s %(levelname)s (%(module)s) %(message)s",
        level=logging.INFO
    )
    main(json.load(sys.stdin))
