# -*- coding: UTF-8 -*-
"""
Считаем суммарную длину, пройденную пользователями за день,
используя приматченные треки.
Делаем разбивку по регионам и приложениям
Работает в инкриментальном режими, добавляя данные о новых днях к единой сводной таблице

В cron прописываем

47    4    * * 7    arcadia/analytics/geo/tools/tracks/travel_times/length/calc_total_length > arcadia/analytics/geo/tools/tracks/travel_times/length/cron.log 2>&1

"""

import re
import os
import sys
import json
import datetime

import yt.wrapper as yt
from yql.api.v1.client import YqlClient

# YT directory used by this script: per-day tables are written under it,
# and the summary table lives in it as well.
SAVE_TABLES = "//home/geo-analytics/mednikov/tracks_length"
# Summary table that accumulates the daily totals across runs.
FINAL_TABLE = "{}/{}".format(SAVE_TABLES, "daily_length_app_region")


def run_yql_query(query, yql_client):
    """Run a YQL query and raise if it did not succeed.

    Args:
        query: Text of the YQL query to execute.
        yql_client: Client object exposing ``query(text, syntax_version=...)``
            that returns a request with ``run()``, ``get_results()``,
            ``is_success``, ``status`` and ``errors``.

    Raises:
        Exception: If the request reports ``is_success`` as false. The message
            carries the request status and any errors the request returned.
    """
    request = yql_client.query(query, syntax_version=1)
    request.run()
    # Pull the first result (if any) before checking the outcome; the result
    # data itself is not used.
    for _ in request.get_results():
        break

    if request.is_success:
        return

    print("%s. YQL request wasn't succussfull" % request.status)
    message = "YQL request finished with status %s" % request.status
    if request.errors:
        print('Returned errors:')
        for error in request.errors:
            print(' - ' + str(error))
        message += ": " + "; ".join(str(error) for error in request.errors)
    # Put the details into the exception as well, so the traceback is
    # self-explanatory even when stdout is not captured.
    raise Exception(message)

def run(command):
    """Echo a shell command, execute it, and terminate the process on failure.

    Args:
        command: Command line string passed to ``os.system``.

    Raises:
        SystemExit: With exit code 1 when ``os.system`` returns a non-zero
            status.
    """
    print(command)
    if os.system(command):
        print("Error at", command)
        # sys.exit instead of the ``exit`` builtin: the latter is injected by
        # the ``site`` module and is not guaranteed to exist in every runtime.
        sys.exit(1)

def get_last_calculated():
    """Return the most recent date already present in the summary table.

    Returns:
        A ``datetime.datetime``. When ``FINAL_TABLE`` does not exist (or has
        no rows) this is "now minus 30 days"; otherwise it is the maximum
        value of the ``date`` column parsed as ``%Y-%m-%d``.
    """
    # An existing but empty table has no dates to take the maximum of, so it
    # is treated the same way as a missing one.
    if not yt.exists(FINAL_TABLE) or yt.is_empty(FINAL_TABLE):
        last_date = datetime.datetime.now() - datetime.timedelta(30)
        print("No final table, will use {} for start".format(last_date))
        return last_date

    # Dates are compared as strings; "%Y-%m-%d" values order chronologically.
    last_date = max(row["date"] for row in yt.read_table(FINAL_TABLE, format=yt.JsonFormat()))
    print("Last date {}".format(last_date))
    return datetime.datetime.strptime(last_date, "%Y-%m-%d")

def reduce_use_new(key, recs):
    """Reducer that keeps one record per key, preferring the one flagged "new".

    Args:
        key: Reduce key of the group (only used in error messages).
        recs: Iterable of dict-like records sharing that key. At most two are
            expected, and when there are two exactly one of them must have
            ``flag == "new"``.

    Yields:
        A single record with the ``flag`` field removed: the "new" record when
        two are given, otherwise the only record.

    Raises:
        ValueError: If more than two records share the key.
        AssertionError: If two records are given and the number of them
            flagged "new" is not exactly one.
    """
    data = list(recs)
    if len(data) > 2:
        raise ValueError(
            "Expected at most 2 records for key {!r}, got {}".format(key, len(data)))

    if len(data) == 2:
        fresh = [rec for rec in data if rec.get("flag") == "new"]
        # Explicit raise rather than ``assert`` so the check survives ``-O``.
        if len(fresh) != 1:
            raise AssertionError(
                "Expected exactly 1 record flagged 'new' for key {!r}, got {}".format(
                    key, len(fresh)))
        chosen = fresh[0]
        del chosen["flag"]
        yield chosen
        return

    only = data[0]
    if only.get("flag"):  # Needed on the first run
        del only["flag"]
    yield only

#     res = None
#     for rec in recs:
#         if res is None:
#             res = rec
#             continue
#         if rec.get("flag") == "new":
#             res = rec
#         else:
#             if not (res.get("flag") == "new"):
#                 res = rec
#     if "flag" in res:
#         del res["flag"]
#     yield res

def _build_app_case(app_dict):
    """Return a YQL CASE expression over ``clid`` that yields the app name.

    ``app_dict`` maps an app name to the list of ``clid`` values belonging to
    it; any other ``clid`` falls into 'other'.
    """
    whens = " ".join(
        "WHEN '{}' THEN '{}'".format(clid, app)
        for app, clids in app_dict.items()
        for clid in clids
    )
    return "CASE clid " + whens + " ELSE 'other' END"


def _load_yql_token(token_filename):
    """Export the first line of ``token_filename`` as the YQL_TOKEN variable."""
    try:
        with open(token_filename) as rf:
            os.environ["YQL_TOKEN"] = rf.readline().strip()
    except IOError:
        # Best effort: a missing or unreadable token file is not an error
        # here; the client is created regardless.
        pass


def _build_daily_query(table_out, table_in, date_str, app_case):
    """Return the YQL text that aggregates one day of tracks into ``table_out``."""
    return ("USE hahn;"
            " INSERT INTO `{table}` WITH TRUNCATE"
            " SELECT '{date}' as `date`, 'new' as flag, SUM(length) as length, region_id, app FROM ("
            " SELECT SUM(geolength) as length, region_id, {app_case} AS app"
            " FROM `{tracks}`"
            " GROUP BY region_id, clid"
            ") GROUP BY region_id, app;"
            ).format(table=table_out,
                     tracks=table_in,
                     date=date_str,
                     app_case=app_case)


def main():
    """Compute per-day totals for every new day and merge them into FINAL_TABLE.

    Starting from the day after the one returned by ``get_last_calculated()``,
    walks forward day by day while a non-empty source table exists, builds a
    per-day table for each such day (skipping days whose per-day table is
    already non-empty), then merges all per-day tables with the existing
    summary table via a map-reduce with ``reduce_use_new`` and removes the
    per-day tables.
    """
    yt.config.set_proxy("hahn.yt.yandex.net")

    app_dict = {"navi": ['ru.yandex.mobile.navigator', 'ru.yandex.yandexnavi'],
                "mobmaps": ["ru.yandex.traffic", "ru.yandex.yandexmaps"]}
    app_case = _build_app_case(app_dict)

    _load_yql_token(os.path.expanduser("~/.yql/token"))
    yql_client = YqlClient()

    the_date = get_last_calculated()
    print("last calculated date", the_date)
    tables = []
    while True:
        the_date += datetime.timedelta(days=1)
        date_str = the_date.strftime("%Y-%m-%d")
        table_out = "{}/{}".format(SAVE_TABLES, date_str)
        table_in = "//home/maps/jams/production/data/travel_times/{}".format(date_str)

        # Stop at the first day that has no source data yet.
        if not yt.exists(table_in) or yt.is_empty(table_in):
            break

        tables.append(table_out)

        # A non-empty per-day table is reused as is, so it is still merged
        # below without being recomputed.
        if yt.exists(table_out) and not yt.is_empty(table_out):
            print(table_out, "already exists and not blank, wouldn't calculate it again")
            continue

        query = _build_daily_query(table_out, table_in, date_str, app_case)
        run_yql_query(query, yql_client)

    if not tables:
        print("No new data")
        return

    table_out = FINAL_TABLE
    # The summary table is both an input and the output of the merge, so that
    # previously accumulated rows are carried over.
    table_in = tables + ([table_out] if yt.exists(table_out) else [])
    reducer = reduce_use_new
    print("sort + ",  reducer, table_in, table_out)
    yt.run_map_reduce(None, reducer, table_in, table_out,
                      sort_by=["date", "app", "region_id"],
                      reduce_by=["date", "app", "region_id"])

    # Per-day tables are only intermediate; drop them once merged.
    for table in tables:
        yt.remove(table)


# Run the update only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
