# -*- coding: utf-8 -*-
import os
import argparse
import datetime
import time
import sys
import requests
import json
import yt.wrapper as yt
import itertools
import logging

# Switch the logger named "Yt" to DEBUG verbosity
# (presumably the logger yt.wrapper writes to — TODO confirm).
logging.getLogger("Yt").setLevel(logging.DEBUG)


def get_arg_parser():
    """Parse the process command line and return the resulting namespace.

    Despite its name this function returns the *parsed arguments*, not the
    parser object. ``--input-dates`` is opened for reading by argparse;
    ``--ch-table``, ``--yt-path``, ``--ch-user`` and ``--ch-password`` are
    mandatory strings; ``--delete-only`` is an optional boolean switch.
    """
    cli = argparse.ArgumentParser("filter results")

    cli.add_argument(
        "--input-dates", required=True, type=argparse.FileType("r")
    )
    # The remaining mandatory options are plain strings with no extra config.
    for option in ("--ch-table", "--yt-path", "--ch-user", "--ch-password"):
        cli.add_argument(option, required=True)
    cli.add_argument("--delete-only", action="store_true")

    parsed = cli.parse_args()
    return parsed


# Command-line options are parsed once, at import time; the rest of the
# script reads its configuration from ``args``.
args = get_arg_parser()

# Credentials used for HTTP basic auth against ClickHouse.
USER = args.ch_user
PASSWORD = args.ch_password

# Destination columns, in the order the INSERT statement lists them.
_INSERT_COLUMNS = (
    "UpdateTime",
    "ParentUUID",
    "ContentUUID",
    "PageID",
    "ParentTypeID",
    "ContentTypeID",
    "VideoContentID",
    "IsView",
    "UniqID",
    "Service",
    "RegionID",
    "UserGender",
    "UserAgeSegment",
    "Price",
    "PartnerPrice",
    "Duration",
    "Hits",
    "ShownHits",
    "DeviceType",
    "BrowserName",
    "VideoCategoryID",
)
rows = "(" + ", ".join(_INSERT_COLUMNS) + ")"

# Both templates are formatted twice: the table (and column list) is filled
# in here, while the ``{values}`` / ``{dates}`` placeholder is re-emitted
# verbatim so it can be filled in later, once per request.
insert_query = u"""insert into {ch_table} {rows} Values \n{values}""".format(
    ch_table=args.ch_table,
    rows=rows,
    values="{values}",
)
delete_query = u"""alter table {ch_table} delete where "UpdateTime" in ({dates})""".format(
    dates="{dates}",
    ch_table=args.ch_table,
)


def get_clickhouse_data(
    query,
    host="https://vla-gwso9dly0670we0h.db.yandex.net:8443/?database=vh_stats",
    connection_timeout=1500,
    max_attempts=5,
    backoff_step=300,
):
    """POST ``query`` to the ClickHouse HTTP endpoint, retrying connection errors.

    Authentication uses the module-level ``USER`` / ``PASSWORD`` pair.
    Only ``requests.exceptions.ConnectionError`` is retried; any other
    exception propagates immediately. The HTTP status is NOT checked here —
    callers must inspect ``status_code`` on the returned response.

    Args:
        query: statement text (or already-encoded bytes) sent as the body.
        host: full endpoint URL, including the ``database`` query parameter.
        connection_timeout: value passed to ``requests`` as ``timeout``, in
            seconds.
        max_attempts: total number of POST attempts before giving up. At
            least one attempt is always made.
        backoff_step: base sleep in seconds; the pause after the N-th failed
            attempt is ``backoff_step * N``.

    Returns:
        The ``requests.Response`` of the first attempt that did not fail
        with a connection error.

    Raises:
        requests.exceptions.ConnectionError: when the last allowed attempt
            also fails to connect.
    """
    # Encode text explicitly: left as text, the HTTP layer picks its own
    # (non-UTF-8) encoding and fails or corrupts non-ASCII characters.
    if isinstance(query, type(u"")):
        query = query.encode("utf-8")

    attempt = 0
    while True:
        attempt += 1
        try:
            return requests.post(
                host,
                auth=(USER, PASSWORD),
                data=query,
                timeout=connection_timeout,
                # NOTE(review): TLS certificate verification is disabled, so
                # the credentials above are exposed to a man-in-the-middle.
                # Prefer pointing ``verify`` at the proper CA bundle.
                verify=False,
            )
        except requests.exceptions.ConnectionError as e:
            if attempt >= max_attempts:
                # Do not announce a sleep that will never happen.
                print(
                    "giving up after {} attempts, last connection error: {}".format(
                        attempt, e
                    )
                )
                raise
            tosleep = backoff_step * attempt
            print(
                "retrying cause of connection error: {}. Sleeping for {} secs".format(
                    e, tosleep
                )
            )
            time.sleep(tosleep)


# Maximum number of row tuples packed into a single INSERT request.
bulk_size = 50000

# Template for one row tuple of the INSERT statement. String columns are
# wrapped in single quotes, numeric columns are substituted as-is. The
# pieces are concatenated without any whitespace between them.
values = (
    u"("
    u"'{UpdateTime}',"
    u"'{ParentUUID}',"
    u"'{ContentUUID}',"
    u"{PageID},"
    u"{ParentTypeID},"
    u"{ContentTypeID},"
    u"'{VideoContentID}',"
    u"{IsView},"
    u"{UniqID},"
    u"'{Service}',"
    u"{RegionID},"
    u"'{UserGender}',"
    u"'{UserAgeSegment}',"
    u"{Price},"
    u"{PartnerPrice},"
    u"{Duration},"
    u"{Hits},"
    u"{ShownHits},"
    u"{DeviceType},"
    u"'{BrowserName}',"
    u"{VideoCategoryID}"
    u")"
)

# Shared YT client used by gen_query() to read the source tables.
# NOTE(review): "hahn" is presumably the YT cluster/proxy alias — confirm.
ytc = yt.YtClient(proxy="hahn")


# String-typed source columns that are decoded, escaped and embedded as
# single-quoted literals in the generated row tuple.
_STRING_COLUMNS = (
    "ParentUUID",
    "ContentUUID",
    "VideoContentID",
    "Service",
    "UserGender",
    "UserAgeSegment",
    "BrowserName",
)


def _escape_ch_string(raw):
    """Decode a string cell as UTF-8 and escape it for a single-quoted literal.

    Falsy cells (``None``, empty string) are rendered as an empty string.
    """
    text = (raw or "").decode("utf-8")
    # Double the backslashes first; doing it after escaping the quotes would
    # also double the backslash that was just inserted in front of each quote.
    return text.replace("\\", "\\\\").replace("'", "\\'")


def gen_query(dates):
    """Yield one formatted row tuple per row of the per-date YT tables.

    For every entry of ``dates`` the table ``<args.yt_path>/<date>`` is read
    through the module-level ``ytc`` client (JSON format) and each row is
    rendered with the module-level ``values`` template.

    If decoding any string column of a row raises ``UnicodeEncodeError``,
    ALL string columns of that row are emitted as empty strings; the numeric
    columns are still taken from the row.

    Args:
        dates: iterable of table names (one per date) located directly under
            ``args.yt_path``.

    Yields:
        The text of one ``(...)`` tuple suitable for an ``INSERT ... VALUES``
        statement.
    """
    for date in dates:
        table = args.yt_path + "/" + date
        for row in ytc.read_table(table, format="json"):
            # Non-positive timestamps are clamped to 0, i.e. the epoch date.
            update_time = row["UpdateTime"]
            update_time = update_time if update_time > 0 else 0
            # date.fromtimestamp() converts using the machine's local time zone.
            update_time = datetime.date.fromtimestamp(update_time).isoformat()

            try:
                text_cells = {
                    column: _escape_ch_string(row[column])
                    for column in _STRING_COLUMNS
                }
            except UnicodeEncodeError:
                # NOTE(review): presumably raised on Python 2 when a cell is
                # already unicode with non-ASCII characters — confirm.
                # Blank EVERY string column so that no value is left unset
                # or carried over from the previous row.
                text_cells = dict.fromkeys(_STRING_COLUMNS, "")

            yield values.format(
                UpdateTime=update_time,
                ParentUUID=text_cells["ParentUUID"],
                ContentUUID=text_cells["ContentUUID"],
                PageID=row["PageID"],
                ParentTypeID=row["ParentTypeID"],
                ContentTypeID=row["ContentTypeID"],
                VideoContentID=text_cells["VideoContentID"],
                IsView=row["IsView"],
                UniqID=row["UniqID"],
                Service=text_cells["Service"],
                RegionID=row["RegionID"],
                UserGender=text_cells["UserGender"],
                UserAgeSegment=text_cells["UserAgeSegment"],
                Price=row["Price"],
                PartnerPrice=row["PartnerPrice"],
                Duration=row["Duration"],
                Hits=row["Hits"],
                ShownHits=row["ShownHits"],
                # Missing/falsy ids are stored as 0.
                DeviceType=row["DeviceType"] or 0,
                BrowserName=text_cells["BrowserName"],
                VideoCategoryID=row["VideoCategoryID"] or 0,
            )


def _fail_unless_ok(response):
    """Raise a generic Exception unless the response status is HTTP 200."""
    if response.status_code == 200:
        return
    raise Exception(
        "http error {}: {}".format(
            response.status_code, response.text
        )
    )


def _next_batch():
    """Join up to ``bulk_size`` pending row tuples; '' once rows run out."""
    return ", \n".join(itertools.islice(query_gen, bulk_size))


# The list of dates to reload comes from the JSON file given on the CLI.
dates = json.load(args.input_dates)

# Step 1: drop whatever is already stored for those dates.
quoted_dates = ["'{date}'".format(date=date) for date in dates]
to_delete = delete_query.format(dates=",".join(quoted_dates))
answer = get_clickhouse_data(to_delete)
_fail_unless_ok(answer)

if args.delete_only:
    print("delete query executed, exiting")
    sys.exit(0)

# Step 2: stream the rows back in, one INSERT per batch.
query_gen = gen_query(dates)
count = 0

text = _next_batch()
while text:
    text = insert_query.format(values=text)
    answer = get_clickhouse_data(text)
    _fail_unless_ok(answer)
    count += 1
    # Fetch the following batch before reporting progress for this one.
    text = _next_batch()
    print(count)
