# -*- coding: utf-8 -*-
import os
import argparse
import datetime
import time
import sys
import requests
import json
import yt.wrapper as yt
import itertools

def get_arg_parser(argv=None):
    """Parse the command line and return the parsed arguments.

    Despite its name, this returns the parsed namespace, not the parser.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the original zero-argument call did.

    Returns:
        argparse.Namespace with two attributes:
        ``input_dates`` -- the positional argument, opened for reading;
        ``output`` -- the value of ``--output``, or ``None`` if omitted.
    """
    parser = argparse.ArgumentParser('filter results')

    parser.add_argument('input_dates', type=argparse.FileType('r'))
    parser.add_argument("--output")

    return parser.parse_args(argv)

# The command line is parsed at import time; the script further down reads
# args.input_dates and args.output.
args = get_arg_parser()

# Column list placed after the table name in the INSERT statement. The order
# matches the placeholders of the `values` row template defined below.
rows = "(UpdateTime, ParentUUID, ContentUUID, PageID, ParentTypeID, ContentTypeID, VideoSessionID, VideoContentID, IsView, UniqID, Service, RegionID, UserGender, UserAgeSegment, Price, PartnerPrice, Duration, Hits, ShownHits, IP, UserAgent, DeviceType, DetailedDeviceType, BrowserName, VideoCategoryID)"
# ClickHouse credentials come from the environment; a missing variable raises
# KeyError right here, before any request is made.
USER = os.environ["CH_USER"]
PASSWORD = os.environ["CH_PASSWORD"]
CH_TABLE = "VideohostingViewsDetailedStat"
# Both templates are formatted in two stages: table name (and column list) are
# filled in now, while "{values}" / "{dates}" are re-inserted literally so that
# a later .format() call can supply the per-request payload.
insert_query =  u"""insert into {ch_table} {rows} Values \n{values}""".format(rows=rows, values="{values}", ch_table=CH_TABLE)
delete_query = u"""alter table {ch_table} delete where "UpdateTime" in ({dates})""".format(ch_table=CH_TABLE, dates="{dates}")

def get_clickhouse_data(query, host='https://[2a02:6b8:c0e:373:0:1589:2778:4751]:8443/?database=vh_stats', connection_timeout=1500):
    """POST *query* to the ClickHouse HTTP endpoint and return the response.

    Args:
        query: Statement text sent as the request body.
        host: Full endpoint URL, including the ``database`` query parameter.
        connection_timeout: Value passed to requests as ``timeout``.

    Returns:
        The ``requests`` response object, unmodified. The HTTP status is not
        checked here; callers inspect ``status_code`` themselves.
    """
    # NOTE(review): verify=False turns off TLS certificate verification for
    # this request; presumably needed because the host is addressed by a raw
    # IPv6 literal -- confirm before tightening.
    return requests.post(
        host,
        data=query,
        auth=(USER, PASSWORD),
        timeout=connection_timeout,
        verify=False,
    )

# Maximum number of row tuples joined into one INSERT request.
bulk_size = 100000

# Template for a single row of the INSERT's VALUES list. Placeholders wrapped
# in single quotes are emitted as quoted literals, the rest are inserted bare.
# The trailing backslashes continue the string literal, so the formatted
# result contains no newlines. Placeholder order matches the `rows` column list.
values = u"(\
'{UpdateTime}',\
'{ParentUUID}',\
'{ContentUUID}',\
{PageID},\
{ParentTypeID},\
{ContentTypeID},\
'{VideoSessionID}',\
'{VideoContentID}',\
{IsView},\
{UniqID},\
'{Service}',\
{RegionID},\
'{UserGender}',\
'{UserAgeSegment}',\
{Price},\
{PartnerPrice},\
{Duration},\
{Hits},\
{ShownHits},\
'{IP}',\
'{UserAgent}',\
{DeviceType},\
'{DetailedDeviceType}',\
'{BrowserName}',\
{VideoCategoryID}\
)"

# YT client bound to the "hahn" proxy; the token is read from the YT_TOKEN
# environment variable (KeyError at import time if it is not set).
ytc = yt.YtClient(proxy="hahn", token=os.environ["YT_TOKEN"])

# Text columns that are decoded and escaped before being placed inside the
# single-quoted literals of the VALUES row template. The names are both the
# keys of the source row and the placeholder names of the template.
_TEXT_COLUMNS = (
    "UserAgent",
    "DetailedDeviceType",
    "BrowserName",
    "VideoSessionID",
    "ParentUUID",
    "ContentUUID",
    "VideoContentID",
    "Service",
    "UserGender",
    "UserAgeSegment",
)


def _escape_text(raw):
    """Decode *raw* as UTF-8 (``None``/empty becomes ``""``) and escape it.

    Backslashes and single quotes are prefixed with a backslash so the
    result can sit inside a single-quoted literal.
    """
    return (raw or "").decode("utf-8").replace('\\', '\\\\').replace('\'', '\\\'')


def gen_query(dates):
    """Yield one formatted VALUES tuple per row of the YT tables for *dates*.

    Args:
        dates: Iterable of table names (strings); each is appended to the
            ``vhds_reduced`` directory path and read via the module-level
            YT client.

    Yields:
        The module-level ``values`` template filled with the row's data.

    If decoding any text column raises ``UnicodeEncodeError``, every text
    column of that row is emitted as an empty string (all-or-nothing, as in
    the original implementation).
    """
    for date in dates:
        table = "//home/videoquality/vh_analytics/vhds_reduced/" + date
        for row in ytc.read_table(table, format="json"):
            # Non-positive timestamps are clamped to 0 before conversion.
            update_time = row["UpdateTime"]
            update_time = update_time if update_time > 0 else 0
            update_time = datetime.date.fromtimestamp(update_time).isoformat()

            try:
                text_fields = {
                    column: _escape_text(row[column])
                    for column in _TEXT_COLUMNS
                }
            except UnicodeEncodeError:
                # NOTE(review): presumably raised by Python 2 when .decode()
                # is called on a unicode object holding non-ASCII characters.
                # The fallback now blanks *all* text columns; previously
                # VideoContentID and Service were left unassigned here, which
                # caused a NameError on the first row or silently reused the
                # previous row's values.
                text_fields = dict.fromkeys(_TEXT_COLUMNS, "")

            # NOTE(review): IP is inserted into a quoted literal without
            # escaping -- confirm it can never contain quotes or backslashes.
            yield values.format(
                UpdateTime=update_time,
                PageID=row["PageID"],
                ParentTypeID=row["ParentTypeID"],
                ContentTypeID=row["ContentTypeID"],
                IsView=row["IsView"],
                UniqID=row["UniqID"],
                RegionID=row["RegionID"],
                Price=row["Price"],
                PartnerPrice=row["PartnerPrice"],
                Duration=row["Duration"],
                Hits=row["Hits"],
                ShownHits=row["ShownHits"],
                IP=row["IP"] or "",
                DeviceType=row["DeviceType"] or 0,
                VideoCategoryID=row["VideoCategoryID"] or 0,
                **text_fields
            )

# Main script: drop the existing ClickHouse rows for the requested dates, then
# re-insert them from YT in batches of at most `bulk_size` rows.
dates = json.load(args.input_dates)

to_delete = delete_query.format(dates=",".join(["'{date}'".format(date=date) for date in dates]))
answer = get_clickhouse_data(to_delete)
if answer.status_code != 200:
    # Show the statement that was actually sent. The original printed `text`
    # here, which is not defined yet and hid the real error behind a NameError.
    print(to_delete)
    print(answer.text)
    raise Exception("ClickHouse delete failed with HTTP status {0}".format(answer.status_code))

query_gen = gen_query(dates)

count = 0

while True:
    # islice pulls the next batch lazily; an empty join means the generator
    # is exhausted.
    batch = ", \n".join(itertools.islice(query_gen, bulk_size))
    if not batch:
        break
    text = insert_query.format(values=batch)
    answer = get_clickhouse_data(text)
    if answer.status_code != 200:
        print(text)
        print(answer.text)
        raise Exception("ClickHouse insert failed with HTTP status {0}".format(answer.status_code))
    count += 1
    # Progress indicator: number of batches inserted so far.
    print(count)

# --output is optional on the command line; only write the (empty JSON object)
# result file when a path was given instead of crashing on open(None).
if args.output:
    with open(args.output, "w") as f:
        f.write("{}")
