# coding=utf-8

import requests
import pandas as pd
import io
import sys
import yt.wrapper as yt
import ast
import argparse
from nile.api.v1 import clusters

def get_ch_data(query, host, user, password, connection_timeout=5000, **kwargs):
    """POST ``query`` to a ClickHouse HTTP endpoint and return the response text.

    A ``format TabSeparatedWithNames`` clause is appended to the query before
    it is sent, so the caller must not specify an output format itself.

    Args:
        query: SQL text without a trailing ``format`` clause.
        host: URL the request is posted to.
        user: User name for HTTP basic auth.
        password: Password for HTTP basic auth.
        connection_timeout: Passed through as the ``timeout`` of
            ``requests.post``.
        **kwargs: Sent as URL query parameters of the request.

    Returns:
        The response body as text.

    Raises:
        ValueError: With ``(status_code, content)`` as arguments when the
            response status is not 200. Both are also printed first.
    """
    format_clause = """
        format
                  TabSeparatedWithNames"""
    response = requests.post(
        host,
        params=kwargs,
        auth=(user, password),
        timeout=connection_timeout,
        data=(query + format_clause).encode('utf-8'),
    )
    if response.status_code != 200:
        print(response.status_code, response.content)
        raise ValueError(response.status_code, response.content)
    return response.text

def read_data(query, host, user, password, connection_timeout=5000):
    """Run ``query`` through ``get_ch_data`` and load the result into pandas.

    The tab-separated response (first line holds the column names) is parsed
    with ``pandas.read_csv``.

    Returns:
        A ``pandas.DataFrame`` with one column per selected expression.
    """
    raw_tsv = get_ch_data(query, host, user, password, connection_timeout)
    return pd.read_csv(io.StringIO(raw_tsv), sep="\t")

# Columns whose cells are converted from text to Python objects with
# ast.literal_eval after loading (presumably ClickHouse array literals such
# as "['a','b']" or "[1,2]" -- verify against the actual response).
_ARRAY_COLUMNS = (
    "ParsedParams_Key1",
    "ParsedParams_Key2",
    "ParsedParams_Key3",
    "ParsedParams_Key4",
    "ParsedParams_Key5",
    "ParsedParams_Key6",
    "ParsedParams_Key7",
    "ParsedParams_Key8",
    "OSHierarchy",
    "Experiment_SystemID",
    "Experiment_GroupID",
)


def _build_visits_query(timestamp):
    """Return the ClickHouse query selecting visits newer than ``timestamp``.

    ``timestamp`` is substituted into the SQL text twice (as the lower bound
    for ``StartTime`` and, via ``toDate``, for ``StartDate``), so it must
    already be a validated number.
    """
    # NOTE(review): RegionID is cast with toInt8 although this same query
    # compares a region id to 225, which does not fit into Int8 -- confirm
    # the cast is intended before relying on the RegionID column.
    return (
        "select "
        "toUInt16(StartDate) as StartDate, "
        "toUInt32(StartTime) as StartTime, "
        "toInt8(Sign) as Sign, "
        "toInt8(UserAgent) as UserAgent, "
        "toInt8(IsMobile) as IsMobile, "
        "toInt8(IsTablet) as IsTablet, "
        "toInt8(IsTV) as IsTV, "
        "toInt8(IsYandex) as IsYandex, "
        "toUInt64(PassportUserID) as PassportUserID, "
        "toUInt64(UserID) as UserID, "
        "toUInt32(UserIDType) as UserIDType, "
        "toUInt8(OS) as OS, "
        "dictGetString('OS', 'value', toUInt64(OS)) AS OSName, "
        "dictGetUInt64('OS', 'ParentId', toUInt64(OS)) AS ParentOS, "
        "dictGetString('OS', 'value', ParentOS) as ParentOSName, "
        "dictGetHierarchy('OS', toUInt64(OS)) as OSHierarchy, "
        "toInt8(RegionID) as RegionID, "
        "coalesce(nullIf(coalesce(nullif(if(regionToCountry(toUInt32(RegionID)) = 225, coalesce(nullIf(regionToName(regionToArea(toUInt32(RegionID)), 'ru'), ''), regionToName(regionToDistrict(toUInt32(RegionID)), 'ru')), null), ''), nullIf(regionToName(regionToCountry(toUInt32(RegionID)), 'ru'), ''), 'Неопределено'), 'Россия'), 'Россия — неопределено') as region, "
        "ParsedParams.Key1 as ParsedParams_Key1, "
        "ParsedParams.Key2 as ParsedParams_Key2, "
        "ParsedParams.Key3 as ParsedParams_Key3, "
        "ParsedParams.Key4 as ParsedParams_Key4, "
        "ParsedParams.Key5 as ParsedParams_Key5, "
        "ParsedParams.Key6 as ParsedParams_Key6, "
        "ParsedParams.Key7 as ParsedParams_Key7, "
        "ParsedParams.Key8 as ParsedParams_Key8, "
        "Experiment.SystemID as Experiment_SystemID, "
        "Experiment.GroupID as Experiment_GroupID "
        "from visits_all "
        "where CounterID = 73147015 "
        "and toUInt32(StartTime) > {0} "
        "and toDate(StartDate) >= toDate({0})"
    ).format(timestamp)


def _parse_literal(cell):
    """Convert one textual cell into the Python literal it spells out."""
    return ast.literal_eval(str(cell))


def main():
    """Append visits newer than ``--timestamp`` from ClickHouse to a YT table.

    Command-line arguments (all required): ``--timestamp``, ``--yql_token``,
    ``--ch_user``, ``--ch_password``, ``--ch_host``, ``--target_table``.

    The visits are fetched with ``read_data``, the columns listed in
    ``_ARRAY_COLUMNS`` are parsed into Python objects, and the rows are
    appended to ``--target_table`` through the Hahn cluster client.
    """
    parser = argparse.ArgumentParser()
    # type=int: the value is formatted straight into the SQL text, so anything
    # that is not a plain integer is rejected here instead of reaching the
    # database.
    parser.add_argument('--timestamp', required=True, type=int, help='Last payment timestamp')
    parser.add_argument('--yql_token', required=True, help='Hahn YQL token')
    parser.add_argument('--ch_user', required=True, help='Clickhouse user')
    parser.add_argument('--ch_password', required=True, help='Clickhouse password')
    parser.add_argument('--ch_host', required=True, help='Clickhouse host')
    parser.add_argument('--target_table', required=True, help='Table at yt')
    args = parser.parse_args()

    cluster = clusters.yt.Hahn(token=args.yql_token)

    data = read_data(
        query=_build_visits_query(args.timestamp),
        host=args.ch_host,
        user=args.ch_user,
        password=args.ch_password,
    )
    for column in _ARRAY_COLUMNS:
        data[column] = data[column].apply(_parse_literal)

    # Transposing first makes to_dict() key by row index, so values() yields
    # one {column: value} mapping per row.
    rows = data.T.to_dict().values()

    upload_client = cluster.driver.client
    upload_client.write_table(
        upload_client.TablePath(name=args.target_table, append=True),
        rows,
    )

if __name__ == '__main__':
    # Run the export only when executed as a script, not when imported.
    main()
