from __future__ import print_function
from yql.api.v1.client import YqlClient
import datetime
from subprocess import call
import yt.wrapper as yt
import os
import pandas as pd
import urllib
import urllib2
import requests
import json

def post_user_params(counter_id, token, csv_path='for_metrica.csv', chunk_size=30000):
    """Upload the prepared user-params CSV to Metrica in row chunks.

    The CSV at ``csv_path`` is read in full, split into consecutive slices of
    at most ``chunk_size`` rows, and every slice is handed to
    ``post_user_params_df``.

    Args:
        counter_id: Metrica counter id passed through to the upload call.
        token: OAuth token passed through to the upload call.
        csv_path: Path of the CSV to upload (defaults to the file produced
            by the preparation step).
        chunk_size: Maximum number of rows per upload request.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer, or if an
            upload request is rejected (raised by ``post_user_params_df``).
    """
    if chunk_size <= 0:
        # A zero step makes range() fail and a negative step would silently
        # upload nothing, so reject both up front.
        raise ValueError('chunk_size must be positive, got %r' % (chunk_size,))

    huge_df = pd.read_csv(csv_path)
    total_rows = huge_df.shape[0]
    for i, start in enumerate(range(0, total_rows, chunk_size)):
        # .iloc makes the positional row slicing explicit.
        chunk = huge_df.iloc[start:start + chunk_size]
        print('post params for ' + str(i) + ' chunk')
        post_user_params_df(counter_id, token, chunk)

def post_user_params_df(counter_id, token, tmp_df):
    """Upload one chunk of user params to Metrica and confirm the uploading.

    The ``userid``, ``key`` and ``value`` columns of ``tmp_df`` are written to
    ``for_metrica_tmp.csv``, that file is POSTed to the counter's
    ``user_params/uploadings/upload`` endpoint, and the returned uploading id
    is then confirmed with a second POST.

    Args:
        counter_id: Metrica counter id inserted into the request URLs.
        token: OAuth token, sent as the ``oauth_token`` query parameter.
        tmp_df: DataFrame containing ``userid``, ``key`` and ``value`` columns.

    Returns:
        The response body text of the confirm request.

    Raises:
        ValueError: If either request returns a non-200 status; the message
            is the response body.
    """
    tmp_path = 'for_metrica_tmp.csv'
    tmp_df[['userid', 'key', 'value']].to_csv(tmp_path, header=True, index=False)

    url = 'https://api-metrika.yandex.ru/management/v1/counter/{counterId}/user_params/uploadings/upload?action=update&oauth_token={token}' \
        .format(counterId=counter_id, token=token)
    # Close the upload file deterministically; this function runs once per
    # chunk, so a leaked handle would accumulate.
    with open(tmp_path, 'rb') as payload:
        r = requests.post(url, files={'file': payload})
    if r.status_code != 200:
        raise ValueError(r.text)

    uploading_id = json.loads(r.text)['uploading']['id']
    url = 'https://api-metrika.yandex.ru/management/v1/counter/{counterId}/user_params/uploading/{uploadingId}/confirm?oauth_token={token}' \
        .format(counterId=counter_id, token=token, uploadingId=uploading_id)
    r = requests.post(url, data='{"uploading":{"content_id_type":"user_id", "action":"update"}}')
    if r.status_code != 200:
        raise ValueError(r.text)
    return r.text

def clear_old_seo_puids():
    """Remove the seo_puids YT table left over from a previous run, if any."""
    table_path = "//home/webmaster/users/aleksart/seo_puids"
    if not yt.exists(table_path):
        # Nothing to clean up.
        return
    yt.remove(table_path)

def callback(operation):
    """Print a progress line for a YQL operation.

    Always prints the operation id, status and JSON payload. When the payload
    carries a 'status' key and the operation is no longer in progress, the
    payload is printed once more on its own line.
    """
    payload = operation.json
    details = (operation.operation_id, operation.status, str(payload))
    print('Operation %s is %s, progress data:\n%s' % details)

    if 'status' not in payload:
        return
    if operation.in_progress:
        return
    print(payload)

def get_seo_puids():
    """Run the YQL query that fills the seo_puids table on hahn.

    The query reads yesterday's crypta-yandexuid-profiles-log table, keeps
    rows whose marketing segment "208" equals "1" or "0.5", joins them with
    the puid_yuid_limit dictionary on yuid, and inserts
    (puid, yuid, seo_confidence) into //home/webmaster/users/aleksart/seo_puids.

    Uses the module-level ``yql_token`` and registers ``callback`` on the
    request.
    """
    client = YqlClient(db='hahn', token=yql_token)

    # str() of a date is its ISO form (YYYY-MM-DD), which is the table name suffix.
    yesterday = (datetime.date.today() - datetime.timedelta(days=1)).isoformat()

    query_parts = [
        'insert into hahn.[//home/webmaster/users/aleksart/seo_puids] select puids.puid, seo.yuid, seo.seo_confidence',
        ' from (select yandexuid as yuid, Json::GetField(Yson::SerializeJson(marketing_segments),"208"){0} as seo_confidence',
        ' from hahn.[statbox/crypta-yandexuid-profiles-log/',
        yesterday,
        ']',
        ' where Json::GetField(Yson::SerializeJson(marketing_segments),"208"){0} == "1" or Json::GetField(Yson::SerializeJson(marketing_segments),"208"){0} == "0.5") as seo ',
        'join (select CAST(map._other{"yuid"} as Uint64) as yuid, puid from hahn.[//home/crypta/production/state/graph/dicts/puid_yuid_limit] as map ) as puids on puids.yuid == seo.yuid',
    ]
    request = client.query(''.join(query_parts))

    request.run()
    # NOTE(review): nothing here visibly waits for the query to finish before
    # the caller reads the table - confirm that subscribe() blocks.
    request.subscribe(callback)
    print('Sleeping...')
    print(request)

def download_seo_uids():
    """Dump the seo_puids YT table into the local CSV file ``seo_puids``.

    The table is read in DSV format and the columns puids.puid, seo.yuid and
    seo.seo_confidence are written under the headers passport_id, yandex_id
    and confidence respectively.
    """
    table_rows = yt.read_table(
        "//home/webmaster/users/aleksart/seo_puids",
        format=yt.DsvFormat()
    )
    frame = pd.DataFrame(list(table_rows))
    frame.to_csv(
        'seo_puids',
        columns=["puids.puid", "seo.yuid", "seo.seo_confidence"],
        header=["passport_id", "yandex_id", "confidence"],
        index=False
    )
    print("downloading_seo_uids")

def download_all_uids():
    """Download the counter's distinct passport user ids into ``all_puids``.

    POSTs a ``select distinct PassportUserID`` query to the mtmega HTTP
    endpoint. On a 200 response the body is written to ``all_puids`` preceded
    by a ``passport_id`` header line; on any other status the body is printed
    and the file is left untouched.
    """
    # NOTE(review): user name and password are hard-coded in the URL and sent
    # over plain HTTP - consider moving them to configuration.
    url = 'http://mtmega.yandex.ru:8123/?user=aleksart&password=aleksart'
    data = "select distinct PassportUserID from visits_all where StartDate > '2017-01-01' and CounterID=30468942"
    req = urllib2.Request(url, data)

    response = urllib2.urlopen(req)
    try:
        result = response.read()
        status = response.code
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    if status == 200:
        # Write the header directly instead of shelling out to sed: no
        # subprocess, and the header is present even when the result is empty.
        with open('all_puids', 'w') as f:
            f.write('passport_id\n')
            f.write(result)
    else:
        print(result)

def prepare_csv_for_metrica(all_puids_path='all_puids', seo_puids_path='seo_puids',
                            output_path='for_metrica.csv'):
    """Build the user-params CSV that is uploaded to Metrica.

    Every passport id from ``all_puids_path`` (except the placeholder '0') is
    right-joined with the SEO confidences from ``seo_puids_path``; ids with no
    SEO match get confidence 0. The result is written to ``output_path`` with
    the columns ``userid``, ``key`` (always 'seo_confidence') and ``value``.

    Args:
        all_puids_path: CSV with a ``passport_id`` column.
        seo_puids_path: CSV with ``passport_id``, ``yandex_id`` and
            ``confidence`` columns.
        output_path: Destination CSV path.
    """
    all_users = pd.read_csv(all_puids_path, dtype={'passport_id': str})
    seo_users = pd.read_csv(
        seo_puids_path,
        sep=",",
        dtype={'passport_id': str, 'yandex_id': str, 'confidence': float},
    )
    # reindex already restricts the frame to exactly these two columns.
    seo_users = seo_users.reindex(columns=['passport_id', 'confidence'])
    # NOTE(review): keep=False drops every (passport_id, confidence) pair that
    # occurs more than once instead of keeping one copy - confirm this is
    # intended rather than a plain de-duplication.
    uniq_seo = seo_users.drop_duplicates(keep=False)

    all_users = all_users[all_users['passport_id'] != '0']
    joined = pd.merge(uniq_seo, all_users, on='passport_id', how='right')
    # Assign the filled column back explicitly: fillna(inplace=True) returns
    # None, and mutating a column accessed through the frame is not
    # guaranteed to propagate.
    joined['confidence'] = joined['confidence'].fillna(0)
    joined['key'] = 'seo_confidence'
    joined.to_csv(
        output_path,
        columns=["passport_id", "key", "confidence"],
        index=False,
        header=['userid', 'key', 'value'],
    )

if __name__ == "__main__":
    # Pipeline entry point: refresh the SEO puid table, download both id
    # lists, build the upload CSV and push it to Metrica.
    #
    # Plain assignments are used here: names bound at module level are
    # already global (get_seo_puids reads yql_token), and
    # `global name = value` is not valid Python syntax.
    yql_token = 'TODO'
    metrika_token = 'TODO'
    yt.config["token"] = yql_token
    yt.config["proxy"]["url"] = "hahn.yt.yandex.net"

    clear_old_seo_puids()
    get_seo_puids()
    download_seo_uids()
    download_all_uids()
    prepare_csv_for_metrica()
    post_user_params(30468942, metrika_token)
