from __future__ import print_function
from yql.api.v1.client import YqlClient
import datetime
from subprocess import call
import yt.wrapper as yt
import os
import pandas as pd
import urllib
import urllib2
import requests
import json
import numpy as np

def post_user_params(counter_id, token):
    """Upload the contents of ``for_metrica.csv`` in consecutive row chunks.

    The CSV is loaded with pandas and cut into slices of at most 30000 rows.
    For every slice a progress line is printed and the slice is handed to
    ``post_user_params_df`` together with ``counter_id`` and ``token``,
    which are passed through unchanged.
    """
    rows_per_chunk = 30000
    frame = pd.read_csv('for_metrica.csv')
    total_rows = frame.shape[0]

    # Walk the frame by start offset; the last slice may be shorter.
    for chunk_no, start in enumerate(range(0, total_rows, rows_per_chunk)):
        piece = frame[start:start + rows_per_chunk]
        print('post params for ' + str(chunk_no) + ' chunk')
        post_user_params_df(counter_id, token, piece)

def post_user_params_df(counter_id, token, tmp_df):
    """Upload one chunk of user parameters to the Metrika API and confirm it.

    The ``userid``, ``key`` and ``value`` columns of ``tmp_df`` are written to
    ``for_metrica_tmp.csv`` (overwritten on every call). That file is POSTed
    to the counter's ``user_params`` upload endpoint, and the uploading id
    taken from the response is then confirmed with a second POST.

    Args:
        counter_id: Counter id substituted into both request URLs.
        token: OAuth token sent as the ``oauth_token`` query parameter.
        tmp_df: DataFrame that has ``userid``, ``key`` and ``value`` columns.

    Returns:
        The body text of the confirm response.

    Raises:
        ValueError: If either request answers with a status other than 200;
            the message is the response body.
    """
    tmp_path = 'for_metrica_tmp.csv'
    tmp_df[['userid', 'key', 'value']].to_csv(tmp_path, header=True, index=False)

    url = 'https://api-metrika.yandex.ru/management/v1/counter/{counterId}/user_params/uploadings/upload?action=update&oauth_token={token}' \
        .format(counterId=counter_id, token=token)
    # Keep the file open only for the duration of the upload so the handle
    # is released even when the request raises.
    with open(tmp_path, 'rb') as csv_file:
        r = requests.post(url, files={'file': csv_file})
    if r.status_code != 200:
        # Call form of raise: valid in both Python 2 and Python 3.
        raise ValueError(r.text)

    uploading_id = json.loads(r.text)['uploading']['id']
    url = 'https://api-metrika.yandex.ru/management/v1/counter/{counterId}/user_params/uploading/{uploadingId}/confirm?oauth_token={token}' \
        .format(counterId=counter_id, token=token, uploadingId=uploading_id)
    r = requests.post(url, data='{"uploading":{"content_id_type":"user_id", "action":"update"}}')
    if r.status_code != 200:
        raise ValueError(r.text)
    return r.text

def callback(operation):
    """Print a progress report for ``operation``.

    A summary line with the operation id, status and the string form of its
    ``json`` attribute is always printed. When ``json`` contains a
    ``'status'`` key and the operation is no longer in progress, ``json`` is
    printed once more on its own.
    """
    report = 'Operation %s is %s, progress data:\n%s' % (
        operation.operation_id,
        operation.status,
        str(operation.json)
    )
    print(report)

    # Nothing more to show unless a status is present and the run is over.
    if 'status' not in operation.json or operation.in_progress:
        return
    print(operation.json)

def apply_yql_query(filename, dated=False):
    """Read a YQL query from ``filename``, submit it and subscribe to updates.

    Args:
        filename: Path of the file holding the query text.
        dated: When true, every occurrence of the literal placeholder
            ``current_query_date`` in the query is replaced with yesterday's
            date (``str`` of a ``datetime.date``).

    The client is created for db ``hahn`` with the module-level name
    ``yql_token``, which must be defined before this function is called.
    The final query text and the request object are printed.
    """
    client = YqlClient(db='hahn', token=yql_token)

    # Read inside a context manager so the file handle is always closed.
    with open(filename) as query_file:
        query = query_file.read()

    if dated:
        date = str(datetime.date.today() - datetime.timedelta(1))
        query = query.replace("current_query_date", date)
    print(query)

    request = client.query(query)
    request.run()
    request.subscribe(callback)
    # NOTE(review): nothing in this function actually sleeps; presumably
    # subscribe() is what waits for the operation -- confirm.
    print('Sleeping...')
    print(request)


def download_all_uids():
    """Fetch distinct passport user ids over HTTP and store them in YT.

    A ``select distinct PassportUserID`` query over ``visits_all`` (counter
    30468942, ``StartDate`` after 2017-01-01, ``FORMAT JSON``) is POSTed to
    the HTTP endpoint below. The ``data`` list of the JSON reply is written
    to the ``webmaster_puids`` YT table.
    """
    # NOTE(review): the user name and password are hard-coded in the URL;
    # they should come from configuration or the environment.
    url = 'http://mtmega.yandex.ru:8123/?user=aleksart&password=aleksart'
    data = "select distinct PassportUserID as passport_id from visits_all where StartDate > \'2017-01-01\' and CounterID=30468942 FORMAT JSON"
    req = urllib2.Request(url, data)
    response = urllib2.urlopen(req)
    try:
        js_resp = json.loads(response.read())
    finally:
        # Release the connection even if reading or decoding fails.
        response.close()
    yt.write_table("//home/webmaster/users/aleksart/wmc-4628/webmaster_puids", js_resp["data"])

def mapper(row):
    """Turn one segment row into per-segment ``userid``/``key``/``value`` records.

    For each of the known segment names that is present as a key in ``row``,
    one record is yielded with ``userid`` taken from ``row["puid"]`` and
    ``key`` set to the segment name. ``value`` is the row's value for that
    segment, or the string ``"0"`` when that value is the empty string.
    Segment names absent from ``row`` produce nothing.

    Args:
        row: Mapping with a ``puid`` key and any subset of the segment keys.

    Yields:
        dict: ``{"userid": ..., "key": ..., "value": ...}``.
    """
    segments = ['seo', 'developers', 'marketing', 'business', 'top_manager']
    for segment in segments:
        if segment not in row:
            continue
        value = row[segment]
        # Compare by equality, not identity: an empty string is not
        # guaranteed to be the same object as the "" literal.
        if value != "":
            yield {"userid": row["puid"], "key": segment, "value": value}
        else:
            yield {"userid": row["puid"], "key": segment, "value": "0"}

def map_for_metrica():
    """Run ``mapper`` as a YT map from ``filtered_segments`` to ``segments_for_metrika``."""
    source_table = "//home/webmaster/users/aleksart/wmc-4628/filtered_segments"
    destination_table = "//home/webmaster/users/aleksart/wmc-4628/segments_for_metrika"
    yt.run_map(mapper, source_table, destination_table)

def prepare_csv_for_metrica():
    """Dump the ``segments_for_metrika`` YT table to ``for_metrica.csv``.

    The table is read in DSV format, loaded into a DataFrame, and written
    without the index. Only the ``userid``, ``key`` and ``value`` columns
    are written, in that order, under those same header names.
    """
    wanted = ["userid", "key", "value"]
    table = yt.read_table("//home/webmaster/users/aleksart/wmc-4628/segments_for_metrika", format=yt.DsvFormat())
    frame = pd.DataFrame(list(table))
    frame.to_csv('for_metrica.csv', columns=wanted, header=wanted, index=False)


if __name__ == "__main__":
    # Plain assignments here already create module-level names, which is what
    # apply_yql_query() reads. ``global name = value`` is not valid Python.
    yql_token = 'TODO'
    metrika_token = 'TODO'
    yt.config["token"] = yql_token
    yt.config["proxy"]["url"] = "hahn.yt.yandex.net"

    # Pipeline: dated YQL query, fetch user ids, filter segments with YQL,
    # map to userid/key/value rows, dump to CSV, upload to Metrika.
    apply_yql_query("crypta_query.yql", dated=True)
    download_all_uids()
    apply_yql_query("filter_segments.yql")
    map_for_metrica()
    prepare_csv_for_metrica()
    post_user_params(30468942, metrika_token)
