import uuid
import os

from yt.wrapper.client import Yt
from yt.wrapper.ypath import TablePath, ypath_dirname
from webmaster3.cass import Cass, Keyspaces
from webmaster3.task import Task
from cassandra import ConsistencyLevel

# Cassandra session shared at module level; it stays None until run() connects.
SESSION = None

# Column schema handed to YT when the output table is written: two string
# columns whose names match the keys of the dicts produced by yt_row_gen().
YT_TABLE_SCHEMA = [
    dict(name='host', type='string'),
    dict(name='mainMirrorHost', type='string'),
]

def run(cluster):
    """Export the current main-mirrors generation from Cassandra to YT.

    Connects to the given Cassandra cluster, reads the generation id stored
    in ``common_data_state2`` under data_type ``MAIN_MIRRORS_DB``, collects
    every ``main_mirrors2`` row of that generation shard by shard, and writes
    the result to ``//home/webmaster/users/leonidrom/main_mirrors``.

    The opened session is also stored in the module-level ``SESSION``.

    Args:
        cluster: cluster identifier passed to ``Cass.cluster()``.

    Raises:
        IndexError: if ``common_data_state2`` has no ``MAIN_MIRRORS_DB`` row.
        ValueError: if the stored value is not a valid UUID string
            (raised by ``uuid.UUID``).
    """
    global SESSION

    SESSION = Cass.cluster(cluster).connect()
    SESSION.set_keyspace(Keyspaces.WEBMASTER3)
    # Generous timeout: a single shard query returns a whole shard's rows.
    # NOTE(review): the driver documents default_timeout in seconds - confirm.
    SESSION.default_timeout = 1200
    SESSION.default_consistency_level = ConsistencyLevel.QUORUM

    # Use driver-side parameter binding, like the shard query below, instead
    # of building the CQL text with string formatting.
    state_rows = list(SESSION.execute(
        "SELECT * FROM common_data_state2 WHERE data_type=%s",
        ('MAIN_MIRRORS_DB',)))
    if not state_rows:
        # Keep the original exception type (IndexError) but say what is wrong
        # instead of a bare "list index out of range".
        raise IndexError(
            "no row with data_type='MAIN_MIRRORS_DB' in common_data_state2")
    mirrors_gen = uuid.UUID(state_rows[0].value)
    print(mirrors_gen)

    # NOTE(review): the shard count is hard-coded; presumably main_mirrors2 is
    # split into exactly 10 shards (ids 0..9) - confirm against the schema.
    shard_count = 10

    all_mirrors = []
    for shard_no in range(shard_count):
        shard_rows = SESSION.execute(
            "SELECT * FROM main_mirrors2 WHERE mirror_generation_id=%s AND shard_id=%s",
            (mirrors_gen, shard_no))
        all_mirrors.extend(shard_rows)

    write_yt_table(yt_row_gen(all_mirrors), '//home/webmaster/users/leonidrom/main_mirrors')

def host_id_to_url(host_id):
    """Turn a colon-separated host id into a ``scheme://host`` string.

    Only the first two colon-separated fields are used; any further fields
    are ignored.

    Args:
        host_id: string such as ``'http:example.com:80'``.

    Returns:
        The first field, ``'://'``, and the second field joined together.

    Raises:
        IndexError: if ``host_id`` contains no colon.
    """
    fields = host_id.split(':')
    scheme, host = fields[0], fields[1]
    return '%s://%s' % (scheme, host)

def yt_row_gen(rows_qs):
    """Lazily convert mirror rows into dicts for the YT table.

    Prints a progress line after every 1000 rows consumed.

    Args:
        rows_qs: iterable of objects exposing ``host_id`` and
            ``main_mirror_host_id`` attributes.

    Yields:
        One dict per input row with keys ``'host'`` and ``'mainMirrorHost'``,
        each holding the result of ``host_id_to_url()`` for the matching id.
    """
    for processed, mirror in enumerate(rows_qs, 1):
        if processed % 1000 == 0:
            print("Processed: " + str(processed))

        host_url = host_id_to_url(mirror.host_id)
        main_mirror_url = host_id_to_url(mirror.main_mirror_host_id)
        yield {'host': host_url, 'mainMirrorHost': main_mirror_url}

def get_yt_token():
    """Return the YT access token, or None when none can be found.

    Lookup order:
      1. the ``YT_TOKEN`` environment variable, if set and non-empty;
      2. the first line of ``~/.yt/token``, stripped of surrounding
         whitespace.

    Returns:
        The token string, or None if the environment variable is unset or
        empty and the token file is missing, unreadable, or has a blank
        first line.
    """
    env_token = os.environ.get('YT_TOKEN')
    if env_token:
        return env_token

    token_path = os.path.join(os.path.expanduser('~'), '.yt', 'token')
    try:
        with open(token_path) as token_file:
            # Only the first line is the token; no need to read the rest.
            first_line = token_file.readline()
    except (IOError, OSError):
        # Best effort: a missing or unreadable token file just means "no
        # token". Unlike a bare except, this lets KeyboardInterrupt and
        # programming errors propagate.
        return None

    # A blank first line is treated the same as an empty file.
    return first_line.strip() or None


# Module-level YT client for the 'hahn.yt.yandex.net' proxy. It is created at
# import time; the token comes from get_yt_token() and may be None when no
# token is found.
YT_CLIENT = Yt(proxy='hahn.yt.yandex.net', token=get_yt_token())

def write_yt_table(rows, table_name):
    """Write ``rows`` to the YT table ``table_name`` via ``YT_CLIENT``.

    The destination path carries ``YT_TABLE_SCHEMA`` and ``append=False``
    (presumably replacing existing contents rather than appending - confirm
    against the YT wrapper docs); the write is issued with
    ``force_create=True`` and ``raw=False``.

    Args:
        rows: iterable of row dicts to write.
        table_name: YT path of the destination table.
    """
    destination = TablePath(table_name, append=False, schema=YT_TABLE_SCHEMA)
    YT_CLIENT.write_table(destination, rows, force_create=True, raw=False)


# Script entry point, executed at import time: hands run() and the "prod"
# argument to Task.run.
# NOTE(review): presumably "prod" is forwarded to run() as `cluster` - confirm
# in webmaster3.task.
Task.run(run, "prod")
