import os
import datetime
from cassandra import util
from yt.wrapper.client import Yt
from yt.wrapper.ypath import TablePath, ypath_dirname

from webmaster3.cass import Cass, Keyspaces
from webmaster3.task import Task

# Module-level handles. SESSION is bound by run(); WRITER is not assigned
# anywhere in the visible code.
WRITER = SESSION = None

# Requests older than this many days are not uploaded.
MAX_AGE_IN_DAYS = 10

# Column layout of the YT table written by write_yt_table(); the names match
# the keys of the row dicts built in run().
REQUESTS_TABLE_SCHEMA = [
    dict(name='Host', type='string'),
    dict(name='CreatedDate', type='string'),
    dict(name='RequestId', type='string'),
    dict(name='CurrentRegions', type='any'),
    dict(name='RequestedRegions', type='any'),
    dict(name='AcceptedRegions', type='any'),
    dict(name='EvidenceUrl', type='string'),
]


def run(cluster):
    """Upload recent host-region modification requests with status 2 to YT.

    Every host id present in ``host_regions_mod_reqs`` is collected, then the
    newest request of each host (highest ``request_id``) is fetched. A request
    is kept when its status is 2 and it is at most ``MAX_AGE_IN_DAYS`` days
    old. All kept requests are written to a YT table in a single call.

    Args:
        cluster: Cluster identifier handed to ``Cass.cluster``.
    """
    global SESSION

    SESSION = Cass.cluster(cluster).connect()
    SESSION.set_keyspace(Keyspaces.WEBMASTER3)

    print("Collecting all host ids...")
    all_hosts = {
        row.host_id
        for row in SESSION.execute("SELECT host_id FROM host_regions_mod_reqs")
    }
    print("Done collecting all host ids: " + str(len(all_hosts)))

    # The host id is bound by the driver through the %s placeholder instead of
    # being spliced into the statement text, so a quote inside a host id can
    # neither break nor alter the query.
    latest_req_query = (
        "select * from host_regions_mod_reqs "
        "where host_id = %s ORDER BY request_id DESC limit 1"
    )

    # datetime_from_uuid1() yields a naive UTC datetime, so the reference
    # point must be UTC as well; local time would skew the age by the UTC
    # offset of the machine. Taken once so every request is measured against
    # the same instant.
    now = datetime.datetime.utcnow()

    reqs_to_upload = []
    print("Collecting unprocessed requests...")
    for count, host_id in enumerate(all_hosts):
        if count % 1000 == 0:
            print('Processed: %s' % count)

        # "limit 1" means this loop body runs at most once per host.
        for req in SESSION.execute(latest_req_query, (host_id,)):
            # Status 2 is what this script reports as "unprocessed".
            # NOTE(review): confirm the status code against the request model.
            if req.status != 2:
                continue

            created_date = util.datetime_from_uuid1(req.request_id)
            if (now - created_date).days > MAX_AGE_IN_DAYS:
                continue

            reqs_to_upload.append({
                'Host': req.host_id,
                'CreatedDate': str(created_date),
                'RequestId': str(req.request_id),
                'CurrentRegions': req.current_regions,
                'RequestedRegions': req.requested_regions,
                'AcceptedRegions': req.accepted_regions,
                'EvidenceUrl': req.evidence_url,
            })

    print("Done collecting unprocessed requests: " + str(len(reqs_to_upload)))

    print("Uploading collected requests to Yt...")
    write_yt_table(reqs_to_upload, '//home/webmaster/users/leonidrom/unprocessed_hrm_reqs')

    print("Done.")


def get_yt_token():
    """Return the YT access token, or None when none can be found.

    A non-empty ``YT_TOKEN`` environment variable takes precedence. Otherwise
    the first line of ``~/.yt/token`` is returned with surrounding whitespace
    stripped.

    Returns:
        The token string, or None when the token file is missing, unreadable
        or empty.
    """
    yt_token = os.getenv('YT_TOKEN')
    if yt_token:
        return yt_token

    token_path = os.path.join(os.path.expanduser('~'), '.yt', 'token')
    try:
        with open(token_path) as f:
            first_line = f.readline()
    except (IOError, OSError):
        # Only a missing or unreadable file means "no token". Anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
        return None

    # readline() returns '' only for an empty file.
    if not first_line:
        return None
    return first_line.strip()


# Shared YT client used by write_yt_table(). It is constructed at import time
# against the hahn proxy; the token comes from get_yt_token() and may be None
# when neither the environment variable nor the token file provides one.
YT_CLIENT = Yt(proxy='hahn.yt.yandex.net', token=get_yt_token())


def write_yt_table(rows, table_name):
    """Write ``rows`` to the YT table ``table_name`` via the shared client.

    The table path carries ``REQUESTS_TABLE_SCHEMA`` and is opened with
    ``append=False``; the write is issued with ``raw=False`` and
    ``force_create=True``.

    Args:
        rows: Iterable of row dicts keyed by the schema column names.
        table_name: YT path of the destination table.
    """
    YT_CLIENT.write_table(
        TablePath(table_name, schema=REQUESTS_TABLE_SCHEMA, append=False),
        rows,
        raw=False,
        force_create=True,
    )


# Script entry point: executed whenever this module is loaded (there is no
# __main__ guard). Hands run() to the Task wrapper together with "prod",
# presumably forwarded to run() as its cluster argument - confirm in Task.run.
Task.run(run, "prod")
