__author__ = 'burakonal'
import datetime
import time
import json
import urllib
import urllib2
import requests
from datetime import datetime
from comscore import *
from clickhouse import *
import commands
import pickle
import time
import datetime

# class written by cansucullu@ for handling date issues. Usage is strongly recommended.
class DatesOperator:
    """Helper for arithmetic on dates represented as 'YYYY-MM-DD' strings.

    Every method takes and returns strings in ``self.date_format``. A string
    that does not match the format raises ``ValueError`` (from ``strptime``).
    """

    def __init__(self):
        # Single format used both for parsing inputs and formatting outputs.
        self.date_format = '%Y-%m-%d'

    def prepare_dates(self, start_date, stop_date):
        """Return all dates from start_date to stop_date, both inclusive.

        The result is a list of date strings in chronological order. It is
        empty when stop_date is earlier than start_date.
        """
        start = datetime.datetime.strptime(start_date, self.date_format)
        stop = datetime.datetime.strptime(stop_date, self.date_format)
        # +1 so that stop_date itself is part of the result.
        n_days = (stop - start).days + 1

        # range() rather than xrange() keeps the class usable on Python 3 too.
        return [
            (start + datetime.timedelta(days=offset)).strftime(self.date_format)
            for offset in range(n_days)
        ]

    def get_ndays_difference(self, input_date, n):
        """Return the date n days after input_date (before it when n < 0)."""
        parsed = datetime.datetime.strptime(input_date, self.date_format)
        # timedelta accepts negative values, so no branch on the sign of n.
        shifted = parsed + datetime.timedelta(days=n)
        return shifted.strftime(self.date_format)


def readTable(date):
    """Run mr_cat on the sessions table of `date` and save its output locally.

    date -- day to fetch as a 'YYYY-MM-DD' string; a malformed value raises
            ValueError from strptime.

    Returns a tuple (output, flag): `output` is the path of the local file
    the command output was redirected to, `flag` is True when that file
    contains at least one line and False when it is empty.
    """
    # Both the remote table path and the local file name use YYYYMMDD.
    date = datetime.datetime.strptime(date, '%Y-%m-%d').strftime('%Y%m%d')
    path = 'extdata/comscore/sessions/' + date
    prefix = './bin/mr_cat -s sakura00.search.yandex.net:8013 '
    output = '/home/burakonal/tasks/1293/data/' + date
    print(prefix + path)
    # The shell redirection stores the command's stdout in `output`.
    commands.getoutput(prefix + path + ' > ' + output)
    with open(output, "r") as f:
        # One readline() is enough to tell an empty file from a non-empty
        # one; there is no need to load the whole dump into memory.
        flag = bool(f.readline())
    return output, flag

def _write_status(log_file, date, status):
    """Append a '<date><TAB><status>' entry to the log and flush the buffer."""
    log_file.write("\n" + date + '\t' + status)
    # Flush right away: main() may sleep for a long time with the file still
    # open, and the entry should be visible in the log meanwhile.
    log_file.flush()


def main():
    """Process ComScore data one day at a time in an endless loop.

    Each pass reads the last entry of the log file, which is a tab-separated
    '<date>' and '<status>' pair. When the status is "ok" the following day
    is processed, otherwise the same day is retried. For the chosen day the
    function:

    1. fetches the sessions table with readTable(); an empty table means a
       one-day sleep, an exception is logged as 'read_problem';
    2. obtains the metrika ratios with get_ratios(); on KeyError it first
       requests a 7-day range through get_query_result(), whose failure is
       logged as 'clickhouse_problem';
    3. runs the ComScore steps; an exception is logged as 'comscore_problem';
    4. logs 'ok' and removes the local table file.

    Every logged problem is followed by a one-hour sleep and a retry.

    Raises ValueError when the log holds no entry to start from, or when its
    last entry is not a '<date><TAB><status>' pair.
    """
    log_path = '/home/burakonal/tasks/1293/logs'
    dates_operator = DatesOperator()

    while True:

        with open(log_path, 'a+') as f:
            # With 'a+' the initial read position is platform dependent, so
            # rewind explicitly. Writes are still appended at the end.
            f.seek(0)
            # Skip blank lines so a trailing newline does not hide the entry.
            entries = [line.strip() for line in f]
            entries = [entry for entry in entries if entry]
            if not entries:
                raise ValueError(
                    "log file %s has no entries; expected '<date>\\t<status>'"
                    % log_path)
            date, table_status = entries[-1].split('\t')
            if table_status == "ok":
                # Calendar arithmetic instead of "+ 86400 seconds" on a local
                # timestamp: on a 25-hour DST day the latter stays on the
                # same date and the loop would never advance.
                date = dates_operator.get_ndays_difference(date, 1)
            print(date)
            try:
                file_to_be_deleted, table_flag = readTable(date)
            except Exception as e:
                print(e)
                _write_status(f, date, 'read_problem')
                print('Read Problem! Sleeping 1 Hour!')
                time.sleep(60*60)
                continue
            if not table_flag:
                # Nothing is logged here, so the next pass retries this date.
                print("Table is not ready on Sakura! Sleeping 1 Day!")
                time.sleep(24*60*60)
                continue

            # ClickHouse Part
            # Check whether the metrika ratios exist for that date; if not,
            # one week of data has to be read first.
            try:
                metrika_ratios = get_ratios(date)
            except KeyError:
                # take 1 week data from metrika
                endDate = dates_operator.get_ndays_difference(date, 7)
                flag = get_query_result(date, endDate)
                if not flag:
                    _write_status(f, date, 'clickhouse_problem')
                    print('ClickHouse Problem! Sleeping 1 Hour!')
                    time.sleep(3600)
                    continue
                # NOTE(review): a KeyError raised here is not handled and
                # terminates the loop.
                metrika_ratios = get_ratios(date)

            # ComScore Part
            # parameters for ComScore
            status = 'SEARCH_JOINED'
            search_engines = ['google web search', 'yandex web search', 'google', 'yandex', 'ask reply page', 'bing web', 'bing']
            search_engine_tags = ['ask', 'bing', 'google', 'yandex']
            browsers = ['chrome', 'firefox', 'internet explorer', 'yandex']
            try:
                comscore_data = getting_comscore_data(status, file_to_be_deleted, search_engines, search_engine_tags, browsers)
                processing_comscore_data(search_engine_tags, comscore_data, metrika_ratios, browsers, date)
            except Exception as e:
                print(e)
                _write_status(f, date, 'comscore_problem')
                print("ComScore Data Problem! Sleeping 1 Hour!")
                time.sleep(3600)
                continue
            print("{0} is completed!".format(date))
            _write_status(f, date, 'ok')
            # The local table file is no longer needed once the day is done.
            commands.getoutput("rm " + file_to_be_deleted)

# Start the endless processing loop only when the file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
