# -*- coding: utf-8 -*-
__author__ = 'burakonal'

import urllib
import urllib2
import commands
import pandas as pd
import io
# Let pandas use up to `desired_width` characters per line when printing.
desired_width = 320
pd.options.display.width = desired_width


def get_clickhouse_data(query, connection_timeout=1500, host='http://mtmega01i.yandex.ru:8123/',
                        user='burakonal', password='tw5Qsc2O'):
    """Send *query* to a ClickHouse HTTP endpoint and return the raw reply.

    The query and the credentials are URL-encoded into the query string of a
    single GET request against *host*.

    Args:
        query: Query text to execute.
        connection_timeout: Timeout in seconds handed to ``urllib2.urlopen``.
        host: Base URL of the ClickHouse HTTP interface.
        user: Account name sent as the ``user`` request parameter.
        password: Password sent as the ``password`` request parameter.

    Returns:
        A ``(payload, error_flag)`` tuple. On success ``payload`` is the
        response body and ``error_flag`` is 0. On any request failure
        ``payload`` is the URL that was requested and ``error_flag`` is 1.
    """
    # NOTE(review): the default credentials are kept only so existing callers
    # keep working; they should be rotated and supplied from outside the
    # source tree (environment, config file) instead.
    params = {
        'query': query,
        'user': user,
        'password': password,
    }
    url = host + '?' + urllib.urlencode(params)

    try:
        response = urllib2.urlopen(urllib2.Request(url), timeout=connection_timeout)
        try:
            result = response.read()
        finally:
            # Release the connection even when reading the body fails.
            response.close()
    except Exception:
        # Best-effort contract: report the failure through the flag instead of
        # raising. ``Exception`` (unlike a bare ``except``) still lets
        # KeyboardInterrupt and SystemExit propagate.
        # NOTE(review): the returned URL embeds the password; callers that
        # print or log it will expose the credential.
        return url, 1
    return result, 0
def _parse_visit_rows(raw_result):
    """Convert a tab-separated ClickHouse reply into table rows.

    Each non-empty input line is expected to hold five tab-separated fields:
    frequency, protocol prefix, date, search engine code, browser code.
    Lines whose protocol field is empty are skipped.

    Returns:
        A list of ``[date, browser, search engine, protocol, frequency]``
        rows, with the numeric codes replaced by names and the frequency
        converted to ``int``.

    Raises:
        KeyError: If a line carries a browser or search engine code that is
            not in the lookup tables below.
        ValueError: If a non-empty line does not have exactly five fields or
            its frequency is not an integer.
    """
    browser_codes = {"70": "yabrowser", "3": 'firefox', "5": 'msie', "6": 'chrome'}
    search_engine_codes = {"2": 'yandex_web', "3": 'google_web', "89": "ask", "90": "ask", "84": "bing", "85": "bing"}

    rows = []
    # splitlines() copes with a missing trailing newline, so the last row is
    # never dropped; blank lines are skipped explicitly.
    for line in raw_result.splitlines():
        if not line:
            continue
        freq, protocol, date, se_code, bro_code = line.split('\t')
        if protocol == '':
            continue
        # The query returns the first five characters of the referer, so the
        # plain-http prefix arrives as 'http:'. Strip only a trailing colon.
        if protocol.endswith(':'):
            protocol = protocol[:-1]
        rows.append([date, browser_codes[bro_code], search_engine_codes[se_code], protocol, int(freq)])
    return rows


def get_query_result(startDate, endDate, data_path='/home/burakonal/tasks/1293/metrika_data/data'):
    """Fetch visit counts for a date range and append them to the data file.

    Builds a ClickHouse query over ``visits_all`` for
    ``startDate <= StartDate <= endDate``, runs it through
    ``get_clickhouse_data`` and appends the parsed rows to *data_path* as
    tab-separated ``date, browser, search engine, protocol, frequency`` lines
    without a header. ``get_ratios`` reads that file back.

    The query text and the raw reply are printed to stdout.

    Args:
        startDate: First date of the range, substituted into ``toDate('...')``.
        endDate: Last date of the range, substituted into ``toDate('...')``.
        data_path: File the rows are appended to.

    Returns:
        True when the rows were appended, False when the ClickHouse request
        failed (nothing is written in that case).
    """
    # ClickHouse query preparation
    template = """
             SELECT sum(Sign), substring(Referer, 1, 5), StartDate, SearchEngineID, UserAgent
             FROM visits_all
             WHERE (StartDate >= toDate('{date_param}') AND StartDate <= toDate('{date_param2}'))
                AND (regionToCountry(RegionID) = 983)
                AND (IsMobile = 0)
                AND (IsYandex=0)
                AND (SearchEngineID in(2, 3, 84, 85, 89, 90))
                AND (UserAgent in (3, 5, 6, 70))
             GROUP BY
                StartDate,
                SearchEngineID,
                UserAgent,
                substring(Referer, 1, 5)
                """
    query = template.format(date_param=startDate, date_param2=endDate)
    print(query)

    result, error_flag = get_clickhouse_data(query)
    # On failure `result` is the request URL; it is printed once here.
    print(result)
    if error_flag:
        return False

    # Build the frame in one go instead of growing it row by row.
    table = pd.DataFrame(
        _parse_visit_rows(result),
        columns=["date", "browser", "search engine", "protocol", "frequency"],
    )

    # Append mode: repeated runs accumulate rows in the same file.
    with open(data_path, 'a') as f:
        table.to_csv(path_or_buf=f, sep='\t', header=False, float_format='%.0f', decimal=',', index=False)
    return True

def get_ratios(targetdate, data_path='/home/burakonal/tasks/1293/metrika_data/data'):
    """Compute per-search-engine ratios for one date from the data file.

    For each of two browsers a reference search engine is held constant:
    ``yandex_web`` for ``yabrowser`` and ``google_web`` for ``chrome``. Every
    ratio is

        visits of the engine in that browser (all protocols)
        ----------------------------------------------------
        http visits of the reference engine in that browser

    Only rows for *targetdate* are used, so problems on other dates in the
    file do not affect the result. Rows are summed before dividing, so
    repeated rows for the same date (the file is written in append mode) do
    not break the lookup. An engine with no rows for the browser gets 0.0.

    Args:
        targetdate: Date string exactly as stored in the first column of the
            data file, e.g. ``"2015-08-31"``.
        data_path: Tab-separated file with the columns
            ``date, browser, search engine, protocol, frequency`` and no
            header line.

    Returns:
        ``{'yabrowser': {...}, 'chrome': {...}}`` where each inner dict maps
        ``'yandex'``, ``'google'``, ``'bing'`` and ``'ask'`` to a float ratio.

    Raises:
        KeyError: If the file holds no rows for *targetdate*.
        ValueError: If the http count of a reference engine is missing or
            zero, which leaves the ratios undefined.
    """
    columns = ["date", "browser", "search engine", "protocol", "frequency"]
    # Keep dates as plain strings so they compare equal to `targetdate`.
    table = pd.read_csv(data_path, sep='\t', names=columns, dtype={'date': str})

    day = table[table['date'] == targetdate]
    if day.empty:
        raise KeyError(targetdate)

    group_keys = ['browser', 'search engine']
    # {(browser, engine): visits} over all protocols, and over http only.
    totals = day.groupby(group_keys)['frequency'].sum().to_dict()
    http_totals = day[day['protocol'] == 'http'].groupby(group_keys)['frequency'].sum().to_dict()

    # Result key -> engine name as stored in the data file.
    engines = (('yandex', 'yandex_web'), ('google', 'google_web'), ('bing', 'bing'), ('ask', 'ask'))
    # Browser -> engine whose http traffic is the denominator.
    reference_engines = (('yabrowser', 'yandex_web'), ('chrome', 'google_web'))

    ratios = {}
    for browser, reference in reference_engines:
        denominator = float(http_totals.get((browser, reference), 0))
        if denominator == 0:
            raise ValueError(
                "no http visits of %s in %s on %s; ratios are undefined"
                % (reference, browser, targetdate)
            )
        ratios[browser] = dict(
            (label, float(totals.get((browser, engine), 0)) / denominator)
            for label, engine in engines
        )
    return ratios
def main():
    """Print the ratios computed from the stored data for 2015-08-31."""
    # The data file read by get_ratios is filled by
    # get_query_result(startDate, endDate); run that first when the
    # requested date has not been fetched yet.
    ratios = get_ratios("2015-08-31")
    print(ratios)


if __name__ == '__main__':
    main()
