#!/usr/bin/env python
#coding=utf-8
from __future__ import division

__author__ = 'cansucullu'

import sys
import re
import datetime
import requests
import urllib
import urllib2
import urlparse
import json
import time

import yt.wrapper as yt

def module_filter(module):
    """
    Decide whether a module passes the pickling module filter.

    Args:
        module: Module object (or a falsy placeholder) to check.

    Returns:
        bool. True for falsy input and for every module whose ``__name__`` is
        neither exactly 'uatraits' nor starts with 'statbox'; False otherwise.
    """
    if module:
        module_name = getattr(module, '__name__', '')
        if module_name == 'uatraits':
            return False
        return not module_name.startswith('statbox')
    return True

# Global yt.wrapper client settings; these statements run once at import time.
# Output auto-merge action is set to "merge".
yt.config["auto_merge_output"]["action"] = "merge"
# The pickling step consults module_filter (defined above) for each module;
# presumably modules it rejects are left out of the job payload -- TODO confirm.
yt.config["pickling"]["module_filter"] = module_filter
# Proxy host used by the client.
yt.config.set_proxy('plato.yt.yandex.net')
# NOTE(review): presumably "create missing parent nodes" and "treat missing
# input tables as empty" -- confirm against the yt.wrapper configuration docs.
yt.config.CREATE_RECURSIVE = True
yt.config.TREAT_UNEXISTING_AS_EMPTY = True


class SearchAnalyser:
    """
    Main class for handling all searches made by switched users.

    Holds the clid/yasoft lookup tables and the YT table path prefixes, and
    exposes the jobs that build the per-install-date searches tables and the
    reports computed from them.
    """

    def __init__(self):
        #  Parameters related to filtration of logs
        self.clids = ["2189882", "2219050", "2219052", "2224320"]
        self.yasofts = ['litevbchrome_chr', 'homesearchextchrome_chr', 'searchextchrome_chr']

        # clid -> human readable "extension + entry point" label
        self.all_clids = dict([
            ('2189882', 'Search Extension dayuse'),
            ('2196600', 'Search Extension omnibox'),
            ('2187647', 'Search Extension na'),
            ('2220368', 'Search Extension na'),
            ('2219050', 'Search Pop-up Screen Extension dayuse'),
            ('2219051', 'Search Pop-up Screen Extension omnibox'),
            ('2224775', 'Search Pop-up Screen Extension na'),
            ('2219052', 'New Tab Extension dayuse'),
            ('2219053', 'New Tab Extension omnibox'),
            ('2219054', 'New Tab Extension Yandex logo on new tab'),
            ('2219055', 'New Tab Extension new tab'),
            ('2224771', 'New Tab Extension na'),
            ('2224320', 'Homepage Extension dayuse'),
            ('2224321', 'Homepage Extension omnibox'),
            ('2224322', 'Homepage Extension homepage & startpage'),
            ('2224323', 'Homepage Extension context menu search'),
        ])

        # clid -> yasoft identifier
        self.clid_yasoft_map = dict([
            ('2189882', 'searchextchrome_chr'),
            ('2196600', 'searchextchrome_chr'),
            ('2187647', 'searchextchrome_chr'),
            ('2220368', 'searchextchrome_chr'),
            ('2219050', 'searchextchrome_chr'),
            ('2219051', 'searchextchrome_chr'),
            ('2224775', 'searchextchrome_chr'),
            ('2219052', 'litevbchrome_chr'),
            ('2219053', 'litevbchrome_chr'),
            ('2219054', 'litevbchrome_chr'),
            ('2219055', 'litevbchrome_chr'),
            ('2224771', 'litevbchrome_chr'),
            ('2224320', 'homesearchextchrome_chr'),
            ('2224321', 'homesearchextchrome_chr'),
            ('2224322', 'homesearchextchrome_chr'),
            ('2224323', 'homesearchextchrome_chr'),
        ])

        #  Tables from Yandex
        self.export_access_log_table_raw = '//statbox/export-access-log/'
        self.watchlog_table_raw = '//statbox/watch-log/'

        #  My tables
        self.install_table_raw = '//home/tr-analysts/cansucullu/RetentionData/install/'
        self.unique_install_table_raw = '//home/tr-analysts/cansucullu/RetentionData/unique-install/'

        self.dayuse_table_raw = '//home/tr-analysts/cansucullu/RetentionData/dayuse/'
        self.unique_dayuse_table_raw = '//home/tr-analysts/cansucullu/RetentionData/unique-dayuse/'

        self.retention_pivot_table = '//home/tr-analysts/cansucullu/RetentionData/PivotTable'

        self.filtered_watchlog_table_raw = '//home/tr-analysts/cansucullu/SearchesData/FilteredWatchlogs/'
        self.searches_table_raw = '//home/tr-analysts/cansucullu/SearchesData/PivotTables/'

        #  Other important parameters
        # Short report key -> Statface report name
        self.statface_names = dict([
            ('immediate retention', 'yandex.com.tr/Special/Metrics/Switch/AudienceMetrics/ImmediateRetention'),
            ('real retention', 'yandex.com.tr/Special/Metrics/Switch/RetentionAnalysis/RetentionSummary'),
            ('smooth dayuse', 'yandex.com.tr/Special/Metrics/Switch/AudienceMetrics/YaSoftDayuse'),
            ('user search distribution', 'yandex.com.tr/Special/Metrics/Switch/RetentionAnalysis/YaSoftUserSearchDistribution')
        ])

        self.date_format = '%Y-%m-%d'
        self.time_format = '%Y-%m-%d %H:%M:%S'

        # NOTE(review): credentials are committed in source. They should be
        # rotated and loaded from a secret store or the environment instead.
        self.stat_data = dict([
            ('StatRobotUser', 'robot_cansucullu'),
            ('StatRobotPassword', 'Tai0epood8'),
        ])

    # ------------------------------------------------------------------
    # Date helpers
    # ------------------------------------------------------------------

    def prepare_dates(self, start_date, stop_date):
        """
        Return a list of dates from start date to stop date both ends included

        Note:
            Can be used with :func:`~SearchAnalyser.get_ndays_difference` to get list of days
            from a reference date to a +/- duration.
            Ex:
            >>> api = SearchAnalyser()
            >>> input_date = '2015-08-01'
            >>> api.prepare_dates(input_date, api.get_ndays_difference(input_date, 6))
            ['2015-08-01', '2015-08-02', '2015-08-03', '2015-08-04', '2015-08-05', '2015-08-06', '2015-08-07']

            You may also change the default date format if necessary
            >>> api.date_format = '%Y%m%d'

        Args:
            start_date (str): Start date, formatted as ``self.date_format``.
            stop_date (str): Stop date, formatted as ``self.date_format``.

        Returns:
            list. Dates from start date to stop date, both ends included.
            Empty when stop date is earlier than start date.
        """
        start = datetime.datetime.strptime(start_date, self.date_format)
        stop = datetime.datetime.strptime(stop_date, self.date_format)
        day_count = (stop - start).days + 1

        return [
            (start + datetime.timedelta(days=offset)).strftime(self.date_format)
            for offset in range(day_count)
        ]

    def get_ndays_difference(self, input_date, n):
        """
        Return a date which is n days earlier or later

        Note:
            You may change the default date format if necessary
            >>> api = SearchAnalyser()
            >>> api.date_format = '%Y%m%d'

        Args:
            input_date (str): Input reference date.
            n (int): Delta in days, can be negative.

        Returns:
            str. output date = input date + day offset
        """
        reference = datetime.datetime.strptime(input_date, self.date_format)
        # timedelta accepts negative day counts, so no sign branching is needed.
        shifted = reference + datetime.timedelta(days=n)
        return shifted.strftime(self.date_format)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    def _read_dsv_records(self, table):
        """
        Yield one dict per row of a YT table read in 'dsv' format.

        Each line is stripped, split on tabs, and every item is split on its
        first '=' into key and value.

        Args:
            table (str): Full table path.

        Raises:
            ValueError: if an item of a line does not contain '='.
        """
        for line in yt.read_table(table, format='dsv'):
            yield dict(item.split('=', 1) for item in line.strip().split('\t'))

    def _search_count_bucket(self, search_count):
        """Return the frequency bucket ('0', '1', '2-10' or '11+') of a search count."""
        if search_count == 0:
            return '0'
        if search_count == 1:
            return '1'
        if 2 <= search_count <= 10:
            return '2-10'
        return '11+'

    def _build_search_distribution(self, records, ui_yasoft_map):
        """
        Count users per search-frequency bucket, overall and per yasoft.

        Args:
            records (iterable): Dicts with 'ui' and 'search_count_with_clids'.
            ui_yasoft_map (dict): ui -> yasoft; unknown uis are grouped under 'None'.

        Returns:
            dict. distribution[group][bucket] = user count, where group is 'all'
            or a yasoft and bucket is one of '0', '1', '2-10', '11+', 'all'.
            Every group contains every bucket (zero when nothing was observed).
        """
        buckets = ('0', '1', '2-10', '11+', 'all')
        distribution = {'all': dict.fromkeys(buckets, 0)}

        for record in records:
            bucket = self._search_count_bucket(int(record['search_count_with_clids']))
            yasoft = ui_yasoft_map.get(record['ui'], 'None')

            for group in ('all', yasoft):
                if group not in distribution:
                    distribution[group] = dict.fromkeys(buckets, 0)
                distribution[group][bucket] += 1
                distribution[group]['all'] += 1

        return distribution

    def _cookie_lifetime_histogram(self, records, date):
        """
        Build a histogram of cookie lifetimes relative to `date`.

        The lifetime of a cookie is the number of days between `date` and the
        latest 'date' value seen for that 'yandexuid'.

        Args:
            records (iterable): Dicts with 'yandexuid' and 'date'.
            date (str): Reference (install) date.

        Returns:
            dict. histogram[lifetime_in_days] = number of cookies
        """
        last_seen = {}
        for record in records:
            cookie = record['yandexuid']
            seen = record['date']
            # Dates in the default format compare correctly as strings.
            if cookie not in last_seen or seen > last_seen[cookie]:
                last_seen[cookie] = seen

        reference = datetime.datetime.strptime(date, self.date_format)
        histogram = {}
        for seen in last_seen.values():
            lifetime = (datetime.datetime.strptime(seen, self.date_format) - reference).days
            histogram[lifetime] = histogram.get(lifetime, 0) + 1

        return histogram

    def _count_last_queries(self, records, predicate):
        """Return {last_query: count} over the records for which predicate(record) is true."""
        query_distribution = {}
        for record in records:
            if predicate(record):
                last_query = record['last_query']
                query_distribution[last_query] = query_distribution.get(last_query, 0) + 1
        return query_distribution

    def _print_top_queries(self, query_distribution, limit=100):
        """Print the `limit` most frequent queries with their counts, then the total."""
        ranked = sorted(query_distribution, key=query_distribution.get, reverse=True)
        for query in ranked[:limit]:
            print('%s %s' % (query_distribution[query], query))
        print('... ...')
        print('%s total' % sum(query_distribution.values()))

    # ------------------------------------------------------------------
    # Statface
    # ------------------------------------------------------------------

    def update_daily_statface_report(self, name, data):
        """
        Upload rows to a daily-scale Statface report.

        Args:
            name (str): Key of ``self.statface_names`` selecting the report.
            data (list): Row dicts; sent JSON-encoded as ``{'values': data}``.

        Raises:
            KeyError: if `name` is not a known report key.
            urllib2.URLError: propagated from ``urllib2.urlopen`` on request failure.
        """
        url = 'https://stat.yandex-team.ru/_api/report/data'
        headers = {
            'StatRobotUser': self.stat_data['StatRobotUser'],
            'StatRobotPassword': self.stat_data['StatRobotPassword'],
        }
        payload = urllib.urlencode({
            'name': self.statface_names[name],
            'scale': 'd',
            'data': json.dumps({'values': data})
        })

        response = urllib2.urlopen(urllib2.Request(url, payload, headers))
        # The response body is not used; close it so the connection is released.
        response.close()

    # ------------------------------------------------------------------
    # Building the searches tables
    # ------------------------------------------------------------------

    def collect_ui_from_install(self, date):
        """
        Return uis and yandexuid->ui mapping from install table

        Args:
            date (str): Input date.

        Returns:
            dict. yandexuid_data[yandexuid] = ui
            set. set of unique uis

        Note:
            Records with yandexuid = 'y-' are left out of the mapping, but
            their ui is still added to the returned set.

            The first character of the stored yandexuid is dropped for the
            mapping key (presumably a 'y' prefix -- verify against the table).

            We assume that each yandexuid is mapped to only one ui; when it is
            not, the last record wins.
        """
        yandexuid_data = {}
        uis = set()

        for record in self._read_dsv_records(self.install_table_raw + date):
            if record['yandexuid'] != 'y-':
                yandexuid_data[record['yandexuid'][1:]] = record['ui']
            uis.add(record['ui'])

        return yandexuid_data, uis

    def collect_ui_from_dayuse(self, date, uis):
        """
        Return yandexuid->ui mapping from the dayuse table, restricted to `uis`

        Args:
            date (str): Input date.
            uis (set): uis to keep.

        Returns:
            dict. yandexuid_data[yandexuid] = ui

        Note:
            Records with yandexuid = 'y-' are skipped.

            We assume that each yandexuid is mapped to only one ui; when it is
            not, the last record wins.
        """
        yandexuid_data = {}

        for record in self._read_dsv_records(self.dayuse_table_raw + date):
            if record['ui'] in uis and record['yandexuid'] != 'y-':
                yandexuid_data[record['yandexuid'][1:]] = record['ui']

        return yandexuid_data

    def look_watchlog(self, date, yandexuid_data, pivot_date, appendTable=True):
        """
        Map the watchlog of `date` into the table searches_table_raw + pivot_date,
        then sort that table by yandexuid.

        Args:
            date (str): Day of the watchlog to parse.
            yandexuid_data (dict): yandexuid -> ui mapping of the users to keep.
            pivot_date (str): Install date naming the output table.
            appendTable (bool): Append to the output table when True,
                otherwise write it from scratch.

        Returns:
            None
        """
        source_table = self.watchlog_table_raw + date
        searches_table = self.searches_table_raw + pivot_date
        print(searches_table)

        if appendTable:
            destination_table = yt.TablePath(searches_table, append=True)
        else:
            destination_table = searches_table

        yt.run_map(
            WatchlogMapper(date, yandexuid_data, self.all_clids),
            source_table=source_table,
            destination_table=destination_table,
            format=yt.DsvFormat(),
        )

        yt.run_sort(
            source_table=searches_table,
            destination_table=searches_table,
            sort_by=['yandexuid'],
        )

    def run_searches(self, date):
        """
        Main function for preparing Searches Pivot Table

        Takes the users installed on `date` and collects their watchlog
        records for that day and the following 29 days.

        Args:
            date (str): Install (pivot) date.

        Returns:
            None
        """
        pivot_date = date
        end_date = self.get_ndays_difference(date, 29)

        dates = self.prepare_dates(pivot_date, end_date)
        print(dates)

        print("get installed uis from day  %s" % pivot_date)
        yandexuid_data, uis = self.collect_ui_from_install(dates[0])
        print("total ui %d" % len(set(yandexuid_data.values())))
        print("total yandexui %d" % len(yandexuid_data))

        # Create initial output table from scratch
        self.look_watchlog(date, yandexuid_data, pivot_date, appendTable=False)

        for day in dates[1:]:
            print("watchlog day,  %s" % day)
            yandexuid_data = self.collect_ui_from_dayuse(day, uis)
            print("total ui %d" % len(set(yandexuid_data.values())))
            print("total yandexui %d" % len(yandexuid_data))

            # Append the rest of the days
            self.look_watchlog(day, yandexuid_data, pivot_date, appendTable=True)

    def create_filtered_watchlog_table(self, date):
        """
        Filter the watchlog of `date` into filtered_watchlog_table_raw + date
        and sort the result by timestamp.

        Args:
            date (str): Day of the watchlog to filter.
        """
        source_table = self.watchlog_table_raw + date
        filtered_table = self.filtered_watchlog_table_raw + date
        print(source_table)

        yt.run_map(
            WatchlogFiltererMapper(date, self.all_clids),
            source_table=source_table,
            destination_table=filtered_table,
            format=yt.DsvFormat(),
        )
        yt.run_sort(
            source_table=filtered_table,
            destination_table=filtered_table,
            sort_by=['timestamp'],
        )

    def create_summary_table(self, date):
        """
        Reduce the searches table of `date` to one summary row per ui.

        The searches table is re-sorted by ui in place, reduced into
        searches_table_raw + date + '-summary', and the summary is sorted by
        (ui, timestamp).

        Args:
            date (str): Install date naming the searches table.
        """
        filtered_table = self.searches_table_raw + date
        summary_table = self.searches_table_raw + date + '-summary'

        # Sort by ui
        yt.run_sort(
            source_table=filtered_table,
            destination_table=filtered_table,
            sort_by=['ui'],
        )

        # Reduce by ui
        yt.run_reduce(
            SearchesTableReducer(
                clids_to_check=list(self.all_clids),
                clid_yasoft_map=self.clid_yasoft_map,
                install_date=date,
            ),
            source_table=filtered_table,
            destination_table=summary_table,
            reduce_by=['ui'],
            format=yt.DsvFormat(),
        )

        yt.run_sort(
            source_table=summary_table,
            destination_table=summary_table,
            sort_by=['ui', 'timestamp'],
        )

    # ------------------------------------------------------------------
    # Reports
    # ------------------------------------------------------------------

    def run_user_per_search_distribution(self, date):
        """
        Compute the user-per-search distribution of `date` and upload one
        Statface row per group ('all' and each yasoft).

        Args:
            date (str): Install date.
        """
        ui_yasoft_map = self.get_ui_yasfot_map_from_unique_install(date)
        distribution = self.calculate_user_per_search_distribution(date, ui_yasoft_map)

        for yasoft, counts in distribution.items():
            # Buckets nobody fell into are reported as 0 instead of raising KeyError.
            stat_row = {
                'fielddate': date,
                'yasoft': yasoft,
                's0': counts.get('0', 0),
                's1': counts.get('1', 0),
                's2_10': counts.get('2-10', 0),
                's11p': counts.get('11+', 0),
                'all': counts.get('all', 0),
            }

            self.update_daily_statface_report(name='user search distribution', data=[stat_row])

    def calculate_user_per_search_distribution(self, date, ui_yasoft_map):
        """
        Count users per search-frequency bucket from the summary table of `date`.

        Args:
            date (str): Install date naming the summary table.
            ui_yasoft_map (dict): ui -> yasoft.

        Returns:
            dict. distribution[group][bucket] = user count; buckets are
            '0', '1', '2-10', '11+' and 'all', groups are 'all', each yasoft,
            and 'None' for uis missing from `ui_yasoft_map`.
        """
        summary_table = self.searches_table_raw + date + '-summary'
        return self._build_search_distribution(
            self._read_dsv_records(summary_table), ui_yasoft_map)

    def get_ui_yasfot_map_from_unique_install(self, date):
        """
        Return the ui -> yasoft mapping from the unique-install table of `date`.

        Args:
            date (str): Install date.

        Returns:
            dict. ui_yasoft_map[ui] = yasoft
        """
        unique_install_table_final = self.unique_install_table_raw + date

        ui_yasoft_map = {}
        for record in self._read_dsv_records(unique_install_table_final):
            ui_yasoft_map[record['ui']] = record['yasoft']

        return ui_yasoft_map

    def get_last_query_frequency(self, date):
        """
        Print the most frequent last queries of users whose lifetime is at
        most 21 days and who made at least one search with a known clid.

        Args:
            date (str): Install date naming the summary table.
        """
        summary_table = self.searches_table_raw + date + '-summary'

        def churned_with_searches(record):
            return (int(record['lifetime']) <= 21
                    and int(record['search_count_with_clids']) > 0)

        self._print_top_queries(self._count_last_queries(
            self._read_dsv_records(summary_table), churned_with_searches))

    def get_google_churn_query_frequency(self, date):
        """
        Print the most frequent last queries of users who made exactly one
        search with a known clid.

        Args:
            date (str): Install date naming the summary table.
        """
        summary_table = self.searches_table_raw + date + '-summary'

        def searched_once(record):
            return int(record['search_count_with_clids']) == 1

        self._print_top_queries(self._count_last_queries(
            self._read_dsv_records(summary_table), searched_once))

    def calculate_clear_cookie_rate(self, date):
        """
        Print, per lifetime in days (1-based), how many cookies were last seen
        that many days after `date` in the searches table.

        Args:
            date (str): Install date naming the searches table.
        """
        searches_table = self.searches_table_raw + date
        cookie_lifetimes = self._cookie_lifetime_histogram(
            self._read_dsv_records(searches_table), date)

        for lifetime, cookie_count in sorted(cookie_lifetimes.items()):
            print(str(lifetime + 1) + '\t' + str(cookie_count))

    def get_only_homepage_searches(self, date):
        """
        Copy the searches of homepage-extension users into
        searches_table_raw + date + '-homepage', sorted by (ui, timestamp).

        Args:
            date (str): Install date.
        """
        unique_install_table_final = self.unique_install_table_raw + date

        # A set keeps the per-record membership test inside the mapper cheap.
        uis_to_filter = set(
            record['ui']
            for record in self._read_dsv_records(unique_install_table_final)
            if record['yasoft'] == 'homesearchextchrome_chr'
        )

        searches_table = self.searches_table_raw + date
        filtered_searches_table = self.searches_table_raw + date + '-homepage'

        yt.run_map(
            SearchesTableFiltererMapper(uis_to_filter),
            source_table=searches_table,
            destination_table=filtered_searches_table,
            format=yt.DsvFormat(),
        )
        yt.run_sort(
            source_table=filtered_searches_table,
            destination_table=filtered_searches_table,
            sort_by=['ui', 'timestamp'],
        )



class WatchlogMapper:
    """
    Map job that keeps watchlog hits of one counter made by known yandexuids
    and annotates them with clid, extension label, query text and ui.
    """

    def __init__(self, date, yandexuid_data, clid_map):
        """
        Args:
            date (str): Value written to the 'date' field of every output row.
            yandexuid_data (dict): yandexuid -> ui; only these yandexuids are kept.
            clid_map (dict): clid -> extension label.
        """
        self.counter_id = '9927988'
        # A hit is dropped when browser_info contains any of these markers ...
        self.things_to_check_in_browser_info = ["ln:1", "il:1", "dl:1", "nb:1", "pa:1", "is:1", "ar:1", "pq:1"]
        # ... or when its url contains any of these substrings.
        self.things_to_check_in_url = ["goal://"]
        self.yandexuid_data = yandexuid_data
        # Kept for backward compatibility; membership tests use the dict itself
        # because scanning this list for every record is linear in its size.
        self.yandexuids = list(yandexuid_data)
        self.date = date
        self.clid_map = clid_map

    def __call__(self, rec):
        """
        Yield at most one output row for a watchlog record.

        Args:
            rec (dict): Watchlog row with 'counter_id', 'yandexuid',
                'browser_info', 'url' and 'timestamp'.

        Yields:
            dict with 'url', 'date', 'query', 'extension', 'clid',
            'yandexuid', 'timestamp' and 'ui'.
        """
        if rec['counter_id'] != self.counter_id:
            return

        yandexuid = rec['yandexuid']
        if yandexuid not in self.yandexuid_data:
            return

        browser_info = rec['browser_info']
        url = rec['url']
        timestamp = rec['timestamp']

        # Cheap rejections first, so URLs of discarded hits are never parsed.
        if any(marker in browser_info for marker in self.things_to_check_in_browser_info):
            return
        if any(marker in url for marker in self.things_to_check_in_url):
            return

        # NOTE(review): the url is unquoted before parse_qs, which unquotes the
        # values again; an encoded '&', '=' or '+' inside a query may therefore
        # be misparsed -- confirm how urls are encoded in the watchlog.
        params = urlparse.parse_qs(urlparse.urlparse(urllib.unquote(url)).query)

        clid = params['clid'][0] if 'clid' in params else "NA"
        extension = self.clid_map.get(clid, "NA")
        query = params['text'][0] if 'text' in params else "<Na>"

        yield {
            'url': url,
            'date': self.date,
            'query': query,
            'extension': extension,
            'clid': clid,
            'yandexuid': yandexuid,
            'timestamp': timestamp,
            'ui': self.yandexuid_data[yandexuid],
        }


class WatchlogFiltererMapper:
    """
    Map job that keeps watchlog hits of one counter (for every yandexuid) and
    annotates them with clid, extension label and query text.
    """

    def __init__(self, date, clid_map):
        """
        Args:
            date (str): Value written to the 'date' field of every output row.
            clid_map (dict): clid -> extension label.
        """
        self.counter_id = '9927988'
        # A hit is dropped when browser_info contains any of these markers ...
        self.things_to_check_in_browser_info = ["ln:1", "il:1", "dl:1", "nb:1", "pa:1", "is:1", "ar:1", "pq:1"]
        # ... or when its url contains any of these substrings.
        self.things_to_check_in_url = ["goal://"]
        self.date = date
        self.clid_map = clid_map

    def __call__(self, rec):
        """
        Yield at most one output row for a watchlog record.

        Args:
            rec (dict): Watchlog row with 'counter_id', 'yandexuid',
                'browser_info', 'url' and 'timestamp'.

        Yields:
            dict with 'url', 'date', 'query', 'extension', 'clid',
            'yandexuid' and 'timestamp'.
        """
        if rec['counter_id'] != self.counter_id:
            return

        yandexuid = rec['yandexuid']
        browser_info = rec['browser_info']
        url = rec['url']
        timestamp = rec['timestamp']

        # Cheap rejections first, so URLs of discarded hits are never parsed.
        if any(marker in browser_info for marker in self.things_to_check_in_browser_info):
            return
        if any(marker in url for marker in self.things_to_check_in_url):
            return

        # NOTE(review): the url is unquoted before parse_qs, which unquotes the
        # values again; an encoded '&', '=' or '+' inside a query may therefore
        # be misparsed -- confirm how urls are encoded in the watchlog.
        params = urlparse.parse_qs(urlparse.urlparse(urllib.unquote(url)).query)

        clid = params['clid'][0] if 'clid' in params else "NA"
        extension = self.clid_map.get(clid, "NA")
        query = params['text'][0] if 'text' in params else "<Na>"

        yield {
            'url': url,
            'date': self.date,
            'query': query,
            'extension': extension,
            'clid': clid,
            'yandexuid': yandexuid,
            'timestamp': timestamp,
        }


class SearchesTableReducer:
    """
    Reduce job that collapses all search records of one ui into a single
    summary row.
    """

    def __init__(self, clids_to_check, clid_yasoft_map, install_date):
        """
        Args:
            clids_to_check: Container of clids that count as extension searches.
            clid_yasoft_map (dict): clid -> yasoft.
            install_date (str): Install date ('%Y-%m-%d') used to compute lifetime.
        """
        self.clid_yasoft_map = clid_yasoft_map
        self.clids_to_check = clids_to_check
        self.date_format = '%Y-%m-%d'
        self.install_date = install_date

    def __call__(self, key, recs):
        """
        Summarise the records of one ui.

        Records whose url contains 'utm_source=' count as landing queries;
        the rest are split by whether their clid is in ``clids_to_check``.

        Args:
            key (dict): Reduce key; holds 'ui'.
            recs (iterable): Records with 'clid', 'date', 'query' and 'url'.

        Yields:
            dict with 'ui', 'yasoft', 'last_seen', 'lifetime',
            'search_count_with_clids', 'search_count_without_clids',
            'last_query', 'landing_query' and 'landing_count'.

        Note:
            'last_query' is the last matching query in input order.
            NOTE(review): the input is sorted by ui only, so that order is
            presumably not chronological -- confirm whether it should be.
        """
        # Bound before the loop so it exists even for an empty group.
        ui = key['ui']

        queries_with_clids = []
        search_count_without_clids = 0
        active_days = []
        yasofts = []
        landing_queries = []

        for rec in recs:
            clid = rec['clid']
            query = rec['query']

            if 'utm_source=' in rec['url']:
                landing_queries.append(query)
            elif clid in self.clids_to_check:
                queries_with_clids.append(query)
                active_days.append(rec['date'])
                yasofts.append(self.clid_yasoft_map[clid])
            else:
                search_count_without_clids += 1

        if active_days:
            last_seen = max(active_days)
            lifetime = (
                datetime.datetime.strptime(last_seen, self.date_format)
                - datetime.datetime.strptime(self.install_date, self.date_format)
            ).days
            last_query = queries_with_clids[-1]
            # Most observed yasoft, assumption: people use only 1 kind of extension
            yasoft_final = get_most_frequent_from_list(yasofts)
        else:
            last_seen = 'None'
            lifetime = 0
            last_query = ''
            yasoft_final = 'None'

        yield {
            'ui': ui,
            'yasoft': yasoft_final,
            'last_seen': last_seen,
            'lifetime': str(lifetime),
            'search_count_with_clids': str(len(queries_with_clids)),
            'search_count_without_clids': str(search_count_without_clids),
            'last_query': last_query,
            'landing_query': get_most_frequent_from_list(landing_queries),
            'landing_count': str(len(landing_queries)),
        }

class SearchesTableFiltererMapper:
    """Map job that keeps only the search records of the given uis."""

    def __init__(self, uis_to_check):
        """
        Args:
            uis_to_check (iterable): uis whose records are kept.
        """
        # Stored as a frozenset so the per-record membership test is O(1).
        self.uis_to_check = frozenset(uis_to_check)

    def __call__(self, rec):
        """
        Yield the record, reduced to the known fields, when its ui is wanted.

        Args:
            rec (dict): Searches-table row with 'ui', 'url', 'date', 'query',
                'extension', 'clid', 'yandexuid' and 'timestamp'.
        """
        ui = rec['ui']
        if ui not in self.uis_to_check:
            return

        yield {
            'ui': ui,
            'url': rec['url'],
            'date': rec['date'],
            'query': rec['query'],
            'extension': rec['extension'],
            'clid': rec['clid'],
            'yandexuid': rec['yandexuid'],
            'timestamp': rec['timestamp'],
        }


def get_most_frequent_from_list(input_list):
    """
    Return the most frequent item of an iterable.

    Args:
        input_list (iterable): Hashable items.

    Returns:
        The item with the highest number of occurrences, or '' when the input
        is empty. When several items share the highest count, the one that
        comes last in dict iteration order is returned.
    """
    freq = {}
    for item in input_list:
        # dict.get avoids scanning a list of keys on every item (Python 2).
        freq[item] = freq.get(item, 0) + 1

    result = ''
    if freq:
        max_freq = max(freq.values())
        for key, value in freq.items():
            if value == max_freq:
                result = key

    return result


if __name__ == '__main__':
    # Manual driver: one step is enabled at a time. The commented-out calls
    # below are the other available steps; uncomment the one to run.
    api = SearchAnalyser()

    # Important: Look for already existing dates in the path below. Do not overwrite existing tables
    # http://yt.yandex.net/plato/#page=navigation&path=//home/tr-analysts/cansucullu/SearchesData/PivotTables
    # E.g.: As of 2015-09-20, Raw Table for 2015-08-01 is ready. So Don't run api.run_searches('2015-08-01')

    # Step 1: build the raw searches table for an install date.
    #api.run_searches('2015-08-02')
    #date = sys.argv[1]
    #api.run_searches(date)

    # Optional: build the filtered watchlog tables for a range of days.
    #dates = api.prepare_dates('2015-08-01', '2015-09-01')
    #for date in dates:
    #    api.create_filtered_watchlog_table(date)

    # Step 2: build the per-ui summary table; it reads the searches table of the same date.
    api.create_summary_table('2015-08-02')
    # Reports; the first three read the '-summary' table, so run step 2 first.
    #api.run_user_per_search_distribution('2015-08-01')
    #api.get_last_query_frequency('2015-08-01')
    #api.calculate_clear_cookie_rate('2015-08-01')
    #api.get_google_churn_query_frequency('2015-08-01')
    #api.get_only_homepage_searches('2015-08-01')
