import os, logging, ConfigParser
from subprocess import call
from ru.yandex import utils
from ru.yandex.statistics import google_stat
from googleads import adwords
import heapq
__author__ = 'aalogachev'

# Module-level logger. Its name sits under the 'ru.yandex' prefix that the
# __main__ block installs as a handler filter, so these records are emitted.
logger = logging.getLogger('ru.yandex.simularity.cluster_stats')

def process_cluster_queries(cluster_index, queries, out_file, client):
    """Write one tab-separated stat row per query of a cluster.

    Statistics are obtained from ``google_stat.get_queries_stat`` (called
    with the ``'STATS'`` argument).  Each row holds the cluster index, the
    query and its ``month_volume``, ``avg_cpc``, ``competition`` and
    ``categories`` values, in that order.  Afterwards the ten queries with
    the highest ``month_volume`` are logged.

    Args:
        cluster_index: value written in the first column of every row.
        queries: queries belonging to the cluster.
        out_file: object with a ``write`` method receiving the rows.
        client: client object handed through to ``google_stat``.
    """
    row_template = u'{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n'
    stat_fields = ('month_volume', 'avg_cpc', 'competition', 'categories')

    stats_by_query = google_stat.get_queries_stat(queries, client, 'STATS')
    for text, metrics in stats_by_query.items():
        values = [metrics.get(name) for name in stat_fields]
        out_file.write(row_template.format(cluster_index, text, *values))

    get_top_n_results_for_cluster(stats_by_query, 10, 'month_volume')

def get_top_n_results_for_cluster(queries_stat, n, field='month_volume'):
    """Log and return the ``n`` queries with the largest ``field`` value.

    Args:
        queries_stat: mapping of query -> stat mapping (anything offering
            ``.get``).
        n: maximum number of entries to report.
        field: stat key used for ranking.

    Returns:
        A list of ``(query, stat)`` pairs, largest first.  Callers that
        ignore the result keep working as before.
    """
    def rank(item):
        value = item[1].get(field)
        # A missing value ranks below every real one.  The leading flag
        # keeps None from ever being compared with a number, which raises
        # TypeError on Python 3 and only works by accident on Python 2.
        return (value is not None, value)

    top_n = heapq.nlargest(n, queries_stat.items(), key=rank)
    for query, stat in top_n:
        logger.info(u'{0}\t{1}'.format(stat.get(field), query))
    return top_n

if __name__ == '__main__':
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)
    for handler in logging.root.handlers:
        handler.addFilter(logging.Filter('ru.yandex'))
        #handler.addFilter(logging.Filter(__name__))

    config = ConfigParser.SafeConfigParser()
    config.readfp(open(os.path.join(utils.get_project_path(),'config_queries.json')))
    section = config.defaults()['active_section']
    logger.debug(u'Using config section = {0}'.format(section))
    work_dir = config.get(section=section, option='work_dir')
    logger.debug(u'Using work dir = {0}'.format(work_dir))
    clusters_file = config.get(section, 'clusters_file')
    clusters_file_sorted = clusters_file + '.sorted'
    cluster_stat_file = config.get(section, 'cluster_stat_file')

    #TODO fill clusters here
    #ignore_clusters = [0,1,2,4,5,6,8,9,10,11,12,13,15,18,19,20]
    ignore_clusters = []

    logger.debug(u'Sorting clusters \'{0}\' to \'{1}\''.format(clusters_file, clusters_file_sorted))
    proc_call = ['sort', '-n', clusters_file, '-o', clusters_file_sorted]
    logger.debug(u'Calling "{0}"'.format(proc_call))
    call(proc_call)

    adwords_client = adwords.AdWordsClient.LoadFromStorage()

    in_clusters_file = open(clusters_file_sorted, mode = 'r')
    out_stat = open(cluster_stat_file, mode='w')
    prev_cluster_index = '0'
    cluster_queries = set()
    for line in in_clusters_file:
        tokens = line.split('\t',2)
        #same cluster
        if (tokens[0] != prev_cluster_index):
            if (prev_cluster_index not in ignore_clusters):
                logger.info(u'Processing cluster #{0}'.format(prev_cluster_index))
                process_cluster_queries(prev_cluster_index, list(cluster_queries), out_stat, adwords_client)
            prev_cluster_index = tokens[0]
            cluster_queries = set()
        cluster_queries.add(tokens[1])

    in_clusters_file.close()

    logger.info(u'Processing cluster #{0}'.format(prev_cluster_index))
    process_cluster_queries(prev_cluster_index, cluster_queries, out_stat, adwords_client)

    out_stat.close()



