# -*- encoding: utf-8 -*-
import json,csv,operator,os,re,minus,linecache, urllib, urllib2
from collections import defaultdict
from openpyxl import Workbook,load_workbook
# Directory containing this script; data files are resolved relative to it.
dir = os.path.dirname(os.path.abspath(__file__))
# JSON field names holding the query text and its hit count.
query_name = 'normal_query'
hit_name = 'COUNT_UNIQ_q'
# A query is kept only when its hit count is strictly greater than this.
hit_limit = 0
# Raw stoplist lines; the file is closed as soon as they have been read.
with open(dir + "/stoplist.csv", "rU") as stoplist_file:
    stoplist = list(stoplist_file)
# Stoplist entries with surrounding whitespace (including newlines) removed.
stopl = [e.strip() for e in stoplist]

def PrintException():
    """Print where the exception currently being handled occurred.

    Reports the file name, line number and source line of the first
    traceback entry (the frame handling the exception) together with the
    exception object. Intended to be called from inside an ``except``
    block; when no exception is active nothing is printed.
    """
    # Imported here because this file otherwise imports ``sys`` only under
    # the ``__main__`` guard, which made this helper fail with NameError
    # whenever the module was imported instead of run as a script.
    import sys

    exc_type, exc_obj, tb = sys.exc_info()
    if tb is None:
        # No exception is being handled, so there is nothing to report.
        return
    f = tb.tb_frame
    lineno = tb.tb_lineno
    filename = f.f_code.co_filename
    # Refresh the cache so the printed source line matches the file on disk.
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, f.f_globals)
    print('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj))

def clear_keywords(kw):
    """Return *kw* with parentheses, hyphens and commas removed."""
    # The hyphen is escaped so it is matched literally. Unescaped, ")-,"
    # is a character range that matches ")", "*", "+" and "," and leaves
    # "-" untouched.
    return re.sub(r'[()\-,]', '', kw)
def make_keyword(kw):
    """Prefix every word of *kw* with "+" and join the words with spaces.

    Words are separated by the literal space character, as elsewhere in
    this file. Empty tokens produced by repeated, leading or trailing
    spaces are skipped so the result never contains a bare "+"; an empty
    input yields an empty string.
    """
    words = [word for word in kw.split(" ") if word]
    # strip() keeps the original trimming of whitespace left on the last word.
    return " ".join("+" + word for word in words).strip()

def get_city(string, geoid=None, max_attempts=5, retry_delay=1.0):
    """Look up the city mentioned in a query through the wizard API.

    Args:
        string: query text; it is URL-quoted before being sent.
        geoid: region id sent as the ``lr`` parameter; 213 when omitted.
        max_attempts: how many requests to try before giving up. ``None``
            retries until a request succeeds.
        retry_delay: seconds to wait after a failed request before the
            next attempt.

    Returns:
        ``[city, bestgeoid]``: the capitalized ``Name`` of the first field
        of the first ``GeoAddr`` entry and its ``BestInheritedId`` as an
        int. Both are ``None`` when the response has no usable ``GeoAddr``
        entry or when every attempt failed.
    """
    import time  # local import: only needed for the pause between retries

    if geoid is None:
        geoid = 213
    wizard_api_url = "http://tomshinsky.haze.yandex.net:8084/wizardapi/?query="
    city = None
    bestgeoid = None
    attempt = 0
    while max_attempts is None or attempt < max_attempts:
        attempt += 1
        try:
            q = urllib.quote(string, safe="/")
            url = wizard_api_url + q + "&lr=" + str(geoid)
            response = urllib2.urlopen(urllib2.Request(url))
            try:
                jsondata = json.load(response)
            finally:
                # Release the connection whether or not parsing succeeded.
                response.close()
        except Exception as e:
            # Best-effort lookup: report the failure and try again, pausing
            # so a failing service is not hit in a tight loop.
            print(str(e))
            if max_attempts is None or attempt < max_attempts:
                time.sleep(retry_delay)
            continue

        try:
            geo = jsondata[u'GeoAddr'][0]
            city = geo[u'Fields'][0][u'Name'].capitalize()
            bestgeoid = int(geo[u'BestInheritedId'])
            print("%s %s" % (city, bestgeoid))
        except (KeyError, IndexError, TypeError, ValueError, AttributeError):
            # The response carries no usable geo data for this query.
            city = None
            bestgeoid = None
        break

    return [city, bestgeoid]

def check_in_stoplist(string):
    """Tell whether any space-separated word of *string* is in the stoplist.

    The verdict is printed before it is returned. Scanning stops at the
    first word found in ``stopl``.
    """
    verdict = any(token in stopl for token in string.split(" "))
    print(verdict)
    return verdict

def read_qc_json(file, add_geo):
    """Read a file of JSON lines and build the keyword table.

    Every line is parsed as a JSON object. The query text is read from the
    field named by ``query_name`` and the hit count from the field named by
    ``hit_name``. A query is kept when its hits exceed ``hit_limit``, none
    of its words is in the stoplist, it has fewer than 6 space-separated
    words and fewer than 81 characters.

    Args:
        file: path of the file to read.
        add_geo: when true, each kept row is extended with the two values
            returned by ``get_city`` for the query.

    Returns:
        A list whose first row is the header ``['Queries', 'Count',
        'Keywords']`` followed by the kept rows sorted by hit count,
        highest first. If reading fails part-way, the problem is reported
        and the rows collected so far are returned.
    """
    queries = []

    try:
        with open(file) as lines:
            for i, line in enumerate(lines):
                if i == 0:
                    print("-----\nExpecting:" + query_name + " " + hit_name)

                record = json.loads(line)
                if i == 0:
                    print("FOUND: " + str(record.keys()))

                # IMPORTANT: the input fields must be named exactly as
                # configured in query_name and hit_name.
                hits = int(record[hit_name])
                query = (record[query_name].encode('raw_unicode_escape').decode('utf-8')).encode('utf-8')

                # Decide whether the query is kept; the stoplist check only
                # runs for queries that already passed the hit threshold.
                keep = (hits > hit_limit
                        and not check_in_stoplist(query)
                        and len(query.split(" ")) < 6
                        and len(query.decode('utf-8')) < 81)
                if not keep:
                    continue

                print("adding")
                row = [query, hits, make_keyword(query)]
                if add_geo:
                    # Resolve the city from the query text via the wizard API.
                    wizard = get_city(query)
                    row.extend([wizard[0], wizard[1]])
                queries.append(row)
    except Exception as e:
        # Previously the message was a bare string expression and never
        # shown; report it so a truncated result is not silent.
        print("Problems reading file: %r" % (e,))

    sorted_queries = sorted(queries, key=operator.itemgetter(1), reverse=True)

    return [['Queries', 'Count', 'Keywords']] + sorted_queries

def save_xml(data,filename,worksheet):
    """Write *data* to a new workbook and save it.

    Args:
        data: list of rows; each row is appended to the sheet in order.
        filename: path to save to. When ``None`` the file is placed next
            to this script and named ``Keywords_<word>.xlsx``, where
            ``<word>`` is the first space-separated word of the first cell
            of the second row. If there is no second row the name falls
            back to ``Keywords.xlsx``.
        worksheet: title given to the active sheet.
    """
    wb = Workbook()
    ws = wb.active
    ws.title = worksheet
    for row in data:
        ws.append(row)

    if filename is None:
        if len(data) > 1 and data[1]:
            xlsname = dir + "/Keywords_" + data[1][0].split(' ')[0] + ".xlsx"
        else:
            # No row after the first one to derive a name from.
            xlsname = dir + "/Keywords.xlsx"
    else:
        xlsname = filename

    wb.save(filename = xlsname)
    # Report only after the workbook has actually been written.
    print("Saved as " + str(xlsname))

def save_csv(input, name):
    """Write the rows of *input* as a ';'-delimited CSV file.

    Args:
        input: iterable of rows passed to ``csv.writer.writerows``.
        name: file name appended to the script directory as-is, so it
            should start with a path separator (e.g. ``"/test.csv"``).
    """
    path = dir + name
    # The context manager flushes and closes the file before success is
    # reported.
    with open(path, 'wb') as myfile:
        wr = csv.writer(myfile, delimiter=';')
        wr.writerows(input)
    print("Saved CSV as: " + str(path))

def read_xml(file):
    """Read the keywords from the 'Keywords' sheet of the workbook *file*.

    The first row is skipped. For every other row the value of the first
    cell is UTF-8 encoded and returned as a ``(keyword, keyword)`` pair.
    Rows whose first cell is empty are ignored.

    Returns:
        A list of ``(kw, kw)`` tuples in sheet order.
    """
    wb = load_workbook(filename = file)
    ws = wb['Keywords']
    keywords = []
    # list() makes the rows sliceable whether ws.rows is a sequence or a
    # generator, which depends on the installed openpyxl version.
    for row in list(ws.rows)[1:]:
        value = row[0].value
        if value is None:
            # An empty cell has no text to encode.
            continue
        kw = value.encode('utf-8')
        keywords.append((kw,kw))
    return keywords

def kwcleaner(file,add_geo):
    """Build the keyword table from *file* and export it as XLSX and CSV.

    The workbook name is derived by ``save_xml`` (no explicit file name is
    passed) and its sheet is titled "Keywords"; the CSV copy is written as
    "/test.csv" relative to the script directory.
    """
    table = read_qc_json(file, add_geo)
    save_xml(table, None, "Keywords")
    save_csv(table, "/test.csv")

def minusfunc(file):
    """Add a "Minus_Keywords" sheet to the workbook *file* and save it in place.

    The keywords read from the workbook are passed to ``minus.minus``; its
    result, preceded by the header row ``['Ad group', 'Keyword', 'Type']``,
    is written row by row to the new sheet.
    """
    keywords = read_xml(file)
    workbook = load_workbook(filename = file)
    sheet = workbook.create_sheet()
    sheet.title = "Minus_Keywords"

    rows = minus.minus(keywords)
    rows.insert(0, ['Ad group', 'Keyword', 'Type'])

    # Original author's note: REFACTOR
    for entry in rows:
        sheet.append(entry)

    print("Saved as " + str(file))
    workbook.save(filename = file)


if __name__ == '__main__':
    # Command line: <script> <file> -m|-k [-geo]
    #   -m    add a minus-keywords sheet to the workbook <file>
    #   -k    read queries from <file> and export the keyword table
    #   -geo  with -k, also resolve a city for every kept query
    import sys

    usage = "no argument: -m for minuswords; -k for keyword reading"
    try:
        if len(sys.argv) < 3:
            # Missing file or mode: show the usage text instead of failing
            # with an IndexError.
            print(usage)
        else:
            file = sys.argv[1]
            add_geo = '-geo' in sys.argv
            mode = sys.argv[2]
            if mode == '-m':
                minusfunc(file)
            elif mode == '-k':
                kwcleaner(file=file, add_geo=add_geo)
            else:
                print(usage)
    except Exception:
        # Top-level boundary: report the failure. Ctrl-C and SystemExit are
        # no longer intercepted.
        PrintException()
