#-*-coding: utf8 -*-

import json
import re
from datetime import datetime, timedelta, date

try:
    import urlparse  # Python 2
except ImportError:
    import urllib.parse as urlparse  # Python 3

import nile
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf


# Owner of the YT pool and of the job root directory used below.
username = 'ensuetina'

# strftime/strptime pattern for ISO calendar dates (YYYY-MM-DD).
date_format = '%Y-%m-%d'

# Local path to a blockstat dictionary (not referenced in the visible part
# of this file).
full_path_to_blockstat = '/home/ensuetina/blockstat.dict'

# Select the cluster: Hahn, running in the author's pool, with `$job_root`
# pointing at the per-user TV_DYN directory and at most 10 operations
# running in parallel.
cluster = clusters.yt.Hahn(pool='search-research_ensuetina').env(
    templates={
        'job_root': 'home/search-research/' + username + '/TV_DYN',
    },
    parallel_operations_limit=10,
)

# Lazily built cache: code point -> u' ' for every character that
# normalize_query() replaces with a space. Filled by build_translation().
TRANSLATION = None

def build_translation():
    """Return the translation table used to blank out punctuation.

    The table maps the ordinal of every Unicode character whose general
    category starts with 'P' (punctuation), plus a fixed set of control
    and symbol characters, to a single space. It is suitable for
    ``unicode.translate`` / ``str.translate``.

    The table is computed on the first call only and cached in the
    module-level ``TRANSLATION``; later calls return the same dict object.
    """
    global TRANSLATION
    if TRANSLATION is None:
        import sys
        import unicodedata

        # Python 2 needs xrange/unichr (range would materialise a huge list,
        # chr only covers 0..255); Python 3 has neither name.
        if sys.version_info[0] < 3:
            code_points, to_char = xrange, unichr
        else:
            code_points, to_char = range, chr

        # +1 so that the last valid code point is inspected as well.
        table = {
            index: u' ' for index in code_points(sys.maxunicode + 1)
            if unicodedata.category(to_char(index)).startswith('P')
        }
        # Whitespace controls and symbols that are not in a 'P' category
        # but must be blanked out too.
        for char in u'\t\n\x0b\x0c\r$+<=>^`|~':
            table[ord(char)] = u' '

        # Publish only the fully built table.
        TRANSLATION = table
    return TRANSLATION

def normalize_query(query):
    """Return a normalised, UTF-8 encoded form of a raw query string.

    The input is decoded as UTF-8, every character listed in the
    build_translation() table is replaced with a space, the text is
    lower-cased and stripped, and each run of two or more whitespace
    characters is collapsed into one space.

    Returns None when the input is not valid UTF-8.
    """
    try:
        text = query.decode('utf8')
    except UnicodeDecodeError:
        # Undecodable input cannot be normalised.
        return None

    cleaned = text.translate(build_translation()).lower().strip()
    collapsed = re.sub(r'\s\s+', ' ', cleaned)
    return collapsed.encode('utf8')

class parse_us:
    """Reducer callable that emits one record per recognised search request.

    For every (key, records) group the session is parsed with libra. Each
    request of a recognised type yields a Record holding the user id, the
    interface label, the normalised query, the request date, the week label
    passed to the constructor, and the first TV-channel site found among the
    top web results (empty string and is_ch=0 when there is none).
    """

    # Sites that count as a TV channel. A result matches when one of these
    # strings occurs anywhere in the host of its URL.
    CHANNELS = (
        '1tv.ru', 'matchtv.ru', 'ntv.ru', '5-tv.ru', 'vesti.ru', 'ren.tv', 'tnt-online.ru', 'tv.m24.ru',
        'football-tv.ru', 'sovsekretno.tv', 'echotv.ru', 'probusinesstv.ru', 'uspeh-tv.ru', 'tochkatv.tv',
        'ocean-tv.media', 'hittv.ru', 'ru.tv', 'sovetidamam.ru', 'oirtv.ru', 'russian.rt.com', 'doc.rt.com',
        'hdmedia.ru', 'wfc.tv', 'sibir.tv', '1hd.ru', 'o2tv.ru', 'ctc.ru',
    )

    # Number of non-matching web results inspected before giving up.
    TOP_RESULTS = 3

    def __init__(self,week):
        # Label written to the `week` field of every emitted record.
        self.week = week

    def _find_channel(self, url):
        """Return the first CHANNELS entry found in the host of `url`, or ''.

        When no network location can be extracted (no scheme, or a URL the
        parser rejects) the whole URL string is searched instead.
        """
        try:
            host = urlparse.urlparse(url).netloc or url
        except ValueError:
            # urlparse rejects some malformed URLs (e.g. a broken IPv6 host).
            host = url

        for channel in self.CHANNELS:
            if channel in host:
                return channel
        return ''

    def __call__(self,groups):
        """Yield one Record per recognised request of every session group.

        `groups` is iterated as (key, records) pairs; `key.key` is used as
        the user id. Groups that libra fails to parse are skipped.
        """
        # Imported here because libra.so is shipped with the job and is only
        # expected to be importable where the reducer actually runs.
        import libra

        for key,recs in groups:
            uid = key.key

            try:
                session = libra.ParseSession(recs, './blockstat.dict')
            except Exception:
                # Best effort: an unparsable session is dropped as a whole.
                continue

            for r in session:
                # Map the request type to an interface label; any other
                # request type is ignored.
                if r.IsA('TTouchYandexWebRequest'):
                    ui = 'touch'
                elif r.IsA('TYandexWebRequest'):
                    ui = 'desktop'
                elif r.IsA('TPadYandexWebRequest'):
                    ui = 'pad'
                else:
                    continue

                q = normalize_query(str(r.Query))
                # Calendar date (YYYY-MM-DD) of the request in local time.
                ts = datetime.fromtimestamp(r.Timestamp).date().isoformat()

                checked = 0
                ch = ''
                for bl in r.GetMainBlocks():
                    result = bl.GetMainResult()
                    if not result.IsA('TWebResult'):
                        # Non-web results do not count towards the limit.
                        continue

                    ch = self._find_channel(str(result.Url))
                    if ch:
                        break

                    checked += 1
                    if checked == self.TOP_RESULTS:
                        break

                is_ch = 1 if ch else 0

                yield Record(uid=uid,ui=ui,q=q,channel=ch,date=ts,week=self.week,is_ch=is_ch)



# Last day of the most recent 7-day window to process, and the date at which
# the backward walk stops: a window ending on STOP_BEFORE (or earlier) is
# not processed.
LAST_WEEK_END = '2017-07-23'
STOP_BEFORE = '2015-12-27'

# Daily session tables live at SESSIONS_PREFIX + <YYYY-MM-DD> + '/clean'.
SESSIONS_PREFIX = 'user_sessions/pub/sample_by_uid_1p/search/daily/'


def _parsed_day(job, day_str, week):
    """Return the stream of parse_us records for one daily session table.

    The table is grouped by `key`, sorted by `subkey` and reduced with
    parse_us(week); blockstat.dict and libra.so are attached to the
    operation so the reducer can load them.
    """
    return (
        job.table(SESSIONS_PREFIX + day_str + '/clean')
        .groupby('key')
        .sort('subkey')
        .reduce(
            parse_us(week),
            files=[
                nile.files.RemoteFile('statbox/statbox-dict-last/blockstat.dict'),
                nile.files.RemoteFile('statbox/resources/libra.so'),
            ],
            memory_limit=4000,
        )
    )


week_end = datetime.strptime(LAST_WEEK_END, date_format).date()
stop_day = datetime.strptime(STOP_BEFORE, date_format).date()

print(str(week_end))

# Walk backwards one 7-day window at a time; every window is processed by
# its own job and appended to the same output table.
while week_end > stop_day:
    # The window's days from newest to oldest; the oldest one labels the week.
    days = [str(week_end - timedelta(days=back)) for back in range(7)]
    week = days[-1]

    print(' '.join(days))
    print(week)

    job = cluster.job()
    daily = [_parsed_day(job, day_str, week) for day_str in days]
    job.concat(*daily).put('$job_root/total_dyn', append=True)
    job.run()

    week_end -= timedelta(days=7)

