#!/usr/bin/env python
# coding: utf-8

# Root YT directory of this report; every intermediate and result table below
# is addressed relative to it (either as root+'/...' or via the '$jr' template).
root = "//home/suggest-dev/galamaj/1982"

import datetime

from simplejson import loads as js_loads
from functools import partial
from nile.api.v1 import (
    clusters, Record,
    aggregators as na,
    extractors as ne,
    filters as nf)
from nile.api.v1.statface.client import StatfaceProductionClient, StatfaceBetaClient
from nile.api.v1.statface.report import StatfaceReport
from qb2.api.v1 import (
    QB2,
    extractors as se,
    filters as sf)

import jul

# Cluster name passed to jul.log_path() when resolving log table paths.
cluster_name = 'hahn'
# Previous configuration (other pool, no tmp_files template), kept for reference:
# cluster = clusters.Hahn(pool='search-research_24julia').env(templates=dict(jr=root),
#                                                             parallel_operations_limit=3,
#                                                             auto_increase_memory_limit=True)
# The `jr` template makes '$jr/<name>' in table paths equivalent to root+'/<name>'.
# NOTE(review): tmp_files has no leading '//' unlike `root` — confirm this is intended.
cluster = clusters.Hahn(pool='robot-suggest-dev').env(templates=dict(jr=root, tmp_files='home/suggest-dev/galamaj/tmp'),
                                                            parallel_operations_limit=3,
                                                            auto_increase_memory_limit=True)
# Raw YT client of the cluster driver; used below for yt.list() on directories.
yt = cluster.driver.client

# Processing window.
# The last date handled by the previous run is stored in a local state file
# (rewritten at the very end of this script); this run starts on the next day.
with open('1982_prev_date.txt', 'r') as prev_date_file:
    # strip(): a trailing newline (editor, `echo`) would otherwise make
    # strptime fail with "unconverted data remains".
    prev_date_str = prev_date_file.read().strip()
start_date = datetime.datetime.strptime(prev_date_str, '%Y-%m-%d').date() + datetime.timedelta(1)
# The window ends two days before today.
# NOTE(review): presumably to give the logs time to arrive — confirm.
end_date = datetime.date.today() - datetime.timedelta(2)
# Monday of the week preceding the week that contains start_date
# (lower bound of the weekly recalculation).
prev_monday = start_date - datetime.timedelta(days=start_date.weekday()) - datetime.timedelta(7)
# Daily steps 2-3 are recomputed starting 7 days before start_date, because
# step 1 files late events under up to 7 earlier dates.
start_date7 = start_date - datetime.timedelta(7)

# ISO-formatted strings of the dates above (used in table paths and comparisons).
start_date_str = start_date.strftime('%Y-%m-%d')
start_date7_str = start_date7.strftime('%Y-%m-%d')
end_date_str = end_date.strftime('%Y-%m-%d')
prev_monday_str = prev_monday.strftime('%Y-%m-%d')
# Event names that step 2 counts one-to-one: every event with such a name
# increments the per-device counter of the same name.
S2_SUM_FIELDS = set((
    'dayuse',
    'search_clicked',
    'bar_clicked',
    'splash_shown',
    'promo_shown',
    'widget_dayuse',
    'widget_manual_shown',
    'widget_clicked',
))
# Every numeric field summed by step 3: the plain counters above plus the
# derived counters and the 0/1 "has_*" flags.
S3_AGGR_FIELDS = S2_SUM_FIELDS.union((
    'enable_bar',
    'splash_agreed',
    'promo_agreed',
    'splash_shown_bar',
    'splash_shown_widget',
    'splash_agreed_bar',
    'splash_agreed_widget',
    'has_dayuse',
    'has_bar_clicked',
    'widget_search_clicked',
    'bar_search_clicked',
    'widget_enable',
    'widget_manual_agreed',
    'has_widget_dayuse',
    'has_any_dayuse',
    'has_widget_clicked',
))

# Run mode. With any value other than 'main' the daily step 2-3 recomputes
# every date unconditionally, and the weekly step skips weeks that received
# no fresh step-1 data.
mode = 'main'

# ISO dates compare chronologically as plain strings.
if end_date_str < start_date_str:
    raise Exception("Bad dates: start date", start_date_str, '>', 'end date', end_date_str)

print('%s %s' % (start_date_str, end_date_str))

"""
Step 1.
Parses metrika-mobile-log for searchlib events
"""


def s1(day, records, a0, a1, a2, a3, a4, a5, a6, a7):
    """Mapper: flatten searchlib events and route them by log lateness.

    Each input record's JSON payload (``raw_event_value``) becomes the base of
    the output record; device, project, country and date fields are written
    on top of it.  The record is then sent to the output stream whose index is
    the number of days between the log table date and the event date, clamped
    to the range of available streams (0..7).

    :param day: date of the log table being processed, 'YYYY-MM-DD'.
    :param records: input records of the mapper.
    :param a0..a7: output streams; stream ``i`` receives events logged ``i``
        days after they happened (the last stream also takes anything later).
    """
    streams = [a0, a1, a2, a3, a4, a5, a6, a7]
    max_late = len(streams) - 1
    # The table date is the same for every record: parse it once.
    table_date = datetime.datetime.strptime(day, '%Y-%m-%d').date()
    for r in records:
        try:
            data = js_loads(r.raw_event_value)
        except Exception:
            # Unparseable or missing payload: keep the event with no extra fields.
            data = {}
        if not isinstance(data, dict):
            # Valid JSON that is not an object (number, string, list, null)
            # cannot carry fields; treat it like an empty payload instead of
            # crashing on the item assignments below.
            data = {}
        data["did"] = r.device_id
        data["day"] = r.event_date
        data["platform"] = r.app_platform
        data["project"] = r.app_project
        data["project_uuid"] = r.uuid
        # Drop the 10-character 'searchlib_' prefix (the input is filtered on it).
        data["event_name"] = r.event_name[10:]
        if not r.country:
            data["country"] = 'un'
        else:
            # If several region ids are present, the last one iterated wins.
            for c in r.country:
                data["country"] = country_by_id[c]
        data["table_day"] = day
        event_date = datetime.datetime.strptime(r.event_date, '%Y-%m-%d').date()
        data["log_late"] = min(max((table_date - event_date).days, 0), max_late)
        res = Record(**data)
        streams[res.log_late](res)


# Region ids of the supported countries and the reverse id -> name lookup
# used when writing the `country` field.
countries_id = jul.regions_id.values()
country_by_id = {v: k for k, v in jul.regions_id.iteritems()}

# Event dates that received fresh step-1 output during this run; the daily
# step 2-3 below recomputes exactly these dates.
dates_from_step1 = set()

# Step 1 driver: one job per log day that has not been parsed yet.
# Output layout is s1_searchlib_logs/<event date>/<log table date>: stream i
# of the mapper holds events that are i days late, so it is stored under the
# date i days before the log table date.
for d in jul.date_range(start_date, end_date):
    str_d = d.strftime('%Y-%m-%d')
    s1_path = root+'/s1_searchlib_logs/'
    # The on-time table <d>/<d> serves as the "already processed" marker.
    if not jul.exists_and_not_empty(s1_path+str_d+'/'+str_d, cluster):
        job = cluster.job(name='1982_searchlib_funnel_s1_'+str_d, uuid_by_name=True)
        cur_mml = jul.log_path(cluster_name, 'mml', d)
        # Keep only events whose name starts with 'searchlib_'.
        s1_streams = job.table(cur_mml)\
            .qb2(log='metrika-mobile-log',
                 fields=['app_project', 'app_platform', 'uuid', 'device_id', 'event_date',
                         'event_name', 'raw_event_value', se.geo_region('country', countries_id)],
                 filters=[sf.custom(lambda z: z and z.startswith('searchlib_'), 'event_name')],
                 memory_limit=1024)\
            .map(partial(s1, str_d))
        # One output table per lateness bucket (0..7 days).
        for i in range(8):
            s1_res = s1_streams[i]\
                .sort('did', 'project', 'day', 'country')\
                .put(s1_path+(d-datetime.timedelta(i)).strftime('%Y-%m-%d')+'/'+str_d)
            dates_from_step1.add((d-datetime.timedelta(i)).strftime('%Y-%m-%d'))
        job.run()

print 's1 done'

"""
Steps 2-3.
2. From parsed logs creates aggregated daily data
3. Adds the aggregate "all" value to every dimension (country, version, project)
Repeats all calculations for weekly scale
"""


def s2(groups, day=None):
    """Reducer: collapse the events of one (did, project, country) group into
    a single record of counters.

    :param groups: iterable of ``(keys, records)`` pairs; ``keys`` carries
        ``did``, ``project`` and ``country``.
    :param day: value for ``fielddate``.  When None, the date is derived from
        the first record of the group as ``table_day - log_late`` days.
    :return: generator of one ``Record`` per group.

    Besides the fixed counters the output may carry ``splash_shown_<kind>`` and
    ``splash_agreed_<kind>`` fields, created on first occurrence of a kind
    (``bar`` when the event has no ``kind``).
    """
    agreed_actions = {'yes', 'ok', 'install'}
    # Confirmation events that map straight to a counter (splash is handled
    # separately because it is also split by kind).
    simple_agreed = {'promo_action': "promo_agreed",
                     'widget_manual_action': "widget_manual_agreed"}
    for keys, records in groups:
        stats = dict.fromkeys(S2_SUM_FIELDS, 0)
        stats.update({
            "did": keys.did, "project": keys.project, "country": keys.country,
            "fielddate": day,
            "ver": 0,
            "search_sources": {},
            "searchlib_uuids": [],
            "enable_bar": 0, "widget_enable": 0,
            "splash_agreed": 0, "promo_agreed": 0, "widget_manual_agreed": 0,
            "widget_search_clicked": 0, "bar_search_clicked": 0,
        })
        for rec in records:
            if stats["fielddate"] is None:
                table_date = datetime.datetime.strptime(rec.table_day, '%Y-%m-%d').date()
                stats["fielddate"] = (table_date - datetime.timedelta(rec.log_late)).strftime('%Y-%m-%d')
            # Track the highest library version seen for the device.
            if rec.get('version', 0) > stats["ver"]:
                stats["ver"] = rec.version
            event = rec.event_name

            if event in S2_SUM_FIELDS:
                stats[event] += 1
                if event == 'splash_shown':
                    shown_by_kind = 'splash_shown_' + rec.get("kind", "bar")
                    stats[shown_by_kind] = stats.get(shown_by_kind, 0) + 1

            if event == 'search_clicked':
                if rec.get('kind') == 'widget':
                    stats["widget_search_clicked"] += 1
                else:
                    stats["bar_search_clicked"] += 1
                sources = stats["search_sources"]
                source = rec.get('source', 'None')
                sources[source] = sources.get(source, 0) + 1
            elif event in ('enable_bar', 'widget_enable') and rec.get('enable', False):
                # Only switching the feature on is counted.
                stats[event] += 1

            if rec.get('action') in agreed_actions:
                if event == 'splash_action':
                    stats["splash_agreed"] += 1
                    agreed_by_kind = 'splash_agreed_' + rec.get("kind", "bar")
                    stats[agreed_by_kind] = stats.get(agreed_by_kind, 0) + 1
                elif event in simple_agreed:
                    stats[simple_agreed[event]] += 1

            # Collect distinct searchlib uuids in first-seen order.
            if hasattr(rec, 'searchlib_uuid') and rec.searchlib_uuid not in stats["searchlib_uuids"]:
                stats["searchlib_uuids"].append(rec.searchlib_uuid)
        yield Record(**stats)


def add_version(records):
    """Mapper: fill in a missing library version from the update history.

    A record whose ``ver`` is 0 gets the version of the latest entry in its
    ``updates`` mapping (date -> version) dated on or before ``fielddate``.
    Records with a non-zero ``ver``, or with no suitable history entry, pass
    through unchanged.
    """
    for rec in records:
        row = rec.to_dict()
        if row["ver"] == 0:
            history = row.get('updates', {})
            if history:
                known_dates = [date for date in row["updates"].keys() if date <= row["fielddate"]]
                if known_dates:
                    row["ver"] = row["updates"][max(known_dates)]
        yield Record(**row)


def s3_create_tree(records):
    """Mapper: add 0/1 usage flags and fan each record out over the "all"
    roll-ups of country, version and project.

    Every input record is emitted once per combination of
    country in ('all', own), ver in ('all', own) and project in
    {'all', own, 'without_searchapp_and_bro'}; the last project value is
    produced only for projects other than Search_Mobile_App and
    Mobile_Soft_Browser.
    """
    excluded_from_rest = {'Search_Mobile_App', 'Mobile_Soft_Browser'}
    for rec in records:
        base = rec.to_dict()
        base["has_dayuse"] = int(base["dayuse"] > 0)
        base["has_widget_dayuse"] = int(base.get('widget_dayuse', 0) > 0)
        base["has_any_dayuse"] = base["has_dayuse"] or base["has_widget_dayuse"]
        base["has_bar_clicked"] = int(base["bar_clicked"] > 0)
        base["has_widget_clicked"] = int(base.get('widget_clicked', 0) > 0)

        own_project = base["project"]
        own_country = base["country"]
        own_ver = base["ver"]
        project_values = {'all', own_project}
        if own_project not in excluded_from_rest:
            project_values.add('without_searchapp_and_bro')

        for country in ('all', own_country):
            for ver in ('all', own_ver):
                for project in project_values:
                    yield Record(**dict(base, country=country, ver=ver, project=project))


# Daily steps 2-3, all dates in one job.
# A date is (re)computed when it received fresh step-1 data in this run, when
# its s3 table is missing/empty, or always when mode != 'main'.
job = cluster.job(name='1982_searchlib_funnel_s2_s3', uuid_by_name=True)
for d in jul.date_range(start_date7, end_date):
    str_d = d.strftime('%Y-%m-%d')
    s3_path = root+'/s3_day_stat/'+str_d
    if (mode != 'main' or str_d in dates_from_step1) or not jul.exists_and_not_empty(s3_path, cluster):
        s2_path = root+'/s2_by_did_project/'+str_d
        s1_path = root+'/s1_searchlib_logs/'+str_d
        # Step 2: all step-1 tables of the event date -> one record per
        # (did, project, country); a missing version is then taken from the
        # s2_all_updates history table.
        # NOTE(review): s2_all_updates is rewritten later in this script (apps
        # job), so the table read here is the one left by the previous run.
        # NOTE(review): yt.list() presumably fails if the date directory does
        # not exist — confirm every date in the range has step-1 output.
        s2_updates = job.concat(*[job.table(s1_path+'/'+x) for x in yt.list(s1_path)])\
            .groupby('did', 'project', 'country')\
            .reduce(s2)\
            .join(job.table(root+'/s2_all_updates'),
                  by=('did', 'project'), type='left', assume_unique_right=True)\
            .map(add_version).put(s2_path)
        # Step 3: fan out over the "all" roll-ups and aggregate per dimension cell;
        # `amount` is the number of per-device records in the cell.
        s3_stream = s2_updates\
            .map(s3_create_tree, intensity='cpu')\
            .groupby('fielddate', 'project', 'country', 'ver')\
            .aggregate(amount=na.count(),
                       **{x: na.sum(x) for x in S3_AGGR_FIELDS}) \
            .put(s3_path)
job.run()

print 's2 and s3 done'

"""
Step 2.5
1. For each device_id, finds the first search made from the bar and the last date the bar was active
2. Joins searchlib_uuid from app_metrika with searchlib_uuid from logs (linked to yandexuids),
   then joins the users' searches from the search-history table
3. Creates a table with full data about users who "activated" the search bar and calculates lower-funnel metrics for each dimension
"""


def first_search_s1(groups):
    """Reducer: per device, pick the earliest record with a search click.

    While scanning the group it also collects every distinct searchlib uuid
    and the latest "active" date: the greatest ``fielddate`` with
    ``dayuse > 0`` or ``enable_bar > 0``, or a greater ``last_date`` carried
    by a record.  Devices without any search click produce no output.

    The emitted record is the earliest search record extended with
    ``searchlib_uuids``, ``last_date`` and ``type='main'``.
    """
    for keys, records in groups:
        first_search = None
        collected_uuids = []
        latest_active = '1000-00-00'  # sentinel that sorts before any real date
        for rec in records:
            if rec.get('searchlib_uuids'):
                for uuid in rec.searchlib_uuids:
                    if uuid not in collected_uuids:
                        collected_uuids.append(uuid)

            if rec.fielddate > latest_active and (rec.dayuse > 0 or rec.enable_bar > 0):
                latest_active = rec.fielddate
            if rec.get('last_date', '1000-00-00') > latest_active:
                latest_active = rec.last_date

            if rec.search_clicked != 0 and (not first_search or first_search["fielddate"] > rec.fielddate):
                first_search = rec.to_dict()

        if not first_search:
            continue
        first_search["searchlib_uuids"] = collected_uuids
        first_search["last_date"] = latest_active
        first_search["type"] = 'main'
        yield Record(**first_search)


def multi_field(src, dst, records):
    """Mapper: explode the iterable field ``src`` into one record per element.

    Each output record is a copy of the input with the element stored in
    field ``dst``; records whose ``src`` is empty produce nothing.
    """
    for rec in records:
        row = rec.to_dict()
        for item in row[src]:
            yield Record(**dict(row, **{dst: item}))


def join_searches(groups):
    """Reducer: merge the per-day mappings of all records of a device.

    The ``info`` and ``searchlib_info`` fields (day -> value) of every record
    in the group are combined by adding up the values of equal days.  One
    record with ``type='all_searches'`` is emitted per group.
    """
    merged_fields = ('info', 'searchlib_info')
    for keys, records in groups:
        did = keys.did
        totals = dict((name, {}) for name in merged_fields)
        for rec in records:
            for name in merged_fields:
                per_day = totals[name]
                for day in rec.get(name, {}):
                    if day in per_day:
                        per_day[day] += rec[name][day]
                    else:
                        per_day[day] = rec[name][day]
        yield Record(did=did, info=totals['info'],
                     searchlib_info=totals['searchlib_info'], type='all_searches')


def add_history(groups):
    """Reducer: attach search history to a device's first-search record and
    compute the lower-funnel metrics.

    A group holds the 'main' record (a missing ``type`` counts as 'main') and
    optionally an 'all_searches' record with the ``info`` / ``searchlib_info``
    day -> count mappings.  Nothing is emitted when there is no main record
    or when ``searchlib_info`` has a day before the first-search date.

    Added fields:
      before / after  -- sums of ``info`` over the 7 days before / after the
                         first-search date (that date itself excluded);
      after_soft      -- sum of ``searchlib_info`` over the 7 days after;
      after_soft_all  -- sum of ``searchlib_info`` over all later days;
      wr              -- after - before;
      weekraise_share -- wr / after_soft, capped at 1 (0 if after_soft is 0);
      lt              -- days from the first-search date to ``last_date``;
      ltv             -- after_soft_all * weekraise_share.
    """
    date_format = '%Y-%m-%d'
    one_week = datetime.timedelta(7)
    for keys, records in groups:
        main = None
        history = {}
        lib_history = {}
        for rec in records:
            if rec.get('type', 'main') == 'main':
                main = rec.to_dict()
            else:
                history = rec.get('info', {})
                lib_history = rec.get('searchlib_info', {})
        if not main:
            continue
        main["info"] = history
        main["searchlib_info"] = lib_history

        first_day = main["fielddate"]
        # Library searches dated before the "first" search: skip the device.
        if any(first_day > day for day in lib_history):
            continue

        install_date = datetime.datetime.strptime(first_day, date_format).date()
        window_start = (install_date - one_week).strftime(date_format)
        window_end = (install_date + one_week).strftime(date_format)

        main["before"] = sum(history[day] for day in history if window_start <= day < first_day)
        main["after"] = sum(history[day] for day in history if first_day < day <= window_end)
        main["after_soft"] = sum(lib_history[day] for day in lib_history if first_day < day <= window_end)
        main["after_soft_all"] = sum(lib_history[day] for day in lib_history if day > first_day)

        week_raise = main["after"] - main["before"]
        main["wr"] = week_raise
        if main["after_soft"]:
            main["weekraise_share"] = week_raise / float(main["after_soft"])
        else:
            main["weekraise_share"] = 0
        if main["weekraise_share"] > 1:
            main["weekraise_share"] = 1

        # The sentinel means no active date was ever seen: lifetime is zero.
        if main["last_date"] != '1000-00-00':
            last_date = datetime.datetime.strptime(main["last_date"], date_format).date()
        else:
            last_date = install_date
        main["lt"] = (last_date - install_date).days
        main["ltv"] = main["after_soft_all"] * main["weekraise_share"]
        yield Record(**main)


# Step 2.5: first searches and lower-funnel metrics.
job = cluster.job(name='1982_searchlib_funnel_s2_first_searches')
# Inputs: the per-device daily tables of the window plus, when present, the
# previously accumulated first-search table (it is read and rewritten by this job).
tabs = [job.table(root+'/s2_by_did_project/'+d.strftime('%Y-%m-%d')) for d in jul.date_range(start_date7, end_date)]
fs_table = root+'/s2.5_first_search'
if jul.exists_and_not_empty(fs_table, cluster):
    tabs.append(job.table(fs_table))
# 1. One 'main' record per device: its earliest record with a search click.
fs_stream_1 = job.concat(*tabs)\
    .groupby('did')\
    .reduce(first_search_s1)\
    .put(fs_table)
# 2. Search history per device: explode searchlib uuids, join uuid -> uids
#    (with the uuid table's `info` renamed to `searchlib_info`), explode uids,
#    join the history_short log by uid, then merge everything per device.
fs_stream_2 = fs_stream_1\
    .map(partial(multi_field, 'searchlib_uuids', 'searchlib_uuid'))\
    .join(job.table('//home/search-research/24julia/2035/searches_by_searchlib_uuid')
          .project('searchlib_uuid', 'uids', searchlib_info='info'),
          type='inner', by='searchlib_uuid', assume_unique_right=True)\
    .map(partial(multi_field, 'uids', 'uid'))\
    .join(job.table(jul.log_path(cluster_name, 'history_short')),
          type='inner', by='uid', assume_unique_right=True)\
    .groupby('did')\
    .reduce(join_searches)\
    .put('$jr/2.6_search_history')
# 3. Combine 'main' and history records per device, compute per-device
#    metrics, fan out over the "all" roll-ups and average per dimension cell.
# NOTE(review): ltv_per_user reads 'weekraise_share', which is produced by the
# same project() call — this relies on project() resolving dependencies
# between computed fields; confirm.
fs_stream_3 = job.concat(fs_stream_1, fs_stream_2)\
    .groupby('did')\
    .reduce(add_history)\
    .put('$jr/2.7_first_search_with_history')\
    .map(s3_create_tree, intensity='cpu')\
    .groupby('fielddate', 'project', 'country', 'ver')\
    .aggregate(first_search=na.count(),
               **{name: na.mean(name) for name in ('after', 'before', 'wr',
                                                   'after_soft', 'lt', 'after_soft_all')})\
    .project(ne.all(),
             weekraise_share=ne.custom(lambda w, z: w/float(z) if z else 0, 'wr', 'after_soft'),
             ltv_per_user=ne.custom(lambda a, w: a*w, 'after_soft_all', 'weekraise_share'))\
    .sort('fielddate', 'project', 'country', 'ver')\
    .put('$jr/2.8_lower_funnel')
job.run()


"""
Apps audience
1. Calculates daily audience of apps
2. Creates table with history of searchlib updates for each pair (device_id, project)
3. Calculates daily audience of apps with different searchlib versions
Repeats all calculations for weekly scale
"""


# def s1_2(records):
#     for r in records:
#         res = Record(project=r.app_project, did=r.device_id, event_date=r.event_date)
#         for c in res.country_set:
#             res.country = country_by_id[c]
#         yield res


def s2_2(groups):
    """Reducer: maintain the library update history of one (project, did).

    The group mixes fresh 'update' events (fields ``day`` and optional
    ``version``) with at most one meaningful accumulated record
    (``type == 'all_updates'``, field ``updates``: day -> version); if several
    accumulated records are present, the last one read is used.  The result
    keeps, for every day, the greatest version seen in either source.
    """
    for keys, records in groups:
        project, did = keys.project, keys.did
        by_day = {}
        accumulated = {}
        for rec in records:
            if rec.get('type') == 'all_updates':
                accumulated = rec.updates
                continue
            if rec.day not in by_day or by_day[rec.day] < rec.get('version', 0):
                by_day[rec.day] = rec.get('version', 0)
        # Fold the previously stored history in, keeping the greater version.
        for day in accumulated:
            if day in by_day and not by_day[day] < accumulated[day]:
                continue
            by_day[day] = accumulated[day]
        yield Record(project=project, did=did, type='all_updates', updates=by_day)


def s3_1(records):
    """Mapper: tag an app-open record with the library version in effect and
    fan it out over the "all" roll-ups of country, version and project.

    The version is the ``updates`` entry (date -> version) for ``event_date``
    or, failing that, the entry of the latest date before ``event_date``.
    It is None when the record has no usable history: the field is absent,
    None or empty, or every entry is dated after the event.

    Output records get ``fielddate`` (= ``event_date``), ``has_lib`` (1 when a
    version was found, else 0) and one combination each of
    country in ('all', own), ver in ('all', found version) and project in
    {'all', own, 'without_searchapp_and_bro'}; the last project value is used
    only for projects other than Search_Mobile_App and Mobile_Soft_Browser.
    """
    for r in records:
        # Treat an absent, None or empty history alike; the previous code
        # raised on None (`in None`) and on an empty mapping (`min()` of
        # an empty sequence).
        updates = getattr(r, 'updates', None) or {}
        if r.event_date in updates:
            ver = updates[r.event_date]
        else:
            earlier_dates = [z for z in updates if z < r.event_date]
            ver = updates[max(earlier_dates)] if earlier_dates else None
        r = r.to_dict()
        r["fielddate"] = r["event_date"]
        r["has_lib"] = int(ver is not None)
        country = r["country"]
        project = r["project"]
        for c in ('all', country):
            r["country"] = c
            for v in ('all', ver):
                r["ver"] = v
                for p in {'all', project,
                          'without_searchapp_and_bro' if project not in {'Search_Mobile_App', 'Mobile_Soft_Browser'} else 'all'}:
                    r["project"] = p
                    yield Record(**r)


# Apps audience, daily scale.
job = cluster.job(name='1982_searchlib_funnel_apps', uuid_by_name=True)
s1_path = root+'/s1_all_opens'
# 1. Distinct (project, device, date, country) opens of the listed Android
#    apps in the processing window; country is the name of the first region
#    id of the record's set, or None when the set is empty.
s1_opens = job.table(jul.log_path(cluster_name, 'mml', start_date, end_date))\
    .qb2(
        log='metrika-mobile-log',
        fields=['app_project', 'device_id', 'event_date', se.geo_region('country_set', countries_id)],
        filters=[sf.one_of('app_project', ('Mobile_Navig_Static', 'Mobile_Soft_Metro', 'Mobile_Soft_Transport',
                                           'Mobile_Soft_Weather','Mobile_Soft_Maps', 'Mobile_Soft_Trains',
                                           'Mobile_Soft_Keyboard', 'mobile.translate.yandex.net',
                                           'Mobile_Soft_Aviatickets', 'Search_Mobile_App', 'Mobile_Soft_Browser',
                                           'Mobile_Soft_Auto.ru', 'Yandex_Widget', "Mobile_Soft_Disk")),
                 sf.equals('app_platform', "Android"), sf.default_filtering('metrika-mobile-log')],
        memory_limit=1024)\
    .project('event_date', did='device_id', project='app_project',
             country=ne.custom(lambda z: country_by_id[list(z)[0]] if z else None, 'country_set'))\
    .unique('project', 'did', 'event_date', 'country')
# Merge with the accumulated opens table and write it back in place.
# NOTE(review): the accumulated table is read unconditionally, so it must
# already exist — confirm for a first run.
s1_opens = job.concat(s1_opens,
                      job.table(s1_path))\
    .unique('project', 'did', 'event_date', 'country')\
    .put(s1_path)

# 2. Update history per (project, device): fresh 'update' events from the
#    step-1 tables of the window merged into the accumulated history table,
#    which is also rewritten in place.
s2_path = root+'/s2_all_updates'
s2_updates = job.concat(*[job.table(root+'/s1_searchlib_logs/'+d.strftime('%Y-%m-%d')+'/'+x)
                          for d in jul.date_range(start_date7, end_date)
                          for x in yt.list(root+'/s1_searchlib_logs/'+d.strftime('%Y-%m-%d'))])\
    .filter(nf.custom(lambda z: z == 'update', 'event_name'))
s2_updates = job.concat(s2_updates, job.table(s2_path))\
    .groupby('project', 'did')\
    .reduce(s2_2)\
    .put(s2_path)

# 3. Audience per dimension cell: the first aggregation collapses duplicates
#    per device (has_lib = max over the device's rows), the second counts
#    devices (apps_amount) and devices with the library (has_lib).
s3_path = root+'/s3_current_opens'
s3_steams = s1_opens\
    .join(s2_updates, by=('project', 'did'), type='left', assume_unique_right=True)\
    .map(s3_1, intensity='cpu')\
    .groupby('fielddate', 'project', 'country', 'ver', 'did')\
    .aggregate(has_lib=na.max('has_lib'))\
    .groupby('fielddate', 'project', 'country', 'ver')\
    .aggregate(apps_amount=na.count(), has_lib=na.sum('has_lib'))\
    .put(s3_path)

job.run()

"""
Joins all pre-calculated measures and posts results to stat/
The same thing for weekly scale.
"""

def post_res_to_stat(path, scale='d'):
    """Read a joined result table and post it to the Statface report
    "Adhoc/searchlib_funnel" of the "Distribution" project.

    Rows whose ``apps_amount + amount`` is below 100 are dropped.  The kept
    rows are turned into a dict of column lists, converted with
    ``jul.reformat_to_stat`` and sent with ``jul.post_stat``.

    Posting is best effort: a failure is reported on stdout together with the
    first 1000 items of the payload and does not abort the script.

    :param path: YT path of the table to post.
    :param scale: report scale passed through to ``jul.post_stat``
        (the script uses 'd' and 'w').
    """
    records = cluster.read(path)
    f_list = {"fielddate", 'project', 'country', 'ver', 'amount',
              'apps_amount', 'has_lib',
              'first_search', 'before', 'after', 'after_soft', 'wr',
              'lt', 'ltv_per_user'}|S3_AGGR_FIELDS
    required_fields = {"fielddate", 'project', 'ver', 'amount'}
    res = {field: [] for field in f_list}
    for r in records:
        # `or 0`: the table comes from full joins, so either amount may be
        # missing or None for a row; None must not break the threshold check.
        if (r.get('apps_amount') or 0) + (r.get('amount') or 0) < 100:
            continue
        for field in f_list:
            value = getattr(r, field, '')
            if value == '' and field in required_fields:
                value = 'no info'
            if not value and field == 'country':
                value = 'other'
            res[field].append(value)
    res = jul.reformat_to_stat(res)
    print(res)
    try:
        jul.post_stat("Distribution", "Adhoc/searchlib_funnel", res, scale=scale)
    except Exception as err:
        # Keep going, but say what failed instead of hiding the error.
        print('Posting %s (scale=%s) to Statface failed: %r' % (path, scale, err))
        print(res[:1000])

# Dates to (re)post at daily scale: the recalculated window plus, for each new
# date, the dates 14, 28, ..., 140 days earlier.
# NOTE(review): the reason for re-posting the 14-day steps is not visible
# here — presumably to refresh metrics that keep changing after the date.
dates_to_post = list(jul.date_range(start_date7, end_date))+[x-datetime.timedelta(y*14)
                                                             for y in range(1, 11) for x in jul.date_range(start_date, end_date)]
# Keep only the dates that actually have a non-empty step-3 table.
days_to_post = []
for d in list(set(dates_to_post)):
    str_d = d.strftime('%Y-%m-%d')
    if not jul.exists_and_not_empty(root+'/s3_day_stat/'+str_d, cluster):
        continue
    days_to_post.append(str_d)

# Step 4: full-join the funnel counters with the apps audience and the
# lower-funnel metrics on (fielddate, project, country, ver).
job = cluster.job(name='1982_searchlib_funnel_s4_join_for_stat_t')
s4_stream = job.concat(*[job.table(root+'/s3_day_stat/'+d) for d in days_to_post])\
    .join(job.table('$jr/s3_current_opens')
          .filter(nf.custom(lambda z: z in days_to_post, 'fielddate')),
          type='full', by=('fielddate', 'project', 'country', 'ver'), assume_unique=True)\
    .join(job.table('$jr/2.8_lower_funnel')
          .filter(nf.custom(lambda z: z in days_to_post, 'fielddate')),
          type='full', by=('fielddate', 'project', 'country', 'ver'), assume_unique=True)\
    .put('$jr/s4_cur_days')
# Disabled variant that replaced None dimension values before writing:
#     .project(
#         ne.all(),
#         ver=ne.custom(lambda x: x if x is not None else 'None', 'ver'),
#         project=ne.custom(lambda x: x if x is not None else 'None', 'project'),
#         country=ne.custom(lambda x: x if x is not None else 'other', 'country')
# ).put('$jr/s4_cur_days')

job.run()
post_res_to_stat(root+'/s4_cur_days')
print "Done"

# ########################################################################################################################
#
# client = StatfaceBetaClient(
#     username="robot_galamaj",
#     password="entI2At5cemro4e"  # FIXME(security): credential committed to source; rotate it and load it from a secret store
# )
#
# report = StatfaceReport().path('Adhoc/galamaj/distribution').scale('daily')
#     # full path to the report,
#     # .path('Adhoc/galamaj/distribution') \
#     # # report scale (level of detail)
#     # .scale('daily')
#     # and the fields by which existing data is replaced (original comment was mis-encoded; meaning inferred)
#     # .replace_mask('fielddate', 'browser')
# report = report.client(client)
# cluster = clusters.Hahn()
# report = report.data(cluster.read(root+'/s4_cur_days'))
# print report
# report.publish()
# print "done"
#
# ########################################################################################################################
#
# client = StatfaceProductionClient(
#     username="robot_galamaj",
#     password="entI2At5cemro4e"  # FIXME(security): credential committed to source; rotate it and load it from a secret store
# )
#
# report = StatfaceReport().path('Distribution/Adhoc/searchlib_funnel').scale('daily')
#     # full path to the report,
#     # .path('Adhoc/galamaj/distribution') \
#     # # report scale (level of detail)
#     # .scale('daily')
#     # and the fields by which existing data is replaced (original comment was mis-encoded; meaning inferred)
#     # .replace_mask('fielddate', 'browser')
# report = report.client(client)
# cluster = clusters.Hahn()
# report = report.data(cluster.read(root+'/s4_cur_days'))
# print report
# report.publish()
# print "done"
#
# ########################################################################################################################

"""
The same logic but at weekly scale
"""

# Weekly steps 2-3: rebuild the per-device (s2) and aggregated (s3) tables of
# every week between prev_monday and end_date, one job per week.
# `days` collects the dates of the week in progress; the week is flushed when
# the next Monday is reached, or at end_date (possibly a partial week).
days = []
for d in jul.date_range(prev_monday, end_date):
    str_d = d.strftime('%Y-%m-%d')
    if d.weekday() != 0:
        days.append(str_d)
        if d != end_date:
            continue
    if not days:
        # First Monday of the range: nothing collected yet, start a week.
        days = [str_d]
        continue
    # Close the collected week and start the next one *before* doing any
    # work.  Previously a skipped or failed week kept its dates in `days`,
    # so they were merged into the following week's tables.
    week_days, days = days, [str_d]
    monday_d = min(week_days)
    if mode != 'main' and not any(x in dates_from_step1 for x in week_days):
        continue
    s3_path = root+'/s3_week_stat/'+monday_d
    s2_path = root+'/s2_by_did_project_w/'+monday_d
    try:
        job = cluster.job(name='1982_searchlib_funnel_s2_w'+monday_d, uuid_by_name=True)
        # Same reducers as the daily scale, with fielddate forced to the Monday.
        s2_updates = job.concat(*[job.table(root+'/s1_searchlib_logs/'+y+'/'+x)
                                  for y in week_days
                                  for x in yt.list(root+'/s1_searchlib_logs/'+y)])\
            .groupby('did', 'project', 'country')\
            .reduce(partial(s2, day=monday_d))\
            .put(s2_path)
        s3_stream = s2_updates\
            .map(s3_create_tree, intensity='cpu')\
            .groupby('fielddate', 'project', 'country', 'ver')\
            .aggregate(amount=na.count(),
                       **{x: na.sum(x) for x in S3_AGGR_FIELDS})\
            .put(s3_path)
        job.run()
    except Exception as err:
        # Still best effort (one broken week must not stop the others), but
        # the failure is reported instead of being silently swallowed.
        print('Weekly s2/s3 for the week of %s failed: %r' % (monday_d, err))


def to_monday(records):
    """Mapper: move a daily lower-funnel record to the Monday of its week and
    turn its per-user averages back into totals.

    ``fielddate`` is replaced by the Monday of the same week, and each of the
    averaged metrics is multiplied by ``first_search`` (a missing metric
    counts as 0), so that the weekly job can sum them and divide again.
    """
    averaged_metrics = ('after', 'before', 'wr', 'after_soft', 'lt', 'after_soft_all')
    for rec in records:
        day = datetime.datetime.strptime(rec.fielddate, '%Y-%m-%d').date()
        row = rec.to_dict()
        monday = day - datetime.timedelta(days=day.weekday())
        row["fielddate"] = monday.strftime('%Y-%m-%d')
        for metric in averaged_metrics:
            row[metric] = row.get(metric, 0)*row["first_search"]
        yield Record(**row)

# Weekly lower funnel: re-weight the daily averages by first_search, sum them
# per week (into '<name>_' fields) and divide by the weekly first_search total
# to get weekly per-user averages.
# NOTE(review): weekraise_share and ltv_per_user read 'wr', 'after_soft',
# 'after_soft_all' and 'weekraise_share', all of which are produced by the same
# project() call — this relies on project() resolving dependencies between
# computed fields; confirm.
job = cluster.job(name='1982_searchlib_funnel_s2_first_searches_w')
lf_w_stream = job.table('$jr/2.8_lower_funnel')\
    .map(to_monday)\
    .groupby('fielddate', 'project', 'country', 'ver')\
    .aggregate(first_search=na.sum('first_search'),
               **{name+'_': na.sum(name)
                  for name in ('lt', 'wr', 'after', 'before', 'after_soft', 'after_soft_all')})\
    .project('fielddate', 'project', 'country', 'ver', 'first_search',
             weekraise_share=ne.custom(lambda w, z: w/float(z) if z else 0, 'wr', 'after_soft'),
             ltv_per_user=ne.custom(lambda a, w: a*w, 'after_soft_all', 'weekraise_share'),
             **{name: ne.custom(lambda x, f: x/float(f), name+'_', 'first_search')
                for name in ('lt', 'wr', 'after', 'before', 'after_soft', 'after_soft_all')})\
    .sort('fielddate', 'project', 'country', 'ver')\
    .put('$jr/2.8_lower_funnel_w')
job.run()


def s2_2_w(records):
    """Mapper: re-key an update history from days to weeks.

    Every date of the ``updates`` mapping (date -> version) is replaced by
    the Monday of its week; when several dates fall into one week the
    greatest version is kept.
    """
    for rec in records:
        row = rec.to_dict()
        daily = row["updates"]
        weekly = {}
        for day in daily:
            parsed = datetime.datetime.strptime(day, '%Y-%m-%d').date()
            week_start = parsed - datetime.timedelta(days=parsed.weekday())
            monday = week_start.strftime('%Y-%m-%d')
            if monday in weekly and not weekly[monday] < daily[day]:
                continue
            weekly[monday] = daily[day]
        row["updates"] = weekly
        yield Record(**row)


# Apps audience, weekly scale: same pipeline as the daily one, with dates
# replaced by the Monday of their week.
job = cluster.job(name='1982_searchlib_funnel_apps_w', uuid_by_name=True)
# Opens: map event_date to its Monday and deduplicate per week.
s1_opens = job.table('$jr/s1_all_opens')\
    .project('project', 'did', 'country',
             event_date=ne.custom(lambda z: (datetime.datetime.strptime(z, '%Y-%m-%d').date() -
                                             datetime.timedelta(days=datetime.datetime.strptime(z, '%Y-%m-%d').date().weekday()))
                                  .strftime('%Y-%m-%d'), 'event_date'))\
    .unique('project', 'did', 'event_date', 'country')\
    .put('$jr/s1_all_opens_w')

# Update history re-keyed by week.
s2_updates = job.table('$jr/s2_all_updates')\
    .map(s2_2_w)\
    .put('$jr/s2_all_updates_w')

# Audience per dimension cell: deduplicate per device first, then count
# devices (apps_amount) and devices with the library (has_lib).
s3_steams = s1_opens\
    .join(s2_updates, by=('project', 'did'), type='left', assume_unique_right=True)\
    .map(s3_1, intensity='cpu')\
    .groupby('fielddate', 'project', 'country', 'ver', 'did')\
    .aggregate(has_lib=na.max('has_lib'))\
    .groupby('fielddate', 'project', 'country', 'ver')\
    .aggregate(apps_amount=na.count(), has_lib=na.sum('has_lib'))\
    .put('$jr/s3_current_opens_w')
job.run()


# Weeks to post: every date from 140 days before prev_monday up to end_date
# that has a non-empty weekly step-3 table (these tables are named by Monday).
weeks_to_post = []
for d in jul.date_range(prev_monday-datetime.timedelta(140), end_date):
    str_d = d.strftime('%Y-%m-%d')
    if not jul.exists_and_not_empty(root+'/s3_week_stat/'+str_d, cluster):
        continue
    weeks_to_post.append(str_d)

# Step 4, weekly: full-join the funnel counters with the apps audience and the
# lower-funnel metrics on (fielddate, project, country, ver).
# The job name is the same as that of the daily step-4 job.
job = cluster.job(name='1982_searchlib_funnel_s4_join_for_stat_t')
s4_stream = job.concat(*[job.table(root+'/s3_week_stat/'+d) for d in weeks_to_post])\
    .join(job.table('$jr/s3_current_opens_w')
          .filter(nf.custom(lambda z: z in weeks_to_post, 'fielddate')),
          type='full', by=('fielddate', 'project', 'country', 'ver'), assume_unique=True)\
    .join(job.table('$jr/2.8_lower_funnel_w')
          .filter(nf.custom(lambda z: z in weeks_to_post, 'fielddate')),
          type='full', by=('fielddate', 'project', 'country', 'ver'), assume_unique=True)\
    .put('$jr/s4_cur_weeks')
# Disabled variant that replaced None dimension values before writing:
#     .project(
#         ne.all(),
#         ver=ne.custom(lambda x: x if x is not None else 'None', 'ver'),
#         project=ne.custom(lambda x: x if x is not None else 'None', 'project'),
#         country=ne.custom(lambda x: x if x is not None else 'other', 'country')
# ).put('$jr/s4_cur_weeks')
job.run()
post_res_to_stat(root+'/s4_cur_weeks', scale='w')

# ########################################################################################################################
#
# client = StatfaceBetaClient(
#     username="robot_galamaj",
#     password="entI2At5cemro4e"  # FIXME(security): credential committed to source; rotate it and load it from a secret store
# )
#
# report = StatfaceReport().path('Adhoc/galamaj/distribution').scale('daily')
#     # full path to the report,
#     # .path('Adhoc/galamaj/distribution') \
#     # # report scale (level of detail)
#     # .scale('daily')
#     # and the fields by which existing data is replaced (original comment was mis-encoded; meaning inferred)
#     # .replace_mask('fielddate', 'browser')
# report = report.client(client)
# cluster = clusters.Hahn()
# report = report.data(cluster.read(root+'/s4_cur_weeks'))
# print report
# report.publish()
# print "done"
#
# ########################################################################################################################
#
# client = StatfaceProductionClient(
#     username="robot_galamaj",
#     password="entI2At5cemro4e"  # FIXME(security): credential committed to source; rotate it and load it from a secret store
# )
#
# report = StatfaceReport().path('Distribution/Adhoc/searchlib_funnel').scale('daily')
#     # full path to the report,
#     # .path('Adhoc/galamaj/distribution') \
#     # # report scale (level of detail)
#     # .scale('daily')
#     # and the fields by which existing data is replaced (original comment was mis-encoded; meaning inferred)
#     # .replace_mask('fielddate', 'browser')
# report = report.client(client)
# cluster = clusters.Hahn()
# report = report.data(cluster.read(root+'/s4_cur_weeks'))
# print report
# report.publish()
# print "done"
#
# ########################################################################################################################

# Persist the last processed date; the next run starts on the following day.
# `with` guarantees the handle is closed (and the data flushed) even if the
# write itself fails.
with open('1982_prev_date.txt', 'w') as log_file:
    log_file.write(end_date_str)

