import urllib
import urllib2
import time
import datetime
import pandas as pd
from StringIO import StringIO
connection_timeout=1500
import pandas as pd
import re
import requests
from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)
from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
from qb2.api.v1 import resources as sr
import pandas as pd
import datetime
import numpy as np
import random
import json
def week_start(x):
    """Return the Monday of the week that contains date ``x``.

    ``x`` is converted with ``str()`` and parsed as ``'%Y-%m-%d'``.

    Returns:
        The Monday as a ``'YYYY-MM-DD'`` string, or ``None`` when ``x`` is
        not a date in that format.
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d')
        monday = day - datetime.timedelta(days=day.weekday())
    except (ValueError, OverflowError):
        # ValueError: not a '%Y-%m-%d' date.
        # OverflowError: the Monday would fall before datetime.min.
        return None
    return monday.date().isoformat()

def get_country_oblast_city(geo_id, kr_region, kr_region_dop, geobase6):
    """Map a region id to the name of the second level of its geobase path.

    The path of ``geo_id`` is reduced to the regions whose type is in
    ``REGION_TYPE_LIST``.  At most four of them are kept and missing levels
    are padded with ``OTHER_REGION``; the name of the region at index 1 of
    that list is returned (presumably the country -- this depends on the
    order of ``region.path``, TODO confirm).

    Args:
        geo_id: Region id; anything ``int()`` accepts.
        kr_region: Mapping whose keys are allowed country ids.
        kr_region_dop: Second mapping whose keys are allowed country ids.
        geobase6: Object exposing ``region_by_id(id)`` whose result has
            ``path`` and ``name``.

    Returns:
        The region name, or ``'Other'`` when ``geo_id`` is not convertible
        to ``int`` or a COUNTRY-typed region on the path is not a key of
        either mapping.
    """
    try:
        geo_id = int(geo_id)
    except (TypeError, ValueError, OverflowError):
        return 'Other'

    # Union of both key sets; written so it does not rely on ``keys()``
    # returning a mutable list.
    allowed_countries = set(kr_region_dop.keys()) | set(kr_region.keys())

    levels = []
    for region in geobase6.region_by_id(geo_id).path:
        if region.type not in REGION_TYPE_LIST:
            continue
        if region.type == sr.geo.RegionTypes.COUNTRY and region.id not in allowed_countries:
            return 'Other'
        levels.append(region.id)
        if len(levels) == 4:
            break

    # Pad to four entries so that index 1 always exists.
    levels.extend([OTHER_REGION] * (4 - len(levels)))
    return geobase6.region_by_id(levels[1]).name

def parse_event(EventValue):
    """Return the value stored at ``dfid -> apps -> names`` of a JSON payload.

    Args:
        EventValue: JSON text.

    Returns:
        ``None`` when ``EventValue`` cannot be decoded as JSON.  ``[]`` when
        it decodes but a level of the path is missing or is not a JSON
        object.  Otherwise whatever is stored under ``names``.
    """
    try:
        node = json.loads(EventValue)
    except (TypeError, ValueError):
        # TypeError: not a string (e.g. None); ValueError: malformed JSON.
        return None

    # Walk the path defensively: payloads such as 'null' or
    # '{"dfid": null}' are valid JSON but have no ``.get``.
    for key in ('dfid', 'apps'):
        if not isinstance(node, dict):
            return []
        node = node.get(key, {})
    if not isinstance(node, dict):
        return []
    return node.get('names', [])
def is_searchapp_installed(event_type, event_value, platfom):
    """Return 1 if an EVENT_IDENTITY event lists the search app package.

    The app list is obtained with ``parse_event(event_value)``.  The package
    looked for is ``'ru.yandex.searchplugin'`` when ``platfom`` is android
    (case-insensitive) and ``'ru.yandex.mobile'`` for any other platform.

    Returns 0 for every other event type (including a missing one), for an
    empty or unparseable app list, and when the package is absent.
    """
    # A missing event type simply means "not an identity event".
    if event_type is None or event_type.upper() != 'EVENT_IDENTITY':
        return 0
    app_list = parse_event(event_value)
    if not app_list:
        return 0
    if str(platfom).lower() == 'android':
        package = 'ru.yandex.searchplugin'
    else:
        package = 'ru.yandex.mobile'
    return 1 if package in app_list else 0
def is_yabro_installed(event_type, event_value, platfom):
    """Return 1 if an EVENT_IDENTITY event lists the browser package.

    The app list is obtained with ``parse_event(event_value)``.  On android
    (case-insensitive ``platfom``) the package looked for is
    ``'com.yandex.browser'``; on any other platform either
    ``'ru.yandex.mobile.search.ipad'`` or ``'ru.yandex.mobile.search'``.

    Returns 0 for every other event type (including a missing one), for an
    empty or unparseable app list, and when no package matches.
    """
    # A missing event type simply means "not an identity event".
    if event_type is None or event_type.upper() != 'EVENT_IDENTITY':
        return 0
    app_list = parse_event(event_value)
    if not app_list:
        return 0
    if str(platfom).lower() == 'android':
        packages = ('com.yandex.browser',)
    else:
        packages = ('ru.yandex.mobile.search.ipad', 'ru.yandex.mobile.search')
    for package in packages:
        if package in app_list:
            return 1
    return 0

def is_yabrolite_installed(event_type, event_value, platfom):
    """Return 1 if an android EVENT_IDENTITY event lists the lite browser.

    The app list is obtained with ``parse_event(event_value)`` and searched
    for ``'com.yandex.browser.lite'``.  Only android (case-insensitive
    ``platfom``) can yield 1; every other platform returns 0.

    Returns 0 for every other event type (including a missing one), for an
    empty or unparseable app list, and when the package is absent.
    """
    # A missing event type simply means "not an identity event".
    if event_type is None or event_type.upper() != 'EVENT_IDENTITY':
        return 0
    app_list = parse_event(event_value)
    if not app_list:
        return 0
    # str() keeps this in line with the sibling checks and tolerates a
    # missing platform value.
    if str(platfom).lower() != 'android':
        return 0
    return 1 if 'com.yandex.browser.lite' in app_list else 0

def is_searchapp_active(session_type, app_id, platfom):
    """Return 1 if the row is a foreground session of the search app.

    A row qualifies when ``session_type`` is ``'SESSION_FOREGROUND'`` and
    ``app_id`` equals ``'ru.yandex.searchplugin'`` on android or
    ``'ru.yandex.mobile'`` on any other platform.  All three comparisons are
    case-insensitive and tolerate ``None`` (values go through ``str()``).

    Returns 0 otherwise.
    """
    if str(session_type).upper() != 'SESSION_FOREGROUND':
        return 0
    if str(platfom).lower() == 'android':
        expected_app = 'ru.yandex.searchplugin'
    else:
        expected_app = 'ru.yandex.mobile'
    # Lower-case on every platform, as the other is_*_active checks do; the
    # non-android branch used to compare case-sensitively.
    return 1 if str(app_id).lower() == expected_app else 0
def is_yabro_active(session_type, app_id, platfom):
    """Return 1 if the row is a foreground session of the browser app.

    A row qualifies when ``session_type`` is ``'SESSION_FOREGROUND'`` and
    ``app_id`` is ``'com.yandex.browser'`` on android, or one of
    ``'ru.yandex.mobile.search.ipad'`` / ``'ru.yandex.mobile.search'`` on any
    other platform.  Comparisons are case-insensitive; values go through
    ``str()`` first.

    Returns 0 otherwise.
    """
    if str(session_type).upper() != 'SESSION_FOREGROUND':
        return 0
    on_android = str(platfom).lower() == 'android'
    app = str(app_id).lower()
    if on_android:
        matched = app in ('com.yandex.browser',)
    else:
        matched = app in ('ru.yandex.mobile.search.ipad', 'ru.yandex.mobile.search')
    return int(matched)

def is_yabrolite_active(session_type, app_id, platfom):
    """Return 1 if the row is an android foreground session of the lite browser.

    All of the following must hold (case-insensitively, after ``str()``):
    ``session_type`` is ``'SESSION_FOREGROUND'``, ``platfom`` is
    ``'android'`` and ``app_id`` is ``'com.yandex.browser.lite'``.

    Returns 0 otherwise.
    """
    in_foreground = str(session_type).upper() == 'SESSION_FOREGROUND'
    if not (in_foreground and str(platfom).lower() == 'android'):
        return 0
    return 1 if str(app_id).lower() == 'com.yandex.browser.lite' else 0

def is_have_identity(event_type):
    """Return 1 if ``event_type`` is ``'EVENT_IDENTITY'`` (any case), else 0.

    A missing (``None``) event type counts as "not an identity event"
    instead of raising.
    """
    if event_type is None:
        return 0
    return 1 if event_type.upper() == 'EVENT_IDENTITY' else 0
# Region id used by get_country_oblast_city to pad path levels that are missing.
OTHER_REGION = 957
# Presumably the geobase id of the root "Earth" region -- TODO confirm.
EARTH = 10000

# Region types that get_country_oblast_city keeps when walking a region path.
REGION_TYPE_LIST = {
    sr.geo.RegionTypes.EARTH,
    sr.geo.RegionTypes.COUNTRY,
    sr.geo.RegionTypes.REGION,
    sr.geo.RegionTypes.CITY,
}

# Four-level fallback path: EARTH followed by three OTHER_REGION placeholders.
# Not referenced anywhere in the visible part of this file.
OTHER_REGION_LIST = [EARTH] + [OTHER_REGION] * 3
# Reporting window: the most recent complete Monday..Sunday week before today,
# as 'YYYY-MM-DD' strings.
# The clock is read once so both bounds derive from the same day even if the
# script starts right at midnight.
_today = datetime.date.today()
_last_sunday = _today - datetime.timedelta(days=_today.weekday() + 1)
_last_monday = _last_sunday - datetime.timedelta(days=_last_sunday.weekday())
startdate = _last_monday.isoformat()
enddate = _last_sunday.isoformat()
# Cluster handle shared by the jobs below.  The templates are what the table
# paths refer to as '$job_root' and '@dates'.
# NOTE(review): the access token is hard-coded in source; it should be rotated
# and supplied from outside the repository.
_env_templates = dict(
    job_root='home/turkey-analytics/ktereshin',
    dates='{%s..%s}' % (startdate, enddate),
)
cluster = clusters.Hahn(
    pool='mobile-research',
    token='a6575e1e15b0475fb8d8564beaa60f23',
).env(templates=_env_templates)

# Weekly audience job.
# Reads the log tables selected by the '@dates' template, derives 0/1 flags per
# log row, collapses them to one row per device and week, counts devices per
# flag combination and appends the counts to
# '$job_root/yandex_app_penetration/audience_v1'.
# NOTE(review): the result is written with append=True, so running the script
# twice for the same window presumably stores that week's rows twice and the
# pandas sums further down would then double count them -- confirm.
job = cluster.job()
users = job.table('statbox/metrika-mobile-log.6p/@dates') \
    .filter(
        # Keep rows with a non-empty DeviceID and RegionID, an android/ios
        # platform and a StartDate inside [startdate, enddate] (string compare).
        nf.custom(lambda x: not x in [None, ''], 'DeviceID'),
        nf.custom(lambda x: not x in [None, ''], 'RegionID'),
        nf.custom(lambda x: str(x).lower() in ['android', 'ios'], 'AppPlatform'),
        nf.custom(lambda x: startdate <= x <= enddate, 'StartDate')
    ) \
    .project(
        # One output row per log row: normalised keys plus a 0/1 flag per
        # "installed" / "active" / "has identity" check defined above.
        device_id = ne.custom(lambda x: str(x).lower(), 'DeviceID'),
        country = ne.custom(
            get_country_oblast_city,
            'RegionID',
            sr.yaml('key_report_region.yaml'),
            sr.yaml('key_report_region_dop.yaml'),
            sr.resource('Geobase')
        ),
        AppPlatform = ne.custom(lambda x: str(x).lower(),'AppPlatform'),
        week = ne.custom(week_start, 'StartDate'),
        date = 'StartDate',
        is_searchapp_installed = ne.custom(is_searchapp_installed, 'EventType', 'EventValue', 'AppPlatform'),
        is_yabro_installed = ne.custom(is_yabro_installed, 'EventType', 'EventValue', 'AppPlatform'),
        is_yabrolite_installed = ne.custom(is_yabrolite_installed, 'EventType', 'EventValue', 'AppPlatform'),
        is_have_identity = ne.custom(is_have_identity, 'EventType'),
        is_searchapp_active = ne.custom(is_searchapp_active, 'SessionType', 'AppID', 'AppPlatform'),
        is_yabro_active = ne.custom(is_yabro_active, 'SessionType', 'AppID', 'AppPlatform'),
        is_yabrolite_active = ne.custom(is_yabrolite_active, 'SessionType', 'AppID', 'AppPlatform'),
    ) \
    .groupby(
        # First pass: one row per device / platform / country / week.
        'device_id',
        'AppPlatform',
        'country',
        'week'
    ) \
    .aggregate(
        # max() over 0/1 flags: 1 if any row of the group had the flag set.
        is_searchapp_installed = na.max('is_searchapp_installed'),
        is_yabro_installed = na.max('is_yabro_installed'),
        is_yabrolite_installed = na.max('is_yabrolite_installed'),
        is_searchapp_active = na.max('is_searchapp_active'),
        is_yabrolite_active = na.max('is_yabrolite_active'),
        is_yabro_active = na.max('is_yabro_active'),
        is_have_identity = na.max('is_have_identity')
    ) \
    .groupby(
        # Second pass: group the per-device rows by every flag combination.
        'AppPlatform',
        'country',
        'week',
        'is_searchapp_installed',
        'is_yabro_installed',
        'is_yabrolite_installed',
        'is_searchapp_active',
        'is_yabrolite_active',
        'is_yabro_active',
        'is_have_identity'
    ) \
    .aggregate(
        # Number of per-device rows in each combination.
        devices = na.count()
    ) \
    .put('$job_root/yandex_app_penetration/audience_v1', append = True)
job.run()

# Load the whole accumulated audience table into pandas.
job = cluster.job()
_audience_table = job.table('$job_root/yandex_app_penetration/audience_v1')
data = _audience_table.read().as_dataframe()
# Scale the device counts by 16.  NOTE(review): the source log path contains
# '.6p', so this presumably extrapolates a ~6% sample -- confirm the factor.
data['devices'] = data['devices'] * 16
def merge_df(res_data, data, group_fields_list, filter_field, filter_value, field_name, new_field_name):
    """Left-join a filtered per-group sum onto ``res_data``.

    Rows of ``data`` where ``filter_field == filter_value`` are grouped by
    ``group_fields_list`` and ``field_name`` is summed; the sum is attached
    to ``res_data`` as column ``new_field_name``.

    Returns:
        A new DataFrame with every row of ``res_data``; groups without
        matching rows get NaN in ``new_field_name``.
    """
    matching_rows = data[data[filter_field] == filter_value]
    group_totals = matching_rows.groupby(group_fields_list)[field_name].sum().reset_index()
    group_totals = group_totals.rename(columns={field_name: new_field_name})
    return pd.merge(res_data, group_totals, on=group_fields_list, how='left')
# Report skeleton: total devices per platform / country / week.  Every later
# merge attaches one more column to these rows.
_report_keys = ['AppPlatform', 'country', 'week']
_device_totals = data.groupby(_report_keys)['devices'].sum()
res_data = _device_totals.reset_index().rename(columns={'devices': 'all_devices'})


# One column per single flag: devices for which the flag equals 1.
group_fields_list = ['AppPlatform', 'country', 'week']
filter_value = 1
field_name = 'devices'
filter_fields = [
    'is_have_identity',
    'is_yabro_installed',
    'is_yabrolite_installed',
    'is_yabrolite_active',
    'is_yabro_active',
]
for filter_field in filter_fields:
    # Drop the leading 'is': 'is_yabro_installed' -> 'devices_yabro_installed'.
    flag_suffix = '_'.join(filter_field.split('_')[1:])
    res_data = merge_df(
        res_data,
        data,
        group_fields_list,
        filter_field,
        filter_value,
        field_name,
        field_name + '_' + flag_suffix
    )
# Device totals for combinations of the is_yabro_* and is_yabrolite_* flags.
# Each entry is (row mask over ``data``, name of the resulting column); the
# entries are merged onto res_data in this order.  The 'devises_' spelling is
# kept because these strings are the column names of the report.
_segment_keys = ['AppPlatform', 'country', 'week']
_yabro_installed = data['is_yabro_installed']
_lite_installed = data['is_yabrolite_installed']
_yabro_active = data['is_yabro_active']
_lite_active = data['is_yabrolite_active']
_flag_segments = [
    ((_yabro_installed == 1) & (_lite_installed == 1), 'devises_yabro_and_yabrolite_installed'),
    ((_yabro_installed == 1) | (_lite_installed == 1), 'devises_yabro_or_yabrolite_installed'),
    ((_yabro_active == 1) & (_lite_active == 1), 'devises_yabro_and_yabrolite_active'),
    ((_yabro_active == 1) | (_lite_active == 1), 'devises_yabro_or_yabrolite_active'),
    ((_yabro_installed == 1) & (_lite_installed == 0), 'devises_only_yabro_installed'),
    ((_yabro_installed == 0) & (_lite_installed == 1), 'devises_only_yabrolite_installed'),
    ((_yabro_active == 1) & (_lite_active == 0), 'devises_only_yabro_active'),
]
for _segment_mask, _segment_column in _flag_segments:
    _segment_totals = (
        data[_segment_mask]
        .groupby(_segment_keys)['devices']
        .sum()
        .reset_index()
        .rename(columns={'devices': _segment_column})
    )
    # Left join: groups with no matching rows get NaN in the new column.
    res_data = pd.merge(res_data, _segment_totals, on=_segment_keys, how='left')
# Devices where only the lite browser was active: is_yabrolite_active is 1 and
# is_yabro_active is 0.  The filter used to require is_yabro_active == 1, which
# made this column a copy of 'devises_yabro_and_yabrolite_active'.
res_data = pd.merge(
    res_data,
    data[
        (data['is_yabro_active'] == 0)
        & (data['is_yabrolite_active'] == 1)
        ].groupby(
        [
            'AppPlatform',
            'country',
            'week'
        ]
    )['devices'].sum().reset_index().rename(columns = {'devices': 'devises_only_yabrolite_active'}),
    on =['AppPlatform', 'country', 'week'],
    how = 'left'
    )
# Left joins leave NaN where a group had no matching rows: report those as 0,
# drop exact duplicate rows, then switch to the report's column names.
res_data = res_data.fillna(0).drop_duplicates()
res_data = res_data.rename(columns={'AppPlatform': 'app_platform', 'week': 'fielddate'})
import requests
import re
# Serialise the report as ';'-separated CSV without the trailing newline.
csv_data = re.sub('\n$', '', res_data.drop_duplicates().fillna('unknown').to_csv(sep=';', index=False))
# Upload to the Stat report with weekly scale ('w').
# NOTE(review): the robot password is hard-coded in source; it should be
# rotated and supplied from outside the repository.
r = requests.post(
    'https://upload.stat.yandex-team.ru/_api/report/data',
    headers={'StatRobotUser': 'robot_ktereshin', 'StatRobotPassword': '3en5cayAppeif6l'},
    data={
        'name': 'Distribution/ktereshin/app_metrika_audience/yabrolite_yabro_penetration',
        'scale': 'w',
        'data': csv_data,
    },
    # Without a timeout requests can wait forever.  connection_timeout is the
    # module-level value defined next to the imports (presumably meant for
    # this call -- confirm).
    timeout=connection_timeout,
)
# Fail loudly: an HTTP error response used to be ignored, so a rejected upload
# looked like a successful run.
r.raise_for_status()
