import urllib
import urllib2
import time
import datetime
import pandas as pd
from StringIO import StringIO
connection_timeout=1500
import pandas as pd
import re
import requests
from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)
from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
from qb2.api.v1 import resources as sr
import pandas as pd
import datetime
import numpy as np
import random
import json
def week_start(x):
    """Return the Monday of the week containing ``x`` as a 'YYYY-MM-DD' string.

    ``x`` is converted with ``str()`` and parsed with the '%Y-%m-%d' format.
    Returns ``None`` when the value cannot be parsed as such a date.
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d').date()
    except (TypeError, ValueError):
        # Missing or malformed dates are reported as None instead of raising,
        # but unrelated errors (e.g. KeyboardInterrupt) are no longer hidden.
        return None
    # weekday() is 0 for Monday, so subtracting it lands on that week's Monday.
    return (day - datetime.timedelta(days=day.weekday())).isoformat()

def get_country_oblast_city(geo_id, kr_region, kr_region_dop, geobase6):
    """Return the name of the second tracked region on ``geo_id``'s path.

    The regions of ``geobase6.region_by_id(geo_id).path`` whose type is in
    ``REGION_TYPE_LIST`` are collected in path order (at most four of them).

    Args:
        geo_id: region id; anything ``int()`` accepts.
        kr_region, kr_region_dop: mappings whose keys are the ids of the
            countries that are reported individually.
        geobase6: object exposing ``region_by_id(id)`` whose result has
            ``.path`` (iterable of regions with ``.id`` / ``.type``) and
            ``.name``.

    Returns:
        'Other' when ``geo_id`` is not convertible to an integer, or when a
        COUNTRY-type region met while collecting is not a key of either
        mapping.  Otherwise the name of the second collected region, or the
        name of ``OTHER_REGION`` when fewer than two regions were collected.
    """
    try:
        geo_id = int(geo_id)
    except (TypeError, ValueError, OverflowError):
        return 'Other'

    # set() over keys() works for both list-returning (Python 2) and
    # view-returning (Python 3) mappings, unlike keys().extend(...).
    allowed_countries = set(kr_region_dop.keys()) | set(kr_region.keys())

    collected = []
    for region in geobase6.region_by_id(geo_id).path:
        if region.type not in REGION_TYPE_LIST:
            continue
        if (region.type == sr.geo.RegionTypes.COUNTRY
                and region.id not in allowed_countries):
            return 'Other'
        collected.append(region.id)
        # Regions past the fourth collected one are never inspected.
        if len(collected) == 4:
            break

    # Only the second slot is reported; a missing slot falls back to the
    # OTHER_REGION placeholder id.
    second_id = collected[1] if len(collected) > 1 else OTHER_REGION
    return geobase6.region_by_id(second_id).name

def parse_event(EventValue):
    """Extract the list stored under ``dfid.apps.names`` of a JSON event.

    Args:
        EventValue: JSON text of the event payload.

    Returns:
        The value of ``event['dfid']['apps']['names']``; an empty list when
        any of those keys is absent; ``None`` when the payload is not valid
        JSON or when the payload, ``dfid`` or ``apps`` is not a JSON object.
    """
    try:
        node = json.loads(EventValue)
    except (TypeError, ValueError):
        # TypeError: non-string input (e.g. None); ValueError: malformed JSON.
        return None

    for key in ('dfid', 'apps'):
        # Valid JSON may still be a list, null or a scalar; treat that as
        # "no app list" instead of failing with AttributeError on .get().
        if not isinstance(node, dict):
            return None
        node = node.get(key, {})

    if not isinstance(node, dict):
        return None
    return node.get('names', [])
def is_searchapp_installed(event_type, event_value, platfom):
    """Return 1 if an EVENT_IDENTITY event lists the search app, else 0.

    The app list is taken from ``parse_event(event_value)``.  On 'android'
    (case-insensitive) the package looked for is 'ru.yandex.searchplugin';
    on any other platform it is 'ru.yandex.mobile'.  Events of another type,
    or with an empty/unparseable app list, yield 0.
    """
    if event_type.upper() != 'EVENT_IDENTITY':
        return 0

    installed = parse_event(event_value)
    if not installed:
        return 0

    if platfom.lower() == 'android':
        package = 'ru.yandex.searchplugin'
    else:
        package = 'ru.yandex.mobile'
    return int(package in installed)
def is_yabro_installed(event_type, event_value, platfom):
    """Return 1 if an EVENT_IDENTITY event lists the browser app, else 0.

    The app list is taken from ``parse_event(event_value)``.  On 'android'
    (case-insensitive) the package looked for is 'com.yandex.browser'; on any
    other platform either 'ru.yandex.mobile.search.ipad' or
    'ru.yandex.mobile.search' counts.  Events of another type, or with an
    empty/unparseable app list, yield 0.
    """
    if event_type.upper() != 'EVENT_IDENTITY':
        return 0

    installed = parse_event(event_value)
    if not installed:
        return 0

    if platfom.lower() == 'android':
        packages = ('com.yandex.browser',)
    else:
        packages = ('ru.yandex.mobile.search.ipad', 'ru.yandex.mobile.search')
    return int(any(package in installed for package in packages))

def is_have_identity(event_type):
    """Return 1 when ``event_type`` is 'EVENT_IDENTITY' (any case), else 0."""
    return int(event_type.upper() == 'EVENT_IDENTITY')
# Region ids used as placeholders; presumably geobase ids for "Earth" and a
# catch-all "other" region -- TODO confirm against the geobase.
EARTH = 10000
OTHER_REGION = 957

# Region types that get_country_oblast_city keeps when walking a region path.
REGION_TYPE_LIST = set([
    sr.geo.RegionTypes.EARTH,
    sr.geo.RegionTypes.COUNTRY,
    sr.geo.RegionTypes.REGION,
    sr.geo.RegionTypes.CITY,
])

# Four-slot default path: Earth followed by three placeholder regions.
OTHER_REGION_LIST = [EARTH] + [OTHER_REGION] * 3
# Reporting window: the most recent fully finished Monday..Sunday week, as
# 'YYYY-MM-DD' strings.  The current date is read once so that both bounds
# stay consistent even if the script runs across midnight.
_today = datetime.date.today()
# weekday() is 0 for Monday, so stepping back weekday() + 1 days always lands
# on the Sunday before the current week.
_last_sunday = _today - datetime.timedelta(days=_today.weekday() + 1)
_last_monday = _last_sunday - datetime.timedelta(days=_last_sunday.weekday())
startdate = _last_monday.isoformat()
enddate = _last_sunday.isoformat()
import os

# Cluster handle with the path templates used by the jobs below:
#   $job_root -- output directory, @dates -- the startdate..enddate range.
#
# NOTE(review): security -- an access token is committed in this file.  The
# YT_TOKEN environment variable now takes precedence; the literal is kept
# only as a fallback so existing runs keep working.
# TODO: revoke this token and remove the fallback.
cluster = clusters.Hahn(
    pool='mobile-research',
    token=os.environ.get('YT_TOKEN') or 'a6575e1e15b0475fb8d8564beaa60f23',
).env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        dates='{%s..%s}' % (startdate, enddate),
    )
)
job = cluster.job()

# Source log for the date range configured in the '@dates' template.
raw_log = job.table('statbox/metrika-mobile-log.6p/@dates')

# Keep rows with a device id and region id, an android/ios platform and a
# StartDate inside the reporting window.
valid_events = raw_log.filter(
    nf.custom(lambda x: x not in [None, ''], 'DeviceID'),
    nf.custom(lambda x: x not in [None, ''], 'RegionID'),
    nf.custom(lambda x: str(x).lower() in ['android', 'ios'], 'AppPlatform'),
    nf.custom(lambda x: startdate <= x <= enddate, 'StartDate'),
)

# Derive the reporting dimensions and the per-event 0/1 flags.
flagged_events = valid_events.project(
    'DeviceID',
    country=ne.custom(
        get_country_oblast_city,
        'RegionID',
        sr.yaml('key_report_region.yaml'),
        sr.yaml('key_report_region_dop.yaml'),
        sr.resource('Geobase'),
    ),
    AppPlatform=ne.custom(lambda x: str(x).lower(), 'AppPlatform'),
    week=ne.custom(week_start, 'StartDate'),
    is_searchapp_installed=ne.custom(
        is_searchapp_installed, 'EventType', 'EventValue', 'AppPlatform'
    ),
    is_yabro_installed=ne.custom(
        is_yabro_installed, 'EventType', 'EventValue', 'AppPlatform'
    ),
    is_have_identity=ne.custom(is_have_identity, 'EventType'),
)

# One row per device / platform / country / week: a flag is 1 if any of the
# device's events set it.
per_device = flagged_events.groupby(
    'DeviceID',
    'AppPlatform',
    'country',
    'week',
).aggregate(
    is_searchapp_installed=na.max('is_searchapp_installed'),
    is_yabro_installed=na.max('is_yabro_installed'),
    is_have_identity=na.max('is_have_identity'),
)

# Count devices for every combination of dimensions and flags.
per_segment = per_device.groupby(
    'AppPlatform',
    'country',
    'week',
    'is_have_identity',
    'is_searchapp_installed',
    'is_yabro_installed',
).aggregate(
    devices=na.count(),
)

# Rows are appended to the output table, not overwritten.
users = per_segment.put(
    '$job_root/yandex_app_penetration/audience_aggr', append=True
)
job.run()
job = cluster.job()
# Reads the whole aggregate table (the pipeline appends to it on every run).
data = job.table('$job_root/yandex_app_penetration/audience_aggr').read().as_dataframe()

_SOURCE_KEYS = ['AppPlatform', 'country', 'week']
_REPORT_KEYS = ['AppPlatform', 'country', 'fielddate']
# Multiplier applied to device counts; presumably compensates for the sampled
# '.6p' source log -- TODO confirm.
_DEVICE_SCALE = 16


def _segment_totals(frame, mask, devices_column, share_column):
    """Sum devices and devices_share of ``frame[mask]`` per report key.

    The two sums are returned under ``devices_column`` / ``share_column``.
    Columns are selected with a list: tuple-style ``['a', 'b']`` selection on
    a groupby is rejected by current pandas.
    """
    return (
        frame[mask]
        .groupby(_REPORT_KEYS)[['devices', 'devices_share']]
        .sum()
        .reset_index()
        .rename(columns={'devices': devices_column, 'devices_share': share_column})
    )


# Attach the total device count of each platform/country/week to every row.
_week_totals = (
    data.groupby(_SOURCE_KEYS)['devices']
    .sum()
    .reset_index()
    .rename(columns={'devices': 'all_devices'})
)
data = pd.merge(
    data, _week_totals, on=_SOURCE_KEYS, how='left'
).rename(columns={'week': 'fielddate'})

data['devices'] = data['devices'] * _DEVICE_SCALE
data['all_devices'] = data['all_devices'] * _DEVICE_SCALE
data['devices_share'] = data['devices'] / data['all_devices']

_has_searchapp = data['is_searchapp_installed'] == 1
_no_searchapp = data['is_searchapp_installed'] == 0
_has_yabro = data['is_yabro_installed'] == 1
_no_yabro = data['is_yabro_installed'] == 0
_has_identity = data['is_have_identity'] == 1

# NOTE(review): every merge below is an inner join, so a
# platform/country/week with no rows in any one segment disappears from the
# report entirely -- confirm this is intended.
result = _segment_totals(
    data,
    _has_searchapp & _has_yabro,
    'searchapp_and_yabro_devices',
    'searchapp_and_yabro_devices_share',
)
for _mask, _devices_column, _share_column in [
    (_has_searchapp & _no_yabro,
     'only_searchapp_devices', 'only_searchapp_devices_share'),
    (_no_searchapp & _has_yabro,
     'only_yabro_devices', 'only_yabro_devices_share'),
    (_has_searchapp | _has_yabro,
     'searchapp_or_yabro_devices', 'searchapp_or_yabro_devices_share'),
    (_has_searchapp,
     'all_searchapp_devices', 'all_searchapp_devices_share'),
    (_has_yabro,
     'all_yabro_devices', 'all_yabro_devices_share'),
]:
    result = pd.merge(
        result,
        _segment_totals(data, _mask, _devices_column, _share_column),
        on=_REPORT_KEYS,
    )

# Total (already scaled) devices per report key.
result = pd.merge(
    result,
    data.groupby(_REPORT_KEYS)['devices']
    .sum()
    .reset_index()
    .rename(columns={'devices': 'all_devices'}),
    on=_REPORT_KEYS,
)
# The double underscore in the share column name is kept as is: it is part of
# the emitted report schema.
result = pd.merge(
    result,
    _segment_totals(
        data,
        _has_identity,
        'have_identity_event_devices',
        'have_identity_event__devices_share',
    ),
    on=_REPORT_KEYS,
)
result.drop_duplicates(inplace=True)
result.rename(columns={'AppPlatform': 'app_platform'}, inplace=True)
import requests
import re
import os

# Serialize the report as ';'-separated CSV without the trailing newline;
# missing values are sent as the literal 'unknown'.
csv_data = re.sub('\n$', '', result.drop_duplicates().fillna('unknown').to_csv(sep=';', index=False))

# NOTE(review): security -- robot credentials are committed in this file.
# STAT_ROBOT_USER / STAT_ROBOT_PASSWORD now take precedence; the literals are
# kept only as a fallback so existing runs keep working.
# TODO: rotate the password and remove the fallback.
r = requests.post(
    'https://upload.stat.yandex-team.ru/_api/report/data',
    headers={
        'StatRobotUser': os.environ.get('STAT_ROBOT_USER') or 'robot_ktereshin',
        'StatRobotPassword': os.environ.get('STAT_ROBOT_PASSWORD') or '3en5cayAppeif6l',
    },
    data={
        'name': 'Distribution/ktereshin/app_metrika_audience/searchapp_yabro_penetration',
        'scale': 'w',
        'data': csv_data,
    },
)
# Surface a rejected upload (4xx/5xx) instead of finishing silently.
r.raise_for_status()
