# coding: utf-8
import urllib
import urllib2
import time
import datetime
import pandas as pd
from StringIO import StringIO
# Timeout setting; it is not referenced anywhere in the visible part of this file.
# NOTE(review): units and the intended consumer are not shown here - confirm or remove.
connection_timeout=1500
import pandas as pd
import re
import requests
from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)
from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
from qb2.api.v1 import resources as sr
import pandas as pd
import datetime
import numpy as np
import random
def week_start(x):
    """Return the Monday of the week containing ``x`` as ``'YYYY-MM-DD'``.

    Args:
        x: A date in ``'%Y-%m-%d'`` form. The value is passed through
            ``str`` before parsing, so non-string inputs are accepted.

    Returns:
        The ISO date string of that week's Monday, or ``None`` when ``x``
        cannot be parsed as a ``'%Y-%m-%d'`` date.
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d')
    except (TypeError, ValueError):
        # Missing or malformed dates map to None instead of failing the job.
        return None
    # weekday() is 0 for Monday, so subtracting it lands on that week's Monday.
    monday = day - datetime.timedelta(days=day.weekday())
    return monday.date().isoformat()
def count_unique(groups):
    """Reducer that counts device-id runs inside every group.

    For each ``(key, records)`` pair one ``Record`` is yielded. It carries the
    grouping fields (``AppPlatform``, ``is_searchapp``, ``is_yabro``, ``week``,
    ``country``) plus ``devices``: the number of times ``DeviceID`` changes
    while walking the records. When the records arrive sorted by ``DeviceID``
    this equals the number of distinct device ids.
    """
    key_fields = ('AppPlatform', 'is_searchapp', 'is_yabro', 'week', 'country')
    for key, records in groups:
        output = {name: getattr(key, name) for name in key_fields}
        # The empty string is the "nothing seen yet" marker, so a leading
        # record whose DeviceID is '' does not start a new run.
        previous_id = ''
        runs = 0
        for record in records:
            current_id = record.DeviceID
            if current_id == previous_id:
                continue
            runs += 1
            previous_id = current_id
        output['devices'] = runs
        yield Record(**output)
def calc_share(st, _dict):
    """Return the value stored under ``st`` in ``_dict``, defaulting to 0.0.

    Args:
        st: Lookup key (the callers in this file pass segment keys such as
            ``'1_0'``).
        _dict: Mapping to look the key up in.

    Returns:
        ``_dict[st]`` when present; ``0.0`` when the key is missing or the
        lookup is not possible (e.g. ``_dict`` is ``None``).
    """
    try:
        return _dict[st]
    except (KeyError, TypeError):
        # Only lookup failures are treated as "no devices"; anything else
        # (e.g. KeyboardInterrupt) now propagates.
        return 0.0
def _safe_share(part, total):
    """Return ``part / total`` as a float, or 0.0 when ``total`` is zero."""
    if not total:
        return 0.0
    return part / float(total)


def calc_shares(groups):
    """Reducer that turns per-segment device counts into counts and shares.

    Each incoming record is expected to expose ``is_searchapp``, ``is_yabro``
    and ``devices``. Records of one group are folded into a mapping keyed by
    ``'<is_searchapp>_<is_yabro>'`` (``'0_0'``, ``'1_0'``, ``'0_1'``, ``'1_1'``).

    Yields:
        One ``Record`` per group with the grouping fields (``AppPlatform``,
        ``week``, ``country``), the device count of every segment, the total
        ``All Devices`` and each segment's share of that total (a fraction
        between 0 and 1). Shares are 0.0 when the group total is zero.
    """
    for key, records in groups:
        devices_by_segment = {}
        all_users = 0
        for record in records:
            all_users += record.devices
            segment = str(record.is_searchapp) + '_' + str(record.is_yabro)
            devices_by_segment[segment] = record.devices

        neither = calc_share('0_0', devices_by_segment)
        only_searchapp = calc_share('1_0', devices_by_segment)
        only_yabro = calc_share('0_1', devices_by_segment)
        both = calc_share('1_1', devices_by_segment)
        # "OR" is inclusive: devices with both apps belong to it as well. The
        # share previously left them out while the device count included them.
        either = only_searchapp + only_yabro + both

        result_dict = {
            'AppPlatform': key.AppPlatform,
            'week': key.week,
            'country': key.country,
            'Share Not YaBro AND Not SearchApp': _safe_share(neither, all_users),
            'Share Only SearchApp': _safe_share(only_searchapp, all_users),
            'Share Only YaBro': _safe_share(only_yabro, all_users),
            'Share YaBro AND SearchApp': _safe_share(both, all_users),
            'Share YaBro OR SearchApp': _safe_share(either, all_users),
            'Devices Not YaBro AND Not SearchApp': neither,
            'Devices Only SearchApp': only_searchapp,
            'Devices Only YaBro': only_yabro,
            'Devices YaBro AND SearchApp': both,
            'Devices YaBro OR SearchApp': either,
            'All Devices': all_users
        }
        yield Record(**result_dict)
def get_country_oblast_city(geo_id, kr_region, kr_region_dop, geobase6):
    """Get the country name for ``geo_id`` from the geobase.

    The region path of ``geo_id`` is filtered down to the region types listed
    in ``REGION_TYPE_LIST``; the name of the second entry of that filtered
    path is returned.
    NOTE(review): presumably the path runs from Earth downwards, which makes
    the second entry the country - confirm against the geobase resource.

    Args:
        geo_id: Region id; anything ``int()`` accepts.
        kr_region: Mapping whose keys are accepted country ids.
        kr_region_dop: Additional mapping whose keys are accepted country ids.
        geobase6: Geobase object providing ``region_by_id(id)`` with ``path``
            and ``name`` attributes on the returned region.

    Returns:
        The region name, or ``'Other'`` when ``geo_id`` is not a valid integer
        or a country on the path is not among the accepted country ids.
    """
    try:
        geo_id = int(geo_id)
    except (TypeError, ValueError, OverflowError):
        return 'Other'

    known_countries = set(kr_region_dop.keys()).union(kr_region.keys())

    list_numbers = []
    for region in geobase6.region_by_id(geo_id).path:
        if region.type not in REGION_TYPE_LIST:
            continue
        if (region.type == sr.geo.RegionTypes.COUNTRY
                and region.id not in known_countries):
            return 'Other'
        list_numbers.append(region.id)
        # At most four levels are inspected, as before.
        if len(list_numbers) == 4:
            break

    # Paths shorter than two levels fall back to the OTHER_REGION placeholder.
    second_id = list_numbers[1] if len(list_numbers) > 1 else OTHER_REGION
    return geobase6.region_by_id(second_id).name
# Region id used as a placeholder when a geobase path has fewer levels than
# expected (see get_country_oblast_city).
OTHER_REGION = 957
# Region id named EARTH; it is only used as the head of OTHER_REGION_LIST below.
EARTH = 10000
# Region types kept when walking a geobase region path.
REGION_TYPE_LIST = {
    sr.geo.RegionTypes.CITY,
    sr.geo.RegionTypes.REGION,
    sr.geo.RegionTypes.COUNTRY,
    sr.geo.RegionTypes.EARTH,
}
# Four-level fallback path: EARTH followed by three placeholders.
# It is not referenced in the visible part of this file.
OTHER_REGION_LIST = [EARTH] + [OTHER_REGION] * 3
# Reporting window: the most recent completed Monday..Sunday week.
# The current time is captured once so that both uses below refer to the same
# day; two separate now() calls could disagree when the script runs across
# the Sunday/Monday midnight and would then select the wrong week.
now = datetime.datetime.now()
# Last Sunday strictly before today (a full week back when today is Sunday).
enddate = now - datetime.timedelta(days=now.weekday() + 1)
# Monday of the week that ends on `enddate`.
startdate = enddate - datetime.timedelta(days=enddate.weekday())
startdate = startdate.date().isoformat()
enddate = enddate.date().isoformat()

# NOTE(review): the access token is hard-coded in source control. It should be
# rotated and supplied from the environment or a secrets store instead.
cluster = clusters.Hahn(
    pool='mobile-research',
    token='a6575e1e15b0475fb8d8564beaa60f23'
).env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        # Expands to the range of daily tables between the two dates.
        dates='{%s..%s}' % (startdate, enddate)
    )
)
# Weekly audience job: classify every device of the reporting window by
# whether it was seen with the search-app / YaBro API keys, then count devices
# per (platform, week, country) segment and append the totals and shares to
# the audience table.
job = cluster.job()
users = (
    job.table('statbox/metrika-mobile-log.6p/@dates')
    # Keep rows with a device id, an android/ios platform and a start date
    # inside the reporting window.
    .filter(
        nf.custom(lambda x: x not in [None, ''], 'DeviceID'),
        nf.custom(lambda x: str(x).lower() in ['android', 'ios'], 'AppPlatform'),
        nf.custom(lambda x: startdate <= x <= enddate, 'StartDate')
    )
    # Derive country, normalised platform, the two app flags and the week.
    .project(
        'DeviceID',
        country=ne.custom(
            get_country_oblast_city,
            'RegionID',
            sr.yaml('key_report_region.yaml'),
            sr.yaml('key_report_region_dop.yaml'),
            sr.resource('Geobase')
        ),
        AppPlatform=ne.custom(lambda x: str(x).lower(), 'AppPlatform'),
        is_searchapp=ne.custom(lambda x: int(x in ['10321', '42989']), 'APIKey'),
        is_yabro=ne.custom(lambda x: int(x in ['106400', '19531', '19534']), 'APIKey'),
        week=ne.custom(week_start, 'StartDate')
    )
    # One row per device/platform/week/country: a flag is 1 if any of the
    # device's rows had it set.
    .groupby('DeviceID', 'AppPlatform', 'week', 'country')
    .aggregate(
        is_searchapp=na.max('is_searchapp'),
        is_yabro=na.max('is_yabro')
    )
    # Count devices per segment; sorting by DeviceID keeps equal ids adjacent
    # for count_unique.
    .groupby('AppPlatform', 'is_searchapp', 'is_yabro', 'week', 'country')
    .sort('DeviceID')
    .reduce(count_unique)
    # Fold the segments of one platform/week/country into a single row.
    .groupby('AppPlatform', 'week', 'country')
    .reduce(calc_shares)
    .put('$job_root/app_metrika_audience/audience', append=True)
)
job.run()
# Load the accumulated audience table and shape it for the report upload.
job = cluster.job()
users = job.table('$job_root/app_metrika_audience/audience').read().as_dataframe()

# Report-style column names: two explicit renames, then lower-case names with
# underscores instead of spaces; missing values become 0.
result = users.rename(columns={'week': 'fielddate', 'AppPlatform': 'app_platform'})
result = result.rename(columns=lambda x: x.lower().replace(' ', '_'))
result = result.fillna(0)
# The table is appended to on every run, so identical rows are collapsed.
result.drop_duplicates(inplace=True)

for col in result.columns:
    if 'devices' in col:
        # NOTE(review): presumably scales the sampled log back to full volume
        # (factor 16) - confirm against the source table's sampling rate.
        result[col] = result[col] * 16
    if 'share' in col:
        # Fractions become percentages.
        result[col] = result[col] * 100

# Rows without any devices are not reported.
result = result[result['all_devices'] != 0]
import requests
import sys

# NOTE(review): robot credentials are hard-coded in source control. They
# should be rotated and supplied from the environment or a secrets store.
STAT_HEADERS = {'StatRobotUser': 'robot_ktereshin', 'StatRobotPassword': '3en5cayAppeif6l'}
# Report path shared by the config upload and the data upload.
STAT_REPORT_NAME = 'Distribution/ktereshin/app_metrika_audience/app_metrika_audience_new'

# Report definition: dimensions, measures and how each of them is displayed.
yaml_config = u'''
---
allow_recalculate: "1"
autovalues_enabled: "0"
dimensions:
  - fielddate: date
  - app_platform: string
  - country: string
measures:
  - all_devices: number
  - devices_not_yabro_and_not_searchapp: number
  - devices_only_searchapp: number
  - devices_only_yabro: number
  - devices_yabro_and_searchapp: number
  - devices_yabro_or_searchapp: number
  - share_not_yabro_and_not_searchapp: number
  - share_only_searchapp: number
  - share_only_yabro: number
  - share_yabro_and_searchapp: number
  - share_yabro_or_searchapp: number
view_types:
  app_platform:
    type: Selector
    default: android
  country:
    type: Selector
    default: Россия
  share_not_yabro_and_not_searchapp:
    type: Float
    precision: "3"
  share_only_searchapp:
    type: Float
    precision: "3"
  share_only_yabro:
    type: Float
    precision: "3"
  share_yabro_and_searchapp:
    type: Float
    precision: "3"
  share_yabro_or_searchapp:
    type: Float
    precision: "3"
aggregate_uncomplete_period: "0"
'''

# Step 1: create or update the report configuration.
resp = requests.post(
    'https://upload.stat.yandex-team.ru/_api/report/config',
    headers=STAT_HEADERS,
    data={
        'cube_config': yaml_config,
        'title': 'App Metrika: YaBro and Search App',
        'name': STAT_REPORT_NAME,
    },
)
if not resp.ok:
    # A rejected config is reported but not fatal: the data upload below is
    # still attempted, as it was before.
    sys.stderr.write(
        'Report config upload failed, HTTP %s: %r\n'
        % (resp.status_code, resp.text[:500])
    )

import re
# Step 2: upload the data as ';'-separated CSV without the trailing newline.
csv_data = re.sub('\n$', '', result.to_csv(sep=';', index=False))
r = requests.post(
    'https://upload.stat.yandex-team.ru/_api/report/data',
    headers=STAT_HEADERS,
    data={
        'name': STAT_REPORT_NAME,
        'scale': 'w',
        'data': csv_data,
    },
)
# A failed data upload must not pass silently: raise so the run is marked failed.
r.raise_for_status()
