# -*- coding: utf-8 -*-

from sandbox import sdk2
from sandbox.sdk2 import yav

# Maximum number of bytes requested per read() call when streaming a report download.
CHUNK_SIZE = 100000 * 1024
# Look-back window in days; used for the report date range and in the ClickHouse query.
DAYS = 180
# Default HTTP endpoint that get_clickhouse_data() posts queries to.
HOST = 'http://mtstat.yandex.ru:8123/'


def get_google_client():
    """Build an AdWords API client from credentials kept in Yav secrets.

    Each credential lives in its own secret, stored under a key that matches
    the credential's name.

    Returns:
        An ``adwords.AdWordsClient`` bound to customer ``865-686-2627`` with a
        timeout of 30.
    """
    from googleads import adwords, oauth2

    # (key inside the secret, secret id), in the order the values are fetched.
    credential_sources = (
        ("developer_token", "sec-01er29n335pdyv2wytnt5ckvnx"),
        ("refresh_token", "sec-01er29memy5v2vferks008f67s"),
        ("client_secret", "sec-01er29kkftcy7gfy5zy90z366m"),
        ("oauth2_client", "sec-01er29j2gkqmsgsd242e620mm3"),
    )

    # Create every secret handle first, then read them one after another.
    handles = [(key, yav.Secret(secret_id)) for key, secret_id in credential_sources]
    credentials = {}
    for key, handle in handles:
        credentials[key] = handle.data()[key]

    refresh_client = oauth2.GoogleRefreshTokenClient(
        credentials["oauth2_client"],
        credentials["client_secret"],
        credentials["refresh_token"],
    )
    return adwords.AdWordsClient(
        developer_token=credentials["developer_token"],
        oauth2_client=refresh_client,
        client_customer_id="865-686-2627",
        timeout=30,
    )


def get_search_insights_from_google(dt):
    """Download the keyword performance report for the search network.

    The report covers ``dt - DAYS`` through ``dt`` and is requested as CSV
    without report header, column header or summary row, excluding rows with
    zero impressions.

    Args:
        dt: End date of the reporting period (a ``datetime.date``).

    Returns:
        The complete report as decoded text; an empty string when the stream
        yields no data.
    """
    from googleads import adwords
    from datetime import timedelta

    client = get_google_client()

    report_downloader = client.GetReportDownloader(version='v201809')

    report_query = (adwords.ReportQueryBuilder()
                    .Select('Date', 'CampaignId', 'CampaignName', 'AdGroupId', 'AdGroupName',
                            'Id', 'Criteria',
                            'Impressions', 'Clicks', 'Cost')
                    .From('KEYWORDS_PERFORMANCE_REPORT')
                    .Where('AdNetworkType1').EqualTo('SEARCH')
                    .During(date_range=None, start_date=dt - timedelta(days=DAYS), end_date=dt)
                    .Build())

    stream_data = report_downloader.DownloadReportAsStreamWithAwql(report_query, 'CSV', skip_report_header=True,
                                                                   skip_column_header=True, skip_report_summary=True,
                                                                   include_zero_impressions=False)

    raw_chunks = []
    try:
        # Read until the stream is exhausted; returning inside the loop would
        # truncate the report to its first chunk.
        while True:
            chunk = stream_data.read(CHUNK_SIZE)
            if not chunk:
                break
            raw_chunks.append(chunk)
    finally:
        stream_data.close()

    # Decode once over the joined bytes so a multi-byte character that
    # straddles a chunk boundary cannot break decoding.
    return b''.join(raw_chunks).decode("utf8")


def get_video_insights_from_google(dt):
    """Download the video performance report for the YouTube watch network.

    The report covers ``dt - DAYS`` through ``dt`` and is requested as CSV
    without report header, column header or summary row, excluding rows with
    zero impressions.

    Args:
        dt: End date of the reporting period (a ``datetime.date``).

    Returns:
        The complete report as decoded text; an empty string when the stream
        yields no data.
    """
    from googleads import adwords
    from datetime import timedelta

    client = get_google_client()

    report_downloader = client.GetReportDownloader(version='v201809')

    # NOTE(review): 'CreativeId' is selected twice; the sibling reports select an
    # id followed by a name-like field, so the second entry may have been meant
    # to be a different field -- confirm before changing.
    report_query = (adwords.ReportQueryBuilder()
                    .Select('Date', 'CampaignId', 'CampaignName', 'AdGroupId', 'AdGroupName',
                            'CreativeId', 'CreativeId',
                            'Impressions', 'Clicks', 'Cost')
                    .From('VIDEO_PERFORMANCE_REPORT')
                    .Where('AdNetworkType1').EqualTo('YOUTUBE_WATCH')
                    .During(date_range=None, start_date=dt - timedelta(days=DAYS), end_date=dt)
                    .Build())

    stream_data = report_downloader.DownloadReportAsStreamWithAwql(report_query, 'CSV', skip_report_header=True,
                                                                   skip_column_header=True, skip_report_summary=True,
                                                                   include_zero_impressions=False)

    raw_chunks = []
    try:
        # Read until the stream is exhausted; returning inside the loop would
        # truncate the report to its first chunk.
        while True:
            chunk = stream_data.read(CHUNK_SIZE)
            if not chunk:
                break
            raw_chunks.append(chunk)
    finally:
        stream_data.close()

    # Decode once over the joined bytes so a multi-byte character that
    # straddles a chunk boundary cannot break decoding.
    return b''.join(raw_chunks).decode("utf8")


def get_display_insights_from_google(dt):
    """Download the ad performance report for the content (display) network.

    The report covers ``dt - DAYS`` through ``dt`` and is requested as CSV
    without report header, column header or summary row, excluding rows with
    zero impressions.

    Args:
        dt: End date of the reporting period (a ``datetime.date``).

    Returns:
        The complete report as decoded text; an empty string when the stream
        yields no data.
    """
    from googleads import adwords
    from datetime import timedelta

    client = get_google_client()

    report_downloader = client.GetReportDownloader(version='v201809')

    report_query = (adwords.ReportQueryBuilder()
                    .Select('Date', 'CampaignId', 'CampaignName', 'AdGroupId', 'AdGroupName',
                            'Id', 'BusinessName',
                            'Impressions', 'Clicks', 'Cost')
                    .From('AD_PERFORMANCE_REPORT')
                    .Where('AdNetworkType1').EqualTo('CONTENT')
                    .During(date_range=None, start_date=dt - timedelta(days=DAYS), end_date=dt)
                    .Build())

    stream_data = report_downloader.DownloadReportAsStreamWithAwql(report_query, 'CSV', skip_report_header=True,
                                                                   skip_column_header=True, skip_report_summary=True,
                                                                   include_zero_impressions=False)

    raw_chunks = []
    try:
        # Read until the stream is exhausted; returning inside the loop would
        # truncate the report to its first chunk.
        while True:
            chunk = stream_data.read(CHUNK_SIZE)
            if not chunk:
                break
            raw_chunks.append(chunk)
    finally:
        stream_data.close()

    # Decode once over the joined bytes so a multi-byte character that
    # straddles a chunk boundary cannot break decoding.
    return b''.join(raw_chunks).decode("utf8")


def get_clickhouse_data(query, host=HOST, connection_timeout=15000, **kwargs):
    """POST a query to the ClickHouse HTTP endpoint and return the response body.

    Args:
        query: Query text, sent as the request body.
        host: URL of the endpoint.
        connection_timeout: Value forwarded as the ``timeout`` of ``requests.post``.
        **kwargs: Extra items sent as URL query parameters.

    Returns:
        The response text when the status code is 200.

    Raises:
        ValueError: For any other status code; carries the response text.
    """
    import requests

    token = yav.Secret("sec-01ewjhey2j3dxqf5wk28h1c9tw").data()["token"]

    response = requests.post(
        host,
        params=kwargs,
        auth=('chizhonkov', token),
        timeout=connection_timeout,
        data=query,
    )
    if response.status_code != 200:
        raise ValueError(response.text)
    return response.text


def get_clickhouse_df(query, host=HOST, connection_timeout=1500):
    """Run a ClickHouse query and load the result into a pandas DataFrame.

    A ``FORMAT TabSeparatedWithNames`` clause is appended to the query, so the
    query itself must not specify an output format.

    Args:
        query: SELECT statement without a FORMAT clause.
        host: URL of the endpoint, forwarded to ``get_clickhouse_data``.
        connection_timeout: Timeout forwarded to ``get_clickhouse_data``.

    Returns:
        A DataFrame whose column names come from the first line of the
        response. ``date``, ``campaign_id``, ``adset_id`` and ``ad_id`` are read
        as strings and ``leads`` as integers.

    Raises:
        ValueError: Propagated from ``get_clickhouse_data`` on a non-200 response.
    """
    import pandas as pd
    import io

    # Separate the clause with a newline: without a separator a query that does
    # not end in whitespace would produce invalid SQL ("...StartDateFORMAT ...").
    query = query + '\nFORMAT TabSeparatedWithNames'
    data = get_clickhouse_data(query, host, connection_timeout)
    df = pd.read_csv(io.StringIO(data), sep='\t', header=0, low_memory=False,
                     dtype={
                         'date': str,
                         'campaign_id': str,
                         'adset_id': str,
                         'ad_id': str,
                         'leads': int})
    return df


def query_builder():
    """Return the ClickHouse query that counts leads per date, campaign, ad set and ad.

    The query reads ``default.visits_all`` for two counters, keeps visits with
    UTM source ``google`` and medium ``cpc`` whose goals match one of two goal
    ids, and limits ``StartDate`` to later than ``today() - DAYS``.
    """
    select_and_filters = """
        SELECT
            StartDate as date,
            UTMCampaign as campaign_id,
            UTMContent as adset_id,
            UTMTerm as ad_id,
            sum(Sign) AS leads
        FROM default.visits_all
        WHERE CounterID IN (46231566, 66355762)
            AND UTMSource == 'google'
            AND UTMMedium == 'cpc'
            AND StartDate > today()-"""
    goals_and_grouping = """
            AND (toString(Goals.ID) LIKE '%51667681%' OR toString(Goals.ID) LIKE '%129307987%')
        GROUP BY StartDate, UTMCampaign, UTMContent, UTMTerm
        ORDER BY StartDate
    """
    # The look-back window is spliced in between the two fixed fragments.
    return "".join([select_and_filters, str(DAYS), goals_and_grouping])


def _parse_report_rows(report_text, width):
    """Parse headerless CSV report text and keep only rows with exactly `width` cells."""
    import csv
    import io
    import sys

    if sys.version_info[0] == 2:
        # Python 2's csv module only handles byte strings: feed it UTF-8 and
        # decode every cell back to text afterwards.
        reader = csv.reader(io.BytesIO(report_text.encode('utf-8')))
        rows = ([cell.decode('utf-8') for cell in row] for row in reader)
    else:
        rows = csv.reader(io.StringIO(report_text, newline=''))
    return [row for row in rows if len(row) == width]


def get_insights_from_google():
    """Combine search and display report rows with ClickHouse lead counts.

    Both reports are downloaded for the period ending today, parsed as CSV and
    stacked into one frame; lead counts are then left-joined on
    ``date``, ``campaign_id``, ``adset_id`` and ``ad_id``.

    Returns:
        A DataFrame with the report columns (``impressions`` and ``clicks`` as
        integers, ``spend`` as the reported cost divided by 1,000,000), a
        constant ``source`` column equal to ``'google'`` and an integer
        ``leads`` column that is 0 where no lead row matched.
    """
    import pandas as pd
    from datetime import date

    dt = date.today()

    search_insights = get_search_insights_from_google(dt)
    display_insights = get_display_insights_from_google(dt)

    columns = ['date',
               'campaign_id',
               'campaign_name',
               'adset_id',
               'adset_name',
               'ad_id',
               'ad_name',
               'impressions',
               'clicks',
               'spend']

    rows_list = []
    for insights in (search_insights, display_insights):
        if insights:
            # A real CSV parser keeps rows whose quoted fields contain commas;
            # splitting on ',' gave them the wrong width and dropped them.
            rows_list.extend(_parse_report_rows(insights, len(columns)))

    df = pd.DataFrame(rows_list, columns=columns)

    df['impressions'] = df['impressions'].astype('int')
    df['clicks'] = df['clicks'].astype('int')
    df['spend'] = df['spend'].apply(lambda s: int(s) / 1000000.)
    df['source'] = 'google'
    # Normalise the join key to the native ``str`` type. On Python 2 this
    # encodes unicode to a byte string as before; on Python 3 it leaves text
    # untouched instead of producing ``bytes`` that can never match the key.
    df['ad_id'] = df['ad_id'].apply(lambda a: a if isinstance(a, str) else a.encode('utf-8'))

    leads_df = get_clickhouse_df(query_builder())
    # Ids starting with 'k' lose their first four characters.
    # startswith() also copes with an empty string, where indexing [0] raised.
    leads_df['ad_id'] = leads_df['ad_id'].apply(lambda a: a[4:] if str(a).startswith('k') else a)

    df = df.merge(right=leads_df, how='left', on=['date', 'campaign_id', 'adset_id', 'ad_id'])
    # Rows without a matching lead record come out of the left join as NaN.
    df['leads'] = df['leads'].fillna(0).astype(int)

    return df


def put_data_to_yt(df, yt_table):
    """Write the frame's rows to a YT table on the ``hahn`` proxy with a fixed schema.

    Args:
        df: DataFrame whose rows are written as records.
        yt_table: Path of the destination table.
    """
    import yt.wrapper as yt

    yt_token = yav.Secret("sec-01ent9wp5rkfmtyjm0skkm408h").data()["TOKEN"]
    client = yt.YtClient(
        proxy="hahn",
        config={"tabular_data_format": "json", "token": yt_token},
    )

    # (column name, YT type), in the column order of the destination table.
    column_types = (
        ("date", "string"),
        ("source", "string"),
        ("campaign_id", "string"),
        ("campaign_name", "string"),
        ("adset_id", "string"),
        ("adset_name", "string"),
        ("ad_id", "string"),
        ("ad_name", "string"),
        ("spend", "double"),
        ("impressions", "int32"),
        ("clicks", "int32"),
        ("leads", "int32"),
    )
    schema = [{"name": column, "type": yt_type} for column, yt_type in column_types]

    table_path = yt.TablePath(yt_table, attributes={"schema": schema})
    records = df.to_dict(orient='records')
    json_format = yt.JsonFormat(attributes={"encode_utf8": False})
    client.write_table(table_path, records, format=json_format)


class GoogleAdsSMB(sdk2.Task):
    # Task that collects Google Ads insights and stores them in a YT table.

    def on_create(self):
        # Use the first tasks-binary resource found for this task's build target.
        binary_attrs = {'target': 'maps/geomarketing/buying/GoogleAdsSMB/bin'}
        matches = sdk2.service_resources.SandboxTasksBinary.find(attrs=binary_attrs)
        self.Requirements.tasks_resource = matches.first()

    def on_execute(self):
        # Build the combined insights frame, then write it to the fixed destination table.
        insights = get_insights_from_google()
        destination = '//home/geo-analytics/marketing/datasets/buying/smb/cpl/google-ads'
        put_data_to_yt(insights, destination)
