#!/usr/bin/env python
import click
import datetime as dt
import json
import logging
import os
import pandas as pd
import requests
import urllib
import yenv
import yt.wrapper as yt
from nile.api.v1 import Record, filters

from advisor_money.common.cli import DateType
from advisor_money.common.hasoffers_api import HasOffersAPI
from advisor_money.db import jafar_db
from advisor_money.settings import (METRIKA_MOBILE_LOG_1_DAY, METRIKA_MOBILE_LOG_30_MIN, LAUNCHER_API_KEY, YT_CONFIG,
                                    ADVISOR_POSTBACKS_PATH, YT_ADVISOR_TMP)
from advisor_money.utils.date_utils import date_range, date_to_datetime
from advisor_money.utils.yt_utils import get_cluster, get_yt_path

# Local root directory under which downloaded click log files are stored
# (and reused on later runs) by CustomConversionUploader._read_click_logs.
ADVISOR_TMP_DIR = '/tmp/advisor-money/'

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class ConversionMatcherYT(object):
    """Finds `rec_install` Metrika events that have no corresponding postback.

    The matching itself is executed as a job on the cluster returned by
    `get_cluster()`; the result is written to a destination table.
    """

    # Keys that must be present in the parsed EventValue for a record to be
    # emitted by `extract_event_value`.
    REQUIRED_EVENT_KEYS = ('offer_id', 'impression_id', 'package_name')

    @staticmethod
    def extract_event_value(records):
        """Mapper that flattens the JSON `EventValue` of each input record.

        For every record whose `EventValue` is a JSON object containing
        `offer_id` (not equal to the string 'empty'), `impression_id` and
        `package_name`, yields a `Record` with the fields `device_id`,
        `package_name`, `offer_id` and `impression_id`.

        Records are skipped (never raised on) when `EventValue` is not valid
        JSON, is not a JSON object, or lacks one of the required keys, so that
        a single malformed event cannot fail the whole map operation.

        :param records: iterable of records with `EventValue` and `DeviceID`.
        :return: generator of `Record` objects.
        """
        for record in records:
            try:
                event_value = json.loads(record['EventValue'])
            except (ValueError, TypeError):
                # EventValue is not a string (e.g. None) or is not valid JSON.
                continue

            # Valid JSON is not necessarily an object: numbers, null, strings
            # and lists cannot carry the keys we need.
            if not isinstance(event_value, dict):
                continue

            if any(key not in event_value for key in ConversionMatcherYT.REQUIRED_EVENT_KEYS):
                continue

            if event_value['offer_id'] == 'empty':
                continue

            yield Record(
                device_id=record['DeviceID'],
                package_name=event_value['package_name'],
                offer_id=event_value['offer_id'],
                impression_id=event_value['impression_id'],
            )

    def match_conversions_from_metrika(self, metrika_tables, postbacks_tables, dst_table):
        """Writes `rec_install` events without a matching postback to `dst_table`.

        Metrika records are filtered by `APIKey == LAUNCHER_API_KEY` and
        `EventName == 'rec_install'`, flattened with `extract_event_value`,
        and then joined with the postbacks by (`device_id`, `package_name`)
        using a `left_only` join, i.e. only events that have no postback are
        kept.

        :param metrika_tables: path (or path pattern) of the Metrika log tables.
        :param postbacks_tables: path (or path pattern) of the postback tables.
        :param dst_table: table the unmatched events are put into.
        """
        cluster = get_cluster()
        job = cluster.job()

        # Only the join keys are needed from the postbacks.
        # NOTE(review): ignore_missing=True presumably tolerates absent tables
        # in the given range -- confirm against the nile documentation.
        postbacks = job.table(
            postbacks_tables,
            ignore_missing=True
        ).project(
            'device_id', 'package_name'
        )

        job.table(
            metrika_tables,
            ignore_missing=True
        ).filter(
            filters.equals('APIKey', LAUNCHER_API_KEY),
            filters.equals('EventName', 'rec_install'),
        ).map(
            self.extract_event_value
        ).join(  # finding rec_install events without corresponding postbacks
            postbacks, type='left_only', by=('device_id', 'package_name')
        ).put(
            dst_table
        )
        job.run()


class CustomConversionUploader(object):
    """Triggers HasOffers conversions for installs that produced no postback.

    Rows of unmatched `rec_install` events are read from a YT table, mapped to
    HasOffers transaction ids through the click logs, and a conversion is
    requested for every transaction that does not have one yet.
    """

    # How many days before `date` the Metrika log tables are taken from.
    METRIKA_TABLES_DAYS_AGO = 2
    # How many days before `date` the postback tables are taken from.
    POSTBACK_TABLES_DAYS_AGO = 183
    # How many days before `date` the click logs are read from.
    CLICK_LOGS_DAYS_AGO = METRIKA_TABLES_DAYS_AGO + 14
    # Endpoint that is requested to create a conversion.
    CONVERSION_URL = 'http://yandex.go2cloud.org/aff_lsr'
    # Seconds to wait for the conversion endpoint; without a timeout a stalled
    # connection would block the whole upload indefinitely.
    CONVERSION_REQUEST_TIMEOUT = 30

    def __init__(self, date):
        """
        :param date: date the look-back windows are counted from.
        """
        self.date = date
        self.api = HasOffersAPI()
        # getting the set of offer ids for which we should approve conversions
        self.offer_ids = self.get_custom_conversion_offers()
        self.external_offer_ids = {self._parse_offer_id(item) for item in self.offer_ids}

    @staticmethod
    def get_custom_conversion_offers():
        """
        Returns the set of offer ids used in any of the existing custom
        conversion configs.
        """
        offer_ids = set()
        for config in jafar_db.custom_conversion_config.find():
            offer_ids.update(config['offer_ids'])
        return offer_ids

    @staticmethod
    def _parse_offer_id(offer_id):
        """
        Returns the integer that follows the last underscore of `offer_id`.

        Raises ValueError when there is no underscore or the suffix is not an
        integer.
        """
        _, external_id = offer_id.rsplit('_', 1)
        return int(external_id)

    def _get_approved_external_offer_id(self, offer_id):
        """
        Returns the external id of `offer_id` if a conversion may be triggered
        for it, otherwise None.

        An offer qualifies when its id starts with 'direct-', its external id
        can be parsed and that external id is in `self.external_offer_ids`.
        Malformed ids are treated as "not approved" because they originate
        from event data and must not abort the whole upload.
        """
        if not offer_id.startswith('direct-'):
            return None
        try:
            external_offer_id = self._parse_offer_id(offer_id)
        except ValueError:
            return None
        if external_offer_id not in self.external_offer_ids:
            return None
        return external_offer_id

    def upload_conversions_to_hasoffers(self, path):
        """
        Triggers a conversion for every eligible row of the YT table `path`.

        A row is skipped when its offer is not approved for custom conversions,
        when no transaction id is known for its `impression_id`, or when a
        conversion for the transaction already exists. In the development
        environment the conversion is only logged, never requested.

        :param path: table with `offer_id` and `impression_id` fields.
        """
        transaction_ids = self._get_transaction_ids_from_click_logs()

        for row in yt.read_table(get_yt_path(path), format='json'):
            external_offer_id = self._get_approved_external_offer_id(row['offer_id'])
            if external_offer_id is None:
                continue

            transaction_id = transaction_ids.get(row['impression_id'])
            if not transaction_id:
                continue

            if self._conversion_exists(transaction_id):
                continue

            # Triggering conversion only in testing and production environment
            if yenv.type == 'development':
                logger.info(
                    'Skip triggering conversion for offer %s with transaction_id=%s in development environment',
                    external_offer_id, transaction_id)
                continue

            self._trigger_conversion(external_offer_id, transaction_id)

    def _trigger_conversion(self, external_offer_id, transaction_id):
        """
        Requests CONVERSION_URL for the given offer and transaction and logs
        the outcome. Network errors are logged as a failure instead of being
        propagated, so one failed request does not stop the remaining rows.
        """
        try:
            response = requests.get(
                url=self.CONVERSION_URL,
                params={'offer_id': external_offer_id, 'transaction_id': transaction_id},
                timeout=self.CONVERSION_REQUEST_TIMEOUT,
            )
        except requests.RequestException:
            logger.warning('Failed to create conversion for offer %s with transaction_id: %s',
                           external_offer_id, transaction_id, exc_info=True)
            return

        if response.ok and 'success=true' in response.content:
            logger.info('Successfully created conversion for offer %s with transaction_id: %s',
                        external_offer_id, transaction_id)
        else:
            logger.warning('Failed to create conversion for offer %s with transaction_id: %s',
                           external_offer_id, transaction_id)

    def _get_transaction_ids_from_click_logs(self):
        """
        Returns a dict mapping impression id to transaction id, built from the
        click logs of the last CLICK_LOGS_DAYS_AGO days.

        The impression id is the URL-unquoted `affiliate_sub1` field of a
        click. When several clicks map to the same impression id, the one
        read last wins.
        """
        transaction_ids = {}
        first_date = self.date - dt.timedelta(days=self.CLICK_LOGS_DAYS_AGO)
        for date in date_range(first_date, self.date):
            for df in self._read_click_logs(date):
                for _, row in df.iterrows():
                    # If there are many clicks with different transaction_id for one
                    # impression_id, we should trigger a conversion only for the click
                    # whose is_click_unique field is set.
                    if not (row['offer_id'] in self.external_offer_ids and row['is_click_unique']):
                        continue
                    # Clicks without affiliate_sub1 cannot be tied to an impression.
                    if pd.isnull(row['affiliate_sub1']):
                        continue
                    impression_id = urllib.unquote_plus(row['affiliate_sub1'])
                    transaction_ids[impression_id] = row['transaction_id']
        return transaction_ids

    def _read_click_logs(self, date):
        """
        Yields one DataFrame per click log file available for `date`.

        Files are downloaded once into ADVISOR_TMP_DIR and read from disk on
        subsequent calls.
        """
        for log_filename in self.api.get_click_logs_filenames(date):
            folder, filename = log_filename.rsplit('/', 1)
            path = os.path.join(ADVISOR_TMP_DIR, folder)

            if not os.path.exists(path):
                os.makedirs(path)

            path_to_file = os.path.join(path, filename)
            if os.path.exists(path_to_file):
                logger.info('Read click log file %s from disk.', path_to_file)
            else:
                raw_file = self.api.download_click_log_file(log_filename)
                # The file is a zip archive, hence binary mode. It is written
                # under a temporary name and renamed afterwards so that an
                # interrupted write never leaves a truncated file that later
                # runs would mistake for a valid cached log.
                partial_path = path_to_file + '.part'
                with open(partial_path, 'wb') as f:
                    f.write(raw_file)
                os.rename(partial_path, path_to_file)
                logger.info('Write click log file %s to disk.', path_to_file)
            yield pd.read_csv(path_to_file, compression='zip')

    def _conversion_exists(self, transaction_id):
        """Returns True if the API already has a conversion for `transaction_id`."""
        return self.api.get_conversion_by_transaction_id(transaction_id) is not None

    def get_postback_tables(self):
        """
        Returns the YT path pattern covering the postback tables from
        POSTBACK_TABLES_DAYS_AGO days before `self.date` up to `self.date`.
        """
        # Named `bounds` so that the imported `date_range` helper is not shadowed.
        bounds = (self.date - dt.timedelta(days=self.POSTBACK_TABLES_DAYS_AGO), self.date)
        return get_yt_path(ADVISOR_POSTBACKS_PATH.format('{%s..%s}' % bounds))

    def get_metrika_tables(self):
        """
        Returns a '{path1,path2,...}' pattern of the Metrika log tables for the
        last METRIKA_TABLES_DAYS_AGO days.

        Dates strictly before today use the daily table; any other date is
        covered by 30-minute tables ranging from that date up to now.
        """
        now = dt.datetime.now()
        paths = []
        metrika_tables_dates_range = date_range(self.date - dt.timedelta(days=self.METRIKA_TABLES_DAYS_AGO), self.date)
        for date in metrika_tables_dates_range:
            if date < now.date():
                paths.append(METRIKA_MOBILE_LOG_1_DAY.format(date.isoformat()))
            else:
                # NOTE(review): date_to_datetime presumably returns the start
                # of the day -- confirm in advisor_money.utils.date_utils.
                start_dt = date_to_datetime(date)
                for d in date_range(start_dt, now, delta=dt.timedelta(minutes=30)):
                    paths.append(METRIKA_MOBILE_LOG_30_MIN.format(d.isoformat()))
        return '{%s}' % ','.join(paths)


def match_and_upload_hasoffers_conversions(date):
    """
    Matches Metrika installs against postbacks and uploads the unmatched ones
    to HasOffers as conversions.

    The intermediate result lives in a temporary YT table created under
    YT_ADVISOR_TMP for the duration of the call.

    :param date: date passed to CustomConversionUploader.
    """
    conversion_matcher = ConversionMatcherYT()
    conversion_uploader = CustomConversionUploader(date)

    # Source tables for the matching job.
    metrika_source = conversion_uploader.get_metrika_tables()
    postback_source = conversion_uploader.get_postback_tables()

    with yt.TempTable(YT_ADVISOR_TMP) as unmatched_table:
        logger.info("Start matching conversions from yt metrika")
        conversion_matcher.match_conversions_from_metrika(
            metrika_source,
            postback_source,
            unmatched_table,
        )

        logger.info("Start uploading conversions to hasoffers")
        conversion_uploader.upload_conversions_to_hasoffers(unmatched_table)
        logger.info("Finish uploading conversions")


@click.command()
@click.option('--date', '-d', default=dt.date.today, type=DateType(), help='Date in format yyyy-mm-dd')
def cli(date):
    # Command-line entry point: applies YT_CONFIG to the yt client and runs
    # the match-and-upload pipeline for `date`.
    # `date` defaults to the value of dt.date.today; the default is passed as
    # a callable rather than as a date computed at import time.
    yt.update_config(YT_CONFIG)
    match_and_upload_hasoffers_conversions(date)


# Run the click command only when this file is executed as a script.
if __name__ == '__main__':
    cli()
