# -*- encoding: utf-8 -*-
from sandbox import sdk2
from sandbox.sandboxsdk.environments import PipEnvironment

import os
import calendar
import datetime
import functools
import json
import multiprocessing.dummy
import logging
import urllib2

# Module-level accumulator of track records: `_common_mapper` appends every
# fetched track to it (it is invoked through a thread pool), and
# `YaDriveGenerateAnalyzerCache.on_execute` writes the collected list to YT.
rides = []

def _get_analyzer_url(url=None, session=None, prestable=False, frmat=None):
    params = ''
    if url:
        params += '&ride={}'.format(url)
    if session:
        params += '&session={}'.format(session)
    if frmat:
        params += '&format={}'.format(frmat)
    return 'http://{}.carsharing.yandex.net/api/staff/track/analyzer?{}&status='.format(
        'prestable' if prestable else 'admin',
        params
    )

def _request_with_retries(url, headers):
    """GET ``url`` and return the response body parsed as JSON.

    Up to five attempts are made. HTTP 404 and 500 are treated as "no data"
    and end the attempts immediately with ``None``. Any other HTTP error, as
    well as a transport-level ``urllib2.URLError`` (connection refused, DNS
    failure, ...), is logged and retried.

    :param url: full request URL.
    :param headers: dict of HTTP headers to send with the request.
    :return: decoded JSON document, or ``None`` for HTTP 404/500.
    :raises urllib2.URLError: the last error seen when every attempt failed
        (``urllib2.HTTPError`` is a subclass of it).
    """
    logging.info("requesting {}".format(url))
    last_error = None
    for attempt in xrange(5):
        try:
            request = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(request)
            try:
                return json.loads(response.read())
            finally:
                # Release the connection even if reading or JSON decoding fails.
                response.close()
        except urllib2.HTTPError as e:
            # Must precede the URLError clause: HTTPError is its subclass.
            last_error = e
            if e.code in (404, 500):
                return None
            logging.warning("attempt {} for {} failed: HTTP {}".format(attempt + 1, url, e.code))
        except urllib2.URLError as e:
            # Transport failures carry no HTTP code; they are retried too.
            last_error = e
            logging.warning("attempt {} for {} failed: {}".format(attempt + 1, url, e.reason))
    if last_error is not None:
        raise last_error
    # Only reachable if the loop made no attempts at all.
    raise RuntimeError("no request attempts were made for {}".format(url))

def _common_mapper(headers, frmat=None, url=None, session=None):
    """Fetch the analyzer report for a ride and/or session and collect its tracks.

    The report is requested via ``_get_analyzer_url`` and
    ``_request_with_retries``. When a non-empty report comes back, every item
    of its ``"tracks"`` entry is added to the module-level ``rides`` list;
    otherwise a warning is logged and nothing is collected.

    :param headers: dict of HTTP headers for the analyzer request.
    :param frmat: report format name passed to the analyzer.
    :param url: ride identifier to process, if any.
    :param session: session identifier to process, if any.
    """
    global rides
    announcements = (
        ('processing segment {}', url),
        ('processing session {}', session),
    )
    for message, target in announcements:
        if target:
            logging.info(message.format(target))

    analyzer_url = _get_analyzer_url(url=url, session=session, frmat=frmat)
    payload = _request_with_retries(analyzer_url, headers)
    if not payload:
        # Nothing usable came back (None or an empty document).
        for target in (url, session):
            if target:
                logging.warning('skipping {}'.format(target))
        return

    fetched = payload["tracks"]
    rides.extend(fetched)
    logging.debug("{},{}".format(len(fetched), len(rides)))

def _session_mapper(headers, frmat, session):
    """Process a single session id with ``_common_mapper``.

    The positional signature lets ``functools.partial`` bind ``headers`` and
    ``frmat`` so the remaining argument can be fed from ``Pool.map``.
    """
    _common_mapper(headers, frmat=frmat, url=None, session=session)

def _url_mapper(headers, frmat, url):
    """Process a single ride url with ``_common_mapper``.

    The positional signature lets ``functools.partial`` bind ``headers`` and
    ``frmat`` so the remaining argument can be fed from ``Pool.map``.
    """
    _common_mapper(headers, frmat=frmat, url=url, session=None)


class YaDriveGenerateAnalyzerCache(sdk2.Task):
    """Collect analyzer tracks for one day and store them in a YT table.

    The set of rides to analyze comes from one of two sources:

    * when the ``drive_orders_export`` parameter is set, session ids are read
      from the ``<drive_orders_export>/<YYYY-MM-DD>`` YT table;
    * otherwise ride urls are looked up in the ``drive_navigator`` search
      service on ``SAAS_HOST``, one query per hexadecimal session-id prefix.

    Every ride is then requested from the track analyzer through a thread
    pool, and the collected tracks are written to the table returned by
    ``get_analyzer_table``.
    """

    class Requirements(sdk2.Requirements):
        # pip packages for the task environment; `yt.wrapper` is imported
        # lazily inside `on_execute`.
        environments = [
            PipEnvironment("yandex-yt"),
            PipEnvironment("yandex-yt-yson-bindings-skynet")
        ]
        cores = 1
        ram = 8 * 1024
        disk_space = 2048

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Parameters):
        # Day to process: the literal 'YESTERDAY' or a date in %Y%m%d form.
        date = sdk2.parameters.String(
            'Date',
            default='YESTERDAY'
        )
        # Passed to the analyzer as the `format` query argument.
        frmat = sdk2.parameters.String(
            'Format',
            choices=[
                ('legacy', 'legacy'),
                ('legacy_with_raw', 'legacy_with_raw'),
            ],
            default='legacy'
        )
        yt_proxy = sdk2.parameters.String(
            'YT Proxy',
            default='hahn.yt.yandex.net'
        )
        # Names of the Vault entries holding the YT and Drive tokens.
        yt_token_name = sdk2.parameters.String(
            'YT Token name',
            default='YT_TOKEN'
        )
        drive_token_name = sdk2.parameters.String(
            'Drive token name',
            default='DRIVE_TOKEN'
        )
        # When set, sessions are read from this YT directory instead of
        # being discovered through the search service.
        drive_orders_export = sdk2.parameters.String(
            'Drive orders YT export directory'
        )

    SAAS_HOST = 'saas-searchproxy-maps.yandex.net:17000'

    def get_drive_navigator_url(self, mask, date):
        """Return the ``drive_navigator`` query (path and arguments, no host).

        The query selects documents whose session id starts with ``mask`` and
        whose start timestamp lies within the day of ``date``.

        :param mask: session-id prefix; a falsy value means no prefix.
        :param date: object with ``year``, ``month`` and ``day`` attributes.
        """
        mask = mask or ''
        start = datetime.datetime(date.year, date.month, date.day)
        finish = start + datetime.timedelta(days=1)
        # timegm interprets the naive midnight values as UTC.
        timestamp_start = calendar.timegm(start.timetuple())
        timestamp_finish = calendar.timegm(finish.timetuple())
        text = 's_session_id:"{}*"+i_ts_start:"{}..{}"'.format(mask, timestamp_start, timestamp_finish)
        return 'drive_navigator?text={}&format=json&how=i_ts_start&numdoc=10000&haha=da&pron=earlyurls&relev=attr_limit=99999999&balancertimeout=2000'.format(text)

    def get_analyzer_table(self, date):
        """Return the YT path of the output table for ``date``."""
        return '//home/carsharing/production/analyzer_cache/1d/' + date.strftime('%Y-%m-%d')

    def get_export_table(self, date):
        """Return the YT path of the orders export table for ``date``."""
        return os.path.join(self.Parameters.drive_orders_export, date.strftime('%Y-%m-%d'))

    def on_execute(self):
        """Run the task: gather rides, query the analyzer, write results to YT."""
        import yt.wrapper as yt

        yt_proxy = self.Parameters.yt_proxy
        if yt_proxy:
            logging.info('set YT_PROXY to {}'.format(yt_proxy))
            yt.config['proxy']['url'] = yt_proxy

        yt_token = sdk2.Vault.data(self.owner, self.Parameters.yt_token_name)
        if yt_token:
            logging.info('set YT_TOKEN')
            yt.config['token'] = yt_token

        drive_token = sdk2.Vault.data(self.owner, self.Parameters.drive_token_name)

        date_string = self.Parameters.date
        if date_string == 'YESTERDAY':
            # Relative to the local clock of the host running the task.
            now = datetime.datetime.now()
            date = now - datetime.timedelta(days=1)
        else:
            date = datetime.datetime.strptime(date_string, '%Y%m%d')

        headers = {
            "Authorization": "OAuth {}".format(drive_token),
            "UserPermissionsCache": "true",
        }

        # Without an export directory, rides are discovered through search,
        # split into one query per leading hex digit of the session id.
        masks = []
        if not self.Parameters.drive_orders_export:
            masks = list('0123456789abcdef')

        urls = []
        for mask in masks:
            request = 'http://' + self.SAAS_HOST + '/' + self.get_drive_navigator_url(mask, date)
            # The retry helper is a module-level function, not a method.
            report = _request_with_retries(request, headers)
            if not report:
                # The helper returns None for HTTP 404/500.
                logging.warning('no search results for mask {}'.format(mask))
                continue
            if "response" not in report:
                continue
            if "results" not in report["response"]:
                continue
            for result in report["response"]["results"]:
                for group in result["groups"]:
                    for document in group["documents"]:
                        urls.append(document["url"])

        sessions = []
        if self.Parameters.drive_orders_export:
            export_table = self.get_export_table(date)
            logging.info('reading table {}'.format(export_table))
            for row in yt.read_table(export_table):
                session = row['session_id']
                if session:
                    sessions.append(session)

        frmat = self.Parameters.frmat
        url_mapper = functools.partial(_url_mapper, headers, frmat)
        session_mapper = functools.partial(_session_mapper, headers, frmat)
        # multiprocessing.dummy provides a thread-backed pool.
        pool = multiprocessing.dummy.Pool(32)
        try:
            pool.map(url_mapper, urls)
            pool.map(session_mapper, sessions)
        finally:
            # Shut the worker threads down even when a mapper raised.
            pool.close()
            pool.join()

        table = self.get_analyzer_table(date)
        global rides
        logging.info("{} rides".format(len(rides)))
        yt.write_table(table, rides)
