# -*- coding: utf-8 -*-

import sys
import os
import logging
import json
from urlparse import urlparse
from datetime import datetime
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import channel
from sandbox.projects import resource_types
from sandbox.projects.common import utils2


# Scraper setting name that is treated as the default one: when the generated
# setting name equals this value, no setting suffix is appended to the monitor
# name sent to Stat.
DEFAULT_SCRAPER_SETTING_NAME = 'yandex_hamster'


def generate_scraper_setting_name(preset, host):
    """
        Build the scraper setting name for a preset/host pair.

        The two known hosts get fixed names when combined with the
        'yandex-web' or 'yandex-web-profiled' preset. Every other combination
        is named '<preset>_<first label of the host name>'.
    """
    known_hosts = {
        'https://hamster.yandex.ru': 'yandex_hamster',
        'https://woogie.hamster.yandex.ru': 'yandex_woogie',
    }
    is_yandex_web = preset == 'yandex-web' or preset == 'yandex-web-profiled'
    if is_yandex_web and host in known_hosts:
        return known_hosts[host]

    first_label = urlparse(host).hostname.split('.', 1)[0]
    return preset + '_' + first_label


def _load_json_safe(s):
    return json.loads(s) if s else s


class FilterParam(parameters.SandboxStringParameter):
    """
        Erf filters, entered one per line.

        The cast value is a JSON-encoded list of filter strings.
    """
    name = 'flt'
    description = 'Erf filters (each line for new filter)'
    multiline = True
    required = True
    group = 'Main'

    @classmethod
    def cast(cls, value):
        """
            Normalize the raw parameter text to a JSON list of filters.

            A value that already is a JSON-encoded list is returned unchanged,
            so casting an already cast value is a no-op. Any other text is
            split into lines, one filter per line.
        """
        value = super(FilterParam, cls).cast(value)
        try:
            parsed = json.loads(value)
        except ValueError:
            parsed = None
        # Only a JSON list is the serialized form. Filter text that merely
        # happens to be valid JSON (a bare number, a quoted string, ...) still
        # has to be wrapped into a list, otherwise consumers get a scalar.
        if isinstance(parsed, list):
            return value
        return json.dumps(value.strip(u'\n').split(u'\n'))


class BacketParam(parameters.ResourceSelector):
    """
        Selects the resource with the queries backet.
    """
    name = 'backet'
    group = 'Main'
    required = True
    description = (
        "Queries backet resource. "
        "Backet is line-separated records *query [region]*. "
        "If region isn't specified then you must set it in parameter"
    )


class RegionParam(parameters.SandboxStringParameter):
    """
        Region applied to all queries; empty by default.
    """
    name = 'region'
    group = 'Main'
    default_value = ''
    description = (
        "Region for all queries. "
        "If isn't specified than backet must have per-query region records"
    )


class VerboseParam(parameters.SandboxBoolParameter):
    """
        Boolean 'verbose' flag of the task.
    """
    group = 'Main'
    name = 'verbose'
    description = 'Set verbose'


class ScraperAuthParam(parameters.SandboxStringParameter):
    """
        Vault user whose records hold the scraper and Stat credentials.
    """
    name = 'auth_vault'
    group = 'Auth'
    required = True
    description = (
        "Sandbox vault user "
        "(must contain 'scraper_user', 'scraper_oauth', 'stat_user', 'stat_pass' values)"
    )


class ReportNameParam(parameters.SandboxStringParameter):
    """
        Name of the Stat report that receives the monitoring data.
    """
    name = 'stat_report_name'
    group = 'Result'
    default_value = 'Search_Spam/Erf_Monitoring'
    description = 'Stat report name'


class ReportScaleParam(parameters.SandboxRadioParameter):
    """
        Scale of the Stat report, chosen from a fixed list of values.
    """
    name = 'report_scale_param'
    group = 'Result'
    required = True
    description = 'Stat report scale'
    choices = [
        ('monthly', 'monthly'),
        ('weekly', 'weekly'),
        ('daily', 'daily'),
        ('hourly', 'hourly'),
        ('minutely', 'minutely'),
    ]


class ResultResourceTTL(parameters.SandboxIntegerParameter):
    """
        Value stored in the 'ttl' attribute of the result resource.
    """
    name = 'result_resource_ttl'
    group = 'Result'
    required = True
    default_value = 14
    description = 'Result resource TTL'


class ScraperPageSizeParam(parameters.SandboxIntegerParameter):
    """
        Page size requested from the scraper; also part of the monitor name.
    """
    name = 'scraper_page_size_param'
    group = 'Scraper'
    required = True
    default_value = 100
    description = 'Scraper page size param'


class ScraperCgiParam(parameters.SandboxStringParameter):
    """
        Extra cgi parameters for the scraper, entered as one 'name=value' per line.

        A non-empty value is cast to a JSON-encoded list of [name, value] pairs.
    """
    name = 'scraper_cgi_params'
    description = 'Scraper cgi parameters (each line for parameter name=value)'
    multiline = True
    required = False
    group = 'Scraper'

    @classmethod
    def cast(cls, value):
        """
            Normalize the raw parameter text to a JSON list of [name, value] pairs.

            Returns an empty list for an empty value. A value that already is a
            JSON-encoded list is returned unchanged, so casting an already cast
            value is a no-op. Blank lines are skipped; only the first '=' of a
            line separates the name from the value.

            Raises ValueError for a non-blank line that has no '=' separator.
        """
        value = super(ScraperCgiParam, cls).cast(value)
        value = value.strip(u'\n')
        if not value:
            return []
        try:
            parsed = json.loads(value)
        except ValueError:
            parsed = None
        # Only a JSON list is the serialized form; any other text (including
        # text that happens to be a valid JSON scalar) is parsed line by line.
        if isinstance(parsed, list):
            return value

        pairs = []
        for line in value.split(u'\n'):
            if not line.strip():
                continue
            cgi_name, separator, cgi_value = line.partition(u'=')
            if not separator:
                raise ValueError(
                    "Invalid cgi parameter {!r}: expected 'name=value'".format(line)
                )
            pairs.append([cgi_name, cgi_value])
        return json.dumps(pairs)


class ScraperPresetParam(parameters.SandboxStringParameter):
    """
        Scraper preset name; 'yandex-web' by default.
    """
    name = 'scraper_preset_param'
    group = 'Scraper'
    required = True
    default_value = 'yandex-web'
    description = 'Scraper preset param'


class ScraperHostParam(parameters.SandboxStringParameter):
    """
        Scraper host; must be a URL with both a scheme and a netloc.
    """
    name = 'scraper_host_param'
    group = 'Scraper'
    required = True
    default_value = 'https://hamster.yandex.ru'
    description = 'Scraper host param'

    @classmethod
    def cast(cls, value):
        """
            Return the cast value after checking it has a scheme and a netloc.

            Raises ValueError when either part is missing.
        """
        host = super(ScraperHostParam, cls).cast(value)
        parsed = urlparse(host)
        if parsed.scheme and parsed.netloc:
            return host
        raise ValueError('Invalid host. Host must have scheme and netloc')


class ErfMonitor(SandboxTask):
    """
        Monitoring erf, SEARCHSPAM-9671
    """
    # Flow of the task: load the queries backet, call find_by_filter with the
    # scraper credentials and the configured filters, save what was found as a
    # JSON resource and upload summary counters to the Stat report.

    type = 'ERF_MONITOR'

    # TODO use last version of python-statface-client
    environment = (
        PipEnvironment('python-statface-client', '0.65.0', use_wheel=True),
    )

    input_parameters = [
        FilterParam,
        BacketParam,
        RegionParam,
        VerboseParam,
        ScraperAuthParam,
        ReportNameParam,
        ReportScaleParam,
        ScraperPageSizeParam,
        ScraperCgiParam,
        ScraperPresetParam,
        ScraperHostParam,
        ResultResourceTTL
    ]

    local_dir = 'local_files'
    scraper_task_name = 'sandbox:erf monitor'

    def on_execute(self):
        """
            Task entry point: search, save the result and report the statistics.
        """
        self._setup_environment()
        # Importable only after _setup_environment() has extended sys.path.
        from erf_monitor.find_by_filter import find_by_filter

        vault_owner = self.ctx[ScraperAuthParam.name]
        scraper_user = self.get_vault_data(vault_owner, 'scraper_user')
        oauth_token = self.get_vault_data(vault_owner, 'scraper_oauth')

        requests = self._load_requests()
        # Both an empty string and None mean "no common region"; calling
        # len() on the raw value would crash on None.
        region = self.ctx[RegionParam.name] or None
        found, request_count = find_by_filter(
            scraper_user, oauth_token, self.scraper_task_name, requests,
            _load_json_safe(self.ctx[FilterParam.name]),
            region=region,
            page_size=self.ctx[ScraperPageSizeParam.name],
            cgi_params=_load_json_safe(self.ctx[ScraperCgiParam.name]),
            scraper_preset=self.ctx[ScraperPresetParam.name],
            host=self.ctx[ScraperHostParam.name]
        )
        found_resource = self._save_found(found)
        self._update_stat(found, request_count, found_resource)

    def _setup_environment(self):
        """
            Add the Arcadia source directories the task imports from to sys.path.
        """
        sys.path.append(Arcadia.get_arcadia_src_dir('arcadia:/arc/trunk/arcadia/yweb/antispam/util'))
        sys.path.append(Arcadia.get_arcadia_src_dir('arcadia:/arc/trunk/arcadia/yweb/antiporno/scripts'))  # erf_monitor

    def _load_requests(self):
        """
            Sync the backet resource and return prepare_requests() of its file.
        """
        file_name = self.sync_resource(self.ctx[BacketParam.name])
        if not os.path.exists(file_name):
            logging.info("File '%s' not found, will check with another path", file_name)
            # Retry relative to the directory of this module.
            file_name = os.path.join(os.path.dirname(__file__), file_name)

        from erf_monitor.find_by_filter import prepare_requests
        return prepare_requests(file_name)

    def _save_found(self, found):
        """
            Dump `found` to 'result_out.json' and register it as a resource.

            The resource id is also stored in ctx['json_resource_id'].
            Returns the id of the created resource.
        """
        path_result_file = os.path.join(self.abs_path(), 'result_out.json')
        with open(path_result_file, 'w') as result_file:
            json.dump(found, result_file, indent=4)
        resource_id = self.create_resource(
            self.descr,
            path_result_file,
            resource_types.OTHER_RESOURCE,
            attributes={'ttl': self.ctx[ResultResourceTTL.name]}
        ).id
        self.ctx['json_resource_id'] = resource_id
        return resource_id

    def _build_monitor_name(self, scraper_cgi_params):
        """
            Compose the monitor name reported to Stat.

            The name is '<backet>_<page size>_[<filters>]', followed by
            '_[<cgi params>]' when cgi parameters are set and by
            '_<scraper setting>' when the setting is not the default one.
        """
        backet_id = self.ctx[BacketParam.name]
        # Fall back to the resource id when the backet has no 'name' attribute.
        backet_name = channel.channel.sandbox.get_resource_attribute(backet_id, 'name') or str(backet_id)
        monitor_name = '{}_{}_[{}]'.format(
            backet_name,
            self.ctx[ScraperPageSizeParam.name],
            ', '.join(_load_json_safe(self.ctx[FilterParam.name]))
        )
        if scraper_cgi_params:
            monitor_name = '{}_[{}]'.format(monitor_name, scraper_cgi_params)
        scraper_setting_name = generate_scraper_setting_name(
            self.ctx[ScraperPresetParam.name], self.ctx[ScraperHostParam.name]
        )
        if scraper_setting_name != DEFAULT_SCRAPER_SETTING_NAME:
            monitor_name = '{}_{}'.format(monitor_name, scraper_setting_name)
        return monitor_name

    def _update_stat(self, found, request_count, result_resource):
        """
            Upload one row with the summary of this run to the Stat report.

            `found` maps a key to a list of documents; the first item of each
            document is summed to get the mean position (presumably the
            document position in the search results - confirm against
            find_by_filter).
        """
        import statface_client

        documents = [doc for docs in found.values() for doc in docs]
        count = len(documents)
        sum_position = sum(doc[0] for doc in documents)
        mean_position = int(sum_position / count) if count != 0 else 0

        # The cgi parameter is optional: treat an unset value as "no pairs".
        cgi_pairs = _load_json_safe(self.ctx[ScraperCgiParam.name]) or []
        scraper_cgi_params = '&'.join('{}={}'.format(cgi[0], cgi[1]) for cgi in cgi_pairs)

        monitor_name = self._build_monitor_name(scraper_cgi_params)
        report_scale = self.ctx[ReportScaleParam.name]
        data = [
            {
                'fielddate': self._get_date(report_scale),
                'monitor_name': monitor_name,
                'request_count': request_count,
                'backet': utils2.resource_redirect_url(self.ctx[BacketParam.name]),
                'found': count,
                'mean_position': mean_position,
                'result': utils2.resource_redirect_url(result_resource),
                'scraper_page_size': self.ctx[ScraperPageSizeParam.name],
                'scraper_cgi_params': scraper_cgi_params,
            }
        ]

        logging.info('Stat data: %s', data)

        vault_owner = self.ctx[ScraperAuthParam.name]
        stat_user = self.get_vault_data(vault_owner, 'stat_user')
        stat_pass = self.get_vault_data(vault_owner, 'stat_pass')
        report_name = self.ctx[ReportNameParam.name]
        stat = statface_client.StatfaceClient(stat_user, stat_pass, statface_client.STATFACE_PRODUCTION)
        report = stat.get_report(report_name)
        report.upload_data(report_scale, data)

    def _get_date(self, scale):
        """
            Return the current local time formatted for the given report scale.

            Monthly, weekly and daily scales use a date only; hourly and
            minutely scales use date and time.

            Raises ValueError for an unknown scale.
        """
        date_format = {
            'monthly': '%Y-%m-%d',
            'weekly': '%Y-%m-%d',
            'daily': '%Y-%m-%d',
            'hourly': '%Y-%m-%d %H:%M:%S',
            'minutely': '%Y-%m-%d %H:%M:%S',
        }.get(scale)
        if date_format is None:
            logging.fatal('Invalid scale value')
            # Fail with a clear error instead of a TypeError from strftime(None).
            raise ValueError('Invalid scale value: {!r}'.format(scale))
        return datetime.now().strftime(date_format)


# Module-level alias of the task class.
# NOTE(review): presumably the name under which Sandbox looks up the task class
# of this module - confirm against the task loader.
__Task__ = ErfMonitor
