# -*- coding: utf-8 -*-

import os
import re
import gzip
import logging
import urlparse
from sandbox.projects import resource_types
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.parameters import SandboxStringParameter
from sandbox.sandboxsdk.parameters import SandboxIntegerParameter


class HostsParam(SandboxStringParameter):
    """Required string parameter naming the host the access log is copied from.

    VpsGenerateAmmo.on_execute reads it from the task context under the key
    ``host`` and passes it to download_file as the rsync host.
    """
    name = "host"
    description = "Host: "
    required = True
    default_value = 'man1-7665.search.yandex.net'


class LogPathParam(SandboxStringParameter):
    """Required string parameter with the path of the access log on the host.

    The value is appended directly after the host name when download_file
    builds the ``rsync://`` URL, so it is expected to start with ``/``.
    """
    name = "logpath"
    description = "Log Path: "
    required = True
    default_value = '/logs/current-vps_access-vps-15660'


class ServiceParam(SandboxStringParameter):
    """Optional string parameter with the service name.

    The name is used twice: generate_ammo keeps only requests whose path
    starts with ``'/' + service``, and on_execute names the output file
    ``<service>.ammo.gz``.
    """
    name = "service"
    description = "Service Name: "
    required = False
    default_value = 'vps'


class RequestsCount(SandboxIntegerParameter):
    """Optional integer parameter: how many requests the ammo file must contain.

    generate_ammo stops after writing this many requests and reports a
    failure when the log holds fewer matching ones.
    """
    name = 'reqcount'
    description = 'Requests Count: '
    required = False
    default_value = 10000


def download_file(host, src, dst, timeout=600):
    """Copy a remote file to the local path *dst* with rsync.

    The source is addressed as ``rsync://<host><src>``, so *src* is expected
    to start with ``/``.

    :param host: name of the host running the rsync daemon
    :param src: path of the file on the remote host
    :param dst: local destination path
    :param timeout: value forwarded to run_process as its ``timeout`` argument
    :return: True when the copy finished without raising, False otherwise;
        failures are logged with a traceback and never propagated
    """
    # noinspection PyBroadException
    try:
        # Imported inside the try block, so an import failure is also
        # reported as a failed copy rather than raised to the caller.
        from sandbox.sandboxsdk.process import run_process
        logging.info('Trying remote copy {} from {}'.format(src, host))
        # host and src come from task parameters: pass them as separate argv
        # entries without a shell so they cannot be interpreted as shell syntax.
        command = ['rsync', '-vz', 'rsync://{}{}'.format(host, src), dst]
        run_process(command, shell=False, timeout=timeout)
        logging.info('Successful remote copy')
        return True
    except Exception:
        logging.exception('Failure attempt remote copy from host {} with src {} and dst {} with exception'
                          .format(host, src, dst))
    return False


# Matches the request part of an access-log line: `] "METHOD URL HTTP/x.y" STATUS `.
# Capture groups, in order: method, URL, protocol version, status code.
REQUEST_PATTERN = re.compile(r'] "([A-Z]+) (.*?) (HTTP/[0-9]+\.[0-9]+)" ([0-9]+) ')


def generate_ammo(input_file, output_file, service_name, requests_count, gzip_level=0):
    """Extract successful GET requests of one service from an access log.

    Every line that contains exactly one REQUEST_PATTERN match with method
    ``GET`` and status ``200`` and whose URL path starts with
    ``'/' + service_name`` is written to *output_file* as ``path[?query]``,
    one request per line. Processing stops once *requests_count* requests
    have been written.

    :param input_file: path of the access log to read
    :param output_file: path of the ammo file to create
    :param service_name: service whose requests are kept
    :param requests_count: number of requests required. NOTE: the remaining
        counter is only compared with zero after a decrement, so a
        non-positive value never stops the loop and every matching line is
        written.
    :param gzip_level: gzip compression level for the output; 0 or less
        writes a plain text file
    :return: ``(True, None)`` on success; ``(False, reason)`` when the log
        has fewer matching requests than required, in which case the
        incomplete output file is removed
    """
    remains = requests_count
    service_prefix = '/' + service_name
    with open(input_file, 'r') as fin:
        if gzip_level > 0:
            fout = gzip.open(output_file, 'w', compresslevel=gzip_level)
        else:
            fout = open(output_file, 'w')
        with fout:
            for line_n, line in enumerate(fin, 1):
                if line_n % 100 == 0:
                    logging.debug("Processed {} lines".format(line_n))
                matches = REQUEST_PATTERN.findall(line)
                # Lines with no request, or with more than one, are skipped.
                if len(matches) != 1:
                    continue
                method, url_str, _, status = matches[0]
                if method != 'GET' or status != '200':
                    continue
                url = urlparse.urlparse(url_str)
                result = url.path
                if not result.startswith(service_prefix):
                    continue
                if url.query:
                    result += '?' + url.query
                fout.write(result + '\n')
                remains -= 1
                if remains == 0:
                    break
    if remains > 0:
        logging.warning('Check failed, not enough data: remains {}'.format(remains))
        # Do not leave an incomplete ammo file behind.
        os.unlink(output_file)
        return False, 'Not enough data, remains {}'.format(remains)
    return True, None


class VpsGenerateAmmo(SandboxTask):
    """Sandbox task that builds an ammo file from a remote access log.

    The task copies the log from the configured host, extracts the
    requested number of successful GET requests of the service and
    registers the gzip-compressed result as a VPS_AMMO resource.
    """
    type = 'VPS_GENERATE_AMMO'
    execution_space = 10240
    input_parameters = [HostsParam, LogPathParam, ServiceParam, RequestsCount]

    def on_execute(self):
        """Download the log, generate the ammo file and publish it.

        :raises ValueError: when the log cannot be downloaded or contains
            fewer matching requests than requested
        """
        host = self.ctx[HostsParam.name]
        logging.info('Host: ' + host)

        log_file = 'access.log'
        service_name = self.ctx.get(ServiceParam.name, ServiceParam.default_value)
        requests_count = self.ctx.get(RequestsCount.name, RequestsCount.default_value)

        ammo = service_name + '.ammo.gz'

        try:
            if not download_file(host, self.ctx[LogPathParam.name], log_file):
                raise ValueError('Can not download logs from host {}'.format(host))
            check, reason = generate_ammo(log_file, ammo, service_name, requests_count, 9)
            if not check:
                raise ValueError('Failure built ammunition {} from host {}, reason: {}'.format(ammo, host, reason))
            self.create_resource('ammunition {}'.format(ammo), ammo, resource_types.VPS_AMMO)
            logging.info('Successfully built ammunition {}'.format(ammo))
        finally:
            # A failed download may leave no local log at all; an unconditional
            # unlink would then raise OSError and hide the original error.
            if os.path.exists(log_file):
                os.unlink(log_file)


# Module-level alias of the task class.
# NOTE(review): presumably the name the Sandbox task loader looks up — confirm.
__Task__ = VpsGenerateAmmo
