import json
import hashlib
import os
import logging
import tarfile
import time
import StringIO
import urllib2
import urlparse
import yaml

from sandbox import sdk2
from sandbox.sandboxsdk import environments
from sandbox.common.types.resource import State

from sandbox.projects.TaxiML import common
from sandbox.projects.TaxiML import resources


# Vault key of the OAuth token used to create the Nirvana API client and to
# authorise the download of block results.
NIRVANA_TOKEN_SECRET_KEY = 'robot-taxi-ml-deploy-nirvana_api_oauth'
# Vault key of the OAuth token used to create the Startrek client.
STARTREK_TOKEN_SECRET_KEY = 'robot-taxi-ml-deploy-startrek_api_oauth'
# Attributes every resource request must carry; requests missing any of them
# are skipped by the task.
REQUIRED_ATTRIBUTES = ['environment', 'version_maj', 'version_min', 'name', 'type']


class TaxiMlCheckAndUploadResources(sdk2.Task):
    """Task to check nirvana graphs and upload results to sandbox.

    For every TAXI_ML_RESOURCE_REQUEST of the selected environment that has
    no matching TAXI_ML_RESOURCE yet, the task downloads the result archive
    of the Nirvana block referenced by the request, verifies the unpacked
    files against the bundled ``md5sum.txt`` and publishes the directory as
    a new TAXI_ML_RESOURCE.
    """

    class Requirements(sdk2.Task.Requirements):
        environments = (
            environments.PipEnvironment('nirvana_api'),
            environments.PipEnvironment('startrek_client'),
        )
        cores = 1
        ram = 1024
        disk_space = 1024

        class Caches(sdk2.Requirements.Caches):
            pass  # means that task do not use any shared caches

    class Parameters(sdk2.Task.Parameters):
        with sdk2.parameters.RadioGroup('Environment') as environment:
            environment.values[
                common.ENVIRONMENT_TESTING] = environment.Value(
                value=common.ENVIRONMENT_TESTING, default=True)
            environment.values[
                common.ENVIRONMENT_PRODUCTION] = environment.Value(
                value=common.ENVIRONMENT_PRODUCTION)

    def on_execute(self):
        """Process all pending resource requests one by one.

        Requests without the required attributes are skipped. A failure
        while processing one request is logged together with its traceback
        and does not prevent the remaining requests from being processed.
        """
        nirvana = _get_nirvana_client()
        for resource_request in self._get_new_resource_requests():
            logging.info('start to process resource request: %s',
                         resource_request.id)

            if not _check_resource_attributes(resource_request):
                continue

            try:
                graph_properties = _get_graph_properties(resource_request)
                logging.info('graph properties: %s', graph_properties)
                # TODO pkostikov: disable due to new SOX audit agreements
                # https://st.yandex-team.ru/TAXIMLINFRA-270
                # if (self.Parameters.environment == common.ENVIRONMENT_TESTING or
                #         _check_graph(nirvana, resource_request,
                #                      graph_properties)):
                #     self._upload_resource(
                #         nirvana, resource_request, graph_properties)
                self._upload_resource(
                    nirvana, resource_request, graph_properties)
            except Exception:
                # Task-level boundary: keep going with the next request.
                logging.exception('couldn\'t process resource request %s',
                                  resource_request.id)

    def _get_new_resource_requests(self):
        """Return the resource requests that still have to be uploaded.

        A request is selected when its id is not referenced by the
        ``request_id`` attribute of any existing TAXI_ML_RESOURCE of the
        same environment. Only the first request found for each ``name`` is
        kept; requests without a ``name`` attribute are ignored.

        :return: list of resource request objects
        """
        environment = self.Parameters.environment
        logging.info('Environment from parameter: %s', environment)

        uploaded_resource_ids = set()

        for resource in common.find_resources(
                resources.TAXI_ML_RESOURCE,
                state=[State.READY, State.NOT_READY],
                owner=common.get_resources_owner(environment),
                attrs={'environment': environment}):
            if hasattr(resource, 'request_id'):
                uploaded_resource_ids.add(int(resource.request_id))
            else:
                logging.error(
                    'found resource without request_id attributes: %s',
                    resource.id)

        logging.info('found %d processed uploaded resources',
                     len(uploaded_resource_ids))

        resource_requests = []
        unique_resource_names = set()
        for resource_request in common.find_resources(
                resources.TAXI_ML_RESOURCE_REQUEST,
                state=[State.READY],
                attrs={'environment': environment}):
            if (resource_request.id not in uploaded_resource_ids and
                    hasattr(resource_request, 'name') and
                    resource_request.name not in unique_resource_names):
                logging.info('resource request to upload: %s',
                             resource_request.id)
                resource_requests.append(resource_request)
                unique_resource_names.add(resource_request.name)

        logging.info('found %d resource requests to upload',
                     len(resource_requests))

        return resource_requests

    def _upload_resource(self, nirvana, resource_request, graph_properties):
        """Download a Nirvana block result and publish it as a resource.

        :param nirvana: Nirvana API client; ``get_block_results`` is called
            on it
        :param resource_request: request resource whose attributes are
            copied onto the new resource
        :param graph_properties: dict providing ``process_uid`` and
            ``block_uid`` of the block whose ``resource`` output is fetched
        :raises Exception: when a file listed in ``md5sum.txt`` is missing
            or its checksum does not match
        """
        workflow_instance_id = graph_properties['process_uid']

        block_results = nirvana.get_block_results(
            workflow_instance_id=workflow_instance_id,
            block_patterns=[{'guid': graph_properties['block_uid']}],
            outputs=['resource']
        )
        logging.info('block results: %s ', block_results)

        if not block_results:
            logging.error(
                'skip resource request %s because of empty graph result',
                resource_request.id)
            return

        # The new resource inherits every attribute of the request and is
        # additionally linked back to it through request_id.
        attrs = dict(resource_request)
        attrs['ttl'] = 'inf'
        attrs['request_id'] = resource_request.id
        attrs['version_build'] = int(time.time())

        logging.info('create directory for resource content')
        res_dir = self.path(attrs['name'])
        res_dir.mkdir()
        new_resource_dir = str(res_dir)

        request = urllib2.Request(block_results[0]['results'][0]['storagePath'])
        request.add_header(
            'Authorization',
            'OAuth {}'.format(sdk2.Vault.data(NIRVANA_TOKEN_SECRET_KEY))
        )
        response = urllib2.urlopen(request)

        logging.info('read result and extract to disc')
        try:
            archive_bytes = response.read()
        finally:
            # Release the HTTP connection even if reading fails.
            response.close()

        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; confirm that Nirvana results are trusted input or validate
        # member names before extraction.
        with tarfile.open(fileobj=StringIO.StringIO(archive_bytes)) as tf:
            tf.extractall(new_resource_dir)

        logging.info('read md5sum.txt file')
        with open(os.path.join(new_resource_dir, 'md5sum.txt')) as md5_file:
            for line in md5_file:
                line = line.strip()
                if not line:
                    # Tolerate blank lines, e.g. a trailing newline.
                    continue
                # Split on the first whitespace run only, so that file names
                # containing spaces stay intact.
                expected_md5, file_name = line.split(None, 1)
                file_path = os.path.join(new_resource_dir, file_name)
                if not os.path.exists(file_path):
                    raise Exception('md5 check failed: file ' + file_path +
                                    ' doesn\'t exists')
                if expected_md5 != _md5(file_path):
                    raise Exception('md5 doesn\'t match ' + file_path)

        logging.info('extract tar files')
        # Nested *.tar files are unpacked in place and then removed.
        for f in os.listdir(new_resource_dir):
            if f.endswith('.tar'):
                tar_path = os.path.join(new_resource_dir, f)
                with tarfile.open(tar_path) as tf:
                    tf.extractall(new_resource_dir)
                os.remove(tar_path)

        logging.info('graph result was stored to the file system')

        with open(os.path.join(
                new_resource_dir, 'resource_meta.yaml'), 'w') as output:
            yaml.safe_dump(
                {
                    'graph_properties': graph_properties,
                    'attributes': attrs
                },
                output,
                encoding='utf-8',
                allow_unicode=True,
                default_flow_style=False
            )
        logging.info('resource_meta.yaml was stored to the file system')

        new_resource = resources.TAXI_ML_RESOURCE(
            self, 'resource uploaded from nirvana graph', attrs['name'],
            **attrs
        )
        new_resource.request_id = resource_request.id
        new_resource_data = sdk2.ResourceData(new_resource)
        new_resource_data.ready()

        logging.info('resource request %s was successfully processed',
                     resource_request.id)


def _check_resource_attributes(resource_request):
    """Tell whether the request carries every attribute of REQUIRED_ATTRIBUTES.

    The first missing attribute (in REQUIRED_ATTRIBUTES order) is logged as
    an error and the check stops there.

    :param resource_request: object convertible to a dict of attributes
    :return: True when nothing is missing, False otherwise
    """
    present = dict(resource_request)
    missing = [name for name in REQUIRED_ATTRIBUTES if name not in present]
    if not missing:
        return True
    logging.error('attribute %s missing', missing[0])
    return False


def _get_st_ticket(resource_request):
    """Fetch the Startrek issue named by the request's ``st_ticket`` attribute.

    :param resource_request: object convertible to a dict of attributes
    :return: the issue object, or None (after logging a warning) when the
        attribute is absent or the ticket is not found
    """
    import startrek_client

    ticket_key = dict(resource_request).get('st_ticket')
    if ticket_key is None:
        logging.warning(
            'graph check failed: no st_ticket in resource attributes')
        return None

    client = startrek_client.Startrek(
        useragent='python', token=sdk2.Vault.data(STARTREK_TOKEN_SECRET_KEY))

    try:
        issue = client.issues[ticket_key]
    except startrek_client.exceptions.NotFound:
        logging.warning('graph check failed: ticket %s not found', ticket_key)
        issue = None

    return issue


def _check_st_ticket(issue, instace_id):
    """Check that the ticket's ``nirvanaUrl`` points at the given instance.

    The instance id is taken from the second-to-last segment of the URL
    path and compared with ``instace_id``.

    :param issue: ticket object; its ``nirvanaUrl`` attribute is read, and
        its ``key`` attribute (when present) is used in log messages only
    :param instace_id: expected workflow instance id (the misspelled
        parameter name is kept for backward compatibility)
    :return: True when the ids match; False otherwise, after logging a
        warning that explains the reason
    """
    nirvana_url = getattr(issue, 'nirvanaUrl', None)
    if not nirvana_url:
        # Supply the ticket identifier so the %s placeholder is actually
        # substituted; fall back to the object itself if it has no key.
        logging.warning('graph check failed: ticket %s doesn\'t have '
                        'attribute nirvanaUrl', getattr(issue, 'key', issue))
        return False

    path_items = urlparse.urlparse(nirvana_url).path.split('/')
    if len(path_items) < 2:
        # No second-to-last segment to read the instance id from.
        logging.warning('graph check failed: can\'t get instance id from '
                        'nirvana url %s', nirvana_url)
        return False

    nirvana_url_instance_id = path_items[-2]
    if nirvana_url_instance_id != instace_id:
        logging.warning('instance_id from nirvana url (%s) not equal to '
                        'the real parent graph instance id (%s)',
                        nirvana_url_instance_id,
                        instace_id)
        return False

    return True


def _check_graph(nirvana, resource_request, graph_properties):
    """Decide whether the graph behind a resource request passes the check.

    The check passes when the Startrek ticket of the request references the
    workflow instance itself, or when all of the following hold: the
    instance the workflow was cloned from is approved, both instances have
    the same blocks and connections, and the ticket references the parent
    instance.

    :param nirvana: Nirvana API client
    :param resource_request: request resource holding the ``st_ticket``
        attribute
    :param graph_properties: dict providing ``workflow_instance_uid``
    :return: True when the check passes, False otherwise
    """
    instance_id = graph_properties['workflow_instance_uid']

    issue = _get_st_ticket(resource_request)
    if issue is None:
        return False

    # The ticket points straight at this instance: nothing more to verify.
    if _check_st_ticket(issue, instance_id):
        return True

    meta = nirvana.get_workflow_meta_data(workflow_instance_id=instance_id)
    logging.info('get workflow meta data: %s', meta)

    parent_id = meta['cloneOfInstance']
    parent_meta = nirvana.get_workflow_meta_data(
        workflow_instance_id=parent_id)
    logging.info('get parent workflow meta data: %s', parent_meta)

    # The parent graph must have been approved.
    if parent_meta.get('lifecycleStatus') != 'approved':
        logging.warning(
            'graph check failed: parent workflow has to be approved')
        return False

    # The current graph must be an unmodified copy of its parent.
    graph = nirvana.read_workflow_instance(instance_id)
    parent_graph = nirvana.read_workflow_instance(parent_id)

    same_structure = all(
        str(graph[section]) == str(parent_graph[section])
        for section in ('blocks', 'connections'))
    if not same_structure:
        logging.warning(
            'graph check failed: not equal to parent graph'
        )
        return False

    return bool(_check_st_ticket(issue, parent_meta.get('instanceId')))


def _get_nirvana_client():
    """Create a Nirvana API client using the OAuth token stored in Vault."""
    import nirvana_api

    client_factory = nirvana_api.NirvanaApi
    oauth_token = sdk2.Vault.data(NIRVANA_TOKEN_SECRET_KEY)
    return client_factory(oauth_token)


def _get_graph_properties(resource_request):
    """Return the content of the request resource parsed as JSON.

    :param resource_request: resource whose data file holds the JSON document
    :return: the decoded JSON value
    """
    raw_content = sdk2.ResourceData(resource_request).path.read_bytes()
    return json.loads(raw_content)


def _md5(fname):
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
