#  -*- coding: utf-8 -*-

import os
import logging
import shutil
import threading
import json
import urllib
import time
import re
import copy

from sandbox.common import errors
from sandbox.projects import resource_types
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.svn import Arcadia
import sandbox.projects.TestReportUnit as Unit
from sandbox.projects.common import apihelpers
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.parameters import SandboxStringParameter
from sandbox.sandboxsdk.task import SandboxTask
import sandbox.projects.ReportDataRuntimeItem as Source
from sandbox.sandboxsdk.parameters import LastReleasedResource
import sandbox.projects.report.common as report_common
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk.paths import copy_path


class NecessaryData(object):
    """Names the task-context key under which the per-module work plan built by ``find_cache`` is stored."""
    name = 'necessary_data'


class CacheSources(LastReleasedResource):
    """
    Optional input parameter: source resources to reuse when generating data.

    Several resources of type REPORT_DATA_RUNTIME_ITEM_SOURCE may be selected (``multiple = True``).
    """
    name = 'bundle_cache_source'
    description = "Cache sources. Generate data using these sources:"
    resource_type = [resource_types.REPORT_DATA_RUNTIME_ITEM_SOURCE]
    required = False
    multiple = True


class Sms(SandboxStringParameter):
    """Input parameter: comma-separated recipients that get an SMS when the task fails."""
    name = 'sms'
    description = 'Notify by sms if the task will be failed:'


class ForceUpdateSources(SandboxStringParameter):
    """
    Input parameter: modules whose sources must be regenerated.

    The value is a space-separated module selection; entries prefixed with ``!`` are excluded from it.
    The selection must not overlap with the modules supplied through the cache sources parameter.
    """
    name = 'update_sources'
    description = '''
        Force update sources (e.g. YxWeb::Util::Region YxWeb::Util::Category or all !YxWeb::Util::Category).
        MUST NOT intersect with cache sources.
    '''


class ReportDataRuntime(Source.ReportDataRuntimeItem):
    """
       Builds the runtime data bundle: data.permanent or data.runtime
    """

    type = 'REPORT_DATA_RUNTIME'

    environment = (
        environments.SvnEnvironment(),
    )

    cores = 1
    required_ram = 8072
    execution_space = 16000
    # subtasks must finish within time_to_wait seconds
    time_to_wait = 3600
    # whether the resulting resource should be packed into a tar archive
    tar = 0

    input_parameters = [
        Source.ProdSelector, Source.Project, report_common.ApacheBundleParameter, Source.Selector, Unit.ArcadiaUrl,
        Source.ReportCoreParameter, CacheSources, Source.UseExpiredSource, ForceUpdateSources, Sms, Unit.RtccBundle,
        Source.ContainerLxc,
    ]

    def on_enqueue(self):
        """
        Run only the base ``SandboxTask`` enqueue hook.

        NOTE(review): ``SandboxTask.on_enqueue`` is called directly, so an ``on_enqueue`` defined by
        ``Source.ReportDataRuntimeItem`` (if any) is skipped - presumably on purpose; confirm.
        """
        SandboxTask.on_enqueue(self)

    def send_sms(self):
        """
        Notify the users listed in the ``sms`` parameter that this task has failed.

        The notification is best-effort: it is called from the failure handler of ``on_execute``,
        so a network error is logged instead of raised and never hides the original failure.
        Nothing is sent when the parameter is unset or contains no recipients.
        """
        raw_users = self.ctx.get(Sms.name)
        if not raw_users:
            # Unset/empty parameter. Without this guard str(None) would produce the recipient "None".
            return

        recipients = [x.strip() for x in str(raw_users).split(',') if x.strip()]
        if not recipients:
            return

        users_list = urllib.quote(','.join(recipients))
        msg = urllib.quote('Sandbox. Task {id} is {status}'.format(id=self.id, status=self.Status.FAILURE))
        url = "https://golem.yandex-team.ru/api/sms/send.sbml?resps=%s&msg=%s" % (users_list, msg)
        logging.info("sms url=%s" % url)

        try:
            f = urllib.urlopen(url)
        except IOError as e:
            logging.error("Can not send sms url=%s error %s" % (url, e))
            return

        # Always release the HTTP handle, even if reading the status code fails.
        try:
            code = f.getcode()
            if code != 200:
                logging.error("Can not send sms url=%s return code %s" % (url, code))
            else:
                logging.info("Send sms to: %s" % users_list)
        finally:
            f.close()

    def on_execute(self):
        """
        Task entry point.

        On the first run (no ``subtask_ids`` in the context) the work plan is computed with
        ``find_cache``; if every module already has cached data the bundle is assembled right away.
        Otherwise ``main`` is called, which either creates subtasks or, on a later run, assembles
        the bundle from their results.

        :raises errors.TaskFailure: re-raised unchanged after an SMS notification attempt.
        """
        try:
            self.my_prepare()
            # For debugging, a narrower selection can be passed instead of 'all',
            # e.g. 'YxWeb::Util::Region' or 'YxWeb::Util::Region YxWeb::Util::ReqTokens::Localized'.
            self._bundle_modules = tuple(self.modules('all').keys())
            logging.info("Bundle contains %s modules. bundle_modules=%s" % (len(self._bundle_modules), self._bundle_modules))

            if 'subtask_ids' not in self.ctx:
                self.ctx[NecessaryData.name] = self.find_cache()

                # Every module has cached data, so no subtasks are needed: build the bundle now.
                if all(item["data"] is not None for item in self.ctx[NecessaryData.name]):
                    logging.info('ALL BUNDLE FROM CACHE')
                    self.make_data_runtime_bundle()
                    return

            self.main(self.ctx[NecessaryData.name])
        except errors.TaskFailure:
            self.send_sms()
            # Bare raise keeps the original traceback (``raise e`` would restart it from here).
            raise

    def find_cache(self):
        """
        Build the per-module work plan: which source and data resources can be reused.

        Inputs are read from the task context: explicitly given cache sources, the list of modules
        whose sources must be regenerated, and the "use expired source" flag. For every module of
        the bundle (plus its dependencies, as returned by ``module_list``) the method looks up the
        latest matching source and data resources, drops expired ones, and drops cached data whose
        dependencies are missing or were built against a different resource.

        :return: list of dicts with keys ``name``, ``source`` (resource id or None),
                 ``data`` (resource id or None) and ``use_expired_sourse`` (bool),
                 ordered as produced by ``module_list``.
        :raises errors.TaskFailure: on an unknown resource or module, duplicate cache sources for one
                 module, overlap between forced and cached sources, or a module without ``md5_uri``.
        """
        source_cache = {}
        # Sources do not depend on project or configuration. A source is identified by its md5_uri attribute.
        # There must not be several resources with the same "module" attribute.
        if self.ctx[CacheSources.name]:
            # A single selected resource is normalized to a one-element list (and stored back in ctx).
            if type(self.ctx[CacheSources.name]) is not list:
                self.ctx[CacheSources.name] = [self.ctx[CacheSources.name]]
            for res_id in self.ctx[CacheSources.name]:
                resource = channel.sandbox.get_resource(res_id)
                if not resource:
                    raise errors.TaskFailure('Can not find resource with id=%s' % res_id)

                mod_name = resource.attributes['module']
                if source_cache.get(mod_name):
                    raise errors.TaskFailure('More than one resource for module %s: id=%s, id=%s' % (mod_name, resource.id, source_cache[resource.attributes['module']].id))

                source_cache[mod_name] = resource
        logging.info('source_cache=%s' % source_cache)

        force_sources = {}
        if self.ctx[ForceUpdateSources.name]:
            # want support force all except some modules, e.g. all !YxWeb::Util::GeoBase
            pattern = re.compile('(![^ ]+)')
            # f is the selection with all "!Module" tokens removed; skip holds the excluded module names.
            f = pattern.sub('', self.ctx[ForceUpdateSources.name]).strip()
            skip = {}
            for s in pattern.findall(self.ctx[ForceUpdateSources.name]):
                skip[s[1:]] = None

            for mod_name in skip:
                if mod_name not in self.available_modules:
                    raise errors.TaskFailure("Can not find module %s in available list." % mod_name)

            if f:
                fail_list = []
                for mod_name in self.modules(f):
                    if mod_name in skip:
                        continue
                    elif mod_name in source_cache:
                        # A module cannot be both force-updated and taken from an explicit cache source.
                        fail_list.append(mod_name)
                    else:
                        force_sources[mod_name] = 1
                if fail_list:
                    raise errors.TaskFailure("Modules %s can not intersect with 'Cache sources'." % fail_list)
        logging.debug('force_sources=%s' % force_sources)

        # ----
        module_list = self.module_list(self._bundle_modules)
        # if not module_list:
        #     raise errors.TaskFailure("No modules")

        # Flatten the sequence of steps into a single list of module names.
        mod_rev = []
        for seq in module_list:
            for mod_name in seq:
                mod_rev.append(mod_name)
        # It runs faster when the whole list is passed in one call.
        revisions = self.modules_revision(mod_rev)

        # For modules from module_list (except those in source_cache and force_sources)
        # find source resources (by md5_uri) and data resources.
        source_md5_cache = {}
        data_cache = {}
        for seq in module_list:
            for mod_name in seq:
                if ((mod_name not in source_cache) and (mod_name not in force_sources)):
                    if not self.available_modules[mod_name]['md5_uri']:
                        raise errors.TaskFailure("Can not find md5_uri for module %s" % mod_name)

                    attrs = {
                        'module':  mod_name,
                        'md5_uri': self.available_modules[mod_name]["md5_uri"]
                    }
                    resource = apihelpers.get_last_resource_with_attrs(resource_types.REPORT_DATA_RUNTIME_ITEM_SOURCE, attrs, all_attrs=True)
                    # Note: this message is emitted after the lookup above has already been done.
                    logging.debug("Try search source by attrs: %s" % attrs)
                    if resource:
                        logging.debug("Source found")
                        source_md5_cache[mod_name] = resource

                    attrs = {
                        'module': mod_name,
                        'version': self.versions['runtime_version'],
                        'project': self.ctx[Source.Project.name],
                        'configuration': self.ctx[Source.ProdSelector.name],
                        'revision': str(revisions[mod_name]),
                        'source_md5_uri': self.available_modules[mod_name]["md5_uri"],
                    }
                    resource = apihelpers.get_last_resource_with_attrs(resource_types.REPORT_DATA_RUNTIME_ITEM, attrs, all_attrs=True)
                    logging.debug("Try search data by attrs: %s" % attrs)
                    if resource:
                        logging.debug("Data found")
                        data_cache[mod_name] = resource

        logging.info("Source contains %u modules. source_md5_cache=%s" % (len(source_md5_cache), source_md5_cache))
        now = time.time()
        # Check the found source resources for freshness; expired ones are moved to expired_sources.
        expired_sources = self.remove_expired_modules(source_md5_cache, now)
        logging.info("Source contains %s modules. Remove expired. source_md5_cache.keys()=%s" % (len(source_md5_cache), source_md5_cache.keys()))

        logging.info("Data contains %s modules. data_cache=%s" % (len(data_cache), data_cache))
        # Check the found data resources for freshness; expired data is simply dropped.
        self.remove_expired_modules(data_cache, now)
        logging.info("Data contains %s modules. Remove expired. data_cache.keys()=%s" % (len(data_cache), data_cache.keys()))

        # If a cached module A (from data_cache) depends on module B and B is not in the cache
        # (or is there, but its resource id is not among the ids A was built against),
        # then A must be removed from the cache (data for A has to be rebuilt).
        # The scan is repeated until a full pass removes nothing, so removals propagate transitively.
        while True:
            deleted = 0
            # NOTE(review): relies on Python 2 keys() returning a list copy, since entries are popped below.
            for mod_name in data_cache.keys():
                if mod_name in self.dependence:
                    for dep_name in self.dependence[mod_name]:
                        if dep_name not in data_cache:
                            data_cache.pop(mod_name)
                            deleted = 1
                            break
                        else:
                            if 'depend_on' in data_cache[mod_name].attributes:
                                # depend_on is parsed as a comma-separated list of integer resource ids.
                                dep_list = [int(i) for i in data_cache[mod_name].attributes['depend_on'].split(',')]
                                if not data_cache[dep_name].id in dep_list:
                                    data_cache.pop(mod_name)
                                    deleted = 1
                                    break
            if not deleted:
                break
        logging.info("Data contains %s modules. Remove dependenced. data_cache.keys()=%s" % (len(data_cache), data_cache.keys()))

        # [{name: mod_name, source: res_id, data: res_id, use_cache_if_failed: '[10]' }]
        necess_data = []
        for seq in module_list:
            for mod_name in seq:
                item = {
                    'name':   mod_name,
                    'source': None,
                    'data':   None if mod_name not in data_cache else data_cache[mod_name].id,
                    'use_expired_sourse': False,
                }

                # An explicitly given cache source wins over one found by md5_uri.
                s = source_cache.get(mod_name) or source_md5_cache.get(mod_name)
                if mod_name in expired_sources:
                    # An expired source is used only when the task allows it; otherwise source stays None.
                    if self.ctx[Source.UseExpiredSource.name]:
                        item['use_expired_sourse'] = True
                        item['source'] = expired_sources[mod_name].id
                elif s:
                    item['source'] = s.id

                necess_data.append(item)

        logging.info(json.dumps(necess_data, indent=1))
        return necess_data

    def remove_expired_modules(self, resources, now):
        """
        Move expired resources out of ``resources`` and return them.

        All entries are validated first, so nothing is removed if validation fails.

        :param resources: dict module name -> resource; modified in place.
        :param now: current timestamp the ``expire_time`` attribute is compared against.
        :return: dict module name -> resource for every entry with ``expire_time <= now``.
        :raises errors.TaskFailure: if a key differs from the resource's ``module`` attribute.
        """
        expire_at = {}
        for name, resource in resources.items():
            declared = resource.attributes["module"]
            # Should never happen, but the sanity check is kept.
            if name != declared:
                raise errors.TaskFailure("mod_name(%s) MUST eq attribute module(%s)" % (name, declared))
            expire_at[name] = int(resource.attributes["expire_time"])

        stale = [name for name in expire_at if expire_at[name] <= now]
        return dict((name, resources.pop(name)) for name in stale)

    def make_data_runtime_bundle(self, res=None):
        """
        Assemble the runtime data bundle from per-module data resources and register it.

        Steps: choose a data resource for every bundle module (subtask result first, cached data
        otherwise); run ``make.pl --mode=setup`` to create directories; sync all resources in
        parallel and unpack them one at a time; run setup again to produce metainfo; write
        ``sandbox.json`` and ``checksum.txt``; run ``make.pl --mode=check``; remove the source
        directory; tar or copy the result; create a REPORT_DATA_RUNTIME resource unless
        ``ctx['data_resource_id']`` is already set.

        :param res: optional dict module name -> data resource id produced by subtasks.
        :raises errors.TaskFailure: if a module has no data resource or a resource cannot be synced.
        """
        if res is None:
            res = {}

        logging.debug("data_path=%s" % self.find_source_path)
        cache_data = {}
        for item in self.ctx[NecessaryData.name]:
            if item["data"]:
                cache_data[item["name"]] = item["data"]

        bundle_res = []
        for mod_name in self._bundle_modules:
            if mod_name in res:
                bundle_res.append(res[mod_name])
            elif mod_name in cache_data:
                bundle_res.append(cache_data[mod_name])
            else:
                raise errors.TaskFailure("Can not find data resource for module %s in ctx[%s] and in the subtask resources" % (mod_name, NecessaryData.name))
        logging.info("bundle_res=%s" % bundle_res)

        setup_cmd = 'y-local-env perl scripts/make.pl --mode=setup --generator --verbose %s' % (' '.join(self._bundle_modules))

        # run twice, first for make dirs
        run_process([setup_cmd], shell=True, log_prefix='setup', work_dir=self.report_dir)

        def thread_copy(res_id, res_path, error):
            try:
                src_path = self.sync_resource(res_id)
                res_path[res_id] = src_path
            except Exception as e:
                # Record the failure for the parent thread; the re-raise keeps the traceback in the thread output.
                error.append("Can not sync res_id: %s. Exception: %s" % (res_id, e))
                raise

        pool_error = []
        res_path = {}
        for res_id in bundle_res:
            res_path[res_id] = ""

        # Sync the resources in parallel.
        pool = [threading.Thread(target=thread_copy, args=(res_id, res_path, pool_error)) for res_id in bundle_res]
        for worker in pool:
            worker.start()
        for worker in pool:
            worker.join()
        if pool_error:
            raise errors.TaskFailure("%s" % pool_error)

        # Unpack one at a time.
        for res_id in res_path:
            self.unpack_data(res_path[res_id])

        logging.info("unpacked all data in the %s" % self.find_source_path['runtime'])

        # and second for make metainfo
        run_process([setup_cmd], shell=True, log_prefix='setup', work_dir=self.report_dir)

        attrs = {
            'version': self.versions['runtime_version'],
            'project': self.ctx[Source.Project.name],
            'configuration': self.ctx[Source.ProdSelector.name],
            'revision': max([int(channel.sandbox.get_resource(res_id).attributes['revision']) for res_id in bundle_res]),
            'report_version': self.versions['report_version'],
        }

        # create metainfo for all modules
        if self._bundle_uses_old_metainfo(attrs['project'], attrs['report_version'], attrs['revision']):
            # Legacy layout: flat mapping path -> module metainfo.
            metainfo = {}
            for mod_name in self._bundle_modules:
                data = self.module_metainfo(mod_name)
                metainfo[data['path']] = data
        else:
            # Current layout: bundle attributes at the top level, modules nested under "modules".
            metainfo = copy.copy(attrs)
            metainfo["sandbox_task"] = self.id
            metainfo["build_time"] = int(time.time())
            metainfo["modules"] = {}
            for mod_name in self._bundle_modules:
                data = self.module_metainfo(mod_name)
                metainfo["modules"][data['path']] = data

        with open(os.path.join(self.find_source_path["metainfo"], "sandbox.json"), "w") as f:
            f.write(json.dumps(metainfo, sort_keys=True, indent=4))

        run_process(['y-local-env perl scripts/make.pl --mode=check --generator --verbose %s' % (' '.join(self._bundle_modules))], shell=True, log_prefix='make', work_dir=self.report_dir)

        # Remove the source directory.
        if os.path.exists(self.find_source_path['source']):
            shutil.rmtree(self.find_source_path['source'])

        source_path = self.find_source_path['runtime']
        resource_type = resource_types.REPORT_DATA_RUNTIME

        checksum = Unit.TestReportUnit.run_and_read('find ./ -type f -exec md5sum {} \;', source_path)
        with open(os.path.join(self.find_source_path["metainfo"], "checksum.txt"), "w") as f:
            f.write(checksum)

        resource_path = '%s-%s' % (source_path, self.versions['runtime_version'])
        if self.tar:
            resource_path += '.tar'
            tar_params = "-cf"
            if self.tar_zip:
                tar_params = "-czf"
                resource_path += '.gz'

            run_process(['tar %s %s %s' % (tar_params, resource_path, source_path)], shell=True, log_prefix='tar')
        else:
            copy_path(source_path, resource_path)

        # The description is the runtime path with the version appended (without any archive suffix).
        res_descr = "%s-%s" % (source_path, self.versions['runtime_version'])

        if 'data_resource_id' not in self.ctx:
            # A relative path is needed to create the resource.
            resource = self.create_resource(
                resource_type=resource_type,
                resource_path=resource_path,
                description=res_descr,
                attributes=attrs
            )
            self.ctx['data_resource_id'] = resource.id
            logging.info("Resource %s was created successfully" % resource.type)
            # add to descr Use expired source
            if Source.IsExpired.name in self.ctx:
                self.descr += " (Use expired source)"

    @staticmethod
    def _bundle_uses_old_metainfo(project, report_version, revision):
        """
        Tell whether ``sandbox.json`` must use the legacy flat layout.

        Temporary compatibility logic (originally marked "delete after some month"): trunk from
        revision 2412347 on, and release branches newer than a per-project release number, use
        the new layout; everything else, including unknown projects, keeps the old one.

        :param project: project name, e.g. 'WEB', 'IMGS', 'VIDEO', 'YACA', 'NEWS'.
        :param report_version: report version string, e.g. 'trunk', 'r161.1.5', 'branches.report.images.r53'.
        :param revision: bundle revision (int); only consulted for 'trunk'.
        :return: True for the legacy layout, False for the new one.
        """
        if report_version == 'trunk' and revision >= 2412347:
            return False

        # project -> (pattern extracting the release number, last release still using the old layout)
        # Version examples: WEB r161.1.5 r161.HEAD; IMGS r53_images.1.0 branches.report.images.r53;
        # VIDEO r1_video.1.0 branches.report.video.r1; YACA r1_yaca.1.0 branches.report.yaca.r1;
        # NEWS r153_news.1.20 r159_news.HEAD.
        # NOTE(review): the NEWS pattern is anchored with "$" and therefore cannot match the
        # documented NEWS examples; kept as-is to preserve behavior - confirm whether intended.
        rules = {
            'WEB': (r'^r(\d+)', 161),
            'IMGS': (r'^r(\d+)|r(\d+)$', 53),
            'VIDEO': (r'^r(\d+)|r(\d+)$', 0),
            'YACA': (r'^r(\d+)|r(\d+)$', 0),
            'NEWS': (r'^r(\d+)$', 159),
        }
        if project not in rules:
            return True

        pattern, last_old_release = rules[project]
        match = re.search(pattern, report_version)
        if not match:
            return True

        # Exactly one alternative of the pattern matches; take its captured digits explicitly
        # instead of relying on Python 2 ordering of str vs int inside max().
        release = [group for group in match.groups() if group is not None][0]
        return int(release) <= last_old_release

    def arcadia_info(self):
        """
        Parse the frozen Arcadia URL stored in the task context.

        :return: tuple ``(revision, tag, branch)``; ``(None, None, None)`` if the URL cannot be parsed.
        """
        parsed = Arcadia.parse_url(self.ctx[Unit.ArcadiaUrlFrozen.name])
        if not parsed:
            return None, None, None
        return parsed.revision, parsed.tag, parsed.branch

    def main(self, sequence):
        """
        Create per-module subtasks, or assemble the bundle once they have been created.

        When ``ctx['subtask_ids']`` is already set, the bundle is built from the subtask results.
        Otherwise one subtask is created for every item of ``sequence`` that has no cached data,
        their ids are stored in the context and the task waits for them.

        :param sequence: work plan as returned by ``find_cache`` - dicts with keys ``name``,
                         ``source``, ``data`` and ``use_expired_sourse``, ordered by dependencies.
        """
        if self.ctx.get('subtask_ids'):
            self.make_data_runtime_bundle(self.subtask_resources())
        else:
            # Create a subtask for every module.
            # Modules are already ordered by dependencies, so the data resource of a module we depend on
            # already exists when it is needed (the resource is created in the on_enqueue method).
            subtask_ids = []
            cache_data = {}
            # Resolved at most once (below), so every subtask of this run gets the same bundle.
            apache_bundle_id = self.ctx.get(report_common.ApacheBundleParameter.name)
            for item in sequence:
                mod_name = item["name"]
                # If data already exists, no subtask is created.
                if item["data"]:
                    cache_data[mod_name] = item["data"]
                    continue

                # YxImages::Data::Samples images/data/samples
                svn_url = self.ctx[Unit.ArcadiaUrlFrozen.name] if self.ctx[Source.Selector.name] == 'svn' else ""
                if not apache_bundle_id:
                    # Fall back to the last released bundle; looked up lazily, only if a subtask is needed.
                    apache_bundle_id = Unit.TestReportUnit._get_last_released_resource('APACHE_BUNDLE').id
                    logging.info('Using APACHE_BUNDLE: %s', apache_bundle_id)
                sub_ctx = {
                    Source.ProdSelector.name:        self.ctx[Source.ProdSelector.name],
                    Source.Project.name:             self.ctx[Source.Project.name],
                    Source.Module.name:              mod_name,
                    Source.Selector.name:            self.ctx[Source.Selector.name],
                    Source.Unit.ArcadiaUrl.name:     svn_url,
                    Source.CacheSource.name:         item["source"],
                    Source.UseExpiredSource.name:    item["use_expired_sourse"],
                    # Data resources of the dependencies: cached ones or those of subtasks created earlier.
                    Source.DependsOnResource.name:   [cache_data[name] for name in self.dependence.get(mod_name, [])],
                    report_common.ApacheBundleParameter.name: apache_bundle_id,
                    Unit.RtccBundle.name:             self.ctx[Unit.RtccBundle.name]
                }
                if Source.ReportCoreParameter.name in self.ctx:
                    sub_ctx[Source.ReportCoreParameter.name] = self.ctx[Source.ReportCoreParameter.name]

                # No notifications for child tasks.
                sub_ctx['notify_via'] = ''
                sub_ctx["notifications"] = []
                if item["source"] and not item["use_expired_sourse"]:
                    descr = "Only data for module %s" % mod_name
                else:
                    descr = "Source and data for module %s" % mod_name

                subtask = self.create_subtask(
                    task_type=Source.ReportDataRuntimeItem.type,
                    description=descr,
                    input_parameters=sub_ctx,
                    important=self.important
                )
                subtask_ids.append(subtask.id)

                cache_data[mod_name] = subtask.ctx['data_resource_id']
                logging.info("Create subtask for module %s" % mod_name)

            self.ctx['subtask_ids'] = subtask_ids
            channel.sandbox.server.wait_time(self.id, self.time_to_wait)
            self.wait_all_tasks_completed(subtask_ids)

    def subtask_resources(self):
        """
        Collect the data resource ids produced by the subtasks.

        The "is expired" flag of the first subtask that has one is copied into this task's context.

        :return: dict module name -> data resource id.
        :raises errors.TaskFailure: listing every subtask that is not finished.
        """
        expired_key = Source.IsExpired.name
        resources = {}
        failures = []
        for task_id in self.ctx['subtask_ids']:
            task = channel.sandbox.get_task(task_id)
            module = task.ctx[Source.Module.name]
            if expired_key not in self.ctx and expired_key in task.ctx:
                self.ctx[expired_key] = task.ctx[expired_key]

            if task.is_finished():
                resources[module] = task.ctx['data_resource_id']
                continue

            failures.append(
                'Generation for module {0} was failed. Subtask {1} failed with status: {2}.'.format(module, task.id, task.status)
            )

        if failures:
            raise errors.TaskFailure('\n'.join(failures))
        return resources

    def add_dependent_module(self, mod_name, modules, added=None):
        """
        Collect, transitively, the dependencies of ``mod_name`` that are missing from ``modules``.

        :param mod_name: module whose dependencies are inspected.
        :param modules: container of module names that are already selected.
        :param added: accumulator dict (dependency name -> 1) shared across recursive calls.
        :return: the ``added`` dict.
        :raises errors.TaskFailure: if a dependency is not among the available modules.
        """
        if added is None:
            added = {}

        for dep_name in self.dependence.get(mod_name, ()):
            if dep_name not in self.available_modules:
                # Report the dependency that is actually unknown, not the module that requires it.
                raise errors.TaskFailure('Don\'t know module %s. Check make.pl --mode=alias all' % dep_name)
            if dep_name in modules or dep_name in added:
                # Already selected or already visited; skipping visited names also stops the
                # recursion on dependency cycles, which are reported later by _find_seq.
                continue
            logging.info("Module %s depends on module %s which not in modules. May be add it." % (mod_name, dep_name))
            added[dep_name] = 1
            self.add_dependent_module(dep_name, modules, added)
        return added

    def module_list(self, modules, cache_data=None):
        """
        Order the requested modules (plus their missing dependencies) into build steps.

        :param modules: iterable of module names data must be created for.
        :param cache_data: optional mapping of modules whose data already exists; such modules are
                           not added as dependencies and count as installed.
        :return: tuple of tuples; modules without dependencies come first, followed by the steps
                 computed by ``_find_seq``.
        """
        cache_data = {} if cache_data is None else cache_data
        # Modules data has to be created for.
        pending = dict.fromkeys(modules, 1)

        # A requested module pulls in its dependencies, unless their data is already in cache_data.
        for requested in pending.keys():
            for extra in self.add_dependent_module(requested, pending):
                if extra in cache_data:
                    continue
                logging.error("For module %s add module: %s." % (requested, extra))
                pending[extra] = 1

        installed = dict.fromkeys(cache_data, 1)
        independent = [name for name in pending.keys() if name not in self.dependence]
        for name in independent:
            del pending[name]
            installed[name] = 1

        sequence = [tuple(independent)] if independent else []

        # Only modules with dependencies are left in pending;
        # find the order in which their data has to be created.
        dependent_steps = self._find_seq(pending, installed)
        if dependent_steps:
            sequence.extend(dependent_steps)

        sequence = tuple(sequence)
        logging.info("module_list=%s" % (sequence,))
        return sequence

    def _find_seq(self, modules, installed):
        """
        Split modules with dependencies into consecutive build steps.

        Each step holds the modules whose dependencies are all installed already; after a step its
        modules are marked as installed. Both arguments are modified in place.

        :param modules: dict of module names still to be scheduled; emptied on success.
        :param installed: dict of module names whose data is considered available.
        :return: tuple of steps, each a tuple of module names.
        :raises errors.TaskFailure: if no module can be scheduled (cycle or missing dependency).
        """
        ordered_steps = []
        step_no = 0
        while modules:
            # A module is ready once every one of its dependencies is installed.
            ready = tuple(
                name for name in modules.keys()
                if all(dep in installed for dep in self.dependence[name])
            )

            # Every pass must resolve at least one module.
            if not ready:
                cycle = dict((name, {'dependence': self.dependence[name]}) for name in modules)
                raise errors.TaskFailure('Cycle dependence or require module: %s' % cycle)

            logging.info("Step %s: %s" % (step_no, ready))
            ordered_steps.append(ready)
            for name in ready:
                del modules[name]
                installed[name] = 1
            step_no += 1

        return tuple(ordered_steps)


# Module-level alias for the task class defined in this file.
# NOTE(review): presumably the Sandbox task loader looks this name up - confirm.
__Task__ = ReportDataRuntime
