# -*- coding: utf-8 -*-

from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import paths
from sandbox.sandboxsdk.channel import channel

from sandbox.projects import resource_types
from sandbox.projects.common import apihelpers
from sandbox.projects.NewsAutoreleasableTask import NewsAutoreleasableTask
from sandbox.projects.NewsAutoreleasableTask import Params as NewsAutoreleasableTaskParams

import base64
import bz2
import json
import logging
import os
import re
import subprocess
from contextlib import closing
from urllib import urlencode
from urllib2 import urlopen
from urlparse import urljoin


class NEWS_REPORT_DATA_UNITED(resource_types.NEWS_REPORT_DATA):
    # Resource type used for the united (prod + exp) tar.gz archive built by
    # GenNewsReportData.create_united_data_resource. It adds nothing of its
    # own on top of resource_types.NEWS_REPORT_DATA; it only gives the united
    # archive a distinct resource type.
    pass


class Params(object):
    # Task-specific input parameters of GenNewsReportData. The `params` list
    # at the bottom is what gets appended to the task's input_parameters.

    # Base URL of the newsd instance the regular report data is fetched from.
    class NewsdUrl(parameters.SandboxStringParameter):
        name = 'newsd_url'
        description = 'newsd http url'
        default_value = 'http://data-int.news.yandex.ru/'

    # Base URL of the production newsd; read when building the united resource.
    class NewsdProdUrl(parameters.SandboxStringParameter):
        name = 'newsd_prod_url'
        description = 'newsd prod http url'
        default_value = 'http://data-int.news.yandex.ru/'

    # Base URL of the experimental newsd; read when building the united resource.
    class NewsdExpUrl(parameters.SandboxStringParameter):
        name = 'newsd_exp_url'
        description = 'newsd exp http url'
        default_value = 'http://data-int.news.yandex.ru/exp'

    # Attributes to set on the created resources; the same dictionary is also
    # used as the query when looking up the previous resource.
    class Attrs(parameters.DictRepeater, parameters.SandboxStringParameter):
        name = 'attrs'
        description = 'Set resource attrs'

    # Whitespace-separated list of file keys requested from newsd.
    class FilesList(parameters.SandboxStringParameter):
        name = 'files'
        description = 'List of files to get from newsd'
        default_value = (
            'agencies_en.store agencies_en.json '
            'agencies.store agencies.json '
            'feeds.store feeds.json '
            'newsdata2.json '
            'ratings_en.store ratings_en.json '
            'ratings.store ratings.json'
        )

    # When set, an additional archive with both prod and exp data is produced.
    class CreateUnitedData(parameters.SandboxBoolParameter):
        name = 'is_united_data'
        description = 'Union (exp+prod) for Qloud Beta'

    params = [
        NewsdUrl,
        FilesList,
        Attrs,
        CreateUnitedData,
        NewsdProdUrl,
        NewsdExpUrl,
    ]


def compare_file(diff_path, old_file_path, new_file_path):
    '''
    Run ``diff -uw`` on two files and store the result in ``diff_path``.

    The unified diff is written to ``<diff_path>/<basename of old file>.diff``
    and anything diff prints on stderr to the same name with an extra
    ``.err`` suffix.

    :param diff_path: existing directory that receives the output files
    :param old_file_path: path of the previous version of the file
    :param new_file_path: path of the new version of the file
    :return: True when diff produced any output (the files differ ignoring
        whitespace), False otherwise
    :raises Exception: when diff wrote to stderr or exited with a status
        other than 0 (identical) or 1 (different)
    '''
    diff_file_path = os.path.join(diff_path, os.path.basename(old_file_path) + '.diff')
    diff_err_file_path = diff_file_path + '.err'

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact; '--' stops diff from treating a path that starts
    # with '-' as an option.
    with open(diff_file_path, 'wb') as diff_fd, open(diff_err_file_path, 'wb') as err_fd:
        exit_code = subprocess.call(
            ['diff', '-uw', '--', old_file_path, new_file_path],
            stdout=diff_fd,
            stderr=err_fd
        )

    with open(diff_err_file_path, 'r') as err_fd:
        err_msg = err_fd.read()

    if err_msg:
        raise Exception(err_msg)

    # diff exits with 0 for "no differences" and 1 for "differences found";
    # any other status means it could not do its job.
    if exit_code not in (0, 1):
        raise Exception('diff exited with code %d' % exit_code)

    return os.path.getsize(diff_file_path) > 0


class GenNewsReportData(NewsAutoreleasableTask):
    '''
    Downloads the data needed by the report from slave_newsd and builds
    a resource from it for deployment.
    '''

    type = 'GEN_NEWS_REPORT_DATA'
    cores = 1

    input_parameters = NewsAutoreleasableTaskParams.params + Params.params

    do_release = 'do_release'

    @staticmethod
    def get_data(url, parse=False):
        '''
        Fetch ``url`` and return its body decoded as JSON.

        :param url: full URL to request
        :param parse: unused; kept so existing callers keep working
        :return: the decoded JSON document
        '''
        logging.info('Fetching %s', url)
        with closing(urlopen(url)) as stream:
            return json.load(stream)

    @staticmethod
    def write_bdata(path, data):
        '''
        Decode ``data`` (base64 of a bz2-compressed payload) and write the
        decompressed bytes to ``path``.

        :param path: destination file path; its directory must exist
        :param data: base64-encoded, bz2-compressed file content
        '''
        logging.info('Writing %s', path)
        # The payload is arbitrary bytes, so the file is opened in binary mode.
        with open(path, 'wb') as fd:
            fd.write(bz2.decompress(base64.b64decode(data)))

    def get_stable_services(self):
        '''Return the list of stable services; this task has none.'''
        return []

    def on_release(self, additional_params):
        '''
        Set the ``ttl`` attribute to '1' on every resource of the task that
        already has a ``ttl`` attribute, then run the base release logic.

        :param additional_params: passed through to
            NewsAutoreleasableTask.on_release
        '''
        for res in self.list_resources():
            if channel.sandbox.get_resource_attribute(res, 'ttl'):
                channel.sandbox.set_resource_attribute(res, 'ttl', '1')

        NewsAutoreleasableTask.on_release(self, additional_params)

    def fetch_data(self, urlbase, files):
        '''
        Request the given files from the newsd ``meta`` handler.

        :param urlbase: base URL of newsd, with or without a trailing slash
        :param files: whitespace-separated list of file keys
        :return: decoded JSON response; the callers iterate it as a mapping
            of file name to encoded file content
        '''
        query = 'meta?' + urlencode(
            (
                ('ns', 'news'),
                ('id', 'news'),
                ('key', files.split()),
            ),
            doseq=True
        )

        # urljoin() replaces the last path segment of a base that does not end
        # with '/', so 'http://host/exp' + 'meta?...' would silently become
        # 'http://host/meta?...'. Normalise the base to keep its whole path.
        # NOTE(review): assumes the exp endpoint is served at <base>/meta —
        # confirm against the newsd deployment.
        if not urlbase.endswith('/'):
            urlbase += '/'

        return self.get_data(urljoin(urlbase, query))

    def create_united_data_resource(self):
        '''
        Build a single tar.gz with prod data under ``current/`` and exp data
        under ``exp/`` and register it as a NEWS_REPORT_DATA_UNITED resource.

        :raises subprocess.CalledProcessError: when tar fails
        '''
        files = self.ctx.get(Params.FilesList.name)
        archive_name = 'news_report_data.tar.gz'

        prod_data = self.fetch_data(self.ctx.get(Params.NewsdProdUrl.name), files)
        self.create_files_from_data(prod_data, 'current')

        exp_data = self.fetch_data(self.ctx.get(Params.NewsdExpUrl.name), files)
        self.create_files_from_data(exp_data, 'exp')

        # Fail loudly instead of publishing a missing or truncated archive.
        subprocess.check_call(['tar', '-czvf', archive_name, 'current', 'exp'])

        result = self.create_resource(self.descr, archive_name, NEWS_REPORT_DATA_UNITED, arch='any')
        self.mark_resource_ready(result.id)

    def create_files_from_data(self, data, dest_folder):
        '''
        Write every entry of ``data`` as a file below ``dest_folder``.

        :param data: mapping of relative file name to encoded file content
            (see write_bdata for the encoding)
        :param dest_folder: directory to write into; created even when
            ``data`` is empty
        :return: dict mapping each file name to the path it was written to
        '''
        paths.make_folder(dest_folder)
        written = {}

        for (fname, file_data) in data.iteritems():
            fullpath = os.path.join(dest_folder, fname)
            paths.make_folder(os.path.dirname(fullpath))
            self.write_bdata(fullpath, file_data)
            written[fname] = fullpath

        return written

    def _diff_with_previous(self, old_resource, new_data, attrs):
        '''
        Compare freshly written files with the previous resource and publish
        the differences as a NEWS_REPORT_DATA_DIFF resource.

        :param old_resource: previous resource to compare against
        :param new_data: dict of file name -> path of the new file
        :param attrs: attributes copied onto the diff resource
        :return: True when anything differs from the previous resource
        :raises Exception: when at least one comparison failed
        '''
        old_resource_path = self.sync_resource(old_resource)
        diff_path = self.abs_path('news_report_data_diff')
        paths.make_folder(diff_path)

        old_data = set()
        have_diff = False
        have_errors = False

        # NOTE(review): only top-level files of the old resource are looked
        # at, so new files that live in sub-directories are always reported
        # as new — confirm this is intended.
        for old_file_name in os.listdir(old_resource_path):
            old_file_path = os.path.join(old_resource_path, old_file_name)

            if not os.path.isfile(old_file_path):
                continue

            old_data.add(old_file_name)

            if old_file_name not in new_data:
                have_diff = True
                with open(os.path.join(diff_path, old_file_name + '.removed'), 'w') as fd:
                    fd.write('file removed\n')
                continue

            # A '.store' file is not compared when its '.json' counterpart is
            # part of the new data as well.
            json_file_name = re.sub(r'\.store$', '.json', old_file_name)
            if json_file_name != old_file_name and json_file_name in new_data:
                continue

            try:
                if compare_file(diff_path, old_file_path, new_data[old_file_name]):
                    have_diff = True
            except Exception as e:
                # Keep comparing the remaining files; the task is failed once
                # the diff resource has been published.
                logging.error(e)
                have_errors = True

        for new_file_name in new_data:
            if new_file_name in old_data:
                continue

            have_diff = True
            # The marker is named after the new file itself.
            marker_path = os.path.join(diff_path, os.path.basename(new_file_name) + '.new')
            with open(marker_path, 'w') as fd:
                fd.write('new file\n')

        if have_diff or have_errors:
            diff_resource_attrs = dict(attrs)
            diff_resource_attrs['previous_resource'] = old_resource.id
            diff_resource = self.create_resource(
                self.descr,
                'news_report_data_diff',
                resource_types.NEWS_REPORT_DATA_DIFF,
                arch='any',
                attributes=diff_resource_attrs
            )
            self.mark_resource_ready(diff_resource.id)

        if have_errors:
            raise Exception("Something went wrong")

        return have_diff

    def do_execute(self):
        '''
        Fetch report data from newsd, publish it as NEWS_REPORT_DATA resources
        and diff it against the previous resource with the same attributes.

        :return: True when the new data differs from the previous resource or
            when there is no previous resource, False otherwise
        :raises Exception: when comparing with the previous resource failed
        '''
        files = self.ctx.get(Params.FilesList.name)
        # The attrs parameter may be unset; treat that as "no attributes".
        attrs = self.ctx.get(Params.Attrs.name) or {}

        if self.ctx.get(Params.CreateUnitedData.name, False):
            self.create_united_data_resource()

        data = self.fetch_data(self.ctx.get(Params.NewsdUrl.name), files)

        # Besides the plain resource, a cluster-specific one is created when
        # a 'news_cluster' attribute is given and a matching type exists.
        suffixes = ['']
        if 'news_cluster' in attrs:
            suffixes.append('_' + attrs['news_cluster'].upper())

        old_resource_query = dict(attrs)
        release_to = self.ctx.get(NewsAutoreleasableTaskParams.ReleaseTo.name)
        if release_to:
            old_resource_query['released'] = release_to

        # Looked up before the new resources are created, as in the original
        # statement order.
        old_resource = apihelpers.get_last_resource_with_attrs(
            'NEWS_REPORT_DATA',
            old_resource_query,
            all_attrs=True
        )

        new_data = {}

        for suffix in suffixes:
            resource_type = getattr(resource_types, 'NEWS_REPORT_DATA' + suffix, None)
            if resource_type is None:
                continue

            result = self.create_resource(
                self.descr,
                'news_report_data' + suffix,
                resource_type,
                arch='any',
                attributes=attrs
            )
            new_data.update(self.create_files_from_data(data, result.path))
            self.mark_resource_ready(result.id)

        if not old_resource:
            # Nothing to compare with: everything counts as changed.
            return True

        return self._diff_with_previous(old_resource, new_data, attrs)


__Task__ = GenNewsReportData
