# -*- coding: utf-8 -*-

import datetime as dt
import logging
import os
from shutil import copyfile

import sandbox.common.types.task as ctt
import sandbox.sdk2 as sdk2
from sandbox.common.errors import TaskFailure
from sandbox.common.types.misc import NotExists
from sandbox.projects import resource_types
from sandbox.projects.common.nanny.nanny import ReleaseToNannyTask2
from sandbox.projects.hqcg import HQCG_SITEMAP
from sandbox.projects.yql.RunYQL2 import RunYQL2
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.sandboxsdk.sandboxapi import RELEASE_STABLE

# Module-level logger named after this module (standard `logging` convention).
logger = logging.getLogger(__name__)


class HqcgSitemap(sdk2.Task, ReleaseToNannyTask2):
    """\
        Build High Quality Content Sitemap
    """

    # Workflow implemented by on_execute():
    #   1. start a RunYQL2 subtask that dumps public article urls into a YT table;
    #   2. after the subtask succeeds, read that table and write paginated
    #      sitemap files plus two sitemap index files into ./sitemap;
    #   3. publish the directory as an HQCG_SITEMAP resource and release it.

    # Maximum number of <url> entries written to a single sitemap file
    # (the sitemaps.org protocol caps one file at 50,000 urls).
    _MAX_URLS_PER_SITEMAP = 50000
    # UpdateTime value that is treated as "no update time".
    # NOTE(review): 10800 s == 3 h; presumably a zero timestamp shifted by a
    # UTC+3 offset -- confirm against the source data.
    _UNSET_UPDATE_TIME = 10800
    # Directory (relative to the task working directory) holding the output.
    _SITEMAP_DIR = 'sitemap'
    # W3C datetime layout used for <lastmod>, always rendered in UTC.
    _LASTMOD_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

    class Requirements(sdk2.Requirements):
        cores = 1
        # yandex-yt provides yt.wrapper, imported lazily in on_execute().
        environments = [
            PipEnvironment('yandex-yt')
        ]

        # Empty caches declaration.
        # NOTE(review): presumably tells Sandbox the task needs no shared
        # caches -- confirm against the Sandbox documentation.
        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Task.Parameters):
        # YT directory (without the leading `//`) used both as the YQL tmp
        # folder and as the parent of the intermediate result table.
        yt_tmp_dir = sdk2.parameters.String('Yt tmp dir', default='home/search-functionality/hqcg/tmp', required=True)
        # YT pool the YQL query runs in.
        yt_pool = sdk2.parameters.String('Yt pool', default='rearrange', required=True)
        kill_timeout = 3600

    def _format_query(self, yt_dst_table):
        """Return the YQL query text that fills `yt_dst_table`.

        The query selects url, creation time and update time (both converted
        to seconds) of every article version whose state is 'public' and
        writes them, with truncation, into `yt_dst_table`.

        :param yt_dst_table: destination table path without the leading `//`.
        :return: query text with the task's tmp folder and pool substituted.
        """
        return '''
            USE `hahn`;

            pragma yt.InferSchema;
            pragma yt.TmpFolder = '{tmp_folder}';
            pragma yt.Pool = '{yt_pool}';

            insert into `{dst_table}` with truncate

            select
                url as Url,
                DateTime::ToSeconds(DateTime::MakeTimestamp(DateTime::ParseIso8601(created_at))) as PublishTime,
                DateTime::ToSeconds(DateTime::MakeTimestamp(DateTime::ParseIso8601(updated_at))) as UpdateTime
            from
                `home/search-functionality/hqcg/backup/prod/latest/article_versions`
            where
                Yson::ConvertToString(state) = 'public'
            '''.format(tmp_folder=self.Parameters.yt_tmp_dir, yt_pool=self.Parameters.yt_pool, dst_table=yt_dst_table)

    def _run_yql_task(self, query):
        """Create and enqueue a RunYQL2 subtask executing `query`.

        :param query: YQL query text.
        :return: id of the enqueued subtask.
        """
        input_parameters = {
            RunYQL2.Parameters.query.name: query,
            RunYQL2.Parameters.yql_token_vault_name.name: 'hqcg_yql_token',
            RunYQL2.Parameters.publish_query.name: True,
            RunYQL2.Parameters.trace_query.name: True,
            RunYQL2.Parameters.retry_period.name: 60,
            RunYQL2.Parameters.use_v1_syntax.name: True
        }
        # Resources are passed to the subtask by id; unset values are dropped.
        # `.items()` (rather than `.iteritems()`) works on Python 2 and 3 alike.
        subtask_parameters = {
            key: value.id if isinstance(value, resource_types.AbstractResource) else value
            for key, value in input_parameters.items() if value is not NotExists
        }
        yql_task = RunYQL2(
            self,
            description="Find public articles",
            notifications=self.Parameters.notifications,
            priority=ctt.Priority(ctt.Priority.Class.SERVICE, ctt.Priority.Subclass.HIGH),
            create_sub_task=True,
            **subtask_parameters
        )
        yql_task.enqueue()
        return yql_task.id

    def _iter_public_articles(self, yt_client, yt_table):
        """Yield `(url, publish_time, update_time)` for every row of `yt_table`.

        `update_time` is None when the column is absent/null or equals
        `_UNSET_UPDATE_TIME`; `publish_time` is None when absent/null.

        :param yt_client: YT client exposing `read_table`.
        :param yt_table: table path without the leading `//`.
        """
        for row in yt_client.read_table('//{}'.format(yt_table), format='json'):
            update_time = row.get('UpdateTime')
            if update_time == self._UNSET_UPDATE_TIME:
                update_time = None
            yield row['Url'], row.get('PublishTime'), update_time

    def _format_lastmod(self, publish_time, update_time):
        """Return the <lastmod> text for an article, or None if unknown.

        The update time wins over the publish time when it is available.
        """
        timestamp = update_time if update_time is not None else publish_time
        if timestamp is None:
            return None
        return dt.datetime.utcfromtimestamp(int(timestamp)).strftime(self._LASTMOD_FORMAT)

    def _write_sitemap(self, path, articles):
        """Write one `<urlset>` sitemap file at `path` for `articles`."""
        from xml.sax.saxutils import escape

        with open(path, 'wt') as sitemap:
            sitemap.write('<?xml version="1.0" encoding="UTF-8"?>')
            sitemap.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">')
            for url, publish_time, update_time in articles:
                sitemap.write('<url><loc>')
                # Urls may contain `&`, `<` or `>`, which must be entity-escaped
                # for the file to stay well-formed XML.
                # NOTE(review): assumes urls are stored unescaped -- confirm.
                sitemap.write(escape(url))
                sitemap.write('</loc>')
                lastmod = self._format_lastmod(publish_time, update_time)
                # <lastmod> is optional; skip it instead of crashing on
                # rows that carry no usable timestamp.
                if lastmod is not None:
                    sitemap.write('<lastmod>')
                    sitemap.write(lastmod)
                    sitemap.write('</lastmod>')
                sitemap.write('</url>')
            sitemap.write('</urlset>')

    @staticmethod
    def _write_sitemap_index(path, locations):
        """Write a `<sitemapindex>` file at `path` listing `locations`."""
        with open(path, 'wt') as index_file:
            index_file.write('<?xml version="1.0" encoding="UTF-8"?>')
            index_file.write('<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">')
            for location in locations:
                index_file.write('<sitemap><loc>')
                index_file.write(location)
                index_file.write('</loc></sitemap>')
            index_file.write('</sitemapindex>')

    def _build_sitemaps(self, articles, directory):
        """Write paginated sitemaps and both index files into `directory`.

        For every page `N` the files `sitemap-N.xml` and its copy
        `talk-sitemap-N.xml` are produced; `sitemap-index.xml` and
        `talk-sitemap-index.xml` reference them. At least one (possibly
        empty) page is always written.

        :param articles: iterable of `(url, publish_time, update_time)`.
        :param directory: existing directory to write into.
        :return: number of sitemap pages written.
        """
        from itertools import islice

        # A single iterator is shared by all pages so that every page
        # continues where the previous one stopped; re-reading the source per
        # page would repeat the first page forever once the table holds
        # _MAX_URLS_PER_SITEMAP rows or more.
        rows = iter(articles)
        page_count = 0
        while True:
            page = list(islice(rows, self._MAX_URLS_PER_SITEMAP))
            # Row count was an exact multiple of the page size: nothing left,
            # so do not emit a trailing empty sitemap.
            if not page and page_count > 0:
                break

            sitemap_path = os.path.join(directory, 'sitemap-{}.xml'.format(page_count))
            self._write_sitemap(sitemap_path, page)
            copyfile(sitemap_path, os.path.join(directory, 'talk-sitemap-{}.xml'.format(page_count)))
            page_count += 1

            if len(page) < self._MAX_URLS_PER_SITEMAP:
                break

        self._write_sitemap_index(
            os.path.join(directory, 'sitemap-index.xml'),
            ['https://yandex.ru/talk/sitemap-{}.xml'.format(page) for page in range(page_count)]
        )
        self._write_sitemap_index(
            os.path.join(directory, 'talk-sitemap-index.xml'),
            ['https://yandex.ru/turbo/talk-sitemap-{}.xml'.format(page) for page in range(page_count)]
        )
        return page_count

    def on_execute(self):
        """Run the YQL export, build the sitemap resource and release it.

        The first pass enqueues the YQL subtask and suspends via
        `sdk2.WaitTask`; the code after the memoized stage runs once the
        subtask has finished.

        :raises TaskFailure: if the YQL subtask did not end in SUCCESS.
        """
        # Imported here because yt.wrapper comes from the task's pip environment.
        from yt.wrapper import YtClient
        yt_dst_table = self.Parameters.yt_tmp_dir + '/hqcg_public_articles'

        with self.memoize_stage.run_yql:
            query = self._format_query(yt_dst_table)

            logger.info('Run task with yql query: %s', query)
            yql_task_id = self._run_yql_task(query)
            self.Context.yql_task_id = yql_task_id
            logger.info('Wait output %s from %s task id', yql_task_id, RunYQL2.Parameters.result_operation_id)

            raise sdk2.WaitTask([yql_task_id], ctt.Status.Group.FINISH + ctt.Status.Group.BREAK, wait_all=True)

        yql_task_id = self.Context.yql_task_id
        yql_task = sdk2.Task.find(id=yql_task_id, children=True, status=ctt.Status.SUCCESS).order(-sdk2.Task.id).first()
        if not yql_task:
            raise TaskFailure('Child task in bad state: {}'.format(yql_task_id))

        yt_client = YtClient(proxy='hahn', token=sdk2.Vault.data(self.owner, "hqcg_yt_token"))

        # Files are addressed by explicit paths instead of changing the
        # working directory, so a failure cannot leave the process in ./sitemap.
        os.mkdir(self._SITEMAP_DIR)
        self._build_sitemaps(self._iter_public_articles(yt_client, yt_dst_table), self._SITEMAP_DIR)

        res = sdk2.ResourceData(HQCG_SITEMAP(self, 'HQCG sitemap daily', self._SITEMAP_DIR, ttl='inf'))
        res.ready()

        # Release the freshly built resource right away as a stable release.
        release_params = {
            'release_comments': 'automatic release',
            'release_subject': 'daily sitemap',
            'release_status': RELEASE_STABLE,
            'email_notifications': {
                'cc': [],
                'to': []
            },
            'releaser': self.owner
        }
        ReleaseToNannyTask2.on_release(self, release_params)
        sdk2.Task.on_release(self, release_params)
