# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function

import datetime
import json
import logging

from sandbox import sdk2
from sandbox.projects.oko.crawl_repositories import upload
from sandbox.projects.oko.task_mixin import OkoTaskMixin

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Assigned to OkoCrawler.Parameters.kill_timeout below.
# Presumably expressed in seconds (i.e. 2 hours) -- TODO confirm the unit
# against the Sandbox SDK.
KILL_TIMEOUT = 2 * 60 * 60
# Regular expression matching a YYYY-MM-DD date string.
# NOTE(review): not referenced by the code visible in this file; the "date"
# task parameter is not validated against it here.
DATE_REGEXP = r"\d{4}-\d{2}-\d{2}"


class OkoCrawler(OkoTaskMixin, sdk2.Task):
    """Crawl repositories for files and publish them to the selected storages.

    Execution flow (see ``on_execute``):

    1. resolve the date of this run into ``self.Context.date``;
    2. collect files from Arcadia and/or GitHub;
    3. keep only the files whose content is a JSON object declaring
       ``dependencies`` or ``devDependencies`` (i.e. a usable package.json);
    4. upload the kept files via ``upload.ResourceUploader`` and/or
       ``upload.S3Uploader``;
    5. when ClickHouse is selected, extract package versions and load the
       Dogma query results through ``ClickhouseUploader``.
    """

    class Requirements(sdk2.Task.Requirements):
        # Presumably MiB (5 GiB of disk, 12 GiB of RAM) -- TODO confirm the
        # units against the Sandbox SDK.
        disk_space = 5 * 1024
        ram = 12 * 1024

    class Parameters(OkoTaskMixin.Parameters):
        kill_timeout = KILL_TIMEOUT
        max_restarts = 1

        # Optional; when left empty the current date is used
        # (see OkoCrawler._set_context).
        date = sdk2.parameters.String("Current date")

        s3_token = sdk2.parameters.YavSecret(
            "Yav secret with S3 tokens",
            default="sec-01e4vg10m9yww93h65y5hx4v96",
            required=True,
        )

        # Its "token" key is read in OkoCrawler._get_commits.
        yql_token = sdk2.parameters.YavSecret(
            "Yav secret with YQL token",
            default="sec-01e59y6570t8p3zgz1wtg58bye",
            required=True,
        )

        # Its "token" key is read in OkoCrawler._get_commits.
        ch_token = sdk2.parameters.YavSecret(
            "Yav secret with ClickHouse token",
            default="sec-01eafccvtzhyy14mb9rtgqt46p",
            required=True,
        )

        # Its "token" key is read in OkoCrawler._get_files.
        github_token = sdk2.parameters.YavSecret(
            "Yav secret with Github PST token",
            default="sec-01f24ydt0m5qax45kerhresn9f",
            required=True,
        )

        # Data sources; checked with `"<name>" in self.Parameters.crawl`.
        with sdk2.parameters.CheckGroup("Get data from") as crawl:
            crawl.values.arcadia = crawl.Value("Arcadia", checked=True)
            crawl.values.github = crawl.Value("GitHub", checked=True)
            crawl.values.dogma = crawl.Value("Dogma", checked=True)

        # Upload targets; checked with `"<name>" in self.Parameters.upload`.
        # The name `upload` is rebound only inside this class body; the
        # methods of OkoCrawler still resolve `upload` to the imported module
        # because class scopes are not visible from nested function scopes.
        with sdk2.parameters.CheckGroup("Upload data to") as upload:
            upload.values.resources = upload.Value("Resources", checked=False)
            upload.values.s3 = upload.Value("S3", checked=True)
            upload.values.clickhouse = upload.Value("ClickHouse", checked=True)

        # Passed as `hosts` to ClickhouseUploader in OkoCrawler._get_commits.
        clickhouse_hosts = sdk2.parameters.List(
            "ClickHouse Hosts",
            default=[
                "sas-0mzu83l5k3lycw2s.db.yandex.net",
                "man-by7b2tj9ip9i4lc4.db.yandex.net",
                "vla-wod5d1tuyzqu2lpv.db.yandex.net",
            ],
            required=True,
        )

    def on_execute(self):
        """Run the task: collect, filter and upload files, then fill ClickHouse."""
        self._set_context()

        files = self._get_files()
        filtered_files = self._filter_files(files)
        # Drop our reference to the unfiltered list before the uploads start
        # (presumably to keep peak memory down).
        del files
        self._upload_files(filtered_files)

        if "clickhouse" in self.Parameters.upload:
            self._extract_packages(filtered_files)
            self._get_commits()

    def _set_context(self):
        """Store the date of this run in ``self.Context.date``.

        The ``date`` parameter is used as given (it is not validated here);
        when it is unset or empty, today's date in ISO format is used.
        """
        self.Context.date = self.Parameters.date or datetime.date.today().isoformat()

    def _get_files(self):
        """Collect files from every source selected in the ``crawl`` parameter.

        :return: list with the items returned by ``ArcadiaDownloader.get_files``
            followed by those returned by ``GithubCrawler.get_files``.
        """
        # Imported lazily, as in the rest of this class.
        from sandbox.projects.oko.crawl_repositories.arcadia import ArcadiaDownloader
        from sandbox.projects.oko.crawl_repositories.github import GithubCrawler

        files = []

        if "arcadia" in self.Parameters.crawl:
            arc = ArcadiaDownloader()
            files.extend(arc.get_files())
        else:
            logger.info("Not downloading files from Arcadia!")

        if "github" in self.Parameters.crawl:
            gh = GithubCrawler()
            files.extend(gh.get_files(token=self.Parameters.github_token.data()['token']))
        else:
            logger.info("Not downloading files from GitHub!")

        return files

    def _get_commits(self):
        """Run the Dogma YQL queries and write their results to ClickHouse.

        Nothing is queried or written unless "dogma" is selected in the
        ``crawl`` parameter.
        """
        from sandbox.projects.oko.crawl_repositories.clickhouse_upload import ClickhouseUploader
        from sandbox.projects.oko.crawl_repositories.yql_manifest import (
            ARCADIA_DOGMA_ALIVE_QUERY,
            ARCADIA_DOGMA_COMMIT_QUERY,
            YQLManifest,
        )

        # NOTE(review): both clients are built (and both secrets read) even
        # when "dogma" is not selected; kept as is because the constructors'
        # side effects are not visible from this file.
        uploader = ClickhouseUploader(
            token=self.Parameters.ch_token.data()["token"],
            hosts=self.Parameters.clickhouse_hosts,
        )
        client = YQLManifest.make_client(token=self.Parameters.yql_token.data()["token"])

        if "dogma" in self.Parameters.crawl:
            # Both requests are created before either result is read.
            commit_request = YQLManifest.make_request(client, ARCADIA_DOGMA_COMMIT_QUERY)
            alive_request = YQLManifest.make_request(client, ARCADIA_DOGMA_ALIVE_QUERY)

            # Sets are used, so duplicate rows collapse into one. The first
            # column of a commit row is formatted as a text timestamp; the
            # remaining four columns are passed through untouched.
            commit_result = {
                (unicode(t.strftime("%Y-%m-%d %H:%M:%S")), f1, f2, f3, f4)
                for t, f1, f2, f3, f4 in YQLManifest.get_results(commit_request).rows
            }
            logger.debug("commit_result length = %s", len(commit_result))

            # Every two-column "alive" row is prefixed with the date of this run.
            alive_result = {
                (self.Context.date, f1, f2)
                for f1, f2 in YQLManifest.get_results(alive_request).rows
            }
            logger.debug("alive_result length = %s", len(alive_result))

            uploader.write_dogma(commit_result, alive_result)

    def _upload_files(self, files):
        """Upload ``files`` to every target selected in the ``upload`` parameter.

        :param files: the filtered files to hand to the uploaders.
        """
        if "resources" in self.Parameters.upload:
            upload.ResourceUploader(self).upload(files)
        else:
            logger.debug("Not uploading to resources!")

        if "s3" in self.Parameters.upload:
            upload.S3Uploader(self).upload(files)
        else:
            logger.debug("Not uploading to S3!")

        # The files themselves never go to ClickHouse; on_execute handles the
        # "clickhouse" target separately after this method returns.
        if "clickhouse" in self.Parameters.upload:
            logger.info("Uploading files to ClickHouse is not supported")

    @staticmethod
    def _filter_files(files):
        """Return the items of ``files`` accepted by ``_check_valid_package_json``.

        :param files: list of crawled file mappings.
        :return: new list with the valid items, in their original order.
        """
        logger.info("Got %s files to filter", len(files))
        result = [item for item in files if OkoCrawler._check_valid_package_json(item)]
        logger.info("%s files are valid", len(result))
        return result

    @staticmethod
    def _check_valid_package_json(file):
        """Tell whether a crawled file is a package.json declaring dependencies.

        :param file: mapping whose ``"file"`` item holds the raw JSON text.
        :return: ``True`` only when the text parses to a JSON object that has
            a ``dependencies`` or ``devDependencies`` key, ``False`` otherwise.
        """
        try:
            package_json = json.loads(file["file"])
        except (TypeError, ValueError):
            # ValueError: malformed JSON. TypeError: the content is not text
            # at all (e.g. None). Either way this is not a usable package.json.
            return False

        # Valid JSON is not necessarily an object. For numbers, booleans and
        # null the `in` test below would raise TypeError and abort the whole
        # task; for strings it would silently turn into a substring match.
        if not isinstance(package_json, dict):
            return False

        return "dependencies" in package_json or "devDependencies" in package_json

    def _extract_packages(self, files):
        """Delegate to ``PackageExtractor(self).extract_versions``.

        :param files: the filtered files to extract package versions from.
        :return: whatever ``extract_versions`` returns.
        """
        from sandbox.projects.oko.crawl_repositories.packages import PackageExtractor

        return PackageExtractor(self).extract_versions(files)
