# coding=utf-8

from __future__ import unicode_literals

import logging
import os
from urlparse import urlsplit, urlunsplit
from sandbox import sdk2
from sandbox.projects.common import binary_task
from sandbox.projects.resource_types import PLAIN_TEXT_QUERIES

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class LoadRequests(binary_task.LastBinaryTaskRelease, sdk2.Task):
    """Load requests from the latest table of a YT folder into a text resource.

    The task lists ``folder_path`` on the configured YT cluster, picks the table
    whose name sorts last, reads up to ``max_request_number`` values of
    ``column`` from it, strips scheme and host from every value and stores the
    result, one request per line, in a ``PLAIN_TEXT_QUERIES`` resource.
    """

    class Requirements(sdk2.Requirements):
        # NOTE(review): ``Caches`` used to be declared directly on the task
        # class; it is nested here on the assumption that sdk2 only honours it
        # as ``Requirements.Caches`` -- confirm against the Sandbox docs.
        class Caches(sdk2.Requirements.Caches):
            pass  # means that task do not use any shared caches

    class Parameters(sdk2.Parameters):
        ext_params = binary_task.binary_release_parameters(stable=True)
        yt_token_owner = sdk2.parameters.String(
            "YT token owner",
            default="SEARCH-RELEASERS"
        )
        yt_cluster = sdk2.parameters.String(
            "YT Cluster",
            default="arnold"
        )
        folder_path = sdk2.parameters.String(
            "Path to table to read requests from",
            default="//logs/recommender-reqans-log/1d"
        )
        max_request_number = sdk2.parameters.Integer(
            "Max number of requests to be loaded",
            default=100
        )
        column = sdk2.parameters.String(
            "Column with request",
            default="fullRequest"
        )

    def _cut_host(self, request):
        """Return ``request`` with its scheme and network location removed.

        Path, query and fragment are kept exactly as ``urlsplit`` parsed them.
        """
        url_parts = urlsplit(request)._replace(scheme="", netloc="")
        return urlunsplit(url_parts)

    def _get_requests(self, folder_path):
        # type: (str) -> list[str]
        """Read host-less requests from the last (by sorted name) table in ``folder_path``.

        Returns an empty list when the folder contains no tables. At most
        ``Parameters.max_request_number`` requests are returned.
        """
        logger.info("Reading folder %s...", folder_path)

        # Imported lazily, as in the original code, so the module can be
        # imported where the YT client is not available.
        import yt.wrapper as yt
        yt.config['token'] = sdk2.Vault.data(self.Parameters.yt_token_owner, 'yt_token')
        yt.config['proxy']['url'] = self.Parameters.yt_cluster

        tables = sorted(yt.list(folder_path))
        if not tables:
            logger.error("No tables in folder %s!", folder_path)
            return []

        # The "latest" table is simply the one whose name sorts last.
        latest_table = tables[-1]
        table_path = os.path.join(folder_path, latest_table)
        logger.info("Full latest table path is %s...", table_path)

        limit = self.Parameters.max_request_number
        column = self.Parameters.column

        reqs = []
        for row in yt.read_table(table_path):
            if len(reqs) >= limit:
                break
            reqs.append(self._cut_host(row[column]))
        logger.info("Read %s rows.", len(reqs))

        return reqs

    def _write_requests(self, requests, file):
        """Write ``requests`` to the path ``file``, one per line, no trailing newline.

        Items may be byte strings or text strings; text is encoded as UTF-8 and
        bytes are written unchanged.
        """
        logger.info("Writing %s requests...", len(requests))
        # With ``unicode_literals`` a plain '\n' separator is a text string, so
        # joining byte-string requests would implicitly ASCII-decode them under
        # Python 2 and fail on any non-ASCII byte. Normalising to bytes and
        # writing in binary mode avoids that implicit conversion.
        encoded = [
            req if isinstance(req, bytes) else req.encode("utf-8")
            for req in requests
        ]
        with open(file, "wb") as req_file:
            req_file.write(b"\n".join(encoded))

    def on_execute(self):
        """Task entry point: read the requests and store them in a new resource."""
        requests = self._get_requests(self.Parameters.folder_path)
        requests_resource = PLAIN_TEXT_QUERIES(self, 'Resource with loaded requests.', 'loaded_requests.txt')
        self._write_requests(requests, str(requests_resource.path))
