from itertools import imap
from urlparse import urlparse
from sandbox import sdk2
import sandbox.common.errors as errors
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.projects.masstransit.common.utils import DateParameter


# YT cluster name: used as the YQL client's database and as the cluster
# prefix of the log table path in the query.
YT_CLUSTER = "hahn"
# Key passed to sdk2.Vault.data() to obtain the YQL client token.
TOKEN_KEY = "robot_robot-mtr_yql_token"


class MapsMasstransitPrepareProductionLogAmmo(sdk2.Task):
    """Build an ammo resource from one day of production maps logs.

    The task runs a YQL query over ``logs/maps-log/1d/<log_date>`` on the
    ``YT_CLUSTER`` cluster, keeps rows with status '200' for the given vhost
    whose ``request`` column matches (or, with ``inverse_filter``, does not
    match) any of ``handles``, and writes up to ``requests_number`` of them
    into a new resource of type ``resource_type``. Every output line has the
    form ``<request> <tag>``, where the tag is the URL path of the request.
    The created resource is exposed through the ``ammo`` output parameter.
    """

    class Requirements(sdk2.Task.Requirements):
        cores = 1
        environments = [
            PipEnvironment("yql"),
            PipEnvironment("yandex-yt")  # required to read full results
        ]

        # NOTE(review): empty Caches subclass -- presumably declares that the
        # task needs no shared caches; confirm against the Sandbox SDK docs.
        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Parameters):
        kill_timeout = 2400  # 40 min

        # Name of the resource type looked up via sdk2.Resource[...].
        resource_type = sdk2.parameters.String(
            "Resource type to be created",
            required=True)
        # Compared for equality with the `vhost` column of the log table.
        vhost = sdk2.parameters.String(
            "Vhost - value by which log is filtered",
            required=True)
        # Used as the LIMIT of the query.
        requests_number = sdk2.parameters.Integer(
            "Number of requests that are stored from log",
            required=True)
        # Selects the daily log table and names the output file.
        log_date = DateParameter(
            "Date of log to filter",
            default=DateParameter.day_before_yesterday())
        # Joined with '|' into a single regular expression alternation.
        handles = sdk2.parameters.List(
            "List of handles to grep from logs",
            required=True)
        # When set, the regexp condition is negated (NOT REGEXP).
        inverse_filter = sdk2.parameters.Bool(
            "Take everything from log except handles",
            default=False)

        with sdk2.parameters.Output:
            ammo = sdk2.parameters.Resource("Result ammo", required=True)

    @staticmethod
    def _process_ammo_row(row):
        """Convert one single-column result row into an ammo line.

        :param row: sequence holding exactly one item, the request string.
        :return: unicode line ``<request> <path>`` terminated by a newline,
            where ``<path>`` is the path component returned by ``urlparse``.
        :raises AssertionError: if the row does not hold exactly one column.
        """
        # The query selects a single column; anything else means the query
        # and this parser no longer agree.
        assert len(row) == 1, "Expected exactly one column in result row"
        request = row[0]
        return u"{request} {tag}\n".format(
            request=request,
            tag=urlparse(request).path)

    def _prepare_query(self):
        """Render the YQL query text from the task parameters.

        :return: query string selecting the ``request`` column from the daily
            log table, filtered by vhost, status '200' and the handles
            regexp, ordered by a hash of the record index and limited to
            ``requests_number`` rows.
        """
        # NOTE(review): vhost and handles are interpolated into the query
        # text verbatim, so a single quote in either value would break or
        # alter the query -- confirm these parameters are always trusted.
        request_template = """
            SELECT request FROM {yt_cluster}.`logs/maps-log/1d/{date}`
            WHERE vhost = '{vhost}' AND
                status='200' AND
                request {inverse} REGEXP '{handles}'
            ORDER BY Digest::NumericHash(TableRecordIndex())
            LIMIT {requests_number};
        """
        # All handles form one alternation group: (h1|h2|...).
        handles_regexp = '(' + '|'.join(self.Parameters.handles) + ')'
        return request_template.format(
            yt_cluster=YT_CLUSTER,
            date=self.Parameters.log_date,
            vhost=self.Parameters.vhost,
            inverse='NOT' if self.Parameters.inverse_filter else '',
            handles=handles_regexp,
            requests_number=self.Parameters.requests_number
        )

    def _run_query(self, query):
        """Run the query, wait for it and fail the task if it did not succeed.

        :param query: YQL query object created by the client.
        :raises errors.TaskFailure: if ``query.is_success`` is false; the
            query status and any reported errors are added to the task info
            first.
        """
        query.run()
        query.get_results()  # wait for result
        if not query.is_success:
            self.set_info("YQL request status: {0}".format(query.status))
            if query.errors:
                self.set_info("Request errors: {0}".format(
                    "; ".join(map(str, query.errors))))
            raise errors.TaskFailure("YQL query failed")

    def _write_result(self, ammo_rows):
        """Create the ammo resource and fill its file with the given lines.

        :param ammo_rows: iterable of unicode lines, each already terminated
            by a newline.

        The resource is created with the configured ``resource_type``, its
        file is named ``<log_date>.txt``, and it is stored in the ``ammo``
        output parameter after being marked ready.
        """
        description = "Production log ammo for {0} at {1} with handles {2}".format(
            self.Parameters.vhost,
            self.Parameters.log_date,
            self.Parameters.handles)
        resource = sdk2.Resource[self.Parameters.resource_type](
            self,
            description,
            "{0}.txt".format(self.Parameters.log_date),
            log_date=self.Parameters.log_date,
            sync_upload_to_mds=True
        )
        resource_data = sdk2.ResourceData(resource)
        # Explicit UTF-8: without it the text encoding is taken from the
        # worker's locale, and an ASCII locale would fail on any request
        # containing non-ASCII characters.
        with resource_data.path.open("w", encoding="utf-8") as ammo_file:
            ammo_file.writelines(ammo_rows)
        resource_data.ready()
        self.Parameters.ammo = resource

    def on_execute(self):
        """Task entry point: query the logs and store the result as ammo."""
        # Imported here because the package is provided by the "yql"
        # PipEnvironment listed in Requirements.
        from yql.api.v1.client import YqlClient
        yql_client = YqlClient(
            db=YT_CLUSTER,
            token=sdk2.Vault.data(TOKEN_KEY)
        )
        query = yql_client.query(self._prepare_query(), syntax_version=1)
        self._run_query(query)
        # imap keeps the conversion lazy: rows are transformed one by one
        # while the file is being written.
        ammo_rows = imap(self._process_ammo_row, query.table.get_iterator())
        self._write_result(ammo_rows)
