import datetime
import os
import re
import zipfile
from hashlib import sha256

from sandbox import sdk2
from sandbox.projects.security.ReportFuzzing.resources import CorpusFromLogs
from sandbox.sdk2.environments import PipEnvironment

# Configs
# Directory into which the individual corpus request files are written
# before they are added to the archive.
CORPUS_FOLDER = './corpus'
# NOTE(review): not referenced in the visible code; presumably a TTL (in days)
# for the produced corpus resource -- confirm before relying on it.
DEFAULT_CORPUS_TTL = 20
# Directory in which the resulting corpus.zip archive is created.
WORKDIR = '.'

# Query template over the `market`.`trace` table: groups rows by the `place`
# URL parameter and collects up to {group_n} unique query strings per place.
# Placeholders: {group_n} (sample size per place) and {timestamp} (lower bound
# passed to toUnixTimestamp). Filled in by recent_report_queries().
RECENT_REPORT_QUERIES = '''
SELECT extractURLParameter(query_params, 'place') as place, count(*) as count,
groupUniqArray({group_n})(query_params) as sample FROM `market`.`trace`
WHERE date = today()
AND timestamp > toUnixTimestamp('{timestamp}')
AND query_params like '%yandsearch%'
AND environment == 'PRODUCTION'
AND source_module == 'market_front_desktop'
GROUP BY place
ORDER BY count DESC
'''

# Query template over a daily market-balancer production log table: groups
# URLs containing 'yandsearch' by their `place` CGI parameter and collects up
# to {group_n} distinct URLs per place, most frequent places first.
# Placeholders: {group_n} (sample size per place) and {date} (table name
# suffix). Filled in by report_queries().
REPORT_QUERIES = '''
SELECT place, count(*) as count, AGGREGATE_LIST_DISTINCT(url, {group_n}) as samples FROM(
    SELECT Url::GetCGIParam(url, 'place') as place, url FROM `logs/market-balancer-log-production/1d/{date}`
    WHERE url LIKE '%yandsearch%'
) GROUP BY place ORDER BY count DESC
'''


def report_queries(group_n):
    """Build the balancer-log query for yesterday's daily table.

    :param group_n: maximum number of distinct URLs to sample per place;
        coerced with ``int()`` so that only a number can ever be substituted
        into the query text (same as ``recent_report_queries`` does).
    :return: ``REPORT_QUERIES`` with ``{group_n}`` and ``{date}`` filled in,
        where the date is the local "now" minus one day as ``YYYY-MM-DD``.
    :raises ValueError, TypeError: if ``group_n`` is not convertible to int.
    """
    yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
    return REPORT_QUERIES.format(
        group_n=int(group_n),
        date=yesterday.strftime("%Y-%m-%d"),
    )


def recent_report_queries(group_n):
    """Build the trace-table query limited to the last 24 hours.

    :param group_n: maximum number of unique query strings to sample per
        place; converted with ``int()`` before substitution.
    :return: ``RECENT_REPORT_QUERIES`` with ``{group_n}`` and ``{timestamp}``
        filled in, the timestamp being the local "now" minus one day
        formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    day_ago = datetime.datetime.now() - datetime.timedelta(days=1)
    lower_bound = day_ago.strftime("%Y-%m-%d %H:%M:%S")
    sample_size = int(group_n)
    return RECENT_REPORT_QUERIES.format(timestamp=lower_bound, group_n=sample_size)


class ReportCollectCorpusFromYQL(sdk2.Task):
    """Collect a corpus of report requests from logs via YQL.

    Runs the query built by ``report_queries``, turns every sampled URL into
    a small raw ``GET`` request file, packs those files into ``corpus.zip``
    and publishes the archive as a ``CorpusFromLogs`` resource whose
    description is the ``{place: count}`` mapping returned by the query.
    """

    class Requirements(sdk2.Task.Requirements):
        # Pip packages required by the task; ``yql`` is imported lazily
        # inside on_execute.
        environments = (
            PipEnvironment('yandex-yt'),
            PipEnvironment('yql'),
        )

    class Parameters(sdk2.Parameters):
        with sdk2.parameters.Group('DB'):
            # Passed to YqlClient as ``db``.
            db = sdk2.parameters.String(
                'Database to use',
                required=True,
                default_value='hahn'
            )
            # Upper bound on the number of distinct URLs sampled per place.
            group_n = sdk2.parameters.Integer(
                'Number of unique items in group',
                required=True,
                default_value=100
            )
            # Only read by _from_param(); on_execute does not use it in the
            # visible code.
            from_db = sdk2.parameters.String(
                'FROM db parameter',
                required=True,
                default_value='market.trace'
            )
            # Fallback token source, used only when no Yav secret is given.
            yql_vault_token = sdk2.parameters.String(
                'Vault token name [deprecated]',
                default_value='YQL_TOKEN'
            )
            yql_yav_token = sdk2.parameters.YavSecret(
                'Yav YQL_TOKEN'
            )
            # Key inside the Yav secret that holds the token value.
            yav_token_key = sdk2.parameters.String(
                'Yav token key name',
                default_value='YQL_TOKEN'
            )

    def _from_param(self):
        """Return the ``from_db`` parameter as a backtick-quoted dotted name.

        Example: ``market.trace`` -> ```market`.`trace```.

        :raises Exception: if the value is not a dot-separated sequence of
            non-empty word-character components.
        """
        from_db = str(self.Parameters.from_db)
        # ``\Z`` instead of ``$``: ``$`` would also accept a trailing newline,
        # which would then end up inside the quoted identifier. Requiring
        # ``\w+`` per component rejects empty parts such as ``a..b``.
        if not re.match(r'^\w+(?:\.\w+)*\Z', from_db):
            raise Exception('Invalid FROM field')
        return '.'.join('`{}`'.format(part) for part in from_db.split('.'))

    def on_execute(self):
        """Run the YQL query, build ``corpus.zip`` and export it."""
        from yql.api.v1.client import YqlClient

        # Do not fail if the folder is left over from a previous run in the
        # same working directory.
        if not os.path.isdir(CORPUS_FOLDER):
            os.mkdir(CORPUS_FOLDER)

        # Prefer the Yav secret; fall back to the deprecated Vault item.
        if self.Parameters.yql_yav_token:
            token = self.Parameters.yql_yav_token.data()[self.Parameters.yav_token_key]
        else:
            token = sdk2.Vault.data(self.owner, self.Parameters.yql_vault_token)
        client = YqlClient(
            db=str(self.Parameters.db),
            token=token
        )
        req = client.query(
            report_queries(self.Parameters.group_n),
            syntax_version=1
        )
        req.run()
        req.get_results(wait=True)
        req.table.fetch_full_data()

        corpus_meta = dict()
        corpus_filepath = os.path.join(WORKDIR, 'corpus.zip')
        with zipfile.ZipFile(corpus_filepath, 'w', zipfile.ZIP_DEFLATED) as corpus:
            for row in req.table.rows:
                # Column order follows the SELECT list of the query.
                place, count, samples = row
                corpus_meta[place] = count
                self._collect_corpus_items(corpus, samples, place)
        self._export_corpus(corpus_filepath, corpus_meta)

    def _collect_corpus_items(self, corpus, records, place):
        """Write one request file per record and add it to the archive.

        :param corpus: open, writable ``zipfile.ZipFile``.
        :param records: iterable of URLs sampled for ``place``.
        :param place: value of the ``place`` CGI parameter; used (sanitized)
            as part of the file name.
        """
        time_mark = datetime.datetime.now().strftime("%d.%m.%y")
        # ``place`` is taken from logged request URLs, so it may contain path
        # separators or be arbitrarily long. Keep only file-name-safe ASCII
        # characters and cap the length so open() cannot fail on it or
        # resolve outside CORPUS_FOLDER. Ordinary place names are unchanged.
        safe_place = re.sub(r'[^A-Za-z0-9_.-]', '_', '{}'.format(place))[:64]
        for record in records:
            data = 'GET {} HTTP/1.1\nHost: localhost\n\n'.format(record)
            # Encode once so the bytes on disk are exactly the bytes hashed.
            payload = data.encode('utf-8')
            f_name = '{}-{}-{}.cov'.format(time_mark, safe_place, sha256(payload).hexdigest())
            filepath = os.path.join(CORPUS_FOLDER, f_name)
            with open(filepath, 'wb') as f:
                f.write(payload)
            corpus.write(filepath)

    def _export_corpus(self, filename, meta):
        """Publish ``filename`` as a ``CorpusFromLogs`` resource.

        :param filename: path to the archive to export.
        :param meta: ``{place: count}`` mapping; its ``str()`` form is passed
            as the second argument of the resource constructor.
        """
        sdk2.ResourceData(CorpusFromLogs(
            self,
            str(meta),
            filename,
        )).ready()
