# -*- coding: utf-8 -*-

import logging
import time
import uuid
import os
import re
import random
import datetime

from sandbox.projects.scraper_over_yt.ScraperOverYtMakeTestBatch.yt_table import YtTable


class ScraperOverYtCreator:
    """Assemble test batches out of recently completed batches.

    For every configured pool the creator looks up recently completed
    batches in the processed table, samples rows from their input tables
    and wraps the sample into a ``YtTable`` placed under the test batch
    directory.
    """

    def __init__(self, pool_configs, telegram_sender, processed_table_path, test_batch_directory_path, yt):
        """Store the collaborators; no I/O is performed here.

        :param pool_configs: object exposing the per-pool settings used below
            (``get_pool_list``, ``get_min_rows_by_table``, ...).
        :param telegram_sender: stored as is; not used by this class itself.
        :param processed_table_path: table queried by ``select_tables``.
        :param test_batch_directory_path: directory where new tables are created.
        :param yt: YT client (``mkdir``, ``set``, ``get``, ``read_table``,
            ``select_rows`` are called on it).
        """
        self.pool_configs = pool_configs
        self.telegram_sender = telegram_sender
        self.processed_table_path = processed_table_path
        self.yt = yt
        self.test_batch_directory_path = test_batch_directory_path

    def get_rows_count_from_output_table(self, working_directory):
        """Return the ``@row_count`` attribute of ``<working_directory>/output_table``."""
        output_table_path = os.path.join(working_directory, 'output_table')
        return self.yt.get(os.path.join(output_table_path, '@row_count'))

    def create_test_batch(self):
        """Create test tables for every configured pool.

        A failure to create a single table is logged and the next candidate
        is tried, so one bad source directory does not abort the batch.

        :return: dict mapping each pool to the list of created ``YtTable``
            objects (possibly empty).
        """
        try:
            self.yt.mkdir(self.test_batch_directory_path)
        except Exception as e:
            # Best effort: the directory may already exist. Only ordinary
            # errors are swallowed; KeyboardInterrupt/SystemExit propagate.
            logging.info(e)
            logging.info('No created or already exists {path_to_create}'.format(path_to_create=self.test_batch_directory_path))

        # NOTE(review): this is a naive local-time ISO string. If YT treats
        # timestamps without an offset as UTC, the effective TTL differs from
        # two hours on hosts whose timezone is not UTC - confirm.
        expiration_time = datetime.datetime.now() + datetime.timedelta(hours=2)
        self.yt.set('{}/@expiration_time'.format(self.test_batch_directory_path), expiration_time.isoformat())

        pools = self.pool_configs.get_pool_list()
        selected_tables = self.select_tables(pools)

        pool_created_tables = {}
        for pool in pools:
            pool_created_tables[pool] = []

        for pool in selected_tables:
            max_selected_tables = self.pool_configs.get_max_selected_tables(pool)
            selected = 0
            for table_row in selected_tables[pool]:
                try:
                    pool_created_tables[pool].append(self.create_table(table_row['working_directory'], pool, table_row['spec']))
                    selected += 1
                except Exception as e:
                    logging.info(e)
                    logging.info('Something went wrong while creating table')
                else:
                    # Stop as soon as the per-pool quota is reached.
                    if selected == max_selected_tables:
                        break

        return pool_created_tables

    def create_table(self, working_directory, pool, spec):
        """Build a ``YtTable`` with a random sample of rows from a finished batch.

        Rows of ``<working_directory>/input_table`` are kept when their
        ``uri`` matches the pool's allow-hosts pattern, does not match the
        pool's skip-params pattern, and their ``id`` is absent from
        ``<working_directory>/error_table``.

        :param working_directory: directory of the source batch.
        :param pool: pool name; selects the settings in ``pool_configs``.
        :param spec: spec dict of the source batch; it is updated in place
            and handed to the new table.
        :return: the configured ``YtTable``.
        :raises Exception: when fewer rows than the pool's minimum survive
            the filtering.
        """
        input_table_path = os.path.join(working_directory, 'input_table')
        error_table_path = os.path.join(working_directory, 'error_table')

        # Only the ids are needed, so the error table is streamed into a set.
        error_id = set()
        for row in self.yt.read_table(error_table_path, raw=False):
            error_id.add(row['id'])

        regexp_skip_params = re.compile(self.pool_configs.get_skip_params(pool))
        regexp_allow_hosts = re.compile(self.pool_configs.get_allow_hosts(pool))

        input_rows_count = 0
        rows_of_input_table_filtered = []
        for row in self.yt.read_table(input_table_path, raw=False):
            input_rows_count += 1
            if regexp_allow_hosts.search(row['uri']) and not regexp_skip_params.search(row['uri']) and row['id'] not in error_id:
                rows_of_input_table_filtered.append(row)

        min_rows_to_select = self.pool_configs.get_min_rows_by_table(pool)

        if len(rows_of_input_table_filtered) < min_rows_to_select:
            # The ``min_rows`` placeholder carries the total number of input
            # rows, i.e. the message reads "selected X rows from Y".
            err_msg = 'Not enough rows for {pool}. Selected {selected} rows from {min_rows}'.format(pool=pool, selected=len(rows_of_input_table_filtered), min_rows=input_rows_count)
            logging.info(err_msg)
            raise Exception(err_msg)

        max_rows_to_select = min(self.pool_configs.get_max_rows_by_table(pool), len(rows_of_input_table_filtered))

        # Sample size is drawn uniformly from [min_rows, max_rows], both inclusive.
        sample_size = random.randrange(min_rows_to_select, max_rows_to_select + 1)
        selected_rows = random.sample(rows_of_input_table_filtered, sample_size)

        path_id = str(uuid.uuid4())

        path_to_create = os.path.join(self.test_batch_directory_path, path_id)
        input_table_path_to_create = os.path.join(path_to_create, 'input_table')
        self.yt.mkdir(path_to_create)

        spec['input_table'] = input_table_path_to_create
        spec['id'] = path_id
        spec['pool'] = pool
        spec['login'] = 'dynamic-pie'
        spec['max_running_jobs'] = self.pool_configs.get_max_running_jobs(pool)

        if 'fetch_timeout' in spec:
            # Drops the last two characters before converting to int;
            # presumably a unit suffix such as "ms" - TODO confirm.
            spec['fetch_timeout'] = int(spec['fetch_timeout'][:-2])

        table = YtTable(selected_rows, input_table_path_to_create, self.yt, pool)

        table.set_spec(spec)
        table.set_pool(pool)
        table.set_id(path_id)
        table.set_working_directory(path_to_create)

        return table

    def select_tables(self, pools):
        """Pick completed batches that can serve as a source of test rows.

        A row of the processed table is selected when its batch completed
        less than the pool's filter time ago and its output table holds at
        least the pool's minimum number of rows. Rows whose output table
        cannot be inspected are logged and skipped.

        :param pools: pool names to look for.
        :return: dict mapping every pool from ``pools`` to a list of
            selected rows (possibly empty).
        """
        pools_string = ','.join('"{pool}"'.format(pool=pool) for pool in pools)

        # Epoch seconds; time.time() avoids the DST ambiguity of converting
        # a naive local datetime through mktime().
        current = int(time.time())
        timestamp = int(current - self.pool_configs.get_max_filter_time())

        query = 'pool, spec, working_directory, download_completed from [{0}] where pool in ({1}) and download_completed > {2} and final_status = "completed"'.format(
            self.processed_table_path,
            pools_string,
            timestamp
        )

        rows = list(self.yt.select_rows(query))

        selected_tables = {}
        for pool in pools:
            selected_tables[pool] = []

        for row in rows:
            pool = row['pool']
            working_directory = row['working_directory']

            min_rows = self.pool_configs.get_min_rows_by_table(pool)
            filter_time_completed = self.pool_configs.get_filter_time(pool)

            # Cheap local check first: no remote call for batches that are
            # too old for this pool anyway.
            if int(current - int(row['download_completed'])) >= filter_time_completed:
                continue

            try:
                rows_count = self.get_rows_count_from_output_table(working_directory)
            except Exception as e:
                # One unreadable directory must not abort the whole selection.
                logging.info(e)
                logging.info('Can not get rows count of output table. Directory = {dir}'.format(dir=working_directory))
                continue

            if rows_count >= min_rows:
                selected_tables[pool].append(row)
                logging.info('For {pool} selected row. Directory = {dir}'.format(pool=pool, dir=working_directory))

        return selected_tables
