import logging
import threading
import time
from queue import Queue

import requests
from travel.hotels.lib.python3.cli.cli import auto_progress_reporter
from travel.hotels.lib.python3.utils.throttler import Throttler
from travel.hotels.lib.python3.yt import ytlib

# Module-level logger named after this module.
LOG = logging.getLogger(__name__)

# Rubric ids (as strings) that are recognised as accommodation categories,
# mapped to their display names. WizardRequester looks up the wizard's
# `SuggestedRubrics` value in this mapping; ids that are absent here are
# treated as "no category found".
CATEGORIES = {
    "184106414": "Гостиница",  # hotel
    "184106404": "Санаторий",  # sanatorium / health resort
    "184106400": "Дом Отдыха",  # holiday (rest) house
    "20699506347": "Хостел",  # hostel
    "184106426": "Турбаза",  # tourist camp
    "184106420": "Кемпинг",  # campsite
    "255921949": "Отдых на ферме",  # farm stay
}


class WizardResult:
    """Plain value holder for what was extracted from one wizard query.

    Attributes:
        geo_id: identifier of the matched geo object, or None.
        geo_text: text associated with the matched geo object, or None.
        category_id: identifier of the matched category, or None.
        category_name: display name of the matched category, or None.
    """

    def __init__(self, geo_id, geo_text, category_id, category_name):
        self.geo_id, self.geo_text = geo_id, geo_text
        self.category_id, self.category_name = category_id, category_name

    def __repr__(self):
        # Human-readable form: "<geo id> - <geo text>, <category id> - <category name>".
        return f'WizardResult({self.geo_id} - "{self.geo_text}", {self.category_id} - "{self.category_name}")'

    def __str__(self):
        # str() and repr() intentionally render the same text.
        return repr(self)


class WizardRequester:
    """Throttled HTTP client that sends a text query to the wizard and parses the answer.

    Args:
        url: wizard endpoint that is queried with GET.
        max_rps: value handed to ``Throttler``; presumably the maximum number
            of requests per second (confirm against the Throttler implementation).
        timeout: timeout in seconds passed to ``requests`` for every call, so a
            stalled connection raises instead of blocking forever. ``None``
            disables the timeout (the previous behaviour).
    """

    def __init__(self, url, max_rps, timeout=60):
        self.url = url
        self.throttler = Throttler(max_rps)
        self.session = requests.Session()
        self.timeout = timeout

    def make_request(self, query):
        """Query the wizard for ``query`` and return the parsed ``WizardResult``.

        Raises:
            requests.RequestException: on connection problems, timeouts or a
                non-2xx HTTP status.
            KeyError: if the response JSON has no ``rules`` section.
            ValueError: if the response body is not JSON or an id is not numeric.
        """
        self.throttler.delay_before_next_call()

        response = self.session.get(
            self.url,
            params={
                'text': query,
                'format': 'json',
                'rn': 'Travel',
                'geoaddr_geometa': '1',
                'wizextra': 'geoaddr_geometa=1',
            },
            timeout=self.timeout,
        )
        # Fail loudly on HTTP errors instead of trying to interpret an error page.
        response.raise_for_status()
        rules = response.json()['rules']

        category_id, category_name = self._parse_category(rules)
        geo_id, geo_text = self._parse_geo(rules)
        return WizardResult(geo_id, geo_text, category_id, category_name)

    @staticmethod
    def _parse_category(rules):
        """Return ``(category_id, category_name)`` for a known suggested rubric, else ``(None, None)``."""
        suggested_rubric = rules.get('Rubrics', {}).get('SuggestedRubrics')
        if suggested_rubric in CATEGORIES:
            return int(suggested_rubric), CATEGORIES[suggested_rubric]
        return None, None

    @staticmethod
    def _parse_geo(rules):
        """Return ``(geo_id, geo_text)`` for the first usable geo match, else ``(None, None)``.

        ``BestGeo`` may be a single value or a list; ``None`` and ``"-1"``
        entries are treated as "no match".
        """
        geo_addr = rules.get('GeoAddr', {})
        best_geo = geo_addr.get('BestGeo')
        normalized_text = geo_addr.get('NormalizedText')

        if isinstance(best_geo, list):
            if not isinstance(normalized_text, list):
                # A list of ids without a parallel list of texts would break zip();
                # keep the ids and attach whatever single text (or None) is available.
                normalized_text = [normalized_text] * len(best_geo)
            candidates = (
                (geo_id, geo_name)
                for geo_id, geo_name in zip(best_geo, normalized_text)
                if geo_id is not None and geo_id != "-1"
            )
            best_geo, normalized_text = next(candidates, (None, None))

        if best_geo is None or best_geo == "-1":
            return None, None
        return int(best_geo), normalized_text


class BackgroundYtWriter:
    """Context manager that appends records to a YT table from a background thread.

    Records passed to :meth:`write` are queued and flushed to the table in
    batches of ``max_pending_records`` rows; the remaining rows are flushed
    when the ``with`` block exits. ``None`` is reserved as the internal stop
    marker and must not be written as a record.

    A failed flush stops the background thread. The error is re-raised by the
    next :meth:`write` call, or by ``__exit__`` if the ``with`` body itself
    finished without an exception, so lost data never goes unnoticed.

    Args:
        yt_client: client object providing ``create``, ``write_table`` and ``TablePath``.
        yt_table_path: path of the table to create and append to.
        schema: table schema passed as the ``schema`` attribute on creation.
        max_pending_records: number of rows per flushed batch.
    """

    def __init__(self, yt_client, yt_table_path, schema, max_pending_records=100):
        self.yt_client = yt_client
        self.yt_table_path = yt_table_path
        self.schema = schema
        self.max_pending_records = max_pending_records
        self.pending_records = Queue()
        # Exception raised by the flusher thread, if any; surfaced to the caller later.
        self._flush_error = None
        self.flusher = threading.Thread(target=self._start_flushing)
        self.flusher.start()

    def __enter__(self):
        """Create the destination table (it must not exist yet) and return the writer."""
        try:
            self.yt_client.create('table', self.yt_table_path, recursive=True, ignore_existing=False, attributes={"schema": self.schema})
        except BaseException:
            # __exit__ is not called when __enter__ fails, so stop the non-daemon
            # flusher here; otherwise it would block on the queue forever and
            # keep the process alive.
            self._stop_flusher()
            raise
        return self

    def __exit__(self, type, value, traceback):
        """Flush the remaining records, wait for the flusher and report a failed flush."""
        self._stop_flusher()
        # Do not mask an exception that is already propagating out of the with-body.
        if type is None and self._flush_error is not None:
            raise self._flush_error

    def write(self, data):
        """Queue one record for writing.

        Raises:
            Exception: the error of an earlier failed flush, if there was one.
        """
        if self._flush_error is not None:
            raise self._flush_error
        self.pending_records.put(data)

    def _stop_flusher(self):
        """Send the stop marker to the flusher thread and wait until it finishes."""
        self.pending_records.put(None)
        self.flusher.join()

    def _start_flushing(self):
        """Thread body: collect queued records and flush them in batches until the stop marker."""
        buffer = []
        try:
            while True:
                curr = self.pending_records.get()
                if curr is None:
                    break
                buffer.append(curr)
                if len(buffer) >= self.max_pending_records:
                    self._flush_buffer(buffer)
                    buffer = []
            if buffer:
                self._flush_buffer(buffer)
        except Exception as e:
            # Remember the failure so that write()/__exit__ can re-raise it in
            # the caller's thread instead of losing it with the dying thread.
            self._flush_error = e

    def _flush_buffer(self, buffer):
        """Append ``buffer`` to the table; log and re-raise on failure."""
        try:
            self.yt_client.write_table(self.yt_client.TablePath(self.yt_table_path, append=True), buffer)
        except Exception as e:
            logging.getLogger(__name__).exception(f'Failed to write raw response to yt: {e}')
            raise


class WizardHotelNamesParser:
    """Sends hotel names from a YT feed table to the wizard and stores the parsed results.

    Args:
        yt_client: YT client used to read the feed and write the results.
        feed_path: path of the input table with hotel records.
        results_path: path of the output table (created by ``run``).
        wizard_url: wizard endpoint URL.
        max_rps: request rate limit passed to ``WizardRequester``.
        tries_limit: number of attempts per hotel name before giving up on it.
        max_failed_records: number of given-up names that is still tolerated;
            exceeding it aborts processing.
        records_limit: if not None, only the first ``records_limit`` rows of
            the feed are processed.
    """

    def __init__(self, yt_client, feed_path, results_path, wizard_url, max_rps, tries_limit, max_failed_records, records_limit=None):
        self.yt_client = yt_client
        self.feed_path = feed_path
        self.results_path = results_path
        self.wizard_requester = WizardRequester(wizard_url, max_rps)
        self.tries_limit = tries_limit
        self.max_failed_records = max_failed_records
        self.records_limit = records_limit
        # Number of names for which every attempt failed.
        self.failed_records = 0

    def run(self):
        """Process the whole feed and write one result row per hotel name to ``results_path``."""
        schema = ytlib.schema_from_dict({
            'permalink': 'int64',
            'name': 'string',
            'known_rubric_name': 'string',
            'known_destination': 'string',
            'parsed_geo_id': 'int32',
            'parsed_geo_name': 'string',
            'parsed_category_id': 'int64',
            'parsed_category_name': 'string',
        })

        with BackgroundYtWriter(self.yt_client, self.results_path, schema) as results_writer:
            for record in self.process_hotels_table(self.feed_path):
                results_writer.write(record)

    def process_raw_key(self, permalink, name, rubric, destination):
        """Parse one hotel name via the wizard and build the result row.

        The request is attempted up to ``tries_limit`` times with a growing
        pause between attempts. If every attempt fails, the ``parsed_*``
        fields are None and the failure counter is increased.

        Returns:
            dict with the known fields and the ``parsed_*`` fields.

        Raises:
            Exception: when more than ``max_failed_records`` names have failed.
        """
        logger = logging.getLogger(__name__)
        geo_id = geo_text = category_id = category_name = None

        for attempt in range(self.tries_limit):
            try:
                res = self.wizard_requester.make_request(name)
            except Exception as e:
                retries_left = self.tries_limit - attempt - 1
                # Lazy %-args: the message renders correctly even if the name contains '%'.
                logger.warning('Failed to process "%s", retries left: %d, error: %s', name, retries_left, e)
                if retries_left > 0:
                    # Back off only when another attempt will actually follow.
                    time.sleep(attempt + 1)
            else:
                geo_id, geo_text = res.geo_id, res.geo_text
                category_id, category_name = res.category_id, res.category_name
                break
        else:
            # The loop ended without a successful request.
            logger.warning('Failed to process "%s", using None result', name)
            self.failed_records += 1
            if self.failed_records > self.max_failed_records:
                raise Exception(f'Too many failed records ({self.failed_records})')

        return {
            'permalink': permalink,
            'name': name,
            'known_rubric_name': rubric,
            'known_destination': destination,
            'parsed_geo_id': geo_id,
            'parsed_geo_name': geo_text,
            'parsed_category_id': category_id,
            'parsed_category_name': category_name,
        }

    def process_hotels_table(self, table):
        """Yield a result row for every feed row of ``table`` that has a property name.

        Only the first ``records_limit`` rows are read when the limit is set.
        """
        columns = ['Property ID', 'Property name', 'Rubric name', 'Destination name']
        if self.records_limit is not None:
            table_path = self.yt_client.TablePath(table, columns=columns, start_index=0, end_index=self.records_limit)
            total_count = min(self.records_limit, self.yt_client.row_count(table_path))
        else:
            table_path = self.yt_client.TablePath(table, columns=columns)
            total_count = self.yt_client.row_count(table_path)

        rows = auto_progress_reporter(self.yt_client.read_table(table_path), name='Parsing hotel names via wizard', total=total_count)
        for row in rows:
            # Rows without a name cannot be sent to the wizard and are skipped.
            if row['Property name'] is not None:
                yield self.process_raw_key(row['Property ID'], row['Property name'], row['Rubric name'], row['Destination name'])
