from dataclasses import dataclass, field
from enum import Enum, unique
from typing import ClassVar, TypeVar, Tuple, List, Optional, Type, Callable
import re

from bs4 import BeautifulSoup, NavigableString, Tag, Comment
from transitions import Machine, MachineError


class HtmlDataParsingException(Exception):
    """Error raised when HTML does not match the structure a parser expects.

    The message is prefixed with the class name of the parser that reported
    the problem. The prefixed text is kept in ``message`` and the tag passed
    by the parser in ``tag``.
    """

    def __init__(self, message: str, instance: 'HtmlParser', tag: Tag):
        parser_name = instance.__class__.__name__
        prefixed = f"{parser_name}: {message}"
        super().__init__(prefixed)
        self.message = prefixed
        self.tag = tag


class HtmlParser:
    """Base class for parsers that walk the direct children of one HTML block.

    Subclasses declare with ``_add_transition`` which child tags are accepted
    in which state. ``parse`` feeds every non-empty child through the state
    machine, fires ``finalize`` at the end of the block and returns whatever
    the subclass builds in ``build_result``.
    """

    # Matches text consisting only of whitespace (or nothing at all).
    EMPTY_TEXT = re.compile(r"^[\n\t\s]*$")
    # ``href`` form that refers to a hotel by slug instead of an external URL.
    HOTEL_SLUG_LINK_PATTERN = r'^hotel:slug:(.+)$'

    @unique
    class State(Enum):
        INITIAL = "initial"
        FINAL = "final"

    def __init__(self, states: Type[Enum], class_restriction: Optional[str] = None):
        """Create the state machine for this parser.

        :param states: enum with the parser-specific states; they are added
            to the common ``INITIAL`` / ``FINAL`` states.
        :param class_restriction: when given, ``parse`` only accepts a
            ``div`` carrying exactly this CSS class.
        """
        self._machine = Machine(
            model=self,
            states=list(HtmlParser.State) + list(states),
            initial=HtmlParser.State.INITIAL,
        )
        # "tag" or "tag.class" -> trigger attached to ``self`` by the machine.
        self.tag_mapping = {}
        # state -> printable names of what may come next (error messages only).
        self.expected_tags = {}
        self.class_restriction = class_restriction

    def _add_transition(self, tag_names: List[str], processor: Callable[[Tag], None], source: List[Enum], dest: Enum, final: bool = False):
        """Register a transition that is fired by the given child tags.

        :param tag_names: tag names (as produced by ``_tag_name``) that fire
            the transition.
        :param processor: bound method named ``on_<trigger>``; it is run as
            the ``before`` callback of the transition and receives the tag.
        :param source: states the transition is allowed from.
        :param dest: state reached after the transition.
        :param final: when true, the block may end in ``dest``.
        :raises Exception: if the processor name lacks the ``on_`` prefix.
        """
        processor_name = processor.__name__
        if not processor_name.startswith("on_"):
            raise Exception("Processor must starts with 'on_' prefix")
        trigger_name = processor_name[3:]
        self._machine.add_transition(
            trigger=trigger_name,
            source=source,
            dest=dest,
            before=processor_name,
        )
        for tag_name in tag_names:
            self.tag_mapping[tag_name] = getattr(self, trigger_name)
        if final:
            self._machine.add_transition(
                trigger="finalize",
                source=dest,
                dest=HtmlParser.State.FINAL,
            )
        # Bookkeeping for error messages; insertion order is the order in
        # which expectations are listed to the user.
        for from_state in source:
            for tag_name in tag_names:
                self.expected_tags.setdefault(from_state, []).append(f"\"{tag_name}\"")
        if final:
            self.expected_tags.setdefault(dest, []).append("end of block")

    def _parse_p(self, p_content: Tag) -> Tuple["Paragraph", str]:
        """Convert the children of a paragraph tag into paragraph blocks.

        :param p_content: the paragraph tag whose children are converted.
        :return: the ``Paragraph`` and the concatenated plain text of its
            pieces.
        :raises HtmlDataParsingException: for a malformed link or an invalid
            price span (bad value, missing or unknown currency).
        """
        blocks = []
        raw_parts = []
        for block in p_content:
            if isinstance(block, Comment):
                # Comments are markup, not content; ``parse`` ignores them at
                # block level too, so they must not leak into the text.
                continue
            if isinstance(block, NavigableString):
                text = str(block)
                blocks.append(PlainTextBlock(text=text))
                raw_parts.append(text)
            elif isinstance(block, Tag):
                if block.name == "i":
                    blocks.append(PlainTextBlock(text=block.text, styles=[PlainTextBlockStyle.ITALIC]))
                    raw_parts.append(block.text)
                elif block.name == "b":
                    blocks.append(PlainTextBlock(text=block.text, styles=[PlainTextBlockStyle.BOLD]))
                    raw_parts.append(block.text)
                elif block.name == "a":
                    blocks.append(self._parse_link(block))
                    raw_parts.append(block.text)
                # A span without any class has no "class" attribute at all,
                # hence the explicit empty default.
                elif block.name == "span" and "price" in block.attrs.get("class", []):
                    try:
                        # Currency lookup is inside the ``try`` so a missing or
                        # unknown currency is reported as a parsing error too.
                        currency = Currency(block.attrs["currency"])
                        price_block = PriceTextBlock(Price(block.text, currency))
                    except Exception as e:
                        raise HtmlDataParsingException(f"Price value error, {e}", self, p_content) from e
                    blocks.append(price_block)
                    raw_parts.append(block.text + " руб.")  # TODO: Extract currency name from currency object
                else:
                    # TODO: WARN
                    # Unknown inline tags degrade to their plain text.
                    blocks.append(PlainTextBlock(text=block.text))
                    raw_parts.append(block.text)
        return Paragraph(blocks), "".join(raw_parts)

    def _parse_link(self, a_content: Tag):
        """Convert an ``a`` tag into a hotel link or an external link block.

        :raises HtmlDataParsingException: if the tag has no (or an empty)
            ``href``.
        """
        href = a_content.attrs.get('href')
        if not href:
            raise HtmlDataParsingException('Mailformed link', self, a_content)

        slug_match = re.fullmatch(self.HOTEL_SLUG_LINK_PATTERN, href)
        if slug_match:
            return HotelLinkBlock(a_content.text, slug_match.group(1))

        return ExternalLinkBlock(a_content.text, href)

    def _tag_name(self, tag: Tag) -> str:
        """Return ``name`` or ``name.class`` for a tag with at most one class.

        :raises HtmlDataParsingException: if the tag has several classes.
        """
        classes = tag.attrs.get("class", [])
        if len(classes) > 1:
            raise HtmlDataParsingException("Multiple classes not supported", self, tag)

        if classes:
            return f"{tag.name}.{classes[0]}"
        return tag.name

    def _is_empty_tag(self, tag: Tag) -> bool:
        """Tell whether a child node is a comment or whitespace-only text."""
        if isinstance(tag, Comment):
            return True
        return isinstance(tag, NavigableString) and self.EMPTY_TEXT.fullmatch(tag) is not None

    def _build_expected_tag_message(self) -> str:
        """Describe what the parser accepts in its current state.

        ``self.state`` is not assigned in this class; it is expected to be
        maintained by the state machine created in ``__init__``.

        :raises Exception: if nothing was registered for the current state.
        """
        expected_tags = self.expected_tags.get(self.state, [])
        if not expected_tags:
            raise Exception("Logic error")
        if len(expected_tags) == 1:
            return expected_tags[0]
        joined = ", ".join(expected_tags)
        return f"one of {joined}"

    def parse(self, tag: Tag):
        """Parse one block and return the result built by the subclass.

        :param tag: the block whose direct children are consumed.
        :return: the value of ``build_result()``.
        :raises HtmlDataParsingException: if the block itself, one of its
            children, or the place where the block ends is not allowed.
        """
        if self.class_restriction is not None:
            expected_tag_name = f"div.{self.class_restriction}"
            actual_tag_name = self._tag_name(tag)
            if expected_tag_name != actual_tag_name:
                raise HtmlDataParsingException(f"Trying to parse {actual_tag_name} but expected {expected_tag_name}", self, tag)

        for child in tag.children:
            if self._is_empty_tag(child):
                continue

            if not isinstance(child, Tag):
                # Stray text (or e.g. a doctype) has no tag name or attributes;
                # report it as a parsing error instead of failing on the
                # attribute lookup in ``_tag_name``.
                stray_text = str(child).strip()
                raise HtmlDataParsingException(f"Unexpected text \"{stray_text}\". Expected {self._build_expected_tag_message()}.", self, tag)

            child_name = self._tag_name(child)
            if child_name not in self.tag_mapping:
                raise HtmlDataParsingException(f"Unknown tag \"{child_name}\". Expected {self._build_expected_tag_message()}.", self, tag)
            try:
                self.tag_mapping[child_name](child)
            except MachineError:
                # The tag is known but not allowed from the current state.
                raise HtmlDataParsingException(f"Unexpected tag \"{child_name}\". Expected {self._build_expected_tag_message()}.", self, tag) from None

        try:
            self.finalize()
        except MachineError:
            raise HtmlDataParsingException(f"Unexpected end of block. Expected {self._build_expected_tag_message()}.", self, tag) from None
        return self.build_result()

    def build_result(self):
        """Return the parsed value; must be overridden by subclasses."""
        raise NotImplementedError("Method must be implemented in descendant")


class DivExtractor(HtmlParser):
    """Parser that expects exactly one ``div`` and hands it to another parser.

    The accepted tag is ``div`` or, when the wrapped parser has a class
    restriction, ``div.<class_restriction>``. The wrapped parser's result
    becomes this parser's result.
    """

    @unique
    class State(Enum):
        DIV_EXTRACTED = "div_extracted"

    def __init__(self, div_parser: HtmlParser):
        restriction = div_parser.class_restriction
        tag_name = f"div.{restriction}" if restriction is not None else "div"

        super().__init__(DivExtractor.State)
        self._add_transition(
            tag_names=[tag_name],
            processor=self.on_extract_div,
            source=[HtmlParser.State.INITIAL],
            dest=DivExtractor.State.DIV_EXTRACTED,
            final=True,
        )

        self.parser = div_parser
        self.result = None

    def on_extract_div(self, div: Tag):
        """Delegate the div to the wrapped parser and remember its result."""
        self.result = self.parser.parse(div)

    def build_result(self):
        """Return what the wrapped parser produced."""
        return self.result


class QuestionParser(HtmlParser):
    """Parser for a ``div.question`` block: an ``h3`` followed by one ``p``."""

    @unique
    class State(Enum):
        QUESTION_PARSED = "question_parsed"
        ANSWER_PARSED = "answer_parsed"

    def __init__(self):
        super().__init__(QuestionParser.State, class_restriction="question")

        states = QuestionParser.State
        self._add_transition(
            tag_names=["h3"],
            processor=self.on_parse_question,
            source=[HtmlParser.State.INITIAL],
            dest=states.QUESTION_PARSED,
        )
        self._add_transition(
            tag_names=["p"],
            processor=self.on_parse_answer,
            source=[states.QUESTION_PARSED],
            dest=states.ANSWER_PARSED,
            final=True,
        )

        # Filled in by the processors while the block is being parsed.
        self.question = None
        self.answer = None
        self.answer_raw_text = None

    def on_parse_question(self, tag: Tag):
        """Store the text of the question heading."""
        self.question = tag.text

    def on_parse_answer(self, tag: Tag):
        """Store the answer both as a paragraph and as raw text."""
        paragraph, raw_text = self._parse_p(tag)
        self.answer = paragraph
        self.answer_raw_text = raw_text

    def build_result(self):
        """Assemble the ``Question`` from the collected pieces."""
        return Question(self.question, self.answer, self.answer_raw_text)


class FaqParser(HtmlParser):
    """Parser for a FAQ block: an optional ``h2`` title, then ``div.question`` items.

    At least one question is required, because the block may only end after
    a question has been parsed.
    """

    @unique
    class State(Enum):
        TITLE_PARSED = "title_parsed"
        QUESTION_PARSED = "question_parsed"

    def __init__(self):
        super().__init__(FaqParser.State)

        states = FaqParser.State
        self._add_transition(
            tag_names=["h2"],
            processor=self.on_parse_title,
            source=[HtmlParser.State.INITIAL],
            dest=states.TITLE_PARSED,
        )
        self._add_transition(
            tag_names=["div.question"],
            processor=self.on_parse_question,
            source=[HtmlParser.State.INITIAL, states.TITLE_PARSED, states.QUESTION_PARSED],
            dest=states.QUESTION_PARSED,
            final=True,
        )

        self.title = None
        self.questions = []

    def on_parse_title(self, title_tag: Tag):
        """Store the text of the FAQ title."""
        self.title = title_tag.text

    def on_parse_question(self, question_tag: Tag):
        """Parse one question with a fresh ``QuestionParser`` and collect it."""
        parsed_question = QuestionParser().parse(question_tag)
        self.questions.append(parsed_question)

    def build_result(self):
        """Assemble the ``FaqRenderedBlock`` from the title and questions."""
        return FaqRenderedBlock(self.title, self.questions)


class SpoilerTextBlockParser(HtmlParser):
    """Parser for a ``div.spoiler`` block: an ``h3`` title followed by one ``p``."""

    @unique
    class State(Enum):
        TITLE_PARSED = "title_parsed"
        DESCRIPTION_PARSED = "description_parsed"

    def __init__(self):
        super().__init__(SpoilerTextBlockParser.State, class_restriction="spoiler")

        states = SpoilerTextBlockParser.State
        self._add_transition(
            tag_names=["h3"],
            processor=self.on_parse_title,
            source=[HtmlParser.State.INITIAL],
            dest=states.TITLE_PARSED,
        )
        self._add_transition(
            tag_names=["p"],
            processor=self.on_parse_description,
            source=[states.TITLE_PARSED],
            dest=states.DESCRIPTION_PARSED,
            final=True,
        )

        self.title = None
        self.description = None

    def on_parse_title(self, title_tag: Tag):
        """Store the text of the spoiler title."""
        self.title = title_tag.text

    def on_parse_description(self, tag: Tag):
        """Store the description paragraph; its raw text is not needed."""
        paragraph, _raw_text = self._parse_p(tag)
        self.description = paragraph

    def build_result(self):
        """Assemble the ``SpoilerTextBlock``."""
        return SpoilerTextBlock(self.title, self.description)


class SubsectionTextBlockParser(HtmlParser):
    """Parser for a ``div.subsection`` block: an ``h3`` title, then one or more ``p``."""

    @unique
    class State(Enum):
        TITLE_PARSED = "title_parsed"
        PARAGRAPH_PARSED = "paragraph_parsed"

    def __init__(self):
        super().__init__(SubsectionTextBlockParser.State, class_restriction="subsection")

        states = SubsectionTextBlockParser.State
        self._add_transition(
            tag_names=["h3"],
            processor=self.on_parse_title,
            source=[HtmlParser.State.INITIAL],
            dest=states.TITLE_PARSED,
        )
        self._add_transition(
            tag_names=["p"],
            processor=self.on_parse_paragraph,
            source=[states.TITLE_PARSED, states.PARAGRAPH_PARSED],
            dest=states.PARAGRAPH_PARSED,
            final=True,
        )

        self.title = None
        self.paragraphs = []

    def on_parse_title(self, title_tag: Tag):
        """Store the text of the subsection title."""
        self.title = title_tag.text

    def on_parse_paragraph(self, tag: Tag):
        """Parse one paragraph and collect it; the raw text is discarded."""
        parsed, _raw_text = self._parse_p(tag)
        self.paragraphs.append(parsed)

    def build_result(self):
        """Assemble the ``SubSectionTextBlock``."""
        return SubSectionTextBlock(self.title, self.paragraphs)


class TextRenderedBlockParser(HtmlParser):
    """Parser for a ``div.section`` block.

    Accepts an optional ``h2`` title followed by one or more children, each
    being a ``p``, a ``div.spoiler`` or a ``div.subsection``.
    """

    @unique
    class State(Enum):
        TITLE_PARSED = "title_parsed"
        CHILD_PARSED = "child_parsed"

    def __init__(self):
        super().__init__(TextRenderedBlockParser.State, class_restriction="section")

        states = TextRenderedBlockParser.State
        self._add_transition(
            tag_names=["h2"],
            processor=self.on_parse_title,
            source=[HtmlParser.State.INITIAL],
            dest=states.TITLE_PARSED,
        )
        self._add_transition(
            tag_names=["p", "div.spoiler", "div.subsection"],
            processor=self.on_parse_child,
            source=[HtmlParser.State.INITIAL, states.TITLE_PARSED, states.CHILD_PARSED],
            dest=states.CHILD_PARSED,
            final=True,
        )

        self.title = None
        self.children = []

    def on_parse_title(self, title_tag: Tag):
        """Store the text of the section title."""
        self.title = title_tag.text

    def on_parse_child(self, tag: Tag):
        """Parse one child with the parser matching its tag name and collect it.

        :raises HtmlDataParsingException: if the tag name is none of the
            three supported ones.
        """
        tag_name = self._tag_name(tag)
        if tag_name == "p":
            parsed_child, _raw_text = self._parse_p(tag)
        elif tag_name == "div.spoiler":
            parsed_child = SpoilerTextBlockParser().parse(tag)
        elif tag_name == "div.subsection":
            parsed_child = SubsectionTextBlockParser().parse(tag)
        else:
            raise HtmlDataParsingException(f"Unknown tag name {tag_name}", self, tag)
        self.children.append(parsed_child)

    def build_result(self):
        """Assemble the ``TextRenderedBlock``."""
        return TextRenderedBlock(self.children, self.title)


@dataclass
class TankerHtmlRenderedData:
    """Common base of the data classes produced by the HTML parsers."""


@dataclass
class FaqRenderedBlock(TankerHtmlRenderedData):
    """FAQ block: a title and the questions it contains."""

    # Parser class (not an instance) that produces this data class.
    PARSER: ClassVar[Type[HtmlParser]] = FaqParser

    # None when the block has no h2: FaqParser starts with title = None and
    # accepts questions without a preceding title.
    title: Optional[str]
    questions: List['Question']


@dataclass
class Question(TankerHtmlRenderedData):
    """One FAQ entry: the question text and its answer."""

    # Parser class (not an instance) that produces this data class.
    PARSER: ClassVar[Type[HtmlParser]] = QuestionParser

    question: str
    # The answer as structured paragraph blocks.
    answer: 'Paragraph'
    # The same answer flattened to plain text.
    answer_raw_text: str


@dataclass
class SectionTextBlockContent(TankerHtmlRenderedData):
    """Base of everything that can be a child of a ``TextRenderedBlock``."""


@dataclass
class TextRenderedBlock(TankerHtmlRenderedData):
    """Text section: an optional title and its ordered children."""

    # Parser class (not an instance) that produces this data class.
    PARSER: ClassVar[Type[HtmlParser]] = TextRenderedBlockParser

    children: List[SectionTextBlockContent]
    title: Optional[str] = None


@dataclass
class SubSectionTextBlock(SectionTextBlockContent):
    """Titled subsection made of paragraphs."""

    # Parser class (not an instance) that produces this data class.
    PARSER: ClassVar[Type[HtmlParser]] = SubsectionTextBlockParser

    title: str
    paragraphs: List['Paragraph']


@dataclass
class SpoilerTextBlock(SectionTextBlockContent):
    """Spoiler: a title and a single description paragraph."""

    # Parser class (not an instance) that produces this data class.
    PARSER: ClassVar[Type[HtmlParser]] = SpoilerTextBlockParser

    title: str
    description: 'Paragraph'


@dataclass
class Paragraph(SectionTextBlockContent):
    """Paragraph represented as an ordered list of inline blocks."""

    children: List['ParagraphBlock']


@dataclass
class ParagraphBlock:
    """Base of the inline pieces a ``Paragraph`` is made of."""


@unique
class PlainTextBlockStyle(Enum):
    """Text styles that can be attached to a ``PlainTextBlock``."""

    BOLD = "bold"
    ITALIC = "italic"


@dataclass
class PlainTextBlock(ParagraphBlock):
    """Run of text with an optional list of styles."""

    text: str
    # default_factory gives every instance its own list.
    styles: List[PlainTextBlockStyle] = field(default_factory=list)


@dataclass
class PriceTextBlock(ParagraphBlock):
    """Inline block that carries a price."""

    price: "Price"


@dataclass
class Price:
    """Price as a validated textual amount plus its currency.

    ``value`` and ``currency`` are declared as dataclass fields so that the
    generated ``__eq__`` and ``__repr__`` take them into account. With no
    declared fields every two prices compared equal and the repr was empty.

    :raises ValueError: if ``value`` contains anything other than digits,
        dots, commas and spaces, or is empty.
    """

    # Not annotated on purpose: this is a class constant, not a field.
    VALUE_PATTERN = re.compile(r"^[\d., ]+$")

    value: str
    currency: "Currency"

    def __post_init__(self):
        # Runs right after the generated __init__ has stored both fields.
        if not Price.VALUE_PATTERN.fullmatch(self.value):
            raise ValueError(f"Invalid price value {self.value}")


@unique
class Currency(Enum):
    """Currencies accepted in price spans; looked up by their code."""

    RUB = "RUB"

@dataclass
class HotelLinkBlock(ParagraphBlock):
    """Link to a hotel identified by its slug."""

    text: str
    slug: str


@dataclass
class ExternalLinkBlock(ParagraphBlock):
    """Link whose target is an arbitrary ``href`` value."""

    text: str
    url: str


def all_subclasses(cls):
    """Return the set of all direct and indirect subclasses of ``cls``."""
    found = set()
    for subclass in cls.__subclasses__():
        found.add(subclass)
        # Recurse so that grandchildren and deeper descendants are included.
        found.update(all_subclasses(subclass))
    return found


# Type variable bound to TankerHtmlRenderedData; HtmlParserWrapper.parse uses it
# to tie the type of the returned value to the requested data class.
THR = TypeVar('THR', bound=TankerHtmlRenderedData)


class HtmlParserWrapper:
    """Entry point that parses raw HTML into a requested data class."""

    # Data class -> its PARSER, for every TankerHtmlRenderedData subclass
    # (direct or indirect) that has a PARSER attribute.
    PARSER_MAP = {
        data_cls: data_cls.PARSER
        for data_cls in all_subclasses(TankerHtmlRenderedData)
        if hasattr(data_cls, 'PARSER')
    }

    def parse(self, raw_html: str, data_class: Type[THR]) -> THR:
        """Parse ``raw_html`` with the parser registered for ``data_class``.

        The HTML is expected to consist of a single ``div`` that the
        registered parser understands; it is located through ``DivExtractor``.

        :raises Exception: if no parser is registered for ``data_class``.
        """
        parser_cls = self.PARSER_MAP.get(data_class)
        if parser_cls is None:
            raise Exception(f"Cannot find parser for {data_class.__name__}")

        soup = BeautifulSoup(raw_html, features="html.parser")
        extractor = DivExtractor(parser_cls())
        return extractor.parse(soup)
