import os
import re
from json import dump

import pandas as pd
import yt.wrapper as yt

from .excel_validate import StartDocument

# Configure the YT client to use the 'hahn' proxy. This runs once, as a side
# effect of importing this module.
yt.config.set_proxy('hahn')


# Characters accepted in a VIN: digits and capital Latin letters except I, O, Q.
_VIN_ALPHABET = frozenset('0123456789ABCDEFGHJKLMNPRSTUVWXYZ')

# File-name stems whose spreadsheets keep the resolution number and the VIN in
# two separate columns.
_COLUMN_LAYOUT_SOURCES = ('major', 'vtb')

# A resolution number inside free text: a run of 20 or more digits.
_RESOLUTION_RE = re.compile(r'\d{20,}')

# A licence plate inside free text, written with Cyrillic capitals only.
# NOTE: plates typed with look-alike Latin letters are not matched.
_PLATE_RE = re.compile(r'[АВЕКМНОРСТУХ]{1}\d{3}[АВЕКМНОРСТУХ]{2}\d{2,3}')


def _is_resolution_number(value):
    """Return True if *value* is exactly 20 digits once spaces are removed."""
    text = str(value).replace(' ', '')
    return len(text) == 20 and text.isdigit()


def _is_vin(value):
    """Return True if *value* is 17 characters from the VIN alphabet (spaces ignored)."""
    text = str(value).replace(' ', '')
    return len(text) == 17 and all(ch in _VIN_ALPHABET for ch in text)


def _best_matching_column(df, predicate):
    """Return the column of *df* with the most non-null cells accepted by *predicate*.

    On a tie the right-most column wins.

    Raises:
        ValueError: if *df* has no columns at all.
    """
    if len(df.columns) == 0:
        raise ValueError('The spreadsheet has no columns to search in')

    best_col, best_count = None, -1
    for col in df.columns:
        count = sum(1 for cell in df[col].dropna() if predicate(cell))
        # ">=" keeps the right-most column among equally good candidates.
        if count >= best_count:
            best_col, best_count = col, count
    return best_col


def _parse_column_layout(df):
    """Extract (resolution, vin) pairs from a sheet with one column for each.

    The two columns are located by content: the one with the most
    resolution-like cells and the one with the most VIN-like cells. Only rows
    with a non-null resolution cell are returned.
    """
    resolution_col = _best_matching_column(df, _is_resolution_number)
    vin_col = _best_matching_column(df, _is_vin)

    resolutions = df[resolution_col].dropna()
    row_labels = resolutions.index
    # A first value that contains letters is treated as a leftover header
    # line and dropped. An empty column simply yields no rows.
    if len(resolutions) > 0 and any(ch.isalpha() for ch in str(resolutions.iloc[0])):
        row_labels = row_labels[1:]

    # .loc rather than .iloc: row_labels are index labels, not positions.
    return list(zip(df.loc[row_labels, resolution_col], df.loc[row_labels, vin_col]))


def _longest_text_column(df):
    """Return the non-null cells of the column holding the most characters in total.

    Returns an empty Series when no column contains any text.
    """
    best_col, best_total = None, 0
    for col in df.columns:
        total = sum(len(cell) for cell in df[col].dropna())
        # Strict ">" keeps the left-most column among equally long ones.
        if total > best_total:
            best_col, best_total = col, total

    if best_col is None:
        return pd.Series([], dtype=object)
    return df[best_col].dropna().reset_index(drop=True)


def _extract_resolution_and_plate(text):
    """Find a resolution number and a licence plate in *text*.

    Returns:
        A ``(resolution, plate)`` tuple; each element is None when the
        corresponding pattern is not found.
    """
    resolution = _RESOLUTION_RE.search(text)
    plate = _PLATE_RE.search(text)
    return (resolution[0] if resolution else None, plate[0] if plate else None)


def _load_vin_by_plate():
    """Read VIN and licence plate of every car from the ``cars_attr`` YT table.

    Needed because the MSIA spreadsheet carries no VIN, only the plate.

    Returns:
        pd.DataFrame with columns ``vin`` and ``number`` (plate, upper-cased).
    """
    yt.config.set_proxy('hahn')

    table_path = yt.TablePath('//home/carsharing/production/car/cars_attr', columns=['vin', 'number'])
    vins = []
    plates = []
    for row in yt.read_table(table_path):
        vins.append(row['vin'])
        plates.append(row['number'].upper())

    return pd.DataFrame({'vin': vins, 'number': plates})


def _parse_msia(df):
    """Extract (resolution, vin) pairs from an MSIA sheet.

    The resolution number and the licence plate are pulled out of the free
    text of the column with the most text; the VIN is then looked up by plate
    in YT. Rows without a resolution number are dropped; rows whose plate is
    unknown keep a missing VIN.
    """
    texts = _longest_text_column(df)
    pairs = [_extract_resolution_and_plate(text) for text in texts]
    pairs = [pair for pair in pairs if pair[0] is not None]
    if not pairs:
        # Nothing to look up, so skip the YT read entirely.
        return []

    found = pd.DataFrame({
        'resolution': [resolution for resolution, _ in pairs],
        'number': [plate for _, plate in pairs],
    })
    merged = found.merge(_load_vin_by_plate(), how='left', on='number')
    return list(zip(merged['resolution'], merged['vin']))


def _write_result(pairs, meta, output_file):
    """Dump (resolution, vin) *pairs* and *meta* to *output_file* as JSON.

    Missing values (NaN/None) are written as ``null``: json.dump would
    otherwise emit the non-standard ``NaN`` token.
    """
    rows = [
        {
            'resolution': None if pd.isna(resolution) else resolution,
            'vin': None if pd.isna(vin) else vin,
        }
        for resolution, vin in pairs
    ]
    data = {'rows': rows, 'meta': meta}
    with open(output_file, "w", encoding="utf-8") as ofd:
        dump(data, ofd, indent=4)


class ResolutionsOnPayment(StartDocument):
    """Parser turning a leasing company's Excel file into resolution/VIN rows."""

    @classmethod
    def get_name(cls):
        """Return the identifier of this document type."""
        return 'resolutions_on_payment'

    @classmethod
    def get_scheme(cls):
        """Return the output schema: string columns ``resolution`` and ``vin``."""
        return {
            "columns": {
                "resolution": {
                    "type": "string",
                    "python_type": "string"
                },
                "vin": {
                    "type": "string",
                    "python_type": "string"
                },
            }
        }

    @classmethod
    def parse_document(cls, input_file, output_file, meta=None):
        """Parse *input_file* (Excel) and write the rows to *output_file* as JSON.

        The parsing strategy is chosen by the file-name stem (the part before
        the first dot): ``major`` / ``vtb`` sheets have separate resolution
        and VIN columns, ``msia`` sheets hold both the resolution number and
        the licence plate in free text.

        Args:
            input_file: path of the Excel file to read.
            output_file: path of the JSON file to write.
            meta: optional dict; ``meta["file_name"]`` overrides *input_file*
                as the name used to choose the strategy. Stored in the output
                under ``"meta"``.

        Raises:
            ValueError: if the file-name stem is not one of major, vtb, msia,
                or the sheet has no columns.
        """
        # meta defaults to None, so normalise it before the .get() below.
        meta = meta or {}
        file_name = meta.get("file_name", input_file)
        # Strip any directory part so that a full path works as well.
        source = os.path.basename(str(file_name)).split('.')[0]

        if source in _COLUMN_LAYOUT_SOURCES:
            pairs = _parse_column_layout(pd.read_excel(input_file, dtype=str))
        elif source == 'msia':
            pairs = _parse_msia(pd.read_excel(input_file, dtype=str))
        else:
            raise ValueError('Измени название файла на одно из [major, vtb, msia]. Пример:"major.xlsx"')

        _write_result(pairs, meta, output_file)
