"""
Получаем отчет infobip, распаковываем его и загружаем все файлы в YT.
Отчет содержит несколько excel файлов, запакованных в zip архив.
"""
from collections import namedtuple
from datetime import datetime
from pathlib import Path

import argparse
import inspect
import logging
import os
import shutil
import tempfile
import zipfile

import openpyxl
import yt.wrapper as yt

# Names of the workbook sheets the loader reads, and the label of the
# Summary row whose value is parsed as the report generation time.
DATA_SHEET_NAME = "Data"
SUMMARY_SHEET_NAME = "Summary"
GENERATED_ROW = "Generated"

# strptime pattern applied to the "Generated" value, and the strftime
# pattern used for the `dt` column written to YT.
GENERATED_TIME_FORMAT = "%d/%m/%Y %H:%M:%S"
YYYY_MM_DD_FORMAT = "%Y-%m-%d"

# File suffixes (lower-case) of archive members that are opened as workbooks.
XLSX_FILE_TYPES = [".xlsx", ".xls"]

# Number of rows buffered in memory before each write to YT.
CHUNK_SIZE = 100_000

# Log everything from INFO upwards through a single stream handler.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler()],
)

# ISO-8601 timestamp taken once at import time; stored with every uploaded row.
now = datetime.now().isoformat()


class Config:
    """Class-level configuration holder, filled in once by :meth:`init`.

    ``init`` discovers every classmethod whose attribute name starts with an
    underscore (the name-mangled private initializers such as ``__init_yt``)
    and calls each of them with no arguments.
    """

    Service = namedtuple('Service', ['name', 'proxy'])

    ECOSYSTEM = 'ecosystem'
    DEFAULT_CLUSTER_NAME = 'hahn'
    DEFAULT_CLUSTER = Service(name=DEFAULT_CLUSTER_NAME, proxy=DEFAULT_CLUSTER_NAME + '.yt.yandex.net')
    __run_init = False

    # Maps a token owner key (e.g. ECOSYSTEM) to the token read from the environment.
    YT_TOKENS = {}

    @classmethod
    def __init_yt(cls):
        """Store the YT_TOKEN_ECOSYSTEM environment value, if set and non-empty."""
        token = os.getenv('YT_TOKEN_ECOSYSTEM')
        if not token:
            return
        cls.YT_TOKENS[cls.ECOSYSTEM] = token

    @classmethod
    def init(cls):
        """Run all private initializers; calls after the first one are no-ops."""
        # no need to initialize a second time
        if cls.__run_init:
            return

        logging.info("Config.init started ...")
        bound_methods = inspect.getmembers(cls, predicate=inspect.ismethod)
        for name, initializer in bound_methods:
            if not name.startswith('_'):
                continue
            logging.info(f"Config.init call: {name}")
            initializer()
        cls.__run_init = True
        logging.info("Config.init done.")


def get_base_parser() -> argparse.ArgumentParser:
    """Build the command-line parser.

    Arguments:
        report: positional input file, opened by argparse in text mode ('r').
        --yt-path: required output YT path, stored as ``yt_path``.
    """
    argument_specs = (
        (('report',), {'type': argparse.FileType('r'), 'help': 'Input file'}),
        (('--yt-path',), {'dest': 'yt_path', 'help': 'Output yt path', 'required': True}),
    )

    parser = argparse.ArgumentParser()
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)
    return parser


def create_yt_table(client: yt.client.Yt, path: str, schema: list):
    """Create the table at ``path`` with ``schema`` unless the path already exists.

    Missing parent nodes are created as well (``recursive=True``).
    """
    if client.exists(path):
        return
    table_attributes = {"schema": schema}
    client.create("table", path, recursive=True, attributes=table_attributes)


def write_to_yt(client: yt.client.Yt, path: str, data: list):
    """Append the records in ``data`` to the table at ``path`` in text YSON format."""
    destination = yt.ypath.TablePath(path, append=True)
    rows = (row for row in data)
    yson_text = yt.YsonFormat(format="text", require_yson_bindings=False)
    client.write_table(destination, rows, format=yson_text)


def get_schema(items: list) -> list:
    """Return one ``utf8`` column description per item, named after its formatted value."""
    return [{"name": f"{column}", "type": "utf8"} for column in items]


def reset_dimensions(ws: openpyxl.worksheet) -> openpyxl.worksheet:
    """Make sure the worksheet reports usable dimensions and return it.

    If the stored dimension is the suspicious 'A1:A1', the stored values are
    dropped and the dimensions are recalculated from the cell data. A
    worksheet that carries no dimensions at all is recalculated as well
    instead of failing.

    :param ws: worksheet exposing ``calculate_dimension`` / ``reset_dimensions``
        (callers in this file open workbooks with ``read_only=True``).
    :return: the same worksheet object.
    """
    try:
        dimension = ws.calculate_dimension()
    except ValueError:
        # NOTE(review): openpyxl read-only worksheets raise ValueError from
        # calculate_dimension() when the sheet is unsized; force=True scans
        # the sheet to compute the dimensions instead.
        logging.info('Worksheet is unsized, calculating dimensions')
        ws.calculate_dimension(force=True)
        return ws

    logging.info(f'Checking that file read correctly: max column {dimension}')
    if dimension == 'A1:A1':
        ws.reset_dimensions()
        ws.calculate_dimension(force=True)
    return ws


def process_single_xlsx_file(wb: openpyxl.Workbook, yt_client: yt.client.Yt, yt_path: str):
    """Upload the Data sheet of one report workbook to a YT table.

    The generation time is read from the Summary sheet (row labelled
    ``GENERATED_ROW``) and stored in every record as ``dt`` (date only),
    together with ``iso_eventtime`` (the module-level ``now``). The first row
    of the Data sheet supplies the column names; every other cell value is
    written as a string, or as None when the cell is empty. Rows are sent in
    chunks of ``CHUNK_SIZE``.

    :param wb: opened workbook; it is always closed before returning,
        including when an error is raised.
    :param yt_client: YT client used to create the table and write rows.
    :param yt_path: destination table path.
    :raises ValueError: if the Summary sheet has no usable "Generated" row,
        or the Data sheet has no header row.
    """
    try:
        # get generated date and add it to yt table
        summary = wb[SUMMARY_SHEET_NAME]
        # if incorrect dimensions - recalc
        reset_dimensions(summary)

        generated = None
        for summary_row in summary.rows:
            # Index the cells instead of unpacking so rows that are not
            # exactly two cells wide do not break the lookup.
            if len(summary_row) >= 2 and summary_row[0].value == GENERATED_ROW:
                generated = summary_row[1].value
                break
        if generated is None:
            raise ValueError(f'Row "{GENERATED_ROW}" not found in sheet "{SUMMARY_SHEET_NAME}"')

        dt_parsed = datetime.strptime(generated.split(',')[0], GENERATED_TIME_FORMAT)
        # The same for every record, so format it once.
        dt_value = dt_parsed.strftime(YYYY_MM_DD_FORMAT)

        # read report and write to YT
        logging.info('Reading file')
        ws = wb[DATA_SHEET_NAME]
        try:
            stale_dimensions = ws.calculate_dimension() == 'A1:A1'
        except ValueError:
            # NOTE(review): openpyxl read-only sheets raise here when they are
            # unsized, which is the state reset_dimensions() would produce.
            stale_dimensions = False
        if stale_dimensions:
            ws.reset_dimensions()
        rows = ws.rows

        header_cells = next(rows, None)
        if header_cells is None:
            raise ValueError(f'Sheet "{DATA_SHEET_NAME}" is empty: header row is missing')
        first_row = [cell.value for cell in header_cells]

        logging.info('Defining schema for yt upload')
        schema = get_schema(['dt', 'iso_eventtime'] + first_row)

        logging.info('Checking or creating YT table')
        create_yt_table(yt_client, yt_path, schema)

        yt_data = []
        row_count = 0
        for row_count, row in enumerate(rows, start=1):
            record = {'dt': dt_value, 'iso_eventtime': now}
            for column, cell in zip(first_row, row):
                record[column] = None if cell.value is None else str(cell.value)
            yt_data.append(record)
            if row_count % CHUNK_SIZE == 0:
                logging.info('Writing Data to YT')
                write_to_yt(yt_client, yt_path, yt_data)
                yt_data = []
                logging.info(f'Processed {row_count} rows')

        # Flush the tail only when something is still buffered.
        if yt_data:
            logging.info('Writing Data to YT')
            write_to_yt(yt_client, yt_path, yt_data)
        logging.info(f'Processed {row_count} rows')
        logging.info('Processing file completed')
    finally:
        wb.close()


def main():
    """
    Upload a report to YT.

    - the input is a single file: either an xlsx workbook or a zip archive
    - an xlsx workbook is uploaded directly
    - otherwise a temp directory is created and the archive is unpacked into it
    - every extracted file with a suffix from XLSX_FILE_TYPES is uploaded to YT

    Returns early (after logging) when no YT token is configured.
    """
    Config.init()
    cfg = Config

    logging.info("Parsing arguments")
    parser = get_base_parser()
    opt = parser.parse_args()

    logging.info(f'Initialized. args={opt}')

    yt_token = cfg.YT_TOKENS.get(cfg.ECOSYSTEM)
    if yt_token is None:
        logging.info('YT token not provided')
        return

    yt_client = yt.client.Yt(
        proxy=cfg.DEFAULT_CLUSTER.proxy,
        token=yt_token,
    )

    logging.info('YT Client initialized')

    report_path = opt.report.name
    # argparse opened the report in text mode; zipfile and openpyxl need the
    # underlying binary stream.
    report_stream = opt.report.buffer

    # An xlsx workbook is itself a zip archive containing '[Content_Types].xml'.
    # Only this probe decides the branch, so a KeyError raised later during
    # processing is no longer mistaken for "not an xlsx file".
    logging.info('Checking that file is xlsx file')
    with zipfile.ZipFile(report_stream) as probe:
        is_xlsx = '[Content_Types].xml' in probe.namelist()

    uploaded_files = 0
    if is_xlsx:
        logging.info('File is an xlsx file, not need to unpack')
        wb = openpyxl.open(report_stream, read_only=True)
        logging.info(f'Processing file {report_path} started')
        process_single_xlsx_file(wb, yt_client, opt.yt_path)
        uploaded_files += 1
    else:
        logging.info('Cannot open file as Excel, unpacking')
        # The context manager removes the directory even if processing fails.
        with tempfile.TemporaryDirectory() as unpack_dir:
            logging.info(f'Created temp dir {unpack_dir}')

            with zipfile.ZipFile(report_path) as archive:
                logging.info('Unpacking file')
                archive.extractall(unpack_dir)

            extracted = os.listdir(unpack_dir)
            logging.info(f'Extracted files {extracted}')

            for res in extracted:
                filepath = os.path.join(unpack_dir, res)
                if Path(filepath).suffix.lower() in XLSX_FILE_TYPES:
                    wb = openpyxl.open(filepath, read_only=True)
                    logging.info(f'Processing file {filepath} started')
                    process_single_xlsx_file(wb, yt_client, opt.yt_path)
                    uploaded_files += 1

            logging.info('Cleanup temp dir')

    # Report "nothing to upload" only when no workbook was actually processed.
    if not uploaded_files:
        logging.info('Nothing to upload')

    logging.info('Done')
