import datetime
import email
import email.message
import imaplib
import logging
import re
import sys
from argparse import ArgumentParser
# from collections import OrderedDict
from decimal import Decimal
import xlrd

from yt.wrapper import YtClient
import yt.wrapper as yt

from dateutil.relativedelta import relativedelta
from parse import parse
from travel.hotels.lib.python3.yt import ytlib
from travel.library.python.tools import replace_args_from_env
from travel.library.python.imap.oauth_tools import imap_oauth_authenticate

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)

# Column name -> YT type name for the 'processed_mail' bookkeeping table.
# YtUploader appends one row per uploaded attachment ('mail_name' holds the
# attachment file name) and consults the table to skip files already handled.
PROCESSED_MAIL_FIELDS = {
    'mail_name': 'string',
    'processed': 'boolean',
}

# Column name -> YT type name for rows of the "Estimated Commissions" sheet.
# Keys are looked up by the (stripped) sheet header text, so they must match
# the spreadsheet headers exactly. The type name drives the cell coercion in
# YtUploader.parse_cell_value; columns whose name ends in "Date" are converted
# from Excel serial dates to ISO-format strings before the type is consulted.
ESTIMATED_COMISSION_FIELDS = {
    'Parent Business Partner ID (PID)': 'int64',
    'Business Partner ID (CID)': 'int64',
    'Order Number': 'int64',
    'Order Confirmation Number': 'string',
    'Itinerary Number': 'string',
    'Booking Item (Room) ID': 'int64',
    'Book Date': 'string',
    'Check In Date': 'string',
    'Check Out Date': 'string',
    'Business Model Name': 'string',
    'Online Offline Indicator': 'string',
    'Package Indicator': 'string',
    'Net Room Nights': 'int64',
    'Room Booking Status': 'string',
    'Gross Booking Amount (GBV) USD': 'double',
    'Gross Booking Amount (Local Currency)': 'double',
    'Price Currency Code': 'string',
    'Estimated Marketing Fee USD': 'double',
}

# Column name -> YT type name for rows of the "Transaction Level Detail" sheet.
# Same conventions as the other schema dicts in this module: keys must equal
# the stripped sheet headers, and columns whose name ends in "Date" are
# converted from Excel serial dates to ISO-format strings.
TRANSACTION_LEVEL_DETAIL_FIELDS = {
    'Parent Business Partner ID (PID)': 'int64',
    'Business Partner ID (CID)': 'int64',
    'Order Number': 'int64',
    'Order Confirmation Number': 'string',
    'Itinerary Number': 'string',
    'Booking Item (Room) ID': 'int64',
    'Transaction Category Name': 'string',
    'Transaction Date': 'string',
    'Book Date': 'string',
    'Check In Date': 'string',
    'Check Out Date': 'string',
    'Business Model Name': 'string',
    'Online Offline Indicator': 'string',
    'Package Indicator': 'string',
    'Net Room Nights': 'int64',
    'Room Booking Status': 'string',
    'Gross Booking Amount (GBV) USD': 'double',
}

# Column name -> YT type name for rows of "Sheet2" in statement workbooks.
# YtUploader.parse_cell_value special-cases this schema: a column whose name
# ends in "Date" (here only 'Transaction Date') is parsed as a DD-MON-YYYY
# string rather than an Excel serial date. 'Use Date Begin' / 'Use Date End'
# do not end in "Date" and are therefore stored as plain strings.
STATEMENT_FIELDS = {
    'Affiliate Id': 'int64',
    'Transaction Type': 'string',
    'Transaction Date': 'string',
    'Use Date Begin': 'string',
    'Use Date End': 'string',
    'Night Count': 'int64',
    'Total Person Count': 'int64',
    'EAN Itinerary ID': 'string',
    'Confirmation ID': 'string',
    'Affiliate Reference Number': 'string',
    'Currency Code': 'string',
    'Amount': 'double',
    'Marketing Fee Amount': 'double',
    'Amount Payable': 'double',
    'Hotel Name': 'string',
    'Hotel City': 'string',
    'Hotel Country': 'string',
    'Last Name': 'string',
    'First Name': 'string',
    'Partner Type': 'string',
    'Refund Reason': 'string',
    'EAC Oracle Ref.': 'string',
    'Booking Item ID': 'int64',
    'AdditionalData1': 'string',
    'AdditionalData2': 'string',
}


class XlsParser:
    """Stateless helpers for extracting spreadsheet attachments and their rows.

    Sheet arguments only need to expose ``get_rows()``, returning an iterable
    of rows where every cell carries a ``.value`` attribute (this is how the
    xlrd sheets opened elsewhere in this module are consumed).
    """

    @staticmethod
    def yield_all_xlsx(msg):
        """Yield ``(filename, payload)`` for each ``xls``/``xlsx`` attachment of *msg*.

        *msg* is walked with ``msg.walk()``; parts without a file name or with a
        different suffix are skipped. The yielded file name is reduced to its
        last whitespace-separated token; the payload is the decoded bytes.
        """
        for part in msg.walk():
            filename = part.get_filename()
            if filename is None or not filename.endswith(("xlsx", "xls")):
                continue
            # Keep only the trailing token so the name contains no whitespace.
            filename = filename.split().pop()
            payload = part.get_payload(decode=True)
            LOG.info("Processing %r (%s bytes)", filename, len(payload))
            yield filename, payload

    @staticmethod
    def yield_all_xlsx_sheet_rows(sheet, header_row, first_body_row=None, last_body_row=None):
        """Yield one ``{header: cell value}`` dict per body row of *sheet*.

        Args:
            sheet: object exposing ``get_rows()``.
            header_row: 0-based index of the row holding the column names.
            first_body_row: 0-based index of the first data row; defaults to
                the row right after the header.
            last_body_row: exclusive end of the data rows (regular slice
                semantics, so a negative value counts from the end); defaults
                to the end of the sheet.

        Columns whose header is the empty string are dropped from every item.
        """
        rows = list(sheet.get_rows())
        if first_body_row is None:
            first_body_row = header_row + 1
        if last_body_row is None:
            last_body_row = len(rows)
        keys = [str(cell.value) for cell in rows[header_row]]
        for row in rows[first_body_row:last_body_row]:
            item = dict(zip(keys, (cell.value for cell in row)))
            # Blank-header columns are padding; tolerate sheets that have none
            # instead of failing with KeyError.
            item.pop('', None)
            yield item

    @staticmethod
    def get_header_row(sheet, header_row):
        """Return the non-empty header strings found in row *header_row* of *sheet*."""
        rows = list(sheet.get_rows())
        headers = (str(cell.value) for cell in rows[header_row])
        return [header for header in headers if header]

    @staticmethod
    def yield_all_statements_items(sheet):
        """Yield the data rows of a statement sheet.

        The header is the 13th row and data starts on the 14th (1-based); the
        last two rows of the sheet are excluded.
        """
        # A negative slice bound drops the two trailing rows without having to
        # read the whole sheet a second time just to count its rows.
        yield from XlsParser.yield_all_xlsx_sheet_rows(sheet, 12, 13, -2)  # hdr=13, dat=14+

    @staticmethod
    def yield_all_estimated_comissions_items(sh_ec):
        """Yield the data rows of an estimated-commissions sheet (header at index 11)."""
        yield from XlsParser.yield_all_xlsx_sheet_rows(sh_ec, 11)  # hdr=11, dat=12+

    @staticmethod
    def yield_all_transaction_level_detail_items(sh_tld):
        """Yield the data rows of a transaction-level-detail sheet (header at index 1)."""
        yield from XlsParser.yield_all_xlsx_sheet_rows(sh_tld, 1)  # hdr=1, dat=2+

    @staticmethod
    def parse_decimal(value, prec=2):
        """Return *value* as a ``Decimal`` quantized to *prec* fractional digits."""
        return Decimal(value).quantize(Decimal("1." + prec * "0"))


class Downloader:
    """Fetches report e-mails from an IMAP folder and buckets them by subject.

    After ``download()`` the messages are available through the ``yield_*``
    generators (known report kinds) and ``get_rest()`` (everything else).
    """

    def __init__(self, args):
        """Open the IMAP connection and prepare empty message buckets.

        *args* must provide ``imap_token`` and an ISO-formatted ``date_from``.
        """
        self.args = args
        self.imap = imaplib.IMAP4_SSL("imap.yandex-team.ru")
        self.imap_token = self.args.imap_token
        self.date_from = parse_datetime_iso(self.args.date_from).date()
        # One list per recognised kind of e-mail, filled by download().
        self.msgs_statements = []
        self.msgs_booked_yesterday = []
        self.msgs_booked_past_week = []
        self.msgs_stayed_yesterday = []
        self.msgs_stayed_past_week = []
        self.msgs_unknown = []

    def yield_statements(self):
        """Yield every transaction-statement message."""
        yield from self.msgs_statements

    def yield_booked_stayed(self):
        """Yield stayed/booked "yesterday" messages, then the "last 7 days" ones."""
        yield from (
            self.msgs_stayed_yesterday
            + self.msgs_booked_yesterday
            + self.msgs_stayed_past_week
            + self.msgs_booked_past_week
        )

    def yield_booked(self):
        """Yield "booked" messages: yesterday's first, then the weekly ones."""
        yield from self.msgs_booked_yesterday + self.msgs_booked_past_week

    def yield_stayed(self):
        """Yield "stayed" messages: yesterday's first, then the weekly ones."""
        yield from self.msgs_stayed_yesterday + self.msgs_stayed_past_week

    def get_rest(self):
        """Return the list of messages whose subject was not recognised."""
        return self.msgs_unknown

    def download(self):
        """Authenticate, search the partner folder and sort messages into buckets.

        Delay notifications and replies ("Re:") are skipped entirely. Any
        unexpected IMAP response trips an ``assert``.
        """
        imap_oauth_authenticate(self.imap, "robot-travel-prod@yandex-team.ru", self.imap_token)
        self.imap.select("Yandex|travel-hotels-partner-expedia")

        # NOTE(review): %b is the locale's abbreviated month name -- presumably
        # this runs under an English/C locale, as IMAP requires; confirm.
        criterion = ("SINCE " + self.date_from.strftime('%d-%b-%Y')) if self.date_from else "ALL"
        status, found = self.imap.search(None, criterion)
        assert status == "OK"

        skipped_patterns = (
            r"^Yandex Delay Notification.+$",
            r"^Re:.*$",
        )
        statement_pattern = r"^Yandex (?:LLC )?Transaction Statement (\d+) For Period Ending (\d{2}-[A-Z]{3}-\d{4})$"
        # Exact subject -> (log format or None when nothing is logged, target bucket).
        exact_subjects = {
            "Yandex: Estimated Commission Booked - Last 7 days": (None, self.msgs_booked_past_week),
            "Yandex: Estimated Commission Stayed - Last 7 days": (None, self.msgs_stayed_past_week),
            "Yandex: Estimated Comission Stayed Yesterday": ("STAY: {}", self.msgs_stayed_yesterday),
            "Yandex: Estimated Comission Booked Yesterday": ("BOOK: {}", self.msgs_booked_yesterday),
        }

        for msg_id in found[0].split():
            status, fetched = self.imap.fetch(msg_id, "(RFC822)")
            assert status == "OK"
            assert len(fetched) == 2
            assert len(fetched[0]) == 2
            message = email.message_from_bytes(fetched[0][1])
            # Collapse whitespace runs (folded headers) into single spaces.
            subject = re.sub(r"\s+", " ", message.get("Subject", "")).strip()

            if any(re.match(pattern, subject) for pattern in skipped_patterns):
                continue

            if re.match(statement_pattern, subject):
                LOG.info("STMT: {}".format(subject))
                self.msgs_statements.append(message)
                continue

            log_format, bucket = exact_subjects.get(subject, ("UNKN: {}", self.msgs_unknown))
            if log_format is not None:
                LOG.info(log_format.format(subject))
            bucket.append(message)


class YtUploader:
    """Writes parsed spreadsheet attachments to YT tables.

    A bookkeeping table (``<yt_path>/processed_mail``) remembers which
    attachment file names were already uploaded so they are not redone.
    """

    def __init__(self, args):
        """Create the YT client and make sure the bookkeeping table exists.

        *args* must provide ``yt_proxy``, ``yt_token`` and ``yt_path``.
        """
        self.args = args
        self.yt_client = YtClient(proxy=args.yt_proxy, token=args.yt_token)
        self.processed_path = yt.ypath_join(args.yt_path, 'processed_mail')
        self.ensure_table_exists(self.processed_path, PROCESSED_MAIL_FIELDS)
        self.yt_client.set_attribute(path=self.processed_path, attribute='optimize_for', value='scan')

    def ensure_table_exists(self, table_path, schema):
        """Create *table_path* with the schema built from the *schema* dict, if needed."""
        yt_schema = ytlib.schema_from_dict(schema)
        ytlib.ensure_table_exists(table_path, self.yt_client, yt_schema)

    def mark_file_processed(self, filename):
        """Append a "processed" record for *filename* to the bookkeeping table."""
        column_names = PROCESSED_MAIL_FIELDS.keys()
        record = dict(zip(column_names, [filename.strip(), True]))
        target = self.yt_client.TablePath(self.processed_path, append=True)
        self.yt_client.write_table(target, [record])

    def check_file_processed(self, filename):
        """Return the ``processed`` flag of the first record for *filename*, else ``False``."""
        records = self.yt_client.read_table(self.processed_path)
        return next(
            (record['processed'] for record in records if record['mail_name'] == filename),
            False,
        )

    @staticmethod
    def _parse_statement_date_(value):
        """Parse a ``DD-MON-YYYY`` string (upper-case month abbreviation) into a date."""
        month_names = "JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC".split()
        day, month, year = value.split("-")
        if month in month_names:
            month = month_names.index(month) + 1
        # Anything that is not a known abbreviation is handed to int() as is.
        return datetime.date(int(year), int(month), int(day))

    @staticmethod
    def _parse_excel_date_(value):
        """Convert an Excel serial date (datemode 0) into a ``datetime``."""
        datemode = 0
        return xlrd.xldate.xldate_as_datetime(value, datemode)

    @staticmethod
    def parse_cell_value(key, value, schema):
        """Coerce one spreadsheet cell to the representation stored in YT.

        Columns whose name ends in "Date" become ISO-format strings (statement
        sheets carry textual dates, the other sheets Excel serial dates). All
        other columns are converted according to ``schema[key]``; empty
        numeric cells become zero, and unknown type names pass *value* through.
        """
        if re.match(r".*Date$", key):
            if schema != STATEMENT_FIELDS:
                parsed = YtUploader._parse_excel_date_(value)
            else:
                parsed = YtUploader._parse_statement_date_(value)
            return parsed.isoformat()

        column_type = schema[key]
        if column_type == 'string':
            return str(value)
        if column_type == 'double':
            return float(value) if value != '' else 0.0
        if column_type == 'int64':
            return int(value) if value != '' else 0
        return value

    def create_table_from_xls_sheet(self, table_path, schema, data_rows):
        """Convert *data_rows* per *schema* and write them to *table_path*.

        All rows are converted before the transaction starts; table creation
        and the write happen inside a single YT transaction.
        """
        converted = []
        for raw_row in data_rows:
            record = {}
            for header, cell in raw_row.items():
                column = header.strip()
                record[column] = self.parse_cell_value(column, cell, schema)
            converted.append(record)

        with self.yt_client.Transaction():
            self.ensure_table_exists(table_path, schema)
            self.yt_client.write_table(self.yt_client.TablePath(table_path), converted)

    def process_message(self, msg, yt_dir):
        """Upload both sheets of every not-yet-processed attachment of *msg*.

        Each workbook yields two tables under *yt_dir*, named after the
        attachment with ``_EstimatedCommissions`` / ``_TransactionLevelDetail``
        suffixes; the file is then recorded as processed.
        """
        for filename, payload in XlsParser.yield_all_xlsx(msg):
            if self.check_file_processed(filename):
                continue
            workbook = xlrd.open_workbook(file_contents=payload)
            table_prefix = filename.strip()

            # Resolve both sheets before writing anything.
            commissions_sheet = workbook.sheet_by_name("Estimated Commissions")
            commissions_path = yt.ypath_join(self.args.yt_path, yt_dir, table_prefix + '_EstimatedCommissions')
            details_sheet = workbook.sheet_by_name("Transaction Level Detail")
            details_path = yt.ypath_join(self.args.yt_path, yt_dir, table_prefix + '_TransactionLevelDetail')

            self.create_table_from_xls_sheet(
                commissions_path,
                ESTIMATED_COMISSION_FIELDS,
                XlsParser.yield_all_estimated_comissions_items(commissions_sheet),
            )
            self.create_table_from_xls_sheet(
                details_path,
                TRANSACTION_LEVEL_DETAIL_FIELDS,
                XlsParser.yield_all_transaction_level_detail_items(details_sheet),
            )
            self.mark_file_processed(filename)

    def process_stmt_msg(self, msg, yt_dir):
        """Upload "Sheet2" of every not-yet-processed statement attachment of *msg*."""
        for filename, payload in XlsParser.yield_all_xlsx(msg):
            if self.check_file_processed(filename):
                continue
            workbook = xlrd.open_workbook(file_contents=payload)
            statement_sheet = workbook.sheet_by_name("Sheet2")
            statement_path = yt.ypath_join(self.args.yt_path, yt_dir, filename.strip() + '_Statement')
            self.create_table_from_xls_sheet(
                statement_path,
                STATEMENT_FIELDS,
                XlsParser.yield_all_statements_items(statement_sheet),
            )
            self.mark_file_processed(filename)

    def process_booked(self, downloader: Downloader):
        """Upload all "booked" report messages into ``<yt_path>/booked``."""
        target_dir = yt.ypath_join(self.args.yt_path, 'booked')
        ytlib.ensure_dir(self.yt_client, target_dir)
        for message in downloader.yield_booked():
            self.process_message(message, target_dir)

    def process_stayed(self, downloader: Downloader):
        """Upload all "stayed" report messages into ``<yt_path>/stayed``."""
        target_dir = yt.ypath_join(self.args.yt_path, 'stayed')
        ytlib.ensure_dir(self.yt_client, target_dir)
        for message in downloader.yield_stayed():
            self.process_message(message, target_dir)

    def process_statements(self, downloader: Downloader):
        """Upload all statement messages into ``<yt_path>/statements``."""
        target_dir = yt.ypath_join(self.args.yt_path, 'statements')
        ytlib.ensure_dir(self.yt_client, target_dir)
        for message in downloader.yield_statements():
            self.process_stmt_msg(message, target_dir)


def main():
    """Entry point: fetch recent report e-mails and upload their sheets to YT.

    Options come from ``replace_args_from_env()``; ``--date-from`` defaults to
    seven days before today.
    """
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format="%(asctime)-15s | %(module)s | %(levelname)s | %(message)s",
    )
    # Keep the YT client's HTTP connection-pool logger quiet.
    logging.getLogger('yt.packages.urllib3.connectionpool').setLevel(logging.WARNING)

    week_ago = datetime.date.today() + relativedelta(days=-7)

    parser = ArgumentParser()
    parser.add_argument('--yt-proxy', default='hahn')
    parser.add_argument('--yt-token', required=True)
    parser.add_argument('--yt-path', default='//home/travel/alexcrush/expedia_bookings/mail_files')
    parser.add_argument('--imap-token', required=True)
    parser.add_argument('--date-from', default=week_ago.isoformat())
    args = parser.parse_args(args=replace_args_from_env())

    downloader = Downloader(args)
    downloader.download()

    uploader = YtUploader(args)
    for upload_step in (
        uploader.process_booked,
        uploader.process_stayed,
        uploader.process_statements,
    ):
        upload_step(downloader)


def parse_datetime_iso(dt_text):
    """Parse an ISO 8601 date/time string using the ``parse`` library's ``ti`` type.

    Args:
        dt_text: text such as ``"2021-03-05"`` or ``"2021-03-05T10:21:36"``.

    Returns:
        The value extracted by the ``{:ti}`` field (callers in this module
        treat it as a ``datetime`` and call ``.date()`` on it).

    Raises:
        ValueError: if *dt_text* does not match the ``ti`` format.
    """
    result = parse('{:ti}', dt_text)
    if result is None:
        # parse() signals "no match" with None; fail with a message that names
        # the offending input instead of an opaque subscript TypeError.
        raise ValueError("Invalid ISO 8601 date/time: {!r}".format(dt_text))
    return result[0]


# Run the download/upload pipeline only when executed as a script.
if __name__ == '__main__':
    main()
