# -*- coding: utf-8 -*-
from argparse import ArgumentParser
from glob import glob
import sys
import sqlite3
import traceback as tb
import xlsxwriter
from const import LOG_FILE, DB_NAME, TBL_NAME, TASK_NAME, CONFIG

# SQLite column declarations shared by the DATA_TYPES entries.
_TEXT = u'TEXT'
_INTEGER = u'INTEGER DEFAULT 0'
_REAL = u'REAL DEFAULT 0.0'

# Maps a column name as it appears in a source-file header to a pair
# [SQLite column declaration, column name used in the created table].
DATA_TYPES = {
    # Text dimensions. `ShortMonth`, `Domain` and `BannerCategory` are stored
    # under the names `Month`, `Client` and `CustomCategory` respectively.
    'Month': [_TEXT, u'Month'],
    'ShortMonth': [_TEXT, u'Month'],
    'Qvartal': [_TEXT, u'Qvartal'],
    'Client': [_TEXT, u'Client'],
    'Domain': [_TEXT, u'Client'],
    'CustomCategory': [_TEXT, u'CustomCategory'],
    'BannerCategory': [_TEXT, u'CustomCategory'],
    'Region': [_TEXT, u'Region'],
    'Device': [_TEXT, u'Device'],
    'CampaignType': [_TEXT, u'CampaignType'],
    'BannerType': [_TEXT, u'BannerType'],
    'IsMobile': [_TEXT, u'IsMobile'],
    'Client_Type': [_TEXT, u'Client_Type'],
    'Region_Type': [_TEXT, u'Region_Type'],

    # Numeric measures.
    'Direct_Shows': [_INTEGER, u'Direct_Shows'],
    'Direct_Clicks': [_INTEGER, u'Direct_Clicks'],
    'Direct_Cost': [_REAL, u'Direct_Cost'],
    'RSYA_Shows': [_INTEGER, u'RSYA_Shows'],
    'RSYA_Clicks': [_INTEGER, u'RSYA_Clicks'],
    'RSYA_Cost': [_REAL, u'RSYA_Cost'],
    'Shows': [_INTEGER, u'Shows'],
    'Clicks': [_INTEGER, u'Clicks'],
    'Cost': [_REAL, u'Cost'],
    'CompCount': [_INTEGER, u'CompCount'],

    # Header names carrying a `:str` / `:int` / `:float` suffix; the suffix
    # is dropped from the stored column name.
    'Month:str': [_TEXT, u'Month'],
    'Client:str': [_TEXT, u'Client'],
    'CustomCategory:str': [_TEXT, u'CustomCategory'],
    'Region:str': [_TEXT, u'Region'],
    'Direct_Shows:int': [_INTEGER, u'Direct_Shows'],
    'Direct_Clicks:int': [_INTEGER, u'Direct_Clicks'],
    'Direct_Cost:float': [_REAL, u'Direct_Cost'],
    'RSYA_Shows:int': [_INTEGER, u'RSYA_Shows'],
    'RSYA_Clicks:int': [_INTEGER, u'RSYA_Clicks'],
    'RSYA_Cost:float': [_REAL, u'RSYA_Cost'],
    'Shows:int': [_INTEGER, u'Shows'],
    'Clicks:int': [_INTEGER, u'Clicks'],
    'Cost:float': [_REAL, u'Cost'],
}


def make_db(db_name, tbl_name, src_file, sep='\t'):
    """Parse `src_file` line by line and store its rows in `db_name`.

    The first line of the file is the header.  Every header name must be a
    key of DATA_TYPES, which supplies the stored column name and the SQLite
    column declaration.  Table `tbl_name` is dropped and re-created on every
    call.

    Params:
    db_name  -- path of the SQLite database file
    tbl_name -- name of the table to (re)create
    src_file -- name of the file with the source data; it is tried as-is
                and with the extensions .tsv, .txt and .csv (the name is
                passed to glob, so wildcards are expanded)
    sep      -- value separator within a line

    Raises:
    ValueError -- when no file, or more than one file, matches `src_file`
    KeyError   -- when the header holds a name missing from DATA_TYPES

    Notes:
    Values must not contain the characters: # /
    """
    data_files = []
    for file_type in ('', '.tsv', '.txt', '.csv'):
        data_files.extend(glob('%s%s' % (src_file, file_type)))

    if not data_files:
        raise ValueError('there are no files like `%s`. Check your current working folder' % src_file)
    if len(data_files) > 1:
        raise ValueError('there are too many matched files `%s`. Check your current working folder' % src_file)

    with open(data_files[0]) as src:
        # list() keeps the code independent of whether parse_row returns a
        # list or a lazy iterator: the header is measured and walked twice.
        header = list(parse_row(sep, src.readline()))
        # Completely blank lines (typically a trailing newline at the end
        # of the file) carry no values and would break the INSERT below.
        rows = [list(parse_row(sep, line)) for line in src if line.strip('\r\n')]

    # Validate the header before touching the database, so that a bad file
    # does not leave the previous table dropped.
    unknown = [name for name in header if name not in DATA_TYPES]
    if unknown:
        raise KeyError(
            'unknown column(s) in header of `%s`: %s' % (data_files[0], ', '.join(unknown))
        )

    # Part of the CREATE TABLE query describing every column of the file.
    # Example: "Month" TEXT, "Client" TEXT, "CustomCategory" TEXT, "Region" TEXT
    schema_vals = ', '.join(
        '"{fld_name}" {fld_type}'.format(
            fld_name=DATA_TYPES[name][1], fld_type=DATA_TYPES[name][0]
        )
        for name in header
    )
    create_tbl = 'CREATE TABLE {table} ({fields_type})'.format(table=tbl_name, fields_type=schema_vals)
    loader = 'INSERT INTO {table} VALUES ({values})'.format(table=tbl_name, values=','.join('?' * len(header)))

    conn = sqlite3.connect(db_name)
    try:
        conn.text_factory = str
        curs = conn.cursor()
        curs.execute('DROP TABLE IF EXISTS %s' % tbl_name)
        curs.execute(create_tbl)
        curs.executemany(loader, rows)
        conn.commit()
    finally:
        # Release the database file even when one of the statements fails.
        conn.close()


def parse_row(sep, row):
    """parse_row(str, str) -> List[str]

    Split `row` on `sep` and strip the characters `#`, `"`, space, newline
    and carriage return from both ends of every value.  A real list is
    returned on every Python version, so the result can be measured with
    len() and iterated more than once.
    """
    return [value.strip('#" \n\r') for value in row.split(sep)]


def get_info_lists(db_name, table_name):
    """Collect summary data about the contents of table `table_name`.

    The queries are taken from CONFIG['info']: the `competitors` template
    is expected to yield rows whose first field is a client and whose
    second field is a cost (converted with int()); the `distinct` template
    is run for the columns Month, CustomCategory and, when the table has
    it, Region.

    Returns the tuple
    (table_name, months, clients, costs, regions, cats);
    `regions` is an empty list when the table has no Region column.
    """
    queries = CONFIG['info']

    conn = sqlite3.connect(db_name)
    try:
        curs = conn.cursor()

        def distinct_values(column):
            # First field of every row produced by the `distinct` query.
            curs.execute(queries['distinct'].format(table_name=table_name, column=column))
            return [row[0] for row in curs]

        # clients and their costs
        curs.execute(queries['competitors'].format(table_name=table_name))
        client_cost_rows = [(row[0], int(row[1])) for row in curs]

        months = distinct_values('Month')
        cats = distinct_values('CustomCategory')

        # regions; it is fine for the column to be absent
        if 'Region' in get_db_table_columns(db_name, table_name):
            regions = distinct_values('Region')
        else:
            regions = []
    finally:
        # Close the connection even when a query or the int() conversion fails.
        conn.close()

    clients = [client for client, _ in client_cost_rows]
    costs = [cost for _, cost in client_cost_rows]

    return table_name, months, clients, costs, regions, cats


def get_db_table_columns(db_name, tbl_name):
    """Return the list of column names of table `tbl_name` in `db_name`.

    The names are read from ``PRAGMA table_info``.  A table that does not
    exist produces no rows, hence an empty list.
    """
    # The name is embedded as an SQL string literal, so a single quote
    # inside it has to be doubled to keep the statement well-formed.
    quoted_name = tbl_name.replace("'", "''")

    conn = sqlite3.connect(db_name)
    try:
        curs = conn.cursor()
        curs.execute(u"PRAGMA table_info('%s')" % quoted_name)
        # Field 1 of every table_info row holds the column name.
        return [row[1] for row in curs]
    finally:
        # Close the connection even when the statement fails.
        conn.close()


def describe_db_table(db_name, tbl_name, info_path):
    """Save statistics about table `tbl_name` of `db_name` to an xlsx file.

    The workbook written to `info_path` has a single sheet `Info`:
    row 1 holds the column titles, columns A-E are filled with the lists
    returned by get_info_lists (months, clients, costs, regions,
    categories), and cells K2:M2 receive preset values.
    """
    info_wb = xlsxwriter.Workbook(info_path)

    info_ws = info_wb.add_worksheet('Info')
    info_ws.set_column(0, 5, 13)
    info_ws.set_column(6, 13, 12)

    merge_format = info_wb.add_format({
        'font_name': 'Arial',
        'font_size': '10',
        'bold': True,
        'align': 'center',
        # XlsxWriter spells vertical centring 'vcenter'; plain 'center'
        # only affects the horizontal alignment.
        'valign': 'vcenter',
        'text_wrap': True,
        'bg_color': '#a5a5a5'
    })

    info_ws.write_row(0, 0, [
        'Months', 'Competitors', 'Cost', 'Regions', 'Categories', '',
        'Client', 'Tgt_Reg', 'Period_1', 'Period_2', 'Client in category', 'Lang', 'Currency', 'Bench competitors'
    ], merge_format)

    # Comments attached to the title cells.
    cells_comments = {
        'H1': u'Геосегмент 1 и геосегмент 2',
        'I1': u'В формате YYYYMM или YYYYMM - YYYYMM',
        'J1': u'В формате YYYYMM или YYYYMM - YYYYMM',
        'K1': u'Учитывать клиента в категории (да/нет). По умолчанию клиент учитывается в категории',
        'L1': u'Язык сборки отчетов (Ru/En)',
        'N1': u'Список конкурентов для бенчмаркинга (максимум 10). '
              u'По умолчанию используется топ-5 по откруткам за Period_2. '
              u'Список конкурентов используется, если отчет строится для одного клиента'
    }
    # items() exists on both Python 2 and Python 3 dictionaries,
    # unlike iteritems().
    for cell, comment in cells_comments.items():
        info_ws.write_comment(cell, comment)

    # Preset values for the `Client in category`, `Lang` and `Currency` columns.
    info_ws.write('K2', u'да')
    info_ws.write('L2', u'Ru')
    info_ws.write('M2', u'у.е.')

    # The first item of the tuple is the table name; the remaining lists go
    # to consecutive columns starting at A, below the title row.
    info = get_info_lists(db_name, tbl_name)
    for col_num, col_values in enumerate(info[1:]):
        info_ws.write_column(1, col_num, col_values)

    info_wb.close()


def pars_argv():
    """Parse the command line of the script.

    The only option is `--src`, the name of the file to convert; it
    defaults to `data`.  Returns the namespace produced by argparse.
    """
    src_option = {
        'default': 'data',
        'metavar': '',
        'help': u'filename to convert. Default: `%(default)s`.',
    }

    cli = ArgumentParser(description=u'converter from tsv to sqlite db')
    cli.add_argument('--src', **src_option)

    # Everything after the program name.
    arguments = sys.argv[1:]
    return cli.parse_args(arguments)


def main(db_name, tbl_name, src_file, dst):
    """Build the database from `src_file` and describe the table in `dst`.

    A start marker, and on success a done marker, are appended to LOG_FILE.
    An exception raised by either step is not propagated: its traceback is
    appended to LOG_FILE instead and the done marker is omitted.
    """
    with open(LOG_FILE, 'a') as log:
        log.write('start tsv2db\n')

    failed = False
    try:
        make_db(db_name, tbl_name, src_file)
        describe_db_table(db_name, tbl_name, dst)
    except Exception:
        failed = True
        with open(LOG_FILE, 'a') as log:
            tb.print_exc(file=log)

    if not failed:
        with open(LOG_FILE, 'a') as log:
            log.write('done tsv2db\n')


if __name__ == '__main__':
    # Script entry point: only the source file name comes from the command
    # line, everything else is taken from the `const` module.
    main(DB_NAME, TBL_NAME, pars_argv().src, TASK_NAME)
