# -*- coding: utf-8 -*-
import sys
import sqlite3
import argparse
import traceback
from glob import glob

import xlsxwriter


# Log file (path relative to the current working directory) to which the
# script entry point appends its start/done markers and error tracebacks.
LOG_FILE = '.logerror.log'
# Maps a column name as it appears in the source file header to a pair
# [SQLite column type declaration, column name used in the created table].
# Several header spellings (e.g. 'Domain', 'Client:str') map to the same
# table column name.
DATA_TYPES = {
    'Month': [u'TEXT', u'Month'],
    'ShortMonth': [u'TEXT', u'Month'],
    'Client': [u'TEXT', u'Client'],
    'Domain': [u'TEXT', u'Client'],
    'CustomCategory': [u'TEXT', u'CustomCategory'],
    'BannerCategory': [u'TEXT', u'CustomCategory'],
    'Region': [u'TEXT', u'Region'],
    'Direct_Shows': [u'INTEGER DEFAULT 0', u'Direct_Shows'],
    'Direct_Clicks': [u'INTEGER DEFAULT 0', u'Direct_Clicks'],
    'Direct_Cost': [u'REAL DEFAULT 0.0', u'Direct_Cost'],
    'RSYA_Shows': [u'INTEGER DEFAULT 0', u'RSYA_Shows'],
    'RSYA_Clicks': [u'INTEGER DEFAULT 0', u'RSYA_Clicks'],
    'RSYA_Cost': [u'REAL DEFAULT 0.0', u'RSYA_Cost'],
    'Shows': [u'INTEGER DEFAULT 0', u'Shows'],
    'Clicks': [u'INTEGER DEFAULT 0', u'Clicks'],
    'Cost': [u'REAL DEFAULT 0.0', u'Cost'],
    'Qvartal': [u'TEXT', u'Qvartal'],
    'Client_Type': [u'TEXT', u'Client_Type'],
    'Region_Type': [u'TEXT', u'Region_Type'],
    'CompCount': [u'INTEGER DEFAULT 0', u'CompCount'],
    # Header names carrying an explicit ":<type>" suffix.
    'Region:str': [u'TEXT', u'Region'],
    'Month:str': [u'TEXT', u'Month'],
    'Client:str': [u'TEXT', u'Client'],
    'CustomCategory:str': [u'TEXT', u'CustomCategory'],
    'Direct_Shows:int': [u'INTEGER DEFAULT 0', u'Direct_Shows'],
    'Direct_Clicks:int': [u'INTEGER DEFAULT 0', u'Direct_Clicks'],
    'Direct_Cost:float': [u'REAL DEFAULT 0.0', u'Direct_Cost'],
    'RSYA_Shows:int': [u'INTEGER DEFAULT 0', u'RSYA_Shows'],
    'RSYA_Clicks:int': [u'INTEGER DEFAULT 0', u'RSYA_Clicks'],
    'RSYA_Cost:float': [u'REAL DEFAULT 0.0', u'RSYA_Cost'],
    'Shows:int': [u'INTEGER DEFAULT 0', u'Shows'],
    'Clicks:int': [u'INTEGER DEFAULT 0', u'Clicks'],
    'Cost:float': [u'REAL DEFAULT 0.0', u'Cost'],
}


def parse_row(sep, row):
    """Split one line of the source file into normalized cell values.

    Every value has leading/trailing ``#``, ``"``, space and line-break
    characters stripped.

    Params:
    sep -- separator between values in the line
    row -- raw line of text

    Returns:
    A real list of strings (not a lazy ``map`` object), so the result
    supports ``len()`` and can be iterated more than once on Python 3.
    """
    return [value.strip('#" \n\r') for value in row.split(sep)]


def make_db(db_name, tbl_name, src_file, sep='\t'):
    """Parse `src_file` line by line and load it into table `tbl_name`.

    The first line of the file is the header. Every header name must be a
    key of `DATA_TYPES`, which supplies the column name and the SQLite type
    used in the created table. An existing table called `tbl_name` is
    dropped and recreated.

    Params:
    db_name  -- path of the SQLite database file
    tbl_name -- name of the table to (re)create
    src_file -- name (glob pattern) of the source data file; it is tried
                as given and with a `.tsv` suffix
    sep      -- separator between values in a line

    Raises:
    ValueError -- no file, or more than one file, matches `src_file`
    KeyError   -- the header contains a name missing from `DATA_TYPES`

    Notes:
    Lines must not contain the characters: # /
    """
    FILE_TYPES = ('', '.tsv')

    data_srcs = []
    for ftype in FILE_TYPES:
        data_srcs.extend(glob("%s%s" % (src_file, ftype)))

    if not data_srcs:
        raise ValueError(
            "there are no files like `%s`. Check your current working folder" % src_file
        )
    if len(data_srcs) > 1:
        raise ValueError(
            "there are too many matched files `%s`. Check your current working folder" % src_file
        )

    with open(data_srcs[0]) as src:
        # list() keeps this correct even when parse_row yields a lazy
        # iterator: the header is traversed more than once and measured with
        # len(), and sqlite3 needs every parameter set to be a sequence.
        header = list(parse_row(sep, src.readline()))
        rows = [list(parse_row(sep, row)) for row in src]

    # Validate the header before touching the database, so that a bad file
    # does not drop the existing table and the error names the culprits.
    unknown = [name for name in header if name not in DATA_TYPES]
    if unknown:
        raise KeyError(
            "unknown column(s) %r in the header of `%s`" % (unknown, data_srcs[0])
        )

    # Column definitions for CREATE TABLE, one per header column.
    # Example: "Month" TEXT, "Client" TEXT, "CustomCategory" TEXT, "Region" TEXT
    schema_vals = ", ".join(
        '"{fld_name}" {fld_type}'.format(
            fld_name=DATA_TYPES[name][1], fld_type=DATA_TYPES[name][0]
        )
        for name in header
    )
    create_tbl = 'CREATE TABLE {table} ({fields_type})'.format(
        table=tbl_name, fields_type=schema_vals
    )
    loader = 'INSERT INTO {table} VALUES ({values})'.format(
        table=tbl_name,
        values=",".join('?' * len(header)),
    )

    conn = sqlite3.connect(db_name)
    try:
        conn.text_factory = str
        curs = conn.cursor()
        curs.execute("DROP TABLE IF EXISTS %s" % tbl_name)
        curs.execute(create_tbl)
        curs.executemany(loader, rows)
        conn.commit()
    finally:
        # Release the database file even when one of the statements fails.
        conn.close()


def get_db_table_columns(db_name, tbl_name):
    """Return the column names of table `tbl_name` in database `db_name`.

    Params:
    db_name  -- path of the SQLite database file
    tbl_name -- name of the table to inspect

    Returns:
    List of column names as reported by `PRAGMA table_info`; the list is
    empty when the pragma yields no rows (e.g. the table does not exist).
    """
    conn = sqlite3.connect(db_name)
    try:
        curs = conn.cursor()
        curs.execute(
            u"PRAGMA table_info('%s')" % tbl_name
        )
        # The second field of every table_info row is the column name.
        return [row[1] for row in curs]
    finally:
        # Close the connection even if the query raises.
        conn.close()


def info_db_table(db_name, tbl_name):
    """Collect summary data about the contents of a table.

    Params:
    db_name  -- path of the SQLite database file
    tbl_name -- name of the table to inspect

    Returns:
    Tuple `(tbl_name, months, clients, costs, regions, cats)`:
    `clients` and `costs` are parallel lists ordered by total cost,
    descending (each total truncated to int); `months`, `regions` and
    `cats` are the sorted distinct values of the `Month`, `Region` and
    `CustomCategory` columns. `regions` is an empty list when the table has
    no `Region` column.
    """
    conn = sqlite3.connect(db_name)
    try:
        curs = conn.cursor()

        def distinct_values(column):
            # Sorted distinct values of a single column.
            curs.execute(
                u'SELECT DISTINCT {COLUMN} FROM {TABLE} ORDER BY {COLUMN} ASC'
                .format(COLUMN=column, TABLE=tbl_name),
            )
            return [row[0] for row in curs]

        # Clients and their total cost.
        curs.execute(
            u'SELECT Client, sum(Cost) AS Cost FROM {TABLE} GROUP BY Client ORDER BY Cost DESC'
            .format(TABLE=tbl_name),
        )
        # sum() is NULL when a client has only NULL costs; count that as 0
        # instead of failing on int(None).
        client_cost_rows = [(row[0], int(row[1] or 0)) for row in curs]
        clients = [row[0] for row in client_cost_rows]
        costs = [row[1] for row in client_cost_rows]

        months = distinct_values('Month')
        cats = distinct_values('CustomCategory')

        # Regions are optional: a table without that column is fine.
        if 'Region' in get_db_table_columns(db_name, tbl_name):
            regions = distinct_values('Region')
        else:
            regions = []
    finally:
        # Close the connection even if one of the queries raises.
        conn.close()

    return tbl_name, months, clients, costs, regions, cats


def describe_db_table(db_name, tbl_name, dst):
    """Save table statistics to the workbook `<dst>.xlsx`.

    The workbook gets a single `Info` sheet: a formatted header row, cell
    comments explaining the fields to be filled in by hand, two preset
    values, and the data returned by `info_db_table` written as columns.

    Params:
    db_name  -- path of the SQLite database file
    tbl_name -- name of the table to describe
    dst      -- output file name without the `.xlsx` extension
    """
    info = info_db_table(db_name, tbl_name)
    wb_info = xlsxwriter.Workbook("%s.xlsx" % dst)

    merge_format = wb_info.add_format({
        'font_name': 'Arial',
        'font_size': '10',
        'bold': True,
        'align': 'center',
        # XlsxWriter's vertical centering value is 'vcenter'; 'center' is a
        # horizontal value and leaves the vertical alignment untouched.
        'valign': 'vcenter',
        'text_wrap': True,
        'bg_color': '#a5a5a5'
    })
    ws_info = wb_info.add_worksheet('Info')
    ws_info.write_row(
        0, 0,
        ['Months', 'Competitors', 'Cost', 'Regions', 'Categories', '', 'Client', 'Tgt_Reg',
         'Period_1', 'Period_2', 'Industry_name', 'Optional Share', 'Currency'],
        merge_format,
    )
    ws_info.set_column(0, 5, 12)
    ws_info.write_comment('I1', u'В формате YYYYMM - YYYYMM')
    ws_info.write_comment('J1', u'В формате YYYYMM - YYYYMM')
    ws_info.write_comment('K1', u'Название сборной категории')
    ws_info.write_comment(
        'L1', u'Накопленная доля расходов/кликов для формирования категорий по CPC, CTR, CPM'
    )
    ws_info.write('L2', 100)
    ws_info.write('M2', u'у.е.')

    # info[0] is the table name; the remaining items (months, clients,
    # costs, regions, categories) line up with the first five header cells.
    for col, values in enumerate(info[1:]):
        ws_info.write_column(1, col, values)

    wb_info.close()


def pars_argv():
    """Parse the script's command-line options.

    Returns:
    The `argparse.Namespace` with one attribute, `src` -- the name of the
    file to convert (`data` when `--src` is not given).
    """
    src_option = {
        'default': 'data',
        'metavar': '',
        'help': u'filename to convert. Default: `%(default)s`.',
    }

    cli = argparse.ArgumentParser(description=u'converter from tsv to sqlite db')
    cli.add_argument('--src', **src_option)

    # Everything after the program name.
    cli_args = sys.argv[1:]
    return cli.parse_args(cli_args)


def main(db_name, tbl_name, src_file, dst):
    """Load `src_file` into the database, then write the table summary.

    Params:
    db_name  -- path of the SQLite database file
    tbl_name -- name of the table to create and describe
    src_file -- name of the source data file
    dst      -- destination name passed on to `describe_db_table`
    """
    # Step 1: (re)build the table from the source file.
    make_db(db_name=db_name, tbl_name=tbl_name, src_file=src_file)
    # Step 2: dump the statistics of the freshly built table.
    describe_db_table(db_name=db_name, tbl_name=tbl_name, dst=dst)


if __name__ == "__main__":
    # Fixed names: summary file, database file and table.
    DESC_FNAME = 'info'
    DB_NAME = 'source.db'
    TBL_NAME = 'data'

    opt = pars_argv()

    # Mark the beginning of a run in the log.
    with open(LOG_FILE, 'a') as log:
        log.write('start tsv2db\n')

    try:
        main(DB_NAME, TBL_NAME, opt.src, DESC_FNAME)
    except Exception:
        # Failures are not re-raised: the traceback only goes to the log.
        with open(LOG_FILE, 'a') as log:
            traceback.print_exc(file=log)
    else:
        # Mark a successful run.
        with open(LOG_FILE, 'a') as log:
            log.write('done tsv2db\n')
