import subprocess
import sys
import os
from datetime import datetime, timedelta
import time
from pytz import timezone
import re
import io

from absl import app, logging, flags
from absl.flags import FLAGS
import pandas as pd
import numpy as np
import psycopg2

import gspread
from oauth2client.service_account import ServiceAccountCredentials

from mgst_data.config import DB_HOST, DB_USER, DB_PASSWORD
from mgst_data.utils import upsert_data

# Key of the Google spreadsheet to load; passed to `open_by_key` in
# load_google_sheet().
flags.DEFINE_string(
    'sheet', '1wO1BdqleGLpV4JJ5DzimHsoZZakhRypF6w70sG25jbY', 'tracker sheet')

# Path to the service-account JSON key file used to authorize gspread.
# NOTE(review): the default is an absolute path under one user's home
# directory, so other machines will need to pass --creds explicitly.
flags.DEFINE_string(
    'creds', '/Users/siyinpen/mobile-royale-313621-500ba9ade8d9.json', 'gcp credentials'
)

# Zero-based index of the first sheet to process; sheets whose index is
# lower than this value are skipped.
flags.DEFINE_integer(
    'start', 0, 'start page'
)

# OAuth scopes requested when building the service-account credentials.
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']

# DDL executed at the start of main(). It drops any existing
# mgst.mobile_royale_tracker table before recreating it, so each run starts
# from an empty table. The upserts in this file do not fill the `points`
# and `award` columns.
CREATE_TABLE = """
drop table if exists mgst.mobile_royale_tracker;
create table mgst.mobile_royale_tracker (
  event_name VARCHAR(255),
  date timestamp,
  start_time timestamp,
  end_time timestamp,
  region VARCHAR(255),
  channel VARCHAR(255),
  checked_in BOOLEAN,
  points FLOAT,
  placement INTEGER,
  prize FLOAT,
  award FLOAT
);
"""


def load_google_sheet():
    """Authorize gspread and open the spreadsheet named by ``--sheet``.

    Credentials are read from the service-account key file given by
    ``--creds`` and requested with the module-level ``scope`` list.

    Returns:
        The object returned by ``gspread``'s ``open_by_key`` for the
        configured key (the whole spreadsheet, not a single tab).
    """
    service_creds = ServiceAccountCredentials.from_json_keyfile_name(
        FLAGS.creds, scope)
    client = gspread.authorize(service_creds)
    return client.open_by_key(FLAGS.sheet)


def parse_date(date_str, year):
    """Parse a tracker time cell into localized start and end datetimes.

    The cell is expected to look like ``"<times>, <date>"``, where
    ``<times>`` is either a single hour (``"5PM PST"``) or a range
    (``"5PM - 10PM PST"``) and ``<date>`` is ``m/d`` or ``m/d/YYYY``.
    Only whole hours are supported (``%I%p``).

    Args:
        date_str: Raw cell text; must contain ``"PST"``.
        year: Calendar year to use. It always overrides any year that
            appears in ``date_str``.

    Returns:
        A ``(start, end)`` tuple of datetimes localized to US/Pacific.
        When no end hour is given the event is assumed to last 5 hours.
        An end hour that falls before the start hour is treated as being
        on the following day.

    Raises:
        ValueError: If the timezone is not recognised or a time/date
            component cannot be parsed.
    """
    if "PST" in date_str:
        tz = timezone("US/Pacific")
    else:
        raise ValueError("Unknown timezone", date_str)

    time_str, day_str = date_str.split(',')
    day_str = day_str.strip()
    if len(day_str.split('/')) == 2:
        # strptime('%m/%d') assumes the non-leap year 1900 and therefore
        # rejects 2/29; parse against the caller-supplied year instead.
        date = datetime.strptime('{}/{}'.format(day_str, year), '%m/%d/%Y')
    else:
        date = datetime.strptime(day_str, '%m/%d/%Y')

    times = time_str.split('-')
    # Each side may carry a trailing timezone label ("10PM PST"); keep only
    # the clock token.
    start_time = datetime.strptime(times[0].strip().split(' ')[0], '%I%p')

    if len(times) > 1:
        end_time = datetime.strptime(times[1].strip().split(' ')[0], '%I%p')
    else:
        end_time = start_time + timedelta(hours=5)

    start_date = datetime(year=year, month=date.month,
                          day=date.day, hour=start_time.hour)
    end_date = datetime(year=year, month=date.month,
                        day=date.day, hour=end_time.hour)
    if end_date < start_date:
        # The slot crosses midnight (e.g. 9PM + 5h, or "10PM - 2AM"), so the
        # end hour belongs to the next calendar day.
        end_date += timedelta(days=1)

    return tz.localize(start_date), tz.localize(end_date)


# Matches a Twitch URL anywhere in the text, regardless of letter case.
_TWITCH_CHANNEL_RE = re.compile(
    r"twitch\.tv/(?P<channel>\w+)", re.IGNORECASE)


def parse_channel(channel_str):
    """Normalize a "Channel Name" cell to a lowercase channel identifier.

    Args:
        channel_str: Raw cell text, either a URL or a bare channel name.

    Returns:
        For text containing ``twitch.tv/<name>``, the lowercased
        ``<name>``. Otherwise the lowercased text with any ``http://`` or
        ``https://`` removed, surrounding spaces and slashes stripped, and
        everything after the first space dropped.
    """
    link = _TWITCH_CHANNEL_RE.search(channel_str)
    if link:
        return link.group('channel').lower()

    cleaned = channel_str.lower().replace('https://', '').replace('http://', '')
    return cleaned.strip(' /').split(" ")[0]


def parse_xlsx(excel, con=None):
    """Load tracker rows from an Excel workbook into the tracker table.

    Each sheet at index ``--start`` or later is treated as one event. The
    event time is read from the fourth spreadsheet row and the participant
    table from the rows after it; rows without a "Channel Name" are dropped.

    Args:
        excel: Anything accepted by ``pd.ExcelFile`` (path or file object).
        con: Open database connection handed to ``upsert_data``. When
            omitted, a connection is opened with the same settings
            ``main`` uses and is committed and closed before returning.
            (Previously the function referenced an undefined ``con`` and
            could not run.)
    """
    owns_connection = con is None
    if owns_connection:
        con = psycopg2.connect(dbname='product', host=DB_HOST,
                               port='5439', user=DB_USER, password=DB_PASSWORD)
    try:
        xls = pd.ExcelFile(excel)
        for i, sheet_name in enumerate(xls.sheet_names):
            if i < FLAGS.start:
                continue
            # NOTE(review): main() takes the region from the text after the
            # last '-', this takes the last space-separated word; confirm
            # which one is intended.
            region = sheet_name.split(' ')[-1]
            year = 2000 + int(sheet_name.split('-')[0].split('/')[2].strip())
            print(region, year)

            header = pd.read_excel(xls, sheet_name, nrows=4)
            date_str = header.iloc[2, 2]
            # Empty Excel cells come back as NaN, which is truthy, so a plain
            # `not date_str` check would never detect them.
            if pd.isnull(date_str) or not date_str:
                # shifted cell location
                date_str = header.iloc[2, 3]

            start_date, end_date = parse_date(date_str, year)
            print(start_date, end_date)

            df = pd.read_excel(xls, sheet_name, skiprows=5, nrows=61)
            df = df[df['Channel Name'].notnull()]
            if df.empty:
                print('Empty')
                continue
            # Replace NaN with None before handing the rows to upsert_data.
            df = df.where(pd.notnull(df), None)

            df['event_name'] = sheet_name
            df['date'] = start_date.strftime('%Y-%m-%d')
            df['start_time'] = start_date.strftime('%Y-%m-%d %H:%M:%S')
            df['end_time'] = end_date.strftime('%Y-%m-%d %H:%M:%S')
            df['region'] = region
            df['channel'] = df['Channel Name'].apply(parse_channel)
            df['checked_in'] = df['Checked In?'].astype(bool)

            data = df[['event_name', 'date', 'start_time', 'end_time', 'region',
                       'channel', 'checked_in', 'Placement', 'Prize $']].values.tolist()
            upsert_data(con,
                        'mgst.mobile_royale_tracker',
                        ['event_name', 'date', 'start_time', 'end_time', 'region',
                            'channel', 'checked_in', 'placement', 'prize'],
                        ['event_name'],
                        data)

        if owns_connection:
            # Harmless if upsert_data already commits; required if it does not.
            con.commit()
    finally:
        if owns_connection:
            con.close()


def main(_argv):
    """Rebuild the tracker table from the configured Google spreadsheet.

    Recreates ``mgst.mobile_royale_tracker`` (see ``CREATE_TABLE``), then
    walks every worksheet at index ``--start`` or later, parses the event
    time and participant rows, and upserts them keyed on ``event_name``.
    The database connection is closed on exit, including on error.

    Args:
        _argv: Positional command-line arguments from ``absl.app``; unused.
    """
    con = psycopg2.connect(dbname='product', host=DB_HOST,
                           port='5439', user=DB_USER, password=DB_PASSWORD)
    try:
        with con.cursor() as cursor:
            cursor.execute(CREATE_TABLE)
        con.commit()

        spread = load_google_sheet()
        # Fetch the worksheet list once and reuse it for both the count and
        # the loop, rather than calling worksheets() twice.
        worksheets = spread.worksheets()
        print("total sheets: ", len(worksheets))
        for i, sheet in enumerate(worksheets):
            print("sheet index: ", i)
            if i < FLAGS.start:
                # todo change this to date
                continue

            sheet_name = sheet.title
            region = sheet_name.split('-')[-1].strip()
            year = 2000 + int(sheet_name.split('-')[0].split('/')[2].strip())
            print(sheet_name, region, year)

            values = sheet.get_all_values()
            # Spreadsheet rows 2-4; the event time is on the last of them.
            header = pd.DataFrame(values[1:4])
            date_str = header.iloc[2, 2]
            if not date_str:
                # shifted cell location
                date_str = header.iloc[2, 3]
            start_date, end_date = parse_date(date_str, year)
            print(start_date, end_date)

            # Row 6 holds the column titles; up to 61 participant rows follow.
            df = pd.DataFrame(values[6:6+61], columns=values[5])
            df = df.iloc[:, :8]  # drop weird column data
            # Sheet cells arrive as strings; treat blank ones as missing.
            df = df.replace(r'^\s*$', np.nan, regex=True)
            df = df[df['Channel Name'].notnull()]
            if df.empty:
                print('Empty')
                continue
            # Replace NaN with None before handing the rows to upsert_data.
            df = df.where(pd.notnull(df), None)

            df['event_name'] = sheet_name
            df['date'] = start_date.strftime('%Y-%m-%d')
            df['start_time'] = start_date.strftime('%Y-%m-%d %H:%M:%S')
            df['end_time'] = end_date.strftime('%Y-%m-%d %H:%M:%S')
            df['region'] = region
            df['channel'] = df['Channel Name'].apply(parse_channel)
            df['checked_in'] = (df['Checked In?'] == 'TRUE').astype(bool)

            print(df)

            data = df[['event_name', 'date', 'start_time', 'end_time', 'region',
                       'channel', 'checked_in', 'Placement', 'Prize $']].values.tolist()
            upsert_data(con,
                        'mgst.mobile_royale_tracker',
                        ['event_name', 'date', 'start_time', 'end_time', 'region',
                            'channel', 'checked_in', 'placement', 'prize'],
                        ['event_name'],
                        data)
    finally:
        con.close()


if __name__ == '__main__':
    app.run(main)
