import time

from absl import app, flags, logging
from absl.flags import FLAGS
import psycopg2

from mgst_data.config import DB_HOST, DB_USER, DB_PASSWORD
from mgst_data.utils import upsert_data, date_validator, query

# Bounds of the processing window; main() binds them to the
# "date BETWEEN %s AND %s" filter of every query it runs.
flags.DEFINE_string(
    name='start_date', default=None, help='Starting Date (2019-01-01)')
flags.DEFINE_string(
    name='end_date', default=None, help='Ending Date (2019-02-01)')

# Each flag gets its own checker instance built by date_validator().
flags.register_validator(
    flag_name='start_date', checker=date_validator(), message='invalid date')
flags.register_validator(
    flag_name='end_date', checker=date_validator(), message='invalid date')


def _count_typed(rows):
    """Return how many rows carry a type (4th column) other than 'none'."""
    return sum(1 for row in rows if row[3] != 'none')


def main(_argv):
    """Rebuild the language, country and link metadata for channels.

    Reads the rows of ``tahoe.marionette_channel_prod`` whose ``date`` lies
    between the ``start_date`` and ``end_date`` flags and upserts three
    derived tables:

    * ``mgst.marionette_metadata_language`` -- the most frequent language per
      (channel_id, platform), tagged with how that language was obtained.
    * ``mgst.marionette_metadata_country`` -- the most frequent country code
      per (channel_id, platform), tagged with how it was obtained.
    * ``mgst.marionette_metadata_link`` -- a public URL per
      (channel_id, platform), built from the channel id or channel name.

    Args:
        _argv: Positional command-line arguments handed over by ``app.run``;
            unused.
    """
    start_time = time.time()
    logging.info("Processing channels from %s to %s",
                 FLAGS.start_date, FLAGS.end_date)

    con = psycopg2.connect(dbname='product', host=DB_HOST,
                           port='5439', user=DB_USER, password=DB_PASSWORD)

    # The connection is released even when a query or upsert raises.
    # NOTE(review): no explicit commit is issued here; presumably
    # upsert_data commits its own work -- confirm against mgst_data.utils.
    try:
        # Rank languages per (channel_id, platform): the upsert key below and
        # the link query both treat that pair as the channel identity, so the
        # same channel_id on two platforms must yield one row for each.
        # COUNT(language) ignores NULLs, so a NULL language only wins when
        # the channel has no language at all in the date range.
        # NOTE(review): ties on the count are broken arbitrarily by the
        # database.
        l_channels = query(con, """
            SELECT
                channel_id,
                platform,
                language,
                CASE
                    WHEN language IS NULL THEN 'none'
                    WHEN platform IN ('facebook', 'youtube', 'afreeca', 'booyah') THEN 'whatlango'
                    WHEN platform IN ('cube') THEN 'country'
                    WHEN platform IN ('dlive', 'mixer', 'nimo') THEN 'source'
                    ELSE 'none'
                END AS language_type
            FROM (SELECT channel_id,
                platform,
                language,
                ROW_NUMBER() OVER (PARTITION BY channel_id, platform
                                    ORDER BY COUNT(language) DESC) AS rank
            FROM tahoe.marionette_channel_prod
            WHERE date BETWEEN %s AND %s
            GROUP BY 1,
                2,
                3)
            WHERE rank = 1
            """, [FLAGS.start_date, FLAGS.end_date], fetch=True)

        logging.info("Loaded %d channels with %d languages",
                     len(l_channels), _count_typed(l_channels))

        upsert_data(con,
                    'mgst.marionette_metadata_language',
                    ['channel_id', 'platform', 'language', 'type'],
                    ['channel_id', 'platform', 'type'],
                    l_channels)

        # Same ranking as above, applied to the country code.
        c_channels = query(con, """
            SELECT
                channel_id,
                platform,
                country,
                CASE
                    WHEN country IS NULL THEN 'none'
                    WHEN platform IN ('dlive', 'mixer') THEN 'language'
                    WHEN platform IN ('cube', 'nimo') THEN 'source'
                    ELSE 'none'
                END AS country_type
            FROM (SELECT channel_id,
                platform,
                country_code AS country,
                ROW_NUMBER() OVER (PARTITION BY channel_id, platform
                                    ORDER BY COUNT(country_code) DESC) AS rank
            FROM tahoe.marionette_channel_prod
            WHERE date BETWEEN %s AND %s
            GROUP BY 1,
                2,
                3)
            WHERE rank = 1
            """, [FLAGS.start_date, FLAGS.end_date], fetch=True)

        logging.info("Loaded %d channels with %d countries",
                     len(c_channels), _count_typed(c_channels))

        upsert_data(con,
                    'mgst.marionette_metadata_country',
                    ['channel_id', 'platform', 'country', 'type'],
                    ['channel_id', 'platform', 'type'],
                    c_channels)

        # Every URL prefix ends with '/' so the appended id or name forms a
        # path segment; the youtube prefix used to lack that separator.
        # NOTE(review): if youtube channel_id values are raw channel ids the
        # prefix may need to be 'https://youtube.com/channel/' -- confirm.
        # Platforms not listed in the CASE get a NULL link.
        link_channels = query(con, """
            SELECT
                channel_id,
                platform,
                channel_name,
                CASE
                    WHEN platform = 'youtube' THEN 'https://youtube.com/' || channel_id
                    WHEN platform = 'facebook' THEN 'https://facebook.com/' || channel_id
                    WHEN platform = 'mixer' THEN 'https://mixer.com/' || channel_name
                    WHEN platform = 'cube' THEN 'https://cube.tv/' || channel_id
                    WHEN platform = 'nimo' THEN 'https://nimo.tv/live/' || channel_id
                    WHEN platform = 'dlive' THEN 'https://dlive.tv/' || channel_name
                    WHEN platform = 'afreeca' THEN 'https://afreecatv.com/' || channel_id
                    WHEN platform = 'booyah' THEN 'https://booyah.live/channels/' || channel_id
                END AS link
            FROM (SELECT channel_id,
                platform,
                MAX(channel_name) as channel_name
            FROM tahoe.marionette_channel_prod
            WHERE date BETWEEN %s AND %s
            GROUP BY 1,
                     2)
            """, [FLAGS.start_date, FLAGS.end_date], fetch=True)

        logging.info("Loaded %d channels links", len(link_channels))

        upsert_data(con,
                    'mgst.marionette_metadata_link',
                    ['channel_id', 'platform', 'channel_name', 'link'],
                    ['channel_id', 'platform'],
                    link_channels)
    finally:
        con.close()

    logging.info("Done %ds", time.time() - start_time)


# Script entry point: main is handed to absl's app.run rather than being
# called directly.
if __name__ == '__main__':
    app.run(main)
