import psycopg2
import boto3
import pandas as pd


def ssm(name: str) -> str:
    """Return the value of one AWS SSM Parameter Store parameter.

    Args:
        name: Full name (path) of the parameter to read.

    Returns:
        The ``Parameter.Value`` field of the ``get_parameter`` response.

    Raises:
        Any exception raised by boto3 is propagated unchanged (for example
        when the parameter does not exist or credentials are missing).
    """
    client = boto3.client('ssm')
    # Ask for decrypted values so SecureString parameters (e.g. passwords)
    # come back as plaintext instead of ciphertext. The flag is ignored for
    # plain String / StringList parameters, so existing lookups are unaffected.
    response = client.get_parameter(Name=name, WithDecryption=True)
    return response['Parameter']['Value']


# Database connection settings, looked up from SSM Parameter Store when the
# module is loaded. The lookups run in the order host, user, pass, db.
DB_HOST, DB_USER, DB_PASS, DB_NAME = map(ssm, (
    '/mgst-data/moneyball-redshift/host',
    '/mgst-data/moneyball-redshift/user',
    '/mgst-data/moneyball-redshift/pass',
    '/mgst-data/moneyball-redshift/db',
))


# Open the database connection using the settings fetched from SSM.
# NOTE(review): the SSM path and port 5439 suggest an Amazon Redshift
# cluster -- confirm.
con = psycopg2.connect(
    host=DB_HOST,
    port=5439,
    dbname=DB_NAME,
    user=DB_USER,
    password=DB_PASS,
)
# NOTE(review): this cursor is not used by any code visible in this script;
# the query below goes through pandas with `con` directly.
cursor = con.cursor()


# Builds one row per channel that has at least one row with topic '/m/0bzvm2'
# in youtube_live_video_monthly_aggregate:
#   * channel_ids  - distinct channel ids for that topic
#   * channels     - MAX(location) per channel from scanner_channel
#   * channels_es  - language / country / subscriber count from
#                    elastic_search_import_youtube_channel
#   * channels_agg - MAX(detected_language) per channel over the whole
#                    aggregate table (not restricted to the topic; the final
#                    LEFT JOIN from channel_ids does the restriction)
# The final SELECT falls back from the channels_es values to the
# scanner/aggregate values, then to 'Unknown' (or 0 for subscriber_count).
# NOTE(review): channels_es is not de-duplicated, so a channel with several
# rows in that table would appear several times in the result -- confirm
# whether channel_id is unique there.
_CHANNEL_METADATA_SQL = """
WITH 
channel_ids AS (
    SELECT
        distinct channel_id
    FROM youtube_live_video_monthly_aggregate
    WHERE
        topic = '/m/0bzvm2'
),
channels AS (
    SELECT
        t1.channel_id,
        MAX(t1.location) as location
    FROM
        scanner_channel t1
    JOIN channel_ids t2
        ON t1.channel_id = t2.channel_id
    GROUP BY 1
),
channels_es AS (
    SELECT
        t1.channel_id,
        t1.default_language,
        t1.country,
        t1.subscriber_count
    FROM
        elastic_search_import_youtube_channel t1
    JOIN channel_ids t2
        ON t1.channel_id = t2.channel_id
),
channels_agg AS(
    SELECT
        t1.channel_id,
        max(t1.detected_language) as detected_language
    FROM 
        youtube_live_video_monthly_aggregate t1
    WHERE
        t1.detected_language IS NOT NULL
    GROUP BY 1
)
SELECT
        t1.channel_id as channel_id,
        nvl(t3.country, t2.location, 'Unknown') as country,
        nvl(t3.default_language, t4.detected_language, 'Unknown') as language,
        nvl(t3.subscriber_count, 0) as subscriber_count
    FROM
        channel_ids t1
    LEFT JOIN channels t2
        ON (t1.channel_id = t2.channel_id)
    LEFT JOIN channels_es t3
        ON (t1.channel_id = t3.channel_id)
    LEFT JOIN channels_agg t4
        ON (t1.channel_id = t4.channel_id)
"""

# This is the only query the script runs on `con`, so release the database
# session as soon as the read finishes -- including when it raises --
# instead of leaving it open until the interpreter exits.
try:
    df = pd.read_sql(_CHANNEL_METADATA_SQL, con)
finally:
    con.close()

# Report how many rows the query returned, before any filtering.
print(df.shape[0])

# Keep only rows whose subscriber_count is non-zero. The query substitutes 0
# when no subscriber count is available, so this also drops those channels.
has_subscribers = df['subscriber_count'] != 0
non_none = df.loc[has_subscribers]

# Written with pandas defaults, so the DataFrame index is included as the
# first CSV column.
non_none.to_csv('moneyball_metadata_export.csv')

