import psycopg2
import datetime
import boto3
import subprocess
import pytz
from dateutil.relativedelta import relativedelta

def in_s3(path, file):
    """Return True if `file` exists under `path` in the old-redshift-data bucket.

    `path` is the key prefix including its trailing '/', and `file` is the
    name directly below it, so the full object key is `path + file`.
    A name ending in '/' is treated as a folder: it exists when any object
    key starts with that folder prefix (either the folder marker itself or
    a file stored inside it).  Any other name must match an object key
    exactly.

    Uses the module-level `s3` client.
    """
    target = '{}{}'.format(path, file)
    #List by the full key rather than by `path` alone, so the answer does not
    #depend on how many other objects live under `path`.
    results = s3.list_objects(Bucket='old-redshift-data', Prefix=target)
    #'Contents' is absent from the response when nothing matches the prefix.
    contents = results.get('Contents', [])
    if target.endswith('/'):
        return bool(contents)
    #Each entry of 'Contents' is a dict; the object name is under 'Key'.
    return any(obj['Key'] == target for obj in contents)


#Database connection settings
server = 'tahoe-tap.community-data.twitch.a2z.com'
port = 5439
db = 'product'
login = 'manchikc'

#NOTE(review): the password is embedded in source here; consider loading it
#from outside the file instead.
conn = psycopg2.connect(
    host=server,
    port=port,
    dbname=db,
    user=login,
    password='1InosZYTJp2le3mqZKm2ZRlu5sDTPGGW',
)

#Fetch every INFORMATION_SCHEMA.tables row whose schema name starts with dx
schema_query = "SELECT * FROM INFORMATION_SCHEMA.tables WHERE table_schema like 'dx%' ORDER BY table_schema;"

cursor = conn.cursor()
cursor.execute(schema_query)
arr = cursor.fetchall()
conn.commit()

#S3 client used by the rest of the script
s3 = boto3.client('s3')

#Objects currently listed in the archive bucket; a response without a
#'Contents' entry is treated as an empty listing
table_folders = s3.list_objects(Bucket='old-redshift-data').get('Contents', [])

#output = subprocess.check_output(['aws','s3','ls','s3://old-redshift-data'])
#print(output.decode('utf-8'))
#print(type(output))


#Load the tables to process from files.csv

#tables holds one list per csv row, in the format [tablename, column to look into]
tables = []
with open('files.csv') as table_file:
    for line in table_file:
        #Strip each field so the trailing newline does not end up inside the column name
        fields = [field.strip() for field in line.split(',')]
        #Skip blank lines; they would otherwise become [''] and fail on table[1] later
        if not any(fields):
            continue
        tables.append(fields)

#Cutoff: the current time expressed in US/Pacific, minus two years
pacific = pytz.timezone('US/Pacific')
curr_date = datetime.datetime.now(tz=pytz.utc).astimezone(pacific)

two_years_date = curr_date - relativedelta(years=2)



#For every table, unload each day of data older than the cutoff to S3 and,
#once the unloaded files are confirmed to exist, delete that day from the table.

#Keys already present in the bucket listing (each listing entry is a dict with a 'Key')
existing_keys = {obj['Key'] for obj in table_folders}
cutoff = two_years_date.strftime('%Y-%m-%d')

for table in tables:
    #Strip so a trailing newline from the csv does not leak into the SQL text
    table_name = table[0].strip()
    date_column = table[1].strip()

    #Distinct days strictly before the cutoff, oldest first
    cursor.execute('SELECT {} FROM {} WHERE {} < \'{}\'::DATE GROUP BY {} ORDER BY {} ASC;'.format(date_column, table_name, date_column, cutoff, date_column, date_column))
    conn.commit()
    days = cursor.fetchall()

    #Create the table's folder marker only if the listing does not already contain it
    table_prefix = '{}/'.format(table_name)
    if table_prefix not in existing_keys:
        s3.put_object(Bucket='old-redshift-data', Key=table_prefix)
        existing_keys.add(table_prefix)

    for row in days:
        parts = str(row[0]).split('-')
        day = datetime.date(int(parts[0]), int(parts[1]), int(parts[2]))
        day_sql = day.strftime('%Y-%m-%d')
        day_name = day.strftime('%Y_%m_%d')
        day_prefix = '{}/{}/'.format(table_name, day_name)

        #Check if this date folder exists in s3 or not
        if not in_s3(table_prefix, '{}/'.format(day_name)):
            s3.put_object(Bucket='old-redshift-data', Key=day_prefix)

        command = 'UNLOAD ($$ SELECT * FROM {} WHERE {} = \'{}\'::DATE  $$) \nto \'s3://old-redshift-data/{}/\' \niam_role \'arn:aws:iam::838590096234:role/dx-data-s3-access\';'.format(table_name, date_column, day_sql, '{}/{}'.format(table_name, day_name))
        print(command)
        cursor.execute(command)
        conn.commit()

        #Verification. The folder marker created above also sits under this
        #prefix, so it must not count as proof that the unload wrote files.
        listing = s3.list_objects(Bucket='old-redshift-data', Prefix=day_prefix)
        unloaded = [obj for obj in listing.get('Contents', []) if obj['Key'] != day_prefix]
        if not unloaded:
            print('Problem')
        else:
            delete_command = 'DELETE FROM {} WHERE {} = \'{}\'::DATE'.format(table_name, date_column, day_sql)
            cursor.execute(delete_command)
            #Commit right away so the delete is not left pending when the script ends
            conn.commit()


