#!/usr/bin/env python2.7
import logging
import sys
import pymysql
import warnings
from counters_base_class import CountersBase
from datetime import timedelta, date

MIN_KEEP_HOURLY_DAYS = 1


class MergeHourlyCountersIntoDaily(CountersBase):
    """Aggregate rows of the hourly counters table into the daily table.

    The job picks a window of whole days in the hourly table, inserts the
    per-day sums of ``impressions`` and ``clicks`` into the daily table,
    marks the processed hourly rows as merged and finally asks the base
    class to prune old hourly rows.

    Connection handling, table names, id fields and the delete / mark /
    prune queries are provided by ``CountersBase`` (not visible in this
    file).
    """

    def __init__(self, **kwargs):
        """Store the job parameters and read the merge settings.

        ``kwargs`` is forwarded untouched to ``CountersBase.__init__`` and
        must contain a ``'settings'`` dict with the keys
        ``max_days_to_merge``, ``keep_hourly_days`` and
        ``delete_from_daily_table``.

        Raises:
            KeyError: if ``'settings'`` or one of its keys is missing.
        """
        super(MergeHourlyCountersIntoDaily, self).__init__(**kwargs)
        self.params = kwargs
        settings = self.params['settings']
        # list of datetime.date objects, filled in by get_days_to_merge();
        # stays None when there is nothing to merge
        self.days_to_merge = None
        self.max_days_to_merge = settings['max_days_to_merge']
        # never keep fewer than MIN_KEEP_HOURLY_DAYS days of hourly data
        self.keep_hourly_days = settings['keep_hourly_days'] \
            if settings['keep_hourly_days'] >= MIN_KEEP_HOURLY_DAYS else MIN_KEEP_HOURLY_DAYS
        self.delete_from_daily_table = settings['delete_from_daily_table']

    def _fetch_single_value(self, query):
        """Run ``query`` and return the first column of its first row.

        A connection is obtained from ``get_db_conn()``, used as a context
        manager that yields a cursor, and closed after a successful fetch.
        """
        conn = self.get_db_conn()
        with conn as cursor:
            cursor.execute(query)
            rows = cursor.fetchall()
        conn.close()
        return rows[0][0]

    def get_days_to_merge(self):
        """Work out which days should be merged and store them.

        The window starts at the earliest day that still has rows without
        ``flag_merged`` and ends (exclusively) at the smallest of:

        * today minus ``keep_hourly_days``,
        * the date of the latest row carrying ``flag_ch_updated``,
        * the start day plus ``max_days_to_merge``.

        On success ``self.days_to_merge`` is set to the list of dates in the
        window; otherwise it is left untouched and 'nothing to merge' is
        logged.

        Raises:
            SystemExit: with status 0 when no hourly row carries
                ``flag_ch_updated``.
        """
        # lets find min non-merged day
        min_day = self._fetch_single_value(
            "select date(min(time)) from {table} where not find_in_set('flag_merged', flags)"
            .format(table=self.get_db_hourly_table()))
        logging.info('min day in hourly table is {}'.format(min_day))

        # lets find max ch updated day: if day wasn't updated from ch - we cant merge it
        max_day_ch_updated = self._fetch_single_value(
            "select max(time) from {table} where find_in_set('flag_ch_updated', flags)"
            .format(table=self.get_db_hourly_table()))
        if max_day_ch_updated is None:
            logging.info('no rows in hourly table with ch_updated flag')
            # sys.exit instead of the site-provided exit(): same SystemExit(0),
            # but available even when the site module is not loaded
            sys.exit(0)
        logging.info('max ch updated day is {}'.format(max_day_ch_updated.date()))

        if min_day is None:
            # min() over zero rows is NULL: every hourly row is already merged.
            # Without this guard the date comparisons below fail on None.
            logging.info('nothing to merge')
            return

        max_day = date.today() - timedelta(days=self.keep_hourly_days)
        max_day = min(max_day, max_day_ch_updated.date())
        # cap the amount of work done in a single run
        if min_day < max_day - timedelta(days=self.max_days_to_merge):
            max_day = min_day + timedelta(days=self.max_days_to_merge)
        # max_day is exclusive: range() below stops one day before it
        if max_day <= min_day:
            logging.info('nothing to merge')
        else:
            self.days_to_merge = [min_day + timedelta(days=x) for x in range(0, (max_day - min_day).days)]
            logging.info('days to merge: {}'.format(', '.join([d.strftime("%Y-%m-%d") for d in self.days_to_merge])))

    def get_merge_query(self, day):
        """Return the INSERT ... SELECT statement that merges one day.

        The statement sums ``impressions`` and ``clicks`` of all hourly rows
        whose ``time`` falls inside ``day``, grouped by the id fields, and
        inserts the result into the daily table with ``day`` as the day value.

        ``day`` is interpolated into the SQL text via ``str.format``; callers
        in this class pass ``datetime.date`` objects.
        """
        query = """
            INSERT INTO {daily_table}
            ({id_fields}, impressions, clicks, day)
            SELECT {id_fields}, sum(impressions), sum(clicks), '{date}'
            FROM {hourly_table}
            WHERE time >= TIMESTAMP('{date}') AND time < TIMESTAMP(DATE_ADD('{date}', INTERVAL 1 DAY))
            GROUP BY {id_fields}
        """.format(daily_table=self.get_db_daily_table(), hourly_table=self.get_db_hourly_table(),
                   id_fields=','.join(self.get_id_fields()), date=day)
        return query

    def db_merge_single_day(self, day):
        """Merge one day of hourly data into the daily table and commit.

        Steps, all executed on the same connection before a single commit:

        1. optionally (``delete_from_daily_table``) run the daily delete query
           for ``day``,
        2. insert the aggregated rows built by ``get_merge_query``,
        3. run the query that marks the hourly rows of ``day`` as merged.
        """
        conn = self.get_db_conn()
        cursor = conn.cursor()
        if self.delete_from_daily_table:
            logging.info('deleting old data from daily table for {} day'.format(day))
            cursor.execute(self.get_daily_delete_query(day))
            logging.info('deleting is finished')
        logging.info('merging data in daily table for {} day'.format(day))
        cursor.execute(self.get_merge_query(day))
        logging.info('merging is finished')
        logging.info('marking hourly table data merged for {} day'.format(day))
        affected_rows = cursor.execute(self.get_hourly_mark_merged_day_query(day))
        logging.info('marking is done for {} rows'.format(affected_rows))
        conn.commit()
        logging.info('changes commited')

    def remove_old_data_from_hourly_table(self):
        """Run the base-class prune query on the hourly table and commit."""
        conn = self.get_db_conn()
        logging.info('removing old data from hourly table')
        affected_rows = conn.cursor().execute(self.get_remove_old_data_from_hourly_table_query())
        logging.info('removed {} rows from hourly table'.format(affected_rows))
        conn.commit()
        logging.info('changes commited')

    def proceed(self):
        """Run the whole job: select days, merge each one, prune hourly data.

        Returns:
            True when every step completed.

        Any exception (including the ``SystemExit`` raised by
        ``get_days_to_merge``) propagates to the caller; the database
        connection is closed via ``close_db_conn()`` in every case.
        """
        # ignore sql warnings: concat_ws on set fields
        warnings.simplefilter("ignore", category=pymysql.Warning)
        self.set_logger()
        try:
            self.get_days_to_merge()
            # days_to_merge stays None when there is nothing to merge
            for day in self.days_to_merge or []:
                self.db_merge_single_day(day=day)
            self.remove_old_data_from_hourly_table()
            logging.info('Script has been successfully completed. ')
            return True
        finally:
            self.close_db_conn()


if __name__ == "__main__":
    # Settings come from the command line when arguments are given
    # (json.dumps-encoded dicts), otherwise from the local config module.
    if len(sys.argv) != 1:
        params = MergeHourlyCountersIntoDaily.parse_args('settings', 'mysql')
    else:
        import config as conf
        params = conf.params

    job = MergeHourlyCountersIntoDaily(**params)
    job.proceed()
