import os
import logging
# Configure the root logger at import time: records at DEBUG and above are
# written to /tmp/ward.log, each prefixed with its timestamp.
logging.basicConfig(filename='/tmp/ward.log',level=logging.DEBUG,format='%(asctime)s %(message)s')
# Uncomment to additionally echo log records through a StreamHandler.
#logging.getLogger().addHandler(logging.StreamHandler())

from multiprocessing import Process
from time import sleep
from datetime import datetime

import boto3
import uuid
import pandas as pd
from botocore.exceptions import ClientError
from fastparquet import ParquetFile, write as write_pq

class Dispatcher(Process):
    """Background process that batches local parquet files and ships them to S3.

    On every tick the dispatcher lists the files in ``root``, concatenates the
    readable ones into a single ``<uuid>__SUM`` parquet file, uploads that file
    to ``bucket`` under ``parq-structured/date=YYYY-MM-DD/`` and, only once the
    upload has succeeded, deletes the local inputs and the summary file.
    """

    def __init__(self, root, bucket):
        """Create the dispatcher (call ``start()`` to launch it).

        :param root: Directory holding the parquet files to dispatch
        :param bucket: Name of the destination S3 bucket
        """
        Process.__init__(self)
        self.stop = False
        self.root = root
        # Seconds between the starts of two consecutive ticks (480 s).
        self.timeout = 30 * 8 * 2
        # NOTE(review): the client is built by whoever constructs the
        # Dispatcher, while run() executes in the process started by start();
        # confirm that sharing the client across that boundary is acceptable.
        self.s3 = boto3.client("s3")
        self.bucket = bucket

    def run(self):
        """Main loop: dispatch pending files, then sleep out the rest of the tick.

        Any exception other than KeyboardInterrupt is logged and the loop
        carries on with the next tick.
        """
        while not self.stop:
            starttime = datetime.now()
            logging.debug("run tick")
            try:
                self._dispatch_once()
            except KeyboardInterrupt:
                self.stop = True
                raise
            except Exception:
                logging.exception("exception in dispatcher run loop")

            # total_seconds() accounts for the days component that .seconds
            # silently drops; truncate so whole seconds are slept and logged.
            elapsed = int((datetime.now() - starttime).total_seconds())
            diff = max(self.timeout - elapsed, 0)
            logging.debug("sleeping for %s sec", diff)
            sleep(diff)

    def _dispatch_once(self):
        """Run one summarise / upload / clean-up cycle over the files in root."""
        files = self.get_files()
        if not files:
            return
        sum_filename = self.sum_parq(files)
        if not sum_filename:
            return

        rn = datetime.now().strftime('%Y-%m-%d')
        sum_filepath = os.path.join(self.root, sum_filename)
        object_name = "parq-structured/date=%s/%s.parq" % (rn, sum_filename)

        uploaded = False
        try:
            uploaded = self.upload_file(sum_filepath, object_name)
        finally:
            if uploaded:
                self.delete_files(files + [sum_filename])
            else:
                # Keep the inputs so the next tick retries them. The summary
                # is dropped because get_files() never returns __SUM files,
                # so a leftover one would otherwise stay on disk forever.
                logging.error("upload of %s failed; keeping %s input files for retry",
                              sum_filename, len(files))
                self.delete_files([sum_filename])

    def delete_files(self, files):
        """Best-effort removal of the named files from root.

        A file that cannot be removed is logged and skipped; the remaining
        files are still processed.

        :param files: File names relative to root
        """
        for f in files:
            logging.info("del %s", f)
            try:
                os.remove(os.path.join(self.root, f))
            except OSError:
                logging.warning("could not delete %s", f, exc_info=True)

    def get_files(self):
        """Return the names in root whose last ``__``-separated part is not ``SUM``.

        :return: List of file names relative to root
        """
        return [f for f in os.listdir(self.root) if f.split("__")[-1] != "SUM"]

    def sum_parq(self, files):
        """Concatenate the given parquet files into one new ``__SUM`` file.

        Files that fail to open with OSError errno 22 are treated as
        unreadable: they are deleted from disk and removed from ``files`` in
        place, so afterwards the caller's list holds exactly the inputs that
        went into the summary.

        :param files: File names relative to root; modified in place
        :return: Name of the summary file written to root, or None when no
            input could be read
        :raises OSError: For any read error other than errno 22
        """
        dfs = []
        del_files = []
        # Collect unreadable names instead of deleting from ``files`` while
        # iterating it, which would skip the entry after every bad file.
        for f in files:
            try:
                dfs.append(ParquetFile(os.path.join(self.root, f)).to_pandas())
            except OSError as e:
                if e.errno != 22:  # 22 == EINVAL
                    raise
                logging.exception("failed to add file %s", f)
                del_files.append(f)

        if del_files:
            logging.warning("%s files were unreadable and being deleted", len(del_files))
            self.delete_files(del_files)
            unreadable = set(del_files)
            files[:] = [f for f in files if f not in unreadable]
        logging.info("dispatcher concat %s files", len(dfs))
        if not dfs:
            return None

        pd_sum = pd.concat(dfs, axis=0)
        filename = "%s__SUM" % str(uuid.uuid4())
        logging.info("writing to %s", filename)
        write_pq(os.path.join(self.root, filename), pd_sum)
        return filename

    def upload_file(self, file_name, object_name):
        """Upload a file to an S3 bucket

        :param file_name: File to upload
        :param object_name: Path to upload to
        :return: True if file was uploaded, else False
        """
        logging.info("uploading %s to s3://%s/%s", file_name, self.bucket, object_name)
        try:
            self.s3.upload_file(file_name, self.bucket, object_name)
        # boto3's managed transfer re-raises client errors as
        # S3UploadFailedError, so both must map to the False return.
        except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
            logging.error(e)
            return False

        return True


if __name__ == "__main__":
    # Script entry point: launch a dispatcher over /tmp/ward/ that ships to
    # the darkseer-ward-s3 bucket.
    dispatcher = Dispatcher("/tmp/ward/", "darkseer-ward-s3")
    dispatcher.start()
