#!/usr/bin/env python2

import os.path
import psycopg2
import sys
import logging

from argparse import ArgumentParser
from contextlib import closing

# File that remembers the qid bound reached by the previous run.
CACHE = '/tmp/clean_storage_delete_queue_cache'

# One formatter is shared by every handler so all outputs look the same.
log_formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')

# The file handler is always installed; setup_log() later sets its level and
# may add a stdout handler next to it.
file_handler = logging.FileHandler('/tmp/clean_storage_delete_queue.log')
file_handler.setFormatter(log_formatter)

# Root logger used by the whole script.
log = logging.getLogger()
log.addHandler(file_handler)


def main():
    """Script entry point: parse options, configure logging, run the cleanup."""
    options = parse_args()
    setup_log(options)
    # Resolve the starting bound after logging is configured, then hand
    # everything over to perform().
    starting_qid = get_max_qid(options)
    perform(
        dsn=options.dsn,
        max_qid=starting_qid,
        chunk_size=options.chunk_size,
    )


def parse_args():
    """Parse the command line of the queue cleanup script.

    Returns the argparse namespace with these attributes:

    * ``dsn`` -- positional connection string for the database;
    * ``max_qid`` -- int given with ``-m``, or ``None`` when omitted;
    * ``chunk_size`` -- int given with ``-n``, 1000 by default;
    * ``verbose`` -- ``True`` when ``-v`` was passed;
    * ``quite`` -- ``True`` when ``-q`` / ``--quiet`` / ``--quite`` was passed.
    """
    parser = ArgumentParser(
        description='Delete from mail.storage_delete_queue records '
                    'with st_id existing in mail.messages',
    )
    parser.add_argument(
        '-m',
        '--max_qid',
        type=int,
        default=None,
        help='maximum qid to check st_id',
    )
    parser.add_argument(
        '-n',
        '--chunk_size',
        type=int,
        default=1000,
        help='limit for select from mail.storage_delete_queue for one request',
    )
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='enable debug logging',
    )
    # The long flag was originally misspelled ``--quite``. Accept the correct
    # ``--quiet`` as well, but keep the old spelling and the ``quite``
    # attribute name so existing invocations and callers keep working.
    parser.add_argument(
        '-q',
        '--quiet',
        '--quite',
        dest='quite',
        action='store_true',
        help='disable log to stdout',
    )
    parser.add_argument(
        'dsn',
        help='connection info for database',
    )
    return parser.parse_args()


def setup_log(args):
    """Apply the verbosity chosen on the command line to the logging setup.

    ``args.verbose`` selects DEBUG, otherwise INFO is used; the level is set
    on both the module-level logger and its file handler. Unless
    ``args.quite`` is set, a stdout handler using the shared formatter and the
    same level is attached as well.
    """
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    log.setLevel(level)
    file_handler.setLevel(level)

    if args.quite:
        # Quiet mode: the log file stays the only output.
        return

    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(level)
    console_handler.setFormatter(log_formatter)
    log.addHandler(console_handler)


def get_max_qid(args):
    """Return the starting qid bound: the explicit option, else the cached one.

    An explicit ``args.max_qid`` always wins, including ``0``; only ``None``
    (option not given) falls back to the cache.
    """
    if args.max_qid is not None:
        return args.max_qid
    return get_cached_max_qid()


def get_cached_max_qid():
    """Return the qid bound stored in the cache file.

    Falls back to ``sys.maxint`` when the cache file does not exist, and also
    when its content is not a valid integer (for example an empty file),
    so a damaged cache behaves like a missing one instead of crashing the
    script before any work is done.
    """
    if not os.path.isfile(CACHE):
        return sys.maxint
    with open(CACHE) as f:
        content = f.read()
    try:
        return int(content)
    except ValueError:
        # Only a prefix of the content is logged to keep the log line short.
        log.warning(
            'ignoring invalid content of cache file %s: %r',
            CACHE,
            content[:100],
        )
        return sys.maxint


def set_cached_max_qid(value):
    """Overwrite the cache file with the text form of ``value``."""
    with open(CACHE, 'w') as cache_file:
        text = str(value)
        cache_file.write(text)


def perform(dsn, max_qid, chunk_size):
    """Clean the delete queue chunk by chunk, starting at ``max_qid``.

    Connects to ``dsn`` in autocommit mode and does nothing when the server
    reports itself as a replica. Otherwise perform_one() is called repeatedly;
    every new bound it returns is written to the cache, and the loop stops as
    soon as the bound comes back unchanged.

    Any ``StandardError`` is logged with its traceback and not re-raised.
    """
    # NOTE(review): the DSN is logged verbatim; if it can carry a password,
    # that password ends up in the log -- confirm this is acceptable.
    log.info('start')
    log.info('dsn: %s', dsn)
    log.info('max_qid: %d', max_qid)
    log.info('chunk_size: %d', chunk_size)
    try:
        with closing(psycopg2.connect(dsn)) as connection:
            connection.autocommit = True
            if is_replica(connection):
                log.info('database is replica, will do nothing')
                return
            current_qid = max_qid
            bound_unchanged = False
            while not bound_unchanged:
                next_qid = perform_one(connection, current_qid, chunk_size)
                log.info('new_max_qid: %d', next_qid)
                if next_qid != current_qid:
                    # Progress was made: remember it so a later run can
                    # resume from this bound.
                    current_qid = next_qid
                    set_cached_max_qid(current_qid)
                else:
                    bound_unchanged = True
        log.info('finished')
    except StandardError as error:
        log.exception(error)


def perform_one(connection, max_qid, chunk_size):
    """Process one chunk of the delete queue and return the next qid bound.

    Fetches up to ``chunk_size`` queue records with ``qid <= max_qid``, checks
    each ``st_id`` against ``mail.messages`` and deletes the queue records
    whose ``st_id`` is found there; the other records are left in place.

    Returns the bound for the next chunk: one below the last qid examined, or
    ``max_qid`` unchanged when no records were fetched.
    """
    records = get_storage_delete_queue_records(connection, max_qid, chunk_size)
    qids = []
    new_max_qid = max_qid
    for qid, st_id in records:
        if is_st_id_in_messages(connection, st_id=st_id):
            log.debug('will delete qid=%d, st_id=%s', qid, st_id)
            qids.append(qid)
        else:
            log.debug('will ignore qid=%d, st_id=%s', qid, st_id)
        # The fetch bound is inclusive, so step below the record just
        # examined. Returning qid itself would re-check a kept record in the
        # next chunk and, with chunk_size=1, return an unchanged bound at the
        # first kept record, which the caller treats as "nothing left".
        new_max_qid = qid - 1
    if qids:
        delete_qids(connection, qids)
        log.debug('deleted qids=%s', qids)
    return new_max_qid


def get_storage_delete_queue_records(connection, qid, chunk_size):
    """Fetch up to ``chunk_size`` queue rows with a qid not above ``qid``.

    Returns the ``(qid, st_id)`` rows produced by ``fetchall()``, ordered by
    qid from highest to lowest.
    """
    query = '''
                SELECT qid, st_id
                  FROM mail.storage_delete_queue
                 WHERE qid <= %(qid)s
                 ORDER BY qid DESC
                 LIMIT %(chunk_size)s
            '''
    params = {'qid': qid, 'chunk_size': chunk_size}
    with closing(connection.cursor()) as cur:
        cur.execute(query, params)
        rows = cur.fetchall()
    return rows


def is_st_id_in_messages(connection, st_id):
    """Return ``True`` when ``mail.messages`` has a row with this ``st_id``."""
    # NOTE(review): the extra hashtext() predicate presumably targets an
    # index on hashtext(st_id) -- confirm against the schema.
    query = '''
                SELECT 1
                  FROM mail.messages
                 WHERE hashtext(st_id) = hashtext(%(st_id)s)
                   AND st_id = %(st_id)s
            '''
    with closing(connection.cursor()) as cur:
        cur.execute(query, {'st_id': st_id})
        matches = cur.fetchall()
    return len(matches) > 0


def delete_qids(connection, qids):
    """Delete the queue records whose qid is listed in ``qids``."""
    statement = '''
                DELETE FROM mail.storage_delete_queue
                 WHERE qid = ANY(%s)
            '''
    # The whole list is passed as the single positional parameter of ANY().
    params = (qids,)
    with closing(connection.cursor()) as cur:
        cur.execute(statement, params)


def is_replica(connection):
    """Return the server's answer to ``pg_is_in_recovery()``."""
    with closing(connection.cursor()) as cur:
        cur.execute('SELECT pg_is_in_recovery()')
        row = cur.fetchone()
        return row[0]


# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
