from pymongo import MongoClient
import argparse
from tqdm import tqdm
import json
import time
import datetime
from bson.objectid import ObjectId
import sys


def get_filter(last_id):
    """Build the query filter used to fetch the next page of events.

    Returns ``None`` (i.e. no filter) when ``last_id`` is ``None``;
    otherwise returns a filter selecting documents whose ``_id`` is
    strictly less than ``last_id``.
    """
    return None if last_id is None else {'_id': {'$lt': last_id}}

# Command-line interface: what to search for, how to reach MongoDB, and
# switches that control paging and filtering of the scan.
parser = argparse.ArgumentParser(description="Scan the database for the urls")
parser.add_argument(
    "--limit",
    type=int,
    default=100,
    help="How many records to handle in one sitting",
)
parser.add_argument(
    "--urls",
    type=str,
    required=True,
    nargs="+",
    help="The urls you want to find (a prefix to start with",
)
parser.add_argument("--host", type=str, required=True, help="The mongo host")
parser.add_argument("--port", type=int, help="The mongo port", default=27017)
parser.add_argument(
    "--database",
    type=str,
    required=True,
    help="Database name",
)
parser.add_argument("--username", type=str, required=True)
# Optional resume point: the string form of an ObjectId, empty for none.
parser.add_argument("--last_id", type=str, default="")
parser.add_argument("--password", type=str, required=True)
# The two switches below share the same yes/no shape and default to "no".
for flag, text in (
    ("--filter_future", "Should we look just for the future events?"),
    ("--stop_at_first", "Should we stop after finding the first match?"),
):
    parser.add_argument(flag, type=str, default="no",
                        choices=["yes", "no"],
                        help=text)

args = parser.parse_args()

# Connect with the credentials given on the command line and select the
# requested database.
client = MongoClient(
    host=args.host,
    port=args.port,
    username=args.username,
    password=args.password,
)
db = client.get_database(name=args.database)

# Resume point for the scan; None means no --last_id was supplied.
last_id = ObjectId(args.last_id) if args.last_id else None

# Collection that is scanned.
event = db["event"]

# Accumulated matches plus the formatting / reference time used for them.
matched = []
datefmt = "%Y-%m-%d %H:%M:%S"
current = datetime.datetime.now().timestamp()

# Loop state: whether the last page returned anything, whether to stop
# early, and whether the "first match" notice is still pending.
has_more, stop, first = True, False, True

# Page through the collection in descending _id order, ``args.limit``
# documents per query, and collect every reminder whose url starts with one
# of the requested prefixes. Ctrl-C stops the scan but keeps what was found.

# str.startswith accepts a tuple, so one call tests every prefix and stops
# at the first hit instead of looping over all of them.
url_prefixes = tuple(args.urls)
# Loop-invariant switches, evaluated once.
only_future = args.filter_future == 'yes'
stop_at_first = args.stop_at_first == 'yes'

with tqdm() as pbar:
    while has_more and not stop:
        try:
            # Stays False when the query returns no documents, which ends
            # the outer loop.
            has_more = False
            events = event\
                .find(get_filter(last_id))\
                .sort("_id", -1)\
                .limit(args.limit)

            for item in events:
                has_more = True
                # Remember the position so the next page (or a later run
                # via --last_id) continues below this document.
                last_id = item["_id"]
                # An event with no (or a null) reminders list is skipped
                # rather than aborting the scan and losing the matches.
                for reminder in item.get("reminders") or []:
                    # A missing or null url can never match a prefix.
                    url = reminder.get("url") or ""
                    if not url.startswith(url_prefixes):
                        continue
                    # NOTE(review): if sendTs is a naive datetime,
                    # .timestamp() interprets it as local time - confirm
                    # this is comparable with `current`.
                    reminder_timestamp = reminder["sendTs"].timestamp()
                    if only_future and reminder_timestamp < current:
                        continue
                    if first:
                        # Announce only the first match, on stderr, without
                        # breaking the progress bar.
                        first = False
                        tqdm.write("Found a match", sys.stderr)
                    sendTsStr = reminder["sendTs"].strftime(datefmt)
                    matched.append({"eventId": str(item["_id"]),
                                    "reminderId": str(reminder["id"]),
                                    "url": reminder["url"],
                                    "extId": item["extId"],
                                    "sendTs": [reminder_timestamp,
                                               sendTsStr]})
                    if stop_at_first:
                        stop = True
                        break
                pbar.update()
                if stop:
                    break
        except KeyboardInterrupt:
            # Leave the loop cleanly so the results gathered so far are
            # still reported.
            stop = True

# Emit the result as one JSON document on stdout.
# ``last_id`` is still None when no --last_id was given and the scan saw no
# events. There is then no ObjectId to read a generation time from, so emit
# nulls instead of failing with AttributeError and printing nothing.
if last_id is None:
    last_id_str = None
    last_id_time = None
else:
    last_id_str = str(last_id)
    last_id_time = str(last_id.generation_time)

print(json.dumps({"matched": matched,
                  "last_id": last_id_str,
                  "urls": args.urls,
                  "timestamp": last_id_time}))
