#!/usr/bin/python3
# -*- coding: utf-8 -*-

import os
import psycopg2
import psycopg2.extras
import json
from urllib.error import HTTPError
from urllib.request import urlopen, Request
from urllib.parse import urlencode, unquote, urlparse
import logging
import random
import sys
from socket import AF_INET6
from http.server import BaseHTTPRequestHandler, HTTPServer
from threading import Thread, Lock, Event

# Path of the "login;token" file that main() loads into PREGENERATED_TOKENS.
TOKENS_FILE = "/etc/migrator/pregenerated_tokens"
# login -> token mapping; stays None until main() fills it in.
PREGENERATED_TOKENS = None

# Endpoints of the external services this script talks to.
RPOP_HOST = "https://collectors-ext.mail.yandex.net"  # mark_migrated / smtp_data API
RPOP_API_PORT = 443
RPOP_SMTP_API_PORT = 443
BLACKBOX_HOST = "https://blackbox-mail.yandex.net"  # uid/suid lookups
SHARPEI_HOST = "http://sharpei.mail.yandex.net"  # resolves the mdb shard for a uid
COLLECTORS_HOST = "http://collectors.stable.qloud-b.yandex.net"  # target of /migrate calls
# Number of worker threads created in processing_threads.
THREADS_COUNT = 5
# calc_last_mid_with_skipped_mids() refuses to migrate once this many mids are skipped.
MAX_MIDS_COUNT = 10000

# Both values are overwritten from the command line in main(); they are used by
# add_multiple_migrations() to pick the popids this process is responsible for.
PROCESSES_COUNT = 1
CURRENT_PROCESS_NUM = 1

TYPE_SUFFIX = "|||&system"  # add suffix, to ensure result name cannot exist in maildb/imap
# Maps a localized top-level IMAP folder name to the key that
# build_db_path_to_fid_map() generates for the matching non-"user" folder type.
LOCALIZATION_NAMES = {
    "INBOX": "inbox" + TYPE_SUFFIX,
    "Черновики": "drafts" + TYPE_SUFFIX,
    "Drafts": "drafts" + TYPE_SUFFIX,
    "Спам": "spam" + TYPE_SUFFIX,
    "Spam": "spam" + TYPE_SUFFIX,
    "Отправленные": "sent" + TYPE_SUFFIX,
    "Sent": "sent" + TYPE_SUFFIX,
    "Удаленные": "trash" + TYPE_SUFFIX,
    "Trash": "trash" + TYPE_SUFFIX,
    "Исходящие": "outbox" + TYPE_SUFFIX,
    "Outbox": "outbox" + TYPE_SUFFIX,
    "Archive": "archive" + TYPE_SUFFIX,
    "Архив": "archive" + TYPE_SUFFIX,
}

# psycopg2 connection strings of the rpopdb shards scanned by start_migration()
# and remaining_collectors().
SHARDS = [
    "host=rpopdb02f.mail.yandex.net port=6432 user=rpop dbname=rpopdb",
    "host=rpopdb01f.mail.yandex.net port=6432 user=rpop dbname=rpopdb",
    "host=rpopdb04f.mail.yandex.net port=6432 user=rpop dbname=rpopdb",
    "host=rpopdb03f.mail.yandex.net port=6432 user=rpop dbname=rpopdb",
]
# NOTE(review): neither server name is referenced in the visible part of the file.
IMAP_SERVER = "imap.yandex.ru"
POP_SERVER = "pop.yandex.ru"

# Pool of addresses from which migrate_collector() picks a random "collector_ip"
# for every migrated collector. The list was produced with the command below.
# p_exec %collectors-ext ip addr | grep tun4 | grep -Po "inet\s\d+\\.\d+.\d+.\d+" | awk '{print "'\''" $2 "'\''"}'
COLLECTORS_EXT_IP = [
    "141.8.172.21",
    "141.8.172.32",
    "141.8.172.38",
    "5.45.218.14",
    "5.45.218.35",
    "95.108.244.160",
    "141.8.172.37",
    "141.8.172.2",
    "5.45.218.12",
    "95.108.244.164",
    "95.108.244.137",
    "95.108.244.140",
    "95.108.244.162",
    "141.8.172.25",
    "95.108.244.147",
    "141.8.172.3",
    "95.108.244.154",
    "141.8.172.22",
    "141.8.172.14",
    "95.108.244.151",
    "5.45.218.23",
    "5.45.218.120",
    "95.108.244.158",
    "5.45.218.121",
    "95.108.244.144",
    "141.8.172.17",
    "141.8.172.19",
    "95.108.244.148",
    "95.108.244.130",
    "95.108.244.139",
    "95.108.244.156",
    "5.45.218.22",
    "141.8.172.39",
    "141.8.172.29",
    "5.45.218.3",
    "5.45.218.33",
    "5.45.218.107",
    "5.45.218.31",
    "5.45.218.108",
    "5.45.218.32",
    "77.88.40.173",
    "77.88.40.163",
    "5.45.218.37",
    "77.88.40.172",
    "77.88.40.155",
    "77.88.40.181",
    "77.88.40.184",
    "77.88.40.175",
    "77.88.40.182",
    "77.88.40.170",
    "5.45.218.117",
    "77.88.40.171",
    "77.88.40.174",
    "77.88.40.168",
    "141.8.172.34",
    "77.88.40.159",
    "95.108.244.138",
    "95.108.244.159",
    "95.108.244.165",
    "95.108.244.146",
    "141.8.172.23",
    "141.8.172.27",
    "141.8.172.112",
    "5.45.218.125",
    "5.45.218.123",
    "5.45.218.20",
    "5.45.218.126",
    "141.8.172.15",
    "141.8.172.35",
    "141.8.172.16",
    "141.8.172.30",
    "141.8.172.28",
    "95.108.244.163",
    "141.8.172.18",
    "5.45.218.30",
    "95.108.244.157",
    "95.108.244.143",
    "5.45.218.18",
    "77.88.40.158",
    "77.88.40.156",
    "77.88.40.154",
    "77.88.40.167",
    "95.108.244.145",
    "95.108.244.150",
    "95.108.244.161",
    "5.45.218.122",
    "5.45.218.16",
    "5.45.218.124",
    "5.45.218.13",
    "77.88.40.164",
    "77.88.40.157",
    "77.88.40.166",
    "77.88.40.149",
    "77.88.40.165",
    "77.88.40.161",
    "77.88.40.185",
    "77.88.40.146",
    "95.108.244.149",
    "141.8.172.121",
    "141.8.172.31",
]

# Everything is logged (DEBUG and above) to a single append-only UTF-8 file;
# the thread id is part of the format so that worker output can be told apart.
logging.basicConfig(
    handlers=[logging.FileHandler("/var/log/migrator/migrator.log", "a", encoding="utf-8")],
    level=logging.DEBUG,
    format="[%(asctime)s] %(thread)d %(levelname)s %(message)s",
)

# Shared work queue of (popid, conninfo) tuples.
# queue_mutex guards migration_queue; queue_event is set while work may be pending
# and is cleared by a worker that finds the queue empty.
queue_mutex = Lock()
queue_event = Event()
migration_queue = []


def load_url(url, add_headers=None, get_params=None, post_params=None):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Base URL; ``get_params`` (if any) are url-encoded and appended
            after a ``?``.
        add_headers: Optional mapping of extra request headers.
        get_params: Optional mapping encoded into the query string.
        post_params: Optional mapping sent as a form-encoded body. When it is
            non-empty the request carries data (i.e. becomes a POST).

    Returns:
        The response body as ``str``.

    Raises:
        Whatever ``urlopen`` raises (e.g. ``urllib.error.HTTPError``); nothing
        is caught here.
    """
    if get_params:
        url += "?" + urlencode(get_params)
    logging.info("loading url: %s", url)
    body = urlencode(post_params).encode("utf-8") if post_params else None
    # ``or {}`` keeps ``None`` working: Request() requires a mapping for headers.
    request = Request(url, headers=add_headers or {}, data=body)
    # The context manager closes the response (and its socket) deterministically.
    with urlopen(request) as resp:
        return resp.read().decode("utf-8")


def load_json(url, add_headers={}, get_params={}, post_params={}):
    """Fetch ``url`` through load_url() and return the body parsed as JSON.

    The arguments are forwarded to load_url() positionally and unchanged.
    """
    raw_body = load_url(url, add_headers, get_params, post_params)
    parsed = json.loads(raw_body)
    return parsed


def migrate_collector(
    uid,
    login,
    password,
    token,
    last_mid,
    skipped_mids,
    old_popid,
    creation_ts,
    ignore_folders_struct,
    server,
    enabled,
    ignore_invalid_credentials,
):
    """Ask the collectors service to create the migrated collector.

    Sends a POST to ``<COLLECTORS_HOST>:8080/migrate``. ``dst_uid`` and
    ``old_popid`` travel in the query string, everything else in the form
    body. ``auth_token`` is sent when ``token`` is not None, otherwise
    ``password`` is sent.

    Returns:
        The response body on success, or the body of the error response when
        the service answers with an HTTP error (the error is logged, not raised).
    """
    logging.info(
        "migrating collector uid=%s login=%s, last_mid=%s, skipped_mids=%s popid=%s creation_ts=%s ignore_folders_struct=%s server=%s enabled=%s ignore_invalid_credentials=%s",
        uid,
        login,
        last_mid,
        skipped_mids,
        old_popid,
        creation_ts,
        ignore_folders_struct,
        server,
        enabled,
        ignore_invalid_credentials,
    )
    query = {"dst_uid": uid, "old_popid": old_popid}
    form = {
        "login": login,
        "last_mid": last_mid,
        "skipped_mids": ",".join(str(mid) for mid in skipped_mids),
        "creation_ts": creation_ts,
        "ignore_folders_struct": int(bool(ignore_folders_struct)),
        "original_server": server,
        # every collector gets a randomly chosen address from the pool
        "collector_ip": random.choice(COLLECTORS_EXT_IP),
        "enabled": int(bool(enabled)),
        "ignore_invalid_credentials": int(bool(ignore_invalid_credentials)),
    }
    # exactly one kind of credential is sent: the token wins when present
    if token is None:
        form["password"] = password
    else:
        form["auth_token"] = token

    endpoint = "{}:8080/migrate".format(COLLECTORS_HOST)
    try:
        migration_res = load_url(endpoint, get_params=query, post_params=form)
    except HTTPError as e:
        resp = e.read().decode("utf-8")
        context_id = e.hdrs["Y-Context"] if "Y-Context" in e.hdrs else "no context"
        logging.error(
            "HTTPError: %s, resp=%s, popid=%s, dst_uid=%s, login=%s",
            context_id,
            resp,
            str(old_popid),
            str(uid),
            str(login),
        )
        return resp
    else:
        logging.info("migrated: %s", migration_res)
        return migration_res


def mark_rpop_migrated(suid, popid):
    """Call the rpop ``/api/mark_migrated`` endpoint for the given suid/popid.

    The JSON answer is parsed by load_json() and then discarded.
    """
    logging.info("mark migrated")
    endpoint = "{}:{}/api/mark_migrated".format(RPOP_HOST, RPOP_API_PORT)
    query = {"json": 1, "suid": suid, "popid": popid}
    load_json(endpoint, get_params=query)


def get_collector(cursor, popid):
    """Read the ``rpop.pop_profile`` row of collector ``popid``.

    Returns whatever ``cursor.fetchone()`` yields: the row, or None when no
    profile with that popid exists.
    """
    logging.info("getting collector from rpopdb")
    query = """
    SELECT
        uname,
        login,
        email,
        popid,
        server,
        use_imap,
        ROUND(EXTRACT(EPOCH FROM create_date))::bigint as create_date,
        is_on,
        error_status
    FROM rpop.pop_profile
    WHERE popid = %s"""
    cursor.execute(query, (popid,))
    profile = cursor.fetchone()
    return profile


def get_uid_by_suid(suid):
    """Resolve a uid from ``suid`` via the blackbox ``userinfo`` method (sid=2).

    Returns the ``value`` of the first user's ``uid`` object, or None when
    that object has no ``value`` key.
    """
    logging.info("getting uid by suid")
    query = {
        "method": "userinfo",
        "format": "json",
        "userip": "127.0.0.1",
        "sid": 2,
        "suid": suid,
    }
    resp = load_json("{}/blackbox".format(BLACKBOX_HOST), get_params=query)
    uid_info = resp["users"][0]["uid"]
    return uid_info["value"] if "value" in uid_info else None


def get_uid_by_login(login):
    """Resolve ``(uid, suid)`` for ``login`` via the blackbox ``userinfo`` method.

    ``uid`` is taken from the first user's ``uid.value`` and ``suid`` from its
    ``dbfields["subscription.suid.2"]``; each one is None when the
    corresponding key is missing from the answer.
    """
    logging.info("getting uid by login")
    query = {
        "method": "userinfo",
        "format": "json",
        "userip": "127.0.0.1",
        "login": login,
        "dbfields": "subscription.suid.2",
    }
    resp = load_json("{}/blackbox".format(BLACKBOX_HOST), get_params=query)
    user = resp["users"][0]

    uid_info = user["uid"]
    uid = uid_info["value"] if "value" in uid_info else None

    suid = None
    if "dbfields" in user and "subscription.suid.2" in user["dbfields"]:
        suid = user["dbfields"]["subscription.suid.2"]
    return uid, suid


def get_password(uid, popid, email, strip_password):
    """Fetch the collector's stored password from the rpop ``smtp_data`` API.

    The shared secret is read from the ``RPOP_SECRET`` environment variable
    (KeyError if it is not set).

    Returns:
        ``smtp_data.pass`` from the answer, stripped of surrounding whitespace
        when ``strip_password`` is true.
    """
    logging.info("getting collectors password")
    secret = os.environ["RPOP_SECRET"]
    endpoint = "{}:{}/api/v2/smtp_data".format(RPOP_HOST, RPOP_SMTP_API_PORT)
    form = {"uid": uid, "popid": popid, "email": email, "secret": secret}
    resp = load_json(endpoint, post_params=form)
    password = resp["smtp_data"]["pass"]
    if not strip_password:
        return password
    # pop3 server accepts login and password with additional spaces
    return password.strip()


def get_imap_collected_data(cursor, popid):
    """Load what an IMAP collector has already fetched, grouped by folder.

    Returns:
        ``{foldername: {"uidvalidity": ..., "messages": [imap_id, ...]}}``.
        The uidvalidity comes from the first row seen for the folder; rows
        with a falsy ``imap_id`` (the LEFT JOIN found no message) only create
        the folder entry.
    """
    logging.info("getting collectors IMAP messages")
    query = "SELECT foldername, uidvalidity, uidl as imap_id from rpop.rpop_imap_folders f left join rpop.rpop_imap_uidls m on f.folder_id = m.folder_id WHERE popid = %s"
    cursor.execute(query, (popid,))

    folders = {}
    for row in cursor.fetchall():
        entry = folders.setdefault(
            row["foldername"], {"uidvalidity": row["uidvalidity"], "messages": []}
        )
        imap_id = row["imap_id"]
        if imap_id:
            entry["messages"].append(imap_id)
    return folders


def get_pop3_collected_data(cursor, popid):
    """Return the list of uidls stored in ``rpop.rpop_uidl`` for ``popid``."""
    logging.info("getting collectors POP3 messages")
    cursor.execute("SELECT uidl from rpop.rpop_uidl WHERE popid = %s", (popid,))
    uidls = []
    for row in cursor.fetchall():
        uidls.append(row["uidl"])
    return uidls


def get_mdb_connection(uid):
    """Open a psycopg2 connection to the mail database shard of ``uid``.

    The shard is looked up through sharpei ``/conninfo`` with ``mode=master``;
    the first returned address is used and the connection is made as user
    ``imap``.
    """
    logging.info("getting mdb connection")
    lookup = {"mode": "master", "uid": uid}
    shard_data = load_json("{}/conninfo".format(SHARPEI_HOST), get_params=lookup)
    master = shard_data["addrs"][0]
    dsn = "host={host} port={port} dbname={dbname} user=imap".format(
        host=master["host"], port=master["port"], dbname=master["dbname"]
    )
    return psycopg2.connect(dsn)


def get_all_folders(cursor, uid):
    """Return all ``mail.folders`` rows of ``uid`` as a dict keyed by fid."""
    logging.info("getting all mdb folders")
    cursor.execute(
        "SELECT fid, parent_fid, name, type, uidvalidity from mail.folders WHERE uid=%s", (uid,)
    )
    folders_by_fid = {}
    for row in cursor.fetchall():
        folders_by_fid[row["fid"]] = row
    return folders_by_fid


def get_all_messages_with_imap_data(cursor, uid):
    """Return all ``mail.box`` rows of ``uid`` keyed by ``(fid, imap_id)``."""
    logging.info("getting all mdb messages")
    cursor.execute("SELECT mid, fid, imap_id from mail.box WHERE uid=%s", (uid,))
    messages = {}
    for row in cursor.fetchall():
        messages[(row["fid"], row["imap_id"])] = row
    return messages


def get_all_messages_with_pop3_data(cursor, uid):
    """Return ``{uidl: mid}`` for the messages of ``uid`` in POP3-enabled folders.

    The key is ``old_uidl`` as a string, or the string form of ``mid`` when
    ``old_uidl`` is NULL.

    Args:
        cursor: DB-API cursor whose rows support access by column name.
        uid: Owner of the mailbox.
    """
    logging.info("getting all mdb messages with uidls")
    # uid is bound by the driver (named parameter) instead of being pasted
    # into the SQL text, like every other query in this file.
    cursor.execute(
        """
        SELECT mid, old_uidl as uidl
            from mail.pop3_box as m,
            mail.folders as f
            where m.uid=%(uid)s and f.uid=%(uid)s and m.fid = f.fid and (f.pop3state).enabled=true
        """,
        {"uid": uid},
    )
    res = {}
    for row in cursor.fetchall():
        uidl = str(row["uidl"]) if row["uidl"] is not None else str(row["mid"])
        res[uidl] = row["mid"]
    return res


def build_db_path_to_fid_map(folders):
    """Build a ``{path: fid}`` map from the ``{fid: folder}`` dict.

    Path rules:
      * a folder whose type is not "user" is keyed by ``type + TYPE_SUFFIX``;
      * a "user" folder without a parent (parent_fid 0 or None) by its name;
      * any other "user" folder by ``<parent path>|<name>``.
    """
    memo = {}

    def path_of(fid):
        folder = folders[fid]
        if folder["type"] != "user":
            return folder["type"] + TYPE_SUFFIX

        parent_fid = folder["parent_fid"]
        if parent_fid == 0 or parent_fid is None:
            return folder["name"]

        # only nested paths are memoized; they are the ones that recurse
        if fid not in memo:
            memo[fid] = path_of(parent_fid) + "|" + folder["name"]
        return memo[fid]

    return {path_of(fid): fid for fid in folders}


def unlocalize(name):
    """Replace a localized root folder name with its LOCALIZATION_NAMES value.

    Only the first ``|``-separated component is looked up; the rest of the
    path is kept. Names whose root is not in LOCALIZATION_NAMES are returned
    unchanged.
    """
    root, separator, tail = name.partition("|")
    if root not in LOCALIZATION_NAMES:
        return name
    return LOCALIZATION_NAMES[root] + separator + tail


def folder_exists(path, db_path_to_fid):
    """Tell whether ``path`` is one of the keys of ``db_path_to_fid``."""
    is_known = path in db_path_to_fid
    return is_known


def get_fid_for_imap_name(imap_name, db_path_to_fid, all_imap_names):
    """Find the mdb folder id that corresponds to an IMAP folder name.

    Lookup order:
      1. the unlocalized name (see unlocalize());
      2. the name as is;
      3. the ``<localized name>_<N>`` form: accepted when a folder called
         ``<localized name>`` exists in ``db_path_to_fid`` and the lower
         numbered ``<localized name>_<i>`` names are all present in
         ``all_imap_names``.

    Args:
        imap_name: Folder name as stored by the collector.
        db_path_to_fid: Map produced by build_db_path_to_fid_map().
        all_imap_names: Container of all IMAP folder names of the collector.

    Returns:
        The fid, or None when no folder matches.
    """
    unlocalized_name = unlocalize(imap_name)
    if folder_exists(unlocalized_name, db_path_to_fid):
        return db_path_to_fid[unlocalized_name]

    if folder_exists(imap_name, db_path_to_fid):
        return db_path_to_fid[imap_name]

    # special imap logic treatment
    # https://a.yandex-team.ru/arc/trunk/arcadia/mail/imap/src/common/folder_list.cc?rev=7151345#L124
    for localized_name in LOCALIZATION_NAMES:
        prefix = localized_name + "_"
        if not (imap_name.startswith(prefix) and folder_exists(localized_name, db_path_to_fid)):
            continue

        # Only the first matching prefix is considered, whatever the outcome.
        suffix = imap_name[len(prefix):]
        try:
            int_suffix = int(suffix)
        except ValueError:
            # Not a numeric suffix, so this is not the "_N" case. Only the
            # parse error is swallowed; anything else propagates.
            break

        # there should be all folders from _0 to _'suffix-1'
        # NOTE(review): range(0, int_suffix - 1) stops at suffix-2, which does
        # not match the comment above - confirm against the IMAP server logic.
        for i in range(0, int_suffix - 1):
            if prefix + str(i) not in all_imap_names:
                return None  # not this case
        return db_path_to_fid[localized_name]

    return None


def merge_folders(f, s):
    """Merge two collected-folder records that share one uidvalidity.

    Returns a new record with that uidvalidity and the de-duplicated union of
    both message lists (order not defined).

    Raises:
        Exception: when the uidvalidity values differ.
    """
    uidvalidity = f["uidvalidity"]
    if uidvalidity != s["uidvalidity"]:
        raise Exception("uidvalidity mismatch")

    merged_messages = set(f["messages"]) | set(s["messages"])
    return {"uidvalidity": uidvalidity, "messages": list(merged_messages)}


def uidvalidity_matches(name1, name2, collected_data):
    """Tell whether two collected folders carry the same uidvalidity."""
    first = collected_data[name1]["uidvalidity"]
    second = collected_data[name2]["uidvalidity"]
    return first == second


def uidvalidity_exists(uidvalidity, all_folders):
    """Tell whether any folder in ``all_folders`` has the given uidvalidity."""
    return any(folder["uidvalidity"] == uidvalidity for folder in all_folders.values())


def build_imap_name_to_fid_map(all_folders, collected_data):
    """Map each collected IMAP folder name to an mdb fid where possible.

    A folder is left out when no mdb folder has its uidvalidity, or when
    get_fid_for_imap_name() finds no (truthy) fid for it.
    """
    db_path_to_fid = build_db_path_to_fid_map(all_folders)
    imap_folder_names = collected_data.keys()

    mapping = {}
    for imap_name in imap_folder_names:
        if uidvalidity_exists(collected_data[imap_name]["uidvalidity"], all_folders):
            fid = get_fid_for_imap_name(imap_name, db_path_to_fid, imap_folder_names)
            if fid:
                mapping[imap_name] = fid
    return mapping


def find_all_keys_by_value(d, item):
    """Return, in dict order, every key of ``d`` whose value equals ``item``."""
    return [key for key, value in d.items() if value == item]


def process_folders_with_same_fid(imap_name_to_fid, all_folders, collected_data):
    """Resolve IMAP folder names that were mapped to the same fid.

    Both ``imap_name_to_fid`` and ``collected_data`` are modified in place.
    For every pair of names sharing a fid:
      * if exactly one of the names is itself a path in mdb, that name is
        re-pointed to the fid of that path;
      * otherwise, if both collected folders have the same uidvalidity, the
        second folder is merged into the first and removed from both dicts.

    Raises:
        Exception: when more than two names share a fid, when both names are
            mdb paths, when the uidvalidity values differ, or when the
            resulting mapping still contains duplicate fids.
    """
    db_path_to_fid = build_db_path_to_fid_map(all_folders)
    # iterate over a snapshot: entries may be deleted from the dict below
    names = list(imap_name_to_fid.keys())
    for imap_name in names:
        if imap_name not in imap_name_to_fid:
            # already merged into another name on an earlier iteration
            continue

        fid = imap_name_to_fid[imap_name]
        all_names = find_all_keys_by_value(imap_name_to_fid, fid)
        if len(all_names) > 2:
            raise Exception(
                "folder localization problems(too many candidates): all_names={}, imap_name_to_fid={}".format(
                    all_names, imap_name_to_fid
                )
            )
        elif len(all_names) == 1:
            continue

        # exactly two names share the fid; pick the one that is not imap_name
        second_name = all_names[1] if all_names[0] == imap_name else all_names[0]

        if imap_name in db_path_to_fid and second_name in db_path_to_fid:
            raise Exception(
                "folder localization problems(both imap_names in db_path_to_fid): all_names={}, imap_name_to_fid={}".format(
                    all_names, imap_name_to_fid
                )
            )

        # one of the names is a literal mdb path: map it to that path's fid
        if imap_name in db_path_to_fid:
            imap_name_to_fid[imap_name] = db_path_to_fid[imap_name]
            continue

        if second_name in db_path_to_fid:
            imap_name_to_fid[second_name] = db_path_to_fid[second_name]
            continue

        # neither name is an mdb path: treat them as one folder if possible
        if uidvalidity_matches(imap_name, second_name, collected_data):
            collected_data[imap_name] = merge_folders(
                collected_data[imap_name], collected_data[second_name]
            )
            del collected_data[second_name]
            del imap_name_to_fid[second_name]
        else:
            raise Exception(
                "folder localization problems(uidvalidity doesn't match): all_names={}, imap_name_to_fid={}".format(
                    all_names, imap_name_to_fid
                )
            )

    # final sanity check: every remaining name must have its own fid
    if len(imap_name_to_fid.keys()) != len(set(imap_name_to_fid.values())):
        raise Exception(
            "folder localization problems(inconsistent imap_name_to_fid): imap_name_to_fid={}".format(
                imap_name_to_fid
            )
        )


def calc_optimized_mids_count(skipped_mids, predownloaded_mids):
    """Return the smallest candidate count found by a merge walk of both lists.

    Both inputs are sorted and walked like a merge. At every step a candidate
    "skipped mids consumed so far + predownloaded mids still ahead" is
    computed and the minimum is kept. The result is only used in the
    diagnostic message of calc_last_mid_with_skipped_mids().

    Returns 0 when either input is empty.
    """
    if not (skipped_mids and predownloaded_mids):
        return 0

    skipped = sorted(skipped_mids)
    predownloaded = sorted(predownloaded_mids)
    skipped_total = len(skipped)
    predownloaded_total = len(predownloaded)

    best = predownloaded_total
    s_idx = 0
    p_idx = 0
    while s_idx < skipped_total and p_idx < predownloaded_total:
        ahead = predownloaded_total - p_idx
        if skipped[s_idx] < predownloaded[p_idx]:
            best = min(best, s_idx + 1 + ahead)
            s_idx += 1
        else:
            best = min(best, s_idx + ahead - 1)
            p_idx += 1

    # one of the lists is exhausted: compare with the number of skipped mids
    # that are not above the newest predownloaded mid
    newest_predownloaded = predownloaded[-1]
    return min(best, sum(1 for mid in skipped if mid <= newest_predownloaded))


def calc_last_mid_with_skipped_mids(all_mids, collected_mids, collector):
    """Compute the collector position: the last collected mid plus the gaps.

    Returns:
        ``(last_mid, skipped_mids)`` where ``last_mid`` is the largest
        collected mid (0 when nothing was collected) and ``skipped_mids`` is
        the set of not collected mids below it.

    Raises:
        Exception: when the number of skipped mids reaches MAX_MIDS_COUNT;
            the message carries diagnostic counters.
    """
    last_collected = 0
    if collected_mids:
        last_collected = max(collected_mids)

    missing = set(all_mids) - set(collected_mids)
    skipped = [mid for mid in missing if mid < last_collected]
    if len(skipped) < MAX_MIDS_COUNT:
        return last_collected, set(skipped)

    # can't migrate: gather numbers for the error message
    contiguous_end = min(missing) - 1 if missing else 0
    predownloaded = [mid for mid in collected_mids if mid > contiguous_end]

    message = "too many skipped_mids count: skipped_mids_len={} ({}), predownloaded_mids_len={}, optimized_mids_len={}, for collector: is_on={}, use_imap={}".format(
        len(skipped),
        str(skipped)[1:1000],
        len(predownloaded),
        calc_optimized_mids_count(skipped, predownloaded),
        collector["is_on"],
        collector["use_imap"],
    )
    raise Exception(message)


def calc_imap_collector_pos_with_skipped(all_folders, all_messages, collected_data, collector):
    """Translate collected IMAP ids into mids and compute the collector position.

    Collected folders are matched to mdb folders, every collected
    ``(fid, imap_id)`` that still exists in ``all_messages`` contributes its
    mid, and the result is delegated to calc_last_mid_with_skipped_mids().

    Raises:
        Exception: on a uidvalidity mismatch for a collector that is switched
            on and not in the "login error" state (otherwise the folder is
            skipped silently).
    """
    imap_name_to_fid = build_imap_name_to_fid_map(all_folders, collected_data)
    process_folders_with_same_fid(imap_name_to_fid, all_folders, collected_data)

    collected_mids = []
    for name, data in collected_data.items():
        if name not in imap_name_to_fid:
            continue

        fid = imap_name_to_fid[name]
        actual_uidvalidity = all_folders[fid]["uidvalidity"]
        collected_uidvalidity = data["uidvalidity"]
        if actual_uidvalidity != collected_uidvalidity:
            if collector["is_on"] == 0 or collector["error_status"] == "login error":
                # collector is not running: just ignore ids of this folder
                continue
            raise Exception(
                "uidvalidity mismatch name={}, collected_uidvalidity={}, actual_uidvalidity={}".format(
                    name, collected_uidvalidity, actual_uidvalidity
                )
            )

        collected_mids.extend(
            all_messages[(fid, imap_id)]["mid"]
            for imap_id in data["messages"]
            if (fid, imap_id) in all_messages
        )

    all_mids = [msg["mid"] for msg in all_messages.values()]
    return calc_last_mid_with_skipped_mids(all_mids, collected_mids, collector)


def get_imap_collector_pos_with_skipped(popid, rpopdb_cursor, src_uid, collector):
    """Compute ``(last_mid, skipped_mids)`` for an IMAP collector.

    Args:
        popid: Collector id in rpopdb.
        rpopdb_cursor: Open rpopdb cursor used to read the collected data.
        src_uid: uid of the source mailbox; selects the mdb shard.
        collector: The collector's ``rpop.pop_profile`` row.

    Raises:
        Exception: propagated from calc_imap_collector_pos_with_skipped().
    """
    # rpopdb
    collected_data = get_imap_collected_data(rpopdb_cursor, popid)

    # mdb
    mdb_conn = get_mdb_connection(src_uid)
    try:
        # psycopg2's ``with connection`` only commits or rolls back the
        # transaction; it does not close the connection, hence the finally.
        with mdb_conn:
            mdb_cursor = mdb_conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
            total_folders = get_all_folders(mdb_cursor, src_uid)
            total_messages = get_all_messages_with_imap_data(mdb_cursor, src_uid)
    finally:
        mdb_conn.close()

    # the fetched rows stay usable after the connection is closed
    return calc_imap_collector_pos_with_skipped(
        total_folders, total_messages, collected_data, collector
    )


def calc_pop3_collector_pos_with_skipped(all_messages, uidls, collector):
    """Compute ``(last_mid, skipped_mids)`` for a POP3 collector.

    Args:
        all_messages: ``{uidl: mid}`` for the source mailbox.
        uidls: Iterable of uidls the collector has already fetched.
        collector: The collector's ``rpop.pop_profile`` row (used for the
            diagnostics of calc_last_mid_with_skipped_mids()).
    """
    # A set makes every membership test O(1) instead of scanning the list
    # once per mailbox message.
    collected_uidls = set(uidls)
    all_mids = all_messages.values()
    collected_mids = [mid for uidl, mid in all_messages.items() if uidl in collected_uidls]
    return calc_last_mid_with_skipped_mids(all_mids, collected_mids, collector)


def get_pop3_collector_pos_with_skipped(popid, rpopdb_cursor, src_uid, collector):
    """Compute ``(last_mid, skipped_mids)`` for a POP3 collector.

    Args:
        popid: Collector id in rpopdb.
        rpopdb_cursor: Open rpopdb cursor used to read the collected uidls.
        src_uid: uid of the source mailbox; selects the mdb shard.
        collector: The collector's ``rpop.pop_profile`` row.
    """
    # rpopdb
    uidls = get_pop3_collected_data(rpopdb_cursor, popid)

    # mdb
    mdb_conn = get_mdb_connection(src_uid)
    try:
        # psycopg2's ``with connection`` only commits or rolls back the
        # transaction; it does not close the connection, hence the finally.
        with mdb_conn:
            mdb_cursor = mdb_conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
            messages = get_all_messages_with_pop3_data(mdb_cursor, src_uid)
    finally:
        mdb_conn.close()

    return calc_pop3_collector_pos_with_skipped(messages, uidls, collector)


def migrate(popid, conninfo):
    """Migrate one collector; every failure is logged, never raised.

    Steps: read the collector profile from rpopdb, resolve the destination
    and source uids through blackbox, pick the credentials (pregenerated
    token or stored password), compute the collector position in the source
    mailbox and call migrate_collector(). Collectors that cannot or need not
    be migrated are marked as migrated in rpop.

    Args:
        popid: Collector id.
        conninfo: psycopg2 connection string of the rpopdb shard holding it.
    """
    dst_uid = "UNKNOWN"
    src_uid = "UNKNOWN"
    rpopdb_conn = None
    try:
        logging.info("migrate: %s, %s", popid, conninfo)
        rpopdb_conn = psycopg2.connect(conninfo)
        # ``with connection`` only scopes the transaction; the connection
        # itself is closed in the finally clause below.
        with rpopdb_conn:
            rpopdb_cursor = rpopdb_conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

            collector = get_collector(rpopdb_cursor, popid)
            if collector is None:
                # no such profile: report it plainly instead of failing on None
                logging.error("collector not found in rpopdb: popid=%s", str(popid))
                return
            ignore_folders_struct = collector["use_imap"] == 0

            login = collector["login"]
            if ignore_folders_struct:
                login = (
                    login.strip()
                )  # pop3 server accepts login and password with additional spaces

            dst_uid = get_uid_by_suid(collector["uname"])
            if dst_uid is None:
                # collector set up for not existing user
                # so just mark it migrated
                mark_rpop_migrated(collector["uname"], popid)
                return

            src_uid, src_suid = get_uid_by_login(login)
            if (src_uid is None) or (src_suid is None) or src_suid == "":
                logging.info(
                    "source account was deleted, just migrate with empty data: src_uid=%s, src_suid=%s",
                    str(src_uid),
                    str(src_suid),
                )
                res = migrate_collector(
                    dst_uid,
                    login,
                    "",
                    "",
                    0,
                    [],
                    collector["popid"],
                    collector["create_date"],
                    ignore_folders_struct,
                    collector["server"],
                    collector["is_on"] != 0,
                    True,
                )
                if res == "duplicate_collector":
                    # already migrated
                    mark_rpop_migrated(collector["uname"], popid)
                return

            # A pregenerated token is used only for healthy, enabled
            # collectors; everything else falls back to the stored password.
            token = None
            password = None
            if (
                (login in PREGENERATED_TOKENS)
                and collector["error_status"] == "ok"
                and collector["is_on"] == 1
            ):
                token = PREGENERATED_TOKENS[login]
            else:
                password = get_password(dst_uid, popid, collector["email"], ignore_folders_struct)

            if collector["use_imap"]:
                last_mid, skipped_mids = get_imap_collector_pos_with_skipped(
                    popid, rpopdb_cursor, src_uid, collector
                )
            else:
                last_mid, skipped_mids = get_pop3_collector_pos_with_skipped(
                    popid, rpopdb_cursor, src_uid, collector
                )

            ignore_invalid_credentials = (
                collector["is_on"] == 0 or collector["error_status"] == "login error"
            )
            res = migrate_collector(
                dst_uid,
                login,
                password,
                token,
                last_mid,
                skipped_mids,
                collector["popid"],
                collector["create_date"],
                ignore_folders_struct,
                collector["server"],
                collector["is_on"] != 0,
                ignore_invalid_credentials,
            )
            if res == "collector_from_himself" or res == "duplicate_collector":
                # it's broken collector that should be deleted later
                mark_rpop_migrated(collector["uname"], popid)

    except HTTPError as e:
        context_id = e.hdrs["Y-Context"] if "Y-Context" in e.hdrs else "no context"
        logging.error(
            "HTTPError: %s, resp=%s, popid=%s, dst_uid=%s, src_uid=%s",
            context_id,
            e.read().decode("utf-8"),
            str(popid),
            str(dst_uid),
            str(src_uid),
        )
    except BaseException as e:
        # Kept as broad as before: migrate() is called from the worker loop in
        # process_queue(), where an escaping exception would end that thread.
        logging.exception(
            "exception: %s, popid: %s, dst_uid: %s, src_uid: %s",
            str(e),
            str(popid),
            str(dst_uid),
            str(src_uid),
        )
    finally:
        if rpopdb_conn is not None:
            rpopdb_conn.close()


def add_migration(popid, conninfo):
    """Queue one ``(popid, conninfo)`` task and signal the worker threads."""
    with queue_mutex:
        logging.info("add migration: %s, %s", popid, conninfo)
        migration_queue.append((popid, conninfo))
        queue_event.set()


def add_multiple_migrations(migrations):
    """Queue the tasks from ``{popid: conninfo}`` that belong to this process.

    A popid is taken only when ``int(popid) % PROCESSES_COUNT`` equals
    CURRENT_PROCESS_NUM; the rest are ignored.
    NOTE(review): the remainder is in ``0..PROCESSES_COUNT-1``, so this
    presumably expects a zero-based CURRENT_PROCESS_NUM - confirm with the
    way the processes are launched.
    """
    with queue_mutex:
        for popid, conninfo in migrations.items():
            if int(popid) % PROCESSES_COUNT != CURRENT_PROCESS_NUM:
                continue
            logging.info("add migration: %s, %s", popid, conninfo)
            queue_event.set()
            migration_queue.append((popid, conninfo))


# Run/pause switch for the workers: process_queue() waits on it before taking
# a task; migrate_single() and start_migration() set it, stop_migration() clears it.
in_progress = Event()


def process_queue():
    """Worker loop: take tasks from migration_queue and run migrate() on them.

    The loop never returns. It sleeps on queue_event until work is announced,
    then drains the queue one task at a time, waiting on in_progress before
    every task so that a cleared in_progress pauses the processing. The worker
    that finds the queue empty clears queue_event.
    """
    global migration_queue
    while True:
        logging.info("waiting queue")
        queue_event.wait()

        drained = False
        while not drained:
            in_progress.wait()

            task = None
            with queue_mutex:
                try:
                    if migration_queue:
                        # pop the head by rebinding the global to the tail
                        task = migration_queue[0]
                        migration_queue = migration_queue[1:]
                    else:
                        drained = True
                        queue_event.clear()
                except BaseException as e:
                    logging.exception(str(e))

            # migrate() runs outside the lock
            if task is not None:
                migrate(*task)


# Worker threads running process_queue(); created here, started in main().
processing_threads = [Thread(target=process_queue) for i in range(0, THREADS_COUNT)]


def ping(params):
    """Liveness handler: ignores ``params`` and always answers "pong"."""
    reply = "pong"
    return reply


def migrate_single(params):
    """``/migrate`` handler: resume processing and queue a single collector.

    Expects ``popid`` and ``conninfo`` in ``params``; in_progress is set
    before they are read.
    """
    in_progress.set()
    popid = params["popid"]
    conninfo = params["conninfo"]
    add_migration(popid, conninfo)
    return "OK"


def start_migration(params):
    """``/start`` handler: resume processing and queue a batch of collectors.

    From every rpopdb shard in SHARDS it selects not yet migrated collectors
    that were created before 2020-09-01 and whose server name matches the
    Yandex domains pattern, then queues them with add_multiple_migrations().

    Args:
        params: Request parameters; ``count`` is the total number of
            collectors wanted. It is split evenly between the shards and
            rounded up, so slightly more than ``count`` may be queued.

    Returns:
        "OK".
    """
    in_progress.set()

    # LIMIT must be an integer; derive the share from the real shard count.
    shard_count = max(len(SHARDS), 1)
    per_shard_limit = (int(params["count"]) + shard_count - 1) // shard_count

    # Raw string: the regex backslashes must reach PostgreSQL untouched.
    # NOTE(review): the dot in "com.tr" is not escaped, as in the original.
    query = r"""
        SELECT popid, uname from rpop.pop_profile
        WHERE server ~* '(\.yandex\.(ru|by|com|com.tr|net|kz|ua)\.?$)|(\.ya\.ru\.?$)'
        AND error_status <> 'migrated'
        AND create_date < timestamp '2020-09-01 00:00:00'
        LIMIT %s
    """

    for conninfo in SHARDS:
        conn = psycopg2.connect(conninfo)
        try:
            # ``with connection`` only ends the transaction; close explicitly.
            with conn:
                cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
                cursor.execute(query, (per_shard_limit,))
                rows = cursor.fetchall()
        finally:
            conn.close()

        migrations = {str(row["popid"]): conninfo for row in rows}
        add_multiple_migrations(migrations)
    return "OK"


def remaining_collectors(params):
    """``/remaining_collectors`` handler: count collectors still to migrate.

    Sums, over all shards in SHARDS, the collectors whose server matches the
    Yandex domains pattern and whose ``error_status`` is not 'migrated'.
    ``params`` is not used.

    Returns:
        The string ``"total_count: <n>"``.
    """
    # Raw string: the regex backslashes must reach PostgreSQL untouched.
    query = r"""
        SELECT count(*) as current_count from rpop.pop_profile
        WHERE server ~* '(\.yandex\.(ru|by|com|com.tr|net|kz|ua)\.?$)|(\.ya\.ru\.?$)'
        AND error_status <> 'migrated'
    """

    total_count = 0
    for conninfo in SHARDS:
        conn = psycopg2.connect(conninfo)
        try:
            # ``with connection`` only ends the transaction; close explicitly.
            with conn:
                cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
                cursor.execute(query)
                row = cursor.fetchone()
        finally:
            conn.close()
        total_count += int(row["current_count"])
    return "total_count: " + str(total_count)


def stop_migration(params):
    """``/stop`` handler: pause the workers by clearing in_progress.

    ``params`` is not used. Workers stop before taking their next task;
    the queue itself is left untouched.
    """
    logging.info("stop migration")
    in_progress.clear()
    status = "OK"
    return status


class WebServer(HTTPServer):
    """IPv6 HTTP control server that dispatches request paths to handlers."""

    address_family = AF_INET6

    class Handler(BaseHTTPRequestHandler):
        """Runs the function registered for the request path.

        GET and POST are treated the same; only the query string is parsed.
        Each handler receives a ``{name: value}`` dict and returns the
        response text.
        """

        path_handlers = {
            "/ping": ping,
            "/migrate": migrate_single,
            "/start": start_migration,
            "/remaining_collectors": remaining_collectors,
            "/stop": stop_migration,
        }

        def do_GET(self):
            self.process_query()

        def do_POST(self):
            self.process_query()

        def process_query(self):
            """Parse the query string, call the handler and send its answer.

            Answers 200 with the handler's text, or 500 with the exception
            text when anything fails (including an unknown path).
            """
            try:
                req = urlparse(self.path)

                params = {}
                if req.query != "":
                    for pair in req.query.split("&"):
                        if not pair:
                            continue
                        # split on the first "=" only, so that values may
                        # contain "=" themselves (e.g. a conninfo string)
                        key, _, val = pair.partition("=")
                        params[key] = unquote(val)

                resp = self.path_handlers[req.path](params)
                status = 200
            except Exception as e:
                logging.exception(str(e))
                resp = str(e)
                status = 500

            # Content-Length counts bytes, so measure the encoded body. The
            # response is written outside the try block so that a failure
            # cannot produce a second status line.
            body = resp.encode("utf-8")
            self.send_response(status)
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

    def __init__(self, port):
        """Bind to all interfaces on ``port``."""
        HTTPServer.__init__(self, ("", port), WebServer.Handler)


def load_tokens_file(filename):
    """Read a ``login;token`` file into a ``{login: token}`` dict.

    One entry per line. The line terminator is not part of the token, blank
    lines are skipped, and only the first ``;`` separates login from token.

    Raises:
        ValueError: when a non-blank line contains no ``;``.
        OSError: when the file cannot be opened.
    """
    tokens = {}
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            line = line.rstrip("\r\n")
            if not line.strip():
                continue
            login, token = line.split(";", 1)
            tokens[login] = token
    return tokens


def main():
    """Entry point: ``migrator.py <process count> <current_process_num>``.

    Loads the pregenerated tokens, stores the process sharding parameters,
    starts the worker threads and serves HTTP on port
    ``8080 + current_process_num`` forever. Exits with status -1 when the
    arguments are missing.
    """
    global PREGENERATED_TOKENS, PROCESSES_COUNT, CURRENT_PROCESS_NUM

    if len(sys.argv) < 3:
        print("usage: migrator.py <process count> <current_process_num>")
        # sys.exit instead of the ``exit`` helper, which only exists when the
        # ``site`` module has been loaded
        sys.exit(-1)

    PREGENERATED_TOKENS = load_tokens_file(TOKENS_FILE)

    PROCESSES_COUNT = int(sys.argv[1])
    CURRENT_PROCESS_NUM = int(sys.argv[2])

    server = WebServer(8080 + CURRENT_PROCESS_NUM)
    for thread in processing_threads:
        thread.start()
    server.serve_forever()


# Start the service only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
