# -*- coding: utf-8 -*-
import os
import re
import sys
import urllib
import time
import json

from mpfs.engine.process import setup_anyone_script, dbctl
setup_anyone_script()

from mpfs.core.mrstat.report import YT_TOKEN, Job
from mpfs.core.filesystem.cleaner.worker import SyncDBCheckerMixin
from mpfs.metastorage.mongo.util import decompress_data
from mpfs.core.services.zaberun_service import Zaberun

# YT server setting reused from the mrstat reporting Job; it is exported to the
# environment as YT_PROXY when this file is run as a script.
YT_PROXY = Job.yt_server


def get_ips_from_lenulca(date):
    """Return the distinct ``ip`` values stored for *date* in the detailed table.

    Runs ``yt read`` (DSV format) on the ``ip`` column of
    ``bad_stids_detailed/<date>`` and extracts the value of every ``ip=...``
    field found in the output.

    :param date: table name suffix; the script passes a ``YYYY-MM-DD`` string.
    :return: list of unique ip strings, in arbitrary order (empty when the
        command produces no matching output).
    """
    ip_field = re.compile('ip=([^\t]+)')
    stream = os.popen('yt read "//home/mpfs-stat/tmp/mpfs_mulca_clean_intersection/bad_stids_detailed/%s{ip}" --format dsv' % date)
    try:
        unique_ips = set()
        for line in stream:
            match = ip_field.search(line.strip())
            if match:
                unique_ips.add(match.group(1))
    finally:
        # Always release the pipe to the child process, even if parsing fails.
        stream.close()
    return list(unique_ips)


def get_bad_stids(date):
    """Return the stids listed for *date* in the ``bad_stids`` table.

    Runs ``yt read`` (DSV format) on the ``subkey`` column of
    ``bad_stids/<date>`` and collects the value of every ``subkey=...`` field
    found in the output. Duplicates are kept and the output order is preserved.

    :param date: table name suffix; the script passes a ``YYYY-MM-DD`` string.
    :return: list of stid strings (empty when the command produces no
        matching output).
    """
    subkey_field = re.compile('subkey=([^\t]+)')
    stream = os.popen('yt read "//home/mpfs-stat/tmp/mpfs_mulca_clean_intersection/bad_stids/%s{subkey}" --format dsv' % date)
    try:
        stids = []
        for line in stream:
            match = subkey_field.search(line.strip())
            if match:
                stids.append(match.group(1))
    finally:
        # Always release the pipe to the child process, even if parsing fails.
        stream.close()
    return stids


def search_by_users_collections(spec):
    """
    Walk over every shard and collection that holds user data.

    Runs ``find(spec)`` on each collection and yields
    ``(shard_name, collection_name, doc)`` tuples, where the document's
    ``zdata`` field has been replaced by the result of ``decompress_data``.
    """
    checker = SyncDBCheckerMixin()
    for shard_name, collection_name in checker.shard_collection_generator():
        if shard_name != 'mongos':
            # Replica-set shard: connect to it directly; the database is
            # addressed by the same name as the collection.
            connection = dbctl().mapper.rspool.get_connection_for_rs_name(shard_name)
            collection = connection[collection_name][collection_name]
        else:
            collection = dbctl().database()[collection_name]

        for document in collection.find(spec):
            document['zdata'] = decompress_data(document['zdata'])
            yield shard_name, collection_name, document


def or_pattern(stids, append_urlencoded=False):
    """Build a regex alternation group ``(a|b|...)`` out of *stids*.

    :param stids: iterable of stid strings; they are inserted verbatim, without
        regex escaping.
    :param append_urlencoded: when true, each stid is followed by its
        ``urllib.quote``-d form as an extra alternative.
    :return: the pattern string; ``'()'`` for an empty input.
    """
    alternatives = []
    for stid in stids:
        alternatives.append(stid)
        if append_urlencoded:
            alternatives.append(urllib.quote(stid))
    # Every alternative carries a trailing '|'; the surplus ones at the end are
    # stripped before the group is closed.
    body = ''.join(alternative + '|' for alternative in alternatives)
    return ('(' + body).strip('|') + ')'


def grep_lenulca(date, stids):
    """Grep the lenulca access log of *date* for the given stids on YT.

    Launches a ``yt map`` operation that runs ``egrep`` over
    ``//statbox/lenulca-access-log/<date>`` and writes the matching rows to
    ``bad_stids_detailed/<date>``. Each stid is searched both verbatim and in
    its URL-quoted form.

    :param date: log table name; the script passes a ``YYYY-MM-DD`` string.
    :param stids: sequence of stid strings to look for. Nothing is launched
        when it is empty, because the resulting empty pattern ``()`` would
        match every line of the log.
    """
    if not stids:
        return

    # or_pattern() already returns a complete, closed "(a|b|...)" group.
    # Appending the alternatives a second time (as the previous version did)
    # produced an unbalanced expression such as "(a|a)a|a)".
    pattern = or_pattern(stids, append_urlencoded=True)
    # NOTE(review): the pattern is embedded into a shell command unescaped;
    # this assumes stids never contain quotes or other shell metacharacters.
    cmd = "yt map 'egrep \"{pattern}\" || true' "\
        "--src '//statbox/lenulca-access-log/{date}' "\
        "--dst //home/mpfs-stat/tmp/mpfs_mulca_clean_intersection/bad_stids_detailed/{date} --format=dsv"\
        "".format(date=date, pattern=pattern)
    os.system(cmd)


def add_to_ignore_list(stids):
    """Merge *stids* into the ``deleted_stids_ignore_list`` file stored on YT.

    The current list is downloaded with the ``yt`` CLI into a temporary file,
    united with *stids* and uploaded back if anything new was added.

    :param stids: iterable of stid strings to add.
    :return: tuple ``(size_before, size_after)`` of the ignore list. ``(0, 0)``
        is returned when the current list could not be downloaded; equal
        numbers are returned when nothing was added or the upload failed.
    """
    tmp_file_path = '/tmp/stids_ignore_list_%i' % int(time.time())
    try:
        # The shell redirect creates the file even if `yt download` fails, so
        # the exit status must be checked as well. Otherwise an empty "current"
        # list would be merged and uploaded, wiping the real ignore list.
        status = os.system('yt download //home/mpfs-stat/storage/deleted_stids_ignore_list > %s' % tmp_file_path)
        if status != 0 or not os.path.isfile(tmp_file_path):
            return 0, 0

        with open(tmp_file_path) as fh:
            stids_ignore_set = set(fh.read().splitlines())

        new_stids_ignore_set = stids_ignore_set | set(stids)
        if new_stids_ignore_set == stids_ignore_set:
            return len(stids_ignore_set), len(stids_ignore_set)

        with open(tmp_file_path, 'w') as fh:
            # Sorted only to make the uploaded file deterministic.
            fh.writelines([i + '\n' for i in sorted(new_stids_ignore_set)])

        status = os.system('yt upload //home/mpfs-stat/storage/deleted_stids_ignore_list < %s' % tmp_file_path)
        if status != 0:
            # Nothing reached the storage, so do not report additions.
            return len(stids_ignore_set), len(stids_ignore_set)
        return len(stids_ignore_set), len(new_stids_ignore_set)
    finally:
        # Best-effort cleanup of the temporary copy.
        try:
            os.remove(tmp_file_path)
        except OSError:
            pass


def crypted_encode_stids(stids):
    """Pair every stid with its encrypted form.

    Each stid is encrypted with the Zaberun service's ``crypt_agent``; in the
    result ``+`` is replaced by ``-``, ``/`` by ``_`` and leading/trailing
    ``=`` characters are removed.

    :param stids: iterable of stid strings.
    :return: list of ``(stid, processed_stid)`` tuples in input order.
    """
    zaberun = Zaberun()
    pairs = []
    for stid in stids:
        encrypted = zaberun.crypt_agent.encrypt(stid)
        encoded = encrypted.replace('+', '-').replace('/', '_').strip('=')
        pairs.append((stid, encoded))
    return pairs




def main(date):
    """Inspect the bad stids found for *date*.

    Steps:
      1. read the bad stids for *date* (exits if there are none);
      2. add them to the stid ignore list;
      3. look for documents referencing them in the user collections and dump
         any hits to ``/tmp/inspect_<date>_alarm``;
      4. write the stid -> encrypted stid mapping to
         ``/tmp/inspect_<date>_stids_and_crypted`` and list the produced files.

    :param date: date string used in table and file names (``YYYY-MM-DD``).
    """
    print('Выгружаем stid-ы')
    bad_stids = get_bad_stids(date)

    if not bad_stids:
        print('\tПлохих стидов не найдено. Проверь http://yt.yandex.net/plato/#page=navigation&path=//home/mpfs-stat/tmp/mpfs_mulca_clean_intersection/bad_stids')
        # sys.exit() instead of quit(): quit() is a helper that the `site`
        # module installs for the interactive interpreter.
        sys.exit()
    print('\tНайдено %i плохих stid-ов' % len(bad_stids))

    print('Добавляем stid-ы в игнор лист')
    before, after = add_to_ignore_list(bad_stids)
    print('\tДобавлено %i stid' % (after - before))

    file_pref = 'inspect_%s' % date

    print('Ищем их в БД')
    spec = {'data.stids': {'$elemMatch': {'stid': {'$in': bad_stids}}}}
    docs = list(search_by_users_collections(spec))
    if not docs:
        print("\tОк. В базе нет")
    else:
        print("\t\033[91mТревога!\033[0m Найдено в БД %i документов" % len(docs))
        # Dump every matching document together with where it was found.
        with open('/tmp/%s_alarm' % file_pref, 'w') as fh:
            for shard_name, collection_name, doc in docs:
                fh.write('Shard: "%s", collection: "%s"' % (shard_name, collection_name) + '\n')
                fh.write(json.dumps(doc, indent=4) + '\n')
                fh.write('=====================================' + '\n')

    stids_tuples = crypted_encode_stids(bad_stids)
    with open('/tmp/%s_stids_and_crypted' % file_pref, 'w') as map_fh:
        map_fh.writelines(["\t".join(i) + '\n' for i in stids_tuples])
    print('Смотри в файлы:')
    os.system('ls -la /tmp/%s_*' % file_pref)


if __name__ == '__main__':
    # Exactly one argument is expected: the date to inspect.
    if len(sys.argv) != 2:
        print('> ./inspect_stid_intersection.py YYYY-MM-DD')
        # Wrong usage is an error, so report it through a non-zero status
        # (and use sys.exit rather than the interactive-only quit()).
        sys.exit(1)

    # Exported for the `yt` command-line calls made via os.system/os.popen;
    # presumably the CLI picks its proxy and token up from these variables.
    os.environ["YT_PROXY"] = YT_PROXY
    os.environ["YT_TOKEN"] = YT_TOKEN
    date = sys.argv[1]
    main(date)
