#! /usr/bin/env python2.7
# -*- coding: utf8 -*-
import collections
import json
import sys
import time
import traceback
import urllib
import urlparse

import requests

# Python 2 only: reloading ``sys`` makes ``setdefaultencoding`` callable again
# so that implicit str <-> unicode conversions in this script use UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

from broken_guids import BROKEN_GROUP_IDS
from broken_paths import BROKEN_PATHS

# Base URLs of the two backends whose responses are compared by this script.
MPFS_HOST = "http://mpfs.disk.yandex.net"
DJFS_HOST = "http://localhost:25699/api/legacy"

# Headers sent with every POST issued by get_data().
HEADERS = {'Content-Type': 'application/json'}
# Meta field names that get_data() appends as the ``meta`` query parameter
# when the recorded request URI does not already ask for meta.
SUPPORTED_META = {'md5', 'mimetype', 'fs_symbolic_link', 'visible', 'empty', 'group', 'file_id', 'short_url', 'angle',
                  'mediatype', 'photoslice_album_type', 'width', 'albums_exclusions', 'video_info', 'aesthetics',
                  'pmid', 'height', 'alias_enabled', 'resource_id', 'sha256', 'hasfolders', 'numfolders', 'numfiles',
                  'drweb', 'etime', 'size', 'sizes', 'custom_preview', 'file_url', 'media_type', 'public', 'file_mid',
                  'storage_type', 'photoslice_time', 'versioning_status', 'fotki_data_stid', 'fotki_data_url',
                  'note_name', 'note_revision_created', 'note_revision_deleted', 'blocked', 'custom_properties',
                  'download_counter', 'views_counter', 'page_blocked_items_num', 'comment_ids', 'blockings', 'revision',
                  'public_hash', 'append_time', 'original_id', 'folder_type', 'numchildren', 'autouploaded'}


def print_diff(key, mpfs, djfs):
    print 'meta', key
    print json.dumps(mpfs, indent=2)
    print json.dumps(djfs, indent=2)


def get_data(host, uri, body):
    """POST ``body`` to ``host + uri`` and return the response, or None on an HTTP error.

    When the URI carries no ``meta`` query parameter, one listing every field
    in SUPPORTED_META is appended so both backends are asked for the same data.

    :param host: base URL of the backend (e.g. MPFS_HOST or DJFS_HOST)
    :param uri: request path with query string
    :param body: request payload, sent as-is
    :return: the ``requests`` response when ``response.ok`` is true; otherwise
             None, after appending the failure details to ``results.log``
    """
    # Look for an actual ``meta`` parameter; a plain substring test would also
    # match unrelated text such as a path or value that merely contains "meta".
    query = urlparse.parse_qs(urlparse.urlparse(uri).query, keep_blank_values=True)
    if 'meta' not in query:
        if '?' not in uri:
            uri = uri + '?'
        elif not uri.endswith(('?', '&')):
            uri = uri + '&'
        # Sorted so the generated URI (and therefore the log) is reproducible.
        uri = uri + 'meta=' + ','.join(sorted(SUPPORTED_META))

    response = requests.post(host + uri, data=body, headers=HEADERS)
    if not response.ok:
        with open('results.log', 'a') as f:
            # Trailing newline keeps consecutive log records from running together.
            f.write(' host: %s, uri: %s, body: %s \n response code: %s\n response: %s\n' % (
                host, uri, body, response.status_code, response.content))
        return None
    return response

def rewrite_url(url):
    """Split ``url`` into a dict of comparable parts.

    The result has the keys ``scheme``, ``host``, ``path`` and ``params``.
    ``params`` is the list of query parameters, sorted in their encoded form
    and then unquoted, with every ``owner_uid=`` entry left out. ``+`` is
    treated like ``%20``, and ``%3D`` in the path is turned into ``=``.
    """
    parsed = urlparse.urlparse(url)
    # '+' and '%20' both mean a space, so unify them before sorting.
    encoded_params = sorted(parsed.query.replace('+', '%20').split('&'))
    # owner_uid is deliberately excluded from the comparison.
    params = [urllib.unquote(param)
              for param in encoded_params
              if not param.startswith('owner_uid=')]
    return {
        'scheme': parsed.scheme,
        'host': parsed.netloc,
        'path': parsed.path.replace('%3D', '='),
        'params': params
    }


def sizes_to_dict(sizes):
    """Turn a list of ``{'name': ..., 'url': ...}`` entries into a ``name -> url`` dict.

    When a name occurs more than once, the last entry wins.
    """
    return {entry['name']: entry['url'] for entry in sizes}


def same_beauty(mpfs, djfs):
    """Return True when the two ``aesthetics`` scores differ by no more than 0.001.

    A larger difference is reported through print_diff and yields False.
    """
    delta = abs(mpfs - djfs)
    if not delta > 0.001:
        return True
    print_diff('aesthetics', mpfs, djfs)
    return False


def _drop_params(params, prefix):
    """Return a list of the entries of ``params`` that do not start with ``prefix``."""
    return [param for param in params if not param.startswith(prefix)]


def _hex_values_close(left, right, tolerance):
    """Return True when two hex strings differ by less than ``tolerance``; False if either is not hex."""
    try:
        return abs(int(left, 16) - int(right, 16)) < tolerance
    except ValueError:
        return False


def same_urls(key, from_trash, mpfs_url, djfs_url, mpfs_media_type=None):
    """Compare an MPFS url with a DJFS url, tolerating a set of known differences.

    Both urls are normalised with rewrite_url() and then compared by scheme,
    host, query parameters and path. The first difference found is printed
    with print_diff() and the function returns False.

    Tolerated differences:
      * ``from_trash`` truthy: ``filename=`` and ``media_type=`` parameters are
        ignored, and the path component at index 2 is not compared
        (presumably it depends on the ignored parameters - TODO confirm).
      * MPFS reports ``media_type=unknown``: ``media_type=`` is ignored.
      * ``mpfs_media_type`` given: the MPFS ``media_type=`` parameter is
        replaced by that value before comparing.
      * host ``downloader.disk.yandex.ru``: the path component at index 3 is
        read as a hexadecimal number and may differ by less than 5.

    :param key: label used when printing a difference
    :param from_trash: truthy to apply the trash relaxations
    :param mpfs_url: url returned by MPFS
    :param djfs_url: url returned by DJFS
    :param mpfs_media_type: value of the MPFS ``mediatype`` meta field, if any
    :return: True when the urls are considered equal, False otherwise
    """
    m = rewrite_url(mpfs_url)
    d = rewrite_url(djfs_url)

    if from_trash:
        for prefix in ('filename=', 'media_type='):
            m['params'] = _drop_params(m['params'], prefix)
            d['params'] = _drop_params(d['params'], prefix)
        m_parts = m['path'].split('/')
        d_parts = d['path'].split('/')
        if len(m_parts) > 2 and len(d_parts) > 2:
            # Only the component at index 2 is excused. The patched DJFS parts
            # are joined back (the original joined the MPFS parts, which made
            # the later path comparison always succeed).
            d_parts[2] = m_parts[2]
            d['path'] = '/'.join(d_parts)

    # MPFS could not resolve the media type: do not compare it at all.
    if 'media_type=unknown' in m['params']:
        m['params'] = _drop_params(m['params'], 'media_type=')
        d['params'] = _drop_params(d['params'], 'media_type=')

    # MPFS may put a media type into file_url that disagrees with its own
    # ``mediatype`` field; substitute the field value before comparing.
    if mpfs_media_type and any(param.startswith('media_type=') for param in m['params']):
        m['params'] = sorted(_drop_params(m['params'], 'media_type=') + ['media_type=' + mpfs_media_type])
        # Re-sort the DJFS side the same way so both lists share one ordering.
        d['params'] = sorted(d['params'])

    if m['scheme'] != d['scheme'] or m['host'] != d['host'] or m['params'] != d['params']:
        print_diff(key, m, d)
        return False

    # Download links carry a time-dependent component in the path; accept a
    # small drift between the two requests.
    if m['host'] == 'downloader.disk.yandex.ru':
        m_parts = m['path'].split('/')
        d_parts = d['path'].split('/')
        if (len(m_parts) > 3 and len(d_parts) > 3 and m_parts[3] != d_parts[3]
                and _hex_values_close(d_parts[3], m_parts[3], 5)):
            # Same fix as above: join the patched DJFS parts, not the MPFS ones.
            d_parts[3] = m_parts[3]
            d['path'] = '/'.join(d_parts)

    if m['path'] != d['path']:
        print_diff(key, m, d)
        return False

    return True


def same_group(mpfs_group, djfs_group):
    """Compare the ``group`` meta dicts field by field, driven by the MPFS keys.

    ``user_count`` is never compared, and ``gid`` is skipped when either value
    is listed in BROKEN_GROUP_IDS. Every other differing field is printed via
    print_diff. Returns True only when no difference was reported.
    """
    matches = True

    for field in mpfs_group:
        expected = mpfs_group[field]
        actual = djfs_group.get(field)
        if field == 'user_count':
            continue
        if field == 'gid' and (actual in BROKEN_GROUP_IDS or expected in BROKEN_GROUP_IDS):
            continue
        if actual == expected:
            continue
        print_diff('group ' + field, expected, actual)
        matches = False

    return matches


def filter_broken_paths(resp_array, uid):
    """Keep only the items whose ``uid:path`` key starts with none of BROKEN_PATHS."""

    def is_healthy(item):
        # An item is dropped as soon as one broken prefix matches its key.
        return not any((uid + ':' + item.get('path')).startswith(prefix) for prefix in BROKEN_PATHS)

    return filter(is_healthy, resp_array)


def same(mpfs_l, djfs_l, uid):
    isOk = True
    mpfs_l = filter_broken_paths(mpfs_l, uid)
    djfs_l = filter_broken_paths(djfs_l, uid)

    if len(mpfs_l) != len(djfs_l):
        print 'responses have different sizes: mpfs - ' + str(len(mpfs_l)) + ", djfs: " + str(len(djfs_l))
        return

    for mpfs, djfs in zip(mpfs_l, djfs_l):
        djfs_path = djfs.get('path')
        mpfs_path = mpfs.get('path')
        d_resource_id = djfs['meta']['resource_id']
        m_resource_id = mpfs['meta']['resource_id']

        if djfs_path != mpfs_path and d_resource_id and d_resource_id == m_resource_id:
            # print "files m=" + mpfs_path + ' and d=' + djfs_path + ' have different paths, but same resource_id ' + d_resource_id
            continue

        from_trash = djfs_path.startswith('/trash')

        if 'sizes' in mpfs['meta']:
            mpfs['meta']['sizes'] = sizes_to_dict(mpfs['meta']['sizes'])
            djfs['meta']['sizes'] = sizes_to_dict(djfs['meta']['sizes'])
        mpfs_media_type = mpfs['meta'].get('mediatype', None)
        for key, mpfs_value in mpfs.items():
            djfs_value = djfs.get(key)
            if (key == 'etime' or key == 'mtime') and int(djfs_value) < 0 and int(mpfs_value) < 0:
                continue

            if (key == 'utime' or key == 'ctime' or key == 'etime' or key == 'mtime') and abs(
                    int(djfs_value) - int(mpfs_value)) <= 1:
                continue

            if mpfs_value != djfs_value:
                if key == 'meta':
                    for meta_key, mpfs_meta_value in mpfs_value.items():
                        djfs_meta_value = djfs_value.get(meta_key)
                        if meta_key == 'sizes':
                            for size, mpfs_url in djfs_meta_value.items():
                                djfs_url = djfs_meta_value.get(size)
                                isOk = isOk and same_urls('size.' + size, from_trash, mpfs_url, djfs_url)
                        elif meta_key == 'custom_preview':
                            isOk = isOk and same_urls('custom_preview', from_trash, mpfs_meta_value,
                                                      djfs_meta_value)
                        elif meta_key == 'group':
                            isOk = isOk and same_group(mpfs_meta_value, djfs_meta_value)
                        elif meta_key == 'fotki_data_url':
                            isOk = isOk and same_urls(meta_key, lambda x: x, mpfs_meta_value, djfs_meta_value)
                        elif meta_key == 'file_url':
                            isOk = isOk and same_urls(meta_key, from_trash, mpfs_meta_value, djfs_meta_value,
                                                      mpfs_media_type)
                        elif meta_key == 'aesthetics':
                            isOk = isOk and same_beauty(mpfs_meta_value, djfs_meta_value)
                        elif meta_key == 'autouploaded' or meta_key == 'comment_ids':
                            continue
                        elif meta_key == 'alias_enabled':
                            isOk = isOk and int(djfs_meta_value) == int(mpfs_meta_value)
                        elif (meta_key == 'media_type' or meta_key == 'mediatype') \
                                and mpfs_meta_value == 'unknown' and djfs_meta_value != 'unknown':
                            continue
                        elif (meta_key == 'media_type' or meta_key == 'mediatype') and from_trash:
                            continue
                        elif (meta_key == 'etime' or meta_key == 'photoslice_time') \
                                and int(djfs_meta_value) < 0 and int(mpfs_meta_value) < 0:
                            continue
                        elif mpfs_meta_value != djfs_meta_value:
                            print_diff(meta_key, mpfs_meta_value, djfs_meta_value)
                            isOk = False
                    for meta_key in djfs_value:
                        if meta_key == 'views_counter' and djfs_value.get('public_hash'):
                            continue
                        if meta_key not in mpfs_value:
                            print 'meta', meta_key, 'only in djfs', djfs_value.get(meta_key)
                            isOk = False
                else:
                    print key
                    print json.dumps(mpfs_value, indent=2)
                    print json.dumps(djfs_value, indent=2)
                    isOk = False
        for key in djfs:
            if key not in mpfs and key != '__type':
                print key, 'only in djfs'
                isOk = False

        if not isOk:
            print 'file path: ' + djfs_path
            break

    return isOk


def strip_uid(str):
    """Return ``str`` without its leading ``<uid>:`` prefix.

    Only the first colon separates the uid from the rest, so any further
    colons (for example inside a path) are preserved. A value without a colon
    is returned unchanged.

    The parameter name shadows the builtin ``str``; it is kept as-is so
    existing keyword callers keep working.
    """
    _, separator, remainder = str.partition(':')
    return remainder if separator else str


def remove_duplicates_in_file_ids(body):
    """Return ``body`` re-encoded as JSON without any id that occurs more than once.

    ``body`` is a JSON document of ids in which doubled backslashes are first
    collapsed to single ones. Ids are counted by their uid-stripped form (see
    strip_uid); every id whose stripped form is not unique is removed
    entirely - all of its copies, not just the extra ones.
    """
    unescaped = body.replace('\\\\', '\\')
    ids = json.loads(unescaped, encoding='UTF-8')
    occurrences = collections.Counter([strip_uid(entry) for entry in ids])

    def is_unique(entry):
        return occurrences.get(strip_uid(entry)) == 1

    return json.dumps(filter(is_unique, ids))


def do_ckeck(lines):
    offset = int(sys.argv[1])
    print_body = sys.argv.count('--body') > 0
    continue_on_error = sys.argv.count('--continue') > 0
    start_time = time.time()
    start_time_index = offset
    for _id, line in enumerate(lines):
        if _id < offset:
            start_time = time.time()
            continue
        print _id

        uri, paths = line[:-1].split('\t')
        body = remove_duplicates_in_file_ids(paths)

        mpfs_response = get_data(MPFS_HOST, uri, body)
        djfs_response = get_data(DJFS_HOST, uri, body)
        uid = urlparse.parse_qs(urlparse.urlparse(uri).query)['uid'][0]

        if not mpfs_response and not djfs_response:
            continue

        if not mpfs_response or not djfs_response:
            print 'error in one of responses, see log'
            is_same = False
        else:
            try:
                mpfs_data = json.loads(mpfs_response.text)
                djfs_data = json.loads(djfs_response.text)

                is_same = same(mpfs_data, djfs_data, uid)
            except Exception as e:
                print 'error', uri, paths
                print e
                traceback.print_exc()
                is_same = False

        if (_id % 100 == 0) or not is_same:
            prev_time = start_time
            start_time = time.time()
            print str(_id - start_time_index) + ' records took ' + time.strftime("%H:%M:%S",
                                                                                 time.gmtime(
                                                                                     start_time - prev_time))
            start_time_index = _id

            with open('results.log', 'a') as f:
                f.write('%d %s\n' % (_id, is_same))
        if not is_same:
            print '-------------'
            print uri
            print body
            print
            if print_body:
                print mpfs_response.text if mpfs_response else None
                print
                print djfs_response.text if djfs_response else None
            if continue_on_error:
                with open('errors.log', 'a') as f:
                    f.write(line)
            else:
                return

    print 'done'


# Script entry point: the recorded requests are read from standard input.
if __name__ == "__main__":
    do_ckeck(sys.stdin)
