#!/usr/bin/python
# -*- coding: utf-8 -*-

import argparse
import datetime
import os
from os.path import expanduser
import subprocess
import urllib2
import json

# Number of most frequent hashes kept in the "top" file (passed to `head -n`
# and used as the suffix of that file's name).
TOP_SELECT = 200
# Upper bound on how many resources from the top file are published to the
# moderation queue in a single run.
TOP_UPLOAD = 70


def get_file_name_sign():
    """Return yesterday's local date as a ``YYYYMMDD`` string.

    The value is used as a suffix for log and temporary file names and as a
    tag in the log messages printed by ``main``.
    """
    one_day = datetime.timedelta(days=1)
    return (datetime.date.today() - one_day).strftime('%Y%m%d')


def get_auxiliary_files(dry_run=True):
    """Build the paths of the two working files for this run.

    :param dry_run: when true, the files live in the current user's home
        directory; otherwise under ``/var/tmp/yandex/disk/logs-cron``.
    :return: ``(tmp_file, top_selected_file)`` where ``tmp_file`` receives
        every extracted hash and ``top_selected_file`` is the same path with
        a ``-top-<TOP_SELECT>`` suffix.
    """
    sign = get_file_name_sign()
    if dry_run:
        tmp_file = expanduser('~/test-moderation-queue-%s' % sign)
    else:
        tmp_file = '/var/tmp/yandex/disk/logs-cron/hashes-%s' % sign

    return tmp_file, '%s-top-%s' % (tmp_file, TOP_SELECT)


def get_log_file(dry_run=True):
    """Return the log file name (or shell glob) to search for increments.

    :param dry_run: when true, a ``*`` glob matching every archived log is
        returned; otherwise only the log stamped with yesterday's date.
    :return: file name pattern relative to each ``mpfs*`` log directory.
    """
    if dry_run:
        return 'yandex_disk_log-reader-tskv.log.*.gz'

    return 'yandex_disk_log-reader-tskv.log.%s.gz' % get_file_name_sign()


def main(dry_run=True):
    sign = get_file_name_sign()
    now = subprocess.check_output('date', shell=True)
    print '[Moderation Queue: %s] Start moderation-queue.py at %s' % (sign, now.strip())

    working_directory = '/u0/archive_logs/mpfs'
    os.chdir(working_directory)

    os.system('/usr/bin/read-disk-property')

    # hardcoded temporary :)
    mpfs_host = 'mpfs1g.disk.yandex.net'
    print '[Moderation Queue: %s] Use MPFS host: %s' % (sign, mpfs_host)

    log = get_log_file(dry_run=dry_run)
    print '[Moderation Queue: %s] Count most popular files in log: %s/%s' % (sign, working_directory, log)

    tmp_file, top_selected_file = get_auxiliary_files(dry_run=dry_run)
    print '[Moderation Queue: %s] Write all increments to file %s' % (sign, tmp_file)
    print '[Moderation Queue: %s] Top selected file: %s' % (sign, top_selected_file)

    print '[Moderation Queue: %s] Search for increments in %s/mpfs*/%s' % (sign, working_directory, log)
    print "[Moderation Queue: %s] zgrep 'Increment counter for hash' %s/mpfs*/%s | awk '{ print $NF }' >> %s" % (sign, working_directory, log, tmp_file)
    os.system("zgrep 'Increment counter for hash' %s/mpfs*/%s | awk '{ print $NF }' >> %s" % (working_directory, log, tmp_file))

    print '[Moderation Queue: %s] Write top %s hashes to file %s' % (sign, TOP_SELECT, top_selected_file)
    os.system('sort %s | uniq -c | sort -rn | head -n %s > %s' % (tmp_file, TOP_SELECT, top_selected_file))

    # Publish $TOP_UPLOAD viewed files to Moderation Queue
    i = 0
    for line in open(top_selected_file, 'r'):
        if i >= TOP_UPLOAD:
            break

        count, private_hash = line.strip().split()
        print '[Moderation Queue: %s] Get %s data' % (sign, private_hash)
        url = 'http://%s/json/public_info?meta=&private_hash=%s' % (mpfs_host, private_hash)
        try:
            response = urllib2.urlopen(url)
        except urllib2.URLError as e:
            # TODO: What to do?
            continue

        content = response.read()
        data = json.loads(content)

        if 'code' in data:
            print '%s is blocked or removed' % private_hash
            continue

        resource = data['resource']
        file_type = resource['type']
        file_name = resource['name'].lower()
        short_url = resource['meta']['short_url']
        if file_type != 'dir':
            types_to_check = ['video', 'image', 'compressed', 'unknown']
            media_type = resource['meta']['mediatype']
            if media_type not in types_to_check:
                print 'skipping mediatype %s' % media_type
                continue

            if file_name.endswith(('.apk',)):
                print 'skipping filename %s' % file_name
                continue

        i += 1
        print '[Moderation Queue: %s] %s %s %s' % (sign, i, file_type, short_url)
        if not dry_run:
            os.system('put-to-moderation-queue.py -l %s -t %s' % (short_url, file_type))
            pass

    print '[Moderation Queue: %s] Remove tmp file %s' % (sign, tmp_file)
    os.system('rm %s' % tmp_file)


if __name__ == '__main__':
    # Dry-run is the default mode; --no-dry-run must be given explicitly to
    # actually publish anything.
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.set_defaults(dry_run=True)
    arg_parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='Dry run mode.')
    arg_parser.add_argument('--no-dry-run', dest='dry_run', action='store_false')
    options = arg_parser.parse_args()
    main(dry_run=options.dry_run)
