import hashlib
import json
import logging
import os
import subprocess
import time
import types


# General debug switch; not referenced in the visible part of this file.
DEBUG = False
# When true, out() emits its extra 'out: ...' messages via logging.debug.
DEBUG_OUT = True
# Heavy separator line that out() logs before each command it runs.
DELIMITER = '=' * 65
# Light separator line that out() logs around the captured command output.
DELIMITER_LITE = '-' * 65


# Root logger: logging.getLogger() is called without a name.
log = logging.getLogger()


def out(cmds, fail=True, env=None, shell=False):
    """
    Run a command and return its output (stdout and stderr merged).

    cmds  -- the command, either a single string or a sequence of arguments;
             it is handed to subprocess.Popen unchanged
    fail  -- when true, a non-zero exit code raises an exception
    env   -- environment for the child process; None means "copy os.environ",
             an empty dict means "start from an empty environment".
             LANG is always forced to en_US.UTF-8 in the child's environment
    shell -- forwarded to subprocess.Popen

    Returns the captured output exactly as read from the pipe.
    Raises Exception (carrying the exit code and the output) when ``fail``
    is true and the process exits with a non-zero code.
    """
    def out_debug(msg):
        if DEBUG_OUT:
            logging.debug('out: ' + msg)

    if isinstance(cmds, types.StringTypes):
        cmd_str = cmds
    else:
        cmd_str = ' '.join(cmds)

    log.info(DELIMITER)
    log.info('RUN: %s' % (cmd_str, ))

    if env:
        out_debug('env: %r' % (env, ))

    if env is None:  # not "if env:" because there can be empty dict: env={}
        local_env = dict(os.environ)
    else:
        local_env = dict(env)

    local_env.update({'LANG': 'en_US.UTF-8'})

    p = subprocess.Popen(cmds, close_fds=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                         env=local_env, shell=shell)
    # communicate() reads the pipe to EOF, closes it and waits for the child,
    # so the pipe's file descriptor is not left open after the call.
    s, _ = p.communicate()
    ret = p.returncode

    if fail and ret != 0:
        raise Exception("Process ret=%d: %s" % (ret, s))

    log.info('%s\n%s\n%s\n' % (DELIMITER_LITE, s, DELIMITER_LITE))
    return s


def get_file_md5_and_size(filename):
    """
    Return the MD5 hex digest and the size in bytes of a file's contents.

    The file is read in 1 MiB chunks so that a large file is never held in
    memory as a whole, and it is closed even if reading fails.

    filename -- path of the file to hash

    Returns a tuple (hexdigest, size) where ``size`` is the number of bytes
    actually read from the file.
    Errors from opening or reading the file propagate to the caller.
    """
    chunk_size = 1 << 20
    md5 = hashlib.md5()
    size = 0

    with open(filename, 'rb') as fh:
        while True:
            chunk = fh.read(chunk_size)
            if not chunk:  # empty read means end of file
                break
            md5.update(chunk)
            size += len(chunk)

    return md5.hexdigest(), size


def check_resource(name, data):
    """
    Download one resource over HTTP(S) and verify its size and MD5 checksum.

    The first URL in data['urls'] that starts with 'http' is fetched with
    curl into a temporary file in the current directory. The file size is
    compared with data['size'] (skipped when that value is falsy) and, when
    data['verification']['checksum'] is present and starts with 'MD5:', the
    file's MD5 is compared with it. Problems are reported with print; the
    temporary file is always removed before returning.

    name -- resource name; used only to build the temporary file name
    data -- resource description. Keys read here: 'urls', 'size' and,
            optionally, 'verification' -> 'checksum'. Example entry of the
            "resources" mapping:

      "archive.tar.gz" : {
        "@class" : "ru.yandex.iss.Resource",
        "uuid" : "nirvana-job-launcher-archive.tar-a89a194a-469e-4480-9c66-dc8d5f3699cc.gz",
        "verification" : {
          "checksum" : "MD5:d4512ede8804f65d5f7fffc092030118",
          "checkPeriod" : "0d6h0m"
        },
        "urls" : [ "https://nirvana.yandex-team.ru/api/storage/a89a194a-469e-4480-9c66-dc8d5f3699cc/data" ],
        "size" : 315704231,
        "cached" : false
      }

    Returns None in every case.
    """
    http_urls = [u for u in data['urls'] if u.startswith('http')]
    if not http_urls:
        print("  no http(s) urls")
        return

    url = http_urls[0]
    size_cfg = data['size']
    temp_file = "tmp-%s.%d.tmp" % (name, size_cfg)

    # Start from a clean slate so a stale file cannot pass the checks.
    if os.path.exists(temp_file):
        os.remove(temp_file)

    try:
        print("download %dmb from %s" % (size_cfg >> 20, url))

        args = [
            "curl",
            url,
            "-s",
            "-o", temp_file]
        start_time = time.time()
        # fail=False: a curl error is detected below through the file checks.
        out(args, fail=False)
        print("# %.2f sec" % (time.time() - start_time, ))

        # curl's exit code is ignored above, so the file may not exist at all;
        # report that instead of letting os.path.getsize raise.
        if not os.path.exists(temp_file):
            print("ERROR: download failed, %s was not created" % (temp_file, ))
            return

        size_disk = os.path.getsize(temp_file)
        if size_cfg and size_cfg != size_disk:
            print("WRONG SIZE: %d instead of %d" % (size_disk, size_cfg))
            return

        hash_, data_size = get_file_md5_and_size(temp_file)
        if size_cfg and size_cfg != data_size:
            # Report the size that actually mismatched (bytes read).
            print("WRONG READ SIZE: %d instead of %d" % (data_size, size_cfg))
            return

        if 'verification' not in data:
            return
        verification = data['verification']
        if 'checksum' not in verification:
            return

        checksum = verification['checksum']
        if checksum.upper().startswith('MD5:'):
            expected = checksum[4:]
            if expected.lower() != hash_.lower():
                # Print the computed digest, not the builtin ``hash``.
                print('ERROR: wrong MD5: %s instead of %s' % (hash_, expected))
    finally:
        if os.path.exists(temp_file):
            os.unlink(temp_file)

# Script body: load the dump and check every entry of its "resources" mapping.
filename = 'dump.json'

# Use a context manager so the file is closed explicitly after parsing.
with open(filename) as dump_file:
    dump = json.load(dump_file)

resources = dump['resources']

for i, (name, data) in enumerate(resources.items()):
    print('--- %d: %s' % (i, name))
    check_resource(name, data)
