import os
import hashlib
import requests

# Number of bytes read per chunk while hashing local files and HTTP bodies.
BLOCKSIZE = 64 * 1024  # 64 KiB


def file_md5(filepath):
    hasher = hashlib.md5()

    with open(filepath, 'rb') as f:
        buffer = f.read(BLOCKSIZE)
        while len(buffer) > 0:
            hasher.update(buffer)
            buffer = f.read(BLOCKSIZE)

    return hasher.hexdigest()


def remote_file_md5(url):
    """Fetch ``url`` with GET and return its status code and MD5 hex digest.

    The response body is streamed in ``BLOCKSIZE`` chunks, so the remote file
    is hashed without being held in memory as a whole.

    Args:
        url: URL of the remote file.

    Returns:
        A ``(status_code, hexdigest)`` tuple when the server answers 200.
        For any other status the body is not read and the result is
        ``(status_code, None)``.

    Exceptions raised by ``requests`` (for example on connection failure)
    are not caught here and propagate to the caller.
    """
    hasher = hashlib.md5()

    # Both context managers matter: with stream=True the connection is only
    # handed back once the body is consumed or the response is closed, and
    # the early return below leaves the body unread.
    with requests.Session() as session:
        retrying_adapter = requests.adapters.HTTPAdapter(max_retries=3)
        # Retry on both schemes; plain-http URLs previously got no retries.
        session.mount('https://', retrying_adapter)
        session.mount('http://', retrying_adapter)

        with session.get(url, stream=True) as response:
            status_code = response.status_code

            if status_code != 200:
                return status_code, None

            for chunk in response.iter_content(BLOCKSIZE):
                hasher.update(chunk)

    return status_code, hasher.hexdigest()


def detect_collisions(src_dir, dst_url, exclude_list, exclude_re=None):
    """Compare every file under ``src_dir`` with its counterpart below ``dst_url``.

    For each local file the remote URL is ``dst_url`` followed by the file's
    path relative to ``src_dir`` (always written with ``/`` separators).
    ``dst_url`` is concatenated as-is, so it is expected to already end with
    the separator the caller wants (normally ``/``).

    Args:
        src_dir: Root directory that is walked recursively.
        dst_url: URL prefix under which the files are looked up.
        exclude_list: Collection of file names (not paths) to skip.
            ``None`` is treated as "exclude nothing".
        exclude_re: Optional compiled pattern; a file is skipped when
            ``exclude_re.match(name)`` succeeds on its bare file name.

    Returns:
        A dict of lists:

        * ``'created'``: local paths whose remote URL answered 404.
        * ``'updated'``: local paths whose remote URL answered 200. This
          includes files also reported under ``'collisions'``.
        * ``'collisions'``: ``(src_filepath, src_md5, dst_fileurl, dst_md5)``
          tuples for files whose local and remote MD5 digests differ.
        * ``'http_errors'``: ``(status_code, dst_fileurl)`` tuples for any
          other HTTP status.
        * ``'errors'``: ``(dst_fileurl, message)`` tuples for exceptions
          raised while processing a file.
    """
    stats = {
        'created': [],
        'updated': [],
        'http_errors': [],
        'collisions': [],
        'errors': [],
    }

    # Tolerate callers that pass None instead of an empty collection.
    excluded_names = () if exclude_list is None else exclude_list

    for dirpath, _, filenames in os.walk(src_dir):
        for name in filenames:
            if name in excluded_names:
                continue

            if exclude_re and exclude_re.match(name):
                continue

            src_filepath = os.path.normpath(os.path.join(dirpath, name))
            file_relpath = os.path.relpath(src_filepath, src_dir)
            # relpath uses the OS separator (a backslash on Windows), but a
            # URL path must always use forward slashes.
            dst_fileurl = dst_url + file_relpath.replace(os.sep, '/')

            try:
                dst_status_code, dst_md5 = remote_file_md5(dst_fileurl)

                if dst_status_code == 200:
                    # Remote file exists: compare its content with the local one.
                    src_md5 = file_md5(src_filepath)

                    if src_md5 != dst_md5:
                        # Same path, different content.
                        stats['collisions'].append((
                            src_filepath,
                            src_md5,
                            dst_fileurl,
                            dst_md5,
                        ))

                    stats['updated'].append(src_filepath)
                elif dst_status_code == 404:
                    # Nothing at the remote URL yet: the file would be new.
                    stats['created'].append(src_filepath)
                else:
                    # Any other status is reported, not interpreted.
                    stats['http_errors'].append((dst_status_code, dst_fileurl))

            except Exception as e:
                # Best-effort scan: record the failure and move on to the
                # next file instead of aborting the whole walk.
                stats['errors'].append((dst_fileurl, str(e)))

    return stats
