#!/usr/bin/env python

import cStringIO
import argparse
import multiprocessing
import itertools

import libarchive
from PIL import Image
from captcha.generation.image_generator.noise import whitenoise


def parse_args():
    """Parse the command-line options of the script.

    Returns:
        The ``argparse.Namespace`` built by the parser. ``src`` and ``dst``
        are file objects already opened by ``argparse.FileType``; ``jobs`` is
        an int (default 0); ``scale`` is a float defaulting to
        ``whitenoise.DEFAULT_SCALE``; ``remove_metadata`` is a bool.
    """
    parser = argparse.ArgumentParser(description='Add noise to each image in an archive')
    # Archives are binary data, so both files are opened in binary mode:
    # text mode may translate line endings and corrupt the stream on
    # platforms that distinguish the two.
    parser.add_argument('-s', '--src', type=argparse.FileType('rb'), required=True, help='Source archive')
    parser.add_argument('-d', '--dst', type=argparse.FileType('wb'), required=True, help='Destination archive (.tar.gz)')
    parser.add_argument('-j', '--jobs', type=int, default=0, help='Number of processes to handle images')
    parser.add_argument('--scale', type=float, default=whitenoise.DEFAULT_SCALE)
    parser.add_argument('--remove-metadata', action='store_true', help='Remove metadata header from images (useful for debug)')

    return parser.parse_args()


def read_archive(path):
    """Lazily yield ``(entry, content)`` pairs from an archive.

    ``path`` is handed unchanged to ``libarchive.Archive`` (``main`` passes
    the source file object produced by argparse). Entries whose ``isfile()``
    is false are skipped. ``content`` is what ``archive.read`` returns when
    asked for ``entry.size`` bytes.
    """
    with libarchive.Archive(path) as source:
        for member in source:
            if member.isfile():
                # The payload has to be pulled while this member is the
                # current one, i.e. before the loop advances.
                payload = source.read(member.size)
                yield member, payload


def process_image(content, scale=None, remove_metadata=False):
    """Add noise to one stored image and return it re-encoded.

    Args:
        content: String made of a metadata header, a NUL character and the
            encoded image data, in that order.
        scale: Forwarded as ``scale`` to
            ``whitenoise.add_noise_to_single_img``.
        remove_metadata: When true, return only the encoded image; otherwise
            the original header and the NUL separator are put back in front.

    Returns:
        The noisy image encoded in the format of the input image, with or
        without the metadata header.

    Raises:
        ValueError: If ``content`` contains no NUL separator (the split
            yields a single part that cannot be unpacked into two names).
    """
    header, encoded = content.split('\0', 1)

    source = Image.open(cStringIO.StringIO(encoded))
    # Captured before the noise step; presumably the image returned by the
    # noise helper may no longer carry ``format`` -- TODO confirm.
    fmt = source.format
    noisy = whitenoise.add_noise_to_single_img(source, scale=scale, noconvert=True)

    buf = cStringIO.StringIO()
    noisy.save(buf, format=fmt)
    result = buf.getvalue()

    if not remove_metadata:
        result = header + '\0' + result
    return result


class ImageProcessor(object):
    """Callable that applies ``process_image`` with fixed keyword arguments.

    NOTE(review): presumably a class rather than a closure so that instances
    can be sent to ``multiprocessing`` workers -- confirm.
    """

    def __init__(self, **kws):
        # Keyword arguments forwarded to ``process_image`` on every call.
        self.kws = kws

    def __call__(self, item):
        """Process one ``(entry, content)`` pair.

        Returns ``(entry, processed_content)``; ``entry`` is passed through
        untouched.
        """
        entry, raw = item
        processed = process_image(raw, **self.kws)
        return entry, processed


def _iter_pool_results(process_one, images_iter, jobs):
    """Yield ``process_one(item)`` for each item, computed by ``jobs`` workers.

    The pool is created on the first iteration step and is always torn down
    when the generator finishes, fails or is closed early.
    """
    pool = multiprocessing.Pool(processes=jobs)
    try:
        for result in pool.imap_unordered(process_one, images_iter):
            yield result
    finally:
        # Without this the worker processes would linger until interpreter
        # exit. At this point either every result has been delivered or the
        # remaining work is no longer wanted, so terminating is safe.
        pool.terminate()
        pool.join()


def process_images(images_iter, scale, remove_metadata, jobs):
    """Apply noise to every ``(entry, content)`` pair of ``images_iter``.

    Args:
        images_iter: Iterable of ``(entry, content)`` pairs.
        scale: Forwarded to ``process_image`` as ``scale``.
        remove_metadata: Forwarded to ``process_image`` as ``remove_metadata``.
        jobs: Number of worker processes. A value ``<= 0`` processes the
            images lazily in the current process, preserving input order.

    Returns:
        An iterator of ``(entry, processed_content)`` pairs. With ``jobs > 0``
        the pairs come back in completion order, not input order.
    """
    process_one = ImageProcessor(scale=scale, remove_metadata=remove_metadata)

    if jobs <= 0:
        return itertools.imap(process_one, images_iter)
    # A separate generator owns the pool so that it can release the workers
    # deterministically (a generator cannot also ``return`` a value here).
    return _iter_pool_results(process_one, images_iter, jobs)


def write_archive(path, images_iter):
    """Write every ``(entry, content)`` pair into a gzip-compressed tar.

    ``path`` is handed unchanged to ``libarchive.Archive`` opened in ``'w'``
    mode (``main`` passes the destination file object produced by argparse).
    ``images_iter`` is consumed completely, one pair at a time.
    """
    with libarchive.Archive(path, 'w', format='tar', filter='gzip') as target:
        for member, payload in images_iter:
            target.write(member, payload)


def main():
    """Script entry point: read the source archive, add noise, write the result.

    The three stages are chained lazily, so images flow from the reader
    through the processing step into the writer as the writer iterates.
    """
    options = parse_args()
    noisy_images = process_images(
        read_archive(options.src),
        options.scale,
        options.remove_metadata,
        options.jobs,
    )
    write_archive(options.dst, noisy_images)


# Run only when executed as a script, not when imported. The multiprocessing
# documentation also requires this guard on platforms that start workers by
# re-importing the main module.
if __name__ == '__main__':
    main()
