import os
import shutil
import random
import tarfile
import datetime
import argparse
import tensorflow as tf


def create_tmp_folder():
    """Create a uniquely named temporary folder in the current directory.

    The folder name starts with a filesystem-safe timestamp (no spaces or
    colons, which are not valid in Windows path components) followed by a
    random suffix chosen by ``tempfile.mkdtemp``, which also creates the
    directory atomically so two concurrent runs cannot collide.

    Returns:
        Path of the newly created folder. The caller is responsible for
        removing it.
    """
    import tempfile  # local import keeps this function self-contained

    stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    return tempfile.mkdtemp(prefix="{}-".format(stamp), dir=os.getcwd())


def _is_inside(root, candidate):
    """Return True if the resolved ``candidate`` path is ``root`` or below it."""
    resolved = os.path.realpath(candidate)
    # os.path.join(root, "") appends exactly one trailing separator, so
    # "/data/out-other" is not mistaken for a child of "/data/out".
    return resolved == root or resolved.startswith(os.path.join(root, ""))


def unpack_tarfile(input_filename, output_folder='.'):
    """Extract a gzip-compressed tar archive into ``output_folder``.

    Before anything is written, every member is checked so that neither its
    own path nor the target of a symbolic/hard link resolves outside
    ``output_folder`` (e.g. via ``../`` components or absolute paths).

    Args:
        input_filename: Path to the ``.tar.gz`` archive.
        output_folder: Directory to extract into; defaults to the current
            directory.

    Raises:
        ValueError: If a member or link target would land outside
            ``output_folder``. Nothing is extracted in that case.
    """
    root = os.path.realpath(output_folder)
    with tarfile.open(input_filename, "r:gz") as tar:
        members = tar.getmembers()
        for member in members:
            destination = os.path.join(root, member.name)
            if not _is_inside(root, destination):
                raise ValueError(
                    "Refusing to extract {!r}: path escapes {!r}".format(
                        member.name, output_folder))
            if member.issym():
                # Symlink targets are relative to the link's own directory.
                link_target = os.path.join(
                    os.path.dirname(destination), member.linkname)
            elif member.islnk():
                # Hard-link targets are relative to the archive root.
                link_target = os.path.join(root, member.linkname)
            else:
                continue
            if not _is_inside(root, link_target):
                raise ValueError(
                    "Refusing to extract {!r}: link target {!r} escapes "
                    "{!r}".format(member.name, member.linkname, output_folder))

        extract_kwargs = {}
        # Interpreters that ship extraction filters get the stdlib "data"
        # filter as a second line of defence; older ones lack the argument.
        if hasattr(tarfile, "data_filter"):
            extract_kwargs["filter"] = "data"
        tar.extractall(path=output_folder, members=members, **extract_kwargs)


def answer_to_label(answer):
    """Translate a textual answer into its integer class label.

    ``"yes"`` maps to 1 and ``"no"`` maps to 0; any other value raises
    ``KeyError``.
    """
    labels_by_answer = dict(no=0, yes=1)
    label = labels_by_answer[answer]
    return label


def int64_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` backed by a one-element Int64List."""
    int64_list = tf.train.Int64List(value=[value])
    feature = tf.train.Feature(int64_list=int64_list)
    return feature


def bytes_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` backed by a one-element BytesList."""
    bytes_list = tf.train.BytesList(value=[value])
    feature = tf.train.Feature(bytes_list=bytes_list)
    return feature


def create_tf_example(name, label, dataset_path):
    """Build a ``tf.train.Example`` for one dataset item.

    Reads the raw bytes of ``image/<name>.jpg`` and ``mask/<name>.png`` under
    ``dataset_path`` and stores them, still encoded, next to the item name
    and label.

    Args:
        name: Item name, used as the file stem for both image and mask.
        label: Integer class label stored in the ``'label'`` feature.
        dataset_path: Root folder of the unpacked dataset.

    Returns:
        A ``tf.train.Example`` with the features ``'name'``,
        ``'image/encoded'``, ``'mask/encoded'`` and ``'label'``.

    Raises:
        IOError/OSError: If the image or the mask file cannot be read.
    """
    image_path = os.path.join(dataset_path, "image/{}.jpg".format(name))
    mask_path = os.path.join(dataset_path, "mask/{}.png".format(name))

    # Context managers close the handles promptly; a large dataset would
    # otherwise keep many files open until garbage collection.
    with open(image_path, "rb") as image_file:
        image = image_file.read()
    with open(mask_path, "rb") as mask_file:
        mask = mask_file.read()

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'name': bytes_feature(name.encode('utf8')),
        'image/encoded': bytes_feature(image),
        'mask/encoded': bytes_feature(mask),
        'label': int64_feature(label),
    }))
    return tf_example


def _write_examples(list_path, dataset_path, writer):
    """Serialize every item listed in ``list_path`` to ``writer``; return the count."""
    processed_count = 0
    with open(list_path, "r") as list_file:
        for line_number, line in enumerate(list_file, 1):
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at the end).
                continue
            items = [item.strip() for item in stripped.split(",")]
            if len(items) < 2:
                raise ValueError(
                    "Malformed line {} in {}: {!r}".format(
                        line_number, list_path, stripped))
            name, label = items[0], answer_to_label(items[1])
            tf_example = create_tf_example(name, label, dataset_path)
            if tf_example is None:
                # Defensive: skip entries for which no example was produced.
                continue
            writer.write(tf_example.SerializeToString())
            processed_count += 1
            if processed_count % 1000 == 0:
                print("Processed {} items".format(processed_count))
    return processed_count


def main():
    """Command-line entry point: convert a tar.gz dataset into a TFRecord file.

    The archive given by ``--dataset`` is unpacked into a temporary folder.
    Its ``list.txt`` is read line by line, each line being ``name,answer``
    with ``answer`` either ``yes`` or ``no``. One serialized example per
    line is written to the file given by ``--tfrecord``. The record writer
    is closed and the temporary folder removed even when a step fails.
    """
    # The text must be passed as ``description``; the first positional
    # argument of ArgumentParser is ``prog`` (the program name in usage).
    parser = argparse.ArgumentParser(
        description="Tool for generate tfrecord from dataset")
    parser.add_argument("--dataset", required=True, help="Path to tar.gz file with dataset")
    parser.add_argument("--tfrecord", required=True, help="Path to output tfrecord")
    args = parser.parse_args()

    print("Unpacking dataset: {}".format(args.dataset))
    print("Creating temporary folder")
    tmp_folder_path = create_tmp_folder()
    try:
        unpack_tarfile(args.dataset, output_folder=tmp_folder_path)

        print("Creating tfrecord: {}".format(args.tfrecord))
        writer = tf.python_io.TFRecordWriter(args.tfrecord)
        try:
            _write_examples(
                os.path.join(tmp_folder_path, "list.txt"),
                tmp_folder_path,
                writer)
        finally:
            writer.close()
    finally:
        print("Removing temporary folder")
        shutil.rmtree(tmp_folder_path)

# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
