import argparse
import difflib
import sys

import yt.wrapper as yt
import yt.yson as yson


def parse_args(description=None):
    """Build the command-line parser and parse ``sys.argv``.

    :param description: text shown at the top of the ``--help`` output.
    :return: the ``argparse.Namespace`` holding the two positional file
        names and the optional diff settings.
    """
    arg_parser = argparse.ArgumentParser(description=description)

    # Positional arguments: the two files to compare.
    for positional in ("file1", "file2"):
        arg_parser.add_argument(positional)

    # The row format is mandatory and is converted by yt.create_format.
    arg_parser.add_argument(
        "--format",
        required=True,
        type=yt.create_format,
        help="https://wiki.yandex-team.ru/yt/userdoc/formats",
    )
    arg_parser.add_argument(
        "--diff-yson-format",
        required=False,
        choices=["text", "pretty"],
        default="pretty",
    )
    arg_parser.add_argument(
        "--html",
        type=str,
        required=False,
        help="HTML for generated diff",
    )
    arg_parser.add_argument(
        "--diff-type",
        choices=["unified_diff", "ndiff"],
        default="ndiff",
        required=False,
        help="Type of difference generator",
    )
    arg_parser.add_argument(
        "--skip-autogenerated-columns",
        action="store_true",
        required=False,
        help="Skip columns which start with '_'",
    )

    return arg_parser.parse_args()


def to_dict(row):
    """Convert a ``yt.yamr_record`` record into a plain dict.

    Instances of ``SimpleRecord`` become ``{"key": ..., "value": ...}``;
    instances of ``SubkeyedRecord`` additionally get a ``"subkey"`` entry.
    Any other object is returned unchanged.
    """
    records = yt.yamr_record
    if not isinstance(row, records.SimpleRecord):
        return row

    converted = {"key": row.key, "value": row.value}
    if isinstance(row, records.SubkeyedRecord):
        converted["subkey"] = row.subkey
    return converted


def filter_autogenerated(row_dict):
    """Return a copy of ``row_dict`` without keys that start with ``"_"``.

    :param row_dict: mapping whose keys support ``startswith("_")``.
    :return: a new dict; the input mapping is not modified.
    """
    # items() is available on both Python 2 and Python 3, unlike iteritems().
    return {key: value for key, value in row_dict.items() if not key.startswith("_")}


def load_rows(filename, fmt, skip_autogenerated):
    """Lazily yield the rows stored in ``filename``.

    :param filename: path of the file to read.
    :param fmt: format object providing ``load_rows(stream)``.
    :param skip_autogenerated: when true, columns whose name starts with
        ``"_"`` are dropped from every row.
    :return: generator of rows passed through ``to_dict``.

    The file stays open until the generator is exhausted or closed.
    """
    # Open in binary mode: the format parser consumes the raw byte stream,
    # so no newline translation or text decoding must happen on the way.
    with open(filename, "rb") as f:
        for x in fmt.load_rows(f):
            row_dict = to_dict(x)
            if skip_autogenerated:
                row_dict = filter_autogenerated(row_dict)
            yield row_dict


def get_lines(rows, yson_format):
    """Serialize ``rows`` as a YSON list fragment and split it into lines.

    :param rows: the rows to dump.
    :param yson_format: value forwarded to ``yson.dumps`` as ``yson_format``.
    :return: list of lines with their line endings kept.
    """
    serialized = yson.dumps(rows, yson_type="list_fragment", yson_format=yson_format)
    keep_line_endings = True
    return serialized.splitlines(keep_line_endings)


def main():
    """Compare two row files and report how they differ.

    Rows of both files are loaded and sorted, so the order of rows in the
    files does not matter. When the sorted rows are equal the process exits
    with status 0. Otherwise a textual diff is written to stdout, an HTML
    diff table is written to the ``--html`` path when one is given, and the
    process exits with status 1.

    :raises ValueError: if ``--diff-type`` holds an unsupported value.
    """
    args = parse_args(description=__doc__)

    file1 = sorted(load_rows(args.file1, args.format, args.skip_autogenerated_columns))
    file2 = sorted(load_rows(args.file2, args.format, args.skip_autogenerated_columns))

    if file1 == file2:
        sys.exit(0)

    file1_lines = get_lines(file1, args.diff_yson_format)
    file2_lines = get_lines(file2, args.diff_yson_format)

    if args.diff_type == 'ndiff':
        # Slow, but more user-friendly
        diff = difflib.ndiff(file1_lines, file2_lines)
    elif args.diff_type == 'unified_diff':
        # Fast, but less user-friendly
        diff = difflib.unified_diff(file1_lines, file2_lines)
    else:
        raise ValueError("Wrong diff_type {}".format(args.diff_type))

    # Both difflib functions return generators, which are always truthy, so
    # there is nothing to test here: the diff is written unconditionally.
    # Lines are streamed instead of being joined into one big string; the
    # final newline matches what the print statement used to append.
    sys.stdout.writelines(diff)
    sys.stdout.write("\n")

    if args.html is not None:
        # Warning: HtmlDiff uses ndiff inside so could be very slow on big diff
        html_diff = difflib.HtmlDiff()
        with open(args.html, 'w') as f:
            f.write(html_diff.make_table(file1_lines, file2_lines, context=True))

    sys.exit(1)
