#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import codecs
import argparse
from collections import defaultdict
import pyaml
from tqdm import tqdm

# Shared state filled in by main().
#
# tree: token -> {"left": tokens seen immediately before it on some line,
#                 "right": tokens seen immediately after it on some line}.
tree = defaultdict(lambda: {side: set() for side in ("left", "right")})
# left_tree: token -> every different token seen earlier on the same line.
left_tree = defaultdict(set)
# rewrites: collapsed token -> the token it is rewritten to.
rewrites = {}
# rewrites_back: rewrite target -> tokens currently rewritten to it.
rewrites_back = defaultdict(set)


def get_orphans(tree, rewrites):
    """Return the set of nodes in *tree* that are ready to be collapsed.

    A node qualifies when all of the following hold:

    * it is not yet a key of *rewrites*;
    * its ``"left"`` set contains exactly one element;
    * every element of its ``"right"`` set is already a key of *rewrites*
      (an empty ``"right"`` set trivially satisfies this).

    :param tree: mapping of node -> ``{"left": set, "right": set}``.
    :param rewrites: mapping whose keys are the nodes already collapsed.
    :return: a new ``set`` of qualifying nodes (possibly empty).
    """
    resolved = set(rewrites)
    candidates = set()
    for node in tree:
        if node in rewrites:
            continue
        links = tree[node]
        if len(links["left"]) != 1:
            continue
        if links["right"].issubset(resolved):
            candidates.add(node)
    return candidates


def main():
    """Build neighbour maps from a path file, collapse leaves, dump YAML.

    Takes one positional command-line argument, ``filename``: a UTF-8 text
    file with one whitespace-separated token sequence per line. Tokens that
    are exactly ``{`` or ``}`` are ignored.

    Steps:

    1. For every line, record each token's immediate predecessor and
       successor in the module-level ``tree`` (skipping neighbours equal to
       the token itself) and every different earlier token of the same line
       in ``left_tree``.
    2. Repeatedly ask ``get_orphans`` for collapsible nodes and point each of
       them, plus everything previously rewritten to it, at its single
       left neighbour. The mappings are kept in ``rewrites`` and
       ``rewrites_back``; a progress pair is printed after every round.
    3. Write ``rewrites.yaml``, ``rewrites_back.yaml``, ``tree.yaml`` and
       ``left_tree.yaml`` into the current working directory.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('filename')
    options = cli.parse_args()

    braces = {'{', '}'}
    with codecs.open(options.filename, "r", "utf8") as source:
        for raw in tqdm(source):
            hops = [tok for tok in raw.split() if tok not in braces]
            last = len(hops) - 1
            seen = set()
            for pos, node in enumerate(hops):
                # Immediate neighbours; a repeated token is not its own
                # neighbour.
                if pos > 0:
                    before = hops[pos - 1]
                    if before != node:
                        tree[node]["left"].add(before)
                if pos < last:
                    after = hops[pos + 1]
                    if after != node:
                        tree[node]["right"].add(after)
                # Only touch left_tree when there is something to record, so
                # no empty entries are created.
                earlier = seen - {node}
                if earlier:
                    left_tree[node].update(earlier)
                seen.add(node)

    pending = get_orphans(tree, rewrites)
    while pending:
        for leaf in pending:
            parent = next(iter(tree[leaf]["left"]))
            group = rewrites_back[parent]
            rewrites[leaf] = parent
            group.add(leaf)
            # Everything that used to point at the leaf now points at its
            # parent; the leaf stops being a rewrite target.
            inherited = rewrites_back.pop(leaf, ())
            for moved in inherited:
                rewrites[moved] = parent
            group.update(inherited)
        print(len(rewrites), len(pending))
        pending = get_orphans(tree, rewrites)

    outputs = (
        ("rewrites.yaml", rewrites),
        ("rewrites_back.yaml", rewrites_back),
        ("tree.yaml", tree),
        ("left_tree.yaml", left_tree),
    )
    for path, payload in outputs:
        with open(path, "wb") as sink:
            sink.write(pyaml.dumps(payload))


# Run the command-line entry point only when executed as a script, so that
# importing this module has no side effects beyond creating the shared state.
if __name__ == "__main__":
    main()
