from collections import defaultdict


class DataFrame(object):
    """A minimal table: a list of column names plus a list of rows.

    Every row is a sequence whose values line up positionally with the
    column names. Most methods leave this frame untouched and return a new
    ``DataFrame`` assembled through ``DataFrameBuilder``; ``move_to_first``
    is the exception and mutates the frame in place.
    """

    def __init__(self, columns, rows):
        """Wrap *columns* and *rows* as given; neither is copied."""
        self._columns = columns
        self._rows = rows

    def __repr__(self):
        return str(self.to_json())

    def __str__(self):
        return str(self.to_json())

    def rows(self):
        """Yield each row as a new dict mapping column name to value."""
        for row in self._rows:
            yield dict(zip(self._columns, row))

    def map(self, func):
        """Return a new frame made of ``func(row)`` for every row dict.

        *func* must return a dict; its keys become the columns of the
        resulting frame.
        """
        builder = DataFrameBuilder()
        for row in self.rows():
            builder.add_row(func(row))
        return builder.dump()

    def subset(self, columns):
        """Return a new frame restricted to *columns*.

        A requested column that this frame does not have is filled with
        ``None`` in every row.
        """
        builder = DataFrameBuilder()
        for row in self.rows():
            builder.add_row(dict((name, row.get(name)) for name in columns))
        return builder.dump()

    def __len__(self):
        """Return the number of rows."""
        return len(self._rows)

    def filter(self, by):
        """Return a new frame holding only the rows selected by *by*.

        *by* is either a dict of ``column -> required value`` (a row is kept
        when every listed column equals its required value; a column the row
        lacks is compared as ``None``) or a callable that receives the row
        dict and returns a truthy value for rows to keep.
        """
        if isinstance(by, dict):
            def keep(row):
                # ``items()`` works on both Python 2 and 3, unlike
                # ``iteritems()`` which only exists on Python 2.
                return not any(row.get(key) != value
                               for key, value in by.items())
        else:
            keep = by

        builder = DataFrameBuilder()
        for row in self.rows():
            if keep(row):
                builder.add_row(row)
        return builder.dump()

    def to_json(self):
        """Return ``{'rows': ..., 'columns': ...}`` referencing the internal lists."""
        return {
            'rows': self._rows,
            'columns': self._columns
        }

    def sorted(self, column):
        """Return a new frame with rows ordered by the values in *column*.

        Raises ``KeyError`` if the frame has rows but no such column.
        """
        builder = DataFrameBuilder()
        for row in sorted(self.rows(), key=lambda record: record[column]):
            builder.add_row(row)
        return builder.dump()

    def sort_columns(self, move_to_front=None):
        """Return a new frame whose columns are in sorted order.

        Columns named in *move_to_front* (an iterable, optional) are placed
        first, in the order given; all remaining columns follow sorted.
        """
        builder = DataFrameBuilder()
        # ``None`` replaces the former mutable ``[]`` default.
        for column in (move_to_front or ()):
            builder.add_column(column)
        for column in sorted(self.columns()):
            builder.add_column(column)
        for row in self.rows():
            builder.add_row(row)
        return builder.dump()

    def move_to_first(self, column):
        """Swap *column* with the first column, in place, and return ``self``.

        The column that used to be first ends up in *column*'s old position.
        Rows must be mutable sequences. Raises ``ValueError`` when *column*
        is not one of this frame's columns.
        """
        index = self._columns.index(column)
        columns = self._columns
        columns[0], columns[index] = columns[index], columns[0]
        for row in self._rows:
            row[0], row[index] = row[index], row[0]
        return self

    def __getitem__(self, item):
        """Return the values of column *item* as a list, one entry per row."""
        return [row[item] for row in self.rows()]

    def columns(self):
        """Return a copy of the column-name list."""
        return list(self._columns)


class DataFrameBuilder(object):
    """Collects dict rows and column names, then materializes a ``DataFrame``.

    Columns are kept in first-seen order, whether they were registered
    through ``add_column`` or discovered as keys of an added row.
    """

    def __init__(self):
        self.columns = list()
        self.rows = []

    def add_row(self, row):
        """Append *row* (a dict) and register any keys not yet known as columns.

        The dict is stored by reference, not copied.
        """
        # Iterating the dict directly yields its keys on Python 2 and 3;
        # ``iteritems()`` is Python 2 only and the values were unused anyway.
        for key in row:
            self.add_column(key)

        self.rows.append(row)

    def add_column(self, column):
        """Register *column* unless it is already present."""
        if column not in self.columns:
            self.columns.append(column)

    def dump(self):
        """Build a ``DataFrame`` from everything collected so far.

        Each stored row is laid out in column order; a column missing from a
        row becomes ``None``.
        """
        columns = list(self.columns)
        result_rows = [[row.get(column) for column in columns]
                       for row in self.rows]
        return DataFrame(columns, result_rows)


def from_rows(rows):
    """Build a DataFrame from an iterable of row dicts."""
    builder = DataFrameBuilder()
    append = builder.add_row
    for record in rows:
        append(record)
    frame = builder.dump()
    return frame


def recast(df, id_vars, melted='variable', melted_var='value'):
    """Pivot a long-format frame into a wide one.

    Rows are grouped by their values in the *id_vars* columns and each group
    becomes one output row. Within a group, every input row contributes one
    cell: the value found in the *melted* column is used as the output
    column name and the value found in the *melted_var* column as its
    content. When a group repeats a name, the later row wins.

    :param df: frame to pivot (anything accepted by ``groupby``).
    :param id_vars: sequence of column names identifying an output row.
    :param melted: column holding the output column names.
    :param melted_var: column holding the output values.
    :returns: the frame produced by ``DataFrameBuilder.dump()``.
    """
    grouped = groupby(df, lambda row: tuple(row[x] for x in id_vars))
    builder = DataFrameBuilder()
    # ``items()`` instead of the Python 2-only ``iteritems()``.
    for key, rows in grouped.items():
        wide_row = dict(zip(id_vars, key))
        for row in rows:
            wide_row[row[melted]] = row[melted_var]
        builder.add_row(wide_row)

    return builder.dump()


def groupby(df, func):
    """Bucket the row dicts of *df* by the key that *func* returns.

    Rows for which *func* returns ``None`` are left out. The result is a
    ``defaultdict(list)`` mapping each key to its rows in encounter order.
    """
    buckets = defaultdict(list)
    for record in df.rows():
        bucket_key = func(record)
        if bucket_key is None:
            continue
        buckets[bucket_key].append(record)
    return buckets


def natural_join(df1, df2):
    """Inner-join two frames on every column name they have in common.

    Each pair of rows (one from *df1*, one from *df2*) that agrees on all
    shared columns yields one output row containing the columns of both.
    If the frames share no columns, every pair of rows matches.

    :param df1: left frame; must provide ``columns()`` and ``rows()``.
    :param df2: right frame; must provide ``columns()`` and ``rows()``.
    :returns: the frame produced by ``DataFrameBuilder.dump()``.
    """
    shared = set(df1.columns()).intersection(df2.columns())

    def projection(row):
        # The same set object is iterated for both frames, so the key tuples
        # of the two sides are built in the same column order.
        return tuple(row[name] for name in shared)

    left_groups = groupby(df1, projection)
    right_groups = groupby(df2, projection)

    result = DataFrameBuilder()

    # ``items()`` instead of the Python 2-only ``iteritems()``.
    for key, left_rows in left_groups.items():
        # A membership test does not create an entry in the defaultdict.
        if key not in right_groups:
            continue
        right_rows = right_groups[key]

        for left in left_rows:
            for right in right_rows:
                merged = left.copy()
                merged.update(right)
                result.add_row(merged)

    return result.dump()


def map_reduce(df, mapper, reducer=None):
    """Group the rows of *df* with *mapper*, optionally reduce, and recombine.

    :param df: frame to process (anything accepted by ``groupby``).
    :param mapper: called with each row dict; returns the group key.
    :param reducer: optional ``reducer(key, sub_df)`` called once per group
        with a frame holding that group's rows. It may return a frame, a
        single row as a dict, or a falsy value to drop the group. Without a
        reducer each group's rows are passed through unchanged.
    :returns: a frame concatenating the rows of every group's result.
    """
    grouped = groupby(df, mapper)
    result = DataFrameBuilder()
    # ``items()`` instead of the Python 2-only ``iteritems()``.
    for key, group_rows in grouped.items():
        new_df = from_rows(group_rows)
        if reducer is not None:
            new_df = reducer(key, new_df)
            # isinstance also accepts dict subclasses such as OrderedDict.
            if isinstance(new_df, dict):
                new_df = from_rows([new_df])
        # Falsy covers both ``None`` and a result that reports zero length.
        if new_df:
            for row in new_df.rows():
                result.add_row(row)

    return result.dump()


def read_json(data, handle=None, empty_dict_as_regular_item=False, max_depth=None):
    """Flatten a nested dict structure into a frame via a leaf callback.

    The structure is walked depth-first. Dicts are descended into; anything
    else is treated as a leaf and passed to *handle* with the path of keys
    that led to it.

    :param data: the (typically JSON-decoded) object to walk.
    :param handle: ``handle(path, node)`` called for every leaf. A truthy
        return value must be a row dict and is added to the result; a falsy
        one is skipped. *path* is the live traversal list and keeps changing
        after the call, so copy it if it needs to be kept. Although the
        default is ``None``, a callable is required as soon as a leaf is
        reached.
    :param empty_dict_as_regular_item: when true, an empty dict is handed to
        *handle* as a leaf instead of being silently descended into.
    :param max_depth: when given, dicts found at this depth or deeper are
        handed to *handle* as leaves; ``None`` means unlimited.
    :returns: the frame produced by ``DataFrameBuilder.dump()``.
    """
    builder = DataFrameBuilder()

    def go(node, stack):
        # Test for ``None`` first: ``len(stack) < None`` raises TypeError on
        # Python 3, which made the default ``max_depth`` unusable.
        within_depth = max_depth is None or len(stack) < max_depth
        is_branch = (
            within_depth
            and isinstance(node, dict)
            and (not empty_dict_as_regular_item or bool(node))
        )
        if is_branch:
            # ``items()`` instead of the Python 2-only ``iteritems()``.
            for key, child in node.items():
                stack.append(key)
                go(child, stack)
                stack.pop()
        else:
            row = handle(stack, node)
            if row:
                builder.add_row(row)

    go(data, [])
    return builder.dump()


def write_csv(df, out):
    """Write *df* to the file-like object *out* as CSV, header row first."""
    import csv
    sink = csv.writer(out)
    header = df._columns
    sink.writerow(header)
    sink.writerows(df._rows)
