from maps.wikimap.stat.assessment.report.lib import edits, feedback, moderation, tracker, util

from maps.wikimap.stat.libs.common.lib.geobase_region import (
    FILES as GEOBASE_FILES,
    GEOBASE_JOB_MEMORY_LIMIT,
    geobase_region_id
)

from nile.api.v1 import Record, aggregators, extractors


# Grade values that _convert_grade counts in the `correct` / `incorrect` columns.
_GRADE_CORRECT = b'correct'
_GRADE_INCORRECT = b'incorrect'
# Path prefix under which _tree_names_for_unknown_values places values that
# have no tree name of their own.
_DEFAULT_TREE_PATH = [b'all', b'unknown']
# Region tree used when a region id has no (or an empty) entry in the major
# regions map.
# NOTE(review): 10000 is presumably the id of the root region - confirm.
_DEFAULT_REGION_TREE = b'\t10000\t'
# Name of the top-level tree node; used as the root for action trees and as the
# grader tree of tasks that have no grader.
_ROOT_TREE = b'\tall\t'


def _tree_name_from_path(path):
    return b'\t' + b'\t'.join(path) + b'\t'


def _tree_names_for_unknown_values(tree_name_col, value_col):
    '''
    Returns a mapper that fills in `tree_name_col` for records lacking it.

    Records whose `tree_name_col` is not None are passed through unchanged.
    For every other record the path `_DEFAULT_TREE_PATH + [value]` is built,
    where `value` is taken from `value_col` (non-bytes values are converted
    with `str(value).encode()`), and one copy of the record is emitted per
    prefix of that path, with `tree_name_col` set to the tree name of the
    prefix.
    '''

    def make_tree_names(records):
        for row in records:
            if row.get(tree_name_col) is None:
                raw_value = row.get(value_col)
                leaf = raw_value if type(raw_value) is bytes else str(raw_value).encode()

                full_path = _DEFAULT_TREE_PATH + [leaf]
                # One record per path prefix, from the shortest to the full path.
                for depth in range(1, len(full_path) + 1):
                    tree_name = _tree_name_from_path(full_path[:depth])
                    yield Record(row, {tree_name_col: tree_name})
            else:
                yield row

    return make_tree_names


def _get_latest_grades(date, assessment_unit_table, assessment_grade_table):
    '''
    Selects grades with `util.filter_by_date('graded_at', date)`, attaches the
    assessed unit to each of them and, per unit, keeps the grade and the grader
    picked by `aggregators.last(..., by='graded_at')`.

    assessment_unit_table:
    | unit_id | entity_id | entity_domain | action_by | action | ... |
    |---------+-----------+---------------+-----------+--------+-----|
    | ...     | ...       | ...           | ...       | ...    | ... |

    assessment_grade_table:
    | unit_id | graded_by | graded_at | value | ... |
    |---------+-----------+-----------+-------+-----|
    | ...     | ...       | ...       | ...   | ... |

    result:
    | entity_id | entity_domain | action_by | action | graded_by | grade |
    |-----------+---------------+-----------+--------+-----------+-------|
    | ...       | ...           | ...       | ...    | ...       | value |
    '''

    grades_for_date = assessment_grade_table.filter(
        util.filter_by_date('graded_at', date)
    )

    # Inner join: grades without a matching unit are dropped.
    graded_units = grades_for_date.join(
        assessment_unit_table,
        type='inner',
        by='unit_id'
    )

    latest_per_unit = graded_units.groupby('unit_id').aggregate(
        entity_id=aggregators.any('entity_id'),
        entity_domain=aggregators.any('entity_domain'),
        action_by=aggregators.any('action_by'),
        action=aggregators.any('action'),
        graded_by=aggregators.last('graded_by', by='graded_at'),
        grade=aggregators.last('value', by='graded_at')
    )

    # `unit_id` is not projected; both user ids go through util.extract_as_uint.
    return latest_per_unit.project(
        'grade', 'action', 'entity_domain', 'entity_id',
        action_by=util.extract_as_uint('action_by'),
        graded_by=util.extract_as_uint('graded_by')
    )


def _split_latest_grades_by_domain(latest_grades):
    '''
    Routes every grade into the output stream of its `entity_domain` and drops
    the `entity_domain` column. Records of any other domain are discarded.

    latest_grades:
    | entity_id | entity_domain | action_by | action | graded_by | grade |
    |-----------+---------------+-----------+--------+-----------+-------|
    | ...       | ...           | ...       | ...    | ...       | ...   |

    result:
    (feedback_latest_grades, moderation_latest_grades, edit_latest_grades,
    tracker_latest_grades), where each table has the following form:
    | entity_id | action_by | action | graded_by | grade |
    |-----------+-----------+--------+-----------+-------|
    | ...       | ...       | ...    | ...       | ...   |
    '''

    def split(records, feedback_latest_grades, moderation_latest_grades, edit_latest_grades, tracker_latest_grades):
        output_by_domain = {
            b'feedback': feedback_latest_grades,
            b'moderation': moderation_latest_grades,
            b'edits': edit_latest_grades,
            b'tracker': tracker_latest_grades,
        }

        for row in records:
            output = output_by_domain.get(row.entity_domain)
            row_without_domain = row.transform('entity_domain')

            if output is not None:
                output(row_without_domain)

    return latest_grades.map(split)


def _lat_lon_to_region_id(tasks):
    '''
    Replaces the `lat` and `lon` columns with `region_id` computed by
    `geobase_region_id`.

    tasks:
    | lat | lon | task_id | action_by | action | graded_by | grade |
    |-----+-----+---------+-----------+--------+-----------+-------|
    | ... | ... | ...     | ...       | ...    | ...       | ...   |

    result:
    | region_id | task_id | action_by | action | graded_by | grade |
    |-----------+---------+-----------+--------+-----------+-------|
    | ...       | ...     | ...       | ...    | ...       | ...   |
    '''

    def region_id_at(lat, lon):
        # Note the argument order: geobase_region_id is called with lon first.
        return geobase_region_id(lon, lat)

    every_column_but_coordinates = extractors.all(exclude=['lat', 'lon'])

    return tasks.project(
        every_column_but_coordinates,
        region_id=extractors.custom(region_id_at, 'lat', 'lon'),
        files=GEOBASE_FILES,
        memory_limit=GEOBASE_JOB_MEMORY_LIMIT
    )


def _region_id_to_region_name_tree(tasks, major_regions_map):
    '''
    Replaces `region_id` with `region_name_tree` looked up in
    `major_regions_map`. Tasks whose region has no (or an empty) tree in the
    map get `_DEFAULT_REGION_TREE`.

    tasks:
    | region_id | task_id | action_by | action | graded_by | grade |
    |-----------+---------+-----------+--------+-----------+-------|
    | ...       | ...     | ...       | ...    | ...       | ...   |

    major_regions_map:
    | region_id (string) | region_tree | ... |
    |--------------------+-------------+-----|
    | ...                | ...         | ... |

    result:
    | region_name_tree | task_id | action_by | action | graded_by | grade |
    |------------------+---------+-----------+--------+-----------+-------|
    | ...              | ...     | ...       | ...    | ...       | ...   |
    '''

    def to_int(region_id):
        return int(region_id)

    def tree_or_default(region_tree):
        return region_tree or _DEFAULT_REGION_TREE

    # The map stores region ids as strings; convert them to int before joining.
    regions_with_int_ids = major_regions_map.project(
        region_id=extractors.custom(to_int, 'region_id'),
        region_tree='region_tree'
    )

    # Left join keeps tasks whose region is absent from the map.
    tasks_with_region_tree = tasks.join(
        regions_with_int_ids,
        type='left',
        by='region_id',
    )

    return tasks_with_region_tree.project(
        extractors.all(exclude=['region_id', 'region_tree']),
        region_name_tree=extractors.custom(tree_or_default, 'region_tree')
    )


def _task_id_to_task_name_tree(tasks, task_tariff_map):
    '''
    Replaces `task_id` with `task_name_tree` looked up in `task_tariff_map`.
    Tasks without a tree name are expanded by `_tree_names_for_unknown_values`
    using their `task_id` as the value.

    tasks:
    | region_name_tree | task_id | action_by | action | graded_by | grade |
    |------------------+---------+-----------+--------+-----------+-------|
    | ...              | ...     | ...       | ...    | ...       | ...   |

    task_tariff_map:
    | task_id | task_name_tree | seconds_per_task |
    |---------+----------------+------------------|
    | ...     | ...            | ...              |

    result:
    | region_name_tree | task_name_tree | action_by | action | graded_by | grade |
    |------------------+----------------+-----------+--------+-----------+-------|
    | ...              | ...            | ...       | ...    | ...       | ...   |
    '''

    # Left join keeps tasks that are absent from the tariff map.
    tasks_with_tariffs = tasks.join(
        task_tariff_map,
        type='left',
        by='task_id'
    )

    fill_unknown_task_names = _tree_names_for_unknown_values(
        tree_name_col='task_name_tree', value_col='task_id'
    )

    return tasks_with_tariffs.map(
        fill_unknown_task_names
    ).project(
        extractors.all(exclude=['seconds_per_task', 'task_id'])
    )


def _action_by_to_tree(tasks, puid_map):
    '''
    Replaces `action_by` with `action_by_tree` looked up in `puid_map`. Tasks
    whose `action_by` has no tree are expanded by
    `_tree_names_for_unknown_values` using `action_by` as the value.

    tasks:
    | region_name_tree | task_name_tree | action_by | action | graded_by | grade |
    |------------------+----------------+-----------+--------+-----------+-------|
    | ...              | ...            | ...       | ...    | ...       | ...   |

    puid_map:
    | puid | puid_tree |
    |------+-----------|
    | ...  | ...       |

    Result:
    | region_name_tree | task_name_tree | action_by_tree | action | graded_by | grade |
    |------------------+----------------+----------------+--------+-----------+-------|
    | ...              | ...            | ...            | ...    | ...       | ...   |
    '''

    # Left join keeps tasks whose author is absent from the puid map.
    tasks_with_puid_tree = tasks.join(
        puid_map,
        type='left',
        by_left='action_by',
        by_right='puid'
    )

    fill_unknown_authors = _tree_names_for_unknown_values(
        tree_name_col='puid_tree', value_col='action_by'
    )

    return tasks_with_puid_tree.map(
        fill_unknown_authors
    ).project(
        extractors.all(exclude=['puid', 'puid_tree', 'action_by']),
        action_by_tree='puid_tree'
    )


def _action_to_tree(tasks):
    '''
    Replaces `action` with `action_tree`. Every task is emitted twice: once
    with the root tree and once with the tree of its own action located
    directly under the root.

    tasks:
    | region_name_tree | task_name_tree | action_by_tree | action | graded_by | grade |
    |------------------+----------------+----------------+--------+-----------+-------|
    | ...              | ...            | ...            | ...    | ...       | ...   |

    Result:
    | region_name_tree | task_name_tree | action_by_tree | action_tree | graded_by | grade |
    |------------------+----------------+----------------+-------------+-----------+-------|
    | ...              | ...            | ...            | ...         | ...       | ...   |
    '''

    def action_to_tree(records):
        for row in records:
            root_level_row = row.transform('action', action_tree=_ROOT_TREE)
            yield root_level_row

            own_action_tree = _ROOT_TREE + row.action + b'\t'
            yield row.transform('action', action_tree=own_action_tree)

    return tasks.map(action_to_tree)


def _graded_by_to_tree(tasks, puid_map):
    '''
    Replaces `graded_by` with `graded_by_tree` looked up in `puid_map`.

    Tasks without a grader (`graded_by` is None) get `_ROOT_TREE`. Tasks whose
    grader has no tree in the map are expanded by
    `_tree_names_for_unknown_values` using `graded_by` as the value.

    tasks:
    | region_name_tree | task_name_tree | action_by_tree | action_tree | graded_by | grade |
    |------------------+----------------+----------------+-------------+-----------+-------|
    | ...              | ...            | ...            | ...         | ...       | ...   |

    puid_map:
    | puid | puid_tree |
    |------+-----------|
    | ...  | ...       |

    Result:
    | region_name_tree | task_name_tree | action_by_tree | action_tree | graded_by_tree | grade |
    |------------------+----------------+----------------+-------------+----------------+-------|
    | ...              | ...            | ...            | ...         | ...            | ...   |
    '''

    def grader_tree(puid_tree, graded_by):
        # Only ungraded tasks fall back to the root tree; a grader missing
        # from the map keeps None so that the mapper below can expand it.
        if puid_tree is None and graded_by is None:
            return _ROOT_TREE
        return puid_tree

    # Left join keeps tasks whose grader is absent from the puid map.
    tasks_with_puid_tree = tasks.join(
        puid_map,
        type='left',
        by_left='graded_by',
        by_right='puid'
    )

    tasks_with_grader_tree = tasks_with_puid_tree.project(
        extractors.all(exclude=['puid', 'puid_tree']),
        graded_by_tree=extractors.custom(grader_tree, 'puid_tree', 'graded_by')
    )

    fill_unknown_graders = _tree_names_for_unknown_values(
        tree_name_col='graded_by_tree', value_col='graded_by'
    )

    return tasks_with_grader_tree.map(
        fill_unknown_graders
    ).project(
        extractors.all(exclude=['graded_by'])
    )


def _convert_grade(tasks):
    '''
    Replaces the `grade` column with three 0/1 counters:
    - `quantity` is 1 for tasks without a grade (used to calculate the amount
      of resolved tasks);
    - `correct` / `incorrect` is 1 for tasks graded accordingly (used to
      calculate the amount of assessed tasks).

    tasks_with_graded_by_tree:
    | action_by_tree | action_tree | region_name_tree | task_name_tree | graded_by_tree | grade |
    |----------------+-------------+------------------+----------------+----------------+-------|
    | ...            | ...         | ...              | ...            | ...            | ...   |

    result:
    | action_by_tree | action_tree | region_name_tree | task_name_tree | graded_by_tree | correct | incorrect | quantity |
    |----------------+-------------+------------------+----------------+----------------+---------+-----------+----------|
    | ...            | ...         | ...              | ...            | ...            | ...     | ...       | ...      |
    '''

    def convert_grade(records):
        for row in records:
            grade = row.grade

            is_correct = grade == _GRADE_CORRECT
            is_incorrect = grade == _GRADE_INCORRECT
            is_ungraded = grade is None

            yield row.transform(
                'grade',
                correct=1 if is_correct else 0,
                incorrect=1 if is_incorrect else 0,
                quantity=1 if is_ungraded else 0
            )

    return tasks.map(convert_grade)


def _aggregate_tasks(tasks, isodate_str):
    '''
    Groups tasks by all tree columns (`action_by_tree`, `action_tree`,
    `region_name_tree`, `task_name_tree`, `graded_by_tree`), sums the counters
    within each group and adds the `fielddate` column holding the encoded
    `isodate_str`.

    tasks:
    | action_by_tree | action_tree | region_name_tree | task_name_tree | graded_by_tree | correct | incorrect | quantity |
    |----------------+-------------+------------------+----------------+----------------+---------+-----------+----------|
    | ...            | ...         | ...              | ...            | ...            | ...     | ...       | ...      |

    Result:
    | fielddate | action_by_tree | action_tree | region_name_tree | task_name_tree | graded_by_tree | correct_total | incorrect_total | quantity_total |
    |-----------+----------------+-------------+------------------+----------------+----------------+---------------+-----------------+----------------|
    | date      | ...            | ...         | ...              | ...            | ...            | SUM(...)      | SUM(...)        | SUM(...)       |
    '''

    tree_columns = (
        'action_by_tree',
        'action_tree',
        'region_name_tree',
        'task_name_tree',
        'graded_by_tree',
    )

    totals = tasks.groupby(*tree_columns).aggregate(
        correct_total=aggregators.sum('correct'),
        incorrect_total=aggregators.sum('incorrect'),
        quantity_total=aggregators.sum('quantity')
    )

    return totals.project(
        extractors.all(),
        fielddate=extractors.const(isodate_str.encode())
    )


def prepare_report(
    job,
    date,
    assessment_unit_table,
    assessment_grade_table,
    feedback_table,
    commit_event_table,
    moderation_log,
    task_active_table,
    task_closed_table,
    major_regions_map,
    task_tariff_map,
    puid_map,
    issues,
    components,
    assessment_log
):
    '''
    Builds the report stream.

    The latest grades are split by entity domain and handed to the `feedback`,
    `moderation`, `edits` and `tracker` modules, whose task streams are
    concatenated. The combined tasks then get their coordinates, task ids,
    users and actions converted into tree names, their grades converted into
    counters, and are finally aggregated by `_aggregate_tasks` with `date`
    used as the field date.
    '''

    latest_grades = _get_latest_grades(date, assessment_unit_table, assessment_grade_table)
    grades_by_domain = _split_latest_grades_by_domain(latest_grades)
    feedback_grades, moderation_grades, edit_grades, tracker_grades = grades_by_domain

    # Tasks of each domain, enriched by the corresponding module.
    feedback_tasks = feedback.get_tasks(
        job, date, feedback_grades, feedback_table, commit_event_table
    )
    moderation_tasks = moderation.get_tasks(
        job, commit_event_table, moderation_grades, moderation_log
    )
    edit_tasks = edits.get_tasks(
        job, date, edit_grades, commit_event_table, task_active_table, task_closed_table
    )
    tracker_tasks = tracker.get_tasks(
        job, tracker_grades, issues, components, assessment_log
    )
    all_tasks = job.concat(feedback_tasks, moderation_tasks, edit_tasks, tracker_tasks)

    # Step by step replace raw columns with tree names.
    with_region_id = _lat_lon_to_region_id(all_tasks)
    with_region_tree = _region_id_to_region_name_tree(with_region_id, major_regions_map)
    with_task_tree = _task_id_to_task_name_tree(with_region_tree, task_tariff_map)
    with_action_by_tree = _action_by_to_tree(with_task_tree, puid_map)
    with_action_tree = _action_to_tree(with_action_by_tree)
    with_graded_by_tree = _graded_by_to_tree(with_action_tree, puid_map)

    counted_tasks = _convert_grade(with_graded_by_tree)

    return _aggregate_tasks(counted_tasks, date)
