import json


def get_attribute(doc, key):
    """Return the value of the first archive attribute of ``doc`` named ``key``.

    The attributes are read from ``doc.ArchiveInfo.GtaRelatedAttribute``; each
    entry is expected to expose ``Key`` and ``Value``.

    Raises:
        Exception: if no attribute has a ``Key`` equal to ``key``.
    """
    # A private sentinel distinguishes "nothing matched" from any real value.
    not_found = object()
    matching_values = (
        entry.Value
        for entry in doc.ArchiveInfo.GtaRelatedAttribute
        if entry.Key == key
    )
    value = next(matching_values, not_found)
    if value is not_found:
        raise Exception('Document does not have "{}" attribute'.format(key))
    return value


class DjDoc:
    """A single report document together with its decoded ``dj_*`` attributes.

    Attributes set by the constructor:
        id: ``"<grouping.Attr>/<group.CategoryName>/<dj_object_key>"``.
        profile: JSON-decoded value of the ``dj_profile_hr_json`` attribute.
        features: JSON-decoded value of the ``dj_features_hr_json`` attribute
            with every key listed in ``ignore_features`` removed.
        ranking_factors_slice_borders: ``doc.DocRankingFactorsSliceBorders``.
        pos: the ``pos`` value supplied by the caller.
    """

    def __init__(self, grouping, group, doc, pos, ignore_features):
        # Look up all three attributes before decoding any JSON, so that a
        # missing attribute is reported ahead of a malformed JSON payload.
        object_key, profile_json, features_json = [
            get_attribute(doc, attr_name)
            for attr_name in (
                'dj_object_key',
                'dj_profile_hr_json',
                'dj_features_hr_json',
            )
        ]
        self.id = '{}/{}/{}'.format(grouping.Attr, group.CategoryName, object_key)
        self.profile = json.loads(profile_json)

        features = json.loads(features_json)
        for ignored in ignore_features:
            # The default of None makes absent keys a no-op instead of a KeyError.
            features.pop(ignored, None)
        self.features = features

        self.ranking_factors_slice_borders = doc.DocRankingFactorsSliceBorders
        self.pos = pos


class DjReport:
    """Documents of a serialized ``TReport``, indexed by their ``DjDoc.id``.

    Attributes:
        docs: dict mapping each document id to the ``DjDoc`` built for it.
    """

    def __init__(self, data, ignore_features=None):
        """Parse ``data`` and wrap every document it contains in a ``DjDoc``.

        Args:
            data: serialized ``TReport`` message, handed to ``ParseFromString``.
            ignore_features: optional iterable of feature names forwarded to
                every ``DjDoc``; ``None`` (the default) ignores nothing.
        """
        # Function-level import kept from the original; presumably so that
        # importing this module does not require the protobuf bindings --
        # TODO confirm.
        from sandbox.projects.common.base_search_quality.tree import meta_pb2

        # A None sentinel replaces the former ``[]`` default, which was a
        # single list object shared by every call of the constructor.
        if ignore_features is None:
            ignore_features = ()

        report = meta_pb2.TReport()
        report.ParseFromString(data)

        self.docs = {}
        # ``pos`` numbers documents in traversal order across all groupings
        # and groups, starting from zero.
        pos = 0
        for grouping in report.Grouping:
            for group in grouping.Group:
                for doc in group.Document:
                    dj_doc = DjDoc(grouping, group, doc, pos, ignore_features)
                    # NOTE: a later document with an id already seen replaces
                    # the earlier entry.
                    self.docs[dj_doc.id] = dj_doc
                    pos += 1
