import sys

def filter_dbd_core_fields(data):
    """Strip every record in ``data`` down to its core fields.

    Each output record contains:

    * the query-level keys ``query``, ``region_id``, ``platform``,
      ``device`` and ``queryfresh`` (all mandatory -- a missing one raises
      ``KeyError``);
    * every key of the form ``left_<name>`` or ``right_<name>`` whose
      ``<name>`` is one of the whitelisted per-side field names.

    All other keys are dropped. The input records are not modified.

    Args:
        data: iterable of dict-like records with string keys.

    Returns:
        A new list with one filtered dict per input record, in input order.
    """
    core_keys = ('query', 'region_id', 'platform', 'device', 'queryfresh')
    side_prefixes = ('left_', 'right_')
    side_fields = ('image_url', 'image_mds', 'page_url', 'image_crc',
                   'page_screenshot_mds', 'title', 'snippet', 'options')

    def is_core_side_field(name):
        # A name can match at most one of the two prefixes, so the first
        # matching prefix decides the outcome.
        for prefix in side_prefixes:
            if name.startswith(prefix):
                return name[len(prefix):] in side_fields
        return False

    filtered = []
    for record in data:
        kept = {name: record[name] for name in core_keys}
        kept.update(
            (name, record[name]) for name in record if is_core_side_field(name)
        )
        filtered.append(kept)
    return filtered


def calc_dbd_signal(data):
    """Aggregate pairwise comparison records into per-document signals.

    Every element of ``data`` is a dict describing one left/right comparison.
    The side whose ``<side>_options`` dict contains a ``'support_point'``
    entry is treated as the support point; the opposite side is the document
    being scored. Elements where neither side carries a support point are
    reported on stderr and skipped.

    For each document (identified by query, region_id, platform, image_url
    and page_url) wins and comparisons are accumulated separately for support
    point ranks 1, 2 and 3. Afterwards:

    * a rank that received no comparisons gets the neutral default of
      1 win out of 2 comparisons;
    * a document with any rank whose comparison count is not exactly 2 is
      reported on stderr and dropped from the result.

    Progress (``i/total``) and diagnostics are written to ``sys.stderr``.

    Args:
        data: sized sequence of dicts. Each used element needs the keys
            ``query``, ``region_id``, ``platform``, ``device``, ``overlap``,
            ``probability`` and ``label``, the document side's
            ``<side>_image_url`` and ``<side>_page_url``, and the support
            side's ``<side>_options['support_point']['rank']``.
            NOTE(review): ``label`` is presumably ``'left'`` or ``'right'``
            and ``probability`` the win share of that side -- confirm
            against the producer of this data.

    Returns:
        A list with one dict per surviving document: the document-side fields
        with their ``left_``/``right_`` prefix removed, the query-level
        fields, and four weighted win scores ``dbd_signal_lin_rank``,
        ``dbd_signal_const_rank``, ``dbd_signal_exp_rank`` and
        ``dbd_signal_slow_lin_rank``. When the same document occurs several
        times, the fields of its first occurrence are kept.

    Raises:
        KeyError: if a required key is missing, or if the support point rank
            is not 1, 2 or 3.
    """
    query_fields = ('query', 'region_id', 'platform', 'device')
    sp_ranks = (1, 2, 3)
    # (output key, weight for rank 1/2/3, normaliser). With at most 2 wins
    # per rank each normaliser equals 2 * sum(weights).
    rank_weightings = (
        ('dbd_signal_lin_rank', (3, 2, 1), 12.0),
        ('dbd_signal_const_rank', (1, 1, 1), 6.0),
        ('dbd_signal_exp_rank', (4, 2, 1), 14.0),
        ('dbd_signal_slow_lin_rank', (2, 1.5, 1), 9.0),
    )

    dbd_signal = {}  # key -> {rank: (wins, comparisons)}
    doc_fields = {}  # key -> output fields of the first occurrence
    total = len(data)

    for elem_i, elem in enumerate(data):
        # sys.stderr.write emits the same text on Python 2 and Python 3;
        # the old ``print >>sys.stderr`` form fails at runtime on Python 3.
        sys.stderr.write('{}/{}\n'.format(elem_i + 1, total))

        if 'right_options' in elem and 'support_point' in elem['right_options']:
            sp_prefix, doc_prefix = 'right_', 'left_'
        elif 'left_options' in elem and 'support_point' in elem['left_options']:
            sp_prefix, doc_prefix = 'left_', 'right_'
        else:
            sys.stderr.write('Unknown inputValues\n')
            sys.stderr.write('{}\n'.format(elem))
            continue

        # Document-side fields with the side prefix stripped, plus the
        # query-level fields.
        values = {}
        for field in elem:
            if field.startswith(doc_prefix):
                values[field[len(doc_prefix):]] = elem[field]
        for field in query_fields:
            values[field] = elem[field]

        key = (values['query'], values['region_id'], values['platform'],
               values['image_url'], values['page_url'])
        doc_fields.setdefault(key, values)
        per_rank = dbd_signal.setdefault(
            key, {rank: (0, 0) for rank in sp_ranks})

        n_tasks = elem['overlap'] * 2
        # int() truncates; the 0.01 offset is added before truncation,
        # presumably so a product that lands just below a whole number
        # through float rounding is not rounded down.
        n_wins = int(float(elem['probability']) * n_tasks + 0.01)

        sp_rank = elem[sp_prefix + 'options']['support_point']['rank']
        winning_label = elem['label']

        doc_wins, doc_comps = per_rank[sp_rank]
        if doc_prefix.startswith(winning_label):
            doc_wins += n_wins
        else:
            doc_wins += n_tasks - n_wins
        per_rank[sp_rank] = (doc_wins, doc_comps + n_tasks)

    # Iterate over a snapshot of the keys: entries are removed inside the
    # loop, and removing from a dict while iterating its live key view
    # raises RuntimeError on Python 3.
    for key in list(dbd_signal):
        per_rank = dbd_signal[key]
        for sp_rank in sp_ranks:
            doc_wins, doc_comps = per_rank[sp_rank]
            if doc_comps == 0:
                # No comparison at this rank: neutral 1 win out of 2.
                doc_wins += 1
                doc_comps += 2
                per_rank[sp_rank] = (doc_wins, doc_comps)
            # Now doc_comps should be 2 in all cases.
            if doc_comps != 2:
                sys.stderr.write('Unexpected wins number {} for key {}\n'.format(
                    doc_comps, key))
                del dbd_signal[key]
                del doc_fields[key]
                break

    out_data = []
    for key in doc_fields:
        out_elem = doc_fields[key]
        wins = [dbd_signal[key][rank][0] for rank in sp_ranks]
        for name, weights, norm in rank_weightings:
            weighted = sum(w * n for w, n in zip(weights, wins))
            out_elem[name] = float(weighted) / norm
        out_data.append(out_elem)

    return out_data
