import argparse
import datetime
import os

import yt.wrapper as yt

def module_filter(module):
    """Tell whether ``module`` passes the module filter.

    Returns ``False`` when the module's ``__name__`` contains one of the
    excluded substrings, when the module has no (or an empty) ``__file__``,
    or when that file name ends with ``.so``. Returns ``True`` otherwise.
    """
    excluded_fragments = ('numpy', 'yt_yson_bindings', 'hashlib', 'hmac')
    name = getattr(module, '__name__', '')
    for fragment in excluded_fragments:
        # Substring match: any module whose name merely contains the
        # fragment is rejected as well.
        if fragment in name:
            return False

    # Modules without a backing file, or backed by a shared object, are
    # rejected too.
    path = getattr(module, '__file__', '')
    return bool(path) and not path.endswith('.so')

def reducer_only_new(key, rows):
    """Yield the lone row of a key group if it came from table index 1.

    When the group holds exactly one row and that row's ``'@table_index'``
    equals 1 (the "new" table, per the original inline comments), the row is
    emitted with its ``'@table_index'`` entry removed. Groups with zero rows
    or with more than one row emit nothing.

    Args:
        key: Key of the current group (unused).
        rows: Iterable of dict rows; the lone row of a group must carry an
            ``'@table_index'`` entry.

    Yields:
        The single row of the group, without its ``'@table_index'`` entry.

    Raises:
        KeyError: If the lone row has no ``'@table_index'`` entry.
    """
    # Only the first row and the group size matter, so the group is counted
    # while streaming instead of being buffered in a list.
    first_row = None
    rows_count = 0
    for current in rows:
        if rows_count == 0:
            first_row = current
        rows_count += 1

    # A unique value: keep it only when it comes from the new table.
    if rows_count == 1 and first_row['@table_index'] == 1:
        del first_row['@table_index']
        yield first_row

def reducer_only_unique_single_table(key, rows):
    """Yield one ``{'id': ...}`` row per non-empty key group.

    The whole group is consumed and a single new row is emitted that carries
    only the ``'id'`` value of the group's last row. An empty group emits
    nothing instead of failing on a missing row.

    Args:
        key: Key of the current group (unused).
        rows: Iterable of dict rows, each with an ``'id'`` entry.

    Yields:
        A new dict ``{'id': <id of the last row in the group>}``.

    Raises:
        KeyError: If the last row of the group has no ``'id'`` entry.
    """
    last_row = None
    # Drain the group; the loop variable ends up bound to the last row.
    for last_row in rows:
        pass

    if last_row is None:
        # Empty group: there is no row to take the id from.
        return
    yield {'id': last_row['id']}

def reducer_only_unique(key, rows):
    """Yield ``{'id': ...}`` for key groups that contain exactly two rows.

    The whole group is consumed. When it holds exactly two rows, a new row
    carrying only the ``'id'`` value of the last row is emitted; any other
    group size emits nothing.
    NOTE(review): presumably "two rows" means one row from each of two input
    tables -- confirm against the reduce call that uses this function.
    """
    seen = 0
    final_row = None
    # enumerate(..., 1) leaves ``seen`` equal to the number of rows consumed.
    for seen, final_row in enumerate(rows, 1):
        pass

    if seen != 2:
        return
    yield {'id': final_row['id']}

class FilterCrashGroups(object):
    """Callable mapper that keeps rows whose crash metrics reach a threshold.

    A row is emitted (at most once) when either
    ``row['norm(ym:cr2:crashes)']`` is at least ``crashes_threshold`` or
    ``row['ym:cr2:crashesDevicesPercentage']`` is at least
    ``devices_thershold``. Metric values are converted with ``float()``
    before comparison. A metric that is missing from the row, or present
    with the value ``None`` (presumably a null cell -- confirm against the
    table schema), is skipped rather than raising.
    """

    # Row fields holding the two metrics that are compared to the thresholds.
    CRASHES_FIELD = 'norm(ym:cr2:crashes)'
    DEVICES_FIELD = 'ym:cr2:crashesDevicesPercentage'

    def __init__(self, crashes_threshold, devices_thershold):
        """Store the two thresholds.

        Args:
            crashes_threshold: Minimum value of the crashes metric.
            devices_thershold: Minimum value of the crashed-devices metric.
                The misspelled parameter/attribute name is kept on purpose
                so existing keyword callers and attribute readers keep
                working.
        """
        self.crashes_threshold = crashes_threshold
        self.devices_thershold = devices_thershold

    def __call__(self, row):
        """Yield ``row`` if at least one of its metrics reaches its threshold.

        Raises:
            ValueError: If a metric value cannot be parsed by ``float()``.
        """
        # ``or`` short-circuits, so the devices metric is only inspected when
        # the crashes metric did not already qualify the row.
        if (self._reaches(row, self.CRASHES_FIELD, self.crashes_threshold)
                or self._reaches(row, self.DEVICES_FIELD, self.devices_thershold)):
            yield row

    @staticmethod
    def _reaches(row, field, threshold):
        """Return True when ``row[field]`` exists, is not None and is >= threshold."""
        if field not in row:
            return False
        value = row[field]
        # float(None) would raise TypeError; treat a None value like a
        # missing metric instead.
        return value is not None and float(value) >= threshold



def main():
    """Filter one day's crash table by thresholds and store the result in YT.

    Parses the command line, configures the YT client (token, proxy URL and
    pickling options), then runs ``FilterCrashGroups`` as a map operation
    from ``<out_path>/<api_key>/new/<date>`` to
    ``<out_path>/<api_key>/filtered/<date>``. The thresholds that were used
    are recorded as attributes on the output table.

    When ``--date`` is omitted (or empty) yesterday's local date is used, as
    promised by the option's help text.
    """
    parser = argparse.ArgumentParser(description='Filters crashes by specific thresholds, APIKey, and date and save it to YT')
    parser.add_argument('--crashes_threshold', type=float, required=False, default=0.5, help='Crashes count threshold in percents')
    parser.add_argument('--crashes_devices_threshold', type=float, required=False, default=0.5, help='Crashes devices count threshold in percents')
    parser.add_argument('--yt_token', required=True, help='Yt Token')
    parser.add_argument('--cluster', required=False, default='hahn', help='Yt Cluster to save results in')
    parser.add_argument('--api_key', required=True, help='APIKey for application')
    parser.add_argument('--out_path', required=True, help='Root path in YT')
    parser.add_argument('--date', required=False, help='Date in format YYYY-MM-DD, default yesterday')
    args = parser.parse_args()

    yt.config["token"] = args.yt_token
    yt.config["proxy"]["url"] = "{}.yt.yandex.net".format(args.cluster)
    yt.config['pickling']['dynamic_libraries']['enable_auto_collection'] = True
    yt.config['pickling']['module_filter'] = module_filter

    api_key = args.api_key  # e.g. 14836
    date = args.date  # e.g. '2019-11-06'
    if not date:
        # Without this default, a missing --date reached os.path.join as
        # None and raised TypeError.
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        date = yesterday.isoformat()  # YYYY-MM-DD

    crashes_threshold = args.crashes_threshold
    devices_threshold = args.crashes_devices_threshold

    # NOTE: os.path.join uses the local OS path separator, so these paths are
    # only built with '/' on POSIX hosts.
    out_path_root = os.path.join(args.out_path, api_key)
    filtered_dir = os.path.join(out_path_root, 'filtered')
    path_unfiltered = os.path.join(out_path_root, 'new', date)
    path_filtered = os.path.join(filtered_dir, date)

    # Make sure the parent node of the output table exists before mapping.
    if not yt.exists(filtered_dir):
        yt.create('map_node', filtered_dir)

    yt.run_map(FilterCrashGroups(crashes_threshold, devices_threshold), path_unfiltered, path_filtered)

    # Record which thresholds produced this table.
    yt.set_attribute(path_filtered, 'crashes_threshold', crashes_threshold)
    yt.set_attribute(path_filtered, 'devices_threshold', devices_threshold)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()
