#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import codecs
import argparse
import json
import re


def _compile_rules(rules):
    """Return ``(compiled_pattern, rule)`` pairs for the rules used to filter.

    Rules that carry a 'force_status' key or that have no 'regexp' key are
    skipped. Compiling here, once, keeps the per-line loop free of invariant
    work and surfaces an invalid pattern before any output is written.
    """
    return [
        (re.compile(rule['regexp']), rule)
        for rule in rules
        if 'force_status' not in rule and 'regexp' in rule
    ]


def _apply_rules(text, compiled_rules):
    """Run *text* through the rules in order and return ``(keep, text)``.

    For each rule whose pattern is found in the text:

    * if any 'whitelist' substring occurs in the text, the rule is ignored;
    * otherwise, if the rule has a 'regexp_replace' key, every match is
      replaced and later rules see the rewritten text;
    * otherwise the text is rejected (``keep`` is False).
    """
    for pattern, rule in compiled_rules:
        if not pattern.search(text):
            continue
        # 'whitelist' is optional: a missing or null entry means no exceptions.
        if any(word in text for word in (rule.get('whitelist') or ())):
            continue
        if 'regexp_replace' not in rule:
            # Rejected lines are never written, so later rules cannot matter.
            return False, text
        text = pattern.sub(rule['regexp_replace'], text)
    return True, text


def main():
    """Filter and rewrite the first column of a TSV file using regex rules.

    Command-line options:
        --input_tsv   UTF-8 TSV file to read.
        --input_json  JSON file holding a list of rule objects.
        --output_tsv  UTF-8 TSV file to write.

    Each rule object may contain the keys 'regexp', 'whitelist',
    'regexp_replace' and 'force_status'; see ``_compile_rules`` and
    ``_apply_rules`` for how they are interpreted. Lines rejected by a rule
    are dropped; all other lines are written with their (possibly rewritten)
    first column.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_tsv', required=True)
    parser.add_argument('--input_json', required=True)
    parser.add_argument('--output_tsv', required=True)
    args = parser.parse_args()

    # Read the rules through a context manager so the handle is closed, and
    # decode explicitly as UTF-8 like the TSV files below.
    with codecs.open(args.input_json, 'r', 'utf8') as f_rules:
        rules = _compile_rules(json.load(f_rules))

    with codecs.open(args.input_tsv, 'r', 'utf8') as f_in, \
            codecs.open(args.output_tsv, 'w', 'utf8') as f_out:
        for line in f_in:
            # NOTE(review): strip() removes all surrounding whitespace, tabs
            # included, so empty leading/trailing columns are dropped. Kept
            # as-is to preserve existing output; confirm this is intended.
            tabs = line.strip().split('\t')
            keep, tabs[0] = _apply_rules(tabs[0], rules)
            if keep:
                f_out.write('\t'.join(tabs) + '\n')



# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
