#!/bin/sh
#
# Split awk fields 2 and 3 of an input file into two side files: field 2 of
# every selected line goes to "<short>.ids" and field 3 to "<short>.names",
# where <short> is derived from the input file name. The ids file is then
# sorted numerically in place.
#
# Arguments:
#   input_file      file to read; decompressed with gzip when it ends in ".gz"
#   filter_regexp   optional; lines whose field 3 matches are dropped
#   include_regexp  optional; lines whose field 3 matches are kept even when
#                   the filter matches
#
# Exits with status 1 on a usage error.

# Enable errexit here rather than in the shebang, so it also applies when the
# script is started as "sh <script>".
set -e

# Between one and three arguments are accepted; anything else is a usage error.
if [ "$#" -lt 1 ] || [ "$#" -gt 3 ]; then
    echo "usage: <input_file> [filter_regexp] [include_regexp]" >&2
    exit 1
fi

# Build the awk pattern fragments that select lines by their third field.
#   $1 - filter regexp: lines whose field 3 matches are dropped (may be empty)
#   $2 - include regexp: lines whose field 3 matches are kept even when the
#        filter matches (may be empty; only valid together with a filter)
# Sets EXCLUDE_EXPR and INCLUDE_EXPR; both are empty when no filter is given.
# Returns 1 when an include regexp is given without a filter, because the
# resulting awk pattern would start with "||" and be a syntax error.
# Both regexps are spliced into awk /.../ literals, so a "/" inside them has
# to be escaped by the caller.
build_filter_exprs() {
    # Start from empty values so same-named variables inherited from the
    # environment cannot leak into the awk program.
    EXCLUDE_EXPR=
    INCLUDE_EXPR=
    if [ -n "$1" ]; then
        EXCLUDE_EXPR='$3 !~ /'"$1"'/'
        if [ -n "$2" ]; then
            INCLUDE_EXPR='|| $3 ~ /'"$2"'/ '
        fi
    elif [ -n "$2" ]; then
        echo "include_regexp requires a non-empty filter_regexp" >&2
        return 1
    fi
}
build_filter_exprs "$2" "$3" || exit 1

# Derive the output file names from the input path.
#   $1 - input file path
# Sets SHORTNAME to the input path with everything from the first "." of its
# last path component removed, NAMES to "<SHORTNAME>.names" and IDS to
# "<SHORTNAME>.ids". Only the last component is shortened, so dots in the
# directory part (e.g. "./file.gz" or "v1.2/file.gz") do not truncate the path.
set_output_names() {
    input_base=${1##*/}
    # Removing the quoted (literal) base name from the end leaves the
    # directory prefix, including its trailing "/", or nothing at all.
    SHORTNAME=${1%"$input_base"}${input_base%%.*}
    NAMES="${SHORTNAME}.names"
    IDS="${SHORTNAME}.ids"
}
set_output_names "$1"

# Stream the input through awk, which writes field 2 of every selected line
# to $IDS and field 3 to $NAMES. Inputs ending in ".gz" are decompressed with
# gzip; everything else is read as is.

# Refuse to touch the output files when the input cannot be read.
if [ ! -r "$1" ]; then
    echo "cannot read input file: $1" >&2
    exit 1
fi

# Create (or empty) both outputs up front so they exist even when no line is
# selected and never keep data from an earlier run.
: > "$IDS"
: > "$NAMES"

# The input path is passed as a single quoted argument so that spaces or glob
# characters in it survive. The output paths reach awk through the
# environment instead of being spliced into the program text, so quotes or
# backslashes in them cannot break the awk source.
{
    case "$1" in
        *.gz) gzip -cd -- "$1" ;;
        *)    cat -- "$1" ;;
    esac
} | SPLIT_IDS_FILE="$IDS" SPLIT_NAMES_FILE="$NAMES" awk '
BEGIN { ids = ENVIRON["SPLIT_IDS_FILE"]; names = ENVIRON["SPLIT_NAMES_FILE"] }
'"$EXCLUDE_EXPR $INCLUDE_EXPR"'{ print $2 > ids; print $3 > names }'

# Sort the ids file numerically in place; "-o" names the same file as the
# input, so the result replaces the unsorted contents. The expansions are
# quoted so that a path containing spaces or glob characters stays one word.
echo "sorting ids"
sort -n --parallel="$(nproc)" -o "$IDS" "$IDS"
