# Sort ARRAY[1..ELEMENTS] in place so that elements for which hostCmp()
# reports a higher weight come first (selection sort).
#
#   ARRAY    - 1-based array, reordered in place
#   ELEMENTS - number of elements in ARRAY
# best, i, j are locals.
function isort(ARRAY, ELEMENTS,    best, i, j) {
    # i is the slot being filled; `best` always mirrors ARRAY[i]
    for (i = 1; i < ELEMENTS; i++) {
        best = ARRAY[i];
        for (j = i + 1; j <= ELEMENTS; j++) {
            if (hostCmp(ARRAY[j], best) <= 0) {
                continue;
            }
            # ARRAY[j] outranks the current best: swap it into slot i
            best = ARRAY[j];
            ARRAY[j] = ARRAY[i];
            ARRAY[i] = best;
        }
    }
}

# Map a datacenter code to its geo group: "ws", "myt" and "iva" all
# collapse into "msk"; any other code is returned unchanged.
function geoFromDc(dc,  geo) {
    geo = (dc == "ws" || dc == "myt" || dc == "iva") ? "msk" : dc;
    return geo;
}

# Derive the geo tag of a host name: the leading run of ASCII letters is
# taken as the datacenter code and passed through geoFromDc().
# Returns "unk" when the name does not start with a letter.
function geoTag(host,   tag) {
    if (!match(host, "^[a-zA-Z]+")) {
        return "unk";
    }
    tag = substr(host, RSTART, RLENGTH);
    return geoFromDc(tag);
}

# Build "<host>:<copy port>" for a tag from the global hosts[] and
# copyPorts[] tables.
function backboneUrl(tag) {
    return (hosts[tag] ":" copyPorts[tag]);
}

# Build "<fastbone host>:<copy port>" for a tag; the host part comes from
# fastboneHost(), the port from the global copyPorts[] table.
function fastboneUrl(tag,    sep, host) {
    host = fastboneHost(tag);
    return host ":" copyPorts[tag];
}

# Translate the host registered for `tag` (global hosts[]) into the name
# used for fastbone URLs:
#   *.disk.yandex* / *.mail.yandex*  -> "fbs-<first label>.mail.yandex.net"
#   any other name containing yandex -> "fb-<host>"
#   anything else (e.g. an IP)       -> returned unchanged
function fastboneHost(tag,    sep, host) {
    host = hosts[tag];
    if (!match(host, "\\.disk\\.yandex") && !match(host, "\\.mail\\.yandex")) {
        if (match(host, "yandex")) {
            return "fb-"host;
        }
        #ip
        return host;
    }
    # keep only the first label of the disk/mail host name
    sep = index(host, ".");
    return "fbs-"substr(host, 1, sep - 1)".mail.yandex.net";
}

# Compare two hosts by how desirable they are as a copy source for the
# global currentTag. Returns w1 - w2: positive when h1 is preferred,
# negative when h2 is preferred.
#
# Weights:
#   10 when the host's geo tag equals the geo tag of currentTag, else 0;
#   if exactly one host has geo tag "unk", the other host's weight is set to 1;
#   if both are "unk", each weight is a random number in [0, 2).
function hostCmp(h1, h2,    h1Tag, h2Tag, w1, w2, cTag) {
    cTag = geoTag(currentTag);
    h1Tag = geoTag(h1);
    h2Tag = geoTag(h2);

    w1 = (cTag == h1Tag) ? 10 : 0;
    w2 = (cTag == h2Tag) ? 10 : 0;

    if (h1Tag == "unk") {
        if (h2Tag == "unk") {
            #both are old lucenes
            w1 = rand() * 2;
            w2 = rand() * 2;
        } else {
            w2 = 1;
        }
    } else if (h2Tag == "unk") {
        w1 = 1;
    }
    return w1 - w2;
}

# Record that `tag` holds shards start..end of `service`, coalescing the new
# range with already-recorded ranges that are directly adjacent to it.
#
# The global tagShards[service, tag] holds a ";"-separated list of
# "first-last" ranges. A stored range ending at start-1 or beginning at
# end+1 is absorbed into the new range; all other stored ranges are kept in
# their original order and the (possibly widened) new range is appended.
# Overlapping or duplicate ranges are not coalesced.
#
#   service, tag - key into tagShards
#   start, end   - numeric bounds of the range being added (inclusive)
function mergeTagShards(service, tag, start, end,
        #local vars:
        rangeList,
        newRangeStart,
        newRangeEnd,
        sep,
        rangeStart,
        rangeEnd,
        newRangeList,
        range,
        ranges,
        rangeCount,
        r)
{
    if (!((service, tag) in tagShards)) {
        tagShards[service, tag] = start"-"end;
        return;
    }
    rangeList = tagShards[service, tag];
    rangeCount = split(rangeList, ranges, ";");
    newRangeStart = start;
    newRangeEnd = end;
    newRangeList = "";
    # Walk the stored ranges in list order. Each bound of the new range is
    # widened independently, so a new range that bridges two stored ranges
    # absorbs both no matter which of them is visited first.
    for (r = 1; r <= rangeCount; r++) {
        range = ranges[r];
        sep = index(range, "-");
        rangeStart = int(substr(range, 1, sep - 1));
        rangeEnd = int(substr(range, sep + 1));
        if (start == rangeEnd + 1) {
            # stored range sits immediately to the left: extend leftwards
            newRangeStart = rangeStart;
        } else if (end == rangeStart - 1) {
            # stored range sits immediately to the right: extend rightwards
            newRangeEnd = rangeEnd;
        } else {
            # not adjacent: keep the stored range as is
            newRangeList = newRangeList (newRangeList == "" ? "" : ";") range;
        }
    }
    if (newRangeList != "") {
        newRangeList = newRangeList";";
    }
    tagShards[service, tag] = newRangeList newRangeStart"-"newRangeEnd;
}

# Parse one searchmap line ($0) and fold it into the global lookup tables.
#
# Line shape read here: "<service> <key:value,key:value,...>". The keys used
# are host, search_port, tag (optional, "<something>_<port>") and shards
# ("first-last"). Blank lines and lines whose first non-space character is
# "#" are skipped via `next`.
#
# Globals written:
#   cmdPorts[tag], copyPorts[tag], hosts[tag]
#   services[tag]              - ";"-joined list of services seen for the tag
#   serviceTags[tag, service]  - set used to keep services[tag] duplicate-free
#   shardTags[service, shard]  - space-separated tags holding that shard
#   shardsTagsSet[service, shard, tag]
#   tagStart[service, tag], tagEnd[service, tag] - min/max shard seen
#   tagShards (through mergeTagShards, only for tag == currentTag)
function main() {
    # skip blank lines and comments
    if (NF == 0 || match($0, "^[ ]*#")) {
        next;
    }

    # parse searchmap entry
    service = $1;
    split($2, cols, ",");
    delete params;
    for (k in cols) {
        # split on the first ":" only, so values may themselves contain ":"
        sep = index(cols[k], ":");
        key = substr(cols[k], 1, sep - 1);
        value = substr(cols[k], sep + 1);
        params[key] = value;
    }
    host = params["host"];
    port = params["search_port"];
    if ("tag" in params) {
        # an explicit tag carries the port as its second "_"-separated field
        tag = params["tag"];
        split(tag, ii, "_");
        port = ii[2];
    } else {
        tag = host"_"port;
    }
    copyPort = port + 4;

    lineShards = params["shards"];
    sep = index(lineShards, "-");
    start = int(substr(lineShards, 1, sep - 1));
    end = int(substr(lineShards, sep + 1));

    if (tag == currentTag) {
        mergeTagShards(service, tag, start, end);
    }

    cmdPorts[tag] = port;
    copyPorts[tag] = copyPort;
    hosts[tag] = host;
    if (!((tag, service) in serviceTags)) {
        if (!(tag in services)) {
            services[tag] = service;
        } else {
            services[tag] = services[tag]";"service;
        }
        serviceTags[tag, service] = 1;
    }

    # create the tag list for each shard of this record
    for (s = start; s <= end; s++) {
        if (!((service, s, tag) in shardsTagsSet)) {
            shardTags[service, s] = tag" "shardTags[service, s];
            shardsTagsSet[service, s, tag] = 1;
        }
    }

    # combine multiple records for a single (service, tag): keep the lowest
    # start and the highest end. The membership test must use the same
    # (service, tag) key the value is stored under.
    if (!((service, tag) in tagStart) || tagStart[service, tag] > start) {
        tagStart[service, tag] = start;
    }
    if (!((service, tag) in tagEnd) || tagEnd[service, tag] < end) {
        tagEnd[service, tag] = end;
    }
}

# Print the copy jobs for every shard range that `tag` holds in `service`.
#
# Each range listed in the global tagShards[service, tag] is cut into runs of
# consecutive shards that share the same source list (shardTags[service,
# shard]); one line is printed per run:
#
#   &service=<service>&from=<url>;<url>;...&shards=<first>-<last>
#
# The source tags of a run are ordered with isort(); the global currentTag is
# never listed as a source. All fastbone URLs come first, followed by the
# backbone URLs of the same sources in the same order.
#
# Ranges are processed in list order and jobs in ascending shard order, so
# the output order does not depend on awk's unspecified `for (k in arr)`
# ordering. All temporaries are locals.
function printServiceJobs(service, tag,
        #local vars:
        rangeList,
        range,
        sep,
        start,
        end,
        jobs,
        prevShardSources,
        shard,
        shardSources,
        jobStartShard,
        ranges,
        rangeCount,
        r,
        jobFirst,
        jobLast,
        jobCount,
        j,
        sourcesTags,
        sourcesCount,
        i,
        source,
        fastUrls,
        backUrls,
        urls)
{
    rangeList = tagShards[service, tag];
    rangeCount = split(rangeList, ranges, ";");
    for (r = 1; r <= rangeCount; r++) {
        range = ranges[r];
        sep = index(range, "-");
        start = int(substr(range, 1, sep - 1));
        end = int(substr(range, sep + 1));

        # Form jobs for this range, splitting wherever the source list
        # changes (ex.: many to one copying, resharding).
        # jobs[n] is the source list of job n; jobFirst[n]..jobLast[n] its shards.
        jobCount = 0;
        prevShardSources = "";
        jobStartShard = start;
        for (shard = start; shard <= end; shard++) {
            shardSources = shardTags[service, shard];
            if (prevShardSources == "") {
                prevShardSources = shardSources;
            }
            if (shardSources != prevShardSources) {
                # this shard has a different source list: close the run
                # that ended on the previous shard
                ++jobCount;
                jobFirst[jobCount] = jobStartShard;
                jobLast[jobCount] = shard - 1;
                jobs[jobCount] = prevShardSources;
                jobStartShard = shard;
                prevShardSources = shardSources;
            }
        }
        # final job
        ++jobCount;
        jobFirst[jobCount] = jobStartShard;
        jobLast[jobCount] = end;
        jobs[jobCount] = shardSources;

        for (j = 1; j <= jobCount; j++) {
            sourcesCount = split(jobs[j], sourcesTags, " ");
            isort(sourcesTags, sourcesCount);
            fastUrls = "";
            backUrls = "";
            for (i = 1; i <= sourcesCount; i++) {
                source = sourcesTags[i];
                if (source == currentTag) {
                    # never copy from ourselves
                    continue;
                }
                fastUrls = fastUrls (fastUrls == "" ? "" : ";") fastboneUrl(source);
                backUrls = backUrls (backUrls == "" ? "" : ";") backboneUrl(source);
            }
            # fastbone URLs first, backbone URLs after them
            urls = fastUrls;
            if (backUrls != "") {
                urls = (urls == "" ? backUrls : urls";"backUrls);
            }
            print "&service="service"&from="urls"&shards="jobFirst[j]"-"jobLast[j];
        }
    }
}

# Print the copy jobs of every service registered for `tag` in the global
# services[] table (a ";"-joined list built while reading the searchmap).
#
# Services are visited in the order they appear in that list, so the output
# order does not depend on awk's unspecified `for (k in arr)` ordering.
# The shard ranges passed on are always those of the global currentTag.
# tagServices, svcs, svcCount and n are locals.
function printJobs(tag,    tagServices, svcs, svcCount, n) {
    tagServices = services[tag];
    svcCount = split(tagServices, svcs, ";");
    for (n = 1; n <= svcCount; n++) {
        printServiceJobs(svcs[n], currentTag);
    }
}

# Validate the required `tag` variable and remember it as currentTag.
# BEGIN runs before any input or operand assignment is processed, so `tag`
# has to be supplied with -v tag=... . It is copied to currentTag because
# main() reuses the global `tag` for every parsed record.
BEGIN {
    maxINum = 0;
    if (tag == "") {
        # stdout carries the generated job list, so report usage errors on stderr
        print "missing required argument: tag" > "/dev/stderr";
        exit 1;
    }
    currentTag = tag;
}

# Main rule: runs for every input record; main() parses the record and
# updates the global tables (or skips it via `next`).
{
    main();
}

# After all input has been read, print the jobs for the requested tag.
END {
    printJobs(currentTag);
}
