importClass(ru.yandex.misc.log.mlf.LoggerFactory);
importClass(ru.yandex.chemodan.util.json.JsonNodeUtils)
importClass(ru.yandex.chemodan.mpfs.MpfsUser)
importPackage(ru.yandex.inside.yt.kosher.cypress)
importPackage(ru.yandex.inside.yt.kosher.tables)

// Logger shared by the whole script.
var logger = LoggerFactory.getLogger("html_generator");

// User whose clusters are rendered, and the YT table holding those clusters.
// uidS is kept as a string because it is also used as a prefix when building "<uid>:<id>" resource ids.
uidS = "50273844"
ytPath = YPath.simple("//home/disk-dev/tolmalev/auto_thema/tolmalev_last_365_days_clusters_12000")

// Alternative user / table; uncomment both lines together to switch.
//uidS = "122625849"
//ytPath = YPath.simple("//home/disk-dev/tolmalev/auto_thema/122625849_last_1095_days_clusters_12000")

// Fixed pool of 20 named threads; find_word() submits its dot-product tasks to it.
var executer = java.util.concurrent.Executors.newFixedThreadPool(20, new ru.yandex.misc.thread.factory.ThreadNameIndexThreadFactory("html_generator"));

uid = MpfsUser.of(uidS)

// Word lists, one word per line, with empty lines dropped.
// NOTE(review): judging by the file names these are adjectives and nouns - confirm.
var ajd =  (new File2("/tmp/word_adj")).readLines().filter(function(s) {return !s.isEmpty()});
var noun =  (new File2("/tmp/word_rus.txt")).readLines().filter(function(s) {return !s.isEmpty()});

// Fetch text vectors for both word lists and put them straight into the "textVectors" field of cvi2tProcessor.
// NOTE(review): presumably this lets cvi2tProcessor.dotProduct(i2t, words) in find_words() work
// without extra lookups - confirm against Cvi2tProcessor.
getField(cvi2tProcessor, "textVectors").putAll(diskSearchClient.getTextCvVectors(ajd))
getField(cvi2tProcessor, "textVectors").putAll(diskSearchClient.getTextCvVectors(noun))

// w1 / w2 are the two candidate word lists ranked per cluster by the main loop.
var w1 = ajd
var w2 = noun
// NOTE(review): `connector` is not referenced anywhere else in this script; the main loop hard-codes " ".
var connector = " "

// Earlier experiments, kept for reference. The second line refers to `adj` / `nouns`,
// which are not defined in this script (the lists above are named `ajd` / `noun`).
//var all_words = (new File2("/tmp/word_rus.txt")).readLines().filter(function(s) {return !s.isEmpty()});
//var all_words = adj.flatMap(function(a) {return nouns.map(function(n){return a + " " + n})})

// Read every row of the YT table into memory as a JSON node.
nodes = Cf.arrayList()
ytClient.tables().read(Option.none(), false, ytPath, YTableEntryTypes.JACKSON, function(it) {
    while (it.hasNext()) {
        nodes.add(it.next())
    }
})

// Only the first 50 rows (clusters) are rendered.
nodes = nodes.take(50)


// Build the flat list of "<uid>:<file id>" resource ids for every item of every loaded cluster.
// Each row stores its items as a JSON-encoded string in the "items" column, so it is parsed first.
allResourceIds = Cf.arrayList()
nodes.forEach(function(cluster) {
    var clusterItems = JsonNodeUtils.getNode(cluster.get("items").textValue())
    var total = clusterItems.size()

    for (var k = 0; k < total; k++) {
        var fileId = clusterItems.get(k).get("id").textValue()
        allResourceIds.add(uidS + ":" + fileId)
    }
})
print(allResourceIds.size())

// Load MPFS file infos for all collected resource ids, 50 ids per request,
// and index them by the string form of their resource id for getPreviewUrl().
allFileInfos = Cf.hashMap()
allResourceIds.paginate(50).forEach(function(part) {
    logger.info("loading info from mpfs")
    // `var` keeps the batch local to the callback; a bare assignment would create a global `infos`.
    var infos = mpfsClient.bulkInfoByResourceIds(uid, part, Cf.list("/disk", "/photounlim"))
    allFileInfos.putAll(infos.toMapMappingToKey(function(info) {return info.meta.resourceId.get().toString() }))
})

logger.info("loaded all info from mpfs")

/**
 * Looks up the preview of a file among the infos stored in allFileInfos.
 *
 * The lookup key is "<uidS>:<id>", the same format used to build allResourceIds.
 *
 * @param id  file id as stored in a cluster item's "id" field
 * @return    optional value obtained by flat-mapping the file info (if any) to its `meta.preview`;
 *            callers check it with isPresent() before use
 */
function getPreviewUrl(id) {
    // Declared with `var` so that calls do not leak `resId` / `infoO` into the global scope.
    var resId = uidS + ":" + id
    var infoO = allFileInfos.getO(resId)

    return infoO.flatMapO(function(r) {return r.meta.preview })
}

/**
 * Picks the single best-scoring phrase for a cluster vector.
 *
 * Fetches a text vector for every entry of `words`, computes Cvi2tProcessor.dotProduct(cluster vector, text vector)
 * for each of them on the shared `executer` pool and returns the entry with the highest score.
 *
 * @param node   JSON node whose text value is a JSON array of numbers (the cluster's "i2t_average");
 *               the first 200 elements are used, each narrowed to a byte
 * @param words  list of candidate words / phrases; must be non-empty, since `.first()` is called on the ranked result
 * @return       string of the form "<word>(<score>)"
 */
function find_word(node, words) {
    node = ru.yandex.chemodan.util.json.JsonNodeUtils.getNode(node.textValue())
    var i2t_l = Cf.x(node.elements()).toList().map(function(n) {return n.numberValue().byteValue()});
    // Java byte[200] built via reflection so it can be passed to the Java dotProduct method.
    var i2t = java.lang.reflect.Array.newInstance(java.lang.Byte.TYPE, 200)
    for (var y = 0; y < 200; y++) {i2t[y] = i2t_l.get(y)}

    // Must be a local: the main loop keeps its own timer in a global named `start`,
    // and a bare assignment here would overwrite it in the middle of the caller's measurement.
    var start = Instant.now()
    var vectors = diskSearchClient.getTextCvVectors(words)
    logger.info("get {} vectors in {}", words.size(), ru.yandex.misc.time.TimeUtils.secondsStringToNow(start))

    // First map2 submits one task per text vector, second map2 waits for the results.
    var found = vectors.entries().map2(function(textVector) {
        return executer.submit(new java.util.concurrent.Callable(function() {return ru.yandex.chemodan.app.lentaloader.reminder.Cvi2tProcessor.dotProduct(i2t, textVector);}));
    }).map2(function(f){return f.get()}).sortedBy2Desc().first()

    return found._1 + "(" + found._2 + ")"
}

/**
 * Ranks `words` against a cluster vector and returns the best `cnt` of them.
 *
 * Unlike find_word(), no text vectors are fetched here: scoring is delegated to
 * cvi2tProcessor.dotProduct(vector, words).
 * NOTE(review): presumably that relies on the vectors preloaded into cvi2tProcessor's
 * "textVectors" at the top of the script - confirm.
 *
 * @param node   JSON node whose text value is a JSON array of numbers; the first 200 are used as bytes
 * @param words  list of candidate words
 * @param cnt    how many top-scoring words to keep
 * @return       the `cnt` words with the highest scores, best first
 */
function find_words(node, words, cnt) {
    var parsed = ru.yandex.chemodan.util.json.JsonNodeUtils.getNode(node.textValue())
    var components = Cf.x(parsed.elements()).toList().map(function(el) {
        return el.numberValue().byteValue()
    });

    // Copy the components into a Java byte[200].
    var vector = java.lang.reflect.Array.newInstance(java.lang.Byte.TYPE, 200)
    var idx = 0
    while (idx < 200) {
        vector[idx] = components.get(idx)
        idx++
    }

    var scores = cvi2tProcessor.dotProduct(vector, words)
    var ranked = words.zip(scores).sortedBy2Desc()

    return ranked.get1().take(cnt);
}

// Render every loaded cluster as a heading (with the best matching phrase) followed by a
// table of image previews, `cols` previews per row, then write the result to /tmp.
cols = 8
sb = new java.lang.StringBuilder()

try {
    for (var i = 0; i < nodes.size(); i++) {
        logger.info("processing cluster {} / {}", i, nodes.size())

        json = nodes.get(i)
        items = JsonNodeUtils.getNode(json.get("items").textValue())

        // Top 50 candidates from each word list for this cluster.
        start = Instant.now()
        w1s = find_words(json.get("i2t_average"), w1, 50)
        logger.info("found top w1 in {}", ru.yandex.misc.time.TimeUtils.secondsStringToNow(start))

        start = Instant.now()
        w2s = find_words(json.get("i2t_average"), w2, 50)
        logger.info("found top w2 in {}", ru.yandex.misc.time.TimeUtils.secondsStringToNow(start))

        // Candidate phrases: single words, "<w1> <w2>" pairs and "<w2> и <w2>" pairs.
        words = w1s.plus(w2s)
            .plus(w1s.flatMap(function(a) {return w2s.map(function(n){return a + " " + n})}))
            .plus(w2s.flatMap(function(a) {return w2s.map(function(n){return a + " и " + n})}))

        // Dedicated timer: find_word() keeps its own timing and must not be able to
        // interfere with the measurement of the whole step.
        var totalStart = Instant.now()
        var bestPhrase = find_word(json.get("i2t_average"), words)
        sb.append("<h1>Cluster " + i + ", size : " + items.size() + " / " + bestPhrase + "</h1>")
        sb.append("<table>")
        logger.info("found top total in {}", ru.yandex.misc.time.TimeUtils.secondsStringToNow(totalStart))

        // `o` counts cells actually written; items without a preview are skipped and
        // must neither open nor close a row, otherwise empty <tr></tr> pairs are emitted.
        o = 0
        for (var j = 0; j < items.size(); j++) {
            item = items.get(j)
            match = item.get("match").numberValue().intValue()
            beauty = item.get("beauty").numberValue()
            id = item.get("id").textValue()

            preview = getPreviewUrl(id)
            if (!preview.isPresent()) {
                continue
            }

            if (o % cols == 0) {
                sb.append("<tr>")
            }
            sb.append("<td>")
            sb.append(match + "/ " + beauty + "<br/>")
            sb.append("<img src=\"" + preview.get() + "\"/>")
            sb.append("</td>")
            o++
            if (o % cols == 0) {
                sb.append("</tr>")
            }
        }
        // Close a trailing, partially filled row.
        if (o % cols != 0) {
            sb.append("</tr>")
        }
        sb.append("</table>")
    }
} finally {
    // The pool is created once per script run and is not needed after the last find_word() call;
    // without shutdown its worker threads stay alive after the script finishes.
    executer.shutdown()
}

f = new File2("/tmp/" + ytPath.name() + "_3.html")
f.write(sb.toString())