package ru.yandex.solomon.experiments.gordiychuk.grid;

import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.logging.log4j.Level;

import ru.yandex.monlib.metrics.registry.MetricRegistry;
import ru.yandex.solomon.experiments.gordiychuk.recovery.MetricsPushScheduler;
import ru.yandex.solomon.main.logger.LoggerConfigurationUtils;
import ru.yandex.solomon.util.actors.AsyncActorBody;
import ru.yandex.solomon.util.actors.AsyncActorRunner;
import ru.yandex.solomon.util.host.HostUtils;

import static java.util.stream.Collectors.groupingBy;
import static ru.yandex.solomon.experiments.gordiychuk.grid.IoUtils.gzReader;
import static ru.yandex.solomon.experiments.gordiychuk.grid.IoUtils.gzWriter;
import static ru.yandex.solomon.experiments.gordiychuk.grid.IoUtils.ls;
import static ru.yandex.solomon.experiments.gordiychuk.grid.IoUtils.mkdir;

/**
 * @author Vladimir Gordiychuk
 */
public class EstimationSplitCli {

    public static void main(String[] args) {
        if (args.length != 1) {
            System.err.println("Usage: tool {dir}");
            System.exit(1);
        }

        try {
            LoggerConfigurationUtils.simpleLogger(Level.INFO);
            MetricsPushScheduler.schedulePush();
            Path root = Path.of(args[0]);
            var source = root.resolve("shards");
            var target = root.resolve("result");
            mkdir(source);
            mkdir(target);
            var progressMetric = MetricRegistry.root().gaugeDouble("progress");
            var paths = ls(source);

            AtomicInteger completed = new AtomicInteger();
            AtomicInteger index = new AtomicInteger();
            AsyncActorBody body = () -> {
                var idx = index.getAndIncrement();
                if (idx >= paths.size()) {
                    return CompletableFuture.completedFuture(AsyncActorBody.DONE_MARKER);
                }

                var path = paths.get(idx);
                String shardId = path.getFileName().toString();
                return CompletableFuture.supplyAsync(() -> {
                    readAndSplit(shardId, path, target);
                    progressMetric.set(completed.incrementAndGet() * 100. / paths.size());
                    String progress = String.format("%.2f%%", progressMetric.get());
                    System.out.println("Processing at " + HostUtils.getShortName() + " progress: " + progress);
                    return null;
                });
            };

            AsyncActorRunner runner = new AsyncActorRunner(body, ForkJoinPool.commonPool(), 10);
            runner.start().join();

            System.exit(0);
        } catch (Throwable e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    private static void readAndSplit(String shardId, Path source, Path target) {
        try (var reader = gzReader(source);
             var stream = reader.lines())
        {
            stream.collect(groupingBy(line -> line.substring(0, line.indexOf('|'))))
                    .entrySet()
                    .parallelStream()
                    .forEach(entry -> {
                        String numId = entry.getKey();
                        Path file = target.resolve(numId).resolve(shardId);
                        mkdir(file.getParent());
                        write(numId, file, entry.getValue());
                    });
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    private static void write(String numId, Path path, List<String> lines) {
        int offset = numId.length() + 1;
        try (var writer = gzWriter(path)) {
            for (String line : lines) {
                writer.write(line, offset, line.length() - offset);
                writer.newLine();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
