package com.twitch.spark.queries;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import java.util.List;


/**
 * Spark driver that loads line-oriented input, parses each line into a {@link Netflow},
 * drops the records rejected by {@code NetflowFilters.FilterNulls}, and prints the first
 * {@value #TOP_FLOW_COUNT} flows according to {@code Utilities.SortByBytesDesc}.
 *
 * <p>NOTE(review): despite the class name, nothing in this class writes sequence files;
 * the only output is {@code Utilities.printFlows}. Confirm whether the name or the job
 * body is out of date.
 */
public final class ConvertToSequenceFiles {

    /** Number of positional command-line arguments that {@link #main} reads. */
    private static final int REQUIRED_ARGS = 3;

    /** The input is repartitioned to {@code defaultParallelism * PARTITION_MULTIPLIER} partitions. */
    private static final int PARTITION_MULTIPLIER = 20;

    /** How many flows are taken from the ordered result and printed. */
    private static final int TOP_FLOW_COUNT = 500;

    /**
     * Runs the job.
     *
     * @param args positional arguments: {@code args[0]} and {@code args[1]} are the two
     *             credential values forwarded to {@code Utilities.setAuth} (presumably an
     *             access key and a secret key -- verify against that helper);
     *             {@code args[2]} is the path handed to {@code Utilities.inputPath}.
     *             Any additional arguments are ignored.
     * @throws IllegalArgumentException if fewer than three arguments are supplied
     * @throws Exception if the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException.
        // The argument values are deliberately not echoed: two of them are credentials.
        if (args == null || args.length < REQUIRED_ARGS) {
            int supplied = (args == null) ? 0 : args.length;
            throw new IllegalArgumentException(
                    "Usage: ConvertToSequenceFiles <access> <secret> <path> (expected "
                            + REQUIRED_ARGS + " arguments, got " + supplied + ")");
        }

        String access = args[0];
        String secret = args[1];
        String path = args[2];

        SparkConf conf = new SparkConf().setAppName("ConvertToSequenceFiles");

        // Use Kryo and pre-register the record classes that flow through the RDDs.
        conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        Class<?>[] classes = new Class<?>[]{
                Netflow.class,
                Netflow.NetflowJSON.class,
        };
        conf.registerKryoClasses(classes);

        // try-with-resources guarantees the context is closed even when the job throws.
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            Utilities.setAuth(jsc, access, secret);

            JavaRDD<String> lines = jsc.textFile(Utilities.inputPath(path))
                    .repartition(jsc.defaultParallelism() * PARTITION_MULTIPLIER);

            JavaRDD<Netflow> flows = lines.map(new Utilities.ParseJSON());

            JavaRDD<Netflow> filtered = flows
                    .filter(new NetflowFilters.FilterNulls());

            // takeOrdered returns the first TOP_FLOW_COUNT elements under the comparator's
            // ordering; presumably that is largest byte count first -- see SortByBytesDesc.
            List<Netflow> top500 = filtered.takeOrdered(TOP_FLOW_COUNT, new Utilities.SortByBytesDesc());
            Utilities.printFlows(top500);
        }
    }


}
