package com.twitch.spark.queries;

import java.net.InetAddress;
import java.util.*;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.JavaRDD;

/**
 * Spark driver that reads netflow records as text, parses them, filters them
 * and writes out the first 500 records according to
 * {@code Utilities.SortByBytesDesc}.
 *
 * <p>Expected command-line arguments, in order:
 * <ol>
 *   <li>{@code access} - first credential passed to {@code Utilities.setAuth}</li>
 *   <li>{@code secret} - second credential passed to {@code Utilities.setAuth}</li>
 *   <li>{@code path}   - input location, resolved through {@code Utilities.inputPath}</li>
 *   <li>{@code output} - output location, resolved through {@code Utilities.outputPath}</li>
 * </ol>
 */
public final class TopOutboundFlows {
    /** Number of flows kept in the final result. */
    private static final int TOP_FLOW_COUNT = 500;

    /** Input is repartitioned to {@code defaultParallelism * PARTITION_MULTIPLIER} partitions. */
    private static final int PARTITION_MULTIPLIER = 10;

    /** Minimum number of command-line arguments {@link #main(String[])} needs. */
    private static final int REQUIRED_ARG_COUNT = 4;

    // NOTE(review): not read or written anywhere in this class; kept because it
    // is public and may be referenced from outside this file.
    public InetAddress ipFilter;

    /**
     * Runs the job: load, parse, filter, take the top flows, print and save them.
     *
     * @param args {@code access secret path output}; extra trailing arguments are ignored
     * @throws IllegalArgumentException if fewer than four arguments are supplied
     * @throws Exception if any step of the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        // Fail with a readable usage message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < REQUIRED_ARG_COUNT) {
            throw new IllegalArgumentException(
                    "Usage: TopOutboundFlows <access> <secret> <path> <output>");
        }

        String access = args[0];
        String secret = args[1];
        String path = args[2];
        String output = args[3];

        JavaSparkContext jsc = new JavaSparkContext(buildConf());
        try {
            Utilities.setAuth(jsc, access, secret);

            JavaRDD<String> lines = jsc.textFile(Utilities.inputPath(path))
                    .repartition(jsc.defaultParallelism() * PARTITION_MULTIPLIER);

            JavaRDD<Netflow> flows = lines.map(new Utilities.ParseJSON());

            JavaRDD<Netflow> filtered = flows.filter(new NetflowFilters.FilterNulls())
                    .filter(new NetflowFilters.FilterToOutboundTraffic())
                    .filter(new NetflowFilters.FilterLargeTransfers());

            // takeOrdered brings the result back to the driver as a plain list.
            List<Netflow> topFlows =
                    filtered.takeOrdered(TOP_FLOW_COUNT, new Utilities.SortByBytesDesc());
            Utilities.printFlows(topFlows);
            jsc.parallelize(topFlows).saveAsTextFile(Utilities.outputPath(output));
        } finally {
            // Always release the context, including when the job throws.
            jsc.stop();
        }
    }

    /**
     * Builds the Spark configuration for this job: Kryo serialization with
     * mandatory class registration, and every class the job serializes registered.
     *
     * @return the configuration used to create the {@link JavaSparkContext}
     */
    private static SparkConf buildConf() {
        SparkConf conf = new SparkConf().setAppName("TopOutboundFlows");

        conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        conf.set("spark.kryo.referenceTracking", "false");
        // With registrationRequired=true every serialized class must appear in the list below.
        conf.set("spark.kryo.registrationRequired", "true");
        conf.set("spark.akka.frameSize", "64");

        Class<?>[] classes = new Class<?>[]{
                Netflow.class,
                Netflow.NetflowJSON.class,
                CIDRMask.class,
                Utilities.SortByBytesDesc.class,
                Utilities.ParseJSON.class,
                NetflowFilters.FilterNulls.class,
                NetflowFilters.FilterToOutboundTraffic.class,
                NetflowFilters.FilterLargeTransfers.class,
        };
        conf.registerKryoClasses(classes);
        return conf;
    }
}