package main

import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"math/rand"
	"net"
	"regexp"
	"time"

	"code.justin.tv/ids/nfconvert/query"
	"github.com/crowdmob/goamz/aws"
	"github.com/crowdmob/goamz/s3"
	"github.com/crowdmob/goamz/sqs"
)

const (
	// fileRexpStr is the pattern an S3 key must match to be treated as a
	// data file: "data/<state>/<device>/" followed by three path segments of
	// 4, 2 and 2 digits (presumably year/month/day — confirm against the
	// bucket layout) and a file name starting with "nfcapd." plus 12 digits.
	// The pattern is anchored at the start only, so any suffix after the 12
	// digits is accepted.
	fileRexpStr     = `^data/(?P<state>\w+)/(?P<device>\S+)/(\d{4})/(\d{2})/(\d{2})/nfcapd\.(\d{12})`
	// sqsPollInterval is not referenced in the visible part of this file.
	// NOTE(review): presumably a delay between SQS polls; confirm it is
	// still needed.
	sqsPollInterval = 200 * time.Millisecond
)

var (
	// fileRexp is fileRexpStr compiled once at package initialisation.
	// identifyFiles uses it to select which S3 keys to process.
	fileRexp   = regexp.MustCompile(fileRexpStr)
	// resolveIPs is bound to the -resolve flag in main. It is not read
	// anywhere in the visible part of this file.
	resolveIPs bool
)

// main parses the command-line flags, builds the selected query filter and
// submits one task per matching file through submitQuery.
//
// Exactly one filter is used: -outbound takes precedence over -ip, and the
// program exits if neither is given or if -ip is not a valid IP address.
// When -name is empty a random name of the form "q-<uint32>" is generated.
func main() {
	var (
		path     string
		secret   string
		access   string
		token    string
		ip       string
		name     string
		outbound bool
	)
	flag.StringVar(&path, "path", "data/raw/sfo01-br01/2015/01/05/nfcapd.2015010507", "path of files to look up")
	flag.StringVar(&secret, "secret-key", "", "aws_secret_access_key")
	flag.StringVar(&access, "access-key", "", "aws_access_key_id")
	flag.StringVar(&token, "token", "", "aws_secret_token")
	flag.StringVar(&ip, "ip", "", "ip to filter to")
	flag.StringVar(&name, "name", "", "name for query (leave blank for random name)")
	flag.BoolVar(&outbound, "outbound", false, "filter to outbound large flows")
	flag.BoolVar(&resolveIPs, "resolve", false, "resolve IP addresses in output")
	flag.Parse()

	auth, err := aws.GetAuth(access, secret, token, time.Now().Add(time.Hour*1))
	if err != nil {
		log.Fatal(err)
	}

	// Seed the generator so the random query name differs between runs.
	rand.Seed(time.Now().UnixNano())

	var filter query.Filter
	switch {
	case outbound:
		// NOTE(review): the meaning and units of this threshold are defined
		// by query.LargeOutboundFlows, which is not visible here.
		filter = &query.LargeOutboundFlows{20000000}
	case ip != "":
		// net.ParseIP returns nil for malformed input; reject it here
		// instead of submitting a query that filters on a nil address.
		parsed := net.ParseIP(ip)
		if parsed == nil {
			log.Fatalf("invalid IP address %q", ip)
		}
		filter = &query.IPFilter{IP: parsed}
	default:
		log.Fatal("No filter selected")
	}

	if name == "" {
		name = fmt.Sprintf("q-%d", rand.Uint32())
	}

	n := submitQuery(auth, path, filter, name)
	// name is a string, so it must be formatted with %s (not %d).
	log.Printf("Finished submitting query. Name is %s. There are %d files to process.\n", name, n)
}

// getfiles lists every key in bucket whose name starts with prefix.
//
// The listing is fetched page by page: each request passes the last key of
// the previous page as the marker, and paging stops at the first response
// that is not truncated. On a listing error the keys collected so far are
// discarded and the error is returned.
func getfiles(bucket *s3.Bucket, prefix string) ([]string, error) {
	log.Println("getting data from s3 (this could take a while)")
	log.Printf("checking %s\n", prefix)

	keys := []string{}
	marker := ""
	for {
		page, err := bucket.List(prefix, "", marker, 100000)
		if err != nil {
			return nil, err
		}
		for i := range page.Contents {
			keys = append(keys, page.Contents[i].Key)
		}
		// Resume the next request after the last key seen on this page.
		if n := len(page.Contents); n > 0 {
			marker = page.Contents[n-1].Key
		}
		if !page.IsTruncated {
			return keys, nil
		}
	}
}

// filter returns the elements of a for which f reports true, preserving
// their original order. The result is always a non-nil slice, even when a is
// empty or nothing matches.
func filter(a []string, f func(string) bool) []string {
	kept := []string{}
	for i := 0; i < len(a); i++ {
		if !f(a[i]) {
			continue
		}
		kept = append(kept, a[i])
	}
	return kept
}

// identifyFiles returns the keys under path in the data bucket that match
// fileRexp. The bucket is obtained from query.GetDataBucket; a listing
// failure terminates the program via log.Fatal.
func identifyFiles(auth aws.Auth, path string) []string {
	keys, err := getfiles(query.GetDataBucket(auth), path)
	if err != nil {
		log.Fatal(err)
	}
	// Keep only the keys that look like data files.
	matching := filter(keys, fileRexp.MatchString)
	return matching
}

// submitQuery finds the files under path that match fileRexp and enqueues one
// work request per file on the task queue, tagged with the marshaled filter
// and the query name. It returns the number of files submitted.
//
// Any failure (listing files, obtaining the queue, or enqueueing requests)
// terminates the program via log.Fatal, so a returned count always means the
// requests were sent.
func submitQuery(auth aws.Auth, path string, filter query.Filter, name string) int {
	files := identifyFiles(auth, path)

	// push the files into SQS
	tasks, err := query.GetTaskQueue(auth)
	if err != nil {
		log.Fatal(err)
	}
	filterRPC := filter.Marshal()

	// Do not report success if the requests could not be enqueued.
	if err := batchRequest(tasks, files, filterRPC, name); err != nil {
		log.Fatal(err)
	}
	return len(files)
}

// batchRequest enqueues one work request per entry of files on taskQ.
//
// Each file is wrapped in a query.QueryRequest together with filterRPC and
// queryName, JSON-encoded, and sent in batches of at most 10 messages. It
// returns the first marshaling or send error encountered; batches sent before
// the failure are not rolled back. An empty files slice sends nothing and
// returns nil.
func batchRequest(taskQ *sqs.Queue, files []string, filterRPC query.FilterRPC, queryName string) error {
	log.Println("Making requests for work")
	marshaled := make([]string, 0, len(files))
	for _, f := range files {
		req := query.QueryRequest{f, filterRPC, queryName}
		msg, err := json.Marshal(req)
		if err != nil {
			return err
		}
		marshaled = append(marshaled, string(msg))
	}

	for _, batch := range chunk(marshaled, 10) {
		// Never send an empty batch: SQS rejects batch requests that
		// contain no entries.
		if len(batch) == 0 {
			continue
		}
		_, err := taskQ.SendMessageBatchString(batch)
		if err != nil {
			return err
		}
	}
	log.Printf("requested %d files for processing\n", len(files))
	return nil
}

// chunk splits strings into consecutive groups of at most chunksize
// elements, preserving order. Every group except possibly the last holds
// exactly chunksize elements.
//
// Empty input yields an empty (non-nil) result with no groups. For non-empty
// input chunksize must be positive; otherwise chunk panics.
//
// The returned groups are sub-slices of strings rather than copies. Their
// capacity is capped, so appending to one group cannot overwrite the next.
func chunk(strings []string, chunksize int) [][]string {
	if len(strings) == 0 {
		return [][]string{}
	}
	if chunksize < 1 {
		panic("chunk: chunksize must be positive")
	}
	// Clamping keeps the arithmetic below free of overflow for huge sizes.
	if chunksize > len(strings) {
		chunksize = len(strings)
	}

	result := make([][]string, 0, (len(strings)+chunksize-1)/chunksize)
	for start := 0; start < len(strings); start += chunksize {
		end := start + chunksize
		if end > len(strings) {
			end = len(strings)
		}
		result = append(result, strings[start:end:end])
	}
	return result
}
