package main

import (
	"flag"
	"log"
	"regexp"
	"strings"
	"time"

	"github.com/crowdmob/goamz/aws"
	"github.com/crowdmob/goamz/s3"
	"github.com/crowdmob/goamz/sqs"
)

// Fixed configuration for this tool: where the netflow files live in S3 and
// which SQS queue receives the names of the files to convert.
const (
	// bucket is the name of the S3 bucket that is listed for files.
	bucket         = "ids-netflows"
	// queueName is the name of the SQS queue that file names are sent to.
	queueName      = "ids-netflow-conversions"
	// rawDataPrefix is the key prefix listed to find raw (unconverted) files.
	rawDataPrefix  = "data/raw/"
	// doneDataPrefix is the key prefix listed to find already converted files.
	doneDataPrefix = "data/json/"
	// region is the key looked up in aws.Regions to select the AWS region.
	region         = "us-west-2"
	// fileRexpStr matches keys of the form
	// data/<state>/<device>/<4 digits>/<2 digits>/<2 digits>/nfcapd.<12 digits>.
	// The pattern is anchored only at the start, so trailing text after the
	// twelve digits (such as a file extension) is accepted.
	// NOTE(review): the digit groups look like year/month/day and a timestamp — confirm.
	fileRexpStr    = `^data/(?P<state>\w+)/(?P<device>\S+)/(\d{4})/(\d{2})/(\d{2})/nfcapd\.(\d{12})`
)

// fileRexp is fileRexpStr compiled once at package initialisation; main uses
// its MatchString method to discard listed keys that do not match the pattern.
var fileRexp = regexp.MustCompile(fileRexpStr)

// main lists the raw and the converted files in S3, determines which raw files
// have no converted counterpart, and then either prints statistics about them
// (-stat) or sends one SQS message per file to the conversion queue.
//
// Flags:
//
//	-stat        only log statistics, do not enqueue anything
//	-all         enqueue every raw file instead of only the unconverted ones
//	-access-key, -secret-key, -token
//	             AWS credentials handed to aws.GetAuth
//
// Any error from AWS terminates the program via log.Fatal.
func main() {
	var (
		statOnly bool
		all      bool
		secret   string
		access   string
		token    string
	)
	flag.BoolVar(&statOnly, "stat", false, "just print stats on state, don't actually take any action")
	flag.BoolVar(&all, "all", false, "reconvert all files")
	flag.StringVar(&secret, "secret-key", "", "aws_secret_access_key")
	flag.StringVar(&access, "access-key", "", "aws_access_key_id")
	flag.StringVar(&token, "token", "", "aws_secret_token")
	flag.Parse()

	auth, err := aws.GetAuth(access, secret, token, time.Now().Add(time.Hour*1))
	if err != nil {
		log.Fatal(err)
	}

	// Locals are named differently from the package-level constants and from
	// the sqs package so that nothing is shadowed.
	awsRegion := aws.Regions[region]
	s3bucket := s3.New(auth, awsRegion).Bucket(bucket)

	// identify files for processing
	rawFiles, err := getfiles(s3bucket, rawDataPrefix)
	if err != nil {
		log.Fatal(err)
	}
	doneFiles, err := getfiles(s3bucket, doneDataPrefix)
	if err != nil {
		log.Fatal(err)
	}
	rawFiles = filter(rawFiles, fileRexp.MatchString)
	doneFiles = filter(doneFiles, fileRexp.MatchString)

	undoneFiles := getUndoneFiles(rawFiles, doneFiles)
	if statOnly {
		logExample("raw", rawFiles)
		logExample("done", doneFiles)
		logExample("undone", undoneFiles)
		log.Printf("%d raw files identified\n", len(rawFiles))
		log.Printf("%d done files identified\n", len(doneFiles))
		// Avoid dividing by zero (which would print NaN) when no raw files exist.
		percent := 0.0
		if len(rawFiles) > 0 {
			percent = 100.0 * float64(len(undoneFiles)) / float64(len(rawFiles))
		}
		log.Printf("that leaves %d files (%.1f%%)\n", len(undoneFiles), percent)
		log.Printf("that's probably about %v hours of computation time", len(undoneFiles)/30)
		return
	}

	// push the files into SQS
	sqsClient := sqs.New(auth, awsRegion)
	queue, err := sqsClient.GetQueue(queueName)
	if err != nil {
		log.Fatal(err)
	}

	// NOTE(review): undoneFiles entries have rawDataPrefix stripped by
	// getUndoneFiles, whereas rawFiles entries are full S3 keys, so the two
	// modes enqueue differently shaped messages — confirm the consumer
	// accepts both before relying on -all.
	target := undoneFiles
	if all {
		target = rawFiles
	}

	for _, f := range target {
		log.Println(f)
		if _, err := queue.SendMessage(f); err != nil {
			// retry once
			if _, err := queue.SendMessage(f); err != nil {
				log.Fatal(err)
			}
		}
	}
	log.Println("Done")
}

// logExample logs the first entry of files under the given label, or a
// placeholder when files is empty so that an empty listing cannot panic.
func logExample(label string, files []string) {
	if len(files) == 0 {
		log.Printf("example %s: (none)", label)
		return
	}
	log.Printf("example %s: %s", label, files[0])
}

// getfiles returns the keys of all objects in bucket whose key starts with
// prefix. The listing is fetched page by page; the last key of each page is
// passed as the marker for the next request until the response reports that
// it is no longer truncated.
//
// It returns a nil slice and the error if any list request fails.
func getfiles(bucket *s3.Bucket, prefix string) ([]string, error) {
	var results []string
	log.Println("getting data from s3 (this could take a while)")
	// page through results
	marker := ""
	for {
		response, err := bucket.List(prefix, "", marker, 100000)
		if err != nil {
			return nil, err
		}
		for _, key := range response.Contents {
			results = append(results, key.Key)
		}
		// A page without contents (e.g. nothing matches the prefix) has no
		// last key to continue from; indexing it would panic, so stop here.
		if !response.IsTruncated || len(response.Contents) == 0 {
			break
		}
		marker = response.Contents[len(response.Contents)-1].Key
	}
	return results, nil
}

// getUndoneFiles returns the raw files that have no converted counterpart.
//
// Both lists are normalised before comparison: rawDataPrefix is removed from
// the raw keys, and doneDataPrefix plus a trailing ".json.gz" are removed from
// the done keys. The returned names are the normalised raw names, i.e. they no
// longer carry rawDataPrefix.
//
// Up to four names of each list are logged as a sample; lists shorter than
// four entries (including empty ones) are logged as far as they go.
func getUndoneFiles(rawFiles, doneFiles []string) []string {
	rawNames := stripPrefix(rawFiles, rawDataPrefix)
	doneNames := stripSuffix(stripPrefix(doneFiles, doneDataPrefix), ".json.gz")
	log.Printf("Checking these: %s\n", strings.Join(firstNames(rawNames, 4), ", "))
	log.Printf("Against these: %s\n", strings.Join(firstNames(doneNames, 4), ", "))
	return findMissingElements(rawNames, doneNames)
}

// firstNames returns at most the first n entries of names.
func firstNames(names []string, n int) []string {
	if len(names) < n {
		return names
	}
	return names[:n]
}

// filter returns a new slice holding, in their original order, the elements
// of a for which f reports true. The result is never nil.
func filter(a []string, f func(string) bool) []string {
	kept := []string{}
	for i := 0; i < len(a); i++ {
		if candidate := a[i]; f(candidate) {
			kept = append(kept, candidate)
		}
	}
	return kept
}

// stripPrefix returns a new slice of the same length as a in which every
// element has the leading prefix removed; elements that do not start with
// prefix are copied unchanged.
func stripPrefix(a []string, prefix string) []string {
	trimmed := make([]string, 0, len(a))
	for _, item := range a {
		trimmed = append(trimmed, strings.TrimPrefix(item, prefix))
	}
	return trimmed
}

// stripSuffix returns a new slice of the same length as a in which every
// element has the trailing suffix removed; elements that do not end with
// suffix are copied unchanged.
func stripSuffix(a []string, suffix string) []string {
	trimmed := make([]string, len(a))
	copy(trimmed, a)
	for idx := range trimmed {
		trimmed[idx] = strings.TrimSuffix(trimmed[idx], suffix)
	}
	return trimmed
}

// findMissingElements returns, in source order, every element of source that
// does not occur in target. Duplicates in source are kept; the result is
// never nil.
func findMissingElements(source []string, target []string) []string {
	// Build a set of the target strings for constant-time membership checks.
	known := make(map[string]struct{}, len(target))
	for i := range target {
		known[target[i]] = struct{}{}
	}

	missing := []string{}
	for _, candidate := range source {
		if _, found := known[candidate]; found {
			continue
		}
		missing = append(missing, candidate)
	}
	return missing
}
