package main

import (
	"bufio"
	"fmt"
	"hash/fnv"
	"io/ioutil"
	"log"
	"math"
	"os"
	"sort"

	"github.com/google/pprof/profile"
)

// main reads profile file names from standard input, one per line, turns each
// profile into a normalized vector keyed by hashes of its call stacks, and
// passes the vectors to considerFileStacks.
//
// A file that cannot be read or parsed, or a failure while scanning standard
// input, terminates the program via log.Fatalf.
func main() {
	log.SetFlags(0)

	sc := bufio.NewScanner(os.Stdin)
	sc.Split(bufio.ScanLines)

	// File name -> (stack hash -> accumulated strength).
	fileStacks := make(map[string]map[uint64]float64)

	for sc.Scan() {
		filename := sc.Text()
		if filename == "" {
			// A blank line names no file; skip it rather than aborting the
			// whole run on ReadFile("").
			continue
		}

		buf, err := ioutil.ReadFile(filename)
		if err != nil {
			log.Fatalf("ReadFile(%q); err = %q", filename, err)
		}

		prof, err := profile.ParseData(buf)
		if err != nil {
			log.Fatalf("profile.ParseData; err = %q", err)
		}

		stacks := make(map[uint64]float64)
		fileStacks[filename] = stacks

		for _, sample := range prof.Sample {
			// Flatten the sample into function names, in the order the
			// profile lists its locations and their lines.
			var fns []string
			for _, loc := range sample.Location {
				for _, line := range loc.Line {
					fns = append(fns, line.Function.Name)
				}
			}

			// The hash is never reset inside the loop, so after writing
			// fns[i] it identifies the whole sequence fns[0..i]. Every such
			// leading sequence of the stack gets its own key.
			h := fnv.New64a()
			for i, fn := range fns {
				fmt.Fprintf(h, "%s\n", fn)

				key := h.Sum64()

				// Calculating "strength" is a key part of the work.
				//
				// Correcting by a factor of "len(fns)" makes CPU profiles
				// appear more clustered.
				//
				// Exponential decay in either direction (considering a
				// matching root as valuable, or a full match all the way to
				// the leaf as most valuable) results in different behavior:
				// CPU profiles show better clustering when a prefix match
				// has high value.
				//
				// Heap profiles have more than one entry in sample.Value;
				// only the first one is used here.

				// strength := math.Pow(0.8, float64(len(fns)-i-1)) * float64(sample.Value[0])
				strength := math.Pow(0.8, float64(i)) * float64(sample.Value[0]) / float64(len(fns))
				stacks[key] += strength
			}
		}

		normalize(stacks)
	}

	// Check the scanner before analyzing: if reading stdin failed part-way,
	// the file list is incomplete and any result computed from it would be
	// misleading.
	if err := sc.Err(); err != nil {
		log.Fatalf("scan; err = %q", err)
	}

	considerFileStacks(fileStacks)
}

// dot returns the dot product of two sparse vectors stored as maps.
//
// Only the keys of a are visited; a key missing from b reads as zero and so
// contributes nothing to the result.
func dot(a, b map[uint64]float64) float64 {
	total := 0.0
	for key, av := range a {
		total += av * b[key]
	}
	return total
}

// normalize scales m in place so that its Euclidean length becomes 1.
//
// A vector whose length is not greater than zero (which includes an empty
// map) is left untouched.
func normalize(m map[uint64]float64) {
	magnitude := math.Sqrt(dot(m, m))
	if !(magnitude > 0) {
		return
	}
	for key, val := range m {
		m[key] = val / magnitude
	}
}

// considerFileStacks runs the analysis over the per-file stack vectors, keyed
// by file name. It currently delegates to find; the all-pairs dump is left
// disabled.
func considerFileStacks(fileStacks map[string]map[uint64]float64) {
	// printAllPairs(fileStacks)
	find(fileStacks)
}

// find selects representative profiles from fileStacks and logs their file
// names, one per line.
//
// fileStacks maps a file name to that profile's vector. Profiles with zero
// magnitude are ignored; if none remain, find returns without logging.
//
// The selection starts with the profiles most and least similar to the
// average of all profiles. It then repeatedly (1) assigns every profile to
// its most similar representative, (2) replaces each representative with the
// profile most similar to the average of its followers, until the
// representatives stop changing or a round limit is hit, and (3) adds the
// profile least similar to its own representative as a new representative.
// The same profile may be selected more than once, so the logged list can
// contain duplicates.
func find(fileStacks map[string]map[uint64]float64) {
	const (
		// maxReps is the number of representatives collected before stopping.
		maxReps = 10
		// maxRounds bounds the refinement passes for one set of representatives.
		maxRounds = 10
	)

	// Keep only profiles with a non-zero magnitude, in sorted file-name
	// order. Map iteration order is random and indexMax/indexMin break ties
	// by position, so a fixed order keeps point indices and tie-breaking
	// independent of map iteration order.
	filenames := make([]string, 0, len(fileStacks))
	for filename, stacks := range fileStacks {
		if dot(stacks, stacks) > 0 {
			filenames = append(filenames, filename)
		}
	}
	if len(filenames) == 0 {
		return
	}
	sort.Strings(filenames)

	// c.points[i] is the vector of filenames[i].
	var c cluster
	for _, filename := range filenames {
		c.points = append(c.points, fileStacks[filename])
	}

	// Seed with the profiles most and least similar to the overall average.
	dots := c.dots(c.average())
	reps := []int{indexMax(dots), indexMin(dots)}

	// assign returns, for every point, the index into reps of the
	// representative it is most similar to.
	assign := func(reps []int) []int {
		centers := make([]map[uint64]float64, len(reps))
		for i, rep := range reps {
			centers[i] = c.points[rep]
		}
		return kmeansWinners(&c, centers)
	}

	for len(reps) < maxReps {
		var winner []int
		converged := false
		for round := 0; round < maxRounds && !converged; round++ {
			winner = assign(reps)

			// Group the points by the representative they follow.
			followers := make([]cluster, len(reps))
			for i, w := range winner {
				followers[w].points = append(followers[w].points, c.points[i])
			}

			// The next representative of each group is the point (out of all
			// points) most similar to the group's average.
			nextReps := make([]int, len(reps))
			for i := range followers {
				nextReps[i] = indexMax(c.dots(followers[i].average()))
			}

			log.Printf("%v %v", reps, nextReps)

			converged = true
			for i := range reps {
				if reps[i] != nextReps[i] {
					converged = false
					break
				}
			}
			reps = nextReps
		}
		if !converged {
			// The round limit was hit, so winner still describes the
			// previous representatives. Recompute it so the similarities
			// below are measured against the representatives in reps.
			winner = assign(reps)
		}
		log.Printf("reps %v", reps)

		// The point least similar to its own representative is the worst
		// covered one; promote it to a representative.
		dots = dots[:0]
		for i, w := range winner {
			dots = append(dots, dot(c.points[i], c.points[reps[w]]))
		}
		reps = append(reps, indexMin(dots))
	}

	for _, idx := range reps {
		log.Printf("%s", filenames[idx])
	}
}

// kmeansWinners assigns every point of c to the center it is most similar to.
//
// Similarity is the dot product of the center with the point. The result has
// one entry per point of c: the index into centers of the winning center.
// Ties go to the lowest center index. A point for which no center compares
// greater than negative infinity (no centers at all, or only NaN
// similarities) is assigned index 0.
func kmeansWinners(c *cluster, centers []map[uint64]float64) []int {
	winner := make([]int, len(c.points))

	// similarity[rep][i] is the dot product of center rep with point i.
	similarity := make([][]float64, len(centers))
	for rep, center := range centers {
		similarity[rep] = c.dots(center)
	}

	for i := range winner {
		// Start below every real value so that a point whose similarities
		// are all negative still goes to its best center instead of
		// defaulting to center 0.
		best := math.Inf(-1)
		for rep := range similarity {
			if val := similarity[rep][i]; val > best {
				best, winner[i] = val, rep
			}
		}
	}

	return winner
}

// sum adds the given sparse vectors key by key and returns the total as a
// newly allocated map. The inputs are not modified; with no inputs the result
// is an empty, non-nil map.
func sum(v ...map[uint64]float64) map[uint64]float64 {
	total := map[uint64]float64{}
	for i := range v {
		for key, val := range v[i] {
			total[key] += val
		}
	}
	return total
}

// printAllPairs logs the dot product of every unordered pair of distinct
// profiles. For each file, the partners are ordered by ascending product
// (file name breaks ties), and a pair is logged only from the side whose
// file name sorts first, so each pair is reported exactly once.
func printAllPairs(fileStacks map[string]map[uint64]float64) {
	type pair struct {
		filename string
		product  float64
	}

	for filename, stacks := range fileStacks {
		pairs := make([]pair, 0, len(fileStacks))
		for name, other := range fileStacks {
			pairs = append(pairs, pair{filename: name, product: dot(stacks, other)})
		}

		sort.Slice(pairs, func(i, j int) bool {
			if pairs[i].product == pairs[j].product {
				return pairs[i].filename < pairs[j].filename
			}
			return pairs[i].product < pairs[j].product
		})

		for _, p := range pairs {
			if p.filename > filename {
				log.Printf("%f %s %s", p.product, filename, p.filename)
			}
		}
	}
}

// cluster is a collection of sparse vectors ("points"), each stored as a map
// from key to component value.
type cluster struct {
	// points holds the member vectors; callers refer to them by index.
	points []map[uint64]float64
}

// average returns the direction of the cluster's center: the key-wise sum of
// all points, passed through normalize. The result is a new map; the points
// themselves are not modified. For a cluster without points it is empty.
func (c *cluster) average() map[uint64]float64 {
	center := sum(c.points...)
	normalize(center)
	return center
}

// dots returns the dot product of m with every point of the cluster. Entry i
// of the result corresponds to c.points[i].
func (c *cluster) dots(m map[uint64]float64) []float64 {
	products := make([]float64, 0, len(c.points))
	for _, point := range c.points {
		products = append(products, dot(m, point))
	}
	return products
}

// indexMax returns the index of the largest value in f, or -1 if f is empty.
// When the largest value occurs more than once, the first occurrence wins.
func indexMax(f []float64) int {
	if len(f) == 0 {
		return -1
	}
	best := 0
	for i := 1; i < len(f); i++ {
		if f[i] > f[best] {
			best = i
		}
	}
	return best
}

// indexMin returns the index of the smallest value in f, or -1 if f is empty.
// When the smallest value occurs more than once, the first occurrence wins.
func indexMin(f []float64) int {
	if len(f) == 0 {
		return -1
	}
	best := 0
	for i := 1; i < len(f); i++ {
		if f[i] < f[best] {
			best = i
		}
	}
	return best
}
