// Cluster-wide settings, read elsewhere in this file as var.config.<key>.
variable "config" {
  default = {
    // Prefix used for instance Name tags and security group names.
    cluster_name  = "spark-cluster"

    // Instance type shared by the master and slave instances.
    instance_type = "r3.2xlarge"

    // Number of slave instances to create (count of aws_instance.slave).
    num_slaves    = 1

    // EC2 key pair name assigned to every instance.
    keypair_name  = "Dev/spencer"

    // Local directory holding the files uploaded by the "file" provisioners.
    script_dir    = "./provision"

    // Passed as key_file to the SSH provisioner connections; empty by default.
    sshkey        = ""
  }
}

// Region -> AMI ID map; instances look up their AMI by var.vals.region.
variable "amis" {
  // supports HVM AMIs only
  default = {
    us-west-2 = "ami-05260d35"
  }
}

// Environment-specific values (network placement and access whitelist),
// read elsewhere in this file as var.vals.<key>.
variable "vals" {
  default = {
    // Comma-separated CIDR list; split(",", ...) turns it into the cidr_blocks
    // of the whitelisted security group rules.
    // NOTE(review): 33.99.10.96/29 and 38.99.10.97/32 differ only in the first
    // octet -- confirm both entries are intended.
    whitelisted_ips = "10.0.0.0/8,192.16.64.0/21,199.9.248.0/21,185.42.204.0/22,33.99.10.96/29,38.99.10.97/32"

    // Existing VPC and subnet the cluster is placed in.
    vpc             = "vpc-0713b162"
    subnet          = "subnet-f2a42397"

    // Provider region and the availability zone used for every instance.
    region          = "us-west-2"
    zone            = "us-west-2a"
  }
}

// AWS provider, pinned to the region configured in var.vals.
provider "aws" {
  region = "${var.vals.region}"
}

// Instances //
// Spark slave instances: var.config.num_slaves copies, each with four gp2 EBS
// volumes, provisioned over SSH (private IP) with directories, filesystems and
// the service scripts from var.config.script_dir.
resource "aws_instance" "slave" {
  count                       = "${var.config.num_slaves}"
  ami                         = "${lookup(var.amis, var.vals.region)}"
  availability_zone           = "${var.vals.zone}"
  instance_type               = "${var.config.instance_type}"
  associate_public_ip_address = false
  subnet_id                   = "${var.vals.subnet}"
  key_name                    = "${var.config.keypair_name}"
  ebs_optimized               = true

  // NOTE(review): these are security group IDs on an instance placed in a
  // subnet; the AWS provider documents vpc_security_group_ids for that case --
  // confirm against the Terraform version in use before switching.
  security_groups = [
    "${aws_security_group.slaves.id}",
    "${aws_security_group.slave-admin.id}",
    "${aws_security_group.allow-master.id}",
    "${aws_security_group.ssh-access.id}",
    "${aws_security_group.aws-dns-access.id}"
  ]

  tags {
    Name = "${var.config.cluster_name}-slave-${count.index}"
  }

  // Provisioners connect to the instance's own private address.
  connection {
    type     = "ssh"
    user     = "ubuntu"
    host     = "${self.private_ip}"
    key_file = "${var.config.sshkey}"
    timeout  = "90s"
  }

  // Four identical scratch volumes; the inline commands below format and mount
  // /dev/xvdc../dev/xvdf, presumably the in-guest names of these devices.
  ebs_block_device {
    device_name           = "/dev/sdc"
    volume_type           = "gp2"
    volume_size           = 1024
    delete_on_termination = true
  }

  ebs_block_device {
    device_name           = "/dev/sdd"
    volume_type           = "gp2"
    volume_size           = 1024
    delete_on_termination = true
  }

  ebs_block_device {
    device_name           = "/dev/sde"
    volume_type           = "gp2"
    volume_size           = 1024
    delete_on_termination = true
  }

  ebs_block_device {
    device_name           = "/dev/sdf"
    volume_type           = "gp2"
    volume_size           = 1024
    delete_on_termination = true
  }

  // Step 1: create directories, record the master's IP, format and mount disks.
  provisioner "remote-exec" {
    inline = [
      "sudo mkdir -p /etc/spark",
      "sudo chown ubuntu /etc/spark/",
      "echo ${aws_instance.master.private_ip} > /etc/spark/master-ip",

      "sudo mkdir -p /etc/service/spark",
      "sudo chown ubuntu /etc/service/spark/",
      "mkdir /etc/service/spark/log",

      // /dev/xvdb is not declared in this resource; presumably a volume
      // supplied by the AMI / instance type -- confirm.
      "sudo mkdir -p /var/opt/spark",
      "sudo mkfs.xfs /dev/xvdb",
      "sudo mount /dev/xvdb /var/opt/spark",
      // chown must come after the mount (same order as the master resource):
      // ownership set on the bare mount point is hidden once the filesystem is
      // mounted, and the unprivileged mkdir calls below would then fail.
      "sudo chown ubuntu /var/opt/spark",

      "mkdir /var/opt/spark/jobs",
      "mkdir /var/opt/spark/scratch",
      "mkdir /var/opt/spark/lib",
      "sudo mkfs.xfs /dev/xvdc",
      "sudo mkfs.xfs /dev/xvdd",
      "sudo mkfs.xfs /dev/xvde",
      "sudo mkfs.xfs /dev/xvdf",
      "mkdir /var/opt/spark/scratch/mnt1 && sudo mount /dev/xvdc /var/opt/spark/scratch/mnt1 -o noatime",
      "mkdir /var/opt/spark/scratch/mnt2 && sudo mount /dev/xvdd /var/opt/spark/scratch/mnt2 -o noatime",
      "mkdir /var/opt/spark/scratch/mnt3 && sudo mount /dev/xvde /var/opt/spark/scratch/mnt3 -o noatime",
      "mkdir /var/opt/spark/scratch/mnt4 && sudo mount /dev/xvdf /var/opt/spark/scratch/mnt4 -o noatime",
    ]
  }

  // Step 2: upload configuration and service scripts.
  provisioner "file" {
    source      = "${var.config.script_dir}/spark-env.sh"
    destination = "/etc/spark/spark-env.sh"
  }

  provisioner "file" {
    source      = "${var.config.script_dir}/metrics.properties"
    destination = "/opt/spark/conf/metrics.properties"
  }

  provisioner "file" {
    source      = "${var.config.script_dir}/spark-defaults.conf"
    destination = "/opt/spark/conf/spark-defaults.conf"
  }

  // Slave-specific run script (the master uploads run_master.sh instead).
  provisioner "file" {
    source      = "${var.config.script_dir}/run_worker.sh"
    destination = "/etc/service/spark/run"
  }

  provisioner "file" {
    source      = "${var.config.script_dir}/logger"
    destination = "/etc/service/spark/log/run"
  }

  // Step 3: make the uploaded service scripts executable.
  provisioner "remote-exec" {
    inline = [
      "chmod +x /etc/service/spark/run /etc/service/spark/log/run",
    ]
  }
}

// Spark master instance: a single node, provisioned over SSH (private IP) with
// directories, a filesystem on /dev/xvdb and the service scripts from
// var.config.script_dir.
resource "aws_instance" "master" {
  count             = 1
  ami               = "${lookup(var.amis, var.vals.region)}"
  availability_zone = "${var.vals.zone}"
  instance_type     = "${var.config.instance_type}"
  subnet_id         = "${var.vals.subnet}"
  key_name          = "${var.config.keypair_name}"

  // NOTE(review): these are security group IDs on an instance placed in a
  // subnet; the AWS provider documents vpc_security_group_ids for that case --
  // confirm against the Terraform version in use before switching.
  security_groups = [
    "${aws_security_group.master.id}",
    "${aws_security_group.master-admin.id}",
    "${aws_security_group.allow-slaves.id}",
    "${aws_security_group.ssh-access.id}",
    "${aws_security_group.aws-dns-access.id}"
  ]

  tags {
    Name = "${var.config.cluster_name}-master"
  }

  // Provisioners connect to the instance's own private address. `self` is used
  // (as in the slave resource) instead of naming aws_instance.master from
  // inside its own definition.
  connection {
    type     = "ssh"
    user     = "ubuntu"
    host     = "${self.private_ip}"
    key_file = "${var.config.sshkey}"
    timeout  = "90s"
  }

  // Step 1: create directories, record this node's IP, format and mount disk.
  provisioner "remote-exec" {
    inline = [
      "sudo mkdir -p /etc/spark",
      "sudo chown ubuntu /etc/spark/",
      "echo ${self.private_ip} > /etc/spark/master-ip",

      "sudo mkdir -p /etc/service/spark",
      "sudo chown ubuntu /etc/service/spark/",
      "mkdir /etc/service/spark/log",

      // /dev/xvdb is not declared in this resource; presumably a volume
      // supplied by the AMI / instance type -- confirm.
      // chown runs after the mount so it applies to the mounted filesystem.
      "sudo mkdir -p /var/opt/spark",
      "sudo mkfs.ext4 /dev/xvdb",
      "sudo mount /dev/xvdb /var/opt/spark",
      "sudo chown ubuntu /var/opt/spark",
      "mkdir /var/opt/spark/jobs",
      "mkdir -p /var/opt/spark/scratch/mnt1",
      "mkdir /var/opt/spark/lib"
    ]
  }

  // Step 2: upload configuration and service scripts.
  provisioner "file" {
    source      = "${var.config.script_dir}/spark-env.sh"
    destination = "/etc/spark/spark-env.sh"
  }

  provisioner "file" {
    source      = "${var.config.script_dir}/metrics.properties"
    destination = "/opt/spark/conf/metrics.properties"
  }

  provisioner "file" {
    source      = "${var.config.script_dir}/spark-defaults.conf"
    destination = "/opt/spark/conf/spark-defaults.conf"
  }

  // Master-specific run script (the slaves upload run_worker.sh instead).
  provisioner "file" {
    source      = "${var.config.script_dir}/run_master.sh"
    destination = "/etc/service/spark/run"
  }

  provisioner "file" {
    source      = "${var.config.script_dir}/logger"
    destination = "/etc/service/spark/log/run"
  }

  // Step 3: make the uploaded service scripts executable.
  provisioner "remote-exec" {
    inline = [
      "chmod +x /etc/service/spark/run /etc/service/spark/log/run",
    ]
  }
}


// Security Groups //
// Lots of running around here to avoid circular references :\
// top-level identifier for master
resource "aws_security_group" "master" {
  // Attached to the master instance and referenced as a traffic source by the
  // allow-master group.
  name        = "${var.config.cluster_name}-master"
  description = "identifier for spark master for references by other security groups"
  vpc_id      = "${var.vals.vpc}"

  // TCP 7077 from the whitelisted CIDR blocks.
  ingress {
    protocol    = "tcp"
    from_port   = 7077
    to_port     = 7077
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }
}
// Rule-less group attached to the slave instances; it exists so the
// allow-slaves group can reference it as a traffic source.
resource "aws_security_group" "slaves" {
  name        = "${var.config.cluster_name}-slaves"
  description = "identifier for spark slaves for references by other security groups"
  vpc_id      = "${var.vals.vpc}"
}

// Attached to the master: accepts all TCP, UDP and ICMP traffic coming from
// members of the slaves group and from members of this group itself.
resource "aws_security_group" "allow-slaves" {
  name        = "${var.config.cluster_name}-allow-slaves"
  description = "allow any inbound traffic from ${var.config.cluster_name}-slave security group"
  vpc_id      = "${var.vals.vpc}"

  // All TCP ports
  ingress {
    protocol        = "tcp"
    from_port       = 0
    to_port         = 65535
    self            = true
    security_groups = ["${aws_security_group.slaves.id}"]
  }

  // All UDP ports
  ingress {
    protocol        = "udp"
    from_port       = 0
    to_port         = 65535
    self            = true
    security_groups = ["${aws_security_group.slaves.id}"]
  }

  // ICMP (from_port/to_port of -1)
  ingress {
    protocol        = "icmp"
    from_port       = -1
    to_port         = -1
    self            = true
    security_groups = ["${aws_security_group.slaves.id}"]
  }
}
// Attached to the slaves: accepts all TCP, UDP and ICMP traffic coming from
// members of the master group and from members of this group itself.
resource "aws_security_group" "allow-master" {
  name        = "${var.config.cluster_name}-allow-master"
  description = "allow any inbound traffic from ${var.config.cluster_name}-master security group"
  vpc_id      = "${var.vals.vpc}"

  // All TCP ports
  ingress {
    protocol        = "tcp"
    from_port       = 0
    to_port         = 65535
    self            = true
    security_groups = ["${aws_security_group.master.id}"]
  }

  // All UDP ports
  ingress {
    protocol        = "udp"
    from_port       = 0
    to_port         = 65535
    self            = true
    security_groups = ["${aws_security_group.master.id}"]
  }

  // ICMP (from_port/to_port of -1)
  ingress {
    protocol        = "icmp"
    from_port       = -1
    to_port         = -1
    self            = true
    security_groups = ["${aws_security_group.master.id}"]
  }
}


// Administrative ports on the master, each open over TCP to the whitelisted
// CIDR blocks from var.vals.whitelisted_ips.
resource "aws_security_group" "master-admin" {
  name        = "${var.config.cluster_name}-master-admin"
  description = "administrative access to the master node of the spark cluster ${var.config.cluster_name}"
  vpc_id      = "${var.vals.vpc}"

  // SparkContext web UIs
  ingress {
    protocol    = "tcp"
    from_port   = 4040
    to_port     = 4045
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }

  // Mesos web UI
  ingress {
    protocol    = "tcp"
    from_port   = 8080
    to_port     = 8081
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }

  // History server
  ingress {
    protocol    = "tcp"
    from_port   = 18080
    to_port     = 18080
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }

  // Tachyon Web UI
  ingress {
    protocol    = "tcp"
    from_port   = 19999
    to_port     = 19999
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }

  // Hadoop Jobtracker
  ingress {
    protocol    = "tcp"
    from_port   = 50030
    to_port     = 50030
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }

  // HDFS Namenode Info - local ephemeral storage
  ingress {
    protocol    = "tcp"
    from_port   = 50070
    to_port     = 50070
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }

  // HDFS Namenode Info - ebs storage
  ingress {
    protocol    = "tcp"
    from_port   = 60070
    to_port     = 60070
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }
}

// SSH (TCP 22) from the whitelisted CIDR blocks; attached to both the master
// and the slave instances.
resource "aws_security_group" "ssh-access" {
  name        = "${var.config.cluster_name}-ssh-access"
  description = "allow SSH access"
  vpc_id      = "${var.vals.vpc}"

  ingress {
    protocol    = "tcp"
    from_port   = 22
    to_port     = 22
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }
}

// Administrative ports on the slaves, each open over TCP to the whitelisted
// CIDR blocks from var.vals.whitelisted_ips.
resource "aws_security_group" "slave-admin" {
  name        = "${var.config.cluster_name}-slave-admin"
  description = "administrative access to the slave nodes of the spark cluster ${var.config.cluster_name}"
  vpc_id      = "${var.vals.vpc}"

  // Mesos web UI
  ingress {
    protocol    = "tcp"
    from_port   = 8080
    to_port     = 8081
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }

  // Hadoop Tasktrackers
  ingress {
    protocol    = "tcp"
    from_port   = 50060
    to_port     = 50060
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }

  // HDFS Datanode Info
  ingress {
    protocol    = "tcp"
    from_port   = 50075
    to_port     = 50075
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }

  // HDFS Datanode Info - ebs
  ingress {
    protocol    = "tcp"
    from_port   = 60075
    to_port     = 60075
    cidr_blocks = ["${split(",", var.vals.whitelisted_ips)}"]
  }
}

// Inbound TCP 53 from three fixed /32 addresses; attached to both the master
// and the slave instances.
// NOTE(review): only TCP is opened here, not UDP -- confirm this is intended.
resource "aws_security_group" "aws-dns-access" {
  name        = "${var.config.cluster_name}-aws-dns-access"
  description = "access for AWS DNS servers"
  vpc_id      = "${var.vals.vpc}"

  ingress {
    protocol  = "tcp"
    from_port = 53
    to_port   = 53

    cidr_blocks = [
      "10.192.66.32/32",
      "10.192.71.133/32",
      "10.192.72.76/32"
    ]
  }
}

// Private IP address of the master instance.
output "master-ip" {
  value = "${aws_instance.master.private_ip}"
}

// Private IP addresses of all slave instances, joined one per line.
output "slave-ips" {
  value = "${join("\n", aws_instance.slave.*.private_ip)}"
}

// Configured number of slave instances (var.config.num_slaves).
output "num-slaves" {
  value = "${var.config.num_slaves}"
}

// Configured cluster name (var.config.cluster_name).
output "name" {
  value = "${var.config.cluster_name}"
}

// EC2 key pair name assigned to the instances (var.config.keypair_name).
output "keypair" {
  value = "${var.config.keypair_name}"
}
