package redshift

import (
	"context"
	"database/sql"
	"fmt"
	"time"

	"github.com/pkg/errors"
	log "github.com/sirupsen/logrus"
)

// Metric names under which DeleteAndVacuumRows reports statement durations
// through c.stats.ExecutionTime.
const (
	// statNameSuccess is recorded when a statement completes without error.
	statNameSuccess = "redshift.delete_and_vacuum_rows.success"
	// statNameError is recorded when a statement returns an error.
	statNameError = "redshift.delete_and_vacuum_rows.error"
)

// DeleteAndVacuumRows cleans up tableName by deleting old rows and then running
// a vacuum operation to attempt to reclaim the space left by the deleted rows.
//
// A row is deleted when its time_utc value, truncated to the start of its day,
// is earlier than the current time minus 31 days; every deleted row is
// therefore more than 30 days old.
//
// Redshift has some large tables with potentially hundreds of billions of rows.
// Keeping all these rows increases query disk space, which causes severe
// performance degradation across the entire cluster. By keeping our tables as
// small as possible, we can attempt to keep query performance stable.
//
// tableName is interpolated directly into the SQL text (identifiers cannot be
// bound as query parameters), so callers must only pass trusted table names.
//
// The duration of each statement is reported through c.stats.ExecutionTime
// under statNameSuccess or statNameError. If the delete fails, the vacuum is
// not attempted. A failure of either statement is returned wrapped with a
// message identifying which statement failed.
func (c *Client) DeleteAndVacuumRows(ctx context.Context, tableName string) error {
	const deleteStr = `
        DELETE FROM %s
        WHERE       date_trunc('day', time_utc) < GETDATE() - interval '31 days'
    `
	const vacuumStr = "VACUUM DELETE ONLY %s"

	deleteStart := time.Now()
	result, err := c.db.ExecContext(ctx, fmt.Sprintf(deleteStr, tableName))
	deleteElapsed := time.Since(deleteStart)

	if err != nil {
		c.stats.ExecutionTime(statNameError, deleteElapsed)
		// sql.ErrNoRows is treated as "nothing to delete" rather than a failure.
		if err == sql.ErrNoRows {
			return nil
		}
		return errors.Wrap(err, "pq: failed to delete rows")
	}
	c.stats.ExecutionTime(statNameSuccess, deleteElapsed)

	affected, err := result.RowsAffected()
	if err != nil {
		// The delete itself succeeded and the count is only used for logging,
		// so surface the problem without failing the cleanup.
		log.WithError(err).Warn("could not determine number of deleted rows")
	}

	log.WithFields(log.Fields{
		"rows_deleted": affected,
		"elapsed_time": deleteElapsed,
	}).Info("deleted rows")

	vacuumStart := time.Now()
	_, err = c.db.ExecContext(ctx, fmt.Sprintf(vacuumStr, tableName))
	vacuumElapsed := time.Since(vacuumStart)

	if err != nil {
		// Report the vacuum's own duration, not the delete's.
		c.stats.ExecutionTime(statNameError, vacuumElapsed)
		return errors.Wrap(err, "pq: failed to vacuum rows")
	}

	c.stats.ExecutionTime(statNameSuccess, vacuumElapsed)
	log.WithField("elapsed_time", vacuumElapsed).Info("vacuumed rows")

	return nil
}
