package main

import (
	"crypto/tls"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"math/rand"
	"os"
	"path"
	"regexp"
	"sort"
	"strings"
	"time"

	"github.com/go-resty/resty/v2"

	"a.yandex-team.ru/library/go/certifi"
	"a.yandex-team.ru/library/go/core/log"
	"a.yandex-team.ru/library/go/core/log/zap"
	"a.yandex-team.ru/library/go/yandex/tvm"
	"a.yandex-team.ru/library/go/yandex/tvm/tvmauth"
	"a.yandex-team.ru/library/go/yatool"
	"a.yandex-team.ru/security/impulse/models"
	"a.yandex-team.ru/security/impulse/workflow/internal/api"
	"a.yandex-team.ru/security/impulse/workflow/internal/checkout"
	"a.yandex-team.ru/security/impulse/workflow/internal/dedup"
)

// codeQlMaxCodeLen is the maximum length, in bytes, of the code snippet stored
// with a finding; Normalize truncates longer snippets to this size.
const codeQlMaxCodeLen = 10000

// PathFlowEntry is a single step of a path flow, identified by the URL of the
// source location it points to.
type PathFlowEntry struct {
	// FileURL is the link to the file (with line number) for this step.
	FileURL string `json:"file_url"`
	// Code    string `json:"code"`
}

// PathFlow is an ordered list of steps extracted from one thread flow of a
// result; it is serialized under the "path_flow" key.
type PathFlow struct {
	PathFlowEntries []PathFlowEntry `json:"path_flow"`
}

// RawReport holds the unparsed analysis output of one run together with the
// folder, language and query suite it was produced for.
type RawReport struct {
	// Folder is the checkout folder (relative to the base path) that was analyzed.
	Folder     string
	// Language is the language the database was built for.
	Language   string
	// QuerySuite is the query suite passed to the analysis.
	QuerySuite string
	// Content is the raw report file as read from disk.
	Content    []byte
}

// CodeQL is the scanner state: its configuration, the clients it talks to and
// the raw and normalized reports it accumulates. Init must be called before
// Run, since Init populates most of the fields used by the other methods.
type CodeQL struct {
	// home is taken from the prepared CodeQL environment; the codeql CLI binary
	// is looked up under it and it is passed as --additional-packs.
	home                   string
	// langsArg is the raw languages argument; languages is what
	// prepareLanguages derives from it.
	langsArg               *string
	languages              []string
	// qlsArg is the raw query suites argument; querySuites is what
	// prepareQueries derives from it.
	qlsArg                 *string
	querySuites            []string
	// basePath is the source path given to Init; checkout folders are joined to it.
	basePath               string
	// queriesBranch is handed to NewCodeqlEnvironment.
	queriesBranch          *string
	checkoutMeta           *checkout.Checkout
	// rawReport maps the sequential run number assigned in Run to the raw
	// report of that run.
	rawReport              map[int]RawReport
	// report is the normalized result produced by Normalize.
	report                 []*models.NewVulnerabilityDeduplicationRequestDTO
	// codeqlSearchPath is passed to the analysis as --search-path.
	codeqlSearchPath       string
	// tvmToken, when non-empty, enables creation of a TVM client in Init.
	tvmToken               *string
	// oauthToken, when non-empty, is set on the Impulse API options in Init.
	oauthToken             *string
	impulseAPI             *api.ImpulseAPI
	logger                 log.Logger
	// organizationID and projectID identify the prebuilt database index to
	// fetch; both must be set and non-zero for a download to be attempted.
	organizationID         *int
	projectID              *int
	httpc                  *resty.Client
	// arcadiaExternalPattern matches artifact URIs that point into a sandbox
	// copy of arcadia sources.
	arcadiaExternalPattern *regexp.Regexp
}

// Init prepares the scanner for analysing the sources located at sourcePath.
//
// It sets up, in order: the console logger, the HTTP client, the CodeQL
// environment, the languages and query suites to run, the arcadia URI
// pattern, the random seed and finally the Impulse API client (with a TVM
// client when a TVM token is configured).
//
// A failure to create the logger, to create or prepare the CodeQL
// environment, or to compile the arcadia pattern panics. A failure to load
// the certificate pool or to create the TVM client is returned as an error.
func (r *CodeQL) Init(sourcePath string, checkoutMeta *checkout.Checkout) error {
	// Console logger at info level.
	consoleLogger, err := zap.New(zap.ConsoleConfig(log.InfoLevel))
	if err != nil {
		panic(fmt.Sprintf("failed to create logger: %s", err))
	}
	r.logger = consoleLogger

	// HTTP client trusting the certifi root pool.
	rootCAs, err := certifi.NewCertPool()
	if err != nil {
		return err
	}
	httpClient := resty.New()
	httpClient.SetRetryCount(2)
	httpClient.SetLogger(r.logger.Fmt())
	httpClient.SetHeader("User-Agent", "impulse-codeql <security@yandex-team.ru>")
	httpClient.SetTLSClientConfig(&tls.Config{RootCAs: rootCAs})
	r.httpc = httpClient

	// CodeQL environment: create it, then prepare it.
	env, err := NewCodeqlEnvironment(CodeqlHome, r.httpc, r.queriesBranch)
	if err != nil {
		panic(fmt.Sprintf("failed to create codeqlEnvironment: %s", err))
	}
	if err = env.Prepare(); err != nil {
		panic(fmt.Sprintf("failed to prepare codeqlEnvironment: %s", err))
	}

	// Languages first: the query suites are resolved against them.
	r.languages = prepareLanguages(r.langsArg)
	r.querySuites = prepareQueries(r.qlsArg, r.languages, env)

	// General state.
	r.basePath, r.checkoutMeta = sourcePath, checkoutMeta
	r.rawReport = map[int]RawReport{}

	// CodeQL specific state.
	r.home, r.codeqlSearchPath = env.Home, env.SearchPath
	pattern, err := regexp.Compile(`file:/place/sandbox-data/tasks/\d+/\d+/\d+/src/a\.yandex-team\.ru`)
	if err != nil {
		panic(fmt.Sprintf("failed to compile arcadia pattern regex: %s", err))
	}
	r.arcadiaExternalPattern = pattern

	// Seed the generator used for temporary path suffixes.
	rand.Seed(time.Now().UTC().UnixNano())

	infoLog := r.logger.Fmt()
	infoLog.Infof("[+] r.home: %s", r.home)
	infoLog.Infof("[+] r.basePath: %s", r.basePath)
	infoLog.Infof("[+] r.codeqlSearchPath: %s", r.codeqlSearchPath)

	// Impulse client; the TVM client stays nil unless a TVM token is configured.
	var tvmClient *tvmauth.Client
	if secret := r.tvmToken; secret != nil && *secret != "" {
		tvmClient, err = tvmauth.NewAPIClient(tvmauth.TvmAPISettings{
			SelfID: SelfTvmID,
			ServiceTicketOptions: tvmauth.NewIDsOptions(
				*secret,
				[]tvm.ClientID{ImpulseTvmID}),
			TVMHost: "https://tvm-api.yandex.net",
			TVMPort: 443,
		}, r.logger)
		if err != nil {
			r.logger.Errorf("Could not create tvm client: %+v", err)
			return err
		}
	}

	opts := api.Options{
		Endpoint: "https://impulse.sec.yandex-team.ru/",
		TvmID:    ImpulseTvmID,
	}
	if token := r.oauthToken; token != nil && *token != "" {
		opts.OAuthToken = *token
	}
	r.impulseAPI = api.New(tvmClient, &opts)

	return nil
}

// downloadCodeQLArchive fetches the latest prebuilt CodeQL database archive
// for the configured organization/project and the given language, stores it
// at archivePath and unpacks it into a freshly created databasePath.
//
// It returns an error when the organization or project is not configured,
// when the index lookup or the download fails (including a non-success HTTP
// status), or when the archive cannot be written or unpacked. If unpacking
// fails, databasePath is removed again so that a later database build using
// the same path does not run into leftover files.
func (r *CodeQL) downloadCodeQLArchive(archivePath string, lang string, databasePath string) error {
	if r.organizationID == nil || r.projectID == nil || *r.organizationID == 0 || *r.projectID == 0 {
		return fmt.Errorf("[+] error on fetching archive. orgID or projectID not specified")
	}
	orgID, projID := *r.organizationID, *r.projectID

	archiveMeta, err := r.impulseAPI.GetCodeqlIndex(orgID, projID, lang, "latest")
	if err != nil {
		return fmt.Errorf("[+] codeql index not found. org: %d, proj: %d, lang: %s, err: %w", orgID, projID, lang, err)
	}

	r.logger.Fmt().Infof("[+] codeql database archive found. use it.")

	resp, err := r.httpc.R().Get(archiveMeta.MdsURL)
	if err != nil {
		return fmt.Errorf("[+] codeql index download error. org: %d, proj: %d, lang: %s, err: %w", orgID, projID, lang, err)
	}
	// A transport-level success may still carry an error page; do not treat
	// its body as an archive.
	if resp.IsError() {
		return fmt.Errorf("[+] codeql index download error. org: %d, proj: %d, lang: %s, status: %s", orgID, projID, lang, resp.Status())
	}

	f, err := os.Create(archivePath)
	if err != nil {
		return fmt.Errorf("[+] codeql creating file for codeql index error. err: %w", err)
	}

	// Always close the file, but report the write error first if both fail.
	_, writeErr := f.Write(resp.Body())
	closeErr := f.Close()
	if writeErr != nil {
		return fmt.Errorf("[+] codeql write to file for codeql index error. err: %w", writeErr)
	}
	if closeErr != nil {
		return fmt.Errorf("[+] codeql close file for codeql index error. err: %w", closeErr)
	}

	err = os.Mkdir(databasePath, 0775)
	if err != nil {
		return fmt.Errorf("[+] error codeql index archive. err: %w", err)
	}

	args := []string{"tar", "zxvf", archivePath, "-C", databasePath}
	err = execCommand(args)
	if err != nil {
		// Best-effort cleanup of the partially extracted directory; the
		// untar error is the one worth reporting.
		_ = os.RemoveAll(databasePath)
		return fmt.Errorf("[+] error on untar codeql index archive. err: %w", err)
	}

	return nil
}

// buildCodeQLDatabase builds a CodeQL database for folder (relative to the
// base path) and lang, and returns the path of the database that was built.
//
// Several attempts are made, each into its own database path:
//  1. a plain build; for arcadia folders with a language that needs
//     compilation the build command uses the ya-bin executable (which is
//     built first if it does not exist yet);
//  2. for "go" only, the same build with GOFLAGS=-mod=mod;
//  3. for arcadia compiled languages, a build through the ya tool found for
//     the source path;
//  4. for arcadia compiled languages, a build with an empty build command.
//
// The first successful attempt wins. An error is returned when the ya tool
// cannot be located or when every attempt fails.
func (r CodeQL) buildCodeQLDatabase(repoNum int, folder string, lang string, isArcadia bool, randSuffix int64) (string, error) {
	databasePath := fmt.Sprintf("/tmp/codeqldb_%s_%d_%d", lang, repoNum, randSuffix)
	sourcePath := path.Join(r.basePath, folder)
	// default; replaced below by the ya tool found for the source path
	yaPath := "ya"
	// for ya-bin build without musl
	yaBinSourcePath := path.Join(r.basePath, "src/a.yandex-team.ru/devtools/ya/bin")
	yaBinExecutablePath := path.Join(r.basePath, "ya-bin")
	// non compilable languages requires empty buildCommand
	buildCommand := ""

	langMeta, known := SupportedLanguages[lang]
	arcadiaCompiled := known && isArcadia && langMeta.NeedsCompilation

	// prepare for compilable languages
	if arcadiaCompiled {
		foundYa, err := yatool.FindYa(sourcePath)
		if err != nil {
			return "", err
		}
		// Assign to the outer variable: the later "default ya tool" attempt
		// must use the discovered path, not the literal "ya".
		yaPath = foundYa

		// if ya-bin executable does not exist - build it
		if _, err = os.Stat(yaBinExecutablePath); os.IsNotExist(err) {
			args := []string{yaPath, "make", "-r", yaBinSourcePath, "-I", r.basePath}
			err = execCommand(args)
			r.logger.Fmt().Infof("[+] ya-bin build error if any: %v\n", err)
			args = []string{"ls", "-l", r.basePath}
			_ = execCommand(args)
		}

		// make first codeqldb build attempt with ya-bin
		buildCommand = fmt.Sprintf("%s --no-respawn make --rebuild", yaBinExecutablePath)
		databasePath = fmt.Sprintf("/tmp/codeqldb_%s_%d_%d_with_yabin", lang, repoNum, randSuffix)
	}

	// first build attempt
	args := buildArgs(r.home, lang, sourcePath, databasePath, buildCommand)
	err := execCommand(args)
	r.logger.Fmt().Infof("[+] codeql database first build attempt. args: %#v. err: %v.\n", args, err)
	if err == nil {
		return databasePath, nil
	}

	// second attempt for golang
	if lang == "go" {
		databasePath = fmt.Sprintf("/tmp/codeqldb_%s_%d_%d_with_goflags", lang, repoNum, randSuffix)
		args = buildArgs(r.home, lang, sourcePath, databasePath, buildCommand)
		err = execCommand(args, "GOFLAGS=-mod=mod")
		r.logger.Fmt().Infof("[+] codeql database second build attempt. args: %#v. err: %v.\n", args, err)
		if err == nil {
			return databasePath, nil
		}
	}

	if arcadiaCompiled {
		// second attempt for compilable languages for arcadia projects:
		// try to use default ya tool
		buildCommand = fmt.Sprintf("%s make --rebuild", yaPath)
		databasePath = fmt.Sprintf("/tmp/codeqldb_%s_%d_%d_with_yatool", lang, repoNum, randSuffix)
		args = buildArgs(r.home, lang, sourcePath, databasePath, buildCommand)
		err = execCommand(args)
		r.logger.Fmt().Infof("[+] codeql database second build attempt. args: %#v. err: %v.\n", args, err)
		if err == nil {
			return databasePath, nil
		}

		// third attempt for compilable languages for arcadia projects:
		// try to use codeql built-in heuristic method (without ya tool)
		buildCommand = ""
		databasePath = fmt.Sprintf("/tmp/codeqldb_%s_%d_%d_without_yatool", lang, repoNum, randSuffix)
		args = buildArgs(r.home, lang, sourcePath, databasePath, buildCommand)
		err = execCommand(args)
		r.logger.Fmt().Infof("[+] codeql database third build attempt. args: %#v. err: %v.\n", args, err)
		if err == nil {
			return databasePath, nil
		}
	}

	return "", fmt.Errorf("failed to build codeql database for project")
}

// runImpl performs one analysis run for the given folder, language and query
// suite, and stores the resulting raw report under repoNum.
//
// It first tries to download a prebuilt database; if that fails for any
// reason it builds the database locally. The database is then upgraded and
// analyzed, and the report file is read back from disk. Any failure after
// the download attempt is returned to the caller.
func (r *CodeQL) runImpl(repoNum int, folder string, isArcadia bool, lang string, querySuite string) error {
	suffix := rand.Int63()
	var (
		sarifPath   = fmt.Sprintf("/tmp/codeqlreport_%d_%d", repoNum, suffix)
		dbPath      = fmt.Sprintf("/tmp/codeqldb_%s_%d_%d", lang, repoNum, suffix)
		codeqlBin   = path.Join(r.home, "codeql-cli", "codeql")
		tarballPath = fmt.Sprintf("/tmp/codeqldb_%d_%d.tar.gz", repoNum, suffix)
	)

	// Prefer a prebuilt database archive; fall back to a local build.
	if downloadErr := r.downloadCodeQLArchive(tarballPath, lang, dbPath); downloadErr != nil {
		r.logger.Fmt().Infof("[+] no index found. build codeqldb\n")
		builtPath, buildErr := r.buildCodeQLDatabase(repoNum, folder, lang, isArcadia, suffix)
		if buildErr != nil {
			r.logger.Fmt().Infof("[+] failed to build codeqldb\n")
			return buildErr
		}
		dbPath = builtPath
	}

	// Upgrade the database before analysing it.
	upgrade := []string{codeqlBin, "database", "upgrade", dbPath}
	if err := execCommand(upgrade); err != nil {
		return err
	}

	// Run the query suite and write the report to sarifPath.
	analyze := []string{
		codeqlBin, "database", "analyze", dbPath, querySuite,
		"--search-path", r.codeqlSearchPath,
		"--additional-packs", r.home,
		"--format", "sarifv2.1.0",
		"--output", sarifPath,
	}
	if err := execCommand(analyze); err != nil {
		return err
	}

	content, err := ioutil.ReadFile(sarifPath)
	if err != nil {
		return err
	}

	r.rawReport[repoNum] = RawReport{
		Content:    content,
		QuerySuite: querySuite,
		Language:   lang,
		Folder:     folder,
	}
	return nil
}

// Run analyses every checkout folder with every configured language, using
// each query suite whose name contains the language name. Runs are numbered
// sequentially starting from 1. A failing run is logged and skipped, so Run
// itself always returns nil.
func (r *CodeQL) Run() error {
	var repoNum int

	for folderPath, folderMeta := range r.checkoutMeta.Folders {
		for _, lang := range r.languages {
			for _, querySuite := range r.querySuites {
				// A suite belongs to a language when its name mentions it.
				if !strings.Contains(querySuite, lang) {
					continue
				}

				repoNum++
				r.logger.Fmt().Infof("Run analysis on folder: %s, lang: %s, querySuite: %s.\n", folderPath, lang, querySuite)
				if err := r.runImpl(repoNum, folderPath, folderMeta.IsArcadia, lang, querySuite); err != nil {
					r.logger.Fmt().Infof("Error %v on folder: %s, lang: %s, querySuite: %s. Skip..\n", err, folderPath, lang, querySuite)
				}
			}
		}
	}

	return nil
}

// ExtractPathFlows converts the code flows of a result into path flows made
// of file URLs.
//
// Each thread flow becomes one PathFlow. For every location a file URL is
// generated from info; when the artifact URI matches the arcadia sandbox
// pattern and splits into ten "/"-separated parts, the URL is regenerated
// against the arcadia trunk repository using the last part as the path.
// A location whose URL equals the previously seen URL is skipped (the
// previous URL is tracked across thread flows). A thread flow that collects
// more than 25 entries is dropped from the output entirely.
func (r CodeQL) ExtractPathFlows(result Result, info checkout.Folder) []PathFlow {
	const maxDepth = 25

	// rooted returns p with a leading slash.
	rooted := func(p string) string {
		if strings.HasPrefix(p, "/") {
			return p
		}
		return "/" + p
	}

	flows := []PathFlow{}
	lastURL := ""
	for _, codeFlow := range result.CodeFlows {
		for _, threadFlow := range codeFlow.ThreadFlows {
			var flow PathFlow
			for _, step := range threadFlow.Locations {
				physical := step.Location.PhysicalLocation
				uri := physical.ArtifactLocation.URI
				line := physical.Region.StartLine

				// URL generation errors are ignored; the returned URL is used as is.
				url, _ := checkout.GenerateFileURLWithLineNumber(info, rooted(uri), line)

				if r.arcadiaExternalPattern.MatchString(uri) {
					if parts := strings.SplitN(uri, "/", 10); len(parts) == 10 {
						arcadiaInfo := info
						arcadiaInfo.Repository = "svn+ssh://arcadia.yandex.ru/arc/trunk/arcadia"
						url, _ = checkout.GenerateFileURLWithLineNumber(arcadiaInfo, rooted(parts[9]), line)
					}
				}

				// Collapse consecutive steps that point at the same URL.
				if url == lastURL {
					continue
				}
				lastURL = url

				flow.PathFlowEntries = append(flow.PathFlowEntries, PathFlowEntry{FileURL: url})
				if len(flow.PathFlowEntries) > maxDepth {
					break
				}
			}
			// Flows that ran past the depth limit are discarded.
			if len(flow.PathFlowEntries) <= maxDepth {
				flows = append(flows, flow)
			}
		}
	}
	return flows
}

// Normalize converts the collected raw reports into deduplication requests
// and stores them in r.report (an empty, non-nil slice when there is nothing
// to report).
//
// Raw reports that cannot be unmarshalled or contain no runs are logged and
// skipped; only the first run of each report is processed. Results without
// any location are logged and skipped as well, since file name and line
// number are taken from the first location. Normalize always returns nil.
func (r *CodeQL) Normalize() error {
	r.report = make([]*models.NewVulnerabilityDeduplicationRequestDTO, 0)

	for _, rawReport := range r.rawReport {
		report := CodeQLReport{}
		if err := json.Unmarshal(rawReport.Content, &report); err != nil {
			r.logger.Fmt().Infof("Could not unmarshal CodeQL report: %v\n", err)
			continue
		}

		if len(report.Runs) == 0 {
			r.logger.Fmt().Infof("Empty Runs")
			continue
		}

		// Index rule metadata by rule ID for lookup per result.
		rules := make(map[string]Rule)
		for _, rule := range report.Runs[0].Tool.Driver.Rules {
			rules[rule.ID] = rule
		}

		folderInfo := r.checkoutMeta.Folders[rawReport.Folder]

		for _, result := range report.Runs[0].Results {
			// A result without locations cannot be attributed to a file;
			// indexing Locations[0] on it would panic.
			if len(result.Locations) == 0 {
				r.logger.Fmt().Infof("Skip result without locations. rule: %s\n", result.RuleID)
				continue
			}
			location := result.Locations[0].PhysicalLocation

			// relative path from project root
			filename := location.ArtifactLocation.URI
			if filename != "" && filename[0] != '/' {
				filename = "/" + filename
			}
			// absolute path
			fullFilePath := path.Join(r.basePath, rawReport.Folder, filename)
			// line number
			lineNumber := location.Region.StartLine
			// dedup hash value; on error the returned value is used as is
			hash, _ := dedup.GetLineHash(fullFilePath, lineNumber)
			// url to code in repository; on error the returned value is used as is
			fileURL, _ := checkout.GenerateFileURLWithLineNumber(folderInfo, filename, lineNumber)
			// code snippet, left empty when it cannot be read
			code := ""
			if parsedCode, err := getCode(fullFilePath, lineNumber); err == nil && parsedCode != nil {
				code = *parsedCode
			}
			if len(code) > codeQlMaxCodeLen {
				code = code[:codeQlMaxCodeLen]
			}
			// collect path flows
			pathFlows := r.ExtractPathFlows(result, folderInfo)

			// Zero-value Rule when the result references an unknown rule ID.
			rule := rules[result.RuleID]
			severity := getSeverity(rule)

			keyProps := models.VulnerabilityProperties{
				"filename":    filename,
				"rule":        result.RuleID,
				"line_number": lineNumber,
				"line_hash":   hash,
				"description": rule.FullDescription.Text,
			}
			displayProps := models.VulnerabilityProperties{
				"code":              code,
				"severity":          severity,
				"file_url":          fileURL,
				"path_flows":        pathFlows,
				"tags":              rule.Properties.Tags,
				"references":        rule.Properties.GetReferences(),
				"security_severity": rule.Properties.SecuritySeverity,
				"precision":         rule.Properties.Precision,
			}

			r.report = append(r.report, &models.NewVulnerabilityDeduplicationRequestDTO{
				Severity:          severity,
				Category:          rule.ShortDescription.Text,
				KeyProperties:     keyProps,
				DisplayProperties: displayProps,
			})
		}
	}

	return nil
}

// Type returns the scan type name of this scanner, models.CODEQL.
func (r *CodeQL) Type() models.ScanTypeName {
	return models.CODEQL
}

// RawReport returns all collected raw reports combined into a single
// JSON-array-shaped string: "[\n", the reports separated by ",\n", then "]".
//
// Reports are emitted in ascending order of their run number so the output
// is deterministic. Trailing newlines of each report are trimmed, and
// reports that are empty after trimming are left out so that they do not
// produce dangling commas. With no reports the result is "[\n]".
func (r *CodeQL) RawReport() string {
	runNums := make([]int, 0, len(r.rawReport))
	for runNum := range r.rawReport {
		runNums = append(runNums, runNum)
	}
	sort.Ints(runNums)

	var out strings.Builder
	out.WriteString("[\n")
	wroteAny := false
	for _, runNum := range runNums {
		content := strings.TrimRight(string(r.rawReport[runNum].Content), "\n")
		if content == "" {
			continue
		}
		if wroteAny {
			out.WriteString(",\n")
		}
		out.WriteString(content)
		wroteAny = true
	}
	out.WriteString("]")

	return out.String()
}

// Report returns the normalized report held in r.report, which is populated
// by Normalize.
func (r *CodeQL) Report() interface{} {
	return r.report
}

// Version returns the version string of this scanner implementation.
func (r *CodeQL) Version() string {
	return "0.1"
}
