package app

import (
	"bytes"
	"net/http"
	"regexp"
	"sort"
	"strings"
	"unicode/utf8"

	spamconf "code.justin.tv/chat/classy/config"
	"code.justin.tv/chat/golibs/async"
	"code.justin.tv/chat/golibs/errx"
	"code.justin.tv/chat/golibs/gojiplus"
	"code.justin.tv/chat/zuma/app/api"
	"code.justin.tv/chat/zuma/backend"
	"code.justin.tv/chat/zuma/internal/models"
	"code.justin.tv/feeds/feeds-common/entity"

	"golang.org/x/net/context"
	"golang.org/x/text/unicode/norm"
)

const (
	// defaultMaxCharacterLength is the character limit the message text is
	// truncated to when the request does not supply MaxCharacterLength.
	defaultMaxCharacterLength = 1000
)

var (
	// badUnicodeRe matches characters that break rendering: the control
	// characters \x00, \x02-\x08 and \x0a-\x1f, DEL (\x7f), the literal
	// two-character sequences backslash+n and backslash+t, and the
	// right-to-left override U+202E.
	badUnicodeRe = regexp.MustCompile(`[\x00\x02-\x08\x0a-\x1f\x7f]|\\n|\\t|\x{202e}`)

	// hangulFillerRegex matches runs of two or more Hangul filler characters
	// (U+115F, U+1160, U+3164, U+FFA0). Inside a character class "|" is a
	// literal, not an alternation, so it must not be used as a separator
	// here; otherwise runs of pipes such as "||" would be stripped too.
	hangulFillerRegex = regexp.MustCompile("[\u115F\u1160\u3164\uFFA0]{2,}")
)

// ExtractMessage is the HTTP handler that cleans up a message, checks whether
// the sender may post it, and responds with the message's risk, content and
// sender metadata.
//
// Responses:
//   - 400 when the JSON body cannot be parsed or fails validation.
//   - 500 when fetching the supporting data or parsing emoticons fails.
//   - 403 when enforceMessage reports a reason to deny the message.
//   - otherwise a JSON api.ExtractMessageResponse.
func (h *handlers) ExtractMessage(ctx context.Context, rw http.ResponseWriter, req *http.Request) {
	var params api.ExtractMessageRequest
	if parseErr := gojiplus.ParseJSONFromRequest(req, &params); parseErr != nil {
		gojiplus.ServePublicError(ctx, rw, req, parseErr, http.StatusBadRequest)
		return
	}
	if invalidErr := validateExtractMessageParams(params); invalidErr != nil {
		gojiplus.ServePublicError(ctx, rw, req, invalidErr, http.StatusBadRequest)
		return
	}

	// The text is cut down first so that whatever gets dropped is never
	// subject to enforcement; the request may override the default limit.
	charLimit := defaultMaxCharacterLength
	if override := params.MaxCharacterLength; override != nil {
		charLimit = *override
	}
	cleanText := sanitize(truncate(params.MessageText, charLimit))

	// Gather everything needed to enforce and annotate the message.
	data, err := h.fetchExtractMessageData(ctx, params.SenderID, cleanText, params.ContainerOwner)
	if err != nil {
		gojiplus.ServeError(ctx, rw, req, err, http.StatusInternalServerError)
		return
	}

	// Locate emoticons in the cleaned text using the sender's emote sets.
	matches, err := h.Backend.ParseEmoticons(ctx, cleanText, data.senderEmoteSets)
	if err != nil {
		gojiplus.ServeError(ctx, rw, req, err, http.StatusInternalServerError)
		return
	}
	data.emotesMatches = matches

	// Any deny reason is surfaced to the client as 403 Forbidden.
	if denyErr := enforceMessage(params, data); denyErr != nil {
		gojiplus.ServePublicError(ctx, rw, req, denyErr, http.StatusForbidden)
		return
	}

	site := data.senderSiteUser
	content := models.MessageContent{
		Text:      cleanText,
		Emoticons: data.emotesMatches,
	}
	sender := models.MessageSender{
		UserID:      site.UserID,
		Login:       site.Login,
		DisplayName: site.DisplayName,
		ChatColor:   data.senderTMIUser.Color,
		Badges:      data.senderBadges,
	}
	gojiplus.ServeJSON(rw, req, api.ExtractMessageResponse{
		Risk:    exportMessageRisk(data.spamLikelihood, data.automodResp),
		Content: exportMessageContent(content),
		Sender:  exportMessageSender(sender),
	})
}

// extractMessageData bundles the data gathered for one ExtractMessage request.
// It is produced by fetchExtractMessageData, completed by the handler, and
// consumed by enforceMessage and the response construction.
//
// Fields:
//   - automodResp: automod's verdict on the message text; left at
//     IsClean=true when automod is not consulted (no user-entity owner).
//   - spamLikelihood: spam confidence score for the message, compared against
//     the drop threshold during enforcement.
//   - senderTMIUser / senderSiteUser: the sender's chat and site user records.
//   - senderBadges: the sender's badges.
//   - senderEmoteSets: emote sets used when parsing emoticons in the text.
//   - isSenderChannelBanned: whether the sender is banned in the owner's
//     channel; only looked up when there is a user-entity owner.
//   - isSenderIgnored: whether the sender appears in the owner's block list;
//     only looked up when there is a user-entity owner.
//   - ownerRoomProperties: the owner's room properties, or nil when there is
//     no user-entity owner.
//   - emotesMatches: emoticons found in the text; not set by
//     fetchExtractMessageData, filled in by the handler afterwards.
type extractMessageData struct {
	automodResp           backend.AutoModResponse
	spamLikelihood        float64
	senderTMIUser         backend.TMIUser
	senderSiteUser        backend.SiteUser
	senderBadges          []models.UserBadge
	senderEmoteSets       []int
	isSenderChannelBanned bool
	isSenderIgnored       bool
	ownerRoomProperties   *backend.RoomProperties
	emotesMatches         []models.MessageContentEmoticon
}

// fetchExtractMessageData collects the data needed to decide whether the
// message may be sent and to annotate it with metadata. The individual lookups
// are handed to async.DoAsync as independent tasks; the sender's emote sets
// are fetched afterwards because that call needs the sender's staff flag.
//
// Owner-specific lookups (room properties, automod, ban status, block list)
// are only performed when containerOwner is a user entity. On any error the
// zero-value extractMessageData is returned together with the error.
func (h *handlers) fetchExtractMessageData(ctx context.Context, senderID, messageText string, containerOwner *entity.Entity) (extractMessageData, error) {
	// A user-namespaced container owner supplies the owner ID; for anything
	// else the owner ID stays nil.
	var ownerID *string
	if containerOwner != nil && containerOwner.Namespace() == entity.NamespaceUser {
		userID := containerOwner.ID()
		ownerID = &userID
	}

	// Every task below writes to its own field of data, so no two tasks
	// touch the same memory.
	data := extractMessageData{
		// Considered clean unless automod is actually consulted.
		automodResp: backend.AutoModResponse{IsClean: true},
	}

	tasks := []func() error{
		// Spam classification score for the message.
		func() error {
			score, err := h.Backend.GetSpamConfidence(ctx, senderID, messageText)
			if err != nil {
				return errx.Wrap(err, "fetching spam confidence")
			}
			data.spamLikelihood = score
			return nil
		},

		// Sender's site user record.
		func() error {
			siteUser, ok, err := h.Backend.GetSiteUser(ctx, senderID)
			if err != nil {
				return errx.Wrap(err, "fetching sender's site user")
			}
			if !ok {
				return errx.New("site user not found", errx.Fields{"user_id": senderID})
			}
			data.senderSiteUser = siteUser
			return nil
		},

		// Sender's chat (TMI) user record.
		func() error {
			tmiUser, ok, err := h.Backend.GetTMIUser(ctx, senderID)
			if err != nil {
				return errx.Wrap(err, "fetching sender's tmi user")
			}
			if !ok {
				return errx.New("tmi user not found", errx.Fields{"user_id": senderID})
			}
			data.senderTMIUser = tmiUser
			return nil
		},

		// Sender's badges.
		func() error {
			badges, err := h.Backend.GetUserBadges(ctx, senderID, ownerID)
			if err != nil {
				return errx.Wrap(err, "fetching sender's badges")
			}
			data.senderBadges = badges
			return nil
		},
	}

	// The remaining lookups only make sense when the container has an owner.
	if ownerID != nil {
		tasks = append(tasks,
			// Owner's chatroom properties.
			func() error {
				props, ok, err := h.Backend.GetRoomProperties(ctx, *ownerID)
				if err != nil {
					return errx.Wrap(err, "fetching owner's room properties")
				}
				if !ok {
					return errx.New("room properties not found", errx.Fields{"user_id": *ownerID})
				}
				data.ownerRoomProperties = &props
				return nil
			},

			// Automod verdict on the message text.
			func() error {
				verdict, err := h.Backend.AutoModCheckMessage(ctx, senderID, *ownerID, messageText)
				if err != nil {
					return errx.Wrap(err, "checking automod")
				}
				data.automodResp = verdict
				return nil
			},

			// Whether the sender is banned in the owner's channel.
			func() error {
				_, banned, err := h.Backend.GetBan(ctx, *ownerID, senderID)
				if err != nil {
					return errx.Wrap(err, "fetching ban status")
				}
				data.isSenderChannelBanned = banned
				return nil
			},

			// Whether the sender is on the owner's block list.
			func() error {
				blockedIDs, err := h.Backend.ListUserBlocks(ctx, *ownerID)
				if err != nil {
					return errx.Wrap(err, "fetching user blocks")
				}
				for _, id := range blockedIDs {
					if id == senderID {
						data.isSenderIgnored = true
						break
					}
				}
				return nil
			},
		)
	}

	if err := async.DoAsync(tasks...); err != nil {
		return extractMessageData{}, err
	}

	// Emote sets depend on the staff flag of the site user fetched above.
	emoteSets, err := h.Backend.GetEmoteSets(ctx, senderID, data.senderSiteUser.IsStaff)
	if err != nil {
		return extractMessageData{}, errx.Wrap(err, "fetching sender's emote sets")
	}
	data.senderEmoteSets = emoteSets

	return data, nil
}

// enforceMessage decides whether the sender may post the message to the given
// container. The deny reasons are checked in a fixed order and the first one
// that applies is returned; nil means the message is allowed.
func enforceMessage(params api.ExtractMessageRequest, requiredData extractMessageData) error {
	sender := requiredData.senderSiteUser
	room := requiredData.ownerRoomProperties

	switch {
	// Spam classification score reaches the drop threshold.
	case requiredData.spamLikelihood >= spamconf.DropThreshold():
		return api.ErrLikelySpam

	// Message contains a banned word.
	case requiredData.automodResp.ContainsBannedWords:
		return api.ErrContainsBannedWord

	// Sender is suspended (DMCA or TOS violation) or deleted.
	case sender.HasViolatedDMCA || sender.HasViolatedTOS || sender.IsDeleted:
		return api.ErrSenderIsSuspendedOrDeleted

	// Owner's room requires a verified account and the sender's email is
	// not verified. Skipped when no room properties were fetched.
	case room != nil && room.ChatRequireVerifiedAccount && !sender.VerifiedEmail:
		return api.ErrSenderHasUnverifiedEmail

	// Sender is banned in the owner's channel.
	case requiredData.isSenderChannelBanned:
		return api.ErrSenderIsChannelBanned

	// Sender is ignored by the owner.
	case requiredData.isSenderIgnored:
		return api.ErrSenderIsIgnored
	}

	return nil
}

// truncate trims s to at most maxChars characters (runes, not bytes).
//
// A non-positive maxChars yields the empty string; the limit can come straight
// from a client request, so a negative value must not be used as a slice
// bound. Strings that already fit are returned unchanged.
func truncate(s string, maxChars int) string {
	if maxChars <= 0 {
		return ""
	}

	// A string never has more runes than bytes, so anything this short
	// already fits and the rune conversion can be skipped.
	if len(s) <= maxChars {
		return s
	}

	runes := bytes.Runes([]byte(s))
	if len(runes) > maxChars {
		return string(runes[:maxChars])
	}
	return s
}

// sanitize removes invalid and undesired unicode from s, trims surrounding
// whitespace and returns the result in NFC normal form.
func sanitize(s string) string {
	// Only strings that are not valid UTF-8 are filtered; every rune that
	// decodes to utf8.RuneError is dropped from them.
	if !utf8.ValidString(s) {
		s = strings.Map(func(r rune) rune {
			if r == utf8.RuneError {
				return -1
			}
			return r
		}, s)
	}

	// Remove characters that break rendering, then runs of Hangul fillers.
	withoutBad := badUnicodeRe.ReplaceAllString(s, "")
	withoutFillers := hangulFillerRegex.ReplaceAllString(withoutBad, "")

	return norm.NFC.String(strings.TrimSpace(withoutFillers))
}

// censor scans s for banned phrases and replaces every matched word with
// replacement. s is split on single spaces and re-joined the same way, so the
// original whitespace is preserved.
//
// Phrases may contain the wildcard character. A phrase is ignored unless at
// least 3 bytes remain after trimming wildcards from both ends; the length is
// measured in bytes on purpose, so short words in multi-byte scripts (e.g.
// Chinese) still qualify. When several phrases match at the same position the
// longest phrase wins.
//
// It returns the censored string and the original words that were replaced.
// The phrases slice passed in is not modified.
func censor(s, replacement string, phrases []string) (censored string, found []string) {
	// bodyWords contains space-delimited words. Consecutive spaces show up
	// as empty-string tokens, e.g. "   " (3 spaces) becomes four empty
	// tokens.
	bodyWords := strings.Split(s, " ")

	// Filter into a fresh slice so the caller's slice is neither reordered
	// nor aliased.
	candidates := make([]string, 0, len(phrases))
	for _, phrase := range phrases {
		if len(strings.Trim(phrase, wildcard)) >= 3 {
			candidates = append(candidates, phrase)
		}
	}

	// Longest phrase first, so the first match found at a position is the
	// largest one and the search can stop there. The stable sort keeps the
	// caller's order among phrases of equal length.
	sort.SliceStable(candidates, func(i, j int) bool {
		return len(candidates[i]) > len(candidates[j])
	})

	// Tokenize each phrase once instead of once per body word.
	phraseWordLists := make([][]string, len(candidates))
	for i, phrase := range candidates {
		phraseWordLists[i] = strings.Fields(phrase)
	}

	for i := 0; i < len(bodyWords); i++ {
		// An empty token is whitespace, not a word; a match cannot start
		// on it.
		if bodyWords[i] == "" {
			continue
		}
		for _, phraseWords := range phraseWordLists {
			matchedIndices := findPhraseMatchIndices(phraseWords, bodyWords, i)
			if len(matchedIndices) == 0 {
				continue
			}
			for _, matchedIndex := range matchedIndices {
				found = append(found, bodyWords[matchedIndex])
				bodyWords[matchedIndex] = replacement
			}
			// Resume after the last censored token; the matched indices
			// are in ascending order.
			i = matchedIndices[len(matchedIndices)-1]
			break
		}
	}
	return strings.Join(bodyWords, " "), found
}

// findPhraseMatchIndices tries to match the words of a phrase, in order,
// against subjWords starting at index. Matching is case-insensitive and each
// phrase word is treated as a wildcard pattern.
//
// Empty tokens in subjWords (produced by consecutive spaces) are skipped
// between phrase words without consuming a phrase word. A match must start on
// a non-empty token and must account for every phrase word.
//
// It returns the indices of the matched tokens in ascending order, or an empty
// (non-nil) slice when the phrase does not match.
//
// TODO: merge this and the phrasesMatchedIndices helper methods on clue into a
// unified pattern matching library
func findPhraseMatchIndices(phraseWords, subjWords []string, index int) []int {
	if len(phraseWords) == 0 || len(phraseWords) > len(subjWords)-index {
		return []int{}
	}

	result := make([]int, 0, len(phraseWords))
	pos := index
	for _, phraseWord := range phraseWords {
		// Step over whitespace tokens, but only once the match has begun.
		if len(result) > 0 {
			for pos < len(subjWords) && subjWords[pos] == "" {
				pos++
			}
		}
		// Ran out of words, or the match would begin on whitespace.
		if pos >= len(subjWords) || subjWords[pos] == "" {
			return []int{}
		}
		if !isPatternMatch(strings.ToLower(phraseWord), strings.ToLower(subjWords[pos])) {
			return []int{}
		}
		result = append(result, pos)
		pos++
	}
	return result
}

// wildcard is the glob character understood by isPatternMatch; it stands for
// any run of characters, including none.
const wildcard = "*"

// isPatternMatch reports whether subj matches pattern, where every wildcard in
// pattern matches any (possibly empty) run of characters. A pattern without
// wildcards matches only an identical subj. The comparison is case-sensitive.
func isPatternMatch(pattern, subj string) bool {
	// A lone wildcard accepts anything.
	if pattern == wildcard {
		return true
	}

	// Without any wildcard the pattern is a plain string.
	if !strings.Contains(pattern, wildcard) {
		return subj == pattern
	}

	segments := strings.Split(pattern, wildcard)
	last := len(segments) - 1

	// The first segment is anchored at the start of subj. With a leading
	// wildcard that segment is empty, so the check passes trivially.
	if !strings.HasPrefix(subj, segments[0]) {
		return false
	}
	rest := subj[len(segments[0]):]

	// Each middle segment must occur, in order, in what remains of subj;
	// the text up to and including the occurrence is consumed.
	for _, segment := range segments[1:last] {
		at := strings.Index(rest, segment)
		if at < 0 {
			return false
		}
		rest = rest[at+len(segment):]
	}

	// The final segment is anchored at the end unless the pattern ends in
	// a wildcard.
	return strings.HasSuffix(pattern, wildcard) || strings.HasSuffix(rest, segments[last])
}

// validateExtractMessageParams checks the required fields of an
// ExtractMessage request. It returns an error when the sender ID or the
// message text is empty, and nil otherwise.
func validateExtractMessageParams(params api.ExtractMessageRequest) error {
	switch {
	case params.SenderID == "":
		return errx.New("sender ID is empty")
	case len(params.MessageText) == 0:
		return errx.New("message text is empty")
	}
	return nil
}
