package com.yandex.burp.extensions;

import java.nio.charset.Charset;
import java.util.List;

import burp.IBurpExtenderCallbacks;
import burp.IExtensionHelpers;
import burp.IHttpRequestResponse;
import burp.IRequestInfo;
import burp.IResponseInfo;
import com.google.common.base.Splitter;
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import deduper.BKTree;
import deduper.BinaryWordSeg;
import deduper.HammingDistance;
import deduper.HtmlSeg;
import deduper.Simhash;

/*
import java.io.PrintWriter;
 */

/**
 * Remembers which HTTP messages have already been seen so that callers can skip repeats.
 *
 * <p>Three independent checks are offered:
 * <ul>
 *   <li>{@link #isFullDuplicate} - response body hash already recorded;</li>
 *   <li>{@link #isDuplicateURL} - method + path + query-string pieces already recorded;</li>
 *   <li>{@link #isHalfDuplicate} - response body simhash close to one already recorded.</li>
 * </ul>
 * Every check also records the message it was given when it is not a duplicate, so calling a
 * check twice with the same message reports a duplicate the second time.
 *
 * <p>The exact-match checks are backed by a Bloom filter, which may occasionally report an
 * unseen key as already present.
 *
 * <p>NOTE(review): no synchronization is done here; whether {@code BKTree} tolerates concurrent
 * use is not visible from this file - confirm before sharing an instance across threads.
 *
 * Created by ezaitov on 25.03.2017.
 */
public class EntryPointDeduplicator {
    /** Bloom filter capacity used by the single-argument constructor. */
    private static final int DEFAULT_EXPECTED_INSERTIONS = 1000;

    /** Largest value returned by the BK-tree lookup that is still treated as a near-duplicate. */
    private static final int MAX_NEAR_DUPLICATE_DISTANCE = 3;

    /** Hash used to turn bodies and URL descriptions into short Bloom filter keys. */
    private static final HashFunction FINGERPRINT = Hashing.murmur3_32();

    private static final Splitter PARAM_SPLITTER = Splitter.on('&').trimResults();
    private static final Splitter PAIR_SPLITTER = Splitter.on("=");

    private final IBurpExtenderCallbacks callbacks;
    private final IExtensionHelpers helpers;

    private final BKTree<Long> dubTree;
    private final BloomFilter<String> dubBloomFilter;

    /**
     * Creates a deduplicator whose Bloom filter is sized for
     * {@value #DEFAULT_EXPECTED_INSERTIONS} insertions.
     *
     * @param callbacks Burp callbacks; used to obtain the extension helpers
     */
    public EntryPointDeduplicator(IBurpExtenderCallbacks callbacks) {
        this(callbacks, DEFAULT_EXPECTED_INSERTIONS);
    }

    /**
     * Creates a deduplicator with an explicitly sized Bloom filter.
     *
     * @param callbacks          Burp callbacks; used to obtain the extension helpers
     * @param expectedInsertions number of keys the Bloom filter is sized for; URL keys and body
     *                           keys share the same filter
     */
    public EntryPointDeduplicator(IBurpExtenderCallbacks callbacks, int expectedInsertions) {
        this.callbacks = callbacks;
        this.helpers = callbacks.getHelpers();
        this.dubBloomFilter =
                BloomFilter.create(Funnels.stringFunnel(Charset.defaultCharset()), expectedInsertions);
        this.dubTree = new BKTree<>(new HammingDistance());
    }

    /**
     * Reports whether a response with the same body hash has been recorded before, recording it
     * if not.
     *
     * @param messageInfo message whose response body is checked
     * @return {@code true} if the body hash is already in the filter; {@code false} if it was
     *         new, or if there is no response or no body to hash
     */
    public boolean isFullDuplicate(IHttpRequestResponse messageInfo) {
        byte[] response = messageInfo.getResponse();
        if (response == null) {
            // Nothing to hash; treated like a response without a body.
            return false;
        }

        int bodyOffset = helpers.analyzeResponse(response).getBodyOffset();
        int bodyLength = response.length - bodyOffset;
        if (bodyLength <= 0) {
            return false;
        }

        // Hash the body bytes in place instead of round-tripping them through a String.
        String bodyKey = FINGERPRINT.hashBytes(response, bodyOffset, bodyLength).toString();
        return seenBefore(bodyKey);
    }

    /**
     * Reports whether a request with the same method, path and query-string pieces has been
     * recorded before, recording it if not.
     *
     * <p>The query string is split on {@code &} and each parameter on {@code =}; every resulting
     * piece is fed into the hash.
     * NOTE(review): that means parameter values take part as well as names - confirm this is
     * intended.
     *
     * @param messageInfo message whose request is checked
     * @return {@code true} if the request key is already in the filter or the request could not
     *         be analyzed; {@code false} if it was new
     */
    public boolean isDuplicateURL(IHttpRequestResponse messageInfo) {
        IRequestInfo requestInfo =
                helpers.analyzeRequest(messageInfo.getHttpService(), messageInfo.getRequest());
        if (requestInfo == null) {
            return true;
        }

        /* not sure if Burp has a deduplication here, make it double sure */
        StringBuilder hashInput = new StringBuilder();
        hashInput.append(requestInfo.getUrl().getPath()).append('?');

        String query = requestInfo.getUrl().getQuery();
        if (query != null && !query.isEmpty()) {
            for (String param : PARAM_SPLITTER.split(query)) {
                for (String piece : PAIR_SPLITTER.split(param)) {
                    hashInput.append('&').append(piece);
                }
            }
        }

        // The "URL:" prefix keeps request keys apart from body keys in the shared filter.
        String urlKey = "URL:" + requestInfo.getMethod()
                + FINGERPRINT.hashBytes(helpers.stringToBytes(hashInput.toString())).toString();
        return seenBefore(urlKey);
    }

    /**
     * Reports whether the response body is a near-duplicate of one recorded before, recording
     * its simhash if not.
     *
     * <p>The body is tokenized with {@code HtmlSeg} when any response header line contains
     * "HTML" (case-insensitively) and with {@code BinaryWordSeg} otherwise.
     *
     * @param messageInfo message whose response body is checked
     * @return {@code true} if a recorded simhash lies within
     *         {@value #MAX_NEAR_DUPLICATE_DISTANCE} of this body's simhash, or if there is no
     *         response or no body; {@code false} if the body was new
     */
    public boolean isHalfDuplicate(IHttpRequestResponse messageInfo) {
        byte[] response = messageInfo.getResponse();
        if (response == null) {
            /* responses with no body will not be sent to active scan */
            return true;
        }

        IResponseInfo respInfo = helpers.analyzeResponse(response);
        String rawResponse = helpers.bytesToString(response);
        int bodyOffset = respInfo.getBodyOffset();
        if (rawResponse.length() <= bodyOffset) {
            /* responses with no body will not be sent to active scan */
            return true;
        }
        String body = rawResponse.substring(bodyOffset);

        boolean looksLikeHtml = respInfo.getHeaders().stream()
                .anyMatch(header -> header.toUpperCase().contains("HTML"));
        Simhash simHash;
        if (looksLikeHtml) {
            simHash = new Simhash(new HtmlSeg());
        } else {
            simHash = new Simhash(new BinaryWordSeg());
        }
        long docHash = simHash.simhash64(body);

        // The tree is only queried once it holds at least one entry.
        if (!dubTree.isEmpty() && dubTree.find(docHash) <= MAX_NEAR_DUPLICATE_DISTANCE) {
            return true;
        }
        dubTree.add(docHash);
        return false;
    }

    /**
     * Adds {@code key} to the Bloom filter and reports whether it appeared to be there already.
     *
     * @param key filter key to test and record
     * @return {@code true} if adding the key changed no bits, i.e. the filter already
     *         (possibly falsely) contained it
     */
    private boolean seenBefore(String key) {
        // put() returns the opposite of what mightContain() would have returned beforehand.
        return !dubBloomFilter.put(key);
    }
}
