#!/home/lester/anaconda3/bin/python

import sys
from http.server import BaseHTTPRequestHandler, HTTPServer
import socket
import traceback
import numpy
import io
import datetime
import os

import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
import pandas as pd
from sklearn.externals import joblib

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_recall_curve

import requests

class TCMSDetector:
    """Score a token string with a pickled vectorizer/classifier pair.

    The vectorizer must provide ``transform`` and the predictor must
    provide ``predict_proba``; both objects are loaded from disk with
    ``joblib``.
    """

    # Default decision threshold; a score strictly above it counts as
    # "detected". Can be overridden per instance or per call.
    threshold = 0.925

    def __init__(self, filename_vectorizer, filename_predictor, threshold=None):
        """Load the vectorizer and predictor from their pickle files.

        Args:
            filename_vectorizer: Path of the pickled vectorizer.
            filename_predictor: Path of the pickled classifier.
            threshold: Optional decision threshold for this instance;
                when omitted the class default (0.925) is used.
        """
        # NOTE(review): joblib.load unpickles arbitrary objects -- only
        # point this at model files from a trusted source.
        self.vectorizer = joblib.load(filename_vectorizer)
        self.predictor = joblib.load(filename_predictor)
        if threshold is not None:
            self.threshold = threshold

    def get_probability(self, tokens_text, threshold=None):
        """Score one token string.

        Args:
            tokens_text: The text to classify, passed to the vectorizer
                as a one-element array.
            threshold: Optional override of the decision threshold for
                this call only.

        Returns:
            A ``(score, detected)`` pair. ``score`` is the value in
            column 1 of ``predict_proba`` for the single sample
            (presumably the "is this CMS" class -- confirm against the
            training code); ``detected`` is ``score > threshold``.
        """
        if threshold is None:
            threshold = self.threshold
        X = np.array([tokens_text])
        tfidf = self.vectorizer.transform(X)
        p = self.predictor.predict_proba(tfidf)[:, 1][0]
        return p, p > threshold

def get_tokens(host):
    """Ask the token service on localhost:8371 for the tokens of *host*.

    *host* is interpolated into the query string exactly as given (no
    escaping is applied here). The body of the HTTP response is returned
    as text, whatever the response status was.
    """
    url = 'http://localhost:8371/gettokens?host=%s' % host
    response = requests.get(url)
    return response.text

# One detector per supported CMS, loaded once when the module is imported.
# The model files are looked up relative to the current working directory.
bitrix = TCMSDetector(
    filename_vectorizer="model_vectorizer_bitrix.pkl",
    filename_predictor="model_svmsgd_bitrix.pkl",
)
joomla = TCMSDetector(
    filename_vectorizer="model_vectorizer_joomla.pkl",
    filename_predictor="model_svmsgd_joomla.pkl",
)
drupal = TCMSDetector(
    filename_vectorizer="model_vectorizer_drupal.pkl",
    filename_predictor="model_svmsgd_drupal.pkl",
)
opencart = TCMSDetector(
    filename_vectorizer="model_vectorizer_opencart.pkl",
    filename_predictor="model_svmsgd_opencart.pkl",
)
wordpress = TCMSDetector(
    filename_vectorizer="model_vectorizer_wordpress.pkl",
    filename_predictor="model_svmsgd_wordpress.pkl",
)
dle = TCMSDetector(
    filename_vectorizer="model_vectorizer_dle.pkl",
    filename_predictor="model_svmsgd_dle.pkl",
)

class HttpProcessor(BaseHTTPRequestHandler):
    """Request handler that serves the CMS-detection page.

    A GET whose path contains ``/query?host=<host>`` renders ``page.html``
    with a results table substituted for the ``$body`` placeholder; any
    other path renders the page with the placeholder removed.
    """

    # Literal text that precedes the host value in a query URL.
    QUERY_MARKER = "/query?host="

    def get_result_row(self, name, result):
        """Return one HTML table row describing a detector result.

        Args:
            name: Display name of the CMS.
            result: ``(score, detected)`` pair; the name is rendered in
                bold when ``detected`` is truthy.

        Returns:
            A ``<tr>`` string with the name, the score and the verdict
            ("DETECTED" / "NOT DETECTED").
        """
        score, detected = result[0], result[1]
        label = "<b>" + name + "</b>" if detected else name
        verdict = "DETECTED" if detected else "NOT DETECTED"
        return "<tr><td>%s</td><td>%s</td><td>%s</td></tr>" % (label, str(score), verdict)

    def _build_results_table(self, tokens):
        """Run every detector on *tokens* and return the HTML results table."""
        detectors = (
            ("1C-Bitrix", bitrix),
            ("OpenCart", opencart),
            ("Drupal", drupal),
            ("Joomla", joomla),
            ("WordPress", wordpress),
            ("DLE", dle),
        )
        rows = "".join(
            self.get_result_row(label, detector.get_probability(tokens))
            for label, detector in detectors
        )
        return "<table style='width:400px'>" + rows + "</table>"

    def _send_page(self, content):
        """Send ``page.html`` with ``$body`` replaced by *content* as a 200 response."""
        # Read (and close) the template before any header is written so a
        # missing file cannot leave a half-sent response behind.
        with open("page.html") as page_file:
            page = page_file.read()
        body = page.replace("$body", content).encode()

        self.send_response(200)
        self.send_header("content-type", "text/html")
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):
        """Handle a GET request.

        Responds with 400 when the path contains ``/query`` but no
        ``/query?host=`` part, and with 500 when fetching the tokens or
        running the detectors fails; otherwise sends the rendered page.
        """
        content = ""

        if "/query" in self.path:
            parts = self.path.split(self.QUERY_MARKER)
            if len(parts) < 2:
                self.send_error(400, "Missing 'host' query parameter")
                return
            host = parts[1]

            # Do all the work before the status line goes out, so that a
            # failure is reported as an error instead of an empty 200.
            try:
                content = self._build_results_table(get_tokens(host))
            except Exception:
                self.log_error(
                    "CMS detection failed for %r:\n%s", host, traceback.format_exc()
                )
                self.send_error(500, "CMS detection failed")
                return

        self._send_page(content)

class HTTPServerV6(HTTPServer):
    """HTTPServer whose listening socket is created with the IPv6 address family."""

    address_family = socket.AF_INET6

def main():
    """Run the web server until it is interrupted from the keyboard.

    The server listens on ``::`` at port 8370 unless a different port is
    given as the first command-line argument. Exits with an error
    message if that argument is not an integer.
    """
    port = 8370

    if len(sys.argv) > 1:
        try:
            port = int(sys.argv[1])
        except ValueError:
            sys.exit("Invalid port number: %r" % sys.argv[1])

    print("Starting on port %d..." % port)

    server = HTTPServerV6(("::", port), HttpProcessor)

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("Terminating...")
    finally:
        # Release the listening socket however the serve loop ended.
        server.server_close()

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()
