#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import sys
import pandas as pd
sys.path.append("/home/terminutz/arcadia/quality/yaqlib")
from bs4 import BeautifulSoup
from urllib.request import urlopen
from serpparser.tagger import SerpTagger
from serpparser.serp_parser_common import SerpMetadata
from multiprocessing import Pool
import argparse
import codecs
import os

from yql.api.v1.client import YqlClient

def parse_class(class_name):
    """Return the adapter name encoded in a CSS class, or None.

    A class of the form ``t-construct-adapter__<name>`` yields ``<name>``
    (which is the empty string when nothing follows the prefix). Any other
    class yields ``None``.
    """
    prefix = "t-construct-adapter__"
    if not class_name.startswith(prefix):
        return None
    return class_name[len(prefix):]

def get_classes(url):
    """Download a SERP page and collect the adapter names of its first item.

    The page at ``url`` is fetched and parsed with BeautifulSoup's
    ``html.parser`` backend. Only the first element matching ``.serp-item``
    is inspected.

    Args:
        url: Address of the HTML page to download.

    Returns:
        set: The suffixes of the item's ``t-construct-adapter__*`` CSS
        classes, plus the markers ``"bno_app"``, ``"ua_social_block"`` and
        ``"social_bna_vk"`` when the corresponding nested elements are
        found. An empty set when the page has no ``.serp-item`` element.

    Raises:
        Any error raised by ``urlopen`` or while reading the response is
        propagated to the caller; download failures are not swallowed.
    """
    response = urlopen(url)
    try:
        raw_html = response.read()
    finally:
        # Always release the connection. try/finally is used instead of
        # `with` so this does not rely on the response being a context
        # manager.
        response.close()

    soup = BeautifulSoup(raw_html, "html.parser")
    serp_item = soup.select_one(".serp-item")
    if serp_item is None:
        # No result item on the page: report "no adapters" instead of
        # failing with AttributeError on None.
        return set()

    adapters = set()
    # Tag.get() returns None when the attribute is absent.
    for class_name in serp_item.get('class') or ():
        adapter = parse_class(class_name)
        if adapter is not None:
            adapters.add(adapter)

    # Extra markers detected by nested elements rather than by the item's
    # own CSS classes: (CSS selector, marker added when it matches).
    nested_markers = (
        ("a.bno__app", "bno_app"),
        (".bno__social", "ua_social_block"),
        (".bno__showcase.i-bem", "social_bna_vk"),
    )
    for selector, marker in nested_markers:
        if serp_item.select_one(selector):
            adapters.add(marker)
    return adapters

def argument_parser():
    """Parse the script's command-line options from ``sys.argv``.

    ``--out``, ``--sbs_ticket`` and ``--sysid`` are required: without them
    the script would either build a query containing ``None`` or finish
    without writing its result anywhere. ``--token`` stays optional.

    Returns:
        argparse.Namespace: with attributes ``out_path``, ``yql_token``,
        ``sbs_ticket`` and ``sysid`` (all strings; ``yql_token`` is ``None``
        when ``--token`` is not given).

    Raises:
        SystemExit: raised by argparse on unknown options or when a required
        option is missing.
    """
    parser = argparse.ArgumentParser(description='Get parameters')
    parser.add_argument(
        "--out",
        dest="out_path",
        type=str,
        required=True,
        help="path to output file"
    )
    parser.add_argument(
        "--token",
        dest="yql_token",
        type=str,
        help="token passed to the YQL client"
    )
    parser.add_argument(
        "--sbs_ticket",
        dest="sbs_ticket",
        type=str,
        required=True,
        help="value matched against `ticket-id` in the query"
    )
    parser.add_argument(
        "--sysid",
        dest="sysid",
        type=str,
        required=True,
        help="value matched against the page's `sys-id` in the query"
    )
    return parser.parse_args()

if __name__ == "__main__":
    # Script entry point: select the pages of one SBS ticket/system from YQL,
    # download every page in parallel, extract its adapter classes and write
    # a tab-separated table with the columns query, region, classes.
    args = argument_parser()

    client = YqlClient(db='hahn', token=args.yql_token)
    # NOTE(review): the ticket id and sys-id are interpolated into the query
    # text verbatim. They come from the operator's own command line, but a
    # parameterized query would be safer if the client supports one.
    request = client.query(
        """SELECT
        CAST(Yson::LookupString(`query`, "text") as String) as query,
        CAST(Yson::LookupInt64(`query`, "region") as Int64) as region,
        CAST(Yson::LookupString(`page`, "html-url") as String) as `html-url`
        FROM `home/sbs/prod/sbs_plan_pages`
        WHERE `ticket-id` = {}
        and Yson::LookupString(`page`, "sys-id") == "{}"
        """.format(args.sbs_ticket, args.sysid),
        syntax_version=1
    )

    # The worker pool is created before the query is run, as in the
    # original ordering of these two steps.
    pool = Pool(8)
    try:
        request.run()
        queries = []
        regions = []
        htmls = []
        for table in request.get_results():
            table.fetch_full_data()
            # Columns follow the SELECT order: query, region, html-url.
            for row in table.rows:
                queries.append(row[0])
                regions.append(row[1])
                htmls.append(row[2])
        # Download once, after every result table has been read. Mapping
        # inside the loop would re-fetch all previously collected URLs for
        # each additional table.
        classes = pool.map(get_classes, htmls)
    finally:
        # On success every result is already collected; on failure there is
        # nothing worth waiting for. Either way stop the workers.
        pool.terminate()
        pool.join()

    # Columns are assigned one by one to keep the output column order fixed.
    result_df = pd.DataFrame()
    result_df["query"] = queries
    result_df["region"] = regions
    result_df["classes"] = classes

    result_df.to_csv(args.out_path, sep="\t", index=False)
