#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import sys
import pandas as pd
sys.path.append("/home/terminutz/arcadia/quality/yaqlib")
from bs4 import BeautifulSoup
from urllib.request import urlopen
from serpparser.tagger import SerpTagger
from serpparser.serp_parser_common import SerpMetadata
from multiprocessing import Pool
import argparse
import codecs
import os

def parse_class(class_name):
    """Return the adapter name encoded in a CSS class, if there is one.

    A class carries an adapter name when it begins with the
    ``t-construct-adapter__`` marker; the name is everything after the
    marker (possibly an empty string).

    Args:
        class_name: a single CSS class name.

    Returns:
        The text following the marker, or ``None`` when ``class_name`` does
        not start with the marker.
    """
    marker = "t-construct-adapter__"
    head, found, adapter = class_name.partition(marker)
    # The marker only counts when it sits at the very start of the class name.
    if found and not head:
        return adapter
    return None

def get_classes(url):
    """Download a page and collect adapter names from its first ``.serp-item``.

    The result contains every name that ``parse_class`` extracts from the
    element's CSS classes, plus fixed markers for nested blocks:

    * ``"bno_app"`` when the item contains ``a.bno__app``;
    * ``"ua_social_block"`` when it contains ``.bno__social``;
    * ``"social_bna_vk"`` when it contains ``.bno__showcase.i-bem``.

    Args:
        url: address of the HTML page to download.

    Returns:
        A set of adapter name strings; empty when the page has no
        ``.serp-item`` element.

    Errors raised by ``urlopen`` are not caught here and propagate to the
    caller.
    """
    from contextlib import closing

    # closing() guarantees the HTTP response is released even if read() fails.
    with closing(urlopen(url)) as response:
        raw_html = response.read()
    bs = BeautifulSoup(raw_html, "html.parser")

    adapters = set()
    serp_item = bs.select_one(".serp-item")
    if serp_item is None:
        # A page without any result item has no adapters to report.
        return adapters

    for class_name in serp_item.get('class') or []:
        adapter = parse_class(class_name)
        if adapter is not None:
            adapters.add(adapter)

    if serp_item.select_one("a.bno__app"):
        adapters.add("bno_app")
    if serp_item.select_one(".bno__social"):
        adapters.add("ua_social_block")
    if serp_item.select_one(".bno__showcase.i-bem"):
        adapters.add("social_bna_vk")
    return adapters

def argument_parser():
    """Parse the script's command-line options from ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``input_path``,
        ``out_path`` and ``platform``. Each is a string, or ``None`` when
        the corresponding option was not given.
    """
    parser = argparse.ArgumentParser(description='Get parameters')
    parser.add_argument(
        "--in",
        dest="input_path",
        type=str,
        help="path to the directory with input TSV files"
    )
    parser.add_argument(
        "--out",
        dest="out_path",
        type=str,
        help="path to output file"
    )
    parser.add_argument(
        "--platform",
        dest="platform",
        type=str,
        help="platform name passed to SerpMetadata"
    )
    return parser.parse_args()

def main():
    """Build one TSV of per-query snippet classes from a directory of TSVs.

    Every file in the ``--in`` directory is read as a tab-separated table.
    For each row the page referenced by the ``html-sys-0`` column is fetched
    with ``get_classes`` (in a pool of 8 worker processes). The ``query``,
    ``query_region`` (written as ``region``) and ``wins-sys-0-vs-1`` (written
    as ``wins``) columns are copied next to the resulting ``classes`` column,
    and the rows of all files are written to the ``--out`` path.
    """
    args = argument_parser()

    # NOTE(review): neither object is used below; they are still constructed
    # in case their constructors validate the platform or have other side
    # effects. Confirm and remove if not needed.
    yandex_meta = SerpMetadata("yandex", args.platform)
    tagger = SerpTagger()

    result_dfs = []
    pool = Pool(8)
    try:
        # Sorted so that the order of rows in the output is reproducible.
        for filename in sorted(os.listdir(args.input_path)):
            print("Proccesing file: {}".format(filename))
            df = pd.read_csv(os.path.join(args.input_path, filename), sep="\t")

            result_df = pd.DataFrame()
            result_df['query'] = df['query']
            result_df['region'] = df['query_region']
            result_df['wins'] = df['wins-sys-0-vs-1']
            result_df['classes'] = pool.map(
                get_classes, df['html-sys-0'].tolist())
            result_dfs.append(result_df)
    finally:
        # Always release the worker processes, even when a file fails.
        pool.close()
        pool.join()

    if result_dfs:
        combined = pd.concat(result_dfs)
    else:
        # pd.concat refuses an empty list; emit a header-only file instead.
        combined = pd.DataFrame(columns=['query', 'region', 'wins', 'classes'])
    combined.to_csv(args.out_path, sep="\t", index=False)


if __name__ == "__main__":
    main()
