# -*- coding: utf-8 -*-
import csv
import os
import sys
import yt.wrapper as yt

# Point Django at the local settings module (unless DJANGO_SETTINGS_MODULE is
# already set in the environment) and initialise it before the imports below
# that use django.conf.settings.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'travel.avia.stat_admin.local_settings')
import django
django.setup()

from datetime import datetime
from django.conf import settings
from django.core.mail.message import EmailMultiAlternatives
from email.mime.text import MIMEText

from travel.avia.stat_admin.lib import scraper
from travel.avia.stat_admin.lib.yt_helpers import configure_wrapper
from travel.avia.stat_admin.scripts.data.scraper_data import SCRAPER_DATA

# Number of (query, geo id) pairs sent to the scraper in one DownloadBatch call.
BATCH_SIZE = 25000
# Value passed to the scraper's SetResultsOnPage().
MAX_RESULTS = 50
# Geo id paired with every query handed to the scraper.
# NOTE(review): 213 is presumably Moscow in the Yandex geobase - confirm.
SERP_GEO_ID = 213
# Addresses the CSV report is e-mailed to (the name keeps its original
# spelling because other code in this file refers to it).
RECEPIENTS = ['rasp-process@yandex-team.ru',
              'arancio@yandex-team.ru',
              'bfa@yandex-team.ru']
# main() exits without doing anything unless
# settings.YANDEX_ENVIRONMENT_TYPE is one of these.
ALLOWED_ENVS = ['production', 'development']
# URL prefix used to recognise results that belong to our own host.
AVIA_HOST = 'https://avia.yandex.ru'
# YT table path; {log_date} is filled with the current date as YYYY-MM-DD.
YT_TABLE_TEMPLATE = '//home/rasp/reference/position/{log_date}'


def make_scrapper_data(data_key):
    """Parse one SCRAPER_DATA entry into scraper input and a lookup table.

    The entry is a newline-separated text block; each line holds at least
    two ``;``-separated fields: the search query and the URL expected for it.

    :param data_key: key of the entry in SCRAPER_DATA.
    :return: tuple ``(pairs, url_by_query)`` where ``pairs`` is a list of
        ``(query, SERP_GEO_ID)`` tuples in input order and ``url_by_query``
        maps each query to its expected URL.
    :raises KeyError: if ``data_key`` is not present in SCRAPER_DATA.
    :raises IndexError: if a line has no ``;`` separator.
    """
    pairs = []
    url_by_query = {}

    raw_lines = SCRAPER_DATA[data_key].strip().split('\n')
    for raw_line in raw_lines:
        fields = raw_line.strip().split(';')
        query, expected_url = fields[0], fields[1]
        pairs.append((query, SERP_GEO_ID))
        # A repeated query keeps the URL from its last occurrence.
        url_by_query[query] = expected_url

    return pairs, url_by_query


def send_results(scrapper_results):
    """Write one CSV report per links group and e-mail them as attachments.

    Each report is written to ``/tmp/positions_<dd.mm.yy>_<data_key>.csv``
    (``;``-delimited, non-numeric values quoted), then re-read, converted
    from UTF-8 to cp1251 and attached to a single message sent to RECEPIENTS.

    :param scrapper_results: dict mapping a links-group key to an iterable of
        ``(query, position, hostname_position, url_hostname_position, url)``
        tuples.
    """
    today_str = datetime.now().strftime('%d.%m.%y')
    headers = ['phrase', 'position', 'hostname_position', 'url_hostname_position', 'url']

    report_paths = []
    for data_key, results in scrapper_results.items():
        report_path = '/tmp/positions_%s_%s.csv' % (today_str, data_key)

        # The context manager closes the file even if a row fails to write.
        with open(report_path, 'w') as ofile:
            owriter = csv.writer(ofile, delimiter=';', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
            owriter.writerow(headers)

            for query, position, hostname_position, url_hostname_position, our_url in results:
                owriter.writerow([query, position, hostname_position, url_hostname_position, our_url])

        report_paths.append(report_path)

    mail = EmailMultiAlternatives(
        subject=u'Позиции в выдаче %s' % today_str,
        body=u'Файл во вложении:\n\n',
        from_email=settings.SERVER_EMAIL,
        to=RECEPIENTS,
    )

    for report_path in report_paths:
        with open(report_path, 'r') as f:
            # The file holds UTF-8 bytes (Python 2 str); decode before
            # re-encoding to cp1251 for the attachment.
            data_tab = f.read().decode('utf-8')

        attachment = MIMEText(data_tab.encode('cp1251'), 'csv', 'cp1251')
        attachment.add_header(
            'Content-Disposition', 'attachment',
            filename=os.path.basename(report_path)
        )

        mail.attach(attachment)

    mail.send()


def write_results_to_yt(scrapper_results):
    """Write per-group position aggregates for today into a YT table.

    The table path is YT_TABLE_TEMPLATE formatted with today's date; an
    existing table at that path is removed and recreated. One row is written
    per links group with the sum of numeric positions, the sum of numeric
    hostname positions and the total number of results. Afterwards the
    ``_read_schema`` attribute of the table is set.

    :param scrapper_results: dict mapping a links-group key to a list of
        ``(query, position, hostname_position, url_hostname_position, url)``
        tuples; position values that ``int()`` rejects with ValueError
        (e.g. ``'-'``) are left out of the sums.
    """
    configure_wrapper(yt)

    # Take the date once so the table name and every row's ``eventdate``
    # agree even if the run crosses midnight.
    today_str = datetime.now().strftime('%Y-%m-%d')
    yt_table = YT_TABLE_TEMPLATE.format(log_date=today_str)

    if yt.exists(yt_table):
        yt.remove(yt_table)

    yt.create('table', yt_table, recursive=True)

    rows = []
    for data_key, results in scrapper_results.items():
        position_sum = 0
        hostname_position_sum = 0

        for result in results:
            _query, position, hostname_position, _url_hostname_position, _our_url = result

            try:
                position_sum += int(position)
            except ValueError:
                # Non-numeric placeholder: the URL was not found in the results.
                pass

            try:
                hostname_position_sum += int(hostname_position)
            except ValueError:
                # Non-numeric placeholder: the host was not found in the results.
                pass

        rows.append({
            'eventdate': today_str,
            'links_group': data_key,
            'position_sum': position_sum,
            'hostname_position_sum': hostname_position_sum,
            # Counts every result, including those without a numeric position.
            'result_count': len(results),
        })

    yt.write_table(yt_table, rows, format=yt.YsonFormat())

    yt.set(
        yt_table + "/@_read_schema",
        [
            {"type": "string", "name": "eventdate"},
            {"type": "string", "name": "links_group"},
            {"type": "int64", "name": "position_sum"},
            {"type": "int64", "name": "hostname_position_sum"},
            {"type": "int64", "name": "result_count"},
        ]
    )


def RequestModifier(request):
    """Strip selected keys from ``request['per-set-parameters']`` in place.

    Keys that are absent are ignored. Returns None.

    :param request: mapping with a ``'per-set-parameters'`` mapping inside.
    :raises KeyError: if ``'per-set-parameters'`` is missing.
    """
    per_set_parameters = request['per-set-parameters']
    unwanted_keys = (
        'ignored-incorrect-region',
        'ignored-not-used-params',
        'extract-debug-info',
    )
    for unwanted_key in unwanted_keys:
        per_set_parameters.pop(unwanted_key, None)


def get_results(scrapper_data, scrapper_data_dict):
    """Scrape search results for the queries and locate our URLs in them.

    The queries are submitted to the scraper in slices of BATCH_SIZE. For
    every returned result the list of URLs is scanned for the expected URL
    and for any URL starting with AVIA_HOST.

    :param scrapper_data: list of ``(query, geo_id)`` tuples for the scraper.
    :param scrapper_data_dict: dict mapping a query to its expected URL.
    :return: list of ``(query, position, hostname_position,
        url_hostname_position, our_url)`` tuples. Positions are 1-based
        ints, or ``'-'`` when nothing matched; ``url_hostname_position`` is
        the matched AVIA_HOST URL or ``'-'``.
    :raises KeyError: if the scraper returns a query that is not a key of
        ``scrapper_data_dict``.
    """
    scrapper_results = []

    # Stepping by BATCH_SIZE yields exactly the non-empty slices, so no empty
    # trailing batch is submitted when the length is a multiple of BATCH_SIZE
    # (or zero).
    for batch_start in range(0, len(scrapper_data), BATCH_SIZE):
        batch = scrapper_data[batch_start:batch_start + BATCH_SIZE]

        result = (
            scraper.Configure()
            .ForGoogle()
            .SetMeta('7613c8e866e94cf8bb40c2c44c4db99d', 'robot-avia', 'check positions')
            .SetResultsOnPage(MAX_RESULTS)
            .SetRequestModifier(RequestModifier)
            .BuildScraper()
            .DownloadBatch(batch)
        )

        for r in result:
            # Keys of scrapper_data_dict are compared against the UTF-8
            # encoded form of the returned query.
            query = r['query'].encode('utf-8')
            urls = r['urls']
            our_url = scrapper_data_dict[query]

            hostname_position = '-'
            position = '-'
            url_hostname_position = '-'

            # NOTE(review): there is no break, so when several URLs match,
            # the LAST match wins - confirm whether the first (best) position
            # was intended.
            for rank, url in enumerate(urls, 1):
                if url.startswith(AVIA_HOST):
                    hostname_position = rank
                    url_hostname_position = url

                if url == our_url:
                    position = rank

            scrapper_results.append((query, position, hostname_position, url_hostname_position, our_url))

    return scrapper_results


def main():
    """Collect positions for every SCRAPER_DATA group, then mail and store them.

    Exits immediately (via ``sys.exit()`` with no argument) when
    ``settings.YANDEX_ENVIRONMENT_TYPE`` is not listed in ALLOWED_ENVS.
    """
    if settings.YANDEX_ENVIRONMENT_TYPE not in ALLOWED_ENVS:
        sys.exit()

    results_by_group = {}
    for group_key in SCRAPER_DATA.keys():
        queries, url_by_query = make_scrapper_data(group_key)
        results_by_group[group_key] = get_results(queries, url_by_query)

    # E-mail first, then YT: same order as the reports were always produced.
    send_results(results_by_group)
    write_results_to_yt(results_by_group)

# Run the report only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
