#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import codecs
import argparse
import datetime
import requests
import time
import gzip
import StringIO
import json
from collections import defaultdict, Counter
requests.packages.urllib3.disable_warnings()
from nile.api.v1 import statface as ns


def check_serpset(id_):
    """Tell whether every metric check passed for the given serpset.

    Requests the ``video_nonplayable-5_share_touch`` metric observation from
    the metrics-calculation service, passing ``id_`` as both the left and the
    right serpset, and inspects the ``checks`` list of the first returned
    element.

    The request is attempted up to three times. An exception and an empty
    (falsy) JSON body both count as a failed attempt, so the loop is always
    bounded.

    :param id_: serpset identifier substituted into the request URL.
    :return: ``True`` when every check has both ``leftState`` and
        ``rightState`` equal to ``'PASSED'``, otherwise ``False``.
    :raises RuntimeError: when no usable response was obtained after all
        attempts.
    """
    max_attempts = 3
    url = (
        'https://metrics-calculation.qloud.yandex-team.ru'
        '/api/qex/metric-observation?regional=RU&evaluation=VIDEO'
        '&left-serp-set={s}&right-serp-set={s}'
        '&metric=video_nonplayable-5_share_touch'.format(s=id_)
    )

    observations = None
    last_error = 'empty response'
    for attempt in range(1, max_attempts + 1):
        try:
            # NOTE(review): certificate verification is disabled here, as in
            # the rest of the file -- confirm this is still required.
            observations = requests.get(url, verify=False).json()
        except Exception as e:
            # Best-effort retry: any failure of the request or of the JSON
            # decoding is reported and retried.
            last_error = e
            print('oops: {}'.format(e))
        else:
            if observations:
                break
            # A falsy body used to restart the loop immediately and forever;
            # it is now counted as a failed attempt.
            last_error = 'empty response'
            print('oops: {}'.format(last_error))
        if attempt < max_attempts:
            time.sleep(5)

    if not observations:
        raise RuntimeError(
            'could not get metric observation for serpset {}: {}'.format(
                id_, last_error
            )
        )

    return all(
        check['leftState'] == 'PASSED' and check['rightState'] == 'PASSED'
        for check in observations[0]['checks']
    )


def get_date(id_):
    """Return the calendar date recorded for the given serpset.

    Fetches the serpset info JSON and parses the part of its ``date`` field
    that precedes the ``'T'`` separator as ``YYYY-MM-DD``.

    The request is attempted up to three times. An exception and an empty
    (falsy) JSON body both count as a failed attempt, so the loop is always
    bounded.

    :param id_: serpset identifier substituted into the request URL.
    :return: ``datetime.date`` parsed from the ``date`` field.
    :raises RuntimeError: when no usable response was obtained after all
        attempts.
    """
    max_attempts = 3
    url = (
        'http://metrics.yandex-team.ru/services/api/serpset/info'
        '?id={s}'.format(s=id_)
    )

    info = None
    last_error = 'empty response'
    for attempt in range(1, max_attempts + 1):
        try:
            info = requests.get(url, verify=False).json()
        except Exception as e:
            # Best-effort retry: any failure of the request or of the JSON
            # decoding is reported and retried.
            last_error = e
            print('oops: {}'.format(e))
        else:
            if info:
                break
            # A falsy body used to restart the loop immediately and forever;
            # it is now counted as a failed attempt.
            last_error = 'empty response'
            print('oops: {}'.format(last_error))
        if attempt < max_attempts:
            time.sleep(5)

    if not info:
        raise RuntimeError(
            'could not get info for serpset {}: {}'.format(id_, last_error)
        )

    # Only the date part of the timestamp is kept.
    day = info['date'].split('T')[0]
    return datetime.datetime.strptime(day, '%Y-%m-%d').date()


def get_serps(id_):
    """Download and decode the serps of the given serpset.

    Requests the ``availability_mobile`` aspect export from the
    metrics-calculation service, treats the response body as a gzip stream
    and parses the decompressed data as JSON.

    The request is attempted up to four times (the bound this function has
    always used). A non-OK HTTP status counts as a failed attempt;
    previously such a response restarted the loop immediately and without
    limit, because a ``requests.Response`` is falsy when its status is an
    error.

    :param id_: serpset identifier substituted into the request URL.
    :return: the decoded JSON document.
    :raises RuntimeError: when no successful response was obtained after all
        attempts.
    """
    max_attempts = 4
    url = (
        'https://metrics-calculation.qloud.yandex-team.ru/api/json/'
        '{}?regional=RU&evaluation=VIDEO&aspect=availability_mobile'
        '&absolute=false&serpset-filter=onlySearchResult'.format(id_)
    )

    response = None
    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            candidate = requests.get(url, verify=False)
            # Turn 4xx/5xx answers into exceptions so they are counted.
            candidate.raise_for_status()
        except Exception as e:
            # Best-effort retry: every failure is reported and retried.
            last_error = e
            print('oops: {}'.format(e))
        else:
            response = candidate
            break
        if attempt < max_attempts:
            time.sleep(5)

    if response is None:
        raise RuntimeError(
            'could not download serpset {}: {}'.format(id_, last_error)
        )

    # The payload itself is gzip-compressed; close the wrapper once parsed.
    with gzip.GzipFile(fileobj=StringIO.StringIO(response.content)) as gz:
        return json.load(gz)


def parse_date(s):
    """Convert a ``YYYY-MM-DD`` string into a ``datetime.date``.

    :param s: date string in ``%Y-%m-%d`` format.
    :return: the corresponding ``datetime.date``.
    :raises ValueError: when ``s`` does not match the format.
    """
    parsed = datetime.datetime.strptime(s, '%Y-%m-%d')
    return parsed.date()


def main():
    """Export URLs and serp data of recent serpsets that passed all checks.

    Command line: a positional ``date`` (``YYYY-MM-DD``) plus the required
    ``--output_urls`` and ``--output_serpsets`` file paths.

    Steps:

    1. List RU/VIDEO serpsets with ``cronSerpDownloadId=2624`` for the window
       from 31 days before ``date`` to 1 day before ``date``.
    2. Keep the serpsets for which ``check_serpset`` returns ``True``.
    3. Map each kept serpset to its date via ``get_date``.
    4. Download the serps per date, collecting ``pageUrl`` of every
       component that has a ``judgements.factor_vdp`` key.
    5. Write the sorted URLs (newline-separated) to ``--output_urls`` and the
       per-date serps as indented JSON to ``--output_serpsets``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('date')
    parser.add_argument('--output_urls', required=True)
    parser.add_argument('--output_serpsets', required=True)
    args = parser.parse_args()

    args.date = parse_date(args.date)

    serpsets = requests.get(
        'https://metrics.yandex-team.ru/services/api/serpset/list/RU/VIDEO/'
        '?from={}T00:00:00.000&to={}T00:00:00.000'
        '&cronSerpDownloadId=2624'.format(
            args.date - datetime.timedelta(days=31),
            args.date - datetime.timedelta(days=1)
        ), verify=False
    ).json()

    print('getting serpsets')
    good_serpsets = [
        serpset for serpset in serpsets if check_serpset(serpset)
    ]

    print('getting dates')
    # Keyed by date: when several serpsets share a date, the one listed last
    # replaces the earlier ones.
    dates = {
        get_date(id_): id_ for id_ in good_serpsets
    }

    urls = set()
    serpsets_data = {}

    print('getting serps')
    for date in sorted(dates):
        id_ = dates[date]
        serpset = get_serps(id_)
        for serp in serpset:
            if 'components' not in serp:
                continue
            for comp in serp['components']:
                if 'judgements.factor_vdp' not in comp:
                    continue
                urls.add(comp['componentUrl']['pageUrl'])
        serpsets_data[str(date)] = serpset

    with codecs.open(args.output_urls, 'w', 'utf8') as f:
        f.write('\n'.join(sorted(urls)))
    # Open the second output in a context manager as well, so the handle is
    # flushed and closed deterministically instead of being leaked.
    with codecs.open(args.output_serpsets, 'w', 'utf8') as f:
        json.dump(serpsets_data, f, indent=2, sort_keys=True)


# Run the export only when this file is executed as a script; importing the
# module does not call main().
if __name__ == "__main__":
    main()
