#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import urllib2
import json
import argparse

def HandleOption():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: a parser with a single required option,
        ``-f`` / ``--file``, whose value is stored under ``file``.
    """
    option_parser = argparse.ArgumentParser()
    option_parser.add_argument(
        "-f", "--file",
        dest="file",
        required=True,
        help="file with queries",
    )
    return option_parser

def _host_in(hostname, domains):
    """Return True if any entry of *domains* is a substring of *hostname*."""
    return any(domain in hostname for domain in domains)


def main():
    """Run every query from the input file and report replacement hosts.

    The file given with ``-f``/``--file`` is read line by line; the text
    before the first tab of each line is the query.  Lines whose query is
    empty are skipped.  For each query the search URL is fetched and the
    documents under the ``searchdata.docs`` key of the JSON response are
    walked in order:

    * a document in the first five positions whose hostname contains an
      entry of ``bad`` or ``middle`` increments ``caught``;
    * a document past the fifth position increments ``replace`` and, if its
      hostname matches ``bad`` / ``middle``, the corresponding counter;
      hostnames matching ``ok`` increment ``ok_c``;
    * a document past the fifth position that matched neither ``bad`` nor
      ``middle`` is written to stdout as an ``Unknown`` line;
    * the walk stops as soon as ``caught == replace``.

    After a successful walk one tab-separated ``Stats`` line is written to
    stdout: documents visited, ok count, middle count, bad count.  If the
    response cannot be parsed or lacks the expected keys, ``json error`` is
    written to stderr and no ``Stats`` line is produced for that query.

    Errors raised while opening the file or fetching the URL are not
    handled here and propagate to the caller.
    """
    ok = ["xalyava.tv", "bestserials.net", "eurouser.tv", "4doma.info",
          "serialguru.ru", "kino-goda.com", "kinozadrot.net", "youtube.com",
          "1tv.ru", "kinopoisk.ru"]
    bad = ["mega-torrent.org", "ekhoplanet.ru", "alt.megapeer.org",
           "itelevisor.ru", "newsowik.kodim0308prm.mil.id", "rxtv.ru",
           "kinoclips.net", "07kbr.ru"]
    middle = ["vkdiz.ru", "new-serials.ru", "wordyou.ru", "dok911.ru",
              "nizaika.ru", "kino-serial.net", "fanserials.tv", "uakino.net",
              "pressa.today", "rsute.ru", "namkino.ru", "vk.com"]
    # Positions 1..top_n are the results being checked; later positions are
    # the candidates that would replace them.
    top_n = 5

    args = HandleOption().parse_args()
    with open(args.file, "r") as f:
        queries = f.readlines()

    for q in queries:
        # Strip the line ending first: a line without a tab would otherwise
        # carry its newline into the request.
        text = q.rstrip('\r\n').split('\t')[0]
        if not text:
            continue
        adapt = urllib2.quote(text)
        path = 'https://hamster.yandex.ru/touchsearch?&text='+adapt+'&srcask=QUICK,QUICK_SAMOHOD&rearr=frp=0.99&noredirect=1&lr=213&numdoc=20&json_dump=searchdata.docs'
        u = urllib2.urlopen(path)
        try:
            data = u.read().decode('utf8')
        finally:
            # Release the connection even if reading or decoding fails.
            u.close()

        count = 0
        caught = 0
        replace = 0
        ok_c = 0
        bad_c = 0
        middle_c = 0
        try:
            result = json.loads(data)
            for r in result['searchdata.docs']:
                count += 1
                below_top = count > top_n
                rep = replace
                hostname = r['url_parts']['hostname']

                if _host_in(hostname, bad):
                    if below_top:
                        bad_c += 1
                        replace += 1
                    else:
                        caught += 1
                # Each list is matched at most once per document.
                if _host_in(hostname, middle):
                    if below_top:
                        middle_c += 1
                        replace += 1
                    else:
                        caught += 1
                if below_top and _host_in(hostname, ok):
                    ok_c += 1

                # NOTE(review): documents from the ok list do not bump
                # `replace` above, so they are also reported here as
                # 'Unknown' - confirm whether that is intended.
                if below_top and replace == rep:
                    sys.stdout.write(
                        '\t'.join(['Unknown', hostname, r['url']]) + '\n')
                    replace += 1
                # Every caught result has a replacement: nothing more to do.
                if caught == replace:
                    break
        except (ValueError, KeyError, TypeError):
            # ValueError: malformed JSON; KeyError/TypeError: the response
            # does not have the expected structure.
            sys.stderr.write('json error\n')
            continue
        sys.stdout.write('\t'.join(['Stats', str(count), str(ok_c),
                                    str(middle_c), str(bad_c)]) + '\n')

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
