# coding=utf-8
import codecs
import os
import shutil

import scraper

# Account name passed as the first argument to SetMeta() when the scraper
# is configured in download_htmls().
USERNAME = 'robot-patologoanatom'

def download_htmls(queries_list, out_dir):
    with open('oauth_token.txt') as f:
        oauth_token = f.read().strip()

    scraper_obj = (scraper.Configure()
               # .ForGoogleWeb()
               .ForGoogleTouch()
               # .ForYandex()
               .SetMeta(USERNAME, 'robot-patologoanatom')
               .SetVerbose()
               .SetUseProdScraper()
               .SetOauthToken(oauth_token)
               .BuildScraper())

    for k, v in scraper_obj.params.__dict__.items():
        print "\t", k, ':', v

    ticket = scraper_obj.StartBatch(queries_list)
    # ticket = "1452773824796"
    print "ticket", ticket
    results = scraper_obj.WaitBatch(ticket)

    print "out files to", out_dir
    shutil.rmtree(out_dir, ignore_errors=True)
    try:
        os.mkdir(out_dir)
    except:
        pass
    for i, result in enumerate(results):
        status = result["status"]["status"]
        query = result["serp-request-explained"]["per-query-parameters"]["query-text"]
        fname = os.path.join(out_dir, "q%05d.htm" % i)
        print i, status, fname, query.encode('utf8')
        if status != "done":
            continue
        html = result["serp-page"]["raw-content"]
        with codecs.open(fname, 'wb', encoding='utf8') as fo:
            fo.write(html)

if __name__ == '__main__':
    # Script entry point: take the first `sample_size` lines of the query
    # file, strip surrounding whitespace, and download their result pages
    # into the "out" directory.
    queries_fname = "top_queries_uk.txt"
    sample_size = 10
    with open(queries_fname) as f:
        # zip() stops as soon as range(sample_size) is exhausted, so at most
        # `sample_size` lines are taken (fewer if the file is shorter).
        queries_list = [raw.strip() for _, raw in zip(range(sample_size), f)]
    download_htmls(queries_list, "out")
