#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import sys
from bs4 import BeautifulSoup
import copy
import random
import datetime
import urllib

def main(querynum=250, headers=None, out=None):
    """Print search queries derived from the news headers on yandex.ru.

    Each header is normalised, split into words, stripped of short words and
    expanded into several shorter word combinations.  The distinct
    combinations are shuffled and at most *querynum* of them are written as
    tab-separated lines of the form ``<query>\\t213\\t<timestamp>_<index>``,
    where the timestamp is the local time formatted as ``%Y-%m-%d_%H:%M`` and
    the index counts the emitted lines from 0.  The raw headers are echoed to
    stderr for debugging.

    Args:
        querynum: Maximum number of query lines to write.
        headers: Optional iterable of header texts (unicode strings).  When
            omitted, the headers are downloaded from https://yandex.ru.
        out: Optional file-like object receiving the query lines; defaults to
            ``sys.stdout``.  On Python 2 it is given UTF-8 byte strings, on
            Python 3 text strings.
    """
    if headers is None:
        headers = _fetch_headers("https://yandex.ru")
    if out is None:
        out = sys.stdout

    _write_line(sys.stderr, u"Raw headers:")
    queries = set()  # a set removes duplicate queries across headers
    for header in headers:
        _write_line(sys.stderr, header)
        queries.update(_expand_header(_normalize_header(header)))

    queries = list(queries)
    random.shuffle(queries)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M")
    for idx, query in enumerate(queries[:querynum]):
        _write_line(out, u"%s\t213\t%s_%d" % (query, ts, idx))


# Characters removed from a header before it is split into words.
_STRIPPED_CHARS = (u"«", u":", u"»", u"–", u"’", u",")

# Words whose UTF-8 encoding is at most this many bytes are dropped.
# NOTE: the limit is in bytes, not characters, so a two-letter Cyrillic word
# (4 bytes) is dropped while a three-letter one (6 bytes) is kept.
_MAX_SHORT_WORD_BYTES = 4


def _fetch_headers(url):
    """Download *url* and return the text of every news list item on it."""
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    from contextlib import closing

    # closing() guarantees the HTTP response is released even if read() fails.
    with closing(urlopen(url)) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
    headers = []
    for news_block in soup.find_all(attrs={"class": 'news__list'}):
        for item in news_block.find_all(attrs={"class": "list__item"}):
            headers.append(item.get_text())
    return headers


def _normalize_header(text):
    """Strip selected punctuation, collapse double spaces once, lower-case."""
    for char in _STRIPPED_CHARS:
        text = text.replace(char, u"")
    return text.replace(u"  ", u" ").lower()


def _expand_header(text):
    """Return the set of queries generated from one normalised header."""
    words = [
        word for word in text.split(u" ")
        if len(word.encode("utf-8")) > _MAX_SHORT_WORD_BYTES
    ]
    queries = set()
    # Leave-one-out variants.  With a single word the only variant would be
    # the empty string, which is not a usable query, so it is skipped.
    if len(words) > 1:
        for skip in range(len(words)):
            queries.add(u" ".join(words[:skip] + words[skip + 1:]))
    # Sliding windows of three and two consecutive words.
    for start in range(len(words) - 1):
        queries.add(u" ".join(words[start:start + 3]))
        queries.add(u" ".join(words[start:start + 2]))
    return queries


def _write_line(stream, text):
    """Write *text* and a newline to *stream*, as UTF-8 bytes on Python 2."""
    line = text + u"\n"
    if str is bytes:
        # Python 2: encode explicitly instead of relying on the stream's
        # default codec, which fails on non-ASCII text when output is piped.
        line = line.encode("utf-8")
    stream.write(line)

# Run the query generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
