#!/usr/bin/env python2

from mapreducelib import Record, TemporaryTable, MapReduce as MR
from logparse import parseReqans
from mymrutils import *
import re
from hashlib import md5
import random

# Forum sites tracked by this job.
# Each entry is a (forum, prefix) pair: testForum() attributes a result URL to
# `forum` when the URL starts with `prefix`; the first matching entry wins.
# transfermarkt.com.tr is deliberately not listed here -- testForum() matches
# it with a dedicated rule and reports it as
# http://www.transfermarkt.com.tr/uebersicht/index/forum/tr
trforums = [
    ('http://forum.donanimhaber.com/', 'http://forum.donanimhaber.com/'),
    ('http://www.msxlabs.org/', 'http://www.msxlabs.org/'),
    ('http://www.mackolik.com/Forum/Default.aspx', 'http://www.mackolik.com/Forum/'),
    ('http://forum.memurlar.net/', 'http://forum.memurlar.net/'),
    ('http://www.chip.com.tr/forum/', 'http://www.chip.com.tr/forum/'),
    ('http://www.akormerkezi.com/forum.asp', 'http://www.akormerkezi.com/forum.asp'),
    ('http://www.teknikportal.com/', 'http://www.teknikportal.com/'),
    ('http://www.androidpit.com.tr/forum', 'http://www.androidpit.com.tr/forum'),
    ('http://www.egitimhane.com/forum/index.php', 'http://www.egitimhane.com/forum/'),
    ('http://adslteknikservis.com/forum/default.asp', 'http://adslteknikservis.com/forum/'),
    ('http://forum.minecraftturk.com/forumlar/', 'http://forum.minecraftturk.com/'),
    ('http://ilgiliforum.com/', 'http://ilgiliforum.com/'),
    ('http://kozanbilgi.net/forum.html', 'http://kozanbilgi.net/forum'),
    ('http://forums.tr.leagueoflegends.com/board/', 'http://forums.tr.leagueoflegends.com/board/'),
    ('http://www.ajansspor.com/forum/default.asp', 'http://www.ajansspor.com/forum/'),
    ('http://hocam.com/forum_giris.php', 'http://hocam.com/forum'),
    ('http://diziadami.com/Forumdan_Konular', 'http://diziadami.com/'),
    ('http://tahribat.com/Forum', 'http://tahribat.com/'),
    ('http://en.kioskea.net/forum/', 'http://en.kioskea.net/forum/'),
]



def main():
    """Run the job: map every daily reqans log, then reduce into one table.

    Each source table ``reqans_log/<date>`` is mapped with ``getData`` and the
    output is appended to a single temporary table.  That table is then
    reduced with ``Summarizer(readValues=False)`` into the destination table.
    """
    MR.useDefaults(
        username='snippets',
        server='cedar00.search.yandex.net:8013',
        verbose=True,
    )
    result_table = 'likhomanov/tr_forum_show_count'
    range_start = (2014, 7, 30)
    range_end = (2014, 8, 3)
    with mktmp() as scratch:
        # Build the full list of source tables before starting any map job.
        daily_logs = ['reqans_log/{}'.format(day)
                      for day in strdaterange(range_start, range_end)]
        for log_table in daily_logs:
            # appendMode=True so every day accumulates in the same temp table.
            MR.runMap(getData, srcTable=log_table, dstTable=scratch.name, appendMode=True)
        # NOTE(review): Summarizer comes from mymrutils; presumably it counts
        # records per key -- confirm against its definition.
        MR.runReduce(Summarizer(readValues=False), srcTable=scratch.name, dstTable=result_table)

def getData(rec):
    """Map step: yield one record per search result that points at a forum.

    ``rec.value`` is parsed with ``parseReqans`` into a request and its list
    of results.  Requests whose ``serpLang`` is not ``'com.tr'`` produce
    nothing.  For the rest, every result whose ``'url'`` is recognised by
    ``testForum`` yields ``Record(forum, '', '')``, keyed by the forum URL.
    """
    request, results = parseReqans(rec.value)
    # NOTE(review): the return value of checkwww is ignored here; presumably
    # it validates or normalises the request in place -- confirm.
    checkwww(request)
    if serpLang(request) == 'com.tr':
        for result in results:
            # A result without a 'url' key is tested as the empty string.
            forum = testForum(result.get('url', ''))
            if not forum:
                continue
            yield Record(forum, '', '')

def testForum(url):
    """Return the forum that ``url`` belongs to, or ``None`` if there is none.

    The URL is first checked against the prefixes in ``trforums``; the forum
    of the first matching entry is returned.  If no prefix matches, a URL
    under ``http://www.transfermarkt.com.tr/`` that contains ``/forum/`` is
    attributed to the transfermarkt forum index page.
    """
    listed = next(
        (home for home, prefix in trforums if url.startswith(prefix)),
        None,
    )
    if listed is not None:
        return listed

    # transfermarkt is matched by host plus a '/forum/' path segment rather
    # than by a single prefix from the table.
    on_transfermarkt = url.startswith('http://www.transfermarkt.com.tr/')
    if on_transfermarkt and '/forum/' in url:
        return 'http://www.transfermarkt.com.tr/uebersicht/index/forum/tr'
    return None

# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

