#!/usr/bin/env python2
# encoding: utf-8

import os, os.path, sys, cgi
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, 'WORKING_DIR')
import re, time
from psycopg2.sql import SQL, Identifier, Placeholder
from subprocess import check_output, STDOUT
from log_utils import writelog
from db_utils import getPGCredentials, getPGdb


# Settings handed to getPGCredentials() and getPGdb() further down.
# "host" holds three comma-separated database host names.
PG = dict(
    host="actdb01f.mail.yandex.net,actdb01h.mail.yandex.net,actdb01i.mail.yandex.net",
    port=6432,
    db="pldb",
    charset="cp1251",
)

# Directory in which the git commands are run and from which
# seo_abuse_u.roll is read.
# NOTE(review): "WORKING_DIR" looks like a deployment-time placeholder - confirm.
RULES_DIR = "WORKING_DIR/rules"

# Number of rows requested from the cursor per fetchmany() call.
DB_ROWS_BATCH_SIZE = 100000


print "Content-type:text/plain\r\n\r\n"

getPGCredentials(PG)
form = cgi.FieldStorage()
db, i, d, filter_seo_abuse, filtered = None, 0, form.getfirst('date', 'CURRENT_DATE'), form.getfirst('filter_seo_abuse', ''), {}
params = ()
query = SQL("SELECT DISTINCT uid AS uid FROM history.user_activity WHERE module IN ('hound', 'search', 'mobile', 'wmi', 'mailbox_oper', 'sendbernar') AND last_dt = {}")
if filter_seo_abuse:
    try:
        check_output(r'cd %s && git checkout master' % RULES_DIR, stderr=STDOUT, shell=True, universal_newlines=True)
        e = check_output(r'cd %s && git checkout stable 2>&1 && git pull -Xtheirs 2>&1' % RULES_DIR, stderr=STDOUT, shell=True, universal_newlines=True)
        if e:
            m = re.match(r'reject|error|fatal', e)
        if e and m:
            writelog(r'Pulling commits into stable branch from remote error: %s' % e)
        check_output(r'cd %s && git checkout master && git merge --ff-only origin/master 2>&1' % RULES_DIR, stderr=STDOUT, shell=True, universal_newlines=True)
        with open(RULES_DIR + '/seo_abuse_u.roll') as f:
            for row in f:
                row = row.lstrip()
                if row[:1] == '#':
                    continue
                m = re.match(r'\d+', row)
                if m:
                    filtered[m.group(0)] = 1
    except Exception, e:
        writelog("Exception while seo abuse UIDs retrieving: %s" % str(e), True)
if re.match(r'\d\d\d\d-\d\d-\d\d', d):
    query = query.format(Placeholder())
    params += (d,)
else:
    query = query.format(Identifier("CURRENT_DATE"))

# Keep asking for a read-only connection until one is obtained. There is no
# upper bound on the number of attempts; each failure is logged and followed
# by a 5 second pause.
while True:
    try:
        db = getPGdb(PG, mode='read-only')
    except Exception as err:
        writelog("PGaaS connection error: %s" % str(err), True)
        time.sleep(5)
    else:
        break
    i += 1
    # Every tenth failed attempt additionally logs the running total.
    if i >= 10 and i % 10 == 0:
        writelog("DB connect: %d attempt" % i)
try:
    cursor = db.cursor()
    cursor.execute("SET NAMES 'KOI8R'")
    cursor.execute(query, vars=params)
    #data = cursor.fetchall()
    while True:
        data = cursor.fetchmany(DB_ROWS_BATCH_SIZE)
        if len(data) > 0:
            for row in data:
                if row[0] not in filtered:
                    print row[0]
        else:
            break
except Exception, e:
    writelog("PGaaS connection error: %s" % str(e), True)

