# Helper script: prints pfound (and other metrics) for cron serp sets, or for explicit serp ids, optionally restricted by a filter.
# Metrics API : https://wiki.yandex-team.ru/jandekspoisk/ocenkakachestvapoiska/metrics2/api?from=isearch#sravnenieserpov-obshhajatablicanedljaprodakshen1


import json, requests, sys, getopt

# Switch isDebugMode to True to make debug() print its argument.
#isDebugMode = True
isDebugMode = False
# Serp set id handled by the previous loop iteration of printSerpsByCronId();
# an empty string means no serp set has been processed yet.
prevSerpSetId = ''

def debug(param):
    """Print param to stdout, but only while the module-level isDebugMode flag is truthy."""
    if not isDebugMode:
        return
    print(param)

def getTextFromUrl(url, params):
    """Issue an HTTP GET to url, passing params to requests.get, and return the response text."""
    return requests.get(url=url, params=params).text

def getJsonFromUrl(url, params):
    """Fetch url via getTextFromUrl() and return the response decoded from JSON.

    Raises ValueError when the response body is not valid JSON.
    """
    data = json.loads(getTextFromUrl(url, params))
    # Pretty-printing the whole payload is wasted work unless it is going to
    # be shown, so only build the dump when debug output is enabled.
    if isDebugMode:
        debug(json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')))
    return data

def listToLine(lst):
    """Return str() of every item in lst, joined by tab characters.

    An empty lst yields an empty string.
    """
    # str.join builds the line in one pass and needs no trailing-tab trimming.
    return '\t'.join(str(item) for item in lst)

def getJsonBySerpSetId(leftSerpSetId, rightSerpSetId, region, flt = '', leftFilter = 'onlySearchResult', rightFilter = 'onlySearchResult'):
    """Query the metric-observation endpoint for two serp sets and return the parsed JSON.

    leftSerpSetId / rightSerpSetId -- sent as 'left-serp-set' / 'right-serp-set'
    region                         -- sent as 'regional'
    flt                            -- sent as 'pre-filter'
    leftFilter / rightFilter       -- sent as 'left-serp-set-filter' / 'right-serp-set-filter'

    The 'evaluation' parameter is always 'WEB'.
    """
    endpoint = 'https://metrics-calculation.qe.yandex-team.ru/api/qex/metric-observation'
    query = {
        'regional' : region,
        'evaluation' : 'WEB',
        'left-serp-set' : leftSerpSetId,
        'right-serp-set' : rightSerpSetId,
        'pre-filter' : flt,
        'left-serp-set-filter' : leftFilter,
        'right-serp-set-filter' : rightFilter
    }
    return getJsonFromUrl(endpoint, query)

def getSerpSetIdList(cronId, region):
    """Return the serp set ids listed for a cron download, as a list of strings.

    cronId -- sent as the 'cronSerpDownloadId' query parameter
    region -- inserted into the request path

    An empty listing yields an empty list.
    """
    url = 'http://metrics.yandex-team.ru/services/api/serpset/list/' + region + '/WEB/'

    params = {
        #'from' : fromDate,
        'cronSerpDownloadId' : cronId
    }

    dataStr = getTextFromUrl(url, params)
    # presumably the body is a flat bracketed list such as "[1, 2, 3]" -- TODO confirm against the API
    dataStr = dataStr.replace('[', '').replace(']', '').replace(' ', '')
    # Splitting an empty body gives [''], which would be reported as one serp
    # set and queried with an empty id; drop empty entries and stray
    # whitespace (e.g. a trailing newline) instead.
    return [serpSetId for serpSetId in (part.strip() for part in dataStr.split(',')) if serpSetId]

def getMetricByListFromJson(json, metricsList):
    """Collect comparison values for the metrics whose name appears in metricsList.

    json is iterated as a sequence of metric entries. For every entry whose
    ['metric']['name'] is in metricsList, five values are appended, in order:
    left value, right value, diff percent, diff pValue, diff signification.
    Returns the flat list of collected values.
    """
    values = []
    for entry in json:
        if entry['metric']['name'] not in metricsList:
            continue
        values.extend([
            entry['leftData']['value'],
            entry['rightData']['value'],
            entry['diff']['percent'],
            entry['diff']['pValue'],
            entry['diff']['signification'],
        ])
    return values

def getResultRow(leftSerpSetId, rightSerpSetId, region, flt, metricsList, leftWhatToShow, rightWhatToShow):
    """Return the metric values for one left/right serp set comparison.

    On success the row holds the values picked by getMetricByListFromJson().
    If fetching or extracting raises any Exception, the row holds just that
    exception object, so the caller can print it in place of the values.
    """
    try:
        comparison = getJsonBySerpSetId(leftSerpSetId, rightSerpSetId, region, flt, leftWhatToShow, rightWhatToShow)
        return list(getMetricByListFromJson(comparison, metricsList))
    except Exception as error:
        # ValueError is an Exception subclass, so one handler covers both cases.
        return [error]
    

def printSerpsByCronId(cronId, region, flt, metricsList, whatToShow):
    """Print one tab-separated metrics row per serp set of the given cron download.

    Each serp set is compared (as the left side) with the serp set handled
    just before it (as the right side); when no previous id is recorded, the
    serp set is compared with itself.
    """
    # prevSerpSetId is module-level, so its value survives between calls.
    # NOTE(review): the first serp set of a later cron id is therefore compared
    # with the last serp set of the previous call -- confirm this is intended.
    global prevSerpSetId

    serpSetIds = getSerpSetIdList(cronId, region)
    # NOTE(review): the header names a serpset-id column, but the rows printed
    # below contain only the values returned by getResultRow().
    header = str(len(serpSetIds)) + ' serpsets found. [serpset-id ' + listToLine(metricsList) + ']'
    print(header)
    debug(serpSetIds)

    for currentId in serpSetIds:
        if not prevSerpSetId:
            prevSerpSetId = currentId

        row = getResultRow(currentId, prevSerpSetId, region, flt, metricsList, whatToShow, whatToShow)

        prevSerpSetId = currentId
        print(listToLine(row))

def printSerpBySerpIds(serpId1, serpId2, region, flt, metricsList, whatToShow):
    """Print one tab-separated metrics row comparing serpId1 (left) with serpId2 (right).

    whatToShow is used as the serp set filter for both sides.
    """
    print(listToLine(getResultRow(serpId1, serpId2, region, flt, metricsList, whatToShow, whatToShow)))

def printAllMetrics(serpSetId = '2740275', region = 'TR'):
    """Print the name of every metric returned for a serp set, one per line.

    The serp set is compared with itself purely to obtain the metric list.
    A ValueError (for example a response that is not valid JSON) is printed
    instead of being raised.
    """
    try:
        data = getJsonBySerpSetId(serpSetId, serpSetId, region) #left serp, right serp, region

        for metric in data:
            print(metric['metric']['name'])

    except ValueError as e:
        # Report the failure; there is no result row to attach it to here.
        print(e)

def usage():
    """Print command-line help covering the options accepted by main()."""
    print('''
metrics_pfound_helper.py -c <cronids> -r <region> [-f <filter>] [-m <metrics>] [-w <what-to-show>]
metrics_pfound_helper.py -s <serpids> -r <region> [-f <filter>] [-m <metrics>] [-w <what-to-show>]
metrics_pfound_helper.py --help-metrics

cronids=1165,1167          #Y.TR_MIXED_DEDUPLICATED.tail porno, #G.TR_MIXED_DEDUPLICATED.tail porno
                           #Could be found here : http://metrics.yandex-team.ru/view/cron/download?type=tr

serpids=<id1>,<id2>,...    #The first serpid is compared with each of the others

region=TR                  #also RU and so on

filter=query-9961          #TR_MIXED_VALIDATE_DEDUPLICATED.porno
                           #Could be found in your serp comparison URL (filter param)

metrics=pfound-5,judged5   #Comma-separated, this is the default
                           #List is available via --help-metrics

what-to-show=onlySearchResult  #Serp set filter used for both sides, this is the default
''')
    
def main(argv):
    """Parse command-line options and print the requested metric comparisons.

    argv -- the argument list without the program name.

    Two modes are supported and both need a region:
      * cron ids (-c): every serp set of each cron download is printed;
      * serp ids (-s): the first serp id is compared with each following one.
    If neither mode can run, the usage text is printed. Unknown options
    terminate the process with exit status 2.
    """
    cronIdList = ''
    region = ''
    flt = ''
    metricsList = ''
    serpIdList = ''
    firstSerpId = ''
    whatToShow = ''

    try:
        opts, _ = getopt.getopt(argv,"hc:r:f:m:s:w:",["cronids=","region=","filter=","metrics=","help-metrics","serpids=","what-to-show="])

    except getopt.GetoptError as e:
        print(e)
        print('metrics_pfound_helper.py -c <cronids> -r <region> -f <filter> -m <metrics> -s <serpids>')
        print('serpids option will compare the first serpid with each other')
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit()

        elif opt in ("-c", "--cronids"):
            cronIdList = arg

        elif opt in ("-r", "--region"):
            region = arg

        elif opt in ("-f", "--filter"):
            flt = arg

        elif opt in ("-m", "--metrics"):
            metricsList = arg

        elif opt in ("-s", "--serpids"):
            serpIdList = arg

        elif opt in ("-w", "--what-to-show"):
            whatToShow = arg

        # Compare for equality: `opt in ("--help-metrics")` is a substring test
        # on a plain string, not membership in a tuple.
        elif opt == "--help-metrics":
            printAllMetrics()
            sys.exit()

    if not metricsList:
        metricsList = 'pfound-5,judged5'

    if not whatToShow:
        whatToShow = 'onlySearchResult'

    # Strip each name so that "pfound-5, judged5" still matches both metrics.
    metrics = [name.strip() for name in metricsList.split(',')]

    # Remember whether any mode ran, so that the usage text is shown only when
    # nothing could be done (not after a successful cron-id run).
    handled = False

    if cronIdList and region:
        handled = True
        for cronId in cronIdList.split(','):
            print('CronId = ' + cronId)
            printSerpsByCronId(cronId, region, flt, metrics, whatToShow)

    if serpIdList and region:
        handled = True
        for serpId in serpIdList.split(','):
            # The first non-empty id becomes the left side of every comparison.
            if not firstSerpId:
                firstSerpId = serpId
                continue
            printSerpBySerpIds(firstSerpId, serpId, region, flt, metrics, whatToShow)

    if not handled:
        usage()
    

# Run the command-line interface only when this file is executed as a script;
# main() receives the arguments that follow the program name.
if __name__ == "__main__":
    main(sys.argv[1:])

#printSerpsByCronId(1165, 'TR', 'query-9961') Y.TR_MIXED_DEDUPLICATED.tail porno
#printSerpsByCronId(1167, 'TR', 'query-9961') G.TR_MIXED_DEDUPLICATED.tail porno
