# -*- coding: utf-8 -*-


import nile
import datetime
import json
import imp
import math
import uatraits
import re

from qb2.api.v1.typing import *
from functools import partial
from collections import defaultdict
from urlparse import urlparse, parse_qsl

from nile.api.v1 import (
    clusters,
    Record,
    aggregators as na,
    extractors as ne,
    filters as nf,
    cli,
    with_hints,
)

# Default value of the `out_path` CLI option; daily tables are written under it.
OUTPUT_PATH = "//home/images/images_queries_cube"

# Keys copied from a request's SearchPropsValues into the output
# 'SearchPropsValues' column; any other search prop is dropped.
SPV_FIELDS = [
    'IMAGES.ImgQueryFactors.QInteriorProbV1',  # interiors
    'IMAGES.ImgQueryFactors.QCommercialProbV3',  # visual commerce
    'ImgPorno.pl',
    'UPPER.money_source_2_total_docs_count',  # market
    'UPPER.money_source_2_top10_docs_count',
    'UPPER.money_source_256_total_docs_count',  # schemaorg
    'UPPER.money_source_256_top10_docs_count',
    'UPPER.money_source_4096_total_docs_count',  # turbo
    'UPPER.money_source_4096_top10_docs_count',
    'UPPER.money_source_16384_total_docs_count',  # market-cpa
    'UPPER.money_source_16384_top10_docs_count',
    'UPPER.money_source_8192_total_docs_count',  # market in similar
    'UPPER.money_source_8192_top10_docs_count',
    'UPPER.money_source_32768_total_docs_count',  # very similar all
    'UPPER.money_source_32768_top10_docs_count',
    'UPPER.money_source_65536_total_docs_count',  # very similar marketplace
    'UPPER.money_source_65536_top10_docs_count',
    'UPPER.ApplyImagesBlender.fmls',
    'UPPER.PrismBigBLog.prism_cluster',
    'UPPER.PrismBigBLog.prism_segment',
]
# Keys copied from a request's RelevValues into the output 'RelevValues' column.
RV_FIELDS = [
    'dnorm',
    'norm',
    'cm2',
    'vcomm',
]
# Keys copied from a request's RearrValues; they are merged into the same
# output 'RelevValues' column as RV_FIELDS.
REARR_FIELDS = [
    'wizdetection_img_query_clothes_prob_v1_prob'
]
# CGI parameters of the full request URL kept in the output 'cgiParams' column.
CGI_FIELDS = [
    'source',
    'rpt',
    'cbir_page',
]
# Paths of TYandexTechEvent events that are allowed into the output 'events'
# column. It is empty, so every tech event is currently skipped.
TECH_WHITELIST = [

]
# Element type of the output 'blocks' column: one entry per SERP block
# (filled from resultBlock instances in images_actions).
BLOCK_SHEMA = Struct[
              "pos": String,
              "imageUrl": String,
              "htmlUrl": String,
              "clicked_url": String,
              "isClicked": Bool,
              "isShown": Bool,
              "dwelltimeStart": Integer,
              "dwelltime": Integer,
              "longGreenurls": Integer,
              "shortGreenurls": Integer,
              "downloads": Integer,
              "docId": String,
              "bsPath": String,
              "bsVars":  Dict[String, String],
              "descriptionUrl": String,
              "descriptionDataSource": String,
              "market_offers_incut_info": String
              ]

# Element type of the output 'events' column: one entry per click / own event
# of the request.
EVENT_SHEMA = Struct[
              "pos": String,
              "convertedPath": String,
              "ts": Integer,
              "url": String,
              "vars": Dict[String, String]
              ]
# Element type of the output 'spsv6' column: the SpS value of the request's
# SERP together with the components it was computed from.
SPS_SHEMA = Struct[
            "spsv6": Float,
            "sessiontime": Float,
            "total_downloads": Float,
            "total_greenurls": Float,
            "request_count": Float,
            "last_greenurl": Float,
            "last_download": Float,
            "short_greenurls": Float
            ]

# Output schema of the images_actions reducer (passed to with_hints).
# NOTE(review): 'parent_reqid' is declared here but images_actions never
# fills it, and 'PassportUID' is only set when the request has a truthy value.
SCHEMA = {
    'uid': String,  # yandexuid
    'service': String,  # cbir or images
    'PassportUID': Optional[String],  # puid, if logged in
    'ts': Integer,  # timestamp in seconds
    'ReqID': String,  # request id
    'serpid': String,  # serp id
    'parent_reqid': Optional[String],  # parent request id for related
    'query': String,  # user query
    'ui': String,  # desktop|touch|mobile|mobileapp|pad|sitesearch, or '' when none matches
    'domain': String,  # service domain
    'url': String,  # full request url
    'referer': String,  # referer url
    'SearchPropsValues': Dict[String, String],  # select search props (see SPV_FIELDS)
    'RelevValues': Dict[String, String],  # select relevs (see RV_FIELDS and REARR_FIELDS)
    'cgiParams': Dict[String, String],  # select cgi (see CGI_FIELDS)
    'testids': List[String],  # list of active test-ids
    'UserAgent': Optional[String],  # unparsed user-agent
    'UserRegion': Integer,  # region in number format
    'ICookie': Optional[String],  # icookie
    'spsv6': List[SPS_SHEMA],  # spsv6 and components for serp
    'blocks': List[BLOCK_SHEMA],  # blocks info
    'events': List[EVENT_SHEMA],  # events info
}


def gather_events(r):
    """Return the request's clicks followed by its non-click own events.

    Clicks are taken from ``r.GetClicks()``; own events that report
    ``IsA('TClick')`` are left out so the same click is not listed twice.
    """
    collected = list(r.GetClicks())
    collected += [own for own in r.GetOwnEvents() if not own.IsA('TClick')]
    return collected


def parse_cgi_params(url):
    """Return the query-string parameters of ``url`` as a dict.

    Parameters with an empty value are kept; when a name repeats, the last
    occurrence wins.
    """
    query_string = urlparse(url).query
    name_value_pairs = parse_qsl(query_string, keep_blank_values=True)
    return dict(name_value_pairs)


def list2dict(l):
    """Build a dict from an iterable of two-item (key, value) entries.

    Later entries overwrite earlier ones that share the same key.
    """
    return {key: value for key, value in l}


def get_service(r):
    """Map a request to a service name via its ``IsA`` properties.

    The properties are probed in a fixed order and the first one the request
    reports decides the result; an empty string is returned when none match.
    """
    service_by_property = (
        ('TWebRequestProperties', 'web'),
        ('TImagesRequestProperties', 'images'),
        ('TVideoRequestProperties', 'video'),
        ('TCbirRequestProperties', 'cbir'),
        ('TMapsRequestProperties', 'maps'),
        ('TPortalRequestProperties', 'portal'),
        ('TNewsRequestProperties', 'news'),
        ('TYandexCollectionsRequestProperties', 'collections'),
    )
    for property_name, service_name in service_by_property:
        if r.IsA(property_name):
            return service_name
    return ''

#  This class is used to manipulate user's events.
class actionType(object):
    """Classifies event paths into action patterns and extracts event vars.

    All configuration (regex table, stop-tokens, thresholds) lives on the
    class, so creating an instance is free and the regexes are built once
    instead of on every instantiation. The attributes stay reachable as
    ``self.PATTERNS`` etc., exactly as before.
    """

    # Pattern name -> compiled regex applied to an event's converted path.
    PATTERNS = {
        "PATTERN_GREENURL": re.compile(
            r"^/image/.*/(site|url|title|link|button|snippet|collections|commercial/(incut/)?(similar|description|related|text|contacts|sitelink|((incut|behavioral)/)?thumb)|(market_offers|direct|polaroid/market)/click|(duplicates/(rating|price|model-rating|button|title|url)/(market|schemaorg)))"),
        "PATTERN_OPEN": re.compile(
            r"^/image/.*/(serp/results/(keyboard|slideshow|thumb)/|preview/(imgclick|next|prev|rim/thumb|arrow))"),
        "PATTERN_CLOSE": re.compile(r"^/image/.*/preview(/swipe)?/close"),
        "PATTERN_DOWNLOAD": re.compile(
            r"^/image/.*/preview/(duplicates/image(/othersize.*)?|othersize..*|favorite|save|longtap|share$)"),
        "PATTERN_EDITOR": re.compile(
            r"^/image/.*/preview/editor/(open|action|paint|drag|color|(line|text)/size|crop|exit/close)"),
        "PATTERN_AUXILARY": re.compile(
            r"^/image/.*/(popular/(grid|item)|serp/hover|preview/(rim/more|scroll/start|double_click)|scroll$)"),
        "PATTERN_CBIR": re.compile(r"^/image/((new/preview/((similar/crop)|othersize$))|(touch/preview/similar$))"),
        "ISAT_CBIR_OCR_COPY": re.compile(r"^/image/.*/cbir/ocr/copy"),
        "ISAT_CBIR_OCR_TRANSLATE": re.compile(r"^/image/.*/cbir/ocr/translate"),
        "ISAT_CBIR_OCR_SEARCH": re.compile(r"^/image/.*/cbir/ocr/search"),
        "ISAT_CBIR_TAG": re.compile(r"^/image/.*/cbir/tags/item"),
        "ISAT_CBIR_OBJECT": re.compile(r"^/image/.*/cbir/object-answer/item"),
        "ISAT_CBIR_MARKET": re.compile(r"^/image/.*/cbir/market/item"),
        "ISAT_CBIR_MARKET_PAGE": re.compile(r"^/image/.*/cbir/page/market/item"),
        "ISAT_CBIR_IMG_DOWNLOAD": re.compile(r"^/image/.*/cbir/results/thumb"),
        "ISAT_CBIR_SITE_THUMB": re.compile(r"^/image/.*/cbir/(sites|page/sites)/item/thumb"),
        "ISAT_CBIR_OTHERSIZE": re.compile(r"^/image/.*/cbir/othersizes/item"),
        "ISAT_CBIR_SITE": re.compile(r"^/image/.*/cbir/sites/item/site"),
        "ISAT_CBIR_SITE_PAGE": re.compile(r"^/image/.*/cbir/page/sites/item/site"),
        "ISAT_CBIR_GREEN_URL": re.compile(r"^/image/.*/cbir/results/(site|duplicates/url)")
    }
    # A path that matched a greenurl pattern is still rejected when it
    # contains any of these substrings.
    not_greenurl_sign = ["show", "scroll", "close", "more", "expand", "collapse", "shown", "other/view",
                         "sites/load", "cbir/similar/button", "sites/item/thumb", "url/search", "panel/drag"]
    # Thresholds compared against differences of event timestamps.
    SHORT_THR = 15
    OVERLONG_THR = 120

    # Pattern names treated as greenurl clicks / as downloads.
    _GREENURL_PATTERNS = frozenset([
        "PATTERN_GREENURL", "PATTERN_CBIR", "ISAT_CBIR_GREEN_URL", "ISAT_CBIR_OCR_TRANSLATE",
        "ISAT_CBIR_OCR_SEARCH", "ISAT_CBIR_OCR_COPY", "ISAT_CBIR_OBJECT", "ISAT_CBIR_MARKET",
        "ISAT_CBIR_MARKET_PAGE", "ISAT_CBIR_SITE", "ISAT_CBIR_SITE_PAGE",
    ])
    _DOWNLOAD_PATTERNS = frozenset([
        "PATTERN_DOWNLOAD", "ISAT_CBIR_IMG_DOWNLOAD", "ISAT_CBIR_SITE_THUMB", "ISAT_CBIR_OTHERSIZE",
    ])

    def __init__(self):
        # Nothing is per-instance; see the class attributes above.
        pass

    def has_not_greenurl_token(self, path):
        """Return True when ``path`` contains any greenurl stop-token."""
        return any(token in path for token in self.not_greenurl_sign)

    def is_greenurl(self, pattern):
        """Return True when the pattern name denotes a greenurl click."""
        return pattern in self._GREENURL_PATTERNS

    def is_download(self, pattern):
        """Return True when the pattern name denotes a download action."""
        return pattern in self._DOWNLOAD_PATTERNS

    def get_action_type(self, path, vardict):
        """Return the pattern name for an event, or None when it is not an action.

        Paths containing 'popup' or 'navig' are never actions. An explicit
        '-action-type' var of 'greenurl' / 'download' overrides the regexes.
        Otherwise the first regex (in dict iteration order) that matches
        decides; a greenurl match on a path with a stop-token yields None.
        """
        if 'popup' in path or 'navig' in path:
            return None

        explicit_type = vardict.get("-action-type") if "-action-type" in vardict else None
        if explicit_type == "greenurl":
            return "PATTERN_GREENURL"
        if explicit_type == "download":
            return "PATTERN_DOWNLOAD"

        # .items() works on both Python 2 and 3; the table is tiny.
        for pattern, reg in self.PATTERNS.items():
            if reg.search(path):
                if self.is_greenurl(pattern) and self.has_not_greenurl_token(path):
                    return None
                return pattern
        return None

    def get_event_url(self, event):
        """Return ``event.Url``, or '' when the attribute is missing or None."""
        url = getattr(event, 'Url', None)
        return '' if url is None else url

    def parse_vars(self, vars_str):
        """Return event vars as a list of [key, value] pairs.

        A list argument is returned unchanged. A string is split on ','
        and each chunk containing '=' is split once on the first '='.
        """
        if isinstance(vars_str, list):
            return vars_str
        return [chunk.split('=', 1) for chunk in vars_str.split(',') if '=' in chunk]

    def get_vars(self, event):
        """Return the event's vars as a dict, read according to the event type.

        Events that are none of TClick / TImageShow / TImageNavig yield {}.
        """
        varslist = []
        if event.IsA('TClick'):
            varslist = event.GetVars()
        elif event.IsA('TImageShow'):
            varslist = self.parse_vars(event.Vars)
        elif event.IsA('TImageNavig'):
            varslist = event.Vars
        return list2dict(varslist)

    def get_var_by_name(self, vars, name):
        """Look up ``name`` in ``vars``, falling back to ``name`` without its first character.

        The fallback lets '-pos' also find 'pos'. Returns None when neither
        key is present.
        """
        if name in vars:
            return vars[name]
        if len(name) > 1 and name[1:] in vars:
            return vars[name[1:]]
        return None

    def get_data_source(self, path, vars):
        """Return the event's data source.

        The '-data-source' var wins when present; otherwise the first of
        'market', 'schemaorg', 'turbo' found in ``path``; otherwise 'organic'.
        """
        dataSource = self.get_var_by_name(vars, '-data-source')
        if dataSource is not None:
            return dataSource

        for item in ('market', 'schemaorg', 'turbo'):
            if item in path:
                return item
        return 'organic'

    def is_sps_action(self, path):
        """Return True when ``path`` matches any known pattern, else False.

        Paths containing 'popup' or 'navig' are never actions.
        """
        if 'popup' in path or 'navig' in path:
            return False
        # Always a real bool: previously the no-match case fell through to None.
        return any(reg.search(path) for reg in self.PATTERNS.values())

    def is_useful_action(self, path):
        """Alias of is_sps_action."""
        return self.is_sps_action(path)

    def short_threshold(self):
        """Return the threshold below which a greenurl click counts as short."""
        return self.SHORT_THR

    def overlong_threshold(self):
        """Return the threshold above which a greenurl click counts as long."""
        return self.OVERLONG_THR


#  This class is used to parse information about shown blocks on serp.
class resultBlock(object):
    """Mutable record of one SERP block: identity, blockstat info and user-action counters."""

    def __init__(self, block=None):
        # Identity, filled by update().
        self.Pos = '-1'
        self.Url = None
        self.HtmlUrl = None
        self.DocumentID = None
        self.ImageID = None
        # Interaction state, filled by the add_* methods.
        self.Shown = False
        self.Clicked = False
        self.Clicks = 0
        self.Download = 0
        self.ShortGreenurls = 0
        self.LongGreenurls = 0
        self.DataSource = ''
        self.ClickedUrl = None
        # Bounds of the action timestamps seen for the block.
        self.FirstActionTS = None
        self.LastActionTS = None
        # Blockstat description data, filled by bs_update().
        self.Path = None
        self.Vars = None
        self.DescriptionUrl = None
        self.DescriptionDataSource = None
        # Tab-separated "item=url" entries, filled by add_market_incut_offer().
        self.MarketOffersInfo = ''

        if block is None:
            return
        self.update(block)

    def update(self, result):
        """Copy whichever of Position, Url, HtmlUrl and Markers ids ``result`` exposes."""
        if hasattr(result, 'Position'):
            self.Pos = str(result.Position)

        for attr_name in ('Url', 'HtmlUrl'):
            if hasattr(result, attr_name):
                setattr(self, attr_name, getattr(result, attr_name))

        if not hasattr(result, 'Markers'):
            return
        if 'documentid' in result.Markers:
            self.DocumentID = result.Markers['documentid']
        if 'ImageId' in result.Markers:
            self.ImageID = result.Markers['ImageId']

    def bs_update(self, bspath, bsvars, source, url):
        """Store the blockstat path and vars plus the description source and url."""
        self.Path, self.Vars = bspath, bsvars
        self.DescriptionDataSource, self.DescriptionUrl = source, url

    def add_market_incut_offer(self, bsvars):
        """Append one offer to MarketOffersInfo; vars without '-url' are ignored.

        The entry is '<-item>=<-url>', or 'none=<-url>' when '-item' is
        absent. Entries are separated by a tab.
        """
        if '-url' not in bsvars:
            return
        item_label = bsvars['-item'] if '-item' in bsvars else 'none'
        if self.MarketOffersInfo != '':
            self.MarketOffersInfo = self.MarketOffersInfo + '\t'
        self.MarketOffersInfo = self.MarketOffersInfo + '{0}={1}'.format(item_label, bsvars['-url'])

    def add_show(self):
        """Mark the block as shown."""
        self.Shown = True

    def add_greenurl(self, is_short, data_source, event):
        """Register a greenurl click and remember its data source and url."""
        self.Clicked = True
        self.Clicks += 1
        if is_short:
            self.ShortGreenurls += 1
        self.DataSource += data_source + ';'
        if hasattr(event, 'Url'):
            self.ClickedUrl = event.Url
        else:
            self.ClickedUrl = ''

    def add_download(self):
        """Count one download."""
        self.Download += 1

    def add_short_greenurl(self):
        """Count one short greenurl click."""
        self.ShortGreenurls += 1

    def add_long_greenurl(self):
        """Count one long greenurl click."""
        self.LongGreenurls += 1

    def update_view_times(self, ts):
        """Widen the [FirstActionTS, LastActionTS] interval so it includes ``ts``."""
        if self.FirstActionTS is None:
            self.FirstActionTS = self.LastActionTS = ts
            return

        previous_first = self.FirstActionTS
        self.FirstActionTS = min(previous_first, ts)
        self.LastActionTS = max(self.LastActionTS, previous_first, ts)

    def get_view_time(self):
        """Return LastActionTS - FirstActionTS, or 0 when no action was recorded."""
        if self.FirstActionTS is None:
            return 0
        return self.LastActionTS - self.FirstActionTS


#  This class is used to parse and aggregate request information.
class requestFilter(object):
    """Parses blocks, shows and user actions of a request into resultBlock records.

    The instance remembers whether the last useful action was a greenurl
    click (and on which block / at what timestamp). That state is kept
    across calls until clean_data() is invoked.
    """

    def __init__(self, testids):
        self.testids = testids
        self.clean_data()

    def clean_data(self):
        """Forget the remembered last greenurl click."""
        self.lastBlockPos = None
        self.lastGreenurlTS = None
        self.isLastActionGreenurl = False

    def get_testid(self, r):
        """Return the first configured test-id the request has, or ''."""
        for ti in self.testids:
            if r.HasTestID(ti):
                return ti
        return ''

    def get_ui(self, r):
        """Return the UI name of the request, or '' when no UI property matches."""
        ui_by_property = (
            ('TDesktopUIProperties', 'desktop'),
            ('TTouchUIProperties', 'touch'),
            ('TMobileUIProperties', 'mobile'),
            ('TMobileAppUIProperties', 'mobileapp'),
            ('TPadUIProperties', 'pad'),
            ('TSiteSearchUIProperties', 'sitesearch'),
        )
        for property_name, ui in ui_by_property:
            if r.IsA(property_name):
                return ui
        return ''

    def parse_request_info(self, r):
        """Return a dict with the basic request fields."""
        return {'reqid': r.ReqID,
                'ui': self.get_ui(r),
                'region': r.ServiceDomRegion,
                'userRegion': r.UserRegion,
                'query': r.Query,
                'serpid': r.SerpID if hasattr(r, 'SerpID') else '',
                'requestTS': r.Timestamp,
                'qcpv3': float(r.SearchPropsValues.get('IMAGES.ImgQueryFactors.QCommercialProbV3', '0'))
                }

    def parse_blocks_info(self, r):
        """Return a defaultdict mapping position (str) to resultBlock for the request.

        Main blocks are keyed by their result position. Blockstat blocks with
        a snippet description enrich an existing position; 'market_offers'
        blocks are collected under 'market_offers_<pos>' keys.
        """
        blocks_dict = defaultdict(resultBlock)

        for b in r.GetMainBlocks():
            result = b.GetMainResult()
            blocks_dict[str(result.Position)] = resultBlock(result)

        for block in r.GetBSBlocks():
            bspath = block.Path
            bsvars = {k: v for k, v in block.GetVars()}

            if 'serp/results/snippet/description' in bspath:
                pos, url, source = '', '', ''
                if '-pos' in bsvars:
                    pos = str(bsvars['-pos'])
                elif 'pos' in bsvars:
                    pos = str(bsvars['pos'])
                if '-data-source' in bsvars:
                    source = str(bsvars['-data-source'])
                if '-url' in bsvars:
                    url = str(bsvars['-url'])

                # Only enrich positions that exist; never create one here.
                if pos in blocks_dict:
                    blocks_dict[pos].bs_update(bspath, bsvars, source, url)

            if 'market_offers' in bspath:
                incut_pos = 'market_offers_'
                if '-pos' in bsvars:
                    incut_pos = incut_pos + str(bsvars['-pos'])
                # The defaultdict creates the incut pseudo-block on first use.
                blocks_dict[incut_pos].add_market_incut_offer(bsvars)

        return blocks_dict

    def parse_shows(self, r, blocks):
        """Mark as shown every block position referenced by a TImageShow event."""
        actType = actionType()
        for event in sorted(r.GetOwnEvents(), key=lambda event: event.ClientTimestamp):
            if not event.IsA('TImageShow'):
                continue
            for var in actType.parse_vars(event.Vars):
                if var[0] == '-pos' or var[0] == 'pos':
                    blocks[str(var[1])].add_show()

    def gather_events(self, r):
        """Return the request's clicks followed by its non-click own events."""
        res = list(r.GetClicks())
        res.extend(event for event in r.GetOwnEvents() if not event.IsA('TClick'))
        return res

    def parse_useful_events(self, r, blocks):
        """Replay the request's events in timestamp order and update ``blocks``.

        Events without a ConvertedPath, a '-pos' var or a recognised action
        type are skipped. Returns the (unsorted) list of gathered events.
        """
        actType = actionType()
        events = self.gather_events(r)

        for event in sorted(events, key=lambda event: event.Timestamp):
            if not hasattr(event, 'ConvertedPath'):
                continue
            path = event.ConvertedPath

            varsdict = actType.get_vars(event)
            currentActionType = actType.get_action_type(path, varsdict)
            currentPos = actType.get_var_by_name(varsdict, '-pos')
            if currentPos is None or currentActionType is None:
                continue
            currentPos = str(currentPos)

            blocks[currentPos].update_view_times(event.Timestamp)
            # NOTE(review): 'prev' is also a substring of 'preview', so this
            # fires for every 'new' preview path, not only next/prev
            # navigation. Left unchanged; confirm the intent.
            if 'new' in path and ('next' in path or 'prev' in path):
                if currentPos.isdigit():
                    blocks[str(int(currentPos) - 1)].update_view_times(event.Timestamp)

            if self.isLastActionGreenurl:
                ts_diff = event.Timestamp - self.lastGreenurlTS
                # NOTE(review): the short/long mark goes to the block of the
                # current event, not to the block that was clicked.
                if 0 < ts_diff < actType.short_threshold() and "serp/hover" not in path:
                    blocks[currentPos].add_short_greenurl()
                if ts_diff > actType.overlong_threshold():
                    blocks[currentPos].add_long_greenurl()

            if actType.is_download(currentActionType):
                blocks[currentPos].add_download()

            if actType.is_greenurl(currentActionType):
                dataSource = actType.get_data_source(path, varsdict)
                blocks[currentPos].add_greenurl(False, dataSource, event)
                self.isLastActionGreenurl = True
                self.lastGreenurlTS = event.Timestamp
                # Remember the clicked block so check_prev_action() can use it;
                # previously lastBlockPos was never assigned.
                self.lastBlockPos = currentPos
            else:
                self.isLastActionGreenurl = False

        return events

    def check_prev_action(self, r, blocks):
        """Count a short greenurl on the last clicked block when ``r`` follows it within 15.

        Does nothing when there is no pending greenurl click or its block
        position / timestamp is unknown.
        """
        if blocks is None or not self.isLastActionGreenurl:
            return
        # Without a known block there is nothing to attribute the click to.
        # Indexing blocks[None] would insert a bogus block into a defaultdict.
        if self.lastBlockPos is None or self.lastGreenurlTS is None:
            return

        if 0 <= r.Timestamp - self.lastGreenurlTS < 15:
            blocks[self.lastBlockPos].add_short_greenurl()


#  This class is used to calculate SpS for serps.
class SessionData(object):
    """Accumulates the counters and timestamps used to compute SpS for one SERP."""

    def __init__(self):
        self.clear()

    def clear(self):
        """Reset every counter and timestamp to zero."""
        self.short_greenurls = 0
        self.session_time = 0
        self.last_downloads = 0
        self.last_greenurls = 0
        self.request_count = 0
        self.last_greenurl_ts = 0
        self.last_download_ts = 0
        self.tot_downloads = 0
        self.tot_greenurls = 0

    def inc_short_greenurls(self, cnt):
        """Add ``cnt`` to the short greenurl counter."""
        self.short_greenurls += cnt

    def inc_session_time(self, cnt):
        """Add ``cnt`` to the accumulated session time."""
        self.session_time += cnt

    def inc_last_downloads(self, cnt):
        """Add ``cnt`` to the last-download counter."""
        self.last_downloads += cnt

    def inc_last_greenurls(self, cnt):
        """Add ``cnt`` to the last-greenurl counter."""
        self.last_greenurls += cnt

    def inc_request_count(self, cnt):
        """Add ``cnt`` to the request counter."""
        self.request_count += cnt

    def update_last_greenurl(self, ts):
        """Remember the timestamp of the latest greenurl click."""
        self.last_greenurl_ts = ts

    def update_last_download(self, ts):
        """Remember the timestamp of the latest download."""
        self.last_download_ts = ts

    def update_tot_greenurl(self):
        """Count one more greenurl click."""
        self.tot_greenurls += 1

    def update_tot_download(self):
        """Count one more download."""
        self.tot_downloads += 1

    def reqCount(self):
        """Return the request counter."""
        return self.request_count

    def shortCount(self):
        """Return the short greenurl counter."""
        return self.short_greenurls

    def lastDownload(self):
        """Return the last-download counter."""
        return self.last_downloads

    def lastGreenurl(self):
        """Return the last-greenurl counter."""
        return self.last_greenurls

    def totalDownload(self):
        """Return the total number of downloads."""
        return self.tot_downloads

    def totalGreenurl(self):
        """Return the total number of greenurl clicks."""
        return self.tot_greenurls

    def sesstime(self):
        """Return the accumulated session time."""
        return self.session_time

    def lastGUts(self):
        """Return the timestamp of the latest greenurl click."""
        # This used to return last_download_ts (swapped with lastDLts).
        return self.last_greenurl_ts

    def lastDLts(self):
        """Return the timestamp of the latest download."""
        # This used to return last_greenurl_ts (swapped with lastGUts).
        return self.last_download_ts


def _is_images_request(r):
    """Return True for requests of the images or cbir service."""
    return r.IsA('TImagesRequestProperties') or r.IsA('TCbirRequestProperties')


def _serp_key(r):
    """Return the key that groups requests of one SERP: SerpID, or ReqID when absent."""
    return r.SerpID if hasattr(r, 'SerpID') else r.ReqID


def _compute_serp_sps(items):
    """Compute SpS and its components from the requests and events of one SERP.

    ``items`` mixes request objects and their events; they are replayed in
    Timestamp order. Returns a dict with the SpS value and its components.
    """
    sessData = SessionData()
    actType = actionType()
    prevActionType = None
    prevActionTimestamp = None
    currentActionTimestamp = None

    for event in sorted(items, key=lambda event: event.Timestamp):
        # Reset for every item so one that is neither a request nor has a
        # ConvertedPath cannot inherit the previous item's action type.
        currentActionType = None
        if event.IsA("TYandexRequestProperties"):
            currentActionType = "PATTERN_REQUEST"
            sessData.inc_request_count(1)

        if hasattr(event, 'ConvertedPath'):
            currentActionType = actType.get_action_type(event.ConvertedPath, {})

        if currentActionType is None or event.IsA('TYandexTechEvent'):
            continue

        currentActionTimestamp = event.Timestamp
        if prevActionTimestamp is None:
            prevActionTimestamp = currentActionTimestamp
            prevActionType = currentActionType
        ts_diff = max(currentActionTimestamp - prevActionTimestamp, 0)
        # A gap between two actions adds at most 15 to the session time.
        sessData.inc_session_time(min(ts_diff, 15))

        # Short greenurl: the previous action was a greenurl click and the
        # user came back quickly with a new request or a non-hover event.
        is_return_action = (
            currentActionType == "PATTERN_REQUEST"
            or hasattr(event, 'ConvertedPath') and "serp/hover" not in event.ConvertedPath
        )
        if (actType.is_greenurl(prevActionType)
                and ts_diff < actType.short_threshold()
                and currentActionTimestamp != prevActionTimestamp
                and is_return_action):
            sessData.inc_short_greenurls(1)

        if actType.is_greenurl(currentActionType):
            sessData.update_tot_greenurl()
            sessData.update_last_greenurl(currentActionTimestamp)
        if actType.is_download(currentActionType):
            sessData.update_tot_download()
            sessData.update_last_download(currentActionTimestamp)

        prevActionTimestamp = currentActionTimestamp
        prevActionType = currentActionType

    # Reward a SERP whose last counted action is within 4 of its last
    # greenurl click, or otherwise of its last download. Skipped when
    # nothing was counted at all.
    if currentActionTimestamp is not None:
        if currentActionTimestamp - sessData.lastGUts() < 4:
            sessData.inc_last_greenurls(1)
        elif currentActionTimestamp - sessData.lastDLts() < 4:
            sessData.inc_last_downloads(1)

    session_minutes = sessData.sesstime() / 60.
    return {'request_count': sessData.reqCount(),
            'sessiontime': session_minutes,
            'short_greenurls': sessData.shortCount(),
            'last_greenurl': sessData.lastGreenurl(),
            'last_download': sessData.lastDownload(),
            'total_greenurl': sessData.totalGreenurl(),
            'total_download': sessData.totalDownload(),
            'spsv6': (session_minutes + 6 * sessData.lastGreenurl()
                      + 6 * sessData.lastDownload() - 2 * sessData.shortCount())}


def _block_record(pos, block):
    """Serialise one resultBlock into the dict stored in the 'blocks' column."""
    def or_default(value, default):
        return default if value is None else value

    has_actions = block.FirstActionTS is not None
    return {"pos": str(pos),
            "imageUrl": or_default(block.Url, ''),
            "htmlUrl": or_default(block.HtmlUrl, ''),
            "clicked_url": or_default(block.ClickedUrl, ''),
            "isClicked": block.Clicked,
            "isShown": block.Shown,
            # -1 marks a block that received no action.
            "dwelltimeStart": block.FirstActionTS if has_actions else -1,
            "dwelltime": block.get_view_time() if has_actions else -1,
            "longGreenurls": block.LongGreenurls,
            "shortGreenurls": block.ShortGreenurls,
            "downloads": block.Download,
            "docId": or_default(block.DocumentID, ''),
            "bsPath": or_default(block.Path, ''),
            "bsVars": or_default(block.Vars, {}),
            "descriptionUrl": or_default(block.DescriptionUrl, ''),
            "descriptionDataSource": or_default(block.DescriptionDataSource, ''),
            "market_offers_incut_info": block.MarketOffersInfo}


def _event_record(actType, event):
    """Serialise one event into the dict stored in the 'events' column."""
    varsdict = actType.get_vars(event)
    pos = actType.get_var_by_name(varsdict, '-pos')
    return {"pos": str(pos) if pos is not None else "None",
            "convertedPath": event.ConvertedPath,
            "ts": event.Timestamp,
            "url": actType.get_event_url(event),
            "vars": varsdict}


@with_hints(output_schema=dict(SCHEMA))
def images_actions(groups, main_query):
    """Reducer: emit one cube record per images/cbir request of every user session.

    For each (keys, records) group the session is parsed with libra. SpS is
    computed per SERP, then every images/cbir request is emitted through
    ``main_query`` with its blocks, events and the SpS of its SERP. Groups
    whose session cannot be parsed are skipped.
    """
    import libra

    for keys, records in groups:
        uid = keys.key
        try:
            session = libra.ParseSession(records, 'blockstat.dict')
        except Exception:
            # Best effort: a session that cannot be parsed is dropped.
            continue

        req = requestFilter([])

        # Aggregate requests and their events by SERP key.
        serpID2Events = defaultdict(list)
        for r in session:
            if not _is_images_request(r):
                continue
            events = gather_events(r)
            if not events:
                continue
            serp_items = serpID2Events[_serp_key(r)]
            serp_items.append(r)
            serp_items.extend(events)

        # Compute SpS for every SERP.
        serpID2Sps = {}
        for serpID, serp_items in serpID2Events.items():
            serpID2Sps[serpID] = _compute_serp_sps(serp_items)

        # Iterate through requests and gather events, shown blocks, etc
        for r in session:
            if not _is_images_request(r):
                continue
            ui = req.get_ui(r)
            actType = actionType()
            spv = r.SearchPropsValues
            rv = getattr(r, 'RelevValues', {})
            rearr = getattr(r, 'RearrValues', {})
            cv = parse_cgi_params(r.FullRequest)

            blocks = req.parse_blocks_info(r)
            req.parse_shows(r, blocks)
            req.parse_useful_events(r, blocks)

            res_blocks = [_block_record(pos, block) for pos, block in blocks.items()]

            res_events = [_event_record(actType, event) for event in r.GetClicks()]
            for event in r.GetOwnEvents():
                if event.IsA('TClick'):
                    continue
                if event.IsA('TYandexTechEvent') and event.Path not in TECH_WHITELIST:
                    continue
                res_events.append(_event_record(actType, event))

            # [08-09-2021] RelevValues also carries values from RearrValues,
            # to avoid adding a new column.
            relev = {x: rv[x] for x in RV_FIELDS if x in rv}
            relev.update({x: rearr[x] for x in REARR_FIELDS if x in rearr})

            # Same key as the aggregation above, so the lookup also works
            # for requests that have no SerpID attribute.
            serp_key = _serp_key(r)
            sps = serpID2Sps.get(serp_key, {})

            res = {
                "uid": uid, "service": get_service(r), "ts": r.Timestamp, "ReqID": r.ReqID, "serpid": serp_key,
                "query": r.Query, "ui": ui, "domain": r.ServiceDomRegion,
                "url": r.FullRequest, 'referer': r.Referer,
                "SearchPropsValues": {x: spv[x] for x in SPV_FIELDS if x in spv},
                "RelevValues": relev,
                "cgiParams": {x: cv[x] for x in CGI_FIELDS if x in cv},
                "testids": [test.TestID for test in r.GetTestInfo()],
                "UserAgent": r.UserAgent, 'UserRegion': r.UserRegion,
                # Optional in the schema, so a missing attribute becomes None.
                "ICookie": getattr(r, "ICookie", None),
                "spsv6": [{"spsv6": sps.get('spsv6', 0),
                           "sessiontime": sps.get('sessiontime', 0),
                           "total_downloads": sps.get('total_download', 0),
                           "total_greenurls": sps.get('total_greenurl', 0),
                           "request_count": sps.get('request_count', 0),
                           "last_greenurl": sps.get('last_greenurl', 0),
                           "last_download": sps.get('last_download', 0),
                           "short_greenurls": sps.get('short_greenurls', 0)}],
                "blocks": res_blocks,
                "events": res_events,
            }
            passport_uid = getattr(r, 'PassportUID', None)
            if passport_uid:
                res['PassportUID'] = passport_uid
            main_query(Record(**res))


@cli.statinfra_job(
    options=[
        cli.Option('use_sample', is_flag=True),
        cli.Option('out_path', default=OUTPUT_PATH),
    ]
)
def make_job(job, options):
    """Configure the job: one reduce over user sessions per requested date.

    For every date in ``options.dates`` the sessions table is grouped by
    'key', sorted by 'subkey', reduced with images_actions, stamped with a
    'fielddate' column, sorted by ('uid', 'ts') and written to
    '$job_root/<date>'.
    """
    spec_defaults = dict(
        pool_trees=["physical"],
        use_default_tentative_pool_trees=True,
        job_io={"table_writer": {"max_row_weight": 128 * 1024 * 1024}},
    )
    job = job.env(
        yt_spec_defaults=spec_defaults,
        templates=dict(job_root=options.out_path, title='Images queries cube'),
    )

    # dates before are included in Search sessions
    images_sessions_start = datetime.datetime(2020, 3, 4)

    for date in options.dates:
        to_insert = 'sample_by_uid_1p/' if options.use_sample else ''

        if datetime.datetime.strptime(date, "%Y-%m-%d") < images_sessions_start:
            usersessions = '//user_sessions/pub/{}search/daily/{}/clean'.format(to_insert, date)
        else:
            usersessions = '//user_sessions/pub/{}images/daily/{}/clean'.format(to_insert, date)

        reducer_files = [
            nile.files.RemoteFile('statbox/statbox-dict-last/blockstat.dict'),
            nile.files.RemoteFile('statbox/resources/libra.so'),
            nile.files.StatboxWheel('yandex_baobab_api'),
        ]
        sessions = job.table(usersessions).groupby('key').sort('subkey')
        reduced = sessions.reduce(images_actions, files=reducer_files, memory_limit=4000)
        stamped = reduced.project(ne.all(), fielddate=ne.const(date))
        stamped.sort('uid', 'ts').put('$job_root/{}'.format(date))

    return job


# Script entry point: hand control to the nile CLI runner.
if __name__ == '__main__':
    cli.run()
