from nile.api.v1 import (
        Record,
        files,
        cli,
        with_hints,
)

from nile.api.v1.datetime import date_range

import re


def get_ui(r):
    """Return a short UI label for the request ``r``.

    The request is probed with ``r.IsA(...)`` against a fixed, ordered list
    of UI property type names; the label paired with the first name that
    matches is returned. If none of them matches, an empty string is
    returned.
    """
    # Order matters: the first matching property type wins.
    ui_by_property = (
        ('TDesktopUIProperties', 'desktop'),
        ('TTouchUIProperties', 'touch'),
        ('TMobileUIProperties', 'mobile'),
        ('TMobileAppUIProperties', 'mobileapp'),
        ('TPadUIProperties', 'pad'),
        ('TSiteSearchUIProperties', 'sitesearch'),
    )
    for property_name, label in ui_by_property:
        if r.IsA(property_name):
            return label
    return ''


# Paths under /image/ that are counted as greenurl paths. Compiled once at
# import time so the per-event check does not go through re.compile on
# every call.
_GREENURL_PATH_RE = re.compile(
        r"^/image/.*/(site|url|title|link|button|snippet|collections|commercial/(incut/)?(similar|description|related|text|contacts|sitelink|((incut|behavioral)/)?thumb)|(market_offers|direct)/click|(duplicates/(rating|price|model-rating|button|title|url)/(market|schemaorg)))")


def is_images_greenurl(path):
    """Tell whether ``path`` matches the images greenurl path pattern.

    Args:
        path: path string to test, or ``None``.

    Returns:
        ``True`` if ``path`` starts with ``/image/`` and continues with one
        of the recognised greenurl suffixes, ``False`` otherwise (including
        when ``path`` is ``None``).
    """
    if path is None:
        return False
    return _GREENURL_PATH_RE.search(path) is not None


@with_hints(output_schema=dict(uid=str,
                               reqid=str,
                               timestamp=str,
                               cm2=float,
                               vcomm=float,
                               path=str,
                               ui=str,
                               query=str,
                               userRegion=int))
def parse_greenurls(actions):
    """Reducer that emits one record per greenurl event of images requests.

    Args:
        actions: iterable of ``(key, records)`` groups. ``key.key`` is used
            as the uid and ``records`` is handed to ``libra.ParseSession``.

    Yields:
        ``Record`` with the fields declared in ``output_schema``: one per
        own event of a ``TImagesRequestProperties`` request whose
        ``ConvertedPath`` satisfies ``is_images_greenurl``.

    Raises:
        NameError, AttributeError, TypeError: re-raised from the session
            parsing step. Any other ``Exception`` raised there causes the
            whole group to be skipped.
    """
    for key, records in actions:
        try:
            # Imported lazily inside the reducer.
            # NOTE(review): presumably because libra.so is only available
            # where the job runs (it is attached via ``files=``) -- confirm.
            import libra
            uid = key.key
            sessions = libra.ParseSession(records, 'blockstat.dict')
        except (NameError, AttributeError, TypeError):
            # These are let through so they fail the job instead of being
            # silently dropped by the broad handler below.
            raise
        except Exception:
            # Best effort: skip groups whose session cannot be parsed.
            continue

        for request in sessions:
            if not request.IsA('TImagesRequestProperties'):
                continue
            reqid = request.ReqID
            query = request.Query

            # Float defaults keep the emitted values consistent with the
            # ``float`` columns declared in output_schema.
            cm2 = 0.0
            vcomm = 0.0
            if hasattr(request, 'RelevValues'):
                relev_values = request.RelevValues
                cm2 = float(relev_values.get('cm2', 0))
                vcomm = float(relev_values.get('vcomm', 0))

            for event in request.GetOwnEvents():
                path = event.ConvertedPath
                if not is_images_greenurl(path):
                    continue
                yield Record(uid=uid,
                             reqid=reqid,
                             timestamp=str(event.Timestamp),
                             cm2=cm2,
                             vcomm=vcomm,
                             path=path,
                             ui=get_ui(request),
                             query=query,
                             userRegion=request.UserRegion)


@cli.statinfra_job(options=[cli.Option('date', default='?')])
def make_job(job, nirvana, options):
    """Build the job: one greenurl extraction per date in the requested range.

    Args:
        job: job object to configure; ``job.env(...)`` and ``job.table(...)``
            are called on it.
        nirvana: object whose ``output_tables[0]`` is used as the output
            path prefix.
        options: parsed options; ``options.dates`` supplies the first and
            last date of the range, ``options.date`` is appended to the
            temporary-files path.

    Returns:
        The configured job.
    """
    start_date, end_date = options.dates[0], options.dates[-1]
    job_root = nirvana.output_tables[0]
    job = job.env(
            yt_spec_defaults=dict(
                pool_trees=["physical"],
                tentative_pool_trees=["cloud"]
                ),
            templates=dict(
                tmp_files='//home/images/dev/anastasiiait/jobs_tmp/abt_purchase_' + options.date,
                job_root=job_root
            )
    )
    for date in date_range(start_date, end_date):
        log = job.table('//user_sessions/pub/search/daily/{}/clean'.format(date))
        # Group by key, order by subkey, run the reducer and store the result
        # in a per-date table. The date is appended directly to the output
        # prefix with no separator.
        # NOTE(review): presumably the prefix already ends with '/' -- confirm.
        log.groupby('key').sort('subkey') \
                .reduce(
                        parse_greenurls,
                        memory_limit=3*1024,
                        files=[
                            files.RemoteFile('//statbox/resources/libra.so'),
                            files.RemoteFile('//statbox/statbox-dict-last/blockstat.dict')
                        ]
                )\
                .put(job_root + date)
    return job


# Script entry point: when executed directly (not imported), hand control to
# the nile CLI runner.
if __name__ == '__main__':
    cli.run()
