#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

from crypta.profile.utils.config import config
from crypta.profile.utils.luigi_utils import ExternalInput, AttributeExternalInput
from crypta.profile.utils.segment_utils.builders import RegularSegmentBuilder
from crypta.profile.utils.segment_utils.processors import DayProcessor, LogProcessor

# URL regex alternatives that map a page view to ``with_children_6_11``
# (judging by the paths, pages for school grades 1-4).
# Backslashes stay doubled exactly as they must appear inside the YQL string
# literal. Every entry is a separate alternative: the alternatives used to be
# written inside one multi-line literal where line ends were joined with
# ",\n" instead of "|", which fused neighbouring alternatives into a pattern
# that no URL could match.
_URL_PATTERNS_6_11 = (
    r'gdz\\.ru/class-1', r'gdz\\.ru/class-2', r'gdz\\.ru/class-3', r'gdz\\.ru/class-4',
    r'gdz-putina\\.info/1-4-klassi',
    r'yagdz\\.com/1-4-klass',
    r'megaresheba\\.ru/publ/gdz/.+/1_klass', r'megaresheba\\.ru/publ/gdz/.+/2_klass',
    r'megaresheba\\.ru/publ/gdz/.+/3_klass', r'megaresheba\\.ru/publ/gdz/.+/4_klass',
    r'ugdz\\.ru/reshebniki/1-klass', r'ugdz\\.ru/reshebniki/2-klass',
    r'ugdz\\.ru/reshebniki/3-klass', r'ugdz\\.ru/reshebniki/4-klass',
    r'gdzputina\\.net/1-klass', r'gdzputina\\.net/2-klass',
    r'gdzputina\\.net/3-klass', r'gdzputina\\.net/4-klass',
    r'gdzplus\\.ru/1-klass', r'gdzplus\\.ru/2-klass',
    r'gdzplus\\.ru/3-klass', r'gdzplus\\.ru/4-klass',
    r'shkololo\\.ru/.+/1-klass', r'shkololo\\.ru/.+/2-klass',
    r'shkololo\\.ru/.+/3-klass', r'shkololo\\.ru/.+/4-klass',
    r'gdzhaha\\.com/gdz/class-1', r'gdzhaha\\.com/gdz/class-2',
    r'gdzhaha\\.com/gdz/class-3', r'gdzhaha\\.com/gdz/class-4',
    r'gdz\\.ltd/1-class', r'gdz\\.ltd/2-class', r'gdz\\.ltd/3-class', r'gdz\\.ltd/4-class',
    r'gdz\\.fm/.+/1-klass', r'gdz\\.fm/.+/2-klass', r'gdz\\.fm/.+/3-klass', r'gdz\\.fm/.+/4-klass',
)

# URL regex alternatives that map a page view to ``with_children_11_16``
# (judging by the paths, pages for school grades 5-9).
_URL_PATTERNS_11_16 = (
    r'gdz\\.ru/class-5', r'gdz\\.ru/class-6', r'gdz\\.ru/class-7',
    r'gdz\\.ru/class-8', r'gdz\\.ru/class-9',
    r'gdz-putina\\.info/5-klass', r'gdz-putina\\.info/6-klass', r'gdz-putina\\.info/7-klass',
    r'gdz-putina\\.info/8-klass', r'gdz-putina\\.info/9-klass',
    r'yagdz\\.com/5-klass', r'yagdz\\.com/6-klass', r'yagdz\\.com/7-klass/',
    r'yagdz\\.com/8-klass', r'yagdz\\.com/9-klass',
    r'megaresheba\\.ru/publ/gdz/.+/5_klass', r'megaresheba\\.ru/publ/gdz/.+/6_klass',
    r'megaresheba\\.ru/publ/gdz/.+/7_klass', r'megaresheba\\.ru/publ/gdz/.+/8_klass',
    r'megaresheba\\.ru/publ/gdz/.+/9_klass',
    r'ugdz\\.ru/reshebniki/5-klass', r'ugdz\\.ru/reshebniki/6-klass',
    r'ugdz\\.ru/reshebniki/7-klass', r'ugdz\\.ru/reshebniki/8-klass',
    r'ugdz\\.ru/reshebniki/9-klass',
    r'gdzputina\\.net/5-klass', r'gdzputina\\.net/6-klass', r'gdzputina\\.net/7-klass',
    r'gdzputina\\.net/8-klass', r'gdzputina\\.net/9-klass',
    r'gdzplus\\.ru/5-klass', r'gdzplus\\.ru/6-klass', r'gdzplus\\.ru/7-klass',
    r'gdzplus\\.ru/8-klass', r'gdzplus\\.ru/9-klass',
    r'shkololo\\.ru/.+/5-klass', r'shkololo\\.ru/.+/6-klass', r'shkololo\\.ru/.+/7-klass',
    r'shkololo\\.ru/.+/8-klass', r'shkololo\\.ru/.+/9-klass',
    r'gdzhaha\\.com/gdz/class-5', r'gdzhaha\\.com/gdz/class-6', r'gdzhaha\\.com/gdz/class-7',
    r'gdzhaha\\.com/gdz/class-8', r'gdzhaha\\.com/gdz/class-9',
    r'gdz\\.ltd/5-class', r'gdz\\.ltd/6-class', r'gdz\\.ltd/7-class',
    r'gdz\\.ltd/8-class', r'gdz\\.ltd/9-class',
    r'gdz\\.fm/.+/5-klass', r'gdz\\.fm/.+/6-klass', r'gdz\\.fm/.+/7-klass',
    r'gdz\\.fm/.+/8-klass', r'gdz\\.fm/.+/9-klass',
)

# YQL template for one day of the metrics log. The two URL matchers are
# substituted right here; ``{input_table}`` and ``{output_table}`` are written
# with doubled braces so that they survive this first ``format`` call and stay
# available as placeholders for the caller.
# NOTE(review): the patterns are unanchored, so ``gdz\.ru/class-1`` also matches
# ``class-10`` / ``class-11`` URLs and the CASE assigns them to
# ``with_children_6_11`` — confirm that this is intended.
day_processor_query = u"""
$urls_6_11 = Hyperscan::Grep("{urls_6_11}");

$urls_11_16 = Hyperscan::Grep("{urls_11_16}");

INSERT INTO `{{output_table}}` WITH TRUNCATE
SELECT DISTINCT
    CAST(yandexuid AS String) AS id,
    'yandexuid' AS id_type,
     CASE
        WHEN $urls_6_11(url) THEN 'with_children_6_11'
        WHEN $urls_11_16(url) THEN 'with_children_11_16'
        ELSE NUll
     END AS segment_name
FROM `{{input_table}}`
WHERE yandexuid != 0 AND ($urls_6_11(url) OR $urls_11_16(url))
""".format(
    urls_6_11=u'|'.join(_URL_PATTERNS_6_11),
    urls_11_16=u'|'.join(_URL_PATTERNS_11_16),
)


class ReqansMapper(object):
    """Mapper that turns a lemmatized request row into a ``with_children_*`` row.

    Two independent detectors look at ``row['lemmas']``:

    * ``is_about_child`` - an explicit "<number> <год|месяц|неделя>" pair in a
      request that also contains one of the child-related words;
    * ``is_about_schoolchild`` - a "<number> класс..." pair, converted to an age
      by adding ``class_age_offset``.

    At most one output row is produced per input row.

    Args:
        min_age_measure: smallest accepted number in an age pair (inclusive).
        max_age_measure: upper bound for the number in an age pair (exclusive).
        min_class_measure: smallest accepted school class (inclusive).
        max_class_measure: largest accepted school class (inclusive).
        class_age_offset: value added to a class number to obtain an age.
    """

    # At least one of these lemmas must be present for an explicit age to count.
    _CHILD_WORDS = frozenset((
        'ребенок', 'малыш', 'мальчик', 'девочка', 'конкурс', 'сарафан', 'платье', 'игрушка', 'игра',
        'мультфильм', 'детсад', 'младенец', 'кукла', 'юбка',
    ))
    _AGE_UNITS = frozenset(('год', 'месяц', 'неделя'))
    _YEAR_UNIT = 'год'
    _CLASS_PREFIX = 'класс'

    # (low, high, segment_name, accepts_class_age): ages in [low, high) map to
    # segment_name; the class-derived age is consulted only where the flag is set.
    # The order matters: the first matching bucket wins.
    _SEGMENTS = (
        (0, 1, 'with_children_0_1', False),
        (1, 3, 'with_children_1_3', False),
        (3, 6, 'with_children_3_6', False),
        (6, 11, 'with_children_6_11', True),
        (11, 16, 'with_children_11_16', True),
    )

    def __init__(self, min_age_measure, max_age_measure, min_class_measure, max_class_measure, class_age_offset):
        self.min_age_measure = min_age_measure
        self.max_age_measure = max_age_measure
        self.min_class_measure = min_class_measure
        self.max_class_measure = max_class_measure
        self.class_age_offset = class_age_offset

    @staticmethod
    def _to_int(token):
        """Return ``token`` as an int, or None when it is not a plain number."""
        if not token.isdigit():
            return None
        try:
            return int(token)
        except ValueError:
            # isdigit() accepts characters such as superscript digits that
            # int() rejects; treat them as "not a number" instead of failing.
            return None

    def is_about_child(self, lemmas):
        """Return the child's age found in ``lemmas``, or None.

        The age is taken from the first "<number> <unit>" pair whose number is
        within [min_age_measure, max_age_measure). A pair measured in years
        returns that number; a pair measured in months or weeks returns 0.
        None is returned when no child-related word is present or no pair fits.
        """
        if not self._CHILD_WORDS.intersection(lemmas):
            return None

        for token, unit in zip(lemmas, lemmas[1:]):
            if unit not in self._AGE_UNITS:
                continue
            amount = self._to_int(token)
            if amount is None or not (self.min_age_measure <= amount < self.max_age_measure):
                continue
            return amount if unit == self._YEAR_UNIT else 0

        return None

    def is_about_schoolchild(self, lemmas):
        """Return an age derived from the first "<number> класс..." pair, or None.

        The number must be within [min_class_measure, max_class_measure]; the
        result is that number plus ``class_age_offset``.
        """
        for token, following in zip(lemmas, lemmas[1:]):
            grade = self._to_int(token)
            if grade is None:
                continue
            if self.min_class_measure <= grade <= self.max_class_measure and \
                    following.startswith(self._CLASS_PREFIX):
                return grade + self.class_age_offset
        return None

    def _pick_segment(self, child_age, child_age_by_class):
        """Return the first segment name matching either age, or None."""
        # NOTE(review): when both detectors fire for different buckets only the
        # younger bucket is reported - confirm this precedence is intended.
        for low, high, segment_name, accepts_class_age in self._SEGMENTS:
            if child_age is not None and low <= child_age < high:
                return segment_name
            if accepts_class_age and child_age_by_class is not None and low <= child_age_by_class < high:
                return segment_name
        return None

    def __call__(self, row):
        """Yield at most one ``{'id', 'id_type', 'segment_name'}`` dict for ``row``.

        ``row`` must provide ``'lemmas'`` (a sequence of strings, possibly None
        or empty) and ``'yandexuid'``.
        """
        lemmas = row['lemmas']
        if not lemmas:
            return

        segment_name = self._pick_segment(
            self.is_about_child(lemmas),
            self.is_about_schoolchild(lemmas),
        )
        if segment_name is not None:
            yield {
                'id': str(row['yandexuid']),
                'id_type': 'yandexuid',
                'segment_name': segment_name,
            }


def reducer(key, rows):
    """Yield the reduce key itself as the only output row; ``rows`` is ignored.

    NOTE(review): it is passed to ``run_map_reduce`` with
    ``reduce_by=('id', 'id_type', 'segment_name')``, so it presumably leaves one
    row per distinct triple - confirm against the YT reduce contract.
    """
    yield key


class ProcessedMetricsForWithChildrenByAges(DayProcessor):
    """Day processor that runs ``day_processor_query`` over one day of parsed metrics."""

    def requires(self):
        """Declare the day's table under ``config.METRICS_PARSED_DIR`` as input.

        The input is requested with ``attribute_name='closed'`` /
        ``attribute_value=True`` and the columns ``yandexuid`` and ``url``.
        """
        daily_table = os.path.join(config.METRICS_PARSED_DIR, self.date)
        return AttributeExternalInput(
            table=daily_table,
            attribute_name='closed',
            attribute_value=True,
            columns=('yandexuid', 'url'),
        )

    def process_day(self, inputs, output_path):
        """Fill the query template with the table paths and run it in the task transaction."""
        query = day_processor_query.format(
            input_table=inputs.table,
            output_table=output_path,
        )
        self.yql.query(query, transaction=self.transaction)


class ProcessedReqansForWithChildrenByAges(DayProcessor):
    """Day processor that runs ``ReqansMapper`` + ``reducer`` over one day of parsed reqans."""

    def requires(self):
        """Declare the day's table under ``config.REQANS_PARSED_DIR`` as input.

        The input is requested with ``attribute_name='closed'`` /
        ``attribute_value=True`` and the columns ``yandexuid`` and ``lemmas``.
        """
        daily_table = os.path.join(config.REQANS_PARSED_DIR, self.date)
        return AttributeExternalInput(
            table=daily_table,
            attribute_name='closed',
            attribute_value=True,
            columns=('yandexuid', 'lemmas'),
        )

    def process_day(self, inputs, output_path):
        """Run the map-reduce that extracts segment rows from request lemmas."""
        # Accepted numbers: ages 1..15, school classes 1..9; a class number is
        # turned into an age by adding 6.
        mapper = ReqansMapper(
            min_age_measure=1,
            max_age_measure=16,
            min_class_measure=1,
            max_class_measure=9,
            class_age_offset=6,
        )

        self.yt.run_map_reduce(
            mapper,
            reducer,
            inputs.table,
            output_path,
            reduce_by=('id', 'id_type', 'segment_name'),
        )


# Segment name -> application ids whose presence assigns that segment.
# Every id is listed once per segment: the ids become keys of a YQL ``AsDict``
# (see ``prepare_with_children_by_app``), where a repeated key adds nothing.
SEGMENT_APP_DICT = {
    'with_children_0_1': [
        'com.wachanga.babycare',
        'ru.babylife.babylife',
        'com.amila.parenting',
        'com.babyjoy.android',
        'com.drillyapps.babydaybook',
        'au.com.penguinapps.android.babyfeeding.client.android',
        'innmov.babymanager',
        'com.meshukraina.kroha',
        'com.sao.babytime',
        'com.notissimus.akusherstvo.android',
        'ru.familion.mamsbook',
        'ru.childfeed.app',
        'com.nighp.babytracker_android',
        'jp.fmcm.babysdiary',
        'dd.moments',
        'com.ipavel.kiddylog',
        'sasha.mymalysh.da',
        'net.relaxio.babysleep',
        'com.alexzbirnik.babydayka',
        'com.softservice.babymonitor',
        'nicebytes.childwithmilk',
        'feigenson.baubay',
        'com.nighp.babytracker',
        'com.nighp.firstyear',
        'com.sevenlogics.babynursing',
        'au.com.penguinapps.feedbaby',
        'com.seacloud.baby-connect',
        'com.bitmethod.eatsleep.lite',
        'com.bhi.babywatchlite',
        'com.walshitech.basicbabyfeedings',
        'com.walshitech.basicbabysleeps',
        'com.walshitech.basicbaby',
        'com.krkids.breast-feed-tracker',
    ],
    'with_children_1_3': [
        'com.gn1.trainy',
        'com.yovogames.kindergarten',
        'com.edujoy.baby_puzzles',
        'com.gokids.colors2',
        'com.arrowstar.smartkidselite',
        'com.edujoy.masha.games',
        'sgolovanov.childrenpiramid',
        'com.goodsofttech.puzzlefortoddlers',
        'com.iabuzz.puzzle4kidsanimals',
        'com.funsorting.malyavoknet',
        'com.starbox.puzzlecar',
        'com.gokids.transport2',
        'com.gamemagica.babymagica_free',
        'com.gokids.learnanim',
        'com.yovogames.trainadventure',
        'com.floridagamesstudio.kidspuzzle',
        'com.appquiz.baby_blocks',
        'ru.sergeiandreev.gameswithbaby2',
        'com.stripey.train',
        'stripey.monstertruck',
        'stripey.car',
        'stripey.squigglefish',
        'com.stripey.wildwest',
        'com.sagosago.babies',
        'com.sagosago.friends',
        'com.sagosago.toolbox',
        'com.sagosago.forestflyer',
        'com.sagosago.bugbuilder',
        'com.sagosago.babiesdressup',
    ],
    'with_children_3_6': [
        'com.binibambini.abc',
        'com.binibambini.minilite',
        'net.cleverbit.saveanimalsfree',
        'densamed.letterslearning',
        'com.amayasoft.bookstorem4.ru',
        'com.miniklerogreniyor.gamepack',
        'com.binibambini.miniabclite',
        'com.amayasoft.books',
        'com.amayasoft.books.ar',
        'com.kuzia.skaskik',
        'com.biika.skazkamal',
        'com.alterego.skazka.littlekids',
        'com.alphabeten',
        'jqsoft.games.kids.alphabet',
        'com.binibambini.drawabc',
        'com.indigokids.mashacoloring',
        'com.yovogames.painting',
        'com.orange.coloring.learn.kids',
        'com.yovogames.paintingvehicles',
        'com.yovogames.cleaninghouse',
        'com.binibambini.numbersmonsterslite',
        'ru.publishing1c.fixiesmath',
        'com.imult.papermates',
        'com.tocaboca.tocalifefarm',
        'com.tocaboca.tocatown',
        'com.tocaboca.tocavacation',
        'com.tocaboca.tocaschool',
        'com.tocaboca.tocacity',
        'com.tocaboca.tocanature',
        'com.drpanda.city2',
        'com.drpanda.town.street',
        'com.drpanda.restaurant3',
        'com.drpanda.drpandacandyfactory.fun',
        'com.drpanda.school',
        'com.tgelec.setracker',
        'org.findmykids.app',
        'ru.kidcontrol.gpstracker',
        'gl.kid.alert',
        'com.3gtc.secure5',
    ],
}


# ``child_stat`` entry -> segment name; entries that are not listed are skipped.
# Built once at import time instead of once per mapped row.
_TAXI_CHILD_STAT_TO_SEGMENT = {
    '0_4': 'with_children_1_3',
    '3_7': 'with_children_3_6',
    '6_12': 'with_children_6_11',
}


def extract_children_from_taxi(row):
    """Yield one segment row per recognised entry of ``row['child_stat']``.

    Args:
        row: mapping with ``'child_stat'`` (an iterable of entries, or None)
            and ``'passport_uid'``.

    Yields:
        Dicts with ``id`` (the passport uid, passed through unchanged),
        ``id_type`` equal to ``'puid'`` and ``segment_name``.
    """
    # A missing (None) child_stat is treated as "no children info" rather than
    # failing the whole map operation on one row.
    for child_info in row['child_stat'] or ():
        segment_name = _TAXI_CHILD_STAT_TO_SEGMENT.get(child_info)
        if segment_name is None:
            continue
        yield {
            'id': row['passport_uid'],
            'id_type': 'puid',
            'segment_name': segment_name,
        }


# YQL template that merges all "with children" sources and spreads the
# segments across households.
#
# Placeholders: app_metrica_table, taxi_data_table, metrics_table, reqans_table,
# id_to_crypta_id_table, crypta_id_to_hhid_table, yandexuid_to_hhid_table,
# hhid_to_yandexuid_table, output_table.
#
# Steps, as written in the query:
#   1. $with_children_ids       - app-metrica rows UNION ALL taxi rows.
#   2. $with_children_yandexuid - metrics rows UNION ALL reqans rows.
#   3. ids from (1) are joined to a crypta id on (id, id_type) and then to a
#      household id (hhid); rows from (2) are joined to an hhid on id == yuid.
#   4. $segment_hhids - distinct (hhid, segment_name) pairs from both paths.
#   5. $segment_yandexuid_to_hhid - every yuid of those households, emitted
#      with id_type 'yandexuid'.
#   6. The output is the distinct (id, id_type, segment_name) rows of (5)
#      together with (2). Rows of (1) reach the output only through the
#      household expansion.
with_children_query_template = """
PRAGMA yt.InferSchema;

$with_children_ids = (
    SELECT id, id_type, segment_name
    FROM `{app_metrica_table}`
    UNION ALL
    SELECT id, id_type, segment_name
    FROM `{taxi_data_table}`
);

$with_children_yandexuid = (
    SELECT id, id_type, segment_name
    FROM `{metrics_table}`
    UNION ALL
    SELECT id, id_type, segment_name
    FROM `{reqans_table}`
);

$with_children_crypta_id = (
    SELECT matching.cryptaId AS crypta_id, with_children.segment_name AS segment_name
    FROM $with_children_ids AS with_children
    INNER JOIN `{id_to_crypta_id_table}` AS matching
    USING (id, id_type)
);

$with_children_crypta_id_hhid = (
    SELECT matching.hhid AS hhid, with_children.segment_name AS segment_name
    FROM $with_children_crypta_id AS with_children
    INNER JOIN `{crypta_id_to_hhid_table}` AS matching
    USING (crypta_id)
);

$with_children_yandexuid_hhid = (
    SELECT matching.hhid AS hhid, with_children.segment_name AS segment_name
    FROM $with_children_yandexuid AS with_children
    INNER JOIN `{yandexuid_to_hhid_table}` AS matching
    ON with_children.id == matching.yuid
);

$segment_hhids = (
    SELECT hhid, segment_name
    FROM (
        SELECT *
        FROM $with_children_crypta_id_hhid
        UNION ALL
        SELECT *
        FROM $with_children_yandexuid_hhid
    )
    GROUP BY hhid, segment_name
);

$segment_yandexuid_to_hhid = (
    SELECT
        matching.yuid AS id,
        'yandexuid' AS id_type,
        segment.segment_name AS segment_name
    FROM $segment_hhids AS segment
    INNER JOIN `{hhid_to_yandexuid_table}` AS matching
    USING (hhid)
);

INSERT INTO `{output_table}` WITH TRUNCATE
SELECT id, id_type, segment_name
FROM (
    SELECT *
    FROM $segment_yandexuid_to_hhid
    UNION ALL
    SELECT *
    FROM $with_children_yandexuid
)
GROUP BY id, id_type, segment_name
"""

# YQL template that keeps the rows of ``{devid_by_app_table}`` whose ``app`` is a
# key of the app -> segment dictionary and writes them to ``{output_table}``
# with the looked-up segment name.
# ``{app_to_segment_name}`` is pasted verbatim as the argument list of AsDict,
# so it must already be rendered as a sequence of AsTuple(...) entries.
app_metrica_query = """
$app_to_segment_name = AsDict(
{app_to_segment_name}
);

INSERT INTO `{output_table}` WITH TRUNCATE
SELECT id, id_type, $app_to_segment_name[app] AS segment_name
FROM `{devid_by_app_table}`
WHERE DictContains($app_to_segment_name, app)
"""


class WithChildrenByAges(RegularSegmentBuilder):
    """Segment builder that combines metrics, reqans, taxi and app data.

    The per-day processors are requested for ``number_of_days`` days ending at
    ``self.date``; the taxi log and the monthly device-by-app table are taken
    as external inputs.
    """

    # Segment name -> numeric segment id.
    name_segment_dict = {
        'with_children_0_1': 1026,
        'with_children_1_3': 1027,
        'with_children_3_6': 1028,
        'with_children_6_11': 1029,
        'with_children_11_16': 1030,
        'girl_0_1': 1894,
        'girl_1_3': 1886,
        'girl_3_6': 1888,
        'girl_6_11': 1889,
        'girl_11_16': 1887,
        'boy_0_1': 1895,
        'boy_1_3': 1890,
        'boy_3_6': 1892,
        'boy_6_11': 1893,
        'boy_11_16': 1891,
    }

    keyword = 547
    number_of_days = 35

    def requires(self):
        """Return the four inputs of the builder keyed by a descriptive name."""
        metrics_days = LogProcessor(
            ProcessedMetricsForWithChildrenByAges,
            self.date,
            self.number_of_days,
        )
        reqans_days = LogProcessor(
            ProcessedReqansForWithChildrenByAges,
            self.date,
            self.number_of_days,
        )
        taxi_log = ExternalInput(config.TAXI_DATA_LOG)
        devid_by_app = ExternalInput(config.DEVID_BY_APP_MONTHLY_TABLE)

        return {
            'ProcessedMetrics': metrics_days,
            'ProcessedReqans': reqans_days,
            'TaxiData': taxi_log,
            'DevidByApp': devid_by_app,
        }

    def prepare_with_children_by_app(self, with_children_by_app_table):
        """Write the app-based segment rows into ``with_children_by_app_table``.

        ``SEGMENT_APP_DICT`` is rendered as one ``AsTuple("app", "segment"),``
        line per application and substituted into ``app_metrica_query``.
        """
        rendered_pairs = [
            'AsTuple("{}", "{}"),'.format(app, segment_name)
            for segment_name, apps in SEGMENT_APP_DICT.iteritems()
            for app in apps
        ]

        query = app_metrica_query.format(
            devid_by_app_table=self.input()['DevidByApp'].table,
            output_table=with_children_by_app_table,
            app_to_segment_name='\n'.join(rendered_pairs),
        )
        self.yql.query(query, transaction=self.transaction)

    def build_segment(self, inputs, output_path):
        """Build the final segment table at ``output_path``.

        Taxi rows and app rows are first materialised into temporary tables,
        then ``with_children_query_template`` merges them with the metrics and
        reqans tables.
        """
        with self.yt.TempTable() as taxi_rows_table:
            with self.yt.TempTable() as app_rows_table:
                self.yt.run_map(
                    extract_children_from_taxi,
                    inputs['TaxiData'].table,
                    taxi_rows_table,
                )

                self.prepare_with_children_by_app(app_rows_table)

                query = with_children_query_template.format(
                    metrics_table=inputs['ProcessedMetrics'].table,
                    reqans_table=inputs['ProcessedReqans'].table,
                    app_metrica_table=app_rows_table,
                    taxi_data_table=taxi_rows_table,
                    id_to_crypta_id_table=config.VERTICES_NO_MULTI_PROFILE,
                    crypta_id_to_hhid_table=config.HOUSEHOLD_CRYPTA_ID_TO_HHID,
                    yandexuid_to_hhid_table=config.HOUSEHOLD_REVERSED_TABLE,
                    hhid_to_yandexuid_table=config.HOUSEHOLD_ENRICH_TABLE,
                    output_table=output_path,
                )
                self.yql.query(query, transaction=self.transaction)
