from __future__ import unicode_literals

import os
import time
import requests
import logging

from collections import defaultdict

from yt.wrapper import JsonFormat
from crypta.lib.python.identifiers.generic_id import GenericID

# Crypta staff loader
# ID: 09c8e4dab7004aa1a6b8d5955c4b4710
# Pass: 92288da9bacd4d8ab4d157eff40a1b5f


def batch_iterate(iterable, n=1):
    """Yield consecutive slices of ``iterable`` with at most ``n`` items each.

    ``iterable`` has to support ``len()`` and slicing; the last slice may be
    shorter than ``n``.
    """
    total = len(iterable)
    for start in range(0, total, n):
        stop = start + n
        if stop > total:
            # never slice past the end of the sequence
            stop = total
        yield iterable[start:stop]


class IdType(object):

    """Names of the identifier types extracted from staff records."""

    # Ids that already exist as dedicated fields
    EMAIL = "email"
    PHONE = "phone"

    # Types coming from the "accounts" field
    JABBER = "jabber"
    ICQ = "icq"
    SKYPE = "skype"
    TWITTER = "twitter"
    MOI_KRUG = "moi_krug"
    PERSONAL_SITE = "personal_site"
    LIVEJOURNAL = "livejournal"
    GITHUB = "github"
    FACEBOOK = "facebook"
    VKONTAKTE = "vkontakte"
    HABRAHABR = "habrahabr"
    INSTAGRAM = "instagram"
    FLICKR = "flickr"
    TUMBLR = "tumblr"
    BLOGSPOT = "blogspot"
    TELEGRAM = "telegram"
    YAMB = "yamb"
    ASSISTENT = "assistent"
    STRAVA = "strava"

    # Types coming from the "contacts" field
    SITE = "site"
    BLOG = "blog"  # Could be url or LiveJournal account

    STAFF = "staff"
    STAFF_LOGIN = "staff_login"
    PASSPORT_LOGIN = "passport_login"

    # Fallback for every key that is neither known nor aliased
    UNKNOWN = "unknown"

    # Keys that are accepted verbatim
    KNOWN_KEYS = {
        JABBER,
        ICQ,
        SKYPE,
        TWITTER,
        MOI_KRUG,
        PERSONAL_SITE,
        LIVEJOURNAL,
        GITHUB,
        FACEBOOK,
        VKONTAKTE,
        HABRAHABR,
        INSTAGRAM,
        FLICKR,
        TUMBLR,
        BLOGSPOT,
        TELEGRAM,
        STRAVA,
        YAMB,
        ASSISTENT,
        EMAIL,
        PHONE,
        SITE,
        BLOG,
        PASSPORT_LOGIN,
        STAFF_LOGIN,
        STAFF,
    }
    # Alias key -> identifier type it is stored under
    EXTRA_KEYS = {
        "personal_email": EMAIL,
        "another_work_email": EMAIL,
        "gmail": EMAIL,
        "apple_id": EMAIL,
        "home_email": EMAIL,
        "play_market_id": EMAIL,
        "home_page": SITE,
        "login_mk": MOI_KRUG,
        "mk": MOI_KRUG,
        "login_lj": BLOG,
        "login_skype": SKYPE,
    }

    @classmethod
    def clean_item(cls, key):
        """Map the type of a contact/account item to its normalized key"""
        known, extra = cls.KNOWN_KEYS, cls.EXTRA_KEYS
        return cls._norm(known, extra, key)

    @classmethod
    def _norm(cls, known, extra, key):
        """Return key if it is in known, else its alias from extra, else UNKNOWN"""
        return key if key in known else extra.get(key, cls.UNKNOWN)


class StaffLoader(object):

    """Pull person records from the staff api and extract user ids from them"""

    API_HOST = "https://staff-api.yandex-team.ru/v3/persons"
    FIELDS = (
        "is_deleted",
        "uid",
        "login",
        "guid",
        "id",
        "yandex.login",
        "work_email",
        "work_phone",
        "personal.mobile_phone_number",
        "phones",
        "emails",
        "accounts",
        "contacts",
        "official",
        "personal.gender",
        "personal.birthday",
        "name",
    )
    PAGE_SIZE = 300

    def __init__(self, sleep_time=2, with_dismissed=False, with_robots=False, with_homeworkers=False):
        """
        :param sleep_time: - optional sleep time (seconds) between staff page reads
        :param with_dismissed: - optional flag to fetch dismissed users; currently
            unused because the "dismissed" entry of the skip config is disabled
        :param with_robots: - optional flag to keep robots (client-side filter)
        :param with_homeworkers: - optional flag to keep homeworkers (client-side filter)

        NOTE(review): fetch() always asks the API for non-dismissed, non-robot,
        non-homeworker persons, so the with_* flags only relax the client-side
        filter in _skip_row() - confirm whether the request should honour them.
        """
        self.logger = make_logger()
        self.sleep_time = sleep_time
        self.skip_config = {
            # "dismissed": not with_dismissed,
            "robot": not with_robots,
            "homeworker": not with_homeworkers,
        }

    def process(self):
        """Fetch and parse staff response, return the list of prepared records"""
        result = self.parse(self.fetch())
        self.logger.info("Done...")
        return result

    def fetch(self):
        """Read staff api page by page, yielding the decoded JSON of every page.

        The number of pages is taken from the "pages" field of each response.
        Raises requests.HTTPError when the api answers with an error status.
        """
        current_page = 1
        max_page_found = 1

        # The context manager closes the session once the generator is
        # exhausted (or closed / garbage collected).
        with requests.Session() as session:
            token = self._get_access_token()
            session.headers.update({"Authorization": "OAuth {}".format(token)})

            while current_page <= max_page_found:
                response = session.get(
                    url=self.API_HOST,
                    params={
                        "_fields": ",".join(self.FIELDS),
                        "_limit": self.PAGE_SIZE,
                        "_page": current_page,
                        "is_deleted": False,
                        "official.is_dismissed": False,
                        "official.is_homeworker": False,
                        "official.is_robot": False,
                    },
                )
                self.logger.debug("Fetching page {}/{} url: {}".format(current_page, max_page_found, response.url))

                # Fail with a clear HTTP error instead of a KeyError on the
                # body of an error response.
                response.raise_for_status()

                json_data = response.json()
                yield json_data
                max_page_found = max(max_page_found, json_data["pages"])

                current_page += 1
                if current_page <= max_page_found:
                    # throttle only between pages, not after the last one
                    time.sleep(self.sleep_time)

    def parse(self, staff_data):
        """Clean data from staff and build one record per staff login.

        :param staff_data: iterable of page dicts, each with a "result" list of rows
        :return: list of dicts with keys "data", staff_login, passport_login
            and the meta fields filled by _parse_row()

        Rows that fail to parse are logged and counted as bad. Rows skipped by
        _skip_row() are counted as good.
        """
        self.staff_login_map = defaultdict(lambda: dict(data=defaultdict(list), meta=dict()))

        good_counter = 0
        bad_counter = 0
        for part in staff_data:
            for row in part["result"]:
                try:
                    self._parse_row(row)
                except Exception:
                    # keep going on a broken row, but let KeyboardInterrupt /
                    # SystemExit propagate
                    self.logger.exception("Can't parse row")
                    bad_counter += 1
                else:
                    good_counter += 1
        self.logger.info("Good records: {}".format(good_counter))
        self.logger.info("Bad records: {}".format(bad_counter))

        return [self._prepare_staff(staff_login, row) for staff_login, row in self.staff_login_map.items()]

    @staticmethod
    def _prepare_staff(staff_login, row):
        """Flatten one staff_login_map entry into an output record"""
        staff = dict(data=dict(row["data"]), **row["meta"])
        passport_logins = row["data"][IdType.PASSPORT_LOGIN]
        staff.update(
            {
                IdType.STAFF_LOGIN: staff_login,
                IdType.PASSPORT_LOGIN: passport_logins[0] if passport_logins else None,
            }
        )
        # both logins live at the top level of the record, not inside "data"
        staff["data"].pop(IdType.STAFF_LOGIN, None)
        staff["data"].pop(IdType.PASSPORT_LOGIN, None)
        return staff

    def _parse_row(self, row):
        """Parse the ids of one staff row into self.staff_login_map.

        NOTE(review): the map entry is filled in place, so a row that raises
        midway leaves a partially filled (and not normalized) entry behind.
        """
        if self._skip_row(row):
            # skip deleted persons and the kinds enabled in skip_config
            return

        staff_login = row["login"].strip().lower()
        entry = self.staff_login_map[staff_login]
        data = entry["data"]
        meta = entry["meta"]

        data[IdType.STAFF_LOGIN].append(staff_login)
        data[IdType.PASSPORT_LOGIN].append(GenericID("login", str(row["yandex"]["login"])).normalize)

        # get phones
        data[IdType.PHONE].extend(item["number"] for item in row["phones"])
        data[IdType.PHONE].append(row["work_phone"])

        # get emails
        data[IdType.EMAIL].extend(item["address"] for item in row["emails"])
        data[IdType.EMAIL].append(row["work_email"])

        # get extra data telegram, github, etc. (emails/phones may come from these fields too)
        for key in ("accounts", "contacts"):
            for item in row[key]:
                data[IdType.clean_item(item["type"])].append(item["value"])

        # get bday info
        personal = row.get("personal", {})
        meta["dt"] = personal.get("birthday")
        meta["gender"] = personal.get("gender")
        meta["name"] = row.get("name", {})

        # clean values: normalize, drop empty ones, deduplicate and sort
        for id_type, id_values in list(data.items()):
            normalized = (self._normalize_value(id_type, value) for value in id_values)
            data[id_type] = sorted(set(filter(None, normalized)))

    @staticmethod
    def _normalize_value(id_type, value):
        """Normalize one id value; return None when it is not significant.

        When GenericID cannot handle the value, strings are lowercased and
        stripped, and any other value is returned unchanged.
        """
        try:
            text_type = unicode  # noqa: F821 - Python 2
        except NameError:
            text_type = str  # Python 3
        try:
            identifier = GenericID(id_type, str(value))
            if identifier.is_significant():
                return identifier.normalize
        except Exception:
            if isinstance(value, (str, text_type)):
                return text_type(value).lower().strip()
            return value
        return None

    def _skip_row(self, row):
        """Return True for deleted persons and for kinds enabled in skip_config"""
        if row["is_deleted"]:
            return True
        # row["official"] is only read for the kinds that are actually enabled
        return any(
            enabled and row["official"]["is_{key}".format(key=key)] for key, enabled in self.skip_config.items()
        )

    def _get_access_token(self):
        """Get OAuth access token from STAFF_EXPORTER_TOKEN (None when unset)"""
        # manual get access token
        # https://oauth.yandex-team.ru/authorize?response_type=token&client_id=09c8e4dab7004aa1a6b8d5955c4b4710
        return os.environ.get("STAFF_EXPORTER_TOKEN")


class YtProcessor(object):

    """Creates the dynamic YT table when it is missing and uploads rows to it"""

    def __init__(self, yt_client):
        self.yt = yt_client
        self.logger = make_logger()

    def process_table(self, data, table_name):
        """Ensure the table exists, mount it and insert data in batches of 1000.

        After the upload the table is unmounted and mounted again.
        """
        client = self.yt

        self._create_table(table_name)
        client.mount_table(table_name, sync=True)

        self.logger.info("Upload data to YT table: {}".format(table_name))
        for chunk in batch_iterate(data, 1000):
            json_format = JsonFormat(encoding="utf-8")
            client.insert_rows(table_name, chunk, update=True, format=json_format)

        client.unmount_table(table_name, sync=True)
        client.mount_table(table_name, sync=True)
        self.logger.info("Upload data finish")

    def _create_table(self, table_name):
        """Create the table with the staff schema; an existing table is kept."""
        self.logger.info("(re)Create YT table: {}".format(table_name))

        # staff login is the only key (sorted) column
        columns = [
            {"name": IdType.STAFF_LOGIN, "type": "string", "sort_order": "ascending"},
            {"name": IdType.PASSPORT_LOGIN, "type": "string"},
            {"name": "data", "type": "any"},
            {"name": "name", "type": "any"},
            {"name": "gender", "type": "any"},
            {"name": "dt", "type": "string"},
        ]
        table_attributes = {
            "schema": columns,
            "dynamic": True,
            "optimize_for": "scan",
        }

        # Removal of a previous table is disabled: ignore_existing=True leaves
        # an already existing table untouched.
        self.yt.create(
            "table",
            path=table_name,
            recursive=True,
            ignore_existing=True,
            attributes=table_attributes,
        )


def make_logger():
    """Return the module logger with DEBUG console output (for local runs).

    The function is called once per StaffLoader / YtProcessor instance, all of
    which share the same named logger. The console handler is therefore tagged
    with a name and attached only once, otherwise every message would be
    printed once per make_logger() call.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    handler_name = "{}.console".format(__name__)
    already_attached = any(handler.get_name() == handler_name for handler in logger.handlers)
    if not already_attached:
        console = logging.StreamHandler()
        console.setLevel(logging.DEBUG)
        console.set_name(handler_name)
        logger.addHandler(console)
    return logger
