import json
import os
import pickle
import time
from collections import defaultdict
from pathlib import Path
from pprint import pprint
from tqdm import tqdm
import colorama
import yaml
from lxml import etree
from urllib3.util import Timeout

from intranet.wiki.tools.wikiclient import EnvType, Flavor, B2BCreds, get_contour, normalize_slug
from intranet.wiki.tools.conf2wiki.configs import Cfg
from intranet.wiki.tools.conf2wiki.parser import ConfluenceXmlParser


# Unique marker object; presumably a "value not provided" sentinel distinct from None.
# NOTE(review): nothing in the visible part of this module references it -- confirm it is still needed.
NOT_SET = object()


class PageWasNotImported(Exception):
    """Raised when the wiki API answers 404 while listing the files of a page."""


class EntitiesParser:
    @classmethod
    def get_node_properties(cls, elm):
        prop = {}
        for sub_elm in elm:
            if sub_elm.tag == 'property':
                name = sub_elm.attrib.get('name')

                # --- refs to other entities ---

                children = sub_elm.getchildren()
                if len(children) == 1 and children[0].tag == 'id':
                    subklass = sub_elm.attrib.get('class')
                    id_node = sub_elm.getchildren()[0]
                    prop[name] = {'class': subklass, 'id': id_node.text}

                    continue

                if name in {'content', 'sourceContent', 'containerContent'}:
                    subklass = sub_elm.attrib.get('class')
                    try:
                        id_node = sub_elm.getchildren()[0]
                        prop[name] = {'class': subklass, 'id': id_node.text}
                    except IndexError:
                        pass
                elif name == 'body':
                    prop[name] = sub_elm.text
                else:
                    prop[name] = sub_elm.text

            elif sub_elm.tag == 'id':
                prop['id'] = sub_elm.text

            elif sub_elm.tag == 'collection':
                name = 'collection_' + sub_elm.attrib.get('name')
                prop[name] = []
                for e in sub_elm.getchildren():
                    children = e.getchildren()
                    if len(children) > 0 and children[0].tag == 'id':
                        prop[name].append(children[0].text)

        return prop

    @classmethod
    def do_pass(cls, cfg: Cfg):
        ALLOWED_CLASSES = frozenset(
            ['Space', 'SpaceDescription', 'Page', 'Attachment', 'ConfluenceUserImpl', 'BodyContent']
        )
        IGNORED_CLASSES = frozenset(
            [
                'User2ContentRelationEntity',
                'Notification',
                'ContentProperty',
                'SpacePermission',
                'CustomContentEntityObject',
                'OutgoingLink',
                'Comment',
                'Label',
                'Labelling',
                'ReferralLink',
                'LikeEntity',
                'InternalUser',
                'PersonalInformation',
                'HibernateMembership',
                'ConfluenceBandanaRecord',
                'Content2ContentRelationEntity',
                'GlobalDescription',
                'ChangedValueEntity',
                'PageTemplate',
                'AuditRecordEntity',
                'SchedulerClusteredJob',
                'ContentPermissionSet',
                'AffectedObjectEntity',
                'BlogPost',
                'InternalGroupAttribute',
                'SchedulerClusteredJob',
                'BucketPropertySetItem',
                'ContentProperty',
                'User2ContentRelationEntity',
                'ContentPermission',
                'ApplicationImpl',
                'DirectoryImpl',
                'DirectoryMapping',
                'Connection',
                'InternalGroup',
            ]
        )

        ignored_counter = defaultdict(int)
        loaded_counter = defaultdict(int)
        entities = defaultdict(dict)

        for event, elm in tqdm(etree.iterparse(cfg.xml_path, tag='object')):
            klass = elm.attrib.get('class')

            if klass in IGNORED_CLASSES:
                ignored_counter[klass] += 1
                continue
            else:
                loaded_counter[klass] += 1

            props = cls.get_node_properties(elm)

            if klass in ALLOWED_CLASSES:
                entities[klass][props['id']] = props
            else:
                print(f'Unknown klass {klass}')

        with (open(cfg.path / cfg.content_bin, 'wb')) as wb:
            pickle.dump(entities, wb)

        print('Step 1')
        print('--- Loaded Entities:')
        pprint(loaded_counter)
        print('--- Ignored Entities:')
        pprint(ignored_counter)

        print('Step 2')
        user_resolve = {user['id']: user['name'] for user in entities['ConfluenceUserImpl'].values()}

        with (open(cfg.path / cfg.users_bin, 'wb')) as wb:
            pickle.dump(user_resolve, wb)

        print('Step 3')  # -------------------------------------------------------------------------------
        pages = {}

        for page in entities['Page'].values():
            if page.get('originalVersionId') or 'originalVersion' in page:
                continue

            if 'space' not in page:
                space = 'default'
            else:
                space = entities['Space'][page['space']['id']]['key']

            if 'collection_bodyContents' not in page or len(page['collection_bodyContents']) == 0:
                body = ''
            else:
                body = entities['BodyContent'][page['collection_bodyContents'][0]]['body']

            if page['contentStatus'] == 'draft':
                continue

            if (page['title'] is None or len(page['title'])) and (body is None or len(body) == 0):
                continue

            pages[page['id']] = {
                'parent_id': page.get('parent')['id'] if 'parent' in page else None,
                'body': body,
                'title': page['title'],
                'space': space,
                'created_at': page['creationDate'],
                'modified_at': page['lastModificationDate'],
            }

        for _, a in entities['Attachment'].items():
            page_id = a['containerContent']['id']

            if page_id not in pages:
                continue

            if 'files' not in pages[page_id]:
                pages[page_id]['files'] = []

            pages[page_id]['files'].append({'filename': a['title'], 'id': a['id']})

        # ----------------------------------------------------------------------------
        page_title_lut = {}
        page_id_lut = {}
        # ----------------------------------------------------------------------------

        slugs = {}
        max_length = 0

        uniq_child_slug = defaultdict(set)  # parent_id или space -> slug
        CUT = 16

        for page_id, page in pages.items():
            space = page['space']
            if page['parent_id']:
                slug_p = page['parent_id']
            else:
                slug_p = '$$' + space

            normalized_slug_base = normalize_slug(page['title'] or 'untitled')[:CUT]
            normalized_slug = normalized_slug_base
            counter = 2

            while normalized_slug in uniq_child_slug[slug_p]:
                normalized_slug = normalized_slug_base + str(counter)
                counter += 1

            uniq_child_slug[slug_p].add(normalized_slug)
            page['slug_cut'] = normalized_slug

        for page_id, page in pages.items():
            space = page['space']

            parent_slugs = []
            nav = page
            while True:
                parent_slugs.append(nav['slug_cut'])
                if nav['parent_id'] is not None:
                    if nav['parent_id'] in pages:
                        nav = pages[nav['parent_id']]
                    else:
                        print(f'No page {nav["parent_id"]} -- bad parent')
                        break

                else:
                    break

            parent_slugs.append(normalize_slug(space))

            candidate = '/'.join(part_slug for part_slug in reversed(parent_slugs))

            assert len(candidate) < 255, candidate

            if space not in page_title_lut:
                page_title_lut[space] = {}

            if space not in page_id_lut:
                page_id_lut[space] = {}

            base = candidate
            cntr = 2

            while candidate in slugs:
                candidate = base + str(cntr)
                cntr += 1

            page_title_lut[space][page['title']] = candidate
            page_id_lut[space][page_id] = candidate

            page['slug'] = candidate
            page['id'] = page_id

            max_length = max(max_length, len(candidate))
            slugs[candidate] = page

        with (open(cfg.path / cfg.lut_bin, 'wb')) as wb:
            pickle.dump((page_id_lut, page_title_lut), wb)

        with (open(cfg.path / cfg.pages_bin, 'wb')) as wb:
            pickle.dump(pages, wb)

        print(f'Total pages: {len(pages)}')
        return entities


class ConfluenceImporter:
    def __init__(self, path='/home/neofelis/imports/org'):
        """Load ``config.yaml`` from *path* and build the importer configuration.

        The YAML content is extended with the import directory itself, the
        location of ``raw/entities.xml`` and the ``raw/attachments`` directory
        before it is handed to ``Cfg.parse_obj``.

        :param path: directory of one import (contains ``config.yaml`` and ``raw/``).
        """
        self.path = Path(path)
        config_file = self.path / 'config.yaml'
        with open(config_file, 'r') as rf:
            raw_data = yaml.safe_load(rf)

        raw_dir = self.path / 'raw'
        raw_data.update(
            path=self.path,
            xml_path=raw_dir / 'entities.xml',
            attachments_path=raw_dir / 'attachments',
        )
        self.cfg = Cfg.parse_obj(raw_data)

    def parse_entities(self):
        """Run ``EntitiesParser.do_pass`` with this importer's configuration."""
        cfg = self.cfg
        EntitiesParser.do_pass(cfg)

    def debug_raw(self, raw, slug='something', space='product'):
        """Print *raw* markup, its converted form and the parser warnings.

        :param raw: source markup handed to the parser.
        :param slug: slug passed through to ``parser.parse``.
        :param space: space key passed through to ``parser.parse``.
        """
        parser = self._get_parser(True)
        fore = colorama.Fore

        # Source markup in green, followed by two spacer lines.
        print(fore.LIGHTGREEN_EX)
        print(raw)
        for _ in range(2):
            print('    ')

        # Converted markup in cyan.
        print(fore.LIGHTCYAN_EX)
        converted = parser.parse(raw, slug, space)
        print(converted)

        # Warnings collected during conversion, in red.
        warnings_text = '\n'.join(parser.warnings)
        print(fore.RED + warnings_text + fore.RESET)

    def cleanup_unoriginals(self):
        """Print slugs that belong both to the title lookup table and to historical page versions.

        A page counts as a historical version when it has a non-None
        ``originalVersionId`` or an ``originalVersion`` reference (the same two
        markers ``EntitiesParser.do_pass`` checks). Historical versions that are
        absent from the pickled pages are skipped instead of raising
        ``KeyError``. Slugs of the ``default`` space are not considered.
        """
        pages = self.get_pages()
        entities = self.get_entities()
        _page_id_lut, page_title_lut = self.get_lut()

        target_slugs = {
            slug
            for space, by_title in page_title_lut.items()
            if space != 'default'
            for slug in by_title.values()
        }

        historical_slugs = set()
        for key, page in entities['Page'].items():
            is_historical = page.get('originalVersionId') is not None or 'originalVersion' in page
            # do_pass leaves historical versions out of ``pages``, so the key may be missing.
            if is_historical and key in pages:
                historical_slugs.add(pages[key]['slug'])

        print(target_slugs.intersection(historical_slugs))

    def test_attaches(self, offset=0, limit=1):
        pages = self.get_pages()

        with open(self.cfg.path / 'missing_attaches.txt', 'w') as w:
            for page in pages.values():
                if 'files' not in page:
                    continue
                missing_files = [
                    file
                    for file in page['files']
                    if not os.path.exists(self.cfg.attachments_path / str(page['id']) / str(file['id']) / '1')
                ]
                if missing_files:
                    w.write(f'Space {page["space"]}; Page {page["title"]}; PageID {page["id"]} \n')
                    for file in missing_files:
                        should_be = Path('./attachments') / str(page['id']) / str(file['id']) / '1'
                        w.write(f' - {file["filename"]} -- {should_be} \n')

    def upload_attaches(self, offset=0, limit=1, only_space=None, resume_on=None, skip=False):
        """Upload attachment files found on disk to their already imported wiki pages.

        Only pages that have a ``files`` list and at least one of those files
        present under ``cfg.attachments_path`` are considered. Problems that
        prevent an upload (page missing, page is a redirect, file too big) are
        appended to ``attaches.log`` in the import directory.

        :param offset: number of considered pages to skip before uploading starts.
        :param limit: number of pages to process; the loop stops when the
            counter reaches exactly zero, so a value of 0 or less never stops it.
        :param only_space: space key or collection of keys to restrict the run
            to; falls back to ``cfg.only``. Compared case-insensitively.
        :param resume_on: filename to resume from; files are skipped until one
            with this name is met.
        :param skip: when true, a file already attached to the page with
            (almost) the same size is not uploaded again.
        """
        only_space = only_space or self.cfg.only
        pages = self.get_pages()
        # ``s`` is switched to the v2 public API and used for the page lookup;
        # ``s2`` is a second client that is left as created and used for file calls.
        s = self.get_wiki_api()
        s.wiki_api.use_api_v2_public()

        s2 = self.get_wiki_api()

        idx = 0
        if isinstance(only_space, str):
            only_space = {only_space}
        if only_space:
            print(f'LIMITING IMPORT TO SPACE {only_space}')
            only_space = {o.lower() for o in only_space}

        with open(self.cfg.path / 'attaches.log', 'a') as err_log:
            # While true, files are skipped until one named ``resume_on`` shows up.
            waiting_for_file = resume_on is not None

            for page in pages.values():
                if only_space and page['space'].lower() not in only_space:
                    continue

                if 'files' not in page:
                    continue
                # Pages whose attachments are all missing on disk are ignored
                # (they do not count towards offset or limit).
                has_files = any(
                    [
                        os.path.exists(self.cfg.attachments_path / str(page['id']) / str(file['id']) / '1')
                        for file in page['files']
                    ]
                )
                if not has_files:
                    continue
                if offset > 0:
                    print(
                        colorama.Fore.LIGHTBLACK_EX
                        + f'> {idx}. Processing https://wiki.yandex.ru/{page["slug"]} [SKIPPED]'
                        + colorama.Fore.RESET
                    )
                    offset -= 1
                    idx += 1
                    continue

                print(f'> {idx}. Processing https://wiki.yandex.ru/{page["slug"]}')
                idx += 1

                slug = page['slug'].rstrip('.')
                slug = normalize_slug(slug)

                # NOTE(review): the slug is interpolated into the query string as is,
                # while other calls in this class pass it via ``params=`` -- confirm it is always URL-safe.
                code, data = s.wiki_api.api_call(
                    'get',
                    f'pages?slug={slug}&fields=redirect',
                )

                if code == 404:
                    err_log.write(
                        f"""{page["space"]}:{page["title"]} -> https://wiki.yandex.ru/{slug}; Page not found can't import files\n"""
                    )
                    print(
                        f'{colorama.Fore.WHITE}{colorama.Back.RED}'
                        + f'    > {slug}  Page not found cant import files'
                        + f'{colorama.Fore.RESET}{colorama.Back.RESET}'
                    )
                    continue
                # NOTE(review): ``data['redirect']`` is read for every non-404 status;
                # presumably other error responses lack this key -- verify.
                if data['redirect'] is not None:
                    err_log.write(
                        f"""{page["space"]}:{page["title"]} -> https://wiki.yandex.ru/{slug}; Page is a redirect won't import files\n"""
                    )
                    print(
                        f'{colorama.Fore.WHITE}{colorama.Back.RED}'
                        + f'    > {slug}  Page is redirect'
                        + f'{colorama.Fore.RESET}{colorama.Back.RESET}'
                    )
                    continue

                # Files already attached to the page, keyed by name; may raise PageWasNotImported.
                uploaded_files = self.get_page_files(slug, s2)

                for file in page['files']:
                    fname = self.cfg.attachments_path / str(page['id']) / str(file['id']) / '1'
                    friendly_name = file['filename']

                    if not os.path.exists(fname):
                        print(
                            colorama.Fore.RED
                            + f'    > {fname}  ({page["title"]} {friendly_name}) does not exists'
                            + colorama.Fore.RESET
                        )
                        continue

                    size = os.path.getsize(fname)

                    size_mb = size / 1024 / 1024

                    if friendly_name in uploaded_files and skip:
                        # Compared against the local size in megabytes;
                        # assumes the API reports ``size`` in the same unit -- TODO confirm.
                        size_delta = abs(float(uploaded_files[friendly_name]['size']) - size_mb)
                        if size_delta < 0.01:
                            print(
                                colorama.Fore.YELLOW
                                + f'[FILE EXISTING] {friendly_name} has already uploaded [SKIPPED]'
                                + colorama.Fore.RESET
                            )
                            continue
                        else:
                            # Size differs: fall through and upload the file again.
                            print(
                                colorama.Fore.LIGHTBLUE_EX
                                + f'[FILE EXISTING] {friendly_name} has already uploaded but SIZE is different'
                                + colorama.Fore.RESET
                            )

                    if waiting_for_file:
                        if friendly_name != resume_on:
                            print(f'    > Uploading {friendly_name} ({size_mb:.2f} mb) [SKIPPED]')
                            continue
                        else:
                            # Resume point reached; this file and all following ones are uploaded.
                            print(colorama.Fore.LIGHTCYAN_EX + 'NOW WE ARE WORKING -------' + colorama.Fore.RESET)
                            waiting_for_file = False

                    if size == 0:
                        print(
                            colorama.Fore.RED
                            + f'    > {fname}  ({page["title"]} {friendly_name}) is empty, will kindly refuse to upload it :('
                            + colorama.Fore.RESET
                        )
                        continue

                    # Files above 200 MB are logged and skipped.
                    if size_mb > 200:
                        err_log.write(
                            f"""{page["space"]}:{page["title"]} -> https://wiki.yandex.ru/{slug}; {friendly_name} ({size_mb:.2f} mb) is too big, will kindly refuse to upload it\n"""
                        )
                        print(
                            f'{colorama.Fore.WHITE}{colorama.Back.RED}'
                            + f'    > {fname}  ({page["title"]} {friendly_name}) ({size_mb:.2f} mb) is too big, will kindly refuse to upload it :('
                            + f'{colorama.Fore.RESET}{colorama.Back.RESET}'
                        )
                        continue

                    print(f'    > Uploading {friendly_name} ({size_mb:.2f} mb) ({fname})')
                    self.upload_file(fname, friendly_name, slug, s2)

                # Only pages that reached the upload stage count towards the limit.
                limit -= 1
                if limit == 0:
                    break

    def get_page_files(self, slug, s):
        """Return the files attached to the page *slug*, keyed by file name.

        The request is tried up to 10 times with a 5 second pause after every
        failure. If all attempts fail an empty dict is returned.

        :param slug: slug of the wiki page.
        :param s: API session exposing ``wiki_api.api_call``.
        :return: ``{file name: file record}`` as reported by the API.
        :raises PageWasNotImported: when the API answers 404.
        """
        files = {}
        for _attempt in range(10):
            try:
                code, data = s.wiki_api.api_call('get', f'frontend/{slug}/.files')
                if code == 404:
                    raise PageWasNotImported()
                files = {f['name']: f for f in data['data']['data']}
            except PageWasNotImported:
                # A missing page is not a transient error -- do not retry.
                raise
            except Exception as e:
                print(f'Get PageFiles Error {e!s}')
                time.sleep(5)
            else:
                break
        return files

    def upload_file(self, fname, friendly_name, slug, s):
        """Upload one file to the wiki storage and attach it to the page *slug*.

        The upload request is tried up to 10 times with a 5 second pause
        between attempts. The file is opened once, rewound before every
        attempt (a failed attempt may already have consumed the stream) and
        closed when the upload phase is over.

        :param fname: path of the file on disk.
        :param friendly_name: file name to present to the wiki.
        :param slug: slug of the page the file is attached to.
        :param s: API session exposing ``wiki_api.api_call``.
        :raises ValueError: when every upload attempt raised, when the upload
            answered with a status other than 200, or when attaching failed.
        :raises OSError: when the file cannot be opened.
        """
        attempts = 10
        success = False
        code = data = None

        with open(fname, 'rb') as fh:
            files = {'file': (friendly_name, fh)}
            while attempts > 0 and not success:
                try:
                    # Start from the beginning even if a previous attempt read part of the file.
                    fh.seek(0)
                    code, data = s.wiki_api.api_call(
                        'post', 'frontend/.upload', files=files, timeout=Timeout(total=None, connect=60.0, read=60.0)
                    )
                    success = True
                except Exception as e:
                    print(e)
                    attempts -= 1
                    # No point in waiting once there is nothing left to retry.
                    if attempts > 0:
                        time.sleep(5)

        if not success:
            raise ValueError(f'Failed to upload file {fname}: all attempts raised an error')

        if code != 200:
            print(f'Failed to upload file {fname} ({code} {data})')
            raise ValueError(f'Failed to upload file {fname}: unexpected status {code}')

        storage_id = data['data']['storage_id']

        code, data = s.wiki_api.api_call(
            'post', f'frontend/{slug}/.attach?is_silent=true', json={'files': [storage_id]}
        )
        if code != 200:
            print(f'Failed to attach file {fname} к {slug} ({code})')
            raise ValueError(f'Failed to attach file {fname} to {slug}: unexpected status {code}')

    def debug_page(self, desired_slug):
        """Run ``debug_raw`` for the first pickled page whose slug equals *desired_slug*.

        :param desired_slug: full slug of the page to inspect.
        :raises AssertionError: when no page has that slug.
        """
        pages_file = self.cfg.path / self.cfg.pages_bin
        with open(pages_file, 'rb') as rb:
            pages = pickle.load(rb)

        match = next((p for p in pages.values() if p['slug'] == desired_slug), None)
        if match is not None:
            self.debug_raw(match['body'], match['slug'], match['space'])
        assert match is not None, 'Page not found, quitting'

    def get_lut(self):
        """Load the pickled lookup tables.

        :return: ``(page_id_lut, page_title_lut)`` as stored in ``cfg.lut_bin``.
        """
        lut_file = self.cfg.path / self.cfg.lut_bin
        with open(lut_file, 'rb') as rb:
            page_id_lut, page_title_lut = pickle.load(rb)
        return page_id_lut, page_title_lut

    def _get_parser(self, paranoid=True):
        """Build a ``ConfluenceXmlParser`` from the pickled lookup tables and user map.

        :param paranoid: forwarded to the parser as its ``paranoid`` argument.
        :return: the configured parser instance.
        """
        cfg = self.cfg
        base_dir = cfg.path

        with open(base_dir / cfg.lut_bin, 'rb') as lut_file:
            lookup_tables = pickle.load(lut_file)
        page_id_lut, page_title_lut = lookup_tables

        with open(base_dir / cfg.users_bin, 'rb') as users_file:
            user_resolve = pickle.load(users_file)

        return ConfluenceXmlParser(page_id_lut, page_title_lut, user_resolve, paranoid=paranoid)

    def get_wiki_api(self):
        """Create an authenticated API session for the production B2B contour.

        With ``cfg.tvm2`` set the session acts as a fixed robot user via TVM
        and a red warning banner is printed. Otherwise the OAuth token from the
        configuration is used and the ``me`` endpoint is queried to report the
        acting user.

        :return: the session object returned by ``get_contour``.
        :raises AssertionError: when the ``me`` request does not answer 200.
        """
        s = get_contour(EnvType.PROD, Flavor.B2B)

        if self.cfg.tvm2:
            creds = B2BCreds(org_id=self.cfg.org_id, uid='780890239')  # robot-wiki
            banner = colorama.Fore.RED + '!' * 80 + colorama.Fore.RESET
            print(banner)
            print(colorama.Fore.RED + 'Will use TVM to connect to prod' + colorama.Fore.RESET)
            print(banner)
            s.wiki_api.as_user(creds)
            return s

        s.wiki_api.org_id = self.cfg.org_id
        s.wiki_api.oauth(self.cfg.oauth)
        code, data = s.wiki_api.api_call('get', 'me', api='api/v2/public/')
        assert code == 200, data
        who = f'> Will work as a {data["username"]} in {data["org"]["dir_id"]} '
        print(colorama.Fore.YELLOW + who + colorama.Fore.RESET)
        return s

    def prepare_spaces(self):
        """Create (or refresh) a root wiki page per space and an ``allspaces`` index page.

        Every space from the pickled entities, plus a synthetic ``default``
        space for pages without a space, gets a page whose content is a tree
        macro. When the slug is already occupied the existing page is looked
        up and overwritten; a 403 on that lookup is reported and the space is
        skipped. Finally a page listing links to all spaces is posted.

        The page title is the space ``name``, falling back to ``title`` (used
        by the synthetic default space) and then to the space key.

        :raises AssertionError: when looking up or updating an existing page
            answers with an unexpected status.
        """
        entities = self.get_entities()

        s = self.get_wiki_api()
        s.wiki_api.use_api_v2_public()

        spaces = entities['Space']

        # Synthetic space; the title reads "Pages not bound to any Space".
        spaces['default'] = {
            'key': 'default',
            'title': 'Страницы, не привязанные ни к одному Space',
        }
        index_lines = []
        for space in spaces.values():
            space_slug = normalize_slug(space['key'])
            # ``name`` may be absent (default space) or None; never post a None title.
            space_name = space.get('name') or space.get('title') or space['key']
            print(f'> Processing https://wiki.yandex.ru/{space_slug}')
            index_lines.append(f' - ((/{space_slug} {space_name}))')
            code, data = s.wiki_api.api_call(
                'post',
                'pages?is_silent=true',
                json={'slug': space_slug, 'page_type': 'page', 'title': space_name, 'content': """{{tree }}"""},
            )
            # Only an occupied slug needs follow-up; other outcomes are left as they are.
            if code != 400 or data['error_code'] != 'SLUG_OCCUPIED':
                continue

            code, data = s.wiki_api.api_call('get', 'pages', params={'slug': space_slug})
            if code == 403:
                print(
                    colorama.Fore.YELLOW
                    + '[PAGE EXISTING] Someone already created a page :('
                    + colorama.Fore.RESET
                )
                continue
            assert code == 200
            page_id = data['id']
            print(colorama.Fore.YELLOW + f'[PAGE EXISTING] Found page ID {page_id}' + colorama.Fore.RESET)
            code, data = s.wiki_api.api_call(
                'post',
                f'pages/{page_id}?is_silent=true',
                json={'title': space_name, 'content': """{{tree }}"""},
            )
            assert code == 200

        print('> Processing https://wiki.yandex.ru/allspaces')
        index_content = '\n'.join(index_lines)
        s.wiki_api.api_call(
            'post',
            'pages?is_silent=true',
            json={'slug': 'allspaces', 'page_type': 'page', 'title': 'All Imported Spaces', 'content': index_content},
        )

    def get_entities(self):
        """Load the pickled entities mapping stored in ``cfg.content_bin``."""
        entities_file = self.cfg.path / self.cfg.content_bin
        with open(entities_file, 'rb') as stream:
            return pickle.load(stream)

    def convert_import(self, offset=0, limit=5, paranoid=True, skip=False, only_space=None):
        """Convert page bodies and create or update the corresponding wiki pages.

        Each page body is converted with the parser and posted as a new page.
        When the slug is already occupied the existing page is looked up and
        overwritten, unless *skip* is set. Access errors and parser warnings
        are appended to ``pages.log`` in the import directory.

        :param offset: number of pages (after the space filter) to skip first.
        :param limit: maximum number of pages to process; pages left via the
            skip / access-denied paths are not counted. A value of 0 or less
            still processes one page.
        :param paranoid: forwarded to the parser.
        :param skip: when true, any 400 answer to the create request makes the
            page be skipped instead of updated.
        :param only_space: space key or collection of keys to restrict the run
            to; falls back to ``cfg.only``. Compared case-insensitively.
        :raises KeyError: when the lookup of an existing page returns no ``id``.
        """
        only_space = only_space or self.cfg.only
        s = self.get_wiki_api()
        s.wiki_api.use_api_v2_public()

        pages = self.get_pages()

        parser = self._get_parser(paranoid)

        skip_first = offset
        remaining = limit

        idx = 0

        if isinstance(only_space, str):
            only_space = {only_space}
        if only_space:
            print(f'LIMITING IMPORT TO SPACE {only_space}')

            only_space = {o.lower() for o in only_space}

        with open(self.cfg.path / 'pages.log', 'a') as err_log:
            for page in pages.values():
                if only_space and page['space'].lower() not in only_space:
                    continue
                if skip_first > 0:
                    skip_first -= 1
                    idx += 1
                    continue

                print(f'> {idx}. Processing https://wiki.yandex.ru/{page["slug"]}')
                idx += 1

                title = page['title'] or 'Untitled'

                timings0 = time.time()

                converted_body = parser.parse(page['body'], page['slug'], page['space'])

                timings1 = time.time()
                code, data = s.wiki_api.api_call(
                    'post',
                    'pages',
                    json={
                        'slug': page['slug'],
                        'page_type': 'page',
                        'title': title,
                        'content': converted_body,
                    },
                )
                timings2 = time.time()
                if code == 400:
                    if skip:
                        print(colorama.Fore.YELLOW + '[SKIPPING]' + colorama.Fore.RESET)
                        # Drop this page's warnings so they are not logged under the next page.
                        parser.warnings.clear()
                        continue

                    if data['error_code'] == 'SLUG_OCCUPIED':
                        code, data = s.wiki_api.api_call('get', 'pages', params={'slug': page['slug']})

                        if code == 403:
                            err = f'[ACCESS DENIED] to {page["space"]}:{page["title"]} -> https://wiki.yandex.ru/{page["slug"]}\n'
                            print(colorama.Fore.RED + err + colorama.Fore.RESET)
                            err_log.write(err)
                            # Same as above: do not leak warnings into the next page's log entry.
                            parser.warnings.clear()
                            continue

                        try:
                            page_id = data['id']
                        except KeyError:
                            print(data)
                            raise

                        print(colorama.Fore.YELLOW + f'[PAGE EXISTING] Found page ID {page_id}' + colorama.Fore.RESET)
                        code, data = s.wiki_api.api_call(
                            'post',
                            f'pages/{page_id}',
                            json={'title': title, 'content': converted_body},
                        )

                if code != 200:
                    print(colorama.Fore.RED + '[HTTP ERROR]\n' + json.dumps(data, indent=4) + colorama.Fore.RESET)
                else:
                    # time.time() yields seconds; convert so the "ms" label is accurate.
                    conversion_ms = (timings1 - timings0) * 1000
                    upload_ms = (timings2 - timings1) * 1000
                    print(
                        colorama.Fore.LIGHTGREEN_EX
                        + f'[OK] conversion took:{conversion_ms:.0f}ms; upload: {upload_ms:.0f}ms\n'
                        + colorama.Fore.RESET
                    )

                if parser.warnings:
                    err_log.write(
                        f"""Page {page["space"]}:{page["title"]} -> https://wiki.yandex.ru/{page['slug']};\n"""
                    )
                    for warn in parser.warnings:
                        err_log.write(f' - {warn} \n')
                    print(colorama.Fore.RED + '\n'.join(parser.warnings) + colorama.Fore.RESET)
                parser.warnings.clear()

                # Stop after exactly ``limit`` processed pages (the old ``< 0`` check ran one extra).
                remaining -= 1
                if remaining <= 0:
                    break

    def get_pages(self):
        """Load the pickled pages mapping stored in ``cfg.pages_bin``."""
        pages_file = self.cfg.path / self.cfg.pages_bin
        with open(pages_file, 'rb') as stream:
            return pickle.load(stream)
