// ADVDEV-1479: Скрипт выгрузки данных из Блогов
const _ = require('lodash');
const fs = require('fs');
const got = require('got');
const sanitizeHTML = require('sanitize-html');

// Names of the output files; they are passed to fs.writeFileSync as-is,
// so relative names land in the current working directory.
const POSTS_FILE = 'posts.json';
const COMMENTS_FILE = 'comments.json';
const SKIP_COMMENTS_FILE = 'skip.json';

// Matches a single <img ...> tag (self-closing or not) and captures the value
// of its double-quoted `src` attribute in group 1, wherever the attribute sits
// inside the tag. Group 1 is undefined when the tag has no such attribute.
// `\ssrc=` requires whitespace before the name so `data-src` is not mistaken
// for `src`; `[^>]` keeps the match inside one tag.
const IMAGE_REGEXP = /<img(?=\s)(?:[^>]*?\ssrc="([^"]*)")?[^>]*>/g;

/**
 * Requests a service ticket from the TVM endpoint described by `tvmConfig`.
 *
 * Sends a request to `<url>/tickets` with `token` as the Authorization header
 * and `dsts` / `src` as query parameters, then reads `body.blogs.ticket`
 * from the JSON response.
 *
 * @param {{url: string, token: string, dsts: *, src: *}} tvmConfig
 * @returns {Promise<*>} the ticket, or undefined when the response has no
 *     `blogs.ticket` field.
 */
async function getTvmTicket(tvmConfig) {
    const headers = {
        Authorization: tvmConfig.token,
        'Content-Type': 'application/json'
    };
    const query = { dsts: tvmConfig.dsts, src: tvmConfig.src };

    const response = await got(`${tvmConfig.url}/tickets`, { headers, query, json: true });

    return _.get(response, ['body', 'blogs', 'ticket']);
}

/**
 * Performs an authenticated GET request and returns the parsed response body.
 *
 * A fresh service ticket is obtained through getTvmTicket for every call and
 * sent in the `x-ya-service-ticket` header.
 *
 * @param {string} url - address to request.
 * @param {Object} [options] - extra `got` options (e.g. `query`). The object
 *     is not modified; `method`, `json` and `headers` are always overridden.
 * @param {Object} tvmConfig - configuration forwarded to getTvmTicket.
 * @returns {Promise<*>} `body` of the response.
 */
async function request(url, options, tvmConfig) {
    const ticket = await getTvmTicket(tvmConfig);

    // Build a copy instead of writing into the caller's object: callers keep
    // and reuse their options between calls.
    const requestOptions = Object.assign({}, options, {
        method: 'GET',
        json: true,
        headers: {
            'x-ya-service-ticket': ticket,
            'Content-Type': 'application/json'
        }
    });

    const res = await got(url, requestOptions);

    return res.body;
}

/**
 * Converts a blog post into the record written to the posts file.
 *
 * The post title becomes `name`, limited to 250 UTF-16 code units: longer
 * titles are cut and terminated with an ellipsis character.
 *
 * @param {function(string): string} getPostUrl - builds the public URL of a
 *     post from its slug.
 * @param {Object} post - post as returned by the blogs API; `_id`,
 *     `approvedTitle`, `publishDate` and `slug` are read.
 * @returns {{entityId: *, createTimestamp: number, public: boolean,
 *     name: string, description: string, entityUrl: string}}
 */
function formatPostToCMNT(getPostUrl, post) {
    const title = post.approvedTitle;
    let name = title;

    if (title.length > 250) {
        let cutAt = 249;
        const lastUnit = title.charCodeAt(cutAt - 1);

        // Do not leave half of a surrogate pair (e.g. an emoji) at the end:
        // a lone high surrogate is not valid text.
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            cutAt -= 1;
        }

        name = `${title.slice(0, cutAt)}…`;
    }

    return {
        entityId: post._id,
        createTimestamp: new Date(post.publishDate).getTime(), // milliseconds
        public: true,
        name,
        description: '',
        entityUrl: getPostUrl(post.slug)
    };
}

/**
 * Downloads every post of a blog, page by page, and converts each one with
 * formatPostToCMNT.
 *
 * Paging is driven by the last post of each page: its `hasNext` flag tells
 * whether another page exists and its `_id` is sent as `from` for the next
 * request. An empty page ends the loop.
 *
 * @param {Object} settings - script settings; `blog` ({slug, lang}),
 *     `fromPost`, `postsCountToReadFromBlog`, `getPostUrl`, `blogsApiUrl`
 *     and `tvm` are read.
 * @returns {Promise<Object[]>} converted posts in the order they were received.
 * @throws {TypeError} when the API answers with something other than an array.
 */
async function getAllPostsForBlog(settings) {
    const {
        blog,
        fromPost,
        postsCountToReadFromBlog,
        getPostUrl,
        blogsApiUrl,
        tvm
    } = settings;

    const cmntPosts = [];
    const postsUrl = `${blogsApiUrl}/posts/${blog.slug}`;

    let hasNext = true;
    let from = fromPost;

    while (hasNext) {
        const options = {
            query: { size: postsCountToReadFromBlog, lang: blog.lang, from }
        };
        const posts = await request(postsUrl, options, tvm);

        if (!Array.isArray(posts)) {
            throw new TypeError(`Unexpected response from ${postsUrl}: an array of posts was expected`);
        }

        // A blog without posts (or an empty trailing page) has no "last post"
        // to read the paging data from, so stop here.
        if (posts.length === 0) {
            break;
        }

        for (const post of posts) {
            cmntPosts.push(formatPostToCMNT(getPostUrl, post));
        }

        const lastPost = posts[posts.length - 1];

        // eslint-disable-next-line prefer-destructuring
        hasNext = lastPost.hasNext;
        from = lastPost._id;
    }

    return cmntPosts;
}

/**
 * Works out the `replyTo` value of a comment from its `path`.
 *
 * `path` is split on '/'. A path of exactly two segments yields 0
 * (presumably "not a reply" - confirm against the consumer of the file).
 * Otherwise the second-to-last segment is taken as the parent comment id and
 * the parent's creation time is returned.
 *
 * @param {Object} comment - comment being converted; `path` and `_id` are read.
 * @param {Object[]} allComments - comments among which the parent is looked up.
 * @returns {?number} 0, the parent's `created_at` in microseconds, or null
 *     (after a console warning) when the parent is not in `allComments`.
 */
function defineReplyTo(comment, allComments) {
    const segments = comment.path.split('/');
    const depth = segments.length;

    if (depth === 2) {
        return 0;
    }

    const parentId = segments[depth - 2];
    const parent = _.find(allComments, { _id: parentId });

    if (parent) {
        return new Date(parent.created_at).getTime() * 1000; // microseconds
    }

    console.warn(`No parent comment for comment id ${comment._id}`);

    return null;
}

/**
 * Normalizes an image address taken from a `src` attribute.
 *
 * @param {string} [src] - raw attribute value.
 * @returns {string} '' for a missing/empty value; the value prefixed with
 *     'https:' when it is protocol-relative (starts with '//'); otherwise
 *     the value unchanged.
 */
function processSrc(src) {
    if (src) {
        return src.startsWith('//') ? `https:${src}` : `${src}`;
    }

    return '';
}

/**
 * Turns the HTML body of a comment into plain text.
 *
 * The steps run in this order:
 *   1. block quotes are wrapped in « » (a quote that does not open the body
 *      also starts on a new line);
 *   2. `</p>` and `<br>` become line breaks;
 *   3. links become `text (href)` - just `text` when there is no href or it
 *      equals the text, nothing at all when the link has no text; images
 *      inside a link are replaced by their address first;
 *   4. remaining images become their address on a line of its own;
 *   5. every tag that is still left is stripped by sanitize-html.
 *
 * @param {string} commentBody - comment HTML.
 * @returns {string} plain-text version of the comment.
 */
function sanitizeText(commentBody) {
    const stripTags = (html) => sanitizeHTML(html, { allowedTags: [] });

    // Image inside a link: only its address is kept, inline.
    const imageToInlineSrc = (imgTag, src) => processSrc(src);

    // Standalone image: the address goes on its own line.
    const imageToSrcLine = (imgTag, src) => {
        const address = processSrc(src);

        return address ? `\n${address}\n` : '';
    };

    const linkToText = (anchor, href, inner) => {
        const label = stripTags(inner.replace(IMAGE_REGEXP, imageToInlineSrc));

        if (!label) {
            return '';
        }

        const hrefAddsInfo = Boolean(href) && href !== label;

        return hrefAddsInfo ? `${label} (${href})` : label;
    };

    let text = commentBody;

    text = text.replace(/^<blockquote>/, '«');
    text = text.replace(/<blockquote>/g, '\n«');
    text = text.replace(/<\/blockquote>/g, '»\n');
    text = text.replace(/<\/p>|<br\s*\/?>/g, '\n');
    text = text.replace(/<a\s[^]*?href="(.*?)"[^]*?>([^]*?)<\/a>/g, linkToText);
    text = text.replace(IMAGE_REGEXP, imageToSrcLine);

    return stripTags(text);
}

/**
 * Converts a blog comment into the record written to the comments file.
 *
 * A comment is not exported when its parent cannot be resolved
 * (defineReplyTo returned null), when it is marked as removed, or when its
 * `authorId` is not numeric. Such a comment is appended as a JSON line to
 * the skip file and nothing is returned.
 *
 * @param {*} postEntityId - id of the post the comment belongs to.
 * @param {Object[]} allComments - all comments of the post (for parent lookup).
 * @param {Object} comment - comment to convert.
 * @returns {Object|undefined} the converted comment, or undefined if skipped.
 */
function formatCommentToCMNT(postEntityId, allComments, comment) {
    const replyTo = defineReplyTo(comment, allComments);
    const uid = Number(comment.authorId);

    const isExportable = replyTo !== null && !comment.isRemoved && !Number.isNaN(uid);

    if (isExportable) {
        return {
            entityId: postEntityId,
            timestamp: new Date(comment.created_at).getTime() * 1000, // microseconds
            uid,
            replyTo,
            type: 'text',
            content: sanitizeText(comment.body.html)
        };
    }

    // Keep a trace of everything that was left out of the export.
    fs.writeFileSync(SKIP_COMMENTS_FILE, `${JSON.stringify(comment)}\r\n`, { flag: 'a' });

    return undefined;
}

/**
 * Downloads all comments of a post and prepares them for writing.
 *
 * Every comment goes through formatCommentToCMNT; comments it rejects are
 * dropped and the remaining ones are serialized to JSON strings.
 *
 * @param {Object} settings - script settings; `blog` ({slug, lang}),
 *     `blogsApiUrl` and `tvm` are read.
 * @param {*} postIdentity - id of the post; used both in the request URL and
 *     as the `entityId` of the produced comments.
 * @returns {Promise<string[]>} one JSON string per exported comment.
 */
async function getPostComments(settings, postIdentity) {
    const { blog, blogsApiUrl, tvm } = settings;
    const { slug, lang } = blog;

    const allComments = await request(
        `${blogsApiUrl}/comments/all/${slug}/${postIdentity}`,
        { query: { lang } },
        tvm
    );

    const converted = _.map(allComments, (comment) => {
        return formatCommentToCMNT(postIdentity, allComments, comment);
    });

    return _.map(_.compact(converted), (comment) => JSON.stringify(comment));
}

/**
 * Appends a batch of serialized posts and comments to the output files.
 *
 * Each item becomes one line terminated by CRLF; posts go to POSTS_FILE and
 * comments to COMMENTS_FILE. Both files are opened in append mode.
 *
 * @param {string[]} posts - serialized posts.
 * @param {string[]} comments - serialized comments.
 */
function appendToFiles(posts, comments) {
    const toLines = (items) => `${items.join('\r\n')}\r\n`;
    const appendMode = { flag: 'a' };

    // Build both payloads before touching the disk.
    const postsData = toLines(posts);
    const commentsData = toLines(comments);

    fs.writeFileSync(POSTS_FILE, postsData, appendMode);
    fs.writeFileSync(COMMENTS_FILE, commentsData, appendMode);
}

/**
 * Loads the settings module named by the first command-line argument.
 *
 * The argument is handed to `require` unchanged, so Node's module resolution
 * applies: a relative path is resolved against this script's directory, not
 * against the current working directory.
 *
 * @returns {*} whatever the settings module exports.
 * @throws {Error} when no argument was passed.
 */
function readSettings() {
    // argv[0] is the node binary, argv[1] the script itself.
    const file = process.argv[2];

    if (file) {
        return require(file);
    }

    throw new Error('The settings file is not specified.');
}

/**
 * Script entry point.
 *
 * Reads the settings, downloads all posts of the blog and then, post by
 * post, their comments. Posts without exportable comments are skipped. The
 * rest is buffered and appended to the output files every
 * `settings.postsCountForWriteToFile` posts, with a final flush for the
 * remainder.
 *
 * When processing a post fails, the id of the last post that reached the
 * files is logged and the original error is rethrown unchanged. Any failure
 * is reported on stderr and makes the process exit with a non-zero code.
 */
(async () => {
    console.time('Unloading data from Blogs to files');

    let postsForWrite = [];
    let commentsForWrite = [];
    let lastProcessedPost = null;

    const settings = readSettings();

    const posts = await getAllPostsForBlog(settings);

    for (const post of posts) {
        try {
            const comments = await getPostComments(settings, post.entityId);

            if (comments.length === 0) {
                continue;
            }

            postsForWrite.push(JSON.stringify(post));
            commentsForWrite = commentsForWrite.concat(comments);

            if (postsForWrite.length === settings.postsCountForWriteToFile) {
                appendToFiles(postsForWrite, commentsForWrite);

                // The current post is the last one of the batch just written.
                lastProcessedPost = post.entityId;

                postsForWrite = [];
                commentsForWrite = [];
            }
        } catch (err) {
            console.log(`The last post from the processed stack of posts: ${lastProcessedPost}`);

            // Rethrow the error itself: wrapping it in a new Error would
            // stringify it and lose its type, stack and extra fields.
            throw err;
        }
    }

    if (postsForWrite.length > 0) {
        appendToFiles(postsForWrite, commentsForWrite);
    }

    console.timeEnd('Unloading data from Blogs to files');
})().catch((err) => {
    // Without a handler the failure would surface only as an unhandled
    // rejection, whose effect on the exit code depends on the Node version.
    console.error(err);
    process.exitCode = 1;
});
