#!/usr/bin/env python2
# -*- coding: utf8 -*-

import urllib
import json
from optparse import OptionParser
import optparse
from collections import defaultdict, Counter
import sys

# Accepted values for the -b / --bug_type option; main() rejects any other
# value. The spelling of these identifiers is part of the command-line
# interface and must not be "corrected".
bug_types_list = [
    'dublicate_title',
    'dublicate_images',
    'remove_author',
    'dublicate_text',
    'cut_end_text',
    'cut_start_text',
]


def get_options(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: optional list of argument strings to parse instead of the
            process arguments. ``None`` (the default) makes optparse read
            ``sys.argv[1:]``, which is what the parameterless call always did.

    Returns:
        The optparse values object with the attributes ``inputData``,
        ``bug_type`` and ``result``; an option that was not supplied is
        ``None``. Positional arguments are parsed but discarded.
    """
    parser = optparse.OptionParser()
    parser.add_option("-i", "--inputData", type=str,
                      help="path to input file")
    parser.add_option("-b", "--bug_type", type=str,
                      help="error selection in output json")
    parser.add_option("-r", "--result", type=str,
                      help="output file name prefix")
    options, _positional = parser.parse_args(argv)
    return options


def dublicate_TITLE(page):
    """Inject a duplicated title at the top of the preview content.

    Args:
        page: file-like object whose ``read()`` yields a JSON document that
            contains ``docs[0].construct[0]`` with the keys ``title`` and
            ``preview_content`` (a list).

    Returns:
        The modified document serialised as JSON text indented by 4 spaces.

    Raises:
        KeyError, IndexError, TypeError: the document lacks the expected
            structure.
        ValueError: the page is not valid JSON.
    """
    data = json.load(page)
    construct = data['docs'][0]['construct'][0]
    title = construct['title']
    preview = construct["preview_content"]
    # A title element placed first repeats the document title in the body.
    preview.insert(0, {'text': title, 'content_type': 'title'})
    # Only the document is passed positionally: a second positional argument
    # would be taken as ``skipkeys`` on Python 2 and is rejected outright
    # (TypeError) on Python 3, where the options are keyword-only.
    return json.dumps(data, indent=4)


def dublicate_IMAGES(page):
    """Inject a copy of the cover image at the top of the preview content.

    Args:
        page: file-like object whose ``read()`` yields a JSON document that
            contains ``docs[0].construct[0]`` with the keys ``cover``,
            ``cover_width`` and ``preview_content`` (a list).

    Returns:
        The modified document serialised as JSON text indented by 4 spaces.

    Raises:
        KeyError, IndexError, TypeError: the document lacks the expected
            structure.
        ValueError: the page is not valid JSON.
    """
    data = json.load(page)
    construct = data['docs'][0]['construct'][0]
    image = construct['cover']
    image_width = construct['cover_width']
    preview = construct["preview_content"]
    # An image element built from the cover is placed first in the body.
    preview.insert(0, {'src': image, 'width': image_width, "content_type": "image"})
    # Only the document is passed positionally: a second positional argument
    # would be taken as ``skipkeys`` on Python 2 and is rejected outright
    # (TypeError) on Python 3, where the options are keyword-only.
    return json.dumps(data, indent=4)


def remove_AUTHOR(page):
    """Drop the ``author`` field of the first construct of the document.

    Args:
        page: file-like object whose ``read()`` yields a JSON document that
            contains ``docs[0].construct[0]``.

    Returns:
        The modified document serialised as JSON text indented by 4 spaces,
        or ``None`` when the first construct has no ``author`` key (there is
        nothing to remove in that case).
    """
    document = json.load(page)
    first_construct = document['docs'][0]['construct'][0]
    if 'author' not in first_construct:
        # No author to strip: signal it with None instead of JSON text.
        return None
    first_construct.pop('author')
    return json.dumps(document, indent=4)


def dublicate_TEXT(page):
    """Duplicate the beginning of the text of the second preview element.

    The first eight items (or fewer, if the text is shorter) of
    ``preview_content[1].content`` are copied to the front of that same
    list, followed by a line-break element and a single-space string.

    Args:
        page: file-like object whose ``read()`` yields a JSON document that
            contains ``docs[0].construct[0].preview_content[1].content``
            (a list).

    Returns:
        The modified document serialised as JSON text indented by 4 spaces.

    Raises:
        KeyError, IndexError, TypeError: the document lacks the expected
            structure.
        ValueError: the page is not valid JSON.
    """
    data = json.load(page)
    target = data['docs'][0]['construct'][0]["preview_content"][1]['content']
    # The slice is a copy, so inserting into ``target`` below does not
    # change what is being iterated.
    head = target[:8]
    for position, value in enumerate(head):
        target.insert(position, value)
    # Two separate inserts at the same index: the break ends up in front of
    # the space, one element past the end of the duplicated head.
    # NOTE(review): this puts the separator after the first *original* item
    # rather than directly after the copy - kept as is, confirm the intent.
    separator_at = len(head) + 1
    target.insert(separator_at, ' ')
    target.insert(separator_at, {"content_type": "br"})
    # Only the document is passed positionally: a second positional argument
    # would be taken as ``skipkeys`` on Python 2 and is rejected outright
    # (TypeError) on Python 3, where the options are keyword-only.
    return json.dumps(data, indent=4)


def cut_END_TEXT(page):
    """Cut the last ten items off the text of the second preview element.

    The ``content`` list of ``preview_content[1]`` is removed from that
    element, and a new paragraph holding the list without its last ten
    items is inserted as the first preview element.

    Args:
        page: file-like object whose ``read()`` yields a JSON document that
            contains ``docs[0].construct[0].preview_content[1].content``
            (a list).

    Returns:
        The modified document serialised as JSON text indented by 4 spaces.

    Raises:
        KeyError, IndexError, TypeError: the document lacks the expected
            structure.
        ValueError: the page is not valid JSON.
    """
    data = json.load(page)
    preview = data['docs'][0]['construct'][0]['preview_content']
    content = preview[1]['content'][:-10]
    # The source element keeps its other keys but loses its text.
    del preview[1]['content']
    preview.insert(0, {'content': content, 'content_type': 'paragraph'})
    # Only the document is passed positionally: a second positional argument
    # would be taken as ``skipkeys`` on Python 2 and is rejected outright
    # (TypeError) on Python 3, where the options are keyword-only.
    return json.dumps(data, indent=4)


def cut_START_TEXT(page):
    """Cut the first ten items off the text of the second preview element.

    The ``content`` list of ``preview_content[1]`` is removed from that
    element, and a new paragraph holding the list without its first ten
    items is inserted as the first preview element.

    Args:
        page: file-like object whose ``read()`` yields a JSON document that
            contains ``docs[0].construct[0].preview_content[1].content``
            (a list).

    Returns:
        The modified document serialised as JSON text indented by 4 spaces.

    Raises:
        KeyError, IndexError, TypeError: the document lacks the expected
            structure.
        ValueError: the page is not valid JSON.
    """
    data = json.load(page)
    preview = data['docs'][0]['construct'][0]['preview_content']
    content = preview[1]['content'][10:]
    # The source element keeps its other keys but loses its text.
    del preview[1]['content']
    preview.insert(0, {'content': content, 'content_type': 'paragraph'})
    # Only the document is passed positionally: a second positional argument
    # would be taken as ``skipkeys`` on Python 2 and is rejected outright
    # (TypeError) on Python 3, where the options are keyword-only.
    return json.dumps(data, indent=4)


class color:
    """Terminal escape sequences used to emphasise headings in the help text."""

    # Switches bold rendering on.
    BOLD = '\033[1m'
    # Resets the text attributes; emitted after each BOLD span.
    END = '\033[0m'


def main():
    """Fetch every URL of the input file, inject the chosen bug, save the result.

    The command-line options come from ``get_options()``: ``-i`` names a
    text file with one URL per line, ``-b`` selects one of the names in
    ``bug_types_list`` and ``-r`` is an optional prefix for the output file
    name, which is the prefix followed by the bug type.

    Each URL is expected to return a JSON document. A document that cannot
    be fetched, parsed or modified is reported on stdout and counted as an
    error; processing continues with the next URL. A summary is printed at
    the end and the compact JSON of all successfully modified documents is
    written to the output file.
    """
    # Guard the index: with no arguments at all ``sys.argv[1]`` does not exist.
    if len(sys.argv) > 1 and sys.argv[1] == '--help':
        print(
            '\n' + color.BOLD + "Available options:" + color.END + '\n\n'
            + "-i\tpath to input file\n"
            + "-b\terror selection in output json\n\n"
            + color.BOLD + "Available errors type:" + color.END + '\n\n'
            + "1. dublicate_title\n"
            + "2. dublicate_images\n"
            + "3. remove_author\n"
            + "4. dublicate_text\n"
            + "5. cut_end_text\n"
            + "6. cut_start_text")
        # No return here: optparse sees --help in get_options() below,
        # prints its own usage text and exits the process.

    options = get_options()
    bug_type = options.bug_type

    # Dispatch table: bug type name -> function that applies it to a page.
    mutators = {
        'dublicate_title': dublicate_TITLE,
        'dublicate_images': dublicate_IMAGES,
        'remove_author': remove_AUTHOR,
        'dublicate_text': dublicate_TEXT,
        'cut_end_text': cut_END_TEXT,
        'cut_start_text': cut_START_TEXT,
    }
    mutate = mutators.get(bug_type)
    # Validate once, before any network traffic, instead of per URL.
    if bug_type not in bug_types_list or mutate is None:
        print('Error type is not correct')
        return
    if not options.inputData:
        # Without this check open(None) would fail with a bare TypeError.
        print('Input file is not specified')
        return

    # ``options.result`` is None when -r is omitted; use an empty prefix
    # then, and never cut characters off a prefix the user did supply.
    result = (options.result or '') + str(bug_type)

    json_data = []
    count_errors = 0
    count_total = 0
    with open(options.inputData) as input_file:
        for line in input_file:
            urls = line.strip()
            if not urls:
                # Blank lines are not URLs and must not count as failures.
                continue
            count_total += 1

            try:
                url = urllib.urlopen(urls)
                try:
                    mutated = mutate(url)
                finally:
                    # Release the connection even if the mutation failed.
                    url.close()
                # Re-parse and re-dump to store the compact form; a mutator
                # that returned None raises TypeError here and the URL is
                # counted as an error.
                json_data.append(json.dumps(json.loads(mutated)).encode('utf8'))
            except (KeyError, ValueError, TypeError, IndexError, AttributeError, IOError) as e:
                count_errors += 1
                print(urls)
                print(e)

    print('TOTAL URLS IN POOL: ' + str(count_total))
    print('SUCCESSFULLY: ' + str(count_total - count_errors))
    print('ERRORS: ' + str(count_errors))

    try:
        with open(result, 'w') as res_file:
            # NOTE(review): documents are written back to back without any
            # separator, as before - confirm the consumer expects that.
            for val in json_data:
                res_file.write(str(val))
    except (IOError, OSError) as e:
        # Report the failure instead of silently losing the output.
        print('Cannot write result file ' + result + ': ' + str(e))


# Run the tool only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

