#!/usr/bin/python3

from argparse import ArgumentParser
from collections import OrderedDict
from datetime import datetime
from dateutil.tz import tzlocal
import logging
from openpyxl import load_workbook
from openpyxl.cell.read_only import EmptyCell
from turbopages import TurboRecord, upload
import urllib


# Configure the root logger once at import time: every record is written as
# tab-separated timestamp, level name and message, at INFO level and above.
logging.basicConfig(format='%(asctime)s\t%(levelname)s\t%(message)s', level=logging.INFO)


# Destination name -> prefix of the table path used for the upload.
# main() appends the run's start timestamp to the selected prefix.
TABLE_PATH = {
    'debug': '//home/travel/alittleprince/turbo/',
    'test': '//home/travel/testing/turbo-pages/',
    'prod': '//home/travel/prod/turbo-pages/',
}


class TurboPagesError(Exception):
    """Raised when the spreadsheet data cannot be turned into turbo pages."""


def cell_to_str(c):
    """Return the stripped text of a spreadsheet cell.

    Args:
        c: an object exposing the cell content as ``c.value``.

    Returns:
        ``''`` when the value is ``None``; otherwise ``str(value)`` with
        surrounding whitespace removed. When that text starts with
        ``=HYPERLINK`` the second-to-last double-quote-delimited piece of the
        formula is returned instead (stripped): the URL for a one-argument
        formula, the last quoted argument for a two-argument one.

    Raises:
        IndexError: if a ``=HYPERLINK`` formula contains no double quotes.
    """
    value = c.value
    if value is None:
        return ''
    text = str(value).strip()
    if not text.startswith('=HYPERLINK'):
        return text
    # HACK: workaround for converting links -- not every hyperlink is
    # recognised by the library, so the formula text is picked apart by hand.
    pieces = text.split('"')
    return pieces[-2].strip()


def get_raw_data(fn):
    """Read the first sheet of an xlsx workbook as a list of string rows.

    Args:
        fn: workbook location, passed straight to ``load_workbook``.

    Returns:
        A list of rows, each a list of strings produced by ``cell_to_str``.
        Reading stops at the first row that is empty or whose first cell is
        an ``EmptyCell``.
    """
    workbook = load_workbook(fn)
    first_sheet = workbook[workbook.sheetnames[0]]
    table = []
    for cells in first_sheet.rows:
        # NOTE(review): the workbook is not opened with read_only=True;
        # confirm that EmptyCell instances can actually show up in this mode.
        if not cells or type(cells[0]) is EmptyCell:
            break
        table.append([cell_to_str(cell) for cell in cells])
    return table


def check_partial_match(first, second):
    """Ensure two sequences share no equal value at the same position.

    Args:
        first: first sequence of values.
        second: second sequence, compared position by position with ``first``
            (the comparison stops at the shorter of the two).

    Raises:
        TurboPagesError: naming the first value found at the same position in
            both sequences.
    """
    repeated = [left for left, right in zip(first, second) if left == right]
    if repeated:
        raise TurboPagesError(f'"{repeated[0]}" repeats in {first} and {second}')


def get_cities(raw_data):
    """Group spreadsheet rows into cities with their hotels.

    The first row of ``raw_data`` is the header. In every following row the
    first four columns identify a city and the remaining columns describe a
    hotel; only the hotel columns covered by header positions 4..8 are kept.
    Rows with the same four city values are merged into one city, in order of
    first appearance.

    Args:
        raw_data: list of rows (lists of strings), header first.

    Returns:
        A list of dicts, one per city: the four header names mapped to the
        city values plus a ``'hotels'`` key holding a list of hotel dicts.

    Raises:
        TurboPagesError: if there are fewer than two rows, or if two cities
            that follow each other share a value in the same column (see
            ``check_partial_match``).
    """
    if len(raw_data) < 2:
        raise TurboPagesError('no data to process')
    header, *body = raw_data
    city_keys, hotel_keys = header[:4], header[4:9]

    grouped = OrderedDict()
    for row in body:
        grouped.setdefault(tuple(row[:4]), []).append(row[4:])

    # Neighbouring cities must differ in every identifying column.
    city_ids = list(grouped)
    for previous, current in zip(city_ids, city_ids[1:]):
        check_partial_match(previous, current)

    cities = []
    for city_values, hotel_rows in grouped.items():
        city = dict(zip(city_keys, city_values))
        city['hotels'] = [dict(zip(hotel_keys, hotel_row)) for hotel_row in hotel_rows]
        cities.append(city)
    return cities


def city_html(city, city_template, hotel_template):
    """Render the HTML of one city page.

    Args:
        city: dict of city fields; ``city['hotels']`` is a list of hotel
            dicts and ``city['geo_id']`` is passed to every hotel.
        city_template: ``str.format`` template for the page; it receives the
            city fields, with ``hotels`` replaced by the rendered hotels
            joined with newlines.
        hotel_template: ``str.format`` template for one hotel; it receives
            the hotel fields plus ``geo_id``.

    Returns:
        The formatted city template. The ``city`` argument is not modified.
    """
    rendered_hotels = [
        hotel_template.format(**hotel, geo_id=city['geo_id'])
        for hotel in city['hotels']
    ]
    context = dict(city, hotels='\n'.join(rendered_hotels))
    return city_template.format(**context)


def city_link(city):
    """Build the travel search URL of a city, including the utm parameters.

    Args:
        city: dict with a ``geo_id`` entry; it fills both the ``to`` and the
            ``utm_campaign`` query parameters. Other entries are ignored.

    Returns:
        The URL as a string.
    """
    query = (
        'currency=RUB&to={geo_id}&sort=relevance'
        '&lang=ru&hotel_type=hotel&hotel_type=hostel&hotel_type=mini_hotel'
        '&utm_source=serp&utm_medium=collections_more&utm_campaign={geo_id}'
    )
    template = 'https://travel.yandex.ru/search/rooms?' + query
    return template.format(**city)


def get_options(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: optional list of argument strings to parse instead of the
            process command line; ``None`` (the default) lets argparse read
            ``sys.argv[1:]``.

    Returns:
        The argparse namespace with ``filename`` (xlsx file to process) and
        ``destination`` (one of ``'debug'``, ``'test'``, ``'prod'``;
        ``'debug'`` when omitted).
    """
    parser = ArgumentParser()
    parser.add_argument('filename', help='xlsx file to process')
    parser.add_argument(
        '-d', '--destination',
        choices=['debug', 'test', 'prod'],
        default='debug',
        # The option selects among all three targets, not just production.
        help='destination table to write to (default: %(default)s)',
    )
    return parser.parse_args(argv)


def turbo_link(city, destination):
    """Build the turbo-page URL that wraps a city's travel search URL.

    Args:
        city: dict with a ``geo_id`` entry used in the travel search URL.
        destination: ``'test'`` (host gets the ``hamster.`` prefix) or
            ``'prod'`` (no prefix).

    Returns:
        The turbo URL; the travel URL is fully percent-encoded into the
        ``text`` query parameter.

    Raises:
        KeyError: if ``destination`` is neither ``'test'`` nor ``'prod'``, or
            if ``city`` has no ``geo_id``.
    """
    # Imported explicitly: a bare ``import urllib`` does not guarantee that
    # the ``urllib.parse`` submodule has been loaded.
    from urllib.parse import quote

    fmt = (
        'https://travel.yandex.ru/search/rooms?currency=RUB&to={geo_id}&sort=relevance'
        '&lang=ru&hotel_type=hotel&hotel_type=hostel&hotel_type=mini_hotel'
    )
    travel = fmt.format(**city)
    prefix = {'test': 'hamster.', 'prod': ''}[destination]
    # safe='' so that '/', ':' and friends are encoded too.
    link = quote(travel, safe='')
    return (
        f'https://{prefix}yandex.ru/turbo?text={link}'
        '&turbo_saas_service=turbo'
        '&metahost=TURBO_CACHER:sas1-0029:17002'
        '&metahost=TURBO_HOST_CACHER:sas1-0029.search.yandex.net:17002'
    )


def save_report(cities, options, starttime):
    """Append the turbo links of this run to ``report.txt``.

    Nothing is written when the destination is ``'debug'``.

    Args:
        cities: list of city dicts; one turbo link is written per city.
        options: parsed options providing ``destination`` and ``filename``.
        starttime: start timestamp of the run, written in the entry header.
    """
    destination = options.destination
    if destination == 'debug':
        return
    links = [turbo_link(city, destination) for city in cities]

    with open('report.txt', mode='a', encoding='utf8') as report_file:
        header = f'\n\n{starttime}, working with "{options.filename}", destination is "{destination}"\n'
        report_file.write(header)
        report_file.write('\n'.join(links))


def main():
    """Render turbo pages from the xlsx file and upload them.

    Parses the command-line options, loads the city and hotel HTML templates
    from ``./templates``, builds one ``TurboRecord`` per city and uploads the
    records to the table selected by the destination. The table path is the
    ``TABLE_PATH`` prefix followed by the ISO start timestamp. The run is
    then recorded through ``save_report``.
    """
    starttime = datetime.now(tzlocal()).isoformat()
    options = get_options()
    logging.info(f'working with "{options.filename}"')
    logging.info(f'destination is "{options.destination}"')

    loaded = []
    for template_path in ('./templates/city.html', './templates/hotel.html'):
        with open(template_path, encoding='utf8') as template_file:
            loaded.append(template_file.read())
    city_template, hotel_template = loaded
    logging.info('templates loaded')

    cities = get_cities(get_raw_data(options.filename))
    logging.info('got %d cities', len(cities))

    # Every run writes to its own table, named after the start timestamp.
    table_path = TABLE_PATH[options.destination] + starttime
    records = []
    for city in cities:
        link = city_link(city)
        html = city_html(city, city_template, hotel_template)
        records.append(TurboRecord(link, html))
    upload(TurboRecord, table_path, records)
    save_report(cities, options, starttime)
    logging.info('done')


# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
