import sys
import csv
import json
from functools import partial
from collections import OrderedDict
from nile.api.v1 import clusters, Record
from qb2.api.v1 import filters as qf
import pandas as pd
import math
import argparse

# Eccentricity constant consumed by get_phi() and get_O(). The value is
# numerically the first eccentricity of the WGS84 ellipsoid (0.08181919...).
E = 0.0818191908426

def get_p(zoom):
    """Return half of ``2 ** (zoom + 8)`` as a float.

    With 256 == 2 ** 8 this is presumably half the width of the world in
    pixels at the given zoom for 256-pixel tiles -- confirm against the
    tiling scheme in use.
    """
    full_extent = math.pow(2, zoom + 8)
    return full_extent / 2

def get_beta(lat):
    """Convert a latitude given in degrees to radians."""
    # Multiply first, divide second: keeps the exact rounding of the
    # original expression.
    scaled = math.pi * lat
    return scaled / 180

def get_phi(beta):
    """Return ``(1 - E*sin(beta)) / (1 + E*sin(beta))`` for *beta* in radians.

    This is the eccentricity-dependent ratio that get_O() raises to the
    power ``E / 2``.
    """
    e_sin = E * math.sin(beta)
    numerator = 1 - e_sin
    denominator = 1 + e_sin
    return numerator / denominator

def get_O(beta, phi):
    """Return ``tan(pi/4 + beta/2) * phi ** (E/2)``.

    *beta* is a latitude in radians and *phi* the ratio produced by
    get_phi(). get_tail_y() takes the natural logarithm of this value.
    """
    tangent = math.tan(math.pi / 4 + beta / 2)
    correction = math.pow(phi, E / 2)
    return tangent * correction

def get_tail_x(zoom, lon):
    """Return the horizontal tile index for longitude *lon* (degrees) at *zoom*.

    The pixel offset ``get_p(zoom) * (1 + lon / 180)`` is divided by 256 and
    floored. ("tail" presumably means "tile" -- the name is kept because
    callers use it.)
    """
    pixel_x = get_p(zoom) * (1 + lon / 180)
    return math.floor(pixel_x / 256)

def get_tail_y(zoom, lat):
    """Return the vertical tile index for latitude *lat* (degrees) at *zoom*.

    The latitude is converted to radians, run through get_phi()/get_O(), and
    the logarithm of the result is mapped to a pixel offset which is divided
    by 256 and floored. The formula looks like an ellipsoidal Mercator
    projection -- confirm against the map provider's tile documentation.
    """
    beta = get_beta(lat)
    projected = math.log(get_O(beta, get_phi(beta)))
    pixel_y = get_p(zoom) * (1 - projected / math.pi)
    return math.floor(pixel_y / 256)

def get_rank_order(row, top_10_src):
    """Return the priority class of a row: 1, 2 or 3 (lower wins).

    Args:
        row: object exposing ``permalink`` and ``is_geoproduct`` attributes.
        top_10_src: pandas Series whose values are the top permalinks.

    Returns:
        1 if the row's permalink is among ``top_10_src`` values,
        2 otherwise if the row is a geoproduct, 3 for everything else.
    """
    in_top = row.permalink in top_10_src.values
    if in_top:
        return 1
    return 2 if row.is_geoproduct else 3

def main(cluster, src_path, result_path, zoom_start, zoom_end, min_opens=11,
         zoom_path=None, icon_info_path=None):
    """Assign every POI a minimum zoom and write per-zoom and summary tables.

    For each zoom in ``zoom_start..zoom_end`` the still-unplaced POIs are
    bucketed into tiles and the best few per tile (by ``ranked``, then by
    ``total_opens`` descending) are written to ``zoom_path + str(zoom)``.
    Zoom 19 is a catch-all: every POI not placed earlier is written there.
    Finally each placed POI with its ``min_zoom`` is written to
    ``result_path``.

    Args:
        cluster: object with ``read(path)`` (returning something with
            ``as_dataframe()``) and ``write(path, records)``.
        src_path: table with at least ``permalink``, ``lat``, ``lon``,
            ``total_opens``, ``is_geoproduct`` and ``geo_id`` columns.
        result_path: destination of the final permalink -> min_zoom table.
        zoom_start: first zoom to process (inclusive).
        zoom_end: last zoom to process (inclusive).
        min_opens: POIs with fewer opens that are not geoproducts get their
            zoom from ``disp_class`` instead of competing from ``zoom_start``.
        zoom_path: prefix of the per-zoom output tables; defaults to the
            module-level ``ZOOM_PATH``.
        icon_info_path: table with ``permalink`` and ``disp_class`` columns;
            defaults to the module-level ``ICON_INFO_PATH``.

    Raises:
        ValueError: if the zoom range is empty or contains a zoom that has
            no per-tile quota and is not the catch-all zoom.
    """
    # These two paths were never parameters; keep the old global lookup as
    # the default so existing callers behave the same.
    if zoom_path is None:
        zoom_path = ZOOM_PATH
    if icon_info_path is None:
        icon_info_path = ICON_INFO_PATH

    final_zoom = 19  # catch-all zoom and upper cap for default_zoom
    map_cnt_on_zoom = {13: 1, 14: 1, 15: 1, 16: 2, 17: 4, 18: 4}  # POIs per tile
    pnrm_zooms_include = 15
    perm_on_pnrm_zoom_perc = 0.88

    # Validate up front so a bad range fails before any table is written.
    zoom_range = range(zoom_start, zoom_end + 1)
    if len(zoom_range) == 0:
        raise ValueError('empty zoom range: {}..{}'.format(zoom_start, zoom_end))
    unsupported = [z for z in zoom_range
                   if z != final_zoom and z not in map_cnt_on_zoom]
    if unsupported:
        raise ValueError('no per-tile quota for zoom(s): {}'.format(unsupported))

    df = cluster.read(src_path).as_dataframe()
    df['total_opens'] = df['total_opens'].fillna(0)

    icon_info = cluster.read(icon_info_path).as_dataframe().set_index('permalink')
    icon_info = icon_info[['disp_class']]
    # De-duplicate on the permalink index. DataFrame.duplicated() would only
    # compare the disp_class column and keep one row per distinct class.
    icon_info = icon_info[~icon_info.index.duplicated()]

    # Top 10% of POIs by opens get the best rank class.
    top_10 = df.sort_values(by='total_opens', ascending=False).head(int(len(df) * 0.1)).permalink
    df['ranked'] = df.apply(partial(get_rank_order, top_10_src=top_10), axis=1)

    # Low-traffic non-geoproduct POIs: their zoom comes from disp_class.
    low_traffic = (df.total_opens < min_opens) & (~df.is_geoproduct)
    default = df[low_traffic].set_index('permalink').join(icon_info, how='left')
    default['default_zoom'] = default['disp_class'].apply(lambda z: min(z + 11, final_zoom))
    default = default[['lat', 'lon', 'total_opens', 'ranked', 'default_zoom']].reset_index()

    df_without_nulls = df[~df.permalink.isin(default.permalink.values)]

    tile_columns = ['permalink', 'lon', 'lat', 'total_opens', 'ranked']
    used_permalink = set()
    df_rslt = []

    for zoom in zoom_range:
        df_temp = df_without_nulls[~df_without_nulls.permalink.isin(used_permalink)]
        df_temp = df_temp[tile_columns + ['is_geoproduct']]

        if zoom <= pnrm_zooms_include:
            # On these zooms only the top share of geoproducts (by opens)
            # may compete; the rest wait for later zooms.
            geoproducts = df_temp[df_temp.is_geoproduct]
            keep_cnt = int(len(geoproducts) * perm_on_pnrm_zoom_perc)
            kept = geoproducts.nlargest(keep_cnt, 'total_opens').permalink
            df_temp = df_temp[(~df_temp.is_geoproduct) | (df_temp.permalink.isin(kept))]

        if zoom == final_zoom:
            # Everything still unplaced is shown from the final zoom on.
            default_small = default[~default.permalink.isin(used_permalink)][['permalink']]
            rslt = pd.concat([df_temp[['permalink']], default_small], ignore_index=True)
            rslt['permalink'] = rslt['permalink'].astype('object')
            records = [Record(permalink=row['permalink'])
                       for _, row in rslt.iterrows()]
            cluster.write(zoom_path + str(zoom), records)
        else:
            default_small = default[(~default.permalink.isin(used_permalink))
                                    & (default.default_zoom == zoom)]
            # ignore_index: both parts carry unrelated index labels.
            df_zoom = pd.concat([df_temp[tile_columns], default_small[tile_columns]],
                                ignore_index=True)
            # Tiles must come from df_zoom's own coordinates (not df's), and
            # a list keeps this working when df_zoom is empty.
            df_zoom['tail_x'] = [get_tail_x(zoom, lon) for lon in df_zoom['lon']]
            df_zoom['tail_y'] = [get_tail_y(zoom, lat) for lat in df_zoom['lat']]
            # Sort by tile, then best-first inside a tile, and keep the first
            # map_cnt_on_zoom[zoom] rows of each tile.
            tailed = (df_zoom
                      .sort_values(by=['tail_x', 'tail_y', 'ranked', 'total_opens'],
                                   ascending=[True, True, True, False])
                      .groupby(['tail_x', 'tail_y'])
                      .head(map_cnt_on_zoom[zoom]))
            # NOTE(review): iterrows() upcasts mixed numeric columns to one
            # dtype per row, so a numeric permalink may be written as a float
            # here -- confirm the expected output schema.
            records = [Record(
                permalink=row['permalink'],
                tail_x=row['tail_x'],
                tail_y=row['tail_y'],
                total_opens=row['total_opens'],
            ) for _, row in tailed.iterrows()]
            cluster.write(zoom_path + str(zoom), records)
            rslt = tailed[['permalink']].copy()
            used_permalink.update(tailed.permalink.values)

        rslt['min_zoom'] = zoom
        df_rslt.append(rslt)

    rank = df[['permalink', 'geo_id', 'is_geoproduct', 'lat', 'lon']].set_index('permalink').join(
        pd.concat(df_rslt).set_index('permalink'), how='inner')
    records = [Record(
        permalink=index,
        min_zoom=row['min_zoom'],
        geo_id=row['geo_id'],
        is_geoproduct=row['is_geoproduct'],
        lat=row['lat'],
        lon=row['lon'],
    ) for index, row in rank.iterrows()]
    cluster.write(result_path, records)

if __name__ == '__main__':
    # Command line: -v selects the ranking-algorithm version, which is part
    # of every table path below.
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', required=True, help='version of ranking algorithm')
    args = parser.parse_args()

    # Inclusive zoom range to process and the opens threshold for ranking.
    ZOOM_START, ZOOM_END = 13, 19
    MINIMUM_TOTAL_OPENS_TO_RANK = 11

    # Input tables.
    SRC_PATH = '//home/geo-analytics/poi_rank/' + args.v + '/filtered/moscow/data'
    ICON_INFO_PATH = '//home/geo-analytics/poi_rank/' + args.v + '/filtered/moscow/icon_info'
    # Output tables (ZOOM_PATH is a prefix; the zoom number is appended).
    ZOOM_PATH = '//home/geo-analytics/poi_rank/' + args.v + '/filtered/moscow/zooms/'
    RESULT_PATH = '//home/geo-analytics/poi_rank/' + args.v + '/filtered/moscow/rank'

    cluster = clusters.Hahn()

    main(
        cluster,
        SRC_PATH,
        RESULT_PATH,
        ZOOM_START,
        ZOOM_END,
        MINIMUM_TOTAL_OPENS_TO_RANK,
    )
