import pandas
from multiprocessing import Pool
from tqdm import tqdm
import requests
from bs4 import BeautifulSoup
import boto3 
import json
import base64

# When true, get_id fetches pages through proxy_req (the Lambda proxy)
# instead of calling requests.get directly.
proxy = True

# AWS Lambda client that proxy_req uses to invoke the proxy function.
client = boto3.client("lambda")

# Read the input CSV and take its channel_id column as a plain Python list;
# these values are what get_id is mapped over.
df = pandas.read_csv("~/Downloads/ytchannels.csv")
short_ids = df["channel_id"].tolist()

def proxy_req(req):
    """Send an HTTP request through the Lambda proxy and return the response bytes.

    The request is prepared, serialised to a raw HTTP/1.1 message (request
    line, headers, blank line, body) and passed as the ``"request"`` field of
    the JSON payload given to the ``marionette-dev-proxyserver`` Lambda.

    Args:
        req: An object with a ``prepare()`` method returning something that
            exposes ``method``, ``url``, ``headers`` and ``body`` (in this
            file, a ``requests.Request``).

    Returns:
        The base64-decoded ``"response"`` field of the Lambda's JSON reply,
        as ``bytes``.
    """
    prepared = req.prepare()

    # A prepared request without a body (e.g. a plain GET) has body None;
    # formatting that directly would append the literal text "None" to the
    # raw request. Bytes bodies would likewise be rendered as "b'...'".
    body = prepared.body
    if body is None:
        body = ''
    elif isinstance(body, bytes):
        body = body.decode('utf-8')

    request_line = '{} {} HTTP/1.1'.format(prepared.method, prepared.url)
    header_lines = '\r\n'.join(
        '{}: {}'.format(name, value) for name, value in prepared.headers.items()
    )
    raw = '{}\r\n{}\r\n\r\n{}'.format(request_line, header_lines, body)

    # Synchronous invocation: the call blocks until the Lambda returns.
    result = client.invoke(
        FunctionName="marionette-dev-proxyserver",
        InvocationType="RequestResponse",
        Payload=json.dumps({"request": raw}),
    )

    reply = json.loads(result['Payload'].read())
    return base64.b64decode(reply['response'])

def get_id(short_id):
    """Look up the canonical YouTube path for a short channel identifier.

    Fetches ``https://m.youtube.com/<short_id>`` with a mobile user agent,
    either through ``proxy_req`` (when the module-level ``proxy`` flag is
    true) or directly, then reads the page's ``<link rel="canonical">`` tag.

    Args:
        short_id: Path of the channel page on m.youtube.com, with or without
            a leading ``/``.

    Returns:
        A two-element list ``[short_id, channel_id]`` where ``channel_id`` is
        the part of the canonical href that follows ``"youtube.com"``. On any
        failure (network error, missing tag, unexpected markup) the second
        element is the empty string.
    """
    user_agent = 'Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; SCH-I535 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'
    headers = {'User-Agent': user_agent}

    try:
        # Build the URL the same way for both branches, with exactly one "/"
        # between host and path whether or not short_id already starts with
        # one (previously the two branches disagreed on the separator).
        url = "https://m.youtube.com/%s" % ("%s" % short_id).lstrip('/')

        if proxy:
            content = proxy_req(requests.Request('GET', url, headers=headers))
        else:
            # Bound the wait so one stalled connection cannot hang a worker.
            page = requests.get(url, headers=headers, timeout=30)
            content = page.content

        soup = BeautifulSoup(content, 'html.parser')
        canonical = soup.find('link', {'rel': 'canonical'})
        channel_id = canonical['href'].split("youtube.com")[1]

        return [short_id, channel_id]
    except Exception:
        # Best-effort lookup: record the failure as an empty id. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # still propagate so the run can be stopped.
        return [short_id, '']
  
# Guard the driver so that worker processes started with the "spawn" method
# (which re-import this module) do not try to create their own pools.
if __name__ == '__main__':
    results = []

    # The context manager shuts the pool down once all results are collected.
    with Pool(64) as p:
        lookups = p.imap_unordered(get_id, short_ids)
        for i, res in enumerate(tqdm(lookups, total=len(short_ids))):
            results.append(res)

            # Every 100th result (starting with the first): print a sample and
            # write a checkpoint CSV of everything gathered so far.
            if i % 100 == 0:
                print(res)
                pandas.DataFrame(data=results, columns=['short_id', 'channel_id']).to_csv("ytmap_%d.csv" % i, index=False)

    # Final output containing every [short_id, channel_id] pair.
    pandas.DataFrame(data=results, columns=['short_id', 'channel_id']).to_csv("ytmap.csv", index=False)