defmodule AssetSubtitleSearch do
  @moduledoc """
  GenServer holding an in-memory word index built from asset subtitles and
  asset metadata.

  The index maps a lowercase token to a list of `{asset, count}` tuples, where
  `count` is how many times the token occurs in that asset's subtitle file and
  metadata keys/values. It is rebuilt right after start-up and then once every
  24 hours, in a separate task so the server stays responsive to `fetch/1`
  while a rebuild is running.
  """

  use GenServer
  alias MissionControlEx.Web.{Repo, Asset}

  # Interval between two index rebuilds, in milliseconds.
  @twentyfour_hours 86_400_000
  @bucket "twitch-creative-video-repository"

  # Graphemes that separate tokens. "\r\n" is listed on its own because
  # `String.graphemes/1` yields it as a single grapheme.
  @non_word_chars [
    ".", " ", "/", "\\", "(", ")", ":", ",", ";", "<", ">", "~", "!", "@", "#", "$",
    "%", "^", "&", "*", "|", "+", "=", "[", "]", "{", "}", "`", "?", "-", "\"", "'",
    "\t", "\n", "\r", "\f", "\v", "\r\n"
  ]

  ## Client API

  @doc """
  Starts the index server registered under the module name.

  The optional argument is ignored; it exists so the module can be started both
  as `start_link()` and through the default `child_spec/1`.
  """
  def start_link(_opts \\ []), do: GenServer.start_link(__MODULE__, [], name: __MODULE__)

  @doc """
  Returns the `{asset, count}` entries indexed under `query`, or `[]` when the
  token is unknown.

  `query` is treated as a single token; binaries are downcased before lookup
  because every key in the index is lowercase.
  """
  @spec fetch(term()) :: list()
  def fetch(query) do
    GenServer.call(__MODULE__, {:fetch, query})
  end

  ## Server callbacks

  @impl true
  def init([]) do
    {:ok, timer} = :timer.send_interval(@twentyfour_hours, self(), :scrape)
    # Trigger the first build almost immediately instead of waiting a full day.
    Process.send_after(self(), :scrape, 1)
    {:ok, %{index: %{}, timer: timer}}
  end

  @impl true
  def handle_info(:scrape, state) do
    parent = self()

    # `Task.start/1` discards the function's return value, so the finished index
    # must be sent back explicitly. The task is not linked to this process, so
    # a failing scrape leaves the previous index in place.
    Task.start(fn -> send(parent, {:scrape_result, {:update_index, do_scrape()}}) end)

    {:noreply, state}
  end

  def handle_info({_, {:update_index, index}}, state), do: {:noreply, %{state | index: index}}
  # Normal exits of monitored processes carry no information for the index.
  def handle_info({:DOWN, _, _, _, :normal}, state), do: {:noreply, state}

  @impl true
  def handle_call({:fetch, query}, _from, state) do
    {:reply, Map.get(state.index, normalize_query(query), []), state}
  end

  ## Index construction

  @doc """
  Builds a fresh index from every `Asset` returned by the repo.

  Returns a map of `token => [{asset, count}, ...]`.
  """
  def do_scrape do
    Asset
    |> Repo.all()
    |> Flow.from_enumerable()
    |> Flow.partition(stages: 20)
    |> Flow.flat_map(&generate_word_count_index/1)
    # Route equal tokens to the same stage so each token is reduced in one place.
    |> Flow.partition(key: {:elem, 0})
    |> Flow.reduce(fn -> %{} end, fn {token, entry}, acc ->
      Map.update(acc, token, [entry], &[entry | &1])
    end)
    |> Enum.into(%{})
  end

  @doc """
  Tokenizes the subtitles and metadata of `asset`.

  Returns a list of `{token, {asset, count}}` tuples, one per distinct token.
  """
  def generate_word_count_index(asset) do
    (get_subtitles(asset) ++ get_metadata(asset))
    |> Enum.flat_map(&tokenize/1)
    |> Enum.group_by(& &1)
    |> Enum.map(fn {token, occurrences} -> {token, {asset, Enum.count(occurrences)}} end)
  end

  @doc """
  Returns the S3 key of the `.srt` subtitle file for `asset`.

  Assets whose `source_subtitles` is `"None_Required"` yield that marker string
  instead of a key.
  """
  def subtitle_s3_path(%{source_subtitles: "None_Required"}), do: "None_Required"

  def subtitle_s3_path(asset) do
    subtitle_path = Transcode.s3_path(:subtitles, asset)

    case Utils.get_extname(subtitle_path) do
      ".srt" ->
        subtitle_path

      extension ->
        # Only the trailing extension is swapped; an identical substring earlier
        # in the path is left alone, and a path without extension gets ".srt"
        # appended. Assumes `Utils.get_extname/1` returns the path's suffix
        # including the dot — TODO confirm.
        String.replace_suffix(subtitle_path, extension, ".srt")
    end
  end

  @doc """
  Splits `line` into lowercase tokens.

  The input is converted with `to_string/1`, downcased and split on the
  graphemes in `@non_word_chars`. Tokens are returned in reverse order of
  appearance; empty tokens are never returned.

  ## Examples

      iex> AssetSubtitleSearch.tokenize("Hello, World!")
      ["world", "hello"]

      iex> AssetSubtitleSearch.tokenize("")
      []
  """
  @spec tokenize(String.Chars.t()) :: [String.t()]
  def tokenize(line) do
    {current, tokens} =
      line
      |> to_string()
      |> String.downcase()
      |> String.graphemes()
      |> Enum.reduce({"", []}, fn
        grapheme, {"", tokens} when grapheme in @non_word_chars ->
          # Consecutive separators: nothing to flush.
          {"", tokens}

        grapheme, {word, tokens} when grapheme in @non_word_chars ->
          {"", [word | tokens]}

        grapheme, {word, tokens} ->
          {word <> grapheme, tokens}
      end)

    # Flush the last word only if there is one; otherwise input that is empty
    # or ends with a separator would add an empty-string token to the index.
    case current do
      "" -> tokens
      _ -> [current | tokens]
    end
  end

  ## Helpers

  # Index keys are always lowercase (see `tokenize/1`), so binary queries are
  # downcased; anything else is looked up as given.
  defp normalize_query(query) when is_binary(query), do: String.downcase(query)
  defp normalize_query(query), do: query

  # Metadata contributes both its keys and its values as indexable text.
  defp get_metadata(%Asset{metadata: nil}), do: []

  defp get_metadata(%Asset{metadata: metadata}),
    do: List.flatten(Map.keys(metadata) ++ Map.values(metadata))

  # Downloads the subtitle file of `asset` from S3 and returns it as a
  # one-element list, or `[]` when the asset has no subtitles or the download
  # fails (missing subtitles are expected and must not abort a scrape).
  defp get_subtitles(%{source_subtitles: "None_Required"}), do: []

  defp get_subtitles(asset) do
    request = ExAws.S3.get_object(@bucket, subtitle_s3_path(asset))

    # Credentials come from the ExAws configuration (application config,
    # environment variables or instance role); secrets must never live in
    # source control.
    case ExAws.request(request) do
      {:ok, %{body: body}} -> [body]
      _error -> []
    end
  end
end
