#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import codecs
import argparse
import yt.wrapper as yt
from pytils import yt_config_set_defaults


def main():
    """Export the unique query texts of each country basket to a TSV file.

    Every entry returned by ``yt.list`` for the baskets root is treated as a
    country. For each country, all table nodes found under it by
    ``yt.search`` are read, except those whose path contains the substring
    ``classifiers``. The ``text`` field of every row is collected into a set.

    For each country the function prints ``<country>: <N> queries`` and
    writes the sorted unique texts to ``<country>.tsv`` (relative to the
    current working directory), UTF-8 encoded, one per line, with no
    trailing newline.
    """
    yt_config_set_defaults(yt)

    # Single definition of the root so the listing and the per-country
    # search cannot drift apart.
    baskets_root = '//home/videolog/2018Q1_baskets/intent'

    for country in yt.list(baskets_root):
        all_queries = set()
        for table in yt.search(
            root='{}/{}'.format(baskets_root, country),
            node_type="table"
        ):
            # Paths mentioning "classifiers" are deliberately excluded.
            if 'classifiers' in table:
                continue
            for rec in yt.read_table(table):
                text = rec['text']
                # Decode only raw byte strings. Values that are already
                # text have no usable .decode (AttributeError on Python 3),
                # so they are kept as-is.
                if isinstance(text, bytes):
                    text = text.decode('utf8')
                all_queries.add(text)
        print('{}: {} queries'.format(country, len(all_queries)))
        with codecs.open('{}.tsv'.format(country), 'w', 'utf8') as f:
            f.write('\n'.join(sorted(all_queries)))

# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
