import json
import logging
import os
import posixpath
import re
import socket
import stat
import time

from datetime import datetime

from sandbox import sdk2
from sandbox.sandboxsdk.environments import PipEnvironment

from sandbox.common.types.misc import DnsType
from sandbox.projects.common.solomon import push_to_solomon_v2
from sandbox.projects.quasar.utils import YAVExportMixin


def chamber_csv2json(sftp_client, from_csv, size, report_template_type="test_results"):
    """
    Read a chamber CSV report over SFTP and convert it to a JSON string.

    The first line of the file is the header. Every following line whose first
    column is not empty becomes one JSON object built from (header, value) pairs.

    :param sftp_client: object with an ``open(path)`` method returning a file-like
        object that supports ``prefetch``, ``readline``, iteration and ``close``
    :param from_csv: full remote path of the CSV report
    :param size: file size in bytes, passed to ``prefetch``
    :param str report_template_type: ``"packaging_data"`` selects the packaging
        layout, any other value selects the test-results layout
    :return: JSON text: a list of per-row dicts, indented by 4 spaces
    :raises IndexError: when a row has fewer columns than the layout pops
    """
    def to_text(chunk):
        # The remote file (and the path) may be bytes or text depending on the runtime
        return chunk.decode("utf-8") if isinstance(chunk, bytes) else chunk

    from_csv = to_text(from_csv)

    def get_device_results_common_report(raw_data):
        results = dict()
        # Save source raw data to metadata
        results["METADATA"] = {"RAW DATA": json.dumps(raw_data, sort_keys=False)}

        # Add source filename without the ".csv" extension.
        # NOTE: str.rstrip(".csv") strips a character set, not a suffix, and would
        # also eat trailing 'c', 's', 'v' and '.' characters of the name itself.
        source_name = from_csv.rsplit("/", 1)[-1]
        if source_name.endswith(".csv"):
            source_name = source_name[:-len(".csv")]
        results["SOURCE FILENAME"] = source_name
        results["SOURCE FULLPATH"] = from_csv
        return results

    def get_device_simple_report_packaging(raw_data):
        # Columns, in order: SERIAL_NUMBER,UUID,MAC,CARTON_DATETIME
        results = get_device_results_common_report(raw_data)
        results["SERIAL NUMBER"] = raw_data.pop(0)[1]
        results["UUID"] = raw_data.pop(0)[1]
        results["MAC"] = raw_data.pop(0)[1]
        results["CARTON_DATETIME"] = raw_data.pop(0)[1]
        return results

    def get_device_results(raw_data):
        results = get_device_results_common_report(raw_data)
        # The Device ID value is filled in by the steps loop below
        results["DEVICE ID"] = ""

        # Initial keys SERIAL NUMBER,MODULE NO.,PD LINE,STATION,SW P/N,TECHNICIAN,
        # ACCESS GROUP,DATE TIME,FIXTURE,TEST DURATION,TEST RESULT, ...
        results["SERIAL NUMBER"] = raw_data.pop(0)[1]
        results["MODULE NO."] = raw_data.pop(0)[1]
        results["PD LINE"] = raw_data.pop(0)[1]
        results["STATION"] = raw_data.pop(0)[1]
        # skip SW P/N,TECHNICIAN,ACCESS GROUP, pop DATE TIME
        results["DATE TIME"] = raw_data.pop(3)[1].lstrip("_")
        # skip SW P/N,TECHNICIAN,ACCESS GROUP,FIXTURE, pop TEST DURATION
        results["TEST DURATION"] = raw_data.pop(4)[1]
        # skip SW P/N,TECHNICIAN,ACCESS GROUP,FIXTURE, pop TEST RESULT
        results["TEST RESULT"] = raw_data.pop(4)[1]

        step_data = dict()  # per-step values, keyed by the step name
        other_data = dict()  # the rest of the keys that belong to no step
        in_step = False
        step = ""

        for k, v in raw_data:
            if k.startswith("M_"):
                # A "M_<step> -><description>" column opens a new step
                step, desc = k.split(" ->")
                step_data[step] = {desc: v}
                if desc == "Get_UUID":
                    results["DEVICE ID"] = v
                in_step = True
            elif in_step:
                step_data[step][k] = v
                if k.startswith("P/F"):
                    # The pass/fail column closes the current step
                    step = ""
                    in_step = False
            else:
                other_data[k] = v

        results["STEPS"] = step_data
        results["OTHER DATA"] = other_data

        return results

    if report_template_type == "packaging_data":
        build_row = get_device_simple_report_packaging
    else:
        build_row = get_device_results

    remote_file = sftp_client.open(from_csv)
    try:
        remote_file.prefetch(size)  # prefetch asynchronously as we are reading it all!

        keys = to_text(remote_file.readline()).rstrip("\r\n").split(",")

        json_data = list()  # list with final json data
        for line in remote_file:
            values = to_text(line).rstrip("\r\n").split(",")
            if values[0] != "":
                # Materialise the pairs: the builders pop() from them and dump them to JSON
                json_data.append(build_row(list(zip(keys, values))))
    finally:
        # Close the remote handle even when a malformed row raises
        remote_file.close()

    return json.dumps(json_data, sort_keys=False, indent=4)


def push_sensors_to_solomon(service, sensors):
    """
    Fairly generic helper that sends quasar-style sandbox sensors to Solomon.

    The sensors are pushed to project 'quasar', cluster 'yandexmini', using the
    token stored in the 'QUASAR' / 'solomon_token' vault entry.

    :param str service: solomon service name, e.g. 'build_stats' or 'device_group_info' -- logical sensor group
    :param list sensors: list of per-sensor dicts like ::

        [
            {
                "labels": {
                        "sensor": "<sensor_name>",  # kinda-mandatory
                        # any extra labels for filtering
                    },
                "value": 123.45,
                "ts": "2019-02-03T04:11:43.000000Z"  # optional time in ISO-8601 format
            },
        ]

    If ts is not given it is filled with current ts on solomon side -- see https://wiki.yandex-team.ru/solomon/api/push/
    """
    token = sdk2.Vault.data('QUASAR', 'solomon_token')

    # Destination of the push: fixed project and cluster, caller-chosen service
    destination = dict(project='quasar', cluster='yandexmini', service=service)

    logging.info('Pushing service %s sensors to Solomon: %s', service, sensors)

    push_to_solomon_v2(token=token, params=destination, sensors=sensors)


def push_free_space_to_solomon(server_free_space, server_name, storage_name):
    """
    Push the free-space share of a server storage as a single 'manufacture' sensor.

    The value is a share of 1: 100% free is 1.00, 0% free is 0.00,
    i.e. when the FTP mount is 2% full: 1 - (2/100) = 0.98

    :param server_free_space: free space on the server, float value, f.ex. 0.98
    :param server_name: value of the 'server_name' label (converted with str)
    :param storage_name: value of the 'storage_name' label (converted with str)
    """
    now = time.time()  # the sensor is stamped with the moment of the call

    labels = {
        'sensor': 'server_storage_freespace',
        'server_name': str(server_name),
        'storage_name': str(storage_name),
    }
    sensor = {'labels': labels, 'ts': now, 'value': float(server_free_space)}

    push_sensors_to_solomon(service='manufacture', sensors=[sensor])


def push_report_summary_to_solomon(report_type, date_of_tests, chamber_name, tests_passed, tests_failed):
    """
    Push PASS/FAIL counters of one report table as two 'pts_report_summary' sensors.

    Both sensors are stamped with 23:59:59 of the test date. The date is parsed
    as naive local time (``time.mktime``).

    :param report_type: value of the 'report_type' label (converted with str)
    :param str date_of_tests: test date in 'YYYY-MM-DD' format
    :param chamber_name: value of the 'chamber_name' label (converted with str)
    :param tests_passed: number of passed tests (converted with int)
    :param tests_failed: number of failed tests (converted with int)
    :return: None
    :raises ValueError: when date_of_tests does not match 'YYYY-MM-DD'
    """
    # The original message had no placeholders for its arguments, so the logging
    # module failed to format the record and nothing useful was logged.
    logging.info(
        "Pushing statistics to Solomon: report_type=%s, date_of_tests=%s, chamber_name=%s, "
        "tests_passed=%s, tests_failed=%s",
        report_type, date_of_tests, chamber_name, tests_passed, tests_failed)
    timestamp = time.mktime(datetime.strptime(date_of_tests + "T23:59:59", "%Y-%m-%dT%H:%M:%S").timetuple())

    # One sensor per test result; they differ only in the label and the value
    solomon_sensors = [
        {
            'labels': {
                'sensor': 'pts_report_summary',
                'chamber_name': str(chamber_name),
                'report_type': str(report_type),
                'test_result': test_result,
            },
            'ts': timestamp,
            'value': int(count),
        }
        for test_result, count in (('PASS', tests_passed), ('FAIL', tests_failed))
    ]

    push_sensors_to_solomon(service='manufacture', sensors=solomon_sensors)


class QuasarFetchFendaReports(YAVExportMixin, sdk2.Task):
    """
    https://st.yandex-team.ru/QUASAR-4363

    Fetch reports from the SFTP server at Fenda and put them into YT
    """

    # Execution requirements of the task: pip packages, DNS mode and CPU cores.
    class Requirements(YAVExportMixin.Requirements, sdk2.Task.Requirements):
        # Pip packages installed for the task; the `yt.wrapper` import in
        # _prepare_table presumably comes from 'yandex-yt' -- TODO confirm
        environments = [
            PipEnvironment('yandex-yt', use_wheel=True),
            PipEnvironment('yandex-yt-yson-bindings-skynet', use_wheel=True),
        ]

        dns = DnsType.DNS64  # for external interactions
        cores = 1  # this is a light task that only moves some data over the network

        # Empty Caches subclass; presumably declares that the task needs no
        # shared caches -- TODO confirm against the sandbox SDK
        class Caches(sdk2.Requirements.Caches):
            pass

    # Private sentinel object (name-mangled by the class); it is not referenced
    # anywhere else in the visible part of this file.
    __DEFAULT_TTL = object()

    # User-editable task parameters, grouped by the subsystem they configure.
    class Parameters(sdk2.Task.Parameters):
        # Maximum directory recursion depth used when listing per-device reports
        # (read by _get_reports)
        MAX_PER_DEVICE_DEPTH = 5
        kill_timeout = 600  # should work at most 5 minutes, have some timeout here too
        # Time budget for uploading reports; on_execute turns it into an end time
        run_time = sdk2.parameters.Integer('Run time, in seconds', default=300)

        with sdk2.parameters.Group('YT'):
            yt_proxy = sdk2.parameters.String('YT proxy', default='hahn')
            # Base YT path; per-report table paths are joined onto it in _prepare_table
            yt_base_path = sdk2.parameters.String('YT table base path', default='//home/quasar-dev/chamber/fenda')

        with sdk2.parameters.Group('SFTP'):
            user = sdk2.parameters.String('SFTP user', default='yandex')
            # NOTE: connect_ssh connects to this same host as well
            host = sdk2.parameters.String('SFTP host', default='fenda.ext.eine.yandex.net')
            port = sdk2.parameters.Integer('SFTP server port', default=1222)
            # an example of absolute SFTP reports path:
            # /opt/ftp/yandexmini/Yandex_Mini_Test_File/ATS_Test_Report
            reports_dir = sdk2.parameters.String('SFTP reports base path',
                                                 default='/opt/ftp/yandexmini/Yandex_Mini_Test_File')
            # Upper bound on the number of report files listed per directory mapping
            reports_limit = sdk2.parameters.Integer('Number of reports read at one time', default=100)
            # When set, uploaded reports are moved below the archive subdir and
            # replaced with a symlink (see _upload_reports)
            archivate = sdk2.parameters.Bool('Move read reports to ARCH dir', default=True)
            reports_archive_subdir = sdk2.parameters.String('SFTP archive subdir name', default='ARCH')
            fenda_yav_sftp_key = sdk2.parameters.String(
                'YAV sftp key name',
                default='yandex.key'
            )

        with sdk2.parameters.Group('Mapping'):
            # Mapping between SFTP and YT directories to get/put new reports
            # Semi-colon separated list "SFTP_DIR1=yt-dir1;SFTP_DIR2=yt-dir2;"
            sftp_yt_mapping = sdk2.parameters.String('SFTP-YT Mapping', default="CI_Test_Report=ci-test;")

        with sdk2.parameters.Group('SSH'):
            # SSH is used by get_free_space to run `df` on the remote host
            ssh_port = sdk2.parameters.Integer('SSH server port', default=1222)
            ssh_user = sdk2.parameters.String('SSH user', default='yandex')
            # This secret version is used for both the SSH and the SFTP key
            fenda_yav_secret_version = sdk2.parameters.String(
                "YAV version for secret",
                default="ver-01dj7a3qr3wp5dxf0pmw0wdb2n"
            )

            # NOTE(review): the label below says "sftp" although this is the SSH
            # key parameter -- looks like a copy-paste from fenda_yav_sftp_key
            fenda_yav_ssh_key = sdk2.parameters.String(
                'YAV sftp key name',
                default='yandex.key'
            )

        with sdk2.parameters.Group('Reports'):
            # Report type comma separated list
            # available types: "daily,per-device"
            report_type_list = sdk2.parameters.String('Report type list', default="per-device")
            # Strict names check against chamber name
            report_strict_check = sdk2.parameters.Bool('Strict names check', default=False)
            # "packaging_data" selects the packaging CSV layout and YT schema;
            # any other value selects the test-results ones
            report_template_type = sdk2.parameters.String('Report template type', default="test_results")

        with sdk2.parameters.Group('Internal'):
            # Not read anywhere in the visible part of this file
            test = sdk2.parameters.Bool('Test some new and exciting feature', default=False)

    def _get_pkey(self, secret_version_name, key_name):
        """
        Export a private key from YAV and load it as a paramiko RSA key.

        :param secret_version_name: YAV secret version passed to ``yav_export``
        :param key_name: name of the key inside the secret; also used as the file
            name below the path returned by ``yav_export``
        :return: `paramiko.RSAKey` to be used for connections, or None when the
            file could not be loaded as an RSA private key
        """
        import paramiko as p

        sftp_key_path = os.path.join(self.yav_export(secret_version_name, key_name), key_name)
        logging.info('Private key path: %s', sftp_key_path)

        try:
            return p.RSAKey.from_private_key_file(sftp_key_path)
        except p.SSHException:
            # Callers still get None (best effort), but the reason is now visible
            # in the log instead of surfacing later as an obscure auth failure
            logging.warning('Failed to load RSA key from %s', sftp_key_path, exc_info=True)
            return None

    def _report_freespace(self):
        """
        Measure free space of the remote storage and push it to Solomon.

        When the measurement fails (``get_free_space`` returns None) only a
        warning is logged and nothing is pushed.
        """
        storage_name = '/dev/sda1'
        share_free = self.get_free_space(storage_name)

        if share_free is None:
            logging.warn('Failed to get free space')
            return

        logging.info('Target has {%f} free space' % share_free)

        push_free_space_to_solomon(share_free, 'fenda-foothold-1', storage_name)

    def _get_sftp_yt_map(self):
        """
        Parse the 'SFTP-YT Mapping' parameter ("SFTP_DIR1=yt-dir1;SFTP_DIR2=yt-dir2;").

        :return: SFTP to YT directories map dictionary
        """
        raw_mapping = str(self.Parameters.sftp_yt_mapping)
        # Empty chunks (e.g. after the trailing ';') are dropped
        pairs = [chunk.split("=") for chunk in raw_mapping.split(";") if chunk]
        return dict(pairs)

    def _check_report_type(self, filename, report_type, chamber_name, strict_check=False):
        """
        Check a report filename against the naming scheme of the report type.

        :param str filename: report file name (without directory)
        :param str report_type: 'per-device' or 'daily'; for any other value only
            an empty filename matches
        :param chamber_name: chamber name; it is upper-cased and must appear in
            the name of non-packaging 'daily' reports always, and in the name of
            'per-device' reports only when strict_check is set
        :param bool strict_check: require the chamber name in 'per-device' names
        :return: True when the filename matches, False otherwise (a warning is logged)
        """
        pattern = r"^$"

        # Escaped so that a chamber name is always matched literally
        chamber_literal = re.escape(str(chamber_name).upper())

        if report_type == 'per-device':
            chamber_name_pattern = chamber_literal if strict_check else r'\w+'
            # Approved filename format is "Yandex_Mini_FG_ATS_Line#1_C0847D82F7AA_2019-11-27 20-46-09.csv"
            # Although we are now forced to use the pattern even for filenames with broken format,
            # i.e. like "Yandex_Mini_FG_ATS_Line#12019-11-27C0847D82F7AA2019-11-27 20-46-09.csv"
            # Fix: next possible format
            #      "Yandex_Mini_Yandex-ATS-38_Line#1_C0847DC773A8_2019-12-09 15-29-20.csv"
            # Fix2: LED chamber format
            #      "Yandex_Mini_Yandex-LED#2_Line#1_C0847DD5015A_2020-01-12 16-49-58.csv"
            pattern = \
                (r"^Yandex_Mini[0-9A-Za-z\-_\s]+%s[0-9A-Za-z\-_#]+Line#\d+\w+\d+\-\d{2}\-\d{2}.+\d{2}\-\d{2}\-\d{2}\.csv$"
                 % chamber_name_pattern
                 )
        elif report_type == 'daily':
            # for reports with type "packaging_data" filename format was set as: Yandex_Mini_SN_Report_daily_20210114.csv
            if self.Parameters.report_template_type == "packaging_data":
                # Yandex_Mini_SN_Report_daily_20200920
                pattern = r"^Yandex_Mini_SN_Report_daily_\d{8}\.csv$"
            else:
                # NOTE: the chamber name is required here regardless of strict_check
                pattern = \
                    (r"^Yandex_Mini[0-9A-Za-z\-_]+%s[0-9A-Za-z\-_]+Line#\d+_{0,2}\d{4}\-\d{2}\-\d{2}\.csv$"
                     % chamber_literal
                     )

        if re.match(pattern, filename):
            return True

        logging.warning("%s does not matched %s pattern", filename, report_type)
        return False

    def _recursive_sftp(self, sftp, report_type, chamber_name, path, max_depth=16, files_limit=100, current_depth=0):
        """
        Collect report files from the directory tree below ``path``.

        Layout of the returned dict:

        * 'per-device': ``{date: {filename: full_path}}`` -- only first-level
          directories named like ``[_]YYYY-MM-DD...`` are accepted, files found
          deeper are flattened into their date node;
        * 'daily' (top level only): ``{date: {filename: full_path}}`` with the
          date taken from the file name.

        :param sftp: paramiko SFTP connection handler
        :param report_type: 'per-device' or 'daily'
        :param chamber_name: chamber name, passed to the filename check
        :param path: start directory
        :param max_depth: subdirectory recursion maximum depth
        :param files_limit: maximum number of report files to collect
        :param current_depth: depth of ``path`` relative to the start directory
        :return: tuple ``(reports, reports_count)``
        """
        reports = dict()
        reports_count = 0

        # If max_depth is 0, then maximum depth was reached, just return empty result
        if max_depth == 0:
            return reports, 0

        per_device_root = (report_type == 'per-device') and (current_depth == 0)
        daily_root = (report_type == 'daily') and (current_depth == 0)

        logging.info("Getting sftp list for %s, files limit %d", path, files_limit)
        # loop over list of SFTPAttributes (files with modes)
        for attr in sftp.listdir_attr(path):
            # Reports limit control
            if reports_count >= files_limit:
                break

            # Remote paths are always POSIX paths
            full_path = posixpath.join(path, attr.filename)

            if stat.S_ISDIR(attr.st_mode):
                node_name = None
                # If 1-level sub-directory is date, define nodename as this date
                if per_device_root and re.match(r"^_{0,1}\d{4}\-\d{2}\-\d{2}", attr.filename):
                    node_name = attr.filename.strip().lstrip('_')

                if per_device_root and node_name is None:
                    # Do not place reports to the root node, just 'YYYY-MM-DD' root nodes accepted.
                    # Such a directory is not listed at all: its files would be dropped
                    # anyway, but would still eat up the files limit.
                    logging.warning('Root node collections are not allowed, source %s', full_path)
                    continue

                # Recurse sub-directories
                files, files_count = self._recursive_sftp(sftp,
                                                          report_type,
                                                          chamber_name,
                                                          full_path,
                                                          max_depth - 1,
                                                          files_limit - reports_count,
                                                          current_depth + 1)
                reports_count += files_count

                if node_name is not None:
                    # "2019-11-29" and "_2019-11-29" map to the same node: merge them
                    reports.setdefault(node_name, {}).update(files)
                else:
                    reports.update(files)
                continue

            # Bypass top level dir for per-device reports: files placed there would
            # be counted and parsed but never written to YT nor archived.
            if per_device_root:
                continue

            # Check filename corresponds to report type
            if not self._check_report_type(attr.filename,
                                           report_type,
                                           chamber_name,
                                           self.Parameters.report_strict_check
                                           ):
                continue

            # Exclude symlinks (and anything else that is not a regular file) from the list
            if not stat.S_ISREG(attr.st_mode):
                continue

            if not daily_root:
                reports[attr.filename] = full_path
                reports_count += 1
                continue

            report_date = None
            if self.Parameters.report_template_type == "packaging_data":
                report_date = attr.filename[-12:-4]  # get date from line like 'Yandex_Mini_SN_Report_daily_20210111.csv'
            else:
                date_search = re.search(r"\d{4}\-\d{2}\-\d{2}", attr.filename)
                logging.debug("Searching result %s", date_search)
                if date_search:
                    report_date = date_search.group(0)

            if report_date:
                # Several daily files may share a date (e.g. different lines): keep them all
                reports.setdefault(report_date, {})[attr.filename] = full_path
                reports_count += 1
            else:
                logging.warning("Couldn't create  DAILY report node")

        return reports, reports_count

    def _get_reports(self, sftp, sftp_dir, yt_dir, report_type):  # 'LED_Test_Report', 'led', 'per-device'
        """
        List report files of one SFTP directory.

        :param sftp: paramiko SFTP connection handler
        :param sftp_dir: directory name below the 'SFTP reports base path'
        :param yt_dir: YT directory name; also used as the chamber name for the
            filename check and as the single key of the result
        :param report_type: 'per-device' or 'daily'
        :returns: ``{yt_dir: reports_tree}`` with the files to be processed, or an
            empty dict when nothing was found
        """
        logging.info('Listing %s reports...', report_type)

        if report_type == 'per-device':
            # Maximum per-device report max_depth due wrong placement,
            # i.e. like ./YYYY-MM-DD/ATS/<MAC>/PASS/*.csv
            max_depth = self.Parameters.MAX_PER_DEVICE_DEPTH
        else:
            max_depth = 1

        # Remote paths are always POSIX paths
        remote_dir = posixpath.join(str(self.Parameters.reports_dir), sftp_dir)

        reports, reports_count = self._recursive_sftp(sftp,
                                                      report_type,
                                                      yt_dir,
                                                      remote_dir,
                                                      max_depth,
                                                      int(self.Parameters.reports_limit))

        all_reports = dict()
        if reports and reports_count:
            all_reports[yt_dir] = reports
        elif reports or reports_count:
            # Only one of the two is non-empty: the listing is inconsistent
            logging.warning("Reports count is %s, but reports dictionary lenght is %s", reports_count, len(reports))

        logging.info('found %d reports', reports_count)
        logging.debug('Reports tree: %s', reports)
        # TODO: we can send a signal 'unproccessed reports found'

        return all_reports

    def _prepare_table(self, yt_path):
        """
        Create the destination YT table if it does not exist yet.

        :param yt_path: YT relative path to the table (joined onto 'YT table base path')
        :returns: pair (yt_client, yt_table_path) to write to; the table path is
            opened in append mode
        """
        from yt.wrapper import YtClient, TablePath

        token = sdk2.Vault.data("robot-quasar-yt-token")
        # Honour the 'YT proxy' parameter; "hahn" used to be hard-coded here and
        # stays the fallback for an empty value
        proxy = str(self.Parameters.yt_proxy) or "hahn"
        client = YtClient(proxy, token)

        table = posixpath.join(self.Parameters.yt_base_path, yt_path)
        logging.info("_prepare_table yt_path=%s", yt_path)

        if self.Parameters.report_template_type == "packaging_data":
            # SERIAL_NUMBER,UUID,MAC,CARTON_DATETIME
            schema = [
                {'name': 'SOURCE FILENAME', 'type': 'string'},
                {'name': 'SOURCE FULLPATH', 'type': 'string'},
                {'name': 'SERIAL NUMBER', 'type': 'string'},
                {'name': 'UUID', 'type': 'string'},
                {'name': 'MAC', 'type': 'string'},
                {'name': 'CARTON_DATETIME', 'type': 'string'},
                {'name': 'METADATA', 'type': 'any'},
            ]
        else:
            # a schema for resulting table, matches fields in PTS reports JSON:
            #   "SOURCE FILENAME": "Yandex_Mini_FG_ATS_Line#1_C0847D835646_2019-11-13 15-57-00",
            #   "SOURCE FULLPATH": "/opt/ftp/yandexmini/Yandex_Mini_Test_File/CI_Test_Report/2019-11-29/PASS/... .csv",
            #   "SERIAL NUMBER": "C0847D835646",
            #   "DEVICE ID": "FF98F029B07FCBEBCCFA60FC",
            #   "USER SN": "193300013504B",
            #   "MODULE NO.": "Yandex_Mini",
            #   "PD LINE": "Line#1",
            #   "STATION": "FG_ATS",
            #   "DATE TIME": "2019-11-13 15:57:00",
            #   "TEST DURATION": "71.68",
            #   "TEST RESULT": "FAIL",
            #   "STEPS": "...",
            #   "OTHER DATA": "...",
            #   "METADATA": "..."
            schema = [
                {'name': 'SOURCE FILENAME', 'type': 'string'},
                {'name': 'SOURCE FULLPATH', 'type': 'string'},
                {'name': 'SERIAL NUMBER', 'type': 'string'},
                {'name': 'DEVICE ID', 'type': 'string'},
                {'name': 'USER SN', 'type': 'string'},
                {'name': 'MODULE NO.', 'type': 'string'},
                {'name': 'PD LINE', 'type': 'string'},
                {'name': 'STATION', 'type': 'string'},
                {'name': 'DATE TIME', 'type': 'string'},
                {'name': 'TEST DURATION', 'type': 'string'},
                {'name': 'TEST RESULT', 'type': 'string'},
                {'name': 'STEPS', 'type': 'any'},
                {'name': 'OTHER DATA', 'type': 'any'},
                {'name': 'METADATA', 'type': 'any'},
            ]

        table_path = TablePath(table, append=True)

        logging.info('Uploading to %s', table)

        if not client.exists(table_path):
            logging.info('It does not exist, creating..')
            client.create_table(table_path, attributes={'schema': schema})
        else:
            logging.info('It exists')

        return (client, table_path)

    def on_execute(self):
        """
        Task entry point.

        Reports the remote free space, then for every configured report type and
        every SFTP-to-YT directory mapping lists the reports and uploads them.
        The SFTP connection is closed when the work is done or has failed.
        """
        self._report_freespace()

        sftp = self.connect_sftp()
        try:
            # The mapping does not depend on the report type: parse it once
            dirs = self._get_sftp_yt_map()

            for raw_report_type in str(self.Parameters.report_type_list).split(','):
                # Tolerate "daily, per-device" and stray commas
                report_type = raw_report_type.strip()
                if not report_type:
                    continue

                for sftp_dir, yt_dir in dirs.items():
                    reports = self._get_reports(sftp, sftp_dir, yt_dir, report_type)
                    if not reports:
                        continue

                    # Every directory gets its own time budget
                    end_time = time.time() + self.Parameters.run_time

                    processed = self._upload_reports(reports, sftp, end_time, report_type)
                    logging.info("%d reports processed", len(processed))
                    logging.info(processed)

                    # TODO: log the reports that were listed but not processed
        finally:
            # Best-effort cleanup; a failure here must not mask the real error
            try:
                transport = sftp.get_channel().get_transport()
                sftp.close()
                transport.close()
            except Exception:
                logging.warning('Failed to close SFTP connection: ', exc_info=True)

    def _upload_reports(self, reports, sftp_client, end_time, prefix):
        """
        The 'do the work' function to process reports.

        Walks the nested report tree, parses every CSV report, appends the rows
        that are not in the destination YT table yet, pushes PASS/FAIL counters
        to Solomon and (when the 'archivate' parameter is set) moves the uploaded
        reports below the archive subdir, leaving a symlink behind. Reports that
        cannot be parsed are moved below 'BAD' the same way.

        :param dict reports: nested report tree as returned by ``_get_reports``;
            dict values are sub-nodes, other values are remote file paths
        :param SFTPClient sftp_client: to get from
        :param end_time: max time (epoch seconds) to run until, to prevent stucks
        :param str prefix: logging prefix; also the last component of the root YT path

        :returns: list of report paths processed
        """
        from paramiko import SSHException

        reports_dir = str(self.Parameters.reports_dir)
        reports_root = reports_dir.rstrip('/') + '/'

        def sftp_mkdir_p(remote_directory):
            """Change to this directory, recursively making new folders if needed.
            Returns True if any folders were created."""
            if remote_directory == '/':
                # absolute path so change directory to root
                sftp_client.chdir('/')
                return
            if remote_directory == '':
                # top-level relative directory must exist
                return
            try:
                sftp_client.chdir(remote_directory)  # sub-directory exists
            except IOError:
                # Remote paths are always POSIX paths
                dirname, basename = posixpath.split(remote_directory.rstrip('/'))
                sftp_mkdir_p(dirname)  # make parent directories
                sftp_client.mkdir(basename)  # sub-directory missing, so created it
                sftp_client.chdir(basename)
                return True

        def relative_to_reports_dir(report_path):
            """Return report_path relative to the reports base directory."""
            # NOTE: str.lstrip(reports_dir) strips a *character set* and would also
            # eat the head of any directory name built from those characters.
            if report_path.startswith(reports_root):
                report_path = report_path[len(reports_root):]
            return report_path.lstrip('/')

        def archive_target(report_path, subdir):
            """Return the path of report_path mirrored below <reports_dir>/<subdir>."""
            return posixpath.join(reports_dir, subdir, relative_to_reports_dir(report_path))

        def discard_existing(remote_path):
            """Remove a stale copy at remote_path, if there is one."""
            try:
                sftp_client.remove(remote_path)
            except IOError:
                # Deliberately ignored: usually there is simply nothing to replace
                pass

        def move_broken(b_report):
            archive_path = archive_target(b_report, 'BAD')
            logging.info('[BAD!] moving bad report to %s', archive_path)
            sftp_mkdir_p(archive_path.rsplit("/", 1)[0])

            # lstat, not stat: a symlink left by a previous run must not be followed,
            # otherwise the copy it points to would be removed below
            attr = sftp_client.lstat(b_report)
            if not stat.S_ISREG(attr.st_mode):
                return

            # Move existing report and create symlink
            discard_existing(archive_path)
            sftp_client.rename(b_report, archive_path)
            sftp_client.symlink(archive_path, b_report)

        def archivate(a_report):
            archive_path = archive_target(a_report, self.Parameters.reports_archive_subdir)
            logging.info('archiving report to %s', archive_path)
            sftp_mkdir_p(archive_path.rsplit("/", 1)[0])

            try:
                # lstat, not stat: see move_broken
                attr = sftp_client.lstat(a_report)
                if stat.S_ISLNK(attr.st_mode):
                    logging.info('%s is already a symlink, skipping', a_report)
                    return

                # Move existing report and create symlink
                discard_existing(archive_path)
                sftp_client.rename(a_report, archive_path)
                sftp_client.symlink(archive_path, a_report)
            except IOError as e:
                # errno may be None, so it must not be formatted with %d
                logging.exception("Unable to archivate %s, I/O error(%s): %s", a_report, e.errno, e.strerror)

            # TODO: we can send a signal 'archived a report'

        def without_nulls(row):
            """Return a copy of row without None-valued columns."""
            return dict((key, value) for key, value in row.items() if value is not None)

        def upload_collection(yt_path, collected_reports):
            processed_list = list()
            data = list()

            for n, (collection_key, collection_data) in enumerate(collected_reports.items(), start=1):
                if time.time() > end_time:
                    logging.info('End time of %d reached, exiting', end_time)
                    break

                if isinstance(collection_data, dict):
                    # Collect reports from descending node recursively
                    yt_path_collection = posixpath.join(yt_path, collection_key)
                    processed, parsed_data = upload_collection(yt_path_collection, collection_data)
                    logging.info('[%d of %d %s] processing node %s of %d reports',
                                 n,
                                 len(collected_reports),
                                 prefix,
                                 collection_key,
                                 len(collection_data))

                    # Write reports to YT table if found
                    if parsed_data:
                        logging.info("Data collected length %d", len(parsed_data))

                        yt_client, yt_table_path = self._prepare_table(yt_path_collection)

                        # Do not add duplicated rows. Null columns are ignored in the
                        # comparison: presumably the reader returns schema columns that
                        # a row never had as null -- TODO confirm against YT
                        existing_data = [without_nulls(row)
                                         for row in yt_client.read_table(yt_table_path, format="json")]
                        new_data = [row for row in parsed_data if without_nulls(row) not in existing_data]

                        # Add new rows to the YT table
                        logging.info("New data rows: %d", len(new_data))

                        yt_client.write_table(yt_table_path, new_data, format="json", raw=False)

                        processed_list += processed

                        # Collect and push statistics to Solomon.
                        # The table path ends with <report type>/<chamber>/<date>
                        t_passed = 0
                        t_failed = 0
                        rsplit_yt_table_path = str(yt_table_path).rsplit("/", 3)
                        reports_type = rsplit_yt_table_path[1]
                        chamber_name = rsplit_yt_table_path[2]
                        date_of_tests = rsplit_yt_table_path[3]

                        for row in yt_client.read_table(yt_table_path, format="json"):
                            try:
                                if row['TEST RESULT'] == 'PASS':
                                    t_passed += 1
                                if row['TEST RESULT'] == 'FAIL':
                                    t_failed += 1
                            except KeyError:
                                continue

                        logging.info("Collect statistics found %d passed, %d failed reports", t_passed, t_failed)
                        if t_passed or t_failed:
                            push_report_summary_to_solomon(
                                reports_type,
                                date_of_tests,
                                chamber_name,
                                t_passed,
                                t_failed)

                        if self.Parameters.archivate:
                            for report_file in processed:
                                archivate(report_file)
                else:
                    try:
                        # Collect and parse reports. The path is kept as text so that
                        # it works the same way whether str is bytes or unicode.
                        path = collection_data.strip()
                        size = sftp_client.stat(path).st_size

                        logging.info('[%d of %d %s] processing report %s of %d bytes',
                                     n,
                                     len(collected_reports),
                                     prefix,
                                     path,
                                     size)

                        try:
                            json_data = json.loads(
                                chamber_csv2json(sftp_client, path, size, self.Parameters.report_template_type))
                            if json_data:
                                data += json_data
                                # TODO: we can send a signal 'good report happened'
                            else:
                                logging.info('%s contains broken data', path)
                                move_broken(path)
                        except IndexError:
                            # A row with too few columns: the report is malformed
                            move_broken(path)

                        processed_list.append(path)

                    except (SSHException, socket.error):
                        logging.exception('Communication problem, aborting')
                        break
                    except Exception:
                        logging.exception('Invalid collection data')

                        # TODO: we can send a signal 'bad report happened'

            return processed_list, data

        processed, root_data = upload_collection(posixpath.join('yandexmini', 'raw', prefix), reports)
        if root_data:
            # Rows parsed at the root level belong to no table and are not uploaded
            logging.warning("Root data contains %d records", len(root_data))
            logging.info(root_data)

        return processed

    def connect_ssh(self):
        """
        Open an SSH connection to the remote host with the key exported from YAV.

        Unknown host keys are accepted with a warning (``WarningPolicy``).

        :return: connected `paramiko.SSHClient`
        """
        logging.info('connecting to SSH..')

        import paramiko as p

        params = self.Parameters

        client = p.SSHClient()
        client.set_missing_host_key_policy(p.client.WarningPolicy())  # FIXME: maybe not very good?

        private_key = self._get_pkey(params.fenda_yav_secret_version, params.fenda_yav_ssh_key)
        client.connect(
            hostname=params.host,
            port=params.ssh_port,
            username=params.ssh_user,
            pkey=private_key,
        )

        return client

    def connect_sftp(self):
        """
        Open an SFTP session to the remote host with the key exported from YAV.

        The transport uses compression and the SFTP channel gets a 60 second
        timeout. When the connection or the channel setup fails, the transport is
        closed before the error is re-raised.

        :return: connected `paramiko.SFTPClient`
        """
        logging.info('connecting to SFTP..')

        import paramiko as p

        transport = p.Transport((self.Parameters.host, self.Parameters.port))
        try:
            transport.use_compression(True)
            transport.connect(username=self.Parameters.user,
                              pkey=self._get_pkey(
                                  self.Parameters.fenda_yav_secret_version,
                                  self.Parameters.fenda_yav_sftp_key
                              ))
            sftp = p.SFTPClient.from_transport(transport)
            sftp.get_channel().settimeout(60.0)  # 1 minute timeout
        except Exception:
            # Do not leak the socket of a half-opened connection
            transport.close()
            raise

        logging.info('connected!')

        return sftp

    def get_free_space(self, storage_name):
        """
        Ask the remote host over SSH (``df``) how much of a storage is free.

        :param storage_name: device or mount point passed to ``df``; it is put
            into the shell command unquoted, so it must come from trusted code
        :return: free space on storage in shares of 1 (e.g. 0 for no free space,
            0.5 for half space used), or None when it could not be determined
        """

        ssh = None

        try:
            ssh = self.connect_ssh()
            # for example storage /dev/sda1 mapped to /opt/ftp
            _, out, _ = ssh.exec_command("df " + str(storage_name))

            # out looks like:
            #   Filesystem      1K-blocks     Used  Available Use% Mounted on
            #   /dev/sda1      3845577736 40155996 3610054524   2% /opt/ftp
            report = out.read()
            if isinstance(report, bytes):
                # The channel yields bytes; splitting them with a text separator fails
                report = report.decode('utf-8', 'replace')

            # Second line, fifth column is the "Use%" value
            usage_percent = int(report.split('\n')[1].split()[4].replace('%', ''))
            return 1.0 - (usage_percent / 100.0)
        except Exception:
            # Best effort: the caller treats None as "unknown"
            logging.warning('Failed to get disk space: ', exc_info=True)
            return None
        finally:
            if ssh is not None:
                try:
                    ssh.close()
                except Exception:
                    logging.warning('Failed to close ssh connection: ', exc_info=True)
