import base64
import bisect
from collections import Counter, OrderedDict
import unicodecsv as csv
import datetime
import itertools
import json
from logging import getLogger
import numbers
import os
import pandas
import pytz
import random
import requests
import re
import retry
import shutil
import time
from uuid import UUID
from yt.wrapper import TablePath

from django.db.transaction import atomic
from django.db.models import Q
from django.utils import timezone

from cars.carsharing.models.car import Car
from cars.carsharing.models.car_tag_photo import CarsharingTagPhoto
from cars.carsharing.models.car_tags_history import CarTagsHistory
from cars.core.geocoder import Geocoder
from cars.core.sender import EmailNotifier
from cars.core.toloka import TolokaProjectManager
from cars.core.util import import_class, make_yt_client
from cars.refuel.models.refuel_recognition import (
    RefuelRecognitionTaskSuite,
    RefuelRecognitionTask
)
from cars.refuel.models.refuel_report import RefuelReport
from cars.refuel.models.refuel_tags_additional_info import RefuelTagsAdditionalInfoTimestamp
from cars import settings
from cars.users.models.user import User


# Module-level logger named after this module.
LOGGER = getLogger(__name__)

# Timezone used below to interpret naive datetimes and to name daily tables.
moscow_timezone = pytz.timezone('Europe/Moscow')


def _get_timestamp(dt):
    '''Return whole seconds between the Unix epoch and ``dt``.

    A naive ``dt`` is interpreted as Moscow local time; an aware ``dt`` is
    used as is. The fractional part of the difference is truncated by ``int``.
    '''
    aware_dt = moscow_timezone.localize(dt) if dt.tzinfo is None else dt
    unix_epoch = pytz.utc.localize(datetime.datetime(1970, 1, 1))
    elapsed = aware_dt - unix_epoch
    return int(elapsed.total_seconds())


def _timestamp_to_moscow_datetime(ts):
    '''Convert a Unix timestamp to an aware datetime in the Moscow timezone.'''
    naive_utc = datetime.datetime.utcfromtimestamp(ts)
    aware_utc = pytz.utc.localize(naive_utc)
    return aware_utc.astimezone(moscow_timezone)


def try_convert_to_float(v, fail=False):
    '''Convert a string or number to ``float``, accepting a decimal comma.

    Values that are neither ``str`` nor ``numbers.Number`` are returned
    unchanged. Strings that start with ``<digits>,<digits>`` (optionally
    preceded by whitespace and a sign) have their commas replaced by dots
    before conversion, so ``'1,5'`` and ``'-1,5'`` both convert.

    :param v: value to convert
    :param fail: when true, re-raise the conversion error instead of
        returning ``None``
    :return: the float, ``None`` on a failed conversion (``fail=False``),
        or ``v`` itself for unsupported types
    '''
    if not isinstance(v, (str, numbers.Number)):
        return v
    try:
        # The sign and leading whitespace are optional so that negative or
        # padded decimal-comma values are recognised as well.
        if isinstance(v, str) and re.match(r'\s*[+-]?\d+,\d+', v):
            v = v.replace(',', '.')
        return float(v)
    except Exception:
        LOGGER.exception('failed to convert %s to float', v)
        if fail:
            raise
        return None


@retry.retry(tries=5, delay=1, backoff=2)
def _download_photo(url, timeout=60):
    '''Download ``url`` and return the response body as bytes.

    The call is retried by the ``retry`` decorator (5 tries, delay starting
    at 1 second and doubling). A timeout is passed to ``requests`` so that an
    unresponsive server cannot block the caller forever; a timed-out attempt
    raises and is retried like any other failure.

    :param url: address of the photo
    :param timeout: timeout in seconds handed to ``requests.get``
    :return: raw response content
    :raises requests.RequestException: when every attempt fails
    '''
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    return resp.content


def _encode(s):
    '''to bytes'''
    return s.encode('utf8') if isinstance(s, str) else s


class RefuelTagsAdditionalInfoManager:
    '''Export per-tag geo and duration info to daily YT tables and read it back.

    Tables live under ``<yt_data_dir>/tag_histories_additional_info/geo`` and
    ``.../duration`` and are named by Moscow date. One
    ``RefuelTagsAdditionalInfoTimestamp`` row per info type stores the
    watermark (a history timestamp) up to which data was already exported.
    '''

    def __init__(self, yt_client, yt_data_dir, geocoder):
        self._yt_client = yt_client
        self._yt_data_dir = yt_data_dir
        self._geocoder = geocoder

    def get_tag_histories_additional_info(self, table_name):
        '''Return ``{tag_id: merged record}`` for the given daily table name.

        Duration records are loaded first and geo records are merged on top
        of them, so geo values win for keys present in both.
        '''
        tag_additional_info = {}
        for get_info_method in (
            self._get_tag_histories_duration_info,
            self._get_tag_histories_geo_info,
        ):
            for tag_id, info_dict in get_info_method(table_name).items():
                tag_additional_info.setdefault(tag_id, {}).update(info_dict)
        return tag_additional_info

    def _get_tag_histories_info_by_path(self, path):
        '''Read a table into ``{tag_id: record}``; a missing table gives ``{}``.

        When a tag_id occurs several times the last record read is kept.
        '''
        if not self._yt_client.exists(path):
            return {}
        return {rec['tag_id']: rec for rec in self._yt_client.read_table(path)}

    def _get_tag_histories_duration_info(self, table_name):
        return self._get_tag_histories_info_by_path(
            self._get_tag_histories_duration_info_path(table_name)
        )

    def _get_tag_histories_geo_info(self, table_name):
        return self._get_tag_histories_info_by_path(
            self._get_tag_histories_geo_info_path(table_name)
        )

    def save_tag_histories_additional_info(self, car_tags_history):
        '''Export geo info and then duration info for the given histories.

        :param car_tags_history: mapping of tag_id to a non-empty list of tag
            history records
        '''
        self._save_tag_histories_geo_info(car_tags_history)
        self._save_tag_histories_duration_info(car_tags_history)

    @staticmethod
    def _advance_watermark(timestamp_obj, new_timestamp):
        '''Save ``new_timestamp`` as the watermark unless it would move it back.

        An empty or narrowed history set yields a small (even negative)
        candidate value; storing it would make the next run export, and
        append, rows that were already written.
        '''
        timestamp_obj.timestamp = max(timestamp_obj.timestamp, new_timestamp)
        timestamp_obj.save()

    def _save_tag_histories_geo_info(self, car_tags_history):
        '''Append location rows for tag groups newer than the geo watermark.

        A group is exported when its latest history timestamp is above the
        stored watermark and not above ``max_timestamp``. Location is taken
        from the snapshot of the first record of the group and resolved to a
        name through the geocoder.
        '''
        last_timestamp_obj = self._get_last_tags_geo_info_timestamp_obj()
        latest_by_group = [
            (tags, max(tag.history_timestamp for tag in tags))
            for tags in car_tags_history.values()
        ]
        # The upper bound is one below the newest timestamp seen, so groups
        # ending exactly at the newest timestamp are left for a later run.
        max_timestamp = max((ts for _, ts in latest_by_group), default=0) - 1

        table_data = {}
        for tags, ts in latest_by_group:
            if ts <= last_timestamp_obj.timestamp or ts > max_timestamp:
                continue
            first_record = tags[0]
            data = {
                'tag_id': str(first_record.tag_id),
                'latitude': None,
                'longitude': None,
                'geoname': None,
            }
            if first_record.snapshot:
                location = json.loads(first_record.snapshot)['data']['location']
                data['latitude'] = location['latitude']
                data['longitude'] = location['longitude']
                data['geoname'] = self._geocoder.most_relevant_name(
                    data['latitude'],
                    data['longitude']
                )
            for tn in self._get_tag_table_names(ts):
                table_data.setdefault(tn, []).append(data)

        for table_name, records in table_data.items():
            self._yt_client.write_table(
                self._get_tag_histories_geo_info_path(table_name),
                records,
            )
        self._advance_watermark(last_timestamp_obj, max_timestamp)

    def _save_tag_histories_duration_info(self, car_tags_history):
        '''Append duration rows for tag groups removed after the watermark.

        Duration is the difference between the first 'remove' record and the
        first 'set_performer' record of a group; groups lacking either action
        are skipped.
        '''
        last_timestamp_obj = self._get_last_tags_duration_timestamp_obj()

        # Newest 'remove' timestamp over all groups (first remove per group).
        max_timestamp = 0
        for cth_list in car_tags_history.values():
            remove_tags = [t for t in cth_list if t.history_action == 'remove']
            if remove_tags:
                max_timestamp = max(max_timestamp, remove_tags[0].history_timestamp)
        max_timestamp -= 1

        table_data = {}
        for tags in car_tags_history.values():
            set_performer_tags = [
                t for t in tags if t.history_action == 'set_performer'
            ]
            remove_tags = [
                t for t in tags if t.history_action == 'remove'
            ]
            if not (set_performer_tags and remove_tags):
                continue
            set_performer_timestamp = set_performer_tags[0].history_timestamp
            remove_timestamp = remove_tags[0].history_timestamp
            if (
                    remove_timestamp <= last_timestamp_obj.timestamp
                    or remove_timestamp > max_timestamp
            ):
                continue

            data = {
                'tag_id': str(tags[0].tag_id),
                'tag_duration': remove_timestamp - set_performer_timestamp,
            }
            for tn in self._get_tag_table_names(remove_timestamp):
                table_data.setdefault(tn, []).append(data)

        for table_name, records in table_data.items():
            self._yt_client.write_table(
                self._get_tag_histories_duration_info_path(table_name),
                records,
            )
        self._advance_watermark(last_timestamp_obj, max_timestamp)

    def _get_tag_histories_geo_info_path(self, table_name):
        '''Return the geo table path for ``table_name`` (built with ``append=True``).'''
        return TablePath(
            os.path.join(self._yt_data_dir, 'tag_histories_additional_info/geo', table_name),
            append=True
        )

    def _get_tag_histories_duration_info_path(self, table_name):
        '''Return the duration table path for ``table_name`` (built with ``append=True``).'''
        return TablePath(
            os.path.join(self._yt_data_dir, 'tag_histories_additional_info/duration', table_name),
            append=True
        )

    def _get_last_tags_geo_info_timestamp_obj(self):
        '''Fetch (creating if needed) the watermark row for geo info.'''
        ts_obj, _ = RefuelTagsAdditionalInfoTimestamp.objects.get_or_create(
            type=RefuelTagsAdditionalInfoTimestamp.Type.GEO.value
        )
        return ts_obj

    def _get_last_tags_duration_timestamp_obj(self):
        '''Fetch (creating if needed) the watermark row for duration info.'''
        ts_obj, _ = RefuelTagsAdditionalInfoTimestamp.objects.get_or_create(
            type=RefuelTagsAdditionalInfoTimestamp.Type.DURATION.value
        )
        return ts_obj

    def _get_tag_table_names(self, timestamp):
        '''Return the daily table names a record with ``timestamp`` goes to.

        This is the Moscow date of the timestamp and, when the moment two
        hours earlier falls on a different date, that date as well.
        '''
        dt = _timestamp_to_moscow_datetime(timestamp)
        earlier_dt = dt + datetime.timedelta(hours=-2)
        dt_str = dt.strftime('%Y-%m-%d')
        earlier_dt_str = earlier_dt.strftime('%Y-%m-%d')
        if dt_str == earlier_dt_str:
            return [dt_str]
        return [dt_str, earlier_dt_str]


class RefuelPhotoManager:
    '''Runs the refuel photo recognition pipeline.

    Groups tag photos into recognition task suites, submits them to Toloka,
    stores the collected answers in YT, and builds and sends refuel reports.
    '''

    # Tag histories and photos older than this moment are not processed.
    DONT_REPORT_BEFORE = moscow_timezone.localize(
        datetime.datetime(2018, 9, 8)
    )
    # Stored in RefuelReport.version for every report this class creates.
    VERSION = 3
    # Passed as ``modifier`` when asking Toloka for the current pool.
    TOLOKA_POOL_MODIFIER = 'washer'

    # A task suite counts as finished once this many users completed it.
    TOLOKA_OVERLAP = 3
    # A chunk with fewer photos than this is not turned into a task suite.
    TOLOKA_TASK_SUITE_MIN_SIZE = 5
    # Upper size of a suite, control tasks included.
    TOLOKA_TASK_SUITE_MAX_SIZE = 17
    # Number of control tasks added to every suite sent to Toloka.
    TOLOKA_CONTROL_TASKS_COUNT = 4
    # NOTE(review): not referenced in the visible part of the file -
    # presumably the expected time for Toloka to finish a suite; confirm.
    TOLOKA_ESTIMATED_TIMEDELTA = datetime.timedelta(hours=1)

    def __init__(
            self, *, email_notifier, email_campaign, mailing_list, yt_client,
            yt_data_dir, fuel_level_logs_dir, toloka_manager,
            mds_bucket_name, mds_client, geocoder
    ):
        '''Store the collaborators; every argument is keyword-only.

        Also builds the ``RefuelTagsAdditionalInfoManager`` that shares the
        YT client, the YT data directory and the geocoder.
        '''
        # Filled on first use by _get_control_tasks.
        self._control_tasks = None

        # YT storage.
        self._yt_client, self._yt_data_dir = yt_client, yt_data_dir
        self._fuel_level_logs_dir = fuel_level_logs_dir

        # Toloka.
        self._toloka = toloka_manager

        # MDS storage for report files.
        self._mds_client, self._mds_bucket_name = mds_client, mds_bucket_name

        # E-mail delivery.
        self._email = email_notifier
        self._email_campaign, self._mailing_list = email_campaign, mailing_list

        self._additional_info_manager = RefuelTagsAdditionalInfoManager(
            yt_client, yt_data_dir, geocoder,
        )

    @classmethod
    def from_settings(cls, **kwargs):
        '''Build a manager from project settings.

        Any constructor argument may be overridden through ``kwargs``.
        '''
        recognition_conf = settings.REFUEL['photo_recognition']

        toloka_manager = TolokaProjectManager.from_settings(
            project_id=recognition_conf['toloka_project_id'],
            template_pool_id=recognition_conf['toloka_template_pool_id'],
        )
        yt_client = make_yt_client('data')
        yt_data_dir = recognition_conf['yt_data_dir']

        email_notifier = EmailNotifier.from_settings(
            sender_client_kwargs={
                'copy_to_mailing': 'drive-refuel-reports@yandex-team.ru'
            }
        )
        mailing_list = settings.REFUEL['refuel_report_mailing_list']
        mds_bucket_name = settings.REFUEL['mds']['mds_bucket_name']
        mds_client = import_class(settings.MDS['client_class']).from_settings()
        fuel_level_logs_dir = settings.REFUEL['refuel_extraction']['yt_target_dir']
        geocoder = Geocoder.from_settings()

        defaults = dict(
            email_notifier=email_notifier,
            email_campaign='refuel-report-2018-08-02',
            mailing_list=mailing_list,
            mds_bucket_name=mds_bucket_name,
            mds_client=mds_client,
            toloka_manager=toloka_manager,
            yt_client=yt_client,
            yt_data_dir=yt_data_dir,
            fuel_level_logs_dir=fuel_level_logs_dir,
            geocoder=geocoder,
        )
        return cls(**{**defaults, **kwargs})

    def create_refuel_recognition_task_suite(self, criteria=None):
        '''Group not-yet-recognised refuel photos into recognition task suites.

        Loads fueling tag histories newer than ``DONT_REPORT_BEFORE`` that
        have a performer, hands them to the additional-info manager, then
        splits the matching photos (ordered by submission time) into chunks
        of at most ``TOLOKA_TASK_SUITE_MAX_SIZE - TOLOKA_CONTROL_TASKS_COUNT``
        photos. Each chunk becomes one ``RefuelRecognitionTaskSuite`` with a
        ``RefuelRecognitionTask`` per photo, created in a single transaction.
        Processing stops at the first chunk smaller than
        ``TOLOKA_TASK_SUITE_MIN_SIZE``.

        :param criteria: optional extra ``Q`` filter for ``CarTagsHistory``
        '''
        get_tag_histories_since_timestamp = _get_timestamp(self.DONT_REPORT_BEFORE)

        if criteria is None:
            criteria = Q(pk__isnull=False)

        # TODO: limit time to necessary
        tag_histories = {}
        for th in CarTagsHistory.objects.filter(
            criteria &
            Q(
                performer__isnull=False,
                history_timestamp__gte=get_tag_histories_since_timestamp,
                tag__regex=r'^(fueling_(white|green|yellow|red)|urgent_fueling)\d+$',
                history_action__in=['set_performer', 'drop_performer', 'remove'],
            )
        ):
            tag_histories.setdefault(th.tag_id, []).append(th)
        self._additional_info_manager.save_tag_histories_additional_info(
            tag_histories
        )

        LOGGER.info('found %d tag histories', len(tag_histories))

        tag_ids = set(tag_histories.keys())
        tag_photos = list(CarsharingTagPhoto.objects.filter(
            tag_id__in=tag_ids,
            submitted_at__gt=self.DONT_REPORT_BEFORE,
            refuel_recognition_task__isnull=True,
        ).order_by(
            'submitted_at'
        ))

        LOGGER.info('uploading %d refuel photos to toloka', len(tag_photos))

        # Room must be left in every suite for the control tasks that are
        # added when the suite is sent to Toloka.
        max_not_control_tasks_count = (
            self.TOLOKA_TASK_SUITE_MAX_SIZE
            - self.TOLOKA_CONTROL_TASKS_COUNT
        )
        for from_ind in range(0, len(tag_photos), max_not_control_tasks_count):
            chunk = tag_photos[from_ind:from_ind + max_not_control_tasks_count]
            if len(chunk) < self.TOLOKA_TASK_SUITE_MIN_SIZE:
                # Only the trailing chunk can be short; it is left unprocessed.
                break
            LOGGER.info('creating task suite with %d tasks', len(chunk))
            bad_performers = set()
            with atomic():
                task_suite = RefuelRecognitionTaskSuite(
                    last_refuel_datetime=max(
                        tag_photo.submitted_at for tag_photo in chunk
                    )
                )
                task_suite.save()

                for tag_photo in chunk:
                    # The first loaded history record of the tag supplies the
                    # performer and the action stored on the task.
                    tag_history = tag_histories[tag_photo.tag_id][0]
                    try:
                        user_id = UUID(tag_history.performer, version=4)
                    except ValueError:
                        bad_performers.add(tag_history.performer)
                        continue
                    task = RefuelRecognitionTask(
                        user_id=user_id,
                        car_id=tag_photo.car.id,
                        photo_id=tag_photo.id,
                        task_suite_id=task_suite.id,
                        tag_history_action=tag_history.history_action,
                    )
                    task.save()
            if bad_performers:
                # No exception is being handled here, so log with error()
                # rather than exception().
                LOGGER.error(
                    'found %s tags with photo and bad performer (not valid uuid). '
                    'Example of bad performer: %s',
                    len(bad_performers), next(iter(bad_performers))
                )

    def run_refuel_recognition_toloka(self):
        '''Submit every task suite without a Toloka id to the current pool.

        Each suite is sent together with ``TOLOKA_CONTROL_TASKS_COUNT``
        control tasks. The ids from the response are then stored on the suite
        and, pairwise in order, on its tasks. A failed submission is logged
        and re-raised.
        '''
        unsent_suites = RefuelRecognitionTaskSuite.objects.filter(
            toloka_task_suite_id__isnull=True
        )
        pool_id = self._toloka.get_current_pool_id(modifier=self.TOLOKA_POOL_MODIFIER)

        for task_suite in unsent_suites:
            base_tasks = RefuelRecognitionTask.objects.filter(task_suite_id=task_suite.id)

            try:
                real_tasks = [self._task_to_toloka(t, pool_id=pool_id) for t in base_tasks]
                control_tasks = self._get_control_tasks(
                    count=self.TOLOKA_CONTROL_TASKS_COUNT
                )
                tasks_response = self._toloka.start_task_suite(
                    pool_id=pool_id,
                    tasks=real_tasks + control_tasks,
                )
            except Exception:
                LOGGER.exception('failed to start toloka task_suite for refuel photo recognition')
                raise

            response_tasks = tasks_response['tasks']

            with atomic():
                task_suite.toloka_task_suite_id = tasks_response['id']
                task_suite.save()

                # zip stops at the shorter input, so the trailing control
                # tasks in the response are not paired with anything.
                for task, response_task in zip(base_tasks, response_tasks):
                    task.toloka_task_id = response_task['id']
                    task.save()

    def make_necessary_reports(self):
        '''Create every required refuel report that does not exist yet.

        Required intervals come from ``_get_necessary_reports_time_tuples``
        as ``(from_date, to_date, version)`` tuples. Until all of them have a
        report, the method polls Toloka, stores finished results and builds
        the report for each interval with no task left in a NEW or FINISHED
        suite, sleeping 10 seconds between rounds.
        '''
        report_ready = dict.fromkeys(
            self._get_necessary_reports_time_tuples(), False
        )

        for interval in report_ready:
            already_made = RefuelReport.objects.filter(
                from_date=interval[0],
                to_date=interval[1],
                version=interval[2],
            ).exists()
            if already_made:
                report_ready[interval] = True
        LOGGER.info('Looking for refuel reports for: {}'.format(
            [interval for interval, ready in report_ready.items() if not ready]
        ))

        while not all(report_ready.values()):
            self._check_refuel_recognition_task_suite_status()
            self._fetch_finished_recognition_task_suites_results()

            for interval in report_ready:
                if report_ready[interval]:
                    continue

                unreported_statuses = (
                    RefuelRecognitionTaskSuite.Status.NEW.value,
                    RefuelRecognitionTaskSuite.Status.FINISHED.value,
                )
                has_unreported_tasks = RefuelRecognitionTask.objects.filter(
                    photo__submitted_at__gte=interval[0],
                    photo__submitted_at__lt=interval[1],
                    task_suite__status__in=unreported_statuses,
                ).exists()
                if has_unreported_tasks:
                    LOGGER.info('toloka tasks for date interval %s - %s not ready yet',
                                interval[0], interval[1])
                    continue

                from_date, to_date, _ = interval
                report = self.make_report(from_date, to_date)
                self._analyze_report(report)

                # An empty report is still recorded, just without a file.
                mds_key = filename = None
                if report:
                    mds_key, filename = self._save_and_upload_report(
                        report,
                        from_date=from_date,
                        to_date=to_date,
                    )

                RefuelReport.objects.create(
                    from_date=from_date,
                    to_date=to_date,
                    version=self.VERSION,
                    filename='{}.tar'.format(filename) if filename else None,
                    mds_key=mds_key,
                )
                report_ready[interval] = True

            if not all(report_ready.values()):
                LOGGER.info(
                    'Not all reports are ready. Ready reports dict: {}'.format(report_ready)
                )
                time.sleep(10)

    def send_missed_reports(self):
        '''Send every stored report that has an MDS key but no send time.

        After each successful send the report is stamped with the current
        time and saved.
        '''
        unsent_reports = RefuelReport.objects.filter(
            email_sent_at__isnull=True,
            mds_key__isnull=False,
        )
        for report in unsent_reports:
            send_kwargs = {
                'from_date': report.from_date,
                'to_date': report.to_date,
                'mds_key': report.mds_key,
                'filename': report.filename,
            }
            self._send_report(**send_kwargs)
            report.email_sent_at = timezone.now()
            report.save()

    def _get_control_tasks(self, count):
        if self._control_tasks is None:
            self._control_tasks = list(self._yt_client.read_table(
                os.path.join(self._yt_data_dir, 'control_tasks')
            ))
        return random.sample(list(self._control_tasks), count)

    def _check_refuel_recognition_task_suite_status(self):
        '''Mark NEW task suites as FINISHED once enough users completed them.

        "Enough" means at least ``TOLOKA_OVERLAP`` users, as reported by
        Toloka for the suite.
        '''
        new_suites = RefuelRecognitionTaskSuite.objects.filter(
            status=RefuelRecognitionTaskSuite.Status.NEW.value
        )
        for task_suite in new_suites:
            finished_users_count = self._toloka.get_users_finished_task_suite_count(
                task_suite.toloka_task_suite_id
            )
            if finished_users_count < self.TOLOKA_OVERLAP:
                continue
            task_suite.status = RefuelRecognitionTaskSuite.Status.FINISHED.value
            task_suite.save()

    def _fetch_finished_recognition_task_suites_results(self):
        '''Store Toloka answers of FINISHED task suites in YT.

        For every FINISHED suite the Toloka result is fetched and one record
        per task is collected, grouped by the Moscow date of the photo. The
        suite is then marked REPORTED. Suites whose result cannot be fetched
        are logged and skipped, so they stay FINISHED. A task with no answer
        in the result is logged and still recorded, with ``toloka_result``
        set to ``None``. Collected records are written to the per-date
        recognition results tables at the end.
        '''
        task_suites_to_report = RefuelRecognitionTaskSuite.objects.filter(
            status=RefuelRecognitionTaskSuite.Status.FINISHED.value
        )

        results = {}
        for task_suite in task_suites_to_report:
            try:
                toloka_task_suite_result = self._toloka.get_task_suite_result(
                    task_suite.toloka_task_suite_id
                )
            except Exception:
                LOGGER.exception('failed to get task suite result for task_suite %s (inner id)',
                                 task_suite.id)
                continue

            for task in RefuelRecognitionTask.objects.filter(
                task_suite_id=task_suite.id
            ):
                toloka_task_result = toloka_task_suite_result.get(
                    task.toloka_task_id
                )
                if toloka_task_result is None:
                    # No exception is being handled here, so log with
                    # error() rather than exception().
                    LOGGER.error('not found toloka result for task %s (inner id)', task.id)

                table_name = task.photo.submitted_at.astimezone(
                    moscow_timezone
                ).strftime('%Y-%m-%d')
                photo_timestamp = _get_timestamp(task.photo.submitted_at)
                recs = results.setdefault(table_name, [])
                recs.append({
                    'task_id': str(task.id),
                    'user_id': str(task.user_id),
                    'car_id': str(task.car_id),
                    'link': task.photo.link,
                    'tag_history_action': task.tag_history_action,
                    'type': task.photo.type,
                    'tag_id': str(task.photo.tag_id),
                    'toloka_result': toloka_task_result,
                    'timestamp': photo_timestamp,
                })

            # NOTE(review): the suite is marked REPORTED before its records
            # are written below; if a write fails, those records are not
            # fetched again on the next run.
            task_suite.status = RefuelRecognitionTaskSuite.Status.REPORTED.value
            task_suite.save()

        for table_name, records in results.items():
            self._yt_client.write_table(
                self._get_recognition_results_path(table_name),
                records,
            )

    def _get_recognition_results_path(self, table_name):
        '''Return the recognition results table path for ``table_name``.

        The path is built with ``append=True``.
        '''
        raw_path = os.path.join(self._yt_data_dir, 'recognition_results', table_name)
        return TablePath(raw_path, append=True)

    def _get_fuel_level_logs_path(self, table_name):
        return os.path.join(
            self._fuel_level_logs_dir,
            table_name
        )

    def _get_imei_to_fuel_percentage_timeline(self, record_iterator, period_seconds=60):
        imei_to_fuel_percentage_timeline = {}
        last_period_start = 0
        imei_last_period_max = {}

        for record in record_iterator:
            imei = record['imei']
            fuel_percentage = record['fuel_percentage']
            lst = imei_to_fuel_percentage_timeline.setdefault(
                imei, ([], [])
            )

            ts = record['timestamp']
            ts_period_start = int(ts - ts % period_seconds)

            if last_period_start == ts_period_start:
                imei_last_period_max[imei] = max(
                    imei_last_period_max.get(imei, 0),
                    fuel_percentage,
                )
            else:
                for c_imei, max_fuel_percentage in imei_last_period_max.items():
                    tup = imei_to_fuel_percentage_timeline[c_imei]
                    tup[0].append(max_fuel_percentage)
                    tup[1].append(last_period_start)
                imei_last_period_max = {
                    imei: fuel_percentage,
                }
                last_period_start = ts_period_start

        for imei, max_fuel_percentage in imei_last_period_max.items():
            tup = imei_to_fuel_percentage_timeline[imei]
            tup[0].append(max_fuel_percentage)
            tup[1].append(last_period_start)

        return imei_to_fuel_percentage_timeline

    def _table_names_for_period_iterator(self, from_datetime, to_datetime, timezone):
        '''Return localized datestrings from datetimes range'''

        from_datetime, to_datetime = tuple(
            dt.astimezone(timezone)
            for dt in (from_datetime, to_datetime)
        )
        d = from_datetime.date()
        to_date = to_datetime.date()

        if to_datetime == moscow_timezone.localize(
            datetime.datetime.combine(to_date, datetime.time(0))
        ):
            to_date -= datetime.timedelta(days=1)

        while d <= to_date:
            yield d.strftime('%Y-%m-%d')
            d += datetime.timedelta(days=1)

    def _fuel_level_table_names_for_period(self, from_datetime, to_datetime, timezone):
        '''Return localized datestrings from datetimes range'''

        need_table_names = list(self._table_names_for_period_iterator(
            from_datetime, to_datetime, timezone
        ))

        missed_table_names = set(need_table_names) - set(
            self._yt_client.list(self._fuel_level_logs_dir)
        )

        if not missed_table_names.issubset(set(need_table_names[-2:])):
            raise RuntimeError(
                'Not found daily fuel level logs for dates: {}'.format(
                    ', '.join(missed_table_names)
                )
            )

        need_table_names = [
            t for t in need_table_names
            if t not in missed_table_names
        ]
        if missed_table_names:
            need_table_names.append('current_stream')

        return need_table_names

    def _fuel_level_record_iterator(self, from_datetime, to_datetime):
        '''Yield fuel level records for the range widened by one hour each side.

        Tables are chosen by ``_fuel_level_table_names_for_period`` using
        Moscow dates and read one after another.
        '''
        margin = datetime.timedelta(hours=1)
        table_names = self._fuel_level_table_names_for_period(
            from_datetime - margin,
            to_datetime + margin,
            timezone=moscow_timezone,
        )
        for table_name in table_names:
            LOGGER.info('getting fuel level records from %s', table_name)
            table_path = self._get_fuel_level_logs_path(table_name)
            yield from self._yt_client.read_table(table_path)

    def _recognition_results_record_iterator(self, from_datetime, to_datetime):
        '''Yield ``(record, tag_additional_info)`` for results in the range.

        Daily tables (Moscow dates) are visited in order; inside each table
        records are sorted by ``timestamp`` and only those with
        ``from <= timestamp < to`` are yielded. ``tag_additional_info`` is
        the additional-info mapping loaded for the same table name. A table
        that does not exist contributes no records.
        '''
        from_timestamp = _get_timestamp(from_datetime)
        to_timestamp = _get_timestamp(to_datetime)

        table_names = self._table_names_for_period_iterator(
            from_datetime,
            to_datetime,
            timezone=moscow_timezone,
        )
        for table_name in table_names:
            tag_additional_info = (
                self._additional_info_manager.get_tag_histories_additional_info(
                    table_name
                )
            )
            LOGGER.info('getting recognition results from %s', table_name)

            path = self._get_recognition_results_path(table_name)
            records = []
            if self._yt_client.exists(path):
                records = sorted(
                    self._yt_client.read_table(path),
                    key=lambda r: r['timestamp'],
                )

            for record in records:
                if not from_timestamp <= record['timestamp'] < to_timestamp:
                    continue
                yield record, tag_additional_info

    def _get_fuel_level_percentage(
            self, *, timestamp, imei_to_fuel_percentage_timeline, imei,
            period_seconds, fuel_percentage_after_wait_seconds
    ):
        seconds_error = 1200
        ts_start = timestamp - timestamp % period_seconds
        ts_finish = timestamp + fuel_percentage_after_wait_seconds
        ts_finish -= ts_finish % period_seconds

        fuel_percentage_timeline = imei_to_fuel_percentage_timeline.get(imei)
        if not fuel_percentage_timeline:
            LOGGER.error(
                'not found fuel percentage timeline for imei {}'.format(imei)
            )
            return (None, None)

        fuel_percentage_list, time_list = fuel_percentage_timeline

        start_ind = bisect.bisect_left(time_list, ts_start)
        finish_ind = bisect.bisect_right(time_list, ts_finish)

        if not time_list:
            before_ind = None
        elif start_ind == len(time_list) or time_list[start_ind] >= ts_start:
            before_ind = start_ind - 1
        else:
            before_ind = start_ind
        if before_ind and ts_start - time_list[before_ind] > seconds_error:
            before_ind = None

        if finish_ind >= len(time_list) - 1:
            after_ind = None
        elif time_list[finish_ind] <= ts_finish:
            after_ind = finish_ind + 1
        else:
            after_ind = finish_ind
        if after_ind and time_list[after_ind] - ts_finish > seconds_error:
            after_ind = None

        fuel_level_percentage_before = None
        fuel_level_percentage_after = None
        if before_ind:
            fuel_level_percentage_before = fuel_percentage_list[before_ind]
        if after_ind:
            fuel_level_percentage_after = fuel_percentage_list[after_ind]

        return (fuel_level_percentage_before, fuel_level_percentage_after)

    def _normalize_toloka_value(self, v):
        '''Normalise one Toloka answer value.

        Strings are stripped and lower-cased; empty strings and strings
        containing ``'bad'`` become ``None``. The value is then converted to
        float when possible; if the conversion raises, the normalised value
        is returned as is.
        '''
        if isinstance(v, str):
            cleaned = v.strip().lower()
            v = cleaned if cleaned and 'bad' not in cleaned else None
        try:
            converted = try_convert_to_float(v, fail=True)
        except Exception:
            return v
        return converted

    def _add_toloka_result_to_user_report(self, user_report, toloka_result, *, when):
        if toloka_result:
            toloka_chosen = {}
            for param, val_list in toloka_result.items():
                val_list = [
                    self._normalize_toloka_value(v)
                    for v in val_list
                ]
                most_common_val, count = Counter(val_list).most_common(1)[0]
                if count > 1 and most_common_val not in ('bad', ''):
                    val = most_common_val
                else:
                    val = None
                toloka_chosen[param] = val
            user_report['is_washer'] = toloka_chosen.get('is_washer', False)
            user_report['spend_{}'.format(when)] = toloka_chosen.get('upper_value', None)
            user_report['total_spend_{}'.format(when)] = toloka_chosen.get('lower_value', None)
        else:
            LOGGER.error('user report without toloka result: %s', user_report)

    @staticmethod
    def _add_fuel_level_to_user_report(user_report, fuel_level_percs, car):
        for t in ('before', 'after'):
            fuel_level_percentage = fuel_level_percs[0 if t == 'before' else 1]
            if fuel_level_percentage is None:
                continue
            user_report['fuel_level_percentage_{}'.format(t)] = fuel_level_percentage
            tank_volume = car.model.fuel_tank_volume
            if tank_volume:
                user_report['fuel_level_amount_{}'.format(t)] = (
                    fuel_level_percentage * tank_volume / 100
                )

    def make_report(self, from_datetime, to_datetime):
        """Build per-user refuel reports for the given time window.

        Every record yielded by ``_recognition_results_record_iterator`` is
        turned into a report dict, enriched with the Toloka result and with
        telematics fuel levels, and merged with the other records that share
        the same tag id.

        :param from_datetime: start of the reporting window (datetime).
        :param to_datetime: end of the reporting window (datetime).
        :return: dict mapping user id to a list of report dicts sorted by
            their ``timestamp``.
        """
        period_seconds = 60  # seconds of telematics fuel level accuracy
        # time after fuel, when fuel level is the most accurate
        fuel_percentage_after_wait_seconds = 60 * 10

        LOGGER.info('Making refuel report from %s to %s',
                    from_datetime.strftime('%Y-%m-%d %H:%M:%S'),
                    to_datetime.strftime('%Y-%m-%d %H:%M:%S'))

        # get telematics fuel percentage data, compress it by minutes and save to structure
        # {imei: ([<max_fuel_percentage_for_period>, ...], [<period_start>, ...])}
        records = list(self._fuel_level_record_iterator(
            from_datetime,
            to_datetime,
        ))  # TODO - back to iterator
        imei_to_fuel_percentage_timeline = self._get_imei_to_fuel_percentage_timeline(
            records,
            period_seconds=period_seconds,
        )

        user_reports = {}

        for result, tag_additional_info in self._recognition_results_record_iterator(
                from_datetime,
                to_datetime,
        ):
            user_report = {
                'car_id': result['car_id'],
                'car_number': None,
                'datetime': None,
                'link_before': None,  # result['link'],
                'link_after': None,
                'link_washer': None,
                'tag_id': result['tag_id'],
                'timestamp': result['timestamp'],
                'spend_before': None,  # upper value
                'spend_after': None,
                'total_spend_before': None,
                'total_spend_after': None,
                'fuel_level_amount_before': None,
                'fuel_level_amount_after': None,
                'fuel_level_percentage_before': None,
                'fuel_level_percentage_after': None,

                'latitude': None,
                'longitude': None,
                'geoname': None,
                'tag_duration': None,
            }
            user_report.update(
                tag_additional_info.get(result['tag_id'], {})
            )

            user_report['datetime'] = _timestamp_to_moscow_datetime(
                result['timestamp']
            ).strftime('%Y-%m-%d %H:%M:%S')
            user_id = result['user_id']
            car = Car.objects.filter(id=result['car_id']).first()

            if car is None:
                # `.first()` returns None for an unknown id. Keep the row
                # (photo links and Toloka readings are still useful) but skip
                # everything that needs the car, instead of aborting the
                # whole report with an AttributeError.
                LOGGER.error(
                    'car %s not found, telematics skipped for tag %s',
                    result['car_id'],
                    result['tag_id'],
                )
            else:
                user_report['car_number'] = car.number
            tag_history_action = result['tag_history_action']

            self._add_toloka_result_to_user_report(
                user_report,
                result['toloka_result'],
                when=result['type'],
            )

            # NOTE(review): 'is_washer' is not among the defaults above; it is
            # presumably set by _add_toloka_result_to_user_report - confirm.
            if user_report['is_washer']:
                user_report['link_washer'] = result['link']
            else:
                user_report['link_{}'.format(result['type'])] = result['link']

            if car is not None:
                if tag_history_action in ('set_performer',):
                    time_shift = 0
                else:
                    time_shift = 5 * 60

                fuel_level_percs = self._get_fuel_level_percentage(
                    timestamp=(
                        result['timestamp']
                        - time_shift  # original author's open question: why `-` ?
                    ),
                    imei_to_fuel_percentage_timeline=imei_to_fuel_percentage_timeline,
                    imei=car.imei,
                    period_seconds=period_seconds,
                    fuel_percentage_after_wait_seconds=(
                        fuel_percentage_after_wait_seconds
                        + time_shift
                    ),
                )

                self._add_fuel_level_to_user_report(user_report, fuel_level_percs, car)

            # Records sharing a tag id describe one refuel: the first record
            # wins, later ones only fill fields that are still None.
            user_report_dict = user_reports.setdefault(user_id, {})
            tag_id = user_report['tag_id']
            if tag_id in user_report_dict:
                to_update = user_report_dict[tag_id]
                for key, value in user_report.items():
                    if to_update[key] is None:
                        to_update[key] = value
            else:
                user_report_dict[tag_id] = user_report

        return {
            user_id: sorted(
                reports_by_tag.values(),
                key=lambda user_report: user_report['timestamp']
            )
            for user_id, reports_by_tag in user_reports.items()
        }

    def _analyze_report(self, report):
        for user_reports in report.values():
            i = 0
            while i < len(user_reports):
                report = user_reports[i]

                # for param in (
                #     'spend_before',
                #     'spend_after',
                #     'total_spend_before',
                #     'total_spend_after',
                # ):
                #     report[param] = try_convert_to_float(report[param])

                spent = None
                if (
                        report['spend_after'] is not None and
                        report['spend_before'] is not None
                ):
                    spent = report['spend_after'] - report['spend_before']
                elif (
                        report['total_spend_after'] is not None and
                        report['total_spend_before'] is not None
                ):
                    spent = report['total_spend_after'] - report['total_spend_before']
                elif (
                        report['total_spend_before'] is not None and
                        i + 1 < len(user_reports) and
                        user_reports[i+1]['timestamp'] - report['timestamp'] < 3600 * 2 and
                        user_reports[i+1]['total_spend_before'] is not None
                ):
                    # next_spend_before = try_convert_to_float(
                    #     user_reports[i+1]['total_spend_before']
                    # )
                    next_spend_before = user_reports[i+1]['total_spend_before']
                    if next_spend_before:
                        spent = next_spend_before - report['total_spend_before']

                got = None
                if (
                        report['fuel_level_amount_before'] is not None and
                        report['fuel_level_amount_after'] is not None
                ):
                    got = (report['fuel_level_amount_after'] -
                           report['fuel_level_amount_before'])

                if spent is None or got is None:
                    suspicious = None
                else:
                    suspicious = (abs(spent - got) > 10)

                report.update({
                    'analytics__charger_spent_fuel': spent,
                    'analytics__car_got_fuel': got,
                    'analytics__suspicious': suspicious,
                })

                i += 1

    def _save_and_upload_report(self, report, *, from_date, to_date):
        """Write the report to an Excel workbook, archive it and upload it.

        A folder named after the reporting window is (re)created in the
        current directory, ``report.xlsx`` with one sheet per user is written
        into it, the folder is packed into a tar archive and the archive is
        passed to ``_upload_report_to_mds``.

        :param report: dict mapping user id to a list of report dicts; every
            dict must contain all exported columns, including the
            ``analytics__*`` keys.
        :param from_date: aware datetime, start of the reporting window.
        :param to_date: aware datetime, end of the reporting window.
        :return: tuple ``(mds_key, filename)``.
        """
        date_strings = tuple(
            x.astimezone(moscow_timezone).strftime('%Y-%m-%d_%H:%M')
            for x in (from_date, to_date)
        )
        filename = '{} - {}'.format(*date_strings)
        folder = './{}'.format(filename)
        archive_path = os.path.join(
            '.',
            filename,
        )

        # Start from an empty folder so a re-run does not mix in stale files.
        if os.path.exists(_encode(folder)):
            shutil.rmtree(_encode(folder))
        os.mkdir(_encode(folder))

        # (report key, column title) pairs in output order; identical for
        # every sheet, so built once.
        keys = [
            ('tag_id', 'tag_id'),
            ('datetime', 'Время'),
            ('car_number', 'Номер'),
            ('link_before', 'Фото до'),
            ('link_after', 'Фото после'),
            ('link_washer', 'Фото омывайки'),

            ('fuel_level_amount_before', 'Телематика: топлива до'),
            ('fuel_level_amount_after', 'Телематика: топлива после'),
            ('analytics__car_got_fuel', 'Телематика: прибавилось топлива'),

            ('spend_before', 'Пистолет: верхнее число до заправки'),
            ('total_spend_before', 'Пистолет: нижнее число до заправки'),
            ('spend_after', 'Пистолет: верхнее число после заправки'),
            ('total_spend_after', 'Пистолет: нижнее число после заправки'),
            ('analytics__charger_spent_fuel', 'Заправщик потратил топлива'),

            ('analytics__suspicious', 'Подозрительный'),

            ('geoname', 'Место заправки'),
            ('tag_duration', 'Время заправки (сек)'),
        ]

        # The context manager saves and closes the workbook on exit, also
        # when writing a sheet fails, and does not depend on the
        # `ExcelWriter.save()` method that newer pandas releases dropped.
        with pandas.ExcelWriter(
                os.path.join(folder, 'report.xlsx'),
                engine='openpyxl'
        ) as writer:
            for user_id, user_report_list in report.items():
                user = User.objects.filter(id=user_id).first()
                fullname = '{} {} {}'.format(
                    user.last_name, user.first_name, user.patronymic_name
                )
                sheet_name = fullname
                # NOTE: photos are exported as links only; downloading them
                # into the archive is not implemented here.

                prepared_rows = []
                for user_report in user_report_list:
                    row = OrderedDict()
                    for k, new_k in keys:
                        v = user_report[k]

                        if k == 'analytics__suspicious':
                            # Tri-state flag: unknown / yes / no.
                            if v is None:
                                v = ''
                            elif v:
                                v = 'да'
                            else:
                                v = 'нет'

                        row[new_k] = v
                    prepared_rows.append(row)
                data_frame = pandas.DataFrame(prepared_rows)
                data_frame.to_excel(writer, sheet_name)

        shutil.make_archive(archive_path, 'tar', _encode(folder))

        mds_key = '{}.tar'.format('_'.join(date_strings))
        self._upload_report_to_mds(
            key=mds_key,
            report_path='{}.tar'.format(archive_path),
        )

        return (mds_key, filename)

    def _get_time_tuples_by_day(self):
        """Yield day-long ``(from, to)`` windows that have no saved report.

        Windows start at the first Moscow midnight that is not earlier than
        ``self.DONT_REPORT_BEFORE`` and continue while the window start is
        earlier than "now minus one day". A window is skipped when a
        ``RefuelReport`` with the same bounds and ``self.VERSION`` exists.

        :return: generator of ``(report_from, report_to)`` aware datetimes.
        """
        day = datetime.timedelta(days=1)
        from_date = self.DONT_REPORT_BEFORE
        to_date = timezone.now() - day

        report_from = moscow_timezone.localize(datetime.datetime(
            from_date.year,
            from_date.month,
            from_date.day))
        # Round up to the next midnight if truncation moved the start back.
        if report_from < from_date:
            report_from += day

        while report_from < to_date:
            window = (report_from, report_from + day)
            # The window is a 2-tuple, so the version has to come from the
            # instance (indexing a third element raised IndexError).
            already_reported = RefuelReport.objects.filter(
                from_date=window[0],
                to_date=window[1],
                version=self.VERSION,
            ).exists()
            if not already_reported:
                yield window
            report_from += day

    def _get_time_tuples_by_shift_times(self, dont_report_before, now,
                                        shift_times):
        """Yield consecutive shift windows between two points in time.

        Shift boundaries are the given times of day (Moscow time) on every
        date, restricted to ``dont_report_before <= boundary <
        now - self.TOLOKA_ESTIMATED_TIMEDELTA``. Each pair of neighbouring
        boundaries is yielded as one window.

        The original docstring marked this method as 'slightly tested'.

        :param dont_report_before: aware datetime; earlier boundaries are
            dropped.
        :param now: aware datetime used as the current moment.
        :param shift_times: non-empty iterable of ``datetime.time``; it is
            not modified.
        :return: generator of ``(window_start, window_end)`` aware datetimes.
        :raises AssertionError: if ``shift_times`` is empty.
        """
        # Raised explicitly rather than with `assert`: under `python -O` an
        # empty list would otherwise make the loop below spin forever.
        if not shift_times:
            raise AssertionError('shift_times must not be empty')
        # Sort a copy so the caller's list is left untouched.
        ordered_times = sorted(shift_times)

        to_datetime = now - self.TOLOKA_ESTIMATED_TIMEDELTA

        # Boundaries are built in Moscow time, so the first calendar day must
        # be taken in Moscow time too; using the argument's own timezone
        # could skip the first boundary for zones east of Moscow.
        date = dont_report_before.astimezone(moscow_timezone).date()

        datetimes = []
        reached_end = False
        while not reached_end:
            for t in ordered_times:
                dt = moscow_timezone.localize(datetime.datetime.combine(
                    date,
                    t
                ))
                if dt < dont_report_before:
                    continue
                if dt >= to_datetime:
                    reached_end = True
                    break

                datetimes.append(dt)
            date += datetime.timedelta(days=1)

        # Neighbouring boundaries form the windows.
        yield from zip(datetimes, datetimes[1:])

    def _get_necessary_reports_time_tuples(self):
        """Return a lazy iterable of ``(from, to, version)`` report windows.

        Windows come from ``_get_time_tuples_by_shift_times`` with shift
        boundaries at 08:45 and 20:45, starting at ``self.DONT_REPORT_BEFORE``
        and using the current time as "now"; ``self.VERSION`` is appended to
        every window.
        """
        shift_starts = [
            datetime.time(hour=8, minute=45),
            datetime.time(hour=20, minute=45),
        ]
        windows = self._get_time_tuples_by_shift_times(
            dont_report_before=self.DONT_REPORT_BEFORE,
            now=timezone.now(),
            shift_times=shift_starts,
        )
        return (window + (self.VERSION,) for window in windows)

    def recognize_fresh_photos(self):
        """Create a refuel recognition task suite, then run the Toloka step."""
        # Order matters: the suite is created before the Toloka run starts.
        self.create_refuel_recognition_task_suite()
        self.run_refuel_recognition_toloka()

    def _upload_report_to_mds(self, *, key, report_path):
        with open(report_path, 'rb') as f:
            self._mds_client.put_object(
                key=key,
                bucket=self._mds_bucket_name,
                body=f.read(),
            )

    def _get_report_href(self, mds_key):
        return 'https://{}.s3.yandex.net/{}'.format(
            self._mds_bucket_name, mds_key
        )

    def _send_report(self, *, from_date, to_date, mds_key, filename):
        """Email the link to an uploaded report to the mailing list.

        :param from_date: aware datetime, start of the reporting window.
        :param to_date: aware datetime, end of the reporting window.
        :param mds_key: key of the uploaded report, used to build the link.
        :param filename: accepted for interface compatibility; not used.
        """
        datetime_format = '%Y-%m-%d %H:%M'
        args = {
            'from_date': from_date.astimezone(moscow_timezone).strftime(datetime_format),
            'to_date': to_date.astimezone(moscow_timezone).strftime(datetime_format),
            'href': self._get_report_href(mds_key=mds_key),
        }

        self._email.send(
            campaign=self._email_campaign,
            to_email=self._mailing_list,
            args=args,
            assert_ok=True,
            # attachments=attachments,
            # `async` is a reserved word since Python 3.7 and cannot be
            # written as a literal keyword argument there; unpacking a dict
            # passes exactly the same argument on every Python 3 version.
            **{'async': False}
        )

    def _task_to_toloka(self, refuel_recognition_task, *, pool_id):
        return {
            'pool_id': pool_id,
            'overlap': 3,  # TODO: fix - use default overlap from pool
            'input_values': {
                'display_image': refuel_recognition_task.photo.link
            },
        }
