package ru.yandex.wmconsole.service;

import java.io.FileNotFoundException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang.mutable.MutableLong;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Required;
import org.springframework.transaction.TransactionStatus;

import ru.yandex.common.util.concurrent.CommonThreadFactory;
import ru.yandex.webmaster.common.urltree.YandexSearchShard;
import ru.yandex.wmconsole.data.ErrorUrlSourceEnum;
import ru.yandex.wmconsole.data.Node;
import ru.yandex.wmconsole.data.bad.urls.DbActionEnum;
import ru.yandex.wmconsole.data.bad.urls.DbTask;
import ru.yandex.wmconsole.data.bad.urls.RequestDispatcher;
import ru.yandex.wmconsole.data.bad.urls.impl.DbTaskImpl;
import ru.yandex.wmconsole.data.bad.urls.impl.EndOfFileDbTask;
import ru.yandex.wmconsole.data.bad.urls.impl.RequestDispatcherImpl;
import ru.yandex.wmconsole.data.info.HostDbHostInfo;
import ru.yandex.wmconsole.data.info.TreeInfo;
import ru.yandex.wmconsole.data.partition.WMCPartition;
import ru.yandex.wmconsole.service.dao.TblCodeAgrErrorTreesDao;
import ru.yandex.wmconsole.service.dao.TblErrorUrlsDao;
import ru.yandex.wmconsole.service.dao.TblRobotdbInfoDao;
import ru.yandex.wmconsole.service.dao.TblUrlTreesDao;
import ru.yandex.wmconsole.util.file.comparator.EqualsRecordHandler;
import ru.yandex.wmconsole.util.file.comparator.FileComparator;
import ru.yandex.wmconsole.util.file.comparator.OneSideHandler;
import ru.yandex.wmconsole.util.file.comparator.TextFileReader;
import ru.yandex.wmconsole.util.file.comparator.impl.EmptyFileReader;
import ru.yandex.wmconsole.util.file.comparator.impl.NewFileFactory;
import ru.yandex.wmconsole.util.file.comparator.impl.OldFileFactory;
import ru.yandex.wmconsole.util.file.comparator.impl.UrlFileReader;
import ru.yandex.wmconsole.util.file.comparator.impl.UrlRecord;
import ru.yandex.wmtools.common.data.xmlsearch.InternalLinksCountRequest;
import ru.yandex.wmtools.common.data.xmlsearch.LinksCountRequest;
import ru.yandex.wmtools.common.error.InternalException;
import ru.yandex.wmtools.common.error.UserException;
import ru.yandex.wmtools.common.service.AbstractDbService;
import ru.yandex.wmtools.common.service.IndexInfoService;
import ru.yandex.wmtools.common.util.ServiceTransactionCallbackWithoutResult;
import ru.yandex.wmtools.common.util.URLUtil;
import ru.yandex.wmtools.common.util.http.YandexHttpStatus;

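/**
 * Loads bad (removed) page urls from dump files into the webmaster host databases,
 * either as a full update or as an incremental diff against the previous dump.
 *
 * User: azakharov
 * Date: 25.07.13
 * Time: 18:17
 */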
public class RemovedPageService extends AbstractDbService {

    // Class-wide logger; the nested HostDbTask workers log through it as well.
    private static final Logger log = LoggerFactory.getLogger(RemovedPageService.class);

    // Collaborators assigned through the @Required setters of this class and
    // handed over to every HostDbTask created in updateRemovedPagesImpl().
    private HostDbHostInfoService hostDbHostInfoService;
    private TblErrorUrlsDao tblErrorUrlsDao;
    private TblCodeAgrErrorTreesDao tblCodeAgrErrorTreesDao;
    private TblRobotdbInfoDao tblRobotdbInfoDao;
    private TblUrlTreesDao tblUrlTreesDao;
    private IndexInfoService indexInfoService;
    private UrlTreeService urlTreeService;

    // Name of the file with the current bad urls (the "new" side of the comparison).
    private String currentErrorUrlFilename;
    // Name of the file with the previous bad urls; read only by updateRemovedPagesIncremental().
    private String previousErrorUrlFilename;

    /**
     * Orders urls first by host name (case-insensitively) and, for equal hosts,
     * by relative url (case-sensitively). Passed to the file comparator as the
     * key ordering of the bad-url files.
     */
    public static final Comparator<URL> urlComparator = new Comparator<URL>() {
        @Override
        public int compare(URL url, URL url2) {
            // host names are compared ignoring case
            final int byHost = URLUtil.getHostName(url, true)
                    .compareToIgnoreCase(URLUtil.getHostName(url2, true));
            if (byHost == 0) {
                // same host: fall back to a case-sensitive comparison of relative urls
                return URLUtil.getRelativeUrl(url).compareTo(URLUtil.getRelativeUrl(url2));
            }
            return byHost;
        }
    };


    public static class HostDbTask implements Runnable {
        // Index of the host database this worker serves; used in log messages.
        private final int dbIndex;
        // Source of DbTask items for this worker; EndOfFileDbTask marks the end of input.
        private final BlockingQueue<DbTask> queue;
        // Services and DAOs used while applying a batch of tasks for one host.
        private final HostDbHostInfoService hostDbHostInfoService;
        private final TblErrorUrlsDao tblErrorUrlsDao;
        private final TblCodeAgrErrorTreesDao tblCodeAgrErrorTreesDao;
        private final TblRobotdbInfoDao tblRobotdbInfoDao;
        private final TblUrlTreesDao tblUrlTreesDao;
        private final IndexInfoService indexInfoService;
        private final UrlTreeService urlTreeService;
        // true: counters are applied as deltas; false: existing BAD_URLS rows are removed
        // first and counters are written as absolute values (see processBatch).
        private final boolean incremental;

        /**
         * Creates a worker bound to one task queue.
         *
         * @param incremental             whether counters are applied as deltas (true) or rewritten (false)
         * @param dbIndex                 index of the host database served by this worker
         * @param queue                   queue the worker takes its tasks from
         * @param hostDbHostInfoService   lookup of host records in the host db
         * @param tblErrorUrlsDao         DAO for error url rows
         * @param tblCodeAgrErrorTreesDao DAO for per-node, per-code banned counters
         * @param tblRobotdbInfoDao       DAO for the per-host banned counter
         * @param tblUrlTreesDao          DAO for per-node banned counters
         * @param indexInfoService        service used to request link counts
         * @param urlTreeService          service providing the url tree of a host
         */
        public HostDbTask(boolean incremental,
                          int dbIndex,
                          BlockingQueue<DbTask> queue,
                          HostDbHostInfoService hostDbHostInfoService,
                          TblErrorUrlsDao tblErrorUrlsDao,
                          TblCodeAgrErrorTreesDao tblCodeAgrErrorTreesDao,
                          TblRobotdbInfoDao tblRobotdbInfoDao,
                          TblUrlTreesDao tblUrlTreesDao,
                          IndexInfoService indexInfoService,
                          UrlTreeService urlTreeService) {
            // worker identity and input
            this.dbIndex = dbIndex;
            this.queue = queue;
            this.incremental = incremental;

            // services
            this.hostDbHostInfoService = hostDbHostInfoService;
            this.indexInfoService = indexInfoService;
            this.urlTreeService = urlTreeService;

            // DAOs
            this.tblErrorUrlsDao = tblErrorUrlsDao;
            this.tblCodeAgrErrorTreesDao = tblCodeAgrErrorTreesDao;
            this.tblRobotdbInfoDao = tblRobotdbInfoDao;
            this.tblUrlTreesDao = tblUrlTreesDao;
        }

        /**
         * Worker loop: takes tasks from the queue, groups consecutive tasks that belong
         * to the same host and applies each group with {@link #processBatch}.
         * The loop ends when the {@link EndOfFileDbTask} marker is taken (the pending
         * group is flushed first) or when the thread is interrupted.
         *
         * Grouping is purely by adjacency, so it presumably relies on the producer
         * emitting all tasks of one host contiguously — NOTE(review): confirm.
         */
        @Override
        public void run() {
            final Collection<DbTask> tasksForHost = new ArrayList<DbTask>(128);
            String currentHost = null;
            try {
                while (true) {
                    DbTask task = queue.take();
                    if (EndOfFileDbTask.getInstance().equals(task)) {
                        log.debug("endOfFileTask has been reached for thread " + dbIndex);
                        if (currentHost != null && !tasksForHost.isEmpty()) {
                            processBatchSafely(currentHost, tasksForHost);
                        }
                        log.debug("All hosts have been processed for thread " + dbIndex);
                        return;
                    }
                    // a task carries the new record, the old record, or both; either one identifies the host
                    UrlRecord record = task.getNewUrlRecord() != null ? task.getNewUrlRecord() : task.getOldUrlRecord();
                    String hostName = URLUtil.getHostName(record.getKey(), false);
                    if (currentHost == null) {
                        currentHost = hostName;
                    } else if (!currentHost.equals(hostName)) {
                        // host changed: flush what was collected for the previous host
                        processBatchSafely(currentHost, tasksForHost);
                        currentHost = hostName;
                        tasksForHost.clear();
                    }
                    tasksForHost.add(task);
                }
            } catch (InterruptedException e) {
                log.debug("HostDbTask " + dbIndex + " was interupted");
                // restore the interrupt flag so the executor / caller can observe it
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Applies one host batch and logs any failure instead of propagating it, so that
         * a problem with a single host does not stop the worker: the task is started via
         * ExecutorService.submit and its Future is never inspected, so an escaping
         * exception would be lost and the queue would no longer be drained.
         */
        private void processBatchSafely(String hostName, Collection<DbTask> tasks) {
            try {
                processBatch(hostName, tasks);
            } catch (InternalException e) {
                log.error("Error while updating data for host " + hostName + " in db", e);
            } catch (UserException e) {
                log.error("UserException while updating data for host " + hostName + " in db", e);
            } catch (RuntimeException e) {
                log.error("Unexpected error while updating data for host " + hostName + " in db", e);
            }
        }

        /**
         * Applies all tasks collected for one host inside a single service transaction.
         * <p>
         * Returns without touching the database when the host is not found in the host db
         * or has no url tree info. Otherwise every task is dispatched to
         * delete/update/insert, which accumulate counter changes in local maps; the
         * accumulated values are then written to the counter tables, either as increments
         * (incremental mode) or via the update/insert DAO calls (full mode).
         *
         * @param hostName     name of the host the tasks belong to
         * @param tasksForHost tasks to apply; only read here
         * @throws InternalException propagated from the services/DAOs
         * @throws UserException     propagated from the services/DAOs
         */
        public void processBatch(final String hostName, final Collection<DbTask> tasksForHost) throws InternalException, UserException {
            log.debug("Processing " + hostName + " tasks ( " + tasksForHost.size() + " )");
            final HostDbHostInfo hostDbHostInfo = hostDbHostInfoService.getHostDbHostInfo(hostName, true);
            if (hostDbHostInfo == null) {
                log.warn("Host " + hostName + " not found in hostdb");
                return;
            }

            final TreeInfo treeInfo = urlTreeService.getUrlTreeInfo(hostDbHostInfo, null, false, YandexSearchShard.RU);
            if (treeInfo == null) {
                log.warn("Host " + hostName + " doesn't have record in tbl_url_trees");
                return;
            }

            tblErrorUrlsDao.getServiceTransactionTemplate(new WMCPartition(hostDbHostInfo, null)).executeInService(
                    new ServiceTransactionCallbackWithoutResult() {
                        @Override
                        protected void doInTransactionWithoutResult(TransactionStatus transactionStatus) throws UserException, InternalException {
                            // mapping node_id -> code -> count
                            final Map<Long, Map<Integer, Long>> codeAggrErrorTreesBannedCount = new LinkedHashMap<Long, Map<Integer, Long>>();
                            // mapping node_id -> count
                            final Map<Long, Long> urlTreesBannedCount = new LinkedHashMap<Long, Long>();
                            // per-host counter change accumulated by insert()/delete()
                            final MutableLong robotDbInfoBannedCount = new MutableLong(0L);

                            // full mode: drop the rows of this source before re-inserting them
                            if (!incremental) {
                                tblErrorUrlsDao.removeErrorUrls(hostDbHostInfo, ErrorUrlSourceEnum.BAD_URLS);
                            }

                            // todo: limit the number of broken link examples

                            // Process individual pages
                            for (DbTask task : tasksForHost) {
                                switch (task.getAction()) {
                                    case DELETE:
                                        delete(task.getOldUrlRecord(), hostDbHostInfo, treeInfo, robotDbInfoBannedCount, codeAggrErrorTreesBannedCount, urlTreesBannedCount);
                                        break;
                                    case UPDATE:
                                        update(task.getOldUrlRecord(), task.getNewUrlRecord(), hostDbHostInfo, treeInfo, robotDbInfoBannedCount, codeAggrErrorTreesBannedCount, urlTreesBannedCount);
                                        break;
                                    case INSERT:
                                        insert(task.getNewUrlRecord(), hostDbHostInfo, treeInfo, robotDbInfoBannedCount, codeAggrErrorTreesBannedCount, urlTreesBannedCount);
                                        break;
                                }
                            }

                            if (incremental) {
                                // accumulated values are deltas (may be negative) and are added to the stored counters
                                // updating tbl_robotdb_info
                                tblRobotdbInfoDao.increaseBannedCount(hostDbHostInfo, robotDbInfoBannedCount.longValue());
                                // updating tbl_code_agr_error_trees
                                for (Map.Entry<Long, Map<Integer, Long>> entry : codeAggrErrorTreesBannedCount.entrySet()) {
                                    final Long nodeId = entry.getKey();
                                    for (Map.Entry<Integer, Long> codeCount : entry.getValue().entrySet()) {
                                        final Integer code = codeCount.getKey();
                                        final Long bannedCount = codeCount.getValue();
                                        tblCodeAgrErrorTreesDao.increaseBannedCount(hostDbHostInfo, nodeId, code, bannedCount);
                                    }
                                }
                                // updating tbl_url_trees
                                for (Map.Entry<Long, Long> nodeCount : urlTreesBannedCount.entrySet()) {
                                    final Long nodeId = nodeCount.getKey();
                                    final Long bannedCount = nodeCount.getValue();
                                    tblUrlTreesDao.increaseBannedCount(hostDbHostInfo, nodeId, bannedCount);
                                }
                            } else {
                                // full mode: accumulated values are passed to the update/insert DAO calls
                                // updating tbl_robotdb_info
                                tblRobotdbInfoDao.updateBannedCount(hostDbHostInfo, robotDbInfoBannedCount.longValue());
                                // updating tbl_code_agr_error_trees
                                for (Map.Entry<Long, Map<Integer, Long>> entry : codeAggrErrorTreesBannedCount.entrySet()) {
                                    final Long nodeId = entry.getKey();
                                    for (Map.Entry<Integer, Long> codeCount : entry.getValue().entrySet()) {
                                        final Integer code = codeCount.getKey();
                                        final Long bannedCount = codeCount.getValue();
                                        tblCodeAgrErrorTreesDao.insertBannedCount(hostDbHostInfo, nodeId, code, bannedCount);
                                    }
                                }
                                // updating tbl_url_trees
                                for (Map.Entry<Long, Long> nodeCount : urlTreesBannedCount.entrySet()) {
                                    final Long nodeId = nodeCount.getKey();
                                    final Long bannedCount = nodeCount.getValue();
                                    tblUrlTreesDao.updateBannedCount(hostDbHostInfo, nodeId, bannedCount);
                                }
                            }
                            log.debug("Host " + hostName + " has been processed");
                        }
                    }
            );
        }

        /**
         * Handles an url that is present in both the old and the new file.
         * <ul>
         *   <li>neither code needs a db record: nothing to do;</li>
         *   <li>only the old code needed one: the record is deleted;</li>
         *   <li>only the new code needs one: the record is inserted;</li>
         *   <li>both need one: the stored http code is updated, or the record is inserted
         *       when it turns out to be missing in the db.</li>
         * </ul>
         *
         * @param oldUrlRecord            record from the old file
         * @param newUrlRecord            record from the new file
         * @param hostDbHostInfo          host the url belongs to
         * @param treeInfo                url tree of the host
         * @param robotDbInfoBannedCount  accumulator of the per-host counter change
         * @param codeAgrErrorTrees       accumulator node_id -> code -> count
         * @param urlTreesBannedCount     accumulator node_id -> count
         * @throws InternalException propagated from the DAO calls
         */
        private void update(UrlRecord oldUrlRecord, UrlRecord newUrlRecord, HostDbHostInfo hostDbHostInfo, TreeInfo treeInfo, MutableLong robotDbInfoBannedCount, Map<Long,Map<Integer, Long>> codeAgrErrorTrees, Map<Long, Long> urlTreesBannedCount) throws InternalException {
            log.debug("updating " + newUrlRecord.getKey());
            String relativeUrl = URLUtil.getRelativeUrl(newUrlRecord.getKey());

            final boolean newNeedInsert = needInsert(newUrlRecord.getHttpCode(), newUrlRecord.getKey());
            final boolean oldNeedInsert = needInsert(oldUrlRecord.getHttpCode(), oldUrlRecord.getKey());
            if (!oldNeedInsert && !newNeedInsert) {
                log.debug("old code (" + oldUrlRecord.getHttpCode() + ") and new code (" + newUrlRecord.getHttpCode() + ") " +
                          "are ignored for " + newUrlRecord.getKey());
                return;
            } else if (oldNeedInsert && !newNeedInsert) {
                // the message used to print the url in place of the new http code
                log.debug("old code (" + oldUrlRecord.getHttpCode() + ") is not ignored " +
                          "but new code (" + newUrlRecord.getHttpCode() + ") is ignored for " + newUrlRecord.getKey());
                delete(oldUrlRecord, hostDbHostInfo, treeInfo, robotDbInfoBannedCount, codeAgrErrorTrees, urlTreesBannedCount);
                return;
            } else if (!oldNeedInsert && newNeedInsert) {
                // the message used to print the url in place of the new http code
                log.debug("old code (" + oldUrlRecord.getHttpCode() + ") is ignored " +
                          "but new code (" + newUrlRecord.getHttpCode() + ") is not ignored for " + newUrlRecord.getKey());
                insert(newUrlRecord, hostDbHostInfo, treeInfo, robotDbInfoBannedCount, codeAgrErrorTrees, urlTreesBannedCount);
                return;
            }

            // From here on both the old and the new code require a db record.
            if (newUrlRecord.getHttpCode().equals(oldUrlRecord.getHttpCode())) {
                Integer codeInDb = tblErrorUrlsDao.getErrorUrlHttpCode(hostDbHostInfo, relativeUrl);
                if (codeInDb == null) {
                    // no record in db, so inserting new one
                    log.debug("old code equals to new code but record doesn't exist for " + newUrlRecord.getKey());
                    insert(newUrlRecord, hostDbHostInfo, treeInfo, robotDbInfoBannedCount, codeAgrErrorTrees, urlTreesBannedCount);
                    return;
                } else if (newUrlRecord.getHttpCode().equals(codeInDb)) {
                    // already in database
                    log.debug("old code equals to new code but record already exist for " + newUrlRecord.getKey());
                    return;
                }
                // db holds a different code than both files: fall through and overwrite it
            }

            // NOTE(review): when the stored code is changed here, the per-code counters in
            // codeAgrErrorTrees are not moved from the old code to the new one — confirm
            // whether that is intended.
            int res = tblErrorUrlsDao.updateErrorUrl(hostDbHostInfo, relativeUrl, newUrlRecord.getHttpCode());
            if (res != 1) {
                // no record in db, so inserting new one
                log.debug("no record to update for " + newUrlRecord.getKey() + " " + newUrlRecord.getHttpCode() + ", inserting");
                insert(newUrlRecord, hostDbHostInfo, treeInfo, robotDbInfoBannedCount, codeAgrErrorTrees, urlTreesBannedCount);
            }
        }

        /**
         * Tells whether an url with the given code must be skipped entirely.
         * All 3xx codes are ignored, as are the codes enumerated below, code 0 and
         * any otherwise unlisted code that is 3000 or greater.
         *
         * @param httpCode numeric (possibly extended) http code of the url
         * @param url      the url; only consulted for the robots.txt special case
         * @return true if the url is to be ignored
         */
        private static boolean isIgnoredUrl(int httpCode, URL url) {
            final boolean redirect = httpCode >= 300 && httpCode < 400;
            if (redirect) {
                return true;
            }

            switch (YandexHttpStatus.parseCode(httpCode)) {
                // Not used codes
                case HTTP_1000_BAD_RESPONSE_HEADER:
                case HTTP_1023_CUSTOM_NOT_MODIFIED:
                case EXT_HTTP_2002_NOTUSED2:
                case EXT_HTTP_2003_NOTUSED3:
                case EXT_HTTP_2006_BADCODES:
                // Internal codes
                case HTTP_1011_FILTER_DISALLOW:
                case HTTP_1012_LOCAL_EIO:
                case HTTP_1022_INTERRUPTED:
                case HTTP_1025_NO_RESOURCES:
                case HTTP_1026_FETCHER_SHUTDOWN:
                case EXT_HTTP_2004_REFRESH:
                case EXT_HTTP_2008_IOERROR:
                case EXT_HTTP_2009_BASEERROR:
                case EXT_HTTP_2013_NUMERERROR:
                case EXT_HTTP_2018_FILTERED:
                case EXT_HTTP_2019_PARSERFAIL:
                case EXT_HTTP_2021_CLEANPARAM:
                case EXT_HTTP_2022_MANUAL_DELETE_URL:
                // Connection problems
                case HTTP_1001_CONNECTION_LOST:
                case HTTP_1006_DNS_FAILURE:
                case HTTP_1010_CONNECT_FAILED:
                    return true;

                // ignored only for robots.txt itself
                case HTTP_1003_ROBOTS_TXT_DISALLOW:
                    return "/robots.txt".equals(URLUtil.getRelativeUrl(url));

                // listed explicitly so that it bypasses the ">= 3000" rule of the default branch
                case EXT_HTTP_3021_SELRANK:
                    return false;

                default:
                    return httpCode == 0 || httpCode >= 3000;
            }
        }

        /**
         * Tells whether the given code is treated as "not an error", i.e. the url
         * does not need a record among the bad urls.
         *
         * @param httpCode numeric (possibly extended) http code of the url
         * @param url      the url (not used by the current implementation)
         * @return true for the codes enumerated below, false for everything else
         */
        private static boolean isOkUrl(int httpCode, URL url) {
            switch (YandexHttpStatus.parseCode(httpCode)) {
                case EXT_HTTP_2000_MIRRMOVE:
                case EXT_HTTP_2004_REFRESH:
                case EXT_HTTP_3021_SELRANK:
                // server / proxy side conditions
                case HTTP_1028_SERVER_BUSY:
                case HTTP_1029_SERVICE_UNKNOWN:
                case HTTP_1030_PROXY_UNKNOWN:
                case HTTP_1031_PROXY_REQUEST_TIME_OUT:
                case HTTP_1032_PROXY_INTERNAL_ERROR:
                case HTTP_1033_PROXY_CONNECT_FAILED:
                case HTTP_1034_PROXY_CONNECTION_LOST:
                case HTTP_1035_PROXY_NO_PROXY:
                case HTTP_1036_PROXY_ERROR:
                // plain http success / redirects
                case HTTP_200_OK:
                case HTTP_301_MOVED_PERMANENTLY:
                case HTTP_302_FOUND:
                case HTTP_303_SEE_OTHER:
                case HTTP_304_NOT_MODIFIED:
                case HTTP_307_TEMPORARY_REDIRECT:
                    return true;

                // kept explicit: bad mime is not ok
                case HTTP_1005_BAD_MIME:
                default:
                    return false;
            }
        }

        /**
         * An url needs a record among the bad urls when its code is neither ignored nor ok.
         *
         * @param httpCode numeric http code of the url
         * @param url      the url
         * @return true if a record should exist for this url/code
         */
        private boolean needInsert(int httpCode, URL url) {
            return !(isIgnoredUrl(httpCode, url) || isOkUrl(httpCode, url));
        }

        /**
         * Inserts an error url row for the record (unless its code does not need one)
         * and registers +1 in all three counter accumulators.
         *
         * @param record                  record to insert
         * @param hostDbHostInfo          host the url belongs to
         * @param treeInfo                url tree of the host, used to find the node of the url
         * @param robotDbInfoBannedCount  accumulator of the per-host counter change
         * @param codeAgrErrorTrees       accumulator node_id -> code -> count
         * @param urlTreesBannedCount     accumulator node_id -> count
         * @throws InternalException propagated from the DAO call
         */
        private void insert(UrlRecord record, HostDbHostInfo hostDbHostInfo, TreeInfo treeInfo, MutableLong robotDbInfoBannedCount, Map<Long,Map<Integer, Long>> codeAgrErrorTrees, Map<Long, Long> urlTreesBannedCount) throws InternalException {
            log.debug("inserting " + record.getKey());
            final int code = record.getHttpCode();
            final boolean recordNeeded = needInsert(code, record.getKey());
            if (!recordNeeded) {
                log.debug("ignore " + record.getKey() + " " + code);
                return;
            }

            // the url id is a random long
            final long generatedId = new Random().nextLong();
            final URL pageUrl = record.getKey();
            final String relative = URLUtil.getRelativeUrl(pageUrl);
            final long ownerNodeId = findClosestNode(relative, treeInfo);
            // one timestamp is used for all three date arguments of the row
            final Date now = new Date();

            final Long externalLinks = getExtLinksCount(pageUrl);
            final Long internalLinks = getIntLinksCount(pageUrl);

            // a failed link count lookup (null) is treated as "no links"
            final boolean hasIntLinks = internalLinks != null && internalLinks > 0;
            final boolean hasExtLinks = externalLinks != null && externalLinks > 0;

            tblErrorUrlsDao.insertErrorUrl(
                    hostDbHostInfo,
                    generatedId,
                    relative,
                    ownerNodeId,
                    now, now, now,
                    0,      // state
                    0L,     // size
                    code,
                    hasIntLinks,
                    hasExtLinks,
                    false,  // isFiltered
                    null,
                    ErrorUrlSourceEnum.BAD_URLS);

            // increasing banned_count in tbl_robotdb_info
            robotDbInfoBannedCount.increment();

            // increasing banned_count in tbl_code_agr_error_trees000 for node and http code
            increaseCodeAgrTrees(codeAgrErrorTrees, treeInfo, ownerNodeId, code, 1L, 1L);

            // increasing banned_count in tbl_url_trees000
            increaseUrlTreesBannedCount(urlTreesBannedCount, treeInfo, ownerNodeId, 1L, 1L);
        }

        /**
         * Removes the error url row of the record (unless its code never required one)
         * and, when exactly one row was removed, registers -1 in all three counter
         * accumulators.
         *
         * @param record                  record from the old file to delete
         * @param hostDbHostInfo          host the url belongs to
         * @param treeInfo                url tree of the host, used to find the node of the url
         * @param robotDbInfoBannedCount  accumulator of the per-host counter change
         * @param codeAgrErrorTrees       accumulator node_id -> code -> count
         * @param urlTreesBannedCount     accumulator node_id -> count
         * @throws InternalException propagated from the DAO call
         */
        private void delete(UrlRecord record, HostDbHostInfo hostDbHostInfo, TreeInfo treeInfo, MutableLong robotDbInfoBannedCount, Map<Long,Map<Integer, Long>> codeAgrErrorTrees, Map<Long, Long> urlTreesBannedCount) throws InternalException {
            log.debug("deleting " + record.getKey());
            int httpCode = record.getHttpCode();
            if (!needInsert(httpCode, record.getKey())) {
                log.debug("ignore " + record.getKey() + " " + httpCode);
                return;
            }

            String relativeUrl = URLUtil.getRelativeUrl(record.getKey());
            int ret = tblErrorUrlsDao.removeErrorUrl(hostDbHostInfo, relativeUrl);
            if (ret != 1) {
                // url hasn't been deleted, so the counters must stay untouched
                log.debug("url " + record.getKey() + " hasn't been deleted");
                return;
            }

            // decreasing banned_count in tbl_robotdb_info
            robotDbInfoBannedCount.decrement();

            // decreasing banned_count in code_agr_error_trees000 for http code
            long nodeId = findClosestNode(relativeUrl, treeInfo);
            increaseCodeAgrTrees(codeAgrErrorTrees, treeInfo, nodeId, httpCode, -1L, -1L);

            // decreasing banned_count in tbl_url_trees000
            increaseUrlTreesBannedCount(urlTreesBannedCount, treeInfo, nodeId, -1L, -1L);
        }

        /**
         * Action applied by updateTree to every node on the way from a node up to the root.
         */
        public interface UpdateCallback {
            /**
             * @param nodeId id of the node currently being visited
             */
            void update(Long nodeId);
        }

        /**
         * Invokes the callback for the given node and then for each of its ancestors,
         * following parent ids until an id has no node in the tree's id-to-node map.
         *
         * @param treeInfo tree to walk
         * @param nodeId   id of the node to start from
         * @param callback action to run for every visited node id
         */
        private void updateTree(TreeInfo treeInfo, Long nodeId, UpdateCallback callback) {
            final Map<Long, Node> nodesById = treeInfo.getId2node();
            for (Node node = nodesById.get(nodeId);
                 node != null;
                 node = nodesById.get(node.getInfo().getParentId())) {
                callback.update(node.getInfo().getId());
            }
        }

        /**
         * Adjusts the node_id -> code -> count accumulator for the given node and all of
         * its ancestors.
         *
         * @param codeAgrErrorTrees accumulator to modify
         * @param treeInfo          tree used to walk from the node up to the root
         * @param nodeId            id of the node the url belongs to
         * @param httpCode          http code whose counter is adjusted
         * @param addedValue        value added when a counter for the node/code already exists
         * @param newValue          value stored when there is no counter for the node/code yet
         */
        private void increaseCodeAgrTrees(final Map<Long,Map<Integer, Long>> codeAgrErrorTrees, TreeInfo treeInfo, long nodeId, final int httpCode, final long addedValue, final long newValue) {
            final UpdateCallback perNodeUpdate = new UpdateCallback() {
                @Override
                public void update(Long nodeId) {
                    Map<Integer, Long> countsByCode = codeAgrErrorTrees.get(nodeId);
                    if (countsByCode == null) {
                        // first change for this node: create its per-code map
                        countsByCode = new LinkedHashMap<Integer, Long>();
                        codeAgrErrorTrees.put(nodeId, countsByCode);
                    }
                    final Long previous = countsByCode.get(httpCode);
                    final long updated;
                    if (previous == null) {
                        updated = newValue;
                    } else {
                        updated = previous + addedValue;
                    }
                    countsByCode.put(httpCode, updated);
                }
            };
            updateTree(treeInfo, nodeId, perNodeUpdate);
        }

        /**
         * Adjusts the node_id -> count accumulator for the given node and all of its ancestors.
         *
         * @param urlTreesBannedCount accumulator to modify
         * @param treeInfo            tree used to walk from the node up to the root
         * @param nodeId              id of the node the url belongs to
         * @param newValue            value stored when there is no counter for the node yet
         * @param addValue            value added when a counter for the node already exists
         */
        private void increaseUrlTreesBannedCount(final Map<Long, Long> urlTreesBannedCount, TreeInfo treeInfo, long nodeId, final long newValue, final long addValue) {
            final UpdateCallback perNodeUpdate = new UpdateCallback() {
                @Override
                public void update(Long nodeId) {
                    final Long previous = urlTreesBannedCount.get(nodeId);
                    final long updated;
                    if (previous == null) {
                        updated = newValue;
                    } else {
                        updated = previous + addValue;
                    }
                    urlTreesBannedCount.put(nodeId, updated);
                }
            };
            updateTree(treeInfo, nodeId, perNodeUpdate);
        }

        /**
         * Finds the deepest tree node whose path is a prefix of the given relative url.
         * Starting from the root, the method repeatedly descends into the child whose
         * path (parent path + child path) is the longest prefix of the url, and stops
         * when no child matches.
         * <p>
         * Fixes compared to the previous version: the candidate prefix is now built from
         * the child's own path (it used the current node's path, so the test did not
         * depend on the child at all), and the parent prefix is no longer modified while
         * siblings are still being examined.
         * <p>
         * NOTE(review): the "parent prefix + node path" concatenation is kept from the
         * previous version; it assumes Node paths are segments relative to the parent —
         * confirm against the tree builder.
         *
         * @param relativeUrl relative url to locate
         * @param treeInfo    url tree of the host
         * @return id of the closest node; the root id when no child matches
         */
        private long findClosestNode(String relativeUrl, TreeInfo treeInfo) {
            Node current = treeInfo.getRootNode();
            long closestId = current.getInfo().getId();
            String path = "/";
            while (true) {
                Node selectedChild = null;
                String selectedPath = null;
                for (Node child : current.getChildren()) {
                    String childPath = path + child.getInfo().getPath();
                    // prefer the most specific (longest) matching prefix among siblings
                    if (relativeUrl.startsWith(childPath)
                            && (selectedPath == null || childPath.length() > selectedPath.length())) {
                        selectedChild = child;
                        selectedPath = childPath;
                    }
                }

                if (selectedChild == null) {
                    // no child is a prefix of the url: the current node is the closest one
                    return closestId;
                }
                current = selectedChild;
                closestId = selectedChild.getInfo().getId();
                path = selectedPath;
            }
        }

        /**
         * Requests the internal links count for the url from the index info service.
         * The lookup is best-effort: a failure is logged and reported as null instead
         * of aborting the whole host batch.
         *
         * @param url url to look up
         * @return the value returned by the service, or null when the request failed
         */
        private @Nullable Long getIntLinksCount(URL url) {
            String hostName = URLUtil.getHostName(url, false);
            String path = URLUtil.getRelativeUrl(url);
            try {
                return indexInfoService.extractLinksCount(new InternalLinksCountRequest(hostName, path, 0, 25));
            } catch (InternalException e) {
                log.warn("Failed to get internal links count for " + url, e);
                return null;
            } catch (UserException e) {
                log.warn("Failed to get internal links count for " + url, e);
                return null;
            }
        }

        /**
         * Requests the links count for the url from the index info service using a
         * LinksCountRequest (the external counterpart of getIntLinksCount).
         * The lookup is best-effort: a failure is logged and reported as null instead
         * of aborting the whole host batch.
         *
         * @param url url to look up
         * @return the value returned by the service, or null when the request failed
         */
        private @Nullable Long getExtLinksCount(URL url) {
            String hostName = URLUtil.getHostName(url, false);
            String path = URLUtil.getRelativeUrl(url);
            try {
                return indexInfoService.extractLinksCount(new LinksCountRequest(hostName, path, 0, 25));
            } catch (InternalException e) {
                log.warn("Failed to get external links count for " + url, e);
                return null;
            } catch (UserException e) {
                log.warn("Failed to get external links count for " + url, e);
                return null;
            }
        }
    }

    /**
     * Makes full update of bad urls in host db: the current file is compared
     * against an empty "old" side and processed in non-incremental mode.
     *
     * @throws FileNotFoundException declared by the reader / update implementation
     * @throws UserException         propagated from the update implementation
     * @throws InternalException     propagated from the update implementation
     * @throws InterruptedException  if the calling thread is interrupted while waiting
     */
    public void updateRemovedPages() throws FileNotFoundException, UserException, InternalException, InterruptedException {
        updateRemovedPagesImpl(
                false,
                new EmptyFileReader(),
                new UrlFileReader(currentErrorUrlFilename, new NewFileFactory()));
    }

    /**
     * Makes diff of two files with bad urls (previous and current) and updates
     * webmaster host db according to changes, in incremental mode.
     *
     * @throws FileNotFoundException declared by the readers / update implementation
     * @throws InternalException     propagated from the update implementation
     * @throws UserException         propagated from the update implementation
     * @throws InterruptedException  if the calling thread is interrupted while waiting
     */
    public void updateRemovedPagesIncremental() throws FileNotFoundException, InternalException, UserException, InterruptedException {
        // the reader of the previous file is created first, then the reader of the current one
        updateRemovedPagesImpl(
                true,
                new UrlFileReader(previousErrorUrlFilename, new OldFileFactory()),
                new UrlFileReader(currentErrorUrlFilename, new NewFileFactory()));
    }

    /**
     * Compares the old and the new bad-url files and feeds the resulting
     * delete/update/insert tasks to one {@link HostDbTask} worker per host database.
     * After the comparison the end-of-file marker is dispatched and the method waits
     * up to 10 minutes for the workers to finish.
     * <p>
     * The worker pool is always stopped before this method returns: if the comparison
     * fails, the wait is interrupted or the timeout expires, the workers are
     * interrupted via shutdownNow() so that none of them stays blocked on its queue.
     *
     * @param incremental   mode passed to the workers
     * @param oldFileReader reader of the previous state
     * @param newFileReader reader of the current state
     * @throws FileNotFoundException declared by the file comparison
     * @throws InternalException     propagated from the comparison / dispatcher
     * @throws UserException         propagated from the comparison / dispatcher
     * @throws InterruptedException  if the calling thread is interrupted while dispatching or waiting
     */
    private void updateRemovedPagesImpl(final boolean incremental, final TextFileReader<UrlRecord> oldFileReader, final TextFileReader<UrlRecord> newFileReader) throws FileNotFoundException, InternalException, UserException, InterruptedException {
        log.info("incremental=" + incremental);
        final FileComparator<URL, UrlRecord> fileComparator = new FileComparator<URL, UrlRecord>();
        final int hostDbCount = WMCPartition.getHostDbCount(getDatabaseCount());
        final RequestDispatcher dbTaskDispatcher = new RequestDispatcherImpl(hostDbCount);

        final ExecutorService executorService = Executors.newFixedThreadPool(hostDbCount, new CommonThreadFactory(true, RemovedPageService.class.getSimpleName() + "-"));
        boolean terminated = false;
        try {
            for (int i = 0; i < hostDbCount; i++) {
                executorService.submit(new HostDbTask(incremental, i, dbTaskDispatcher.getQueue(i), hostDbHostInfoService, tblErrorUrlsDao, tblCodeAgrErrorTreesDao, tblRobotdbInfoDao, tblUrlTreesDao, indexInfoService, urlTreeService));
            }

            fileComparator.compare(
                    oldFileReader,
                    newFileReader,
                    new OldFileRecordHandler(dbTaskDispatcher),
                    new EqualsUrlHandler(dbTaskDispatcher),
                    new NewFileRecordHandler(dbTaskDispatcher),
                    urlComparator);
            dbTaskDispatcher.addRequest(EndOfFileDbTask.getInstance());

            executorService.shutdown();
            terminated = executorService.awaitTermination(10, TimeUnit.MINUTES);
            if (!terminated) {
                log.warn("Workers have not finished in 10 minutes, interrupting them");
            }
        } finally {
            if (!terminated) {
                // failure, interruption or timeout: workers are blocked in queue.take()
                // or still busy, so interrupt them instead of leaving them behind
                executorService.shutdownNow();
            }
        }
    }

    /**
     * Base class of the file comparison handlers: holds the dispatcher that the
     * handlers add their db tasks to.
     */
    public abstract static class AbstractRecordHandler {
        /** Dispatcher receiving the tasks created by the concrete handlers. */
        protected final RequestDispatcher dbTaskDispatcher;

        /**
         * @param dbTaskDispatcher dispatcher the created tasks are added to
         */
        protected AbstractRecordHandler(final RequestDispatcher dbTaskDispatcher) {
            this.dbTaskDispatcher = dbTaskDispatcher;
        }
    }

    /**
     * Handles records found only in the old file: each one becomes a DELETE task
     * carrying the record as the old record.
     */
    public static class OldFileRecordHandler extends AbstractRecordHandler implements OneSideHandler<UrlRecord> {
        public OldFileRecordHandler(final RequestDispatcher dbTaskDispatcher) {
            super(dbTaskDispatcher);
        }

        @Override
        public void handle(final UrlRecord record) throws InterruptedException {
            final DbTaskImpl removal = new DbTaskImpl(DbActionEnum.DELETE, null, record);
            dbTaskDispatcher.addRequest(removal);
        }
    }

    /**
     * Handles records found only in the new file: each one becomes an INSERT task
     * carrying the record as the new record.
     */
    public static class NewFileRecordHandler extends AbstractRecordHandler implements OneSideHandler<UrlRecord> {
        public NewFileRecordHandler(final RequestDispatcher dbTaskDispatcher) {
            super(dbTaskDispatcher);
        }

        @Override
        public void handle(final UrlRecord record) throws InterruptedException {
            final DbTaskImpl addition = new DbTaskImpl(DbActionEnum.INSERT, record, null);
            dbTaskDispatcher.addRequest(addition);
        }
    }

    /**
     * Handles urls present in both files: each pair becomes an UPDATE task carrying
     * both the new and the old record.
     */
    public static class EqualsUrlHandler extends AbstractRecordHandler implements EqualsRecordHandler<UrlRecord> {
        public EqualsUrlHandler(final RequestDispatcher dbTaskDispatcher) {
            super(dbTaskDispatcher);
        }

        @Override
        public void handle(final UrlRecord oldFileRecord, final UrlRecord newFileRecord) throws InterruptedException {
            final DbTaskImpl change = new DbTaskImpl(DbActionEnum.UPDATE, newFileRecord, oldFileRecord);
            dbTaskDispatcher.addRequest(change);
        }
    }

    /** Injects the service used to look up hosts in the host db. */
    @Required
    public void setHostDbHostInfoService(HostDbHostInfoService hostDbHostInfoService) {
        this.hostDbHostInfoService = hostDbHostInfoService;
    }

    /** Injects the DAO for error url rows. */
    @Required
    public void setTblErrorUrlsDao(TblErrorUrlsDao tblErrorUrlsDao) {
        this.tblErrorUrlsDao = tblErrorUrlsDao;
    }

    /** Injects the DAO for the per-node, per-code banned counters. */
    @Required
    public void setTblCodeAgrErrorTreesDao(TblCodeAgrErrorTreesDao tblCodeAgrErrorTreesDao) {
        this.tblCodeAgrErrorTreesDao = tblCodeAgrErrorTreesDao;
    }

    /** Injects the DAO for the per-host banned counter. */
    @Required
    public void setTblRobotdbInfoDao(TblRobotdbInfoDao tblRobotdbInfoDao) {
        this.tblRobotdbInfoDao = tblRobotdbInfoDao;
    }

    /** Injects the service used to request link counts. */
    @Required
    public void setIndexInfoService(IndexInfoService indexInfoService) {
        this.indexInfoService = indexInfoService;
    }

    /** Injects the service providing the url tree of a host. */
    @Required
    public void setUrlTreeService(UrlTreeService urlTreeService) {
        this.urlTreeService = urlTreeService;
    }

    /** Injects the DAO for the per-node banned counters. */
    @Required
    public void setTblUrlTreesDao(TblUrlTreesDao tblUrlTreesDao) {
        this.tblUrlTreesDao = tblUrlTreesDao;
    }

    /** Sets the name of the file with the current bad urls. */
    @Required
    public void setCurrentErrorUrlFilename(String currentErrorUrlFilename) {
        this.currentErrorUrlFilename = currentErrorUrlFilename;
    }

    /** Sets the name of the file with the previous bad urls (used by the incremental update). */
    @Required
    public void setPreviousErrorUrlFilename(String previousErrorUrlFilename) {
        this.previousErrorUrlFilename = previousErrorUrlFilename;
    }
}
