package BM::Pages::PageHierarchy;

use utf8;
use open ':utf8';

use std;
use base qw(BM::Pages::PageLite);

use Data::Dumper;

use Digest::MD5 qw(md5_hex);
use Utils::Sys qw(h2sa);

use Encode;

use open ":utf8";
no warnings 'utf8';

########################################################
# Field accessors
########################################################

# Declare the per-page hierarchy fields through the inherited mk_accessors
# (presumably Class::Accessor-style get/set methods -- confirm in the base
# class):
#   chlrn            - appended to the page dump by dumper_text_lite when set
#   is_first_level   - not used in this file
#   deep_problem_pgl - object answering ->count; when non-empty it
#                      short-circuits the subsection analysis below
__PACKAGE__->mk_accessors(
    'chlrn',
    'is_first_level',
    'deep_problem_pgl',
);

########################################################
# Methods
########################################################

# Get or set the parent page of this page.
#
#   $page->prnt($parent)  - remember $parent, then return it
#   $page->prnt           - return the remembered parent, or '' if none was set
#
# The parent is kept in a private scalar that is weakened and stored by
# reference in $self->{prnt_proxy_ref}; page_list() copies that same
# reference onto the lists it returns. Because the link is weak, the getter
# yields undef once the parent object has been destroyed elsewhere.
sub prnt {
    my ($self) = shift;
    if (@_) {
        my $prnt = $_[0];
        # weaken() croaks on a non-reference, so only weaken real references;
        # plain values (e.g. '' or undef) are stored as they are.
        weaken($prnt) if ref $prnt;
        $self->{prnt_proxy_ref} = \$prnt;
    }
    # Read the proxy from $self. The previous code looked it up through $_,
    # which made the result depend on whatever the caller's $_ happened to be.
    return ${ $self->{prnt_proxy_ref} } if $self->{prnt_proxy_ref};
    return '';
}

# Build a page list via the parent class and hand this page's parent proxy
# (the scalar reference maintained by prnt) over to the new list.
# All arguments are forwarded untouched to SUPER::page_list.
sub page_list {
    my $self = shift;

    my $list = $self->SUPER::page_list(@_);

    # Share the very same proxy reference rather than a copy of the parent.
    my $proxy = $self->{prnt_proxy_ref};
    $list->{prnt_proxy_ref} = $proxy;

    return $list;
}

=h
sub proxy_ref {
    my ($self) = @_;
    return $self->{proxy_ref} if $self->{proxy_ref};
    my $weak_ref = $self;
    weaken($weak_ref);
    $self->{proxy_ref} = \$weak_ref;
    return $self->{proxy_ref};
}
=cut

# Return the menu filter hash of the site this page belongs to.
# The page's own site object is used when it is set; otherwise the site is
# looked up through the project by this page's URL.
sub get_mainpage_freqhash_suburls {
    my $self = shift;

    my $site = $self->site;
    $site = $self->proj->site($self->url) unless $site;

    return $site->menu_filter_hash;
}

# Return this page's internal sub-pages with the site menu links removed.
# The menu is described by the hash from get_mainpage_freqhash_suburls and
# the removal itself is done by the list's freqhash_filtered_urls.
sub get_internal_subpages_without_menu {
    my $self = shift;

    my $menu_freq = $self->get_mainpage_freqhash_suburls;
    my $subpages  = $self->get_internal_subpages;

    # Drop the menu entries.
    my $without_menu = $subpages->freqhash_filtered_urls($menu_freq);
    return $without_menu;
}

# Collect the sub-pages that are good from the hierarchy point of view.
#
# Starting from the internal sub-pages without menu links, the list is run
# through good_pages_for_hierarchy and brand_filtered, then through the
# pager/template pipeline (add_pager_urls, context_badre_filter,
# delete_bad_tmpls, most_freq_tmpl_urls). Two modes exist:
#   * more than one pager link whose name contains a digit: the list gets
#     clear_pager set before the pipeline;
#   * otherwise: the list inherits the debug flag and is additionally run
#     through delete_vendorlike_pages.
# The result is passed through good_pages_for_hierarchy once more.
# Debug traces go to STDOUT when $self->dbg is true.
sub get_hierarchy_subpages {
    my $self = shift;

    my $candidates = $self->get_internal_subpages_without_menu;
    print "get_internal_subpages_without_menu\n$candidates" if $self->dbg;

    $candidates = $candidates->good_pages_for_hierarchy;
    $candidates = $candidates->brand_filtered;

    my $menu_freq = $self->get_mainpage_freqhash_suburls;

    my $pagers = $candidates->get_pager_urls;
    print "prspgl:\n$pagers" if $self->dbg;

    # A page counts as paginated when more than one pager link is numeric.
    my $numeric_pagers = grep { $_->name =~ /\d/ } @$pagers;
    my $is_paginated   = $numeric_pagers > 1;

    if ($is_paginated) {
        print "pgtype\n" if $self->dbg;
        $candidates->{clear_pager} = 1;
    }
    else {
        print "notpgtype\n" if $self->dbg;
        $candidates->{dbg} = 1 if $self->dbg;
    }

    # Pipeline shared by both modes.
    $candidates = $candidates->add_pager_urls($menu_freq);
    $candidates = $candidates->context_badre_filter($self);
    $candidates = $candidates->delete_bad_tmpls;
    $candidates = $candidates->most_freq_tmpl_urls($self->norm_url);

    # Vendor-like pages are only removed on pages without a pager.
    $candidates = $candidates->delete_vendorlike_pages unless $is_paginated;

    $candidates = $candidates->good_pages_for_hierarchy;
    return $candidates;
}

# Produce the log text for one pipeline stage.
#   $pgl - page list to render
#   $ttl - stage title, passed on to pages2text
# A "<epoch seconds> <title>" progress line is written to STDERR as a side
# effect; the return value is whatever $pgl->pages2text($ttl) returns.
sub add_pgl_log_inf {
    my $self = shift;
    my ($pgl, $ttl) = @_;

    my $now = time;
    print STDERR "$now $ttl\n";

    return $pgl->pages2text($ttl);
}

# Build a step-by-step textual report of the hierarchy sub-page selection.
#
# The report starts with the page name, the goods count and the menu filter
# data, then shows the page list after every filtering stage (rendered by
# add_pgl_log_inf, which also writes a progress line to STDERR), the brand
# and template groups, and finally the resulting page count.
# Returns the report as a single string.
sub get_hierarchy_subpages_log {
    my $self = shift;

    my $report = "pagename: " . $self->get_pagename . "\n";
    $report .= "text_goods_count:" . $self->text_goods_count . "\n";

    # Inputs of the menu filtering: the menu hash and the URLs on the page.
    $report .= "menu_inf: " . Dumper($self->get_mainpage_freqhash_suburls);
    my %seen_url;
    $seen_url{ $_->norm_url }++ for @{ $self->get_internal_subpages_pgl };
    $report .= "pageurls: " . Dumper(\%seen_url);

    # Append the rendering of one stage to the report.
    my $log_stage = sub {
        my ($list, $title) = @_;
        $report .= $self->add_pgl_log_inf($list, $title);
        return;
    };

    my $pages = $self->get_internal_subpages_without_menu;
    $log_stage->($pages, 'get_internal_subpages_without_menu: ');

    $pages = $pages->good_pages_for_hierarchy;
    $log_stage->($pages, 'good_pages_for_hierarchy: ');

    my $pagers = $pages->get_pager_urls;
    $log_stage->($pagers, 'get_pager_urls: ');

    my $menu_freq = $self->get_mainpage_freqhash_suburls;
    $report .= Dumper($menu_freq);

    $pages->{'dbg'}         = 1;
    $pages->{'clear_pager'} = 1;

    # Each step replaces the list with the filter result and logs it under
    # the filter's own name.
    for my $step (
        [ 'add_pager_urls',       $menu_freq ],
        [ 'context_badre_filter', $self ],
        [ 'delete_bad_tmpls' ],
        [ 'brand_filtered' ],
    ) {
        my ($method, @args) = @$step;
        $pages = $pages->$method(@args);
        $log_stage->($pages, "$method: ");
    }

    my $brands = $pages->group_brand_tmpl_elems;
    $report .= "brnds:\n" . Dumper($brands);

    # Template groups before and after the frequency filter.
    my $tmpl_groups = $pages->get_tmpl_grps;
    $report .= $self->dump_lite($tmpl_groups);
    $report .= "_freq_tmpl_filter\n";
    $tmpl_groups = $pages->_freq_tmpl_filter($tmpl_groups);
    $report .= $self->dump_lite($tmpl_groups);

    for my $step (
        [ 'most_freq_tmpl_urls', $self->norm_url ],
        [ 'delete_vendorlike_pages' ],
        [ 'pack_urls' ],
    ) {
        my ($method, @args) = @$step;
        $pages = $pages->$method(@args);
        $log_stage->($pages, "$method: ");
    }

    $report .= "result count: " . $pages->count . "\n";
    return $report;
}

# For the interface: the hierarchy sub-pages as a phrase list.
# Every page is stringified, wrapped into a project phrase list, and commas
# inside each phrase are replaced with spaces.
sub get_hierarchy_subpages_phl {
    my $self = shift;

    my $pages   = $self->get_hierarchy_subpages;
    my $phrases = $self->proj->phrase_list( [ map { "$_" } @$pages ] );

    my $commas_to_spaces = sub { s/,/ /g; $_ };
    $phrases = $phrases->tmap($commas_to_spaces);

    return $phrases;
}

# Return the sub-pages of this page that may be its subsections.
#
# A non-empty deep_problem_pgl is returned as is. Otherwise the internal
# sub-pages are run through good_pages_for_hierarchy, cleaned of menu links
# (freqhash_filtered_urls) and reduced by subsection_filter.
sub subsection_subpages {
    my $self = shift;

    if ( $self->deep_problem_pgl && $self->deep_problem_pgl->count ) {
        return $self->deep_problem_pgl;
    }

    my $all_subpages = $self->get_internal_subpages_pgl;
    my $usable       = $all_subpages->good_pages_for_hierarchy;

    my $menu_freq    = $self->get_mainpage_freqhash_suburls;
    my $without_menu = $usable->freqhash_filtered_urls($menu_freq);

    my $subsections = $without_menu->subsection_filter;
    return $subsections;
}

# Check whether this URL can be a subsection.
#
# Returns '' when nothing speaks against it, otherwise the reason:
#   'badtype'    - the page type is listed in %badtypes
#   'badlength'  - the page name is longer than 60 characters
#   'badprodurl' - the URL contains "/prod" followed by four digits

# Page types that rule a page out as a subsection (lookup set).
our %badtypes = (
    pager         => 1,
    brand_exactly => 1,
    model         => 1,
    brand         => 1,
    qstn          => 1,
);

sub subsection_filter_reason :CACHE {
    my $self = shift;

    if ( $badtypes{ $self->get_type } ) {
        return 'badtype';
    }
    if ( length( $self->name ) > 60 ) {
        return 'badlength';
    }
    if ( $self->url =~ /\/prod\d{4}/ ) {
        return 'badprodurl';
    }
    return '';
}

# Check whether this page is the last level of subsections or not.
#
#   $exurl - passed unchanged to up_level_filter; presumably a hash of
#            normalized URLs of the upper levels to exclude (TODO confirm).
#
# Returns 0 when no sign of a last level was found (also when a non-empty
# deep_problem_pgl is set), otherwise a true reason string:
#   'pager'      - more than one pager URL among the sub-pages
#   'goods'      - subsection_filter rejects more than one page of the most
#                  frequent template
#   'badsuburls' - the sampled sub-pages all have the same, small (< 5)
#                  number of own sub-pages
# NOTE(review): the :CACHE attribute presumably memoizes the result; the
# sample below is taken after shuffle_pages, so the answer may depend on the
# shuffle order -- confirm.
sub is_lastlevel_subsection :CACHE {
    my ($self, $exurl) = @_;

    return 0 if $self->deep_problem_pgl && $self->deep_problem_pgl->count;

    my $allsubpgl = $self->get_internal_subpages_without_menu->up_level_filter( $exurl );

    my $prspgl = $allsubpgl->get_pager_urls;
    return 'pager' if @$prspgl > 1; # If there is a pager, there cannot be subsections

    my $subpgl = $allsubpgl; 
    $subpgl = $subpgl->good_pages_for_hierarchy;
    $subpgl = $subpgl->most_freq_tmpl_urls; # take the most frequent template

    # Analyse on the basis of product URLs: count the pages that
    # subsection_filter removes from the list
    my $sbscflt = $subpgl->subsection_filter;     
    my $mdlcnt = $subpgl->count - $sbscflt->count;
    return 'goods' if $mdlcnt > 1;

    $subpgl = $subpgl->shuffle_pages; # Shuffle the URLs

    # Filter for the URLs of the current level
    my $cur_exurl = {};
    $cur_exurl->{$_->norm_url}++ for @$allsubpgl; 

    # check for several URLs whether each of them is a section or not
    my $shtpgl = $subpgl->splice_pages(0, 5);
    if( $shtpgl->count > 2 ){
        # $pc maps "number of own sub-pages" => "how many sampled pages have it"
        my $pc = {};
        for my $p (@$shtpgl){
            my $curpgl = $p->get_internal_subpages_without_menu;
            $curpgl = $curpgl->up_level_filter( $exurl );        
            $curpgl = $curpgl->up_level_filter( $cur_exurl );        
            $curpgl = $curpgl->good_pages_for_hierarchy;
            $pc->{ $curpgl->count }++; 
        }
        # presumably h2sa yields [key, value] pairs ordered so that the most
        # frequent count comes first -- verify against Utils::Sys
        my $ctop = [h2sa($pc)]->[0];
        return 'badsuburls' if 
              ( $ctop->[0] < 5 )  # the top sub-URL count is less than 5
           && ( $ctop->[1] > $shtpgl->count - 1 ); # number of URLs sharing that sub-URL count
    }

    return 0; 
}

# Swap the invocant slot of the argument list for a compact dump form and
# return the whole argument list.
#
# The compact form is a reference to the string "page: <stringified page>";
# when chlrn is set it becomes [ that reference, chlrn ].
# NOTE: $_[0] is overwritten in place, so a variable the caller used as the
# invocant is replaced as well.
sub dumper_text_lite {
    my $self = $_[0];

    my $caption = \"page: $self";
    $_[0] = $self->chlrn ? [ $caption, $self->chlrn ] : $caption;

    return @_;
}

#sub is_catalog_url {
#    my ($self) = @_;
#    return 1 if $self->name =~ /^каталог(\s+товаров)?$/i;
#    return 0;
#}



1;

