package BM::Pages::LandingPage;

use base qw(BM::Pages::Page);

use List::Util qw(min max);
use List::MoreUtils qw(uniq);
use Utils::Words qw(word2norm text2normwords);
use strict;
use utf8;

# Objects of this class must be constructed with a banner_id:
# it is used to build the remote cache key (see get_remotecache_id).


# сейчас мы кэшируем текст страницы в памяти, в отличие от BM::Pages::Page !!
sub text :CACHE {
    my $self = shift;

    return '' if $self->{fake_empty_text};

    my $tm = $self->{timer};
    $tm->time('landing_page_text') if $tm;

    my $text = $self->_text;

    $tm->time_prev if $tm;

    return Utils::Sys::make_good_utf_string($text);
}

# 3600 * 24 * 14 = 1209600
sub _text : KYOTOCACHE(1209600) {
    my $self = shift;
    return $self->tt // "";
}

# cache by banner id
sub get_remotecache_id {
    my $self = shift;
    return $self->{banner_id}.'_landing_page';
}

sub title_phr {
    my $self = shift;
    $self->{title_phr} //= $self->proj->phrase($self->title);
    return $self->{title_phr};
}

sub title_subphl {
    my $self = shift;
    return $self->title_phr->replace_html_spec->get_keywords_from_title;
}

# Text extractors -- return arrays of texts
sub __tag2contents {
    my $tree = shift;
    my $tag_pattern = shift;

    my @contents;
    for my $element ($tree->look_down(_tag => qr/$tag_pattern/)) {
        my $content = join " ", grep { !ref($_) } $element->content_list;  # remove sub-elements
        $content =~ s/^\s+|\n|\s+$//g;
        next unless $content;
        push @contents, $content;
    }
    return @contents;
}

# lazy creation of html tree
sub html_tree :CACHE {
    my $self = shift;
    require HTML::TreeBuilder;
    return HTML::TreeBuilder->new_from_content($self->text);
}

sub tags2texts {
    my $self = shift;
    my $tag = shift // 'h';

    my $text = $self->text;
    return () unless $text;

    my $tag_pattern;
    if ($tag eq 'h') {
        $tag_pattern = 'h[1-6]';
    } elsif ($tag eq 'a') {
        $tag_pattern = 'a';
    }

    my @contents = __tag2contents($self->html_tree, $tag_pattern);
    return map {$self->_clean_text($_)} @contents;
}

sub tags2texts_old {
    # this method is deprecated
    my $self = shift;
    my $tag = shift // 'h';

    my $text = $self->text;
    return () unless $text;
    my $pattern;
    if ($tag eq 'h') {
        $pattern = "<h[1-6]>(.*?)<\/h[1-6]>";
    } elsif ($tag eq 'a') {
        $pattern = "<a .+?>(.+?)<\/a>";
    }
    return map {$self->_clean_text($_)} ($text =~ /$pattern/g);
}

sub texts_htags :CACHE {
    my $self = shift;
    return map {join(" ", text2normwords($_))} $self->tags2texts('h');
}

sub texts_atags :CACHE {
    my $self = shift;
    return map {join(" ", text2normwords($_))} $self->tags2texts('a');
}

sub banner_contexts {
    my $self = shift;
    my $k = shift // 1;

    my $banner_text = $self->{banner_title}." ".$self->{banner_body};
    my $bnr_normwords = $self->proj->phrase($banner_text)->normwordshash;

    my $text = $self->one_line_text;
    return () unless $text;
    # add some spaces between tags
    $text =~ s/</ </g;
    $text =~ s/>/> /g;
    my @landing_words = split /\s+/, $text;
    my $landing_nwords = scalar @landing_words;
    my @contexts;
    for my $i (0..$landing_nwords - 1) {
        my $w = word2norm $landing_words[$i];
        if (defined $bnr_normwords->{$w}) {
            # get context
            my $left = max(0, $i - $k);
            my $right = min($landing_nwords-1, $i + $k);
            my @words = @landing_words[$left..$right];
            # simple filtering
            @words = grep {!/<|>/} @words;
            push @contexts, $self->proj->phrase(join(" ", @words))->norm_phr;
        }
    }
    return uniq @contexts;
}

sub banner_contexts_3 :CACHE {
    my $self = shift;
    return map {join(" ", text2normwords($_))} $self->banner_contexts(3);
}

sub _clean_text {
    # remove tags and html symbols
    my $self = shift;
    my $text = shift;

    $text =~ s/<\/?.+?>|&.+?;//g;
    return $text;
}

1;
