#!/usr/bin/perl -w
#отбор фраз для пополнения категорий - 1-й этап

use strict;
use utf8;
use open ':utf8';
no warnings 'utf8';

binmode(STDIN,  ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

use FindBin;
use lib "$FindBin::Bin/../lib";
#use lib "/home/yuryz/arcadia/rt-research/broadmatching/scripts/wlib";
use lib "/home/yuryz/arcadia/rt-research/broadmatching/scripts/lib";

use Utils::Common;
use Project;
use BM::Phrase;
use BM::PhraseList;
use Time::HiRes qw(tv_interval gettimeofday);

# Build the shared Project object with the load_dicts and
# load_minicategs_light options switched on.
my %project_options = (
    load_dicts            => 1,
    load_minicategs_light => 1,
);
my $proj = Project->new({ %project_options });

# Stage 1 of selecting phrases for replenishing categories.
#
# Reads tab-separated banner records from STDIN
# (sample input: /home/yuryz/scripts/data/bnrs_10kk.camp) and prints, for every
# accepted record, one line to STDOUT:
#
#     <category> TAB <field 3> TAB <intent or NO_INTENT> TAB <brand/model or NO_BRAND>
#
# A record is accepted when field 21 is exactly "lang=ru", field 20 carries a
# single category without "/" after its 6-character prefix, and the banner can
# be found by its id (field 0).
LINE:
while (my $line = <STDIN>) {
    chomp $line;

    my @f = split /\t/, $line;

    # Rows with fewer than 22 columns cannot carry the language marker; skip
    # them explicitly instead of comparing an undefined value under -w.
    next LINE if @f < 22;
    next LINE if $f[21] ne "lang=ru";

    # Field 20 is expected to look like "mctgs=<category>"; anything not longer
    # than the 6-character prefix has no category to extract.
    next LINE unless length($f[20]) > 6;
    my $categ = substr($f[20], 6);

    # Keep only non-homonyms: categories containing "/" are skipped.
    next LINE unless $categ && $categ !~ m{/};

    my $bnr = $proj->bf->get_banner_by_id($f[0]);
    next LINE unless $bnr;

    # get_intent yields a hash reference with an "intent" key.
    my $intent_info = $bnr->get_intent;
    my $intent      = $intent_info->{intent} || "NO_INTENT";

    # parse yields the list (brand, model); either part may be empty.
    my ($brand, $model) = $bnr->parse;
    my $br_mod = join(" ", grep { $_ } ($brand, $model)) || "NO_BRAND";

    print "$categ\t$f[3]\t$intent\t$br_mod\n";
}
