#!/usr/bin/perl -w
# Fetches banner text (title and body) by banner ID for banners whose category ID is known; category text is looked up by ID from the ctg2id file.

use strict;

use utf8;
use open ":utf8";
use Data::Dumper;

# Put a UTF-8 layer on all three standard streams.
# The third call used to repeat STDOUT, which left STDERR without a UTF-8
# layer, so non-ASCII diagnostics would raise "Wide character" warnings.
binmode STDIN,  ':utf8';
binmode STDOUT, ':utf8';
binmode STDERR, ':utf8';

use FindBin;
use lib "$FindBin::Bin/../lib";
use lib "/home/yuryz/arcadia/rt-research/broadmatching/scripts/lib";

use Utils::Common;
use Project;

# Options handed to the Project constructor.
# NOTE(review): presumably these ask Project to load its dictionaries and a
# lightweight minicategory set up front -- confirm against the Project module.
my %project_options = (
    load_minicategs_light => 1,
    load_dicts            => 1,
);

# Shared project handle; the per-line callback uses it to look banners up.
# A fresh anonymous hash is passed, exactly as a literal `{ ... }` would be.
my $proj = Project->new({ %project_options });

# Build the category-ID -> category-text lookup from the "ctg2id" file in the
# current working directory. Each line is "<category text>\t<category id>".
my %id2ctg;
my $ctg_file = "ctg2id";

# Three-argument open with a lexical handle; the UTF-8 layer still comes from
# the file-level `use open ":utf8"` because no layer is given in the mode.
# Fail loudly: with an unreadable file the map would stay empty and every
# input record would be skipped without any diagnostic.
open my $ctg_fh, '<', $ctg_file
    or die "Cannot open '$ctg_file' for reading: $!";

while (my $row = <$ctg_fh>) {
    chomp $row;
    my ($ctg, $ctg_id) = split /\t/, $row;    # 0 - ctg, 1 - ctg_id

    # Ignore blank or malformed lines that carry no ID column; otherwise an
    # undefined value would be used as a hash key.
    next unless defined $ctg_id;

    $id2ctg{$ctg_id} = $ctg;
}

# Release the handle before the worker starts.
close $ctg_fh;

# Worker object that drives the per-line processing.
# NOTE(review): Utils::Worker is not loaded explicitly in this file;
# presumably Utils::Common or Project pulls it in -- confirm.
my $worker = Utils::Worker->new;

# Settings are stored directly as fields of the worker hash.
# NOTE(review): presumably `verbose` enables progress output and
# `num_processes` is the number of parallel workers -- confirm in Utils::Worker.
my %worker_settings = (
    verbose       => 1,
    num_processes => 20,
    file_input    => "/home/yuryz/scripts/sample/bnr_low_freq_us",
    file_output   => "/home/yuryz/scripts/sample/bnr_low_freq_text",
);

# keys() and values() return entries in matching order, so this hash-slice
# assignment sets each field to its own value.
@{$worker}{ keys %worker_settings } = values %worker_settings;

# Per-line callback stored on the worker.
#   $line - one input record: "<banner id>\t<category id>"
#   $fh   - handle the result is printed to (presumably the worker's output
#           handle -- confirm in Utils::Worker)
# Prints "<banner id>\t<title>\t<body>\t<category id>\n" to $fh for every
# record whose category ID is known and whose banner can be fetched; all
# other records are skipped.
$worker->{process_line}     = sub {
    my ($line, $fh) = @_;
    chomp $line;

    my ($bid, $ctg_id) = split /\t/, $line;

    # Malformed record without a category column: skip it instead of using
    # an undefined value as a hash key.
    return unless defined $ctg_id;

    # Membership test on the ID. The category text itself is not printed, so
    # a category whose text is "0" or empty must not cause the record to be
    # dropped.
    return unless exists $id2ctg{$ctg_id};

    my $bnr = $proj->bf->get_banner_by_id($bid);
    return unless $bnr;

    my $title = $bnr->title;
    my $body  = $bnr->body;

    # Alternative output carrying the category text instead of its ID:
    #print {$fh} "$bid\t$title\t$body\t$id2ctg{$ctg_id}\n";
    print {$fh} "$bid\t$title\t$body\t$ctg_id\n";
};

# Start the run.
# NOTE(review): presumably Utils::Worker reads file_input, calls process_line
# for each line across num_processes workers and collects the results in
# file_output -- confirm in Utils::Worker.
$worker->process_data;
