#!/usr/bin/perl -w
use strict;
use warnings;
use utf8;
use open ":utf8";

binmode(STDIN,':utf8');
binmode(STDOUT,':utf8');


use FindBin;
use lib "$FindBin::Bin/../../lib";
use lib "$FindBin::Bin/../../wlib";
use Utils::Sys qw/md5int/;

use Getopt::Long;
use CatalogiaMediaProject;
use Project;
use Cmds::Mediaplanners;
use Data::Dumper;
use BM::PhraseCategs;
my $proj = Project->new({
    load_dicts => 1,
});

my $wordlists = {};
my $flag = 'alcohol';

my $sql_list = $proj->catalogia_media_dbh->List_SQL('select 
                                                         distinct title, body 
                                                     from 
                                                         CatalogiaManualFlags 
                                                     where choosed regexp \'(^|,)'.$flag.'(,|$)\' or ( flags regexp \'(^|,)'.$flag.'(,|$)\' and unchecked not regexp \'(^|,)'.$flag.'(,|$)\' ) limit 100');
my $list = [];
foreach my $item ( @$sql_list ) {
    push @{$list}, [ grep { $_ !~ /\d/} $proj->phrase(join(' ', $item->{title}, $item->{body}))->uniqnormwords];
}

my $rules = {};
my $hashes = {};
my ($oldcount, $newcount) = (0,0);
my $iteration = 0;
do {
    $oldcount = $newcount;
    $iteration++;
    if ( !$oldcount ) {
        foreach my $nw ( @$list ) {
            $rules->{$_}++ foreach @$nw;
        }
    }
    else {
        foreach my $nw ( @$list ) {
            next unless scalar(@$nw) >= $iteration;
            foreach my $rule ( grep { scalar(split /\s/, $_) == $iteration-1 } keys %$hashes ) {
                if ( scalar(grep {$hashes->{$rule}{$_}} @$nw) == $iteration-1 ) {
                    my @newwords = grep {!exists $hashes->{$rule}{$_}} @$nw;
                    foreach my $newword ( @newwords ) {
                        my $new_rule = join(' ', sort (keys %{$hashes->{$rule}}, $newword));
                        $rules->{$new_rule}++;
                    }
                last;
                }
            }
        }
    }

    foreach my $rule ( keys %$rules ) {
        delete $rules->{$rule} if $rules->{$rule} == 1;
    }

    foreach my $rule ( keys %$rules ) {
        $hashes->{$rule} = {map {$_ => 1} split /\s/, $rule } unless exists $hashes->{$rule};
    }
    $newcount = scalar(keys %$rules);
    print "$newcount\n";
} while $newcount > $oldcount;

my $counts = {};

foreach my $rule ( keys %$rules ) {
    $counts->{$rule} = scalar(keys %{$hashes->{$rule}});
}
my $sql_counter_list = $proj->catalogia_media_dbh->List_SQL('select 
                                                                 distinct title, body 
                                                             from 
                                                                 CatalogiaManualFlags 
                                                             where 
                                                                 choosed not regexp \'(^|,)'.$flag.'(,|$)\' and (flags not regexp \'(^|,)'.$flag.'(,|$)\' or unchecked regexp \'(^|,)'.$flag.'(,|$)\' )');
foreach my $item ( @$sql_counter_list ) {
    my $nw = [ grep { $_ !~ /\d/} $proj->phrase(join(' ', $item->{title}, $item->{body}))->uniqnormwords];
    foreach my $rule ( keys %$rules ) {
        if ( $counts->{$rule} == scalar( grep {exists $hashes->{$rule}{$_}} @$nw ) ) {
            delete $rules->{$rule};
            delete $hashes->{$rule};
            $newcount--;
            print "phrase '$item->{title} $item->{body}' deletes rule $rule, remaining: $newcount\n";
        }
    }
}

$proj->dd($rules);
