package Yandex::ML::CLOPE;

# $Id$

use base 'Yandex::ML::Clustering';
use utf8;
use strict;
use warnings;
use Yandex::ML::Point;
use Data::Dumper;

#TODO
# Когда мы ищем профит, не учитываем "вес" слов
#
# Это плохо, вот пример
# Кластер А из трех баннеров
#     Сортировщики банкнот Glory      Большой выбор. Низкие цены. Продажа, обслуживание и ремонт.     9
#     Сортировщики банкнот KISAN      Большой выбор. Низкие цены. Бесплатная доставка по С-Пб и области       19
#     Сейфы в С-Пб.   Низкие цены! Большой выбор! Доставка!   29
#
# Кластер Б
#     Купить сейф VALBERG FRS-49 KL   Купить в Спб сейф VALBERG FRS-49 KL. Огнестойкость класс 90Б!   6
#     Сейф VALBERG ГАРАНТ 46  Купить в Спб сейф VALBERG ГАРАНТ 46. Устойчивость к взлому и огнестойкость.     16
#
# Объявление про сейф из кластера А не попало в клакстер Б потмоу что кластер А не про "Сортировщики банкнот" как может показаться, а про бесплатную доставку.
# Хочется учитывать при вычислении профита TF/IDF слов, чтобы частые слова входили с меньшим весом, а редкие с большим

sub new($$$)
{
    my ($class, $r, $dimensions) = @_;

    my $self =  $class->SUPER::new(undef);
    
    $self->{r} = $r;
    $self->{dimensions} = $dimensions;

    return $self;
}

sub make_new_point
{
    my ($self, $point_id, $point) = @_;
    
    if (ref($point) eq 'Point') {
        return $point;
    }
    
    return Yandex::ML::Point->new( $point_id, $self->{dimensions} )->init_point($point);
}

sub make_new_centroid
{
    my ($self) = @_;
    return Yandex::ML::CLOPECentroid->new($self->{r}, 0);#scalar $self->points()); 
}

sub init
{
    my ($self) = @_;

    foreach my $i ($self->points()) {
        my ($new_position, $better_dist) = $self->find_better_centroid($i);
        $self->add_to_centroid($new_position, $i, $better_dist);
    }
}

sub is_better_distance
{
    my ($self, $current_better_dist, $distance) = @_;

    return 1 unless defined $current_better_dist;
    
    if ($distance >  $current_better_dist) {
        return 1;
    }

    return 0;
}

sub new_centroid_is_better
{
    my ($self, $best_distance, $point, $current_centroid) = @_;
    
    if ($best_distance  < 0) {
        return 1;
    }

    #Если создание нового центроида лучше чем старый
    my $delta = $current_centroid ? $current_centroid->_delta_sub($point) : 0;

    my @attributes = $point->get_all_used_coords();
    
    my $w = scalar @attributes;
    my $s = $w;
    
    $self->{height} = $s / $w;

    my $profit = $s / ($w ** $self->{r});

    return ($best_distance >= ($profit+$delta) ) ? 0 : 1;
}

1;

package Yandex::ML::CLOPECentroid;

use strict;
use warnings;

use Data::Dumper;

use fields qw(attributes width height square count r point_counter profit);

sub new($$$)
{
    my ($class, $r, $point_count) = @_;

#    my $self =  $class->SUPER::new();
    
    my $self = fields::new($class);

    $self->{attributes} = {};
    $self->{width}      = 0;
    $self->{height}     = 0;
    $self->{square}     = 0;
    $self->{count}      = 0;
    $self->{r}          = $r;
    $self->{point_counter} = 1;#$point_count;

    return $self;
#    return bless $self, $class;
}

sub attributes
{
    my ($self) = @_;

    return keys %{ $self->{attributes}  };
}

sub add_point
{
    my ($self, $point) = @_;
    
    my @attributes = $point->get_all_used_coords();
    
#   warn Dumper(\@attributes, $point);
    
    unless (@attributes) {
        die Dumper( $point, $self, [(caller)]   );
    }

    foreach (@attributes) {
        $self->{attributes}{$_}++;
        $self->{square}++;
    }
    
    $self->{width} = scalar keys %{ $self->{attributes} };
    $self->{height} = $self->{square} / $self->{width};
    $self->{count}++;

    $self->{profit} = ($self->{square} * $self->{count} ) / ($self->{width} ** $self->{r});
    
    $self->{profit} /= $self->{point_counter};

#    die Dumper($self);
}

sub add_centroid
{
    my ($self, $centroid) = @_;
    
    return unless scalar keys %{ $centroid->{attributes}  };

    foreach my $attr (keys %{ $centroid->{attributes}  } ) {
        $self->{attributes}{$attr} += $centroid->{attributes}{$attr};
        $self->{square} += $centroid->{attributes}{$attr};
    }
    
    $self->{width} = scalar keys %{ $self->{attributes} };
    $self->{height} = $self->{square} / $self->{width};

    unless ($centroid->{count}) {
        warn Dumper($centroid);
    }

    $self->{count} += $centroid->{count};
}

sub remove_point
{
    my ($self, $point) = @_;

    my @attributes = $point->get_all_used_coords();
    
#   warn Dumper(\@attributes, $point);
    
    unless (@attributes) {
        die Dumper( $point, $self, [(caller)]   );
    }

    foreach (@attributes) {
        $self->{attributes}{$_}--;
        
        if ( $self->{attributes}{$_} <= 0 ) {
            delete $self->{attributes}{$_};
        }

        $self->{square}--;
    }
    
    $self->{width} = scalar keys %{ $self->{attributes} };
   
    die Dumper($self) if !$self->{square} &&  $self->{width};

    return undef unless $self->{width};

    $self->{height} =  $self->{square} / $self->{width};
    $self->{count}--;

    $self->{profit} = ($self->{square} * $self->{count} )/ ($self->{width} ** $self->{r});
    $self->{profit} /= $self->{point_counter};

    return 0;
}

sub _delta_add
{
    my ($self, $point) = @_;

    my @attributes = $point->get_all_used_coords();

    my $s_new = $self->{square} + scalar(@attributes);
    my $w_new = $self->{width};

    foreach (@attributes) {
        $w_new++ if !defined $self->{attributes}{$_};
    }
   
    die "bad w_new" if ($w_new < $self->{width});

    my $result;

    #eval { 
        $result = $s_new * ($self->{count} + 1) * $self->{point_counter} / ($w_new ** $self->{r}) - ($self->{profit} || 0);
    #};
    
    #warn "distance = $result r = $self->{r} profit: $self->{profit}\n";

    #if ($@) {
    #    die Dumper($@, $self, \@attributes, {s_new => $s_new, w_new => $w_new, result => $result, 'caller' => [(caller)]});
    #}

    return $result;
}

sub centroid_delta
{
    my ($self, $centroid) = @_;

    my @attributes = $centroid->attributes();

    my $s_new = $self->{square};
    my $w_new = $self->{width};

    foreach (@attributes) {
        $w_new++ if !defined $self->{attributes}{$_};
        $s_new += $centroid->{attributes}{$_};
    }
   
    die "bad w_new" if ($w_new < $self->{width});

    my $result;

    eval { $result = $s_new * ($self->{count} + $centroid->{count}) * $self->{point_counter} / ($w_new ** $self->{r}) - ($self->{profit} || 0) };
    
    #warn "distance = $result r = $self->{r} profit: $self->{profit}\n";

    if ($@) {
        die Dumper($@, $self, \@attributes, {s_new => $s_new, w_new => $w_new, result => $result, 'caller' => [(caller)]});
    }

    return $result;
}

sub _delta_sub
{
    my ($self, $point) = @_;

    my @attributes = $point->get_all_used_coords();

    my $s_new = $self->{square};
    my $w_new = $self->{width};

    foreach (@attributes) {
        if (defined $self->{attributes}{$_}) {
            $w_new-- if $self->{attributes}{$_} == 1;
            $s_new--;
        }
        else {
            die Dumper($self, $point);
        }
    }
    
    die "bad width " if $w_new < 0;

    my $result;
    
    return - $self->{profit}  unless $w_new;

#    eval { 
         $result = $s_new * ($self->{count} - 1) * $self->{point_counter} / ($w_new ** $self->{r}) - $self->{profit};
#     };
    
    #warn "distance = $result r = $self->{r} profit: $self->{profit}\n";

#    if ($@) {
#        die Dumper($@, $self, \@attributes, {s_new => $s_new, w_new => $w_new, result => $result, 'caller' => [(caller)]});
#    }

    return $result;
}

sub distance
{
    my ($self, $point, $current_centroid) = @_;

    my ($add, $sub) = (0,0);
    
    $add = $self->_delta_add($point);

    $sub = $current_centroid->_delta_sub($point) if $current_centroid;
    
    if ($current_centroid && $self == $current_centroid) {
        die "ZZZZ ($add + $sub):".($add+$sub) if ($add + $sub != 0);
        return 0;
    }
    
#    if ($self->{width} > 1000) {
#        die Dumper($self, $add, $sub, $point);
#    }

    return $add + $sub;
}

sub count
{
    my ($self) = @_;
    return $self->{count};
}

sub compare
{
    my ($self, $centroid, $changed) = @_;
    return $changed;
}

1;
