package Yandex::ML::Clustering;

# $Id$

use utf8;
use strict;
use warnings;
use Data::Dumper;
use List::Util qw(shuffle);
use Yandex::Trace;

# Constructor. Returns an object blessed into $class with empty state:
#   centroids          - array ref that will hold centroid objects
#   points             - array ref that will hold point objects
#   nearests_centroids - hash ref mapping point index => centroid index
# The ($) prototype has no effect when new() is invoked as a method.
sub new($)
{
    my $class = shift;

    my %state = (
        centroids          => [],
        points             => [],
        nearests_centroids => {},
    );

    return bless \%state, $class;
}

# Stores the maximum number of refinement passes that clusterization() may
# perform (clusterization() falls back to 100 when the value is false).
# Returns the stored value.
sub set_cnt
{
    my $self = shift;
    my ($limit) = @_;

    return $self->{cnt} = $limit;
}

# Factory methods
# Factory hook: must build and return a new centroid object.
# Within this file it is called without arguments by _add_new_centroid() and
# with a point index by the fallback branch of add_to_centroid().
# The base class has no implementation and always dies; the message names the
# method, matching the other abstract hooks in this class.
sub make_new_centroid
{
    my ($self, @params) = @_;
    die "make_new_centroid must be overloaded";
}

# Factory hook: must build and return the point object for the arguments that
# were passed to add_point(). The base class has no implementation and
# always dies.
sub make_new_point
{
    my $self = shift;

    die "make_new_point must be overloaded";
}

# Hook invoked once by clusterization() before the refinement loop starts.
# The base class has no implementation and always dies.
sub init
{
    my ($self) = @_;

    die "init must be overloaded";
}

# Comparison hook used by find_better_centroid(): must return true when
# $distance should replace $current_min_dist as the best candidate so far.
# $current_min_dist is undef for the first centroid examined.
# The base class has no implementation and always dies.
sub is_better_distance
{
    my $self = shift;
    my ($current_min_dist, $distance) = @_;

    die "is_better_distance must be overloaded";
}

# Attaches the point with index $point_index to a centroid and records the
# assignment in nearests_centroids.
#   $centroid_index - index of the candidate centroid, or undef if there is none
#   $point_index    - index of the point in $self->{points}
#   $best_distance  - distance to the candidate; handed to new_centroid_is_better()
# A fresh centroid is created through _add_new_centroid() when no candidate was
# given or when new_centroid_is_better() returns true.
# Returns the index of the centroid the point ended up in.
# Note: the point is not removed from any centroid it may already belong to.
sub add_to_centroid
{
    my ($self, $centroid_index, $point_index, $best_distance) = @_;

    my $point = $self->{points}[$point_index];

    my $needs_new = !defined($centroid_index)
        || $self->new_centroid_is_better($best_distance, $point, undef);
    $centroid_index = $self->_add_new_centroid() if $needs_new;

    unless (defined $centroid_index) {
        # Fallback: only reached when _add_new_centroid() returned undef.
        # Builds the centroid from the point index and appends it by hand.
        my $centroid = $self->make_new_centroid($point_index);
        $centroid->add_point($point);
        push @{ $self->{centroids} }, $centroid;

        return $self->{nearests_centroids}{$point_index} = $#{ $self->{centroids} };
    }

    $self->{centroids}[$centroid_index]->add_point($point);
    return $self->{nearests_centroids}{$point_index} = $centroid_index;
}

# Builds a point through the make_new_point() factory hook and appends it to
# $self->{points}.
#   @params - passed through unchanged to make_new_point()
# Returns the new number of stored points (the result of push).
#
# The former ($$$) prototype was dropped: it is ignored for method calls and
# contradicted the variadic parameter list, rejecting plain-function calls
# with any other number of arguments at compile time.
sub add_point
{
    my ($self, @params) = @_;

    # make_new_point() runs in list context; everything it returns is appended.
    return push @{ $self->{points} }, $self->make_new_point(@params);
}

# Returns the internal array ref holding the point objects (the very same
# reference, not a copy).
sub get_points_ref
{
    my $self = shift;

    return $self->{points};
}

# In list context returns the indices of all stored points:
# 0 .. (number of points - 1); an empty list when there are no points.
sub points
{
    my $self = shift;

    my $count = @{ $self->{points} };

    return (0 .. $count - 1);
}

# Runs the clustering: calls init() once, then repeats
# _calculate_new_centroids() until a pass reports no change or the pass budget
# is used up. The budget is $self->{cnt}, or 100 when that value is false
# (unset or 0).
sub clusterization
{
    my $self = shift;

    # Held in a lexical for the whole call; presumably a scope guard that
    # reports when it is destroyed - confirm against Yandex::Trace.
    my $profile = Yandex::Trace::new_profile('ml:clustering:clusterization');

    # Enables autoflush on the currently selected output handle. The setting
    # is global and is not restored when this method returns.
    $| = 1;

    $self->init();

    my $passes_left = $self->{cnt} || 100;
    my $changed     = 1;

    while ($changed && $passes_left--) {
        $changed = $self->_calculate_new_centroids();
    }
}


# Picks the best centroid for the point with index $point_idx.
# Every defined entry of $self->{centroids} is examined in index order:
#   - the centroid the point is currently assigned to gets distance 0;
#   - any other centroid is asked via ->distance($point, $current_centroid);
#   - is_better_distance() decides whether a candidate replaces the best so far.
# Returns ($centroid_index, $distance); both are undef when no centroid was
# accepted.
sub find_better_centroid
{
    my ($self, $point_idx) = @_;

    my $point       = $self->{points}[$point_idx];
    my $current_idx = $self->{nearests_centroids}{$point_idx};
    my $current_centroid
        = defined $current_idx ? $self->{centroids}[$current_idx] : undef;

    my ($best_idx, $best_dist);
    my $total = @{ $self->{centroids} };

    for my $i (0 .. $total - 1) {
        # Slots of removed centroids are undef - skip them.
        my $candidate = $self->{centroids}[$i];
        next unless defined $candidate;

        my $dist = ($current_centroid && $i == $current_idx)
            ? 0
            : $candidate->distance($point, $current_centroid);

        next unless $self->is_better_distance($best_dist, $dist);

        ($best_idx, $best_dist) = ($i, $dist);
    }

    return ($best_idx, $best_dist);
}

# Creates a centroid via make_new_centroid() (called without arguments),
# appends it to $self->{centroids} and returns the index of the new element.
sub _add_new_centroid
{
    my ($self) = @_;

    my $new_centroid = $self->make_new_centroid();
    push @{ $self->{centroids} }, $new_centroid;

    # The freshly pushed centroid is always the last element.
    return $#{ $self->{centroids} };
}

# Moves $point from one centroid to another.
#   $from  - index of the centroid the point currently belongs to, or undef
#            when the point has no centroid yet (then nothing is removed)
#   $to    - index of the destination centroid
#   $point - the point object itself
# When remove_point() on the source centroid returns undef, the source is
# treated as empty and its slot in $self->{centroids} is deleted.
# The return value is whatever the destination's add_point() returns; the
# caller in this file ignores it.
sub _move_point_between_centroids
{
    my ($self, $from, $to, $point) = @_;

    # Look up the destination object up front.
    my $target = $self->{centroids}[$to];

    if (defined $from) {
        # Detach the point from its old centroid.
        my $emptied = !defined $self->{centroids}[$from]->remove_point($point);

        # The centroid became empty - drop it. delete (rather than splice)
        # leaves the indices of the remaining centroids untouched.
        delete $self->{centroids}[$from] if $emptied;
    }

    # ...and attach the point to the more suitable centroid.
    $target->add_point($point);
}

# Performs one refinement pass over all points.
# For every point, in index order:
#   1. find_better_centroid() proposes a centroid and its distance;
#   2. a brand-new centroid is created through _add_new_centroid() when nothing
#      was proposed or new_centroid_is_better() returns true;
#   3. if the chosen centroid differs from the recorded one (or none was
#      recorded), the point is moved via _move_point_between_centroids();
#   4. the choice is recorded in nearests_centroids.
# Returns 1 if at least one point was moved, 0 otherwise.
sub _calculate_new_centroids($)
{
    my $self = shift;

    my $changed = 0;
    my $total   = @{ $self->{points} };

    for my $j (0 .. $total - 1) {
        my $point = $self->{points}[$j];

        my ($nearest_centroid, $better_distance) = $self->find_better_centroid($j);

        my $previous = $self->{nearests_centroids}{$j};
        my $current_centroid
            = defined $previous ? $self->{centroids}[$previous] : undef;

        if (!defined($nearest_centroid)
            || $self->new_centroid_is_better($better_distance, $point, $current_centroid))
        {
            $nearest_centroid = $self->_add_new_centroid();
        }

        # Move only when the assignment really changes.
        unless (defined($previous) && $previous == $nearest_centroid) {
            $self->_move_point_between_centroids($previous, $nearest_centroid, $point);
            $changed = 1;
        }

        $self->{nearests_centroids}{$j} = $nearest_centroid;
    }

    return $changed;
}

# Groups the points by the centroid recorded for them in nearests_centroids.
# Returns an array ref indexed by centroid index. Each used slot is an array
# ref whose first element is [ "centroid", $centroid_object ], followed by one
# [ $point->id(), $point ] pair per point in point-index order. Slots of
# centroid indices that own no points are undef.
# Points that have no centroid recorded yet are left out of the result.
sub get_clusters
{
    my ($self) = @_;
    my @result;

    my $total = @{ $self->{points} };

    for my $i (0 .. $total - 1) {
        my $nearest_centroid = $self->{nearests_centroids}{$i};

        # An unassigned point has no cluster. Using the undef index would
        # silently file it under cluster 0 (undef numifies to 0).
        next unless defined $nearest_centroid;

        my $point = $self->{points}[$i];

        # The first point of a cluster also opens it with the centroid entry.
        $result[$nearest_centroid]
            ||= [ [ "centroid", $self->{centroids}[$nearest_centroid] ] ];

        push @{ $result[$nearest_centroid] }, [ $point->id(), $point ];
    }

    return \@result;
}

1;

