package Yandex::ML::SCLOPE;

use strict;
use warnings;

use Yandex::ML::SClope::Online;
use Yandex::ML::CLOPE;
use Data::Dumper;

# Constructor.
#
# Arguments:
#   $class      - class name to bless the new object into
#   $dimensions - stored as-is in {dimensions}; it is later handed to
#                 Yandex::ML::CLOPE->new by clusterization()
#
# The repulsion (1.6) and minimal common prefix (2) are fixed here and
# forwarded to the Yandex::ML::SClope::Online constructor, whose result
# is kept in {sclope}.
#
# Returns the new object.
sub new
{
    my ($class, $dimensions) = @_;

    my $self = bless {}, $class;

    $self->{repulsion}         = 1.6;
    $self->{common_prefix_min} = 2;
    $self->{dimensions}        = $dimensions;

    # Explicit Class->new(...) instead of the indirect-object form
    # "new Class(...)": this package defines its own new(), so the
    # indirect form relies on the parser's heuristics to pick the
    # method call, and it is unavailable under "use v5.36".
    $self->{sclope} = Yandex::ML::SClope::Online->new(
        repulsion         => $self->{repulsion},
        common_prefix_min => $self->{common_prefix_min},
    );

    # Disabled tuning knobs, kept for reference:
    #$self->{window_width} ||= scalar (keys %{ $self->{points} }) ;
    #$self->{number_of_microclusters} ||= 1000;

    #my $window_width = int($self->{window_width});

    return $self;
}

# Feeds one point (transaction) into the online SCLOPE stage.
#
# Arguments:
#   $id         - identifier stored with the transaction
#   $attributes - either a hash reference (its keys become the attribute
#                 list) or an array reference (used as the attribute list)
#
# {transactions} is incremented on every call, including calls whose
# attribute set is empty or of an unsupported type; those calls add
# nothing to the SCLOPE stage.
#
# Returns whatever add_transaction() returns when a transaction was
# added, and nothing otherwise.
#
# Note: no prototype is declared. The former "($$)" did not match the
# three arguments taken here; prototypes are ignored for method calls
# and would only reject a plain three-argument function call.
sub add_point
{
    my ($self, $id, $attributes) = @_;

    $self->{transactions}++;

    if (ref($attributes) eq 'HASH') {
        return unless scalar keys %$attributes;
        return $self->{sclope}->add_transaction( { id => $id, attributes => [ keys %$attributes ] } );
    }
    elsif (ref($attributes) eq 'ARRAY') {
        return unless scalar @$attributes;
        return $self->{sclope}->add_transaction( { id => $id, attributes => $attributes } );
    }

    # Unsupported attribute container: nothing to add.
    return;
}

# Replaces the stored {dimensions} value with the given one.
# The assigned value is also the return value.
sub set_dimensions
{
    my $self = shift;
    my ($d)  = @_;

    return $self->{dimensions} = $d;
}

# Runs the final clustering pass.
#
# Steps:
#   1. asks the online SCLOPE stage for its microclusters via
#      merge_clustering(10);
#   2. adds every microcluster to a fresh Yandex::ML::CLOPE instance as
#      one point: element 0 of the microcluster is passed as the point,
#      and the keys of element 1 (a hash reference) are joined with ","
#      to form the point id;
#   3. drops {sclope}, calls set_cnt(3) and clusterization() on the
#      CLOPE instance and stores its get_clusters() result in {clusters}.
#
# Because {sclope} is dropped in step 3, a repeated call has no stream
# state left to merge; it returns the clusters already stored instead
# of dying on the missing object.
#
# Returns the value stored in {clusters}.
sub clusterization
{
    my ($self) = @_;

    # Already clustered (or never initialised): nothing left to merge.
    return $self->{clusters} unless $self->{sclope};

    my $clope = Yandex::ML::CLOPE->new($self->{repulsion}, $self->{dimensions});

    #$self->print_stat();

    my $microclusters = $self->{sclope}->merge_clustering(10);

    #print STDERR  "now microclusters = " . scalar( keys %{$self->{sclope}->{tree}{microclusters}} ) . "\n";

    foreach my $cluster (values %$microclusters) {
        my $new_id    = join(",", keys %{$cluster->[1]});
        my $new_point = $cluster->[0];

        $clope->add_point($new_id, $new_point);
    }

    delete $self->{sclope};

    $clope->set_cnt(3);
    $clope->clusterization();

    return $self->{clusters} = $clope->get_clusters();
}

# Accessor for {clusters}, the value stored by clusterization().
# Returns undef when nothing has been stored there yet.
sub get_clusters
{
    my $self = shift;

    my $clusters = $self->{clusters};
    return $clusters;
}

# Prints a diagnostic summary of the online SCLOPE stage to STDERR:
# tree node count, number of microclusters, number of add_point() calls,
# number of known attributes, the configured parameters and the value
# returned by the stage's profit() method.
#
# When {sclope} is absent (clusterization() deletes it) a single notice
# line is printed instead. Dereferencing the missing object would
# autovivify {sclope} as a plain hash and then die on ->profit().
#
# {transactions} and {number_of_microclusters} may be unset; they are
# printed as 0 and as an empty string respectively, without triggering
# "uninitialized value" warnings.
sub print_stat
{
    my ($self) = @_;

    my $sclope = $self->{sclope};
    unless ($sclope) {
        print STDERR  "FP summary: no SCLOPE state available\n";
        return;
    }

    my $transactions = defined $self->{transactions}
        ? $self->{transactions}
        : 0;
    # Nothing in this module assigns number_of_microclusters.
    my $fi = defined $self->{number_of_microclusters}
        ? $self->{number_of_microclusters}
        : '';

    print STDERR  "FP summary:\n";
    print STDERR  "___________________\n";
    print STDERR  "nodes:\t" . $sclope->{tree}{nCount} . "\n";
    print STDERR  "clusters:\t " . scalar( keys %{$sclope->{tree}{microclusters}} ) . "\n";
    print STDERR  "transactions:\t " . $transactions . "\n";
    print STDERR  "attributes:\t " . scalar( keys %{$sclope->{at_hash}} ) . "\n";
    print STDERR  "fi:\t$fi\n";
    print STDERR  "repulsion:\t$self->{repulsion}\n";
    print STDERR  "common prefix:\t$self->{common_prefix_min}\n";
    print STDERR  "merging microclusters...\n";
#    print STDERR  "freqs:\t", Dumper($self->{sclope}{freqs});
    print STDERR  "profit:\t" . $sclope->profit( ) . "\n";
    print STDERR  "done\n";
}

1;
