#--------------------------------------------------------------------------------------------------#
#------------------- CLUSTER STABILITY -> DISTANCE BETWEEN CLUSTERING RESULTS ---------------------#


get.perm.sample <- function(prev.perm, clust.num)
{
  head = 1:2
  
  if(is.vector(prev.perm)) 
  {
    if(prev.perm[1] == clust.num)
    {
      if(prev.perm[2] >= (clust.num-1))
      { 
        head = NULL
        tail = NULL
      }
      else
      {
        head[1] = clust.num
        head[2] = prev.perm[2] + 1
      }
    }
    else
    {
      if(prev.perm[2] >= clust.num)
      {
        head[1] = prev.perm[1] + 1
        head[2] = 1
      }
      else
      {
        head[1] = prev.perm[1]
        if( (prev.perm[2] + 1) == prev.perm[1] ) head[2] = prev.perm[2] + 2
        else head[2] = prev.perm[2] + 1
      }
    }
  }
  
  if(clust.num > 2) tail = sample((1:clust.num)[-head])
  else tail = NULL
  
  return(c(head,tail))
}

stab.assoc.factor <- function(cnf.mx, clust.num)
{
  factor = 0
  perm.num = clust.num *(clust.num-1)
  
  iter = 0
  perm.smp.vec = NULL
  while( iter < perm.num )
  {
    perm.smp.vec = get.perm.sample(perm.smp.vec, clust.num)
    factor = factor + similarity.index.int(cnf.mx, perm.smp.vec) / perm.num
    iter = iter + 1
  }
  
  return(factor)
}

# version for all - hierarchical and aglomerative algorithms (for hierarcical is not optimal)

clust.stab.predict.pver.internal <- function( data, clust.num, sample.num, ratio, clust.method, clust.wrap, pred.wrap )
{
# prepare results
	result 
	si.res = 0
	factor.res = 0

	obj.num = dim(data)[1]
  
	for( j in 1:sample.num )
	{
		smp = sort( sample( 1:obj.num, ratio * obj.num ) )

		base.data = data[smp,]
		cls.base = clust.wrap( clust.method( base.data, clust.num ) )

		rest.data = data[-smp,]
		cls.rest = clust.wrap( clust.method(rest.data, clust.num) )

		cls.pred = pred.wrap( base.data, cls.base, rest.data )

		cnf.mx = confusion.matrix(cls.rest, cls.pred)
		si = similarity.index( cnf.mx )
		factor = stab.assoc.factor(cnf.mx, clust.num)

		si.res = ( si / sample.num ) + si.res
		factor.res = ( factor / sample.num ) + factor.res
	}

# this is wrong !!!!! - to investigate :D :D :D 
	result = ((factor.res+1)/factor.res) * si.res - 1/factor.res
	return( result )
}

# ------- cluster stability - similarity index -> ver. for partitionings algorithms ------ #

clust.stab.predict.pver <- function( data, cl.num, sample.num, ratio, clust.method, clust.wrap, pred.wrap )
{
	# result = vector("list", length=ind.num )
	result = as.data.frame(matrix(0, 1, cl.num))

	iter = 1
	for( cls.num in cl.num )
	{
		result[iter] = clust.stab.predict.pver.internal(data, clust.num=cls.num, sample.num=sample.num, ratio=(1-ratio), clust.method=clust.method, clust.wrap=clust.wrap, pred.wrap=pred.wrap )
				
		iter = iter + 1
	}

	colnames(result[[iter]]) = paste("C", cl.num, sep="")

	return(result)
}

# ------- cluster stability - similarity index -> ver. for hierarhical algorithms ------ #

clust.stab.predict.hver <- function(data, cl.num, sample.num, ratio, clust.method, clust.wrap, pred.wrap)
{
	obj.num = dim(data)[1]
	result = as.data.frame(matrix(0,1,length(cl.num)))
  
	for( j in 1:sample.num )
	{
		smp = sort( sample( 1:obj.num, (1-ratio) * obj.num ) )
		base.data = data[smp,]

		clust.tree = clust.method(base.data)

		rest.data = data[-smp,]
		rest.tree = clust.method(rest.data)

		iter = 1
		for( clust.num in cl.num )
		{
			base.cls = clust.wrap( clust.tree, clust.num )
			rest.cls = clust.wrap( rest.tree, clust.num )
			rest.pred = pred.wrap( base.data, base.cls, rest.data )

			cnf.mx = confusion.matrix(rest.cls, rest.pred)
			si = similarity.index( cnf.mx )
			factor = stab.assoc.factor(cnf.mx, clust.num)			

			result[1, iter] = result[1, iter] + ((factor+1)/factor) * si - 1/factor
			iter = iter + 1 
		}
	}

	colnames(result[[iter]]) = paste("C", cl.num, sep="")
	return(result)
}

# cluster stability -> similarity index approach

cls.stab.predict <- function( data, cl.num, 
								rep.num=10, 
								subset.ratio=0.75, 
								clust.method=c("agnes","pam"),
								method.type=c("single","average"),
								pred.type=c("knn"),
								fast=TRUE, ... )
{
# check input arguments
	data = data.validity(data, "data")
	cl.num = cls.num.vect.validity(cl.num, dim(data)[1], "cl.num")

	if( !is.integer(rep.num) ) rep.num=10
	if( rep.num < 1 ) rep.num=10 

	if( !is.numeric(subset.ratio) ) subset.ratio=0.75
	if( subset.ratio > 1 || subset.ratio <= 0 ) subset.ratio=0.75

	cls.method.type.bool = check.avail.methods(clust.method, "clust.method", supp.cls.methods.vec.const)
	method.type.bool = check.avail.methods(method.type, "method.type", hierarhical.method.types.vec.const )
	pred.method.type.bool = check.avail.methods(pred.type, "pred.type", pred.method.types.vec.const )

	pred.alg.num = length( pred.method.type.bool[pred.method.type.bool] )
	result.list = vector( "list", length=pred.alg.num )

	for( pred.method.num in 1:length(pred.method.type.bool) )
	{
		if( pred.method.type.bool[pred.method.num] )
		{
			iter = 1
			cls.result.list = vector( "list", length=length( cls.method.type.bool[cls.method.type.bool] ) )

			for( method.num in 1:length(cls.method.type.bool) )
			{
				if( cls.method.type.bool[method.num] && supp.cls.methods.list.const[[method.num]]$sup )
				{
# if algorithm is hierarhical and user wants fast computation set proper function
					if( supp.cls.methods.list.const[[method.num]]$hrr && fast ) algorithm = clust.stab.predict.hver
					else algorithm = clust.stab.predict.pver

					if( method.num != agnes.num.const && method.num != hclust.num.const )
					{
						clust.alg.pver <- function(data, clust.num) 
						{ 
							return( supp.cls.methods.list.const[[method.num]]$alg(data, clust.num=clust.num, method.type=NULL, ...) ) 
						}
						
						cls.result.list[[iter]] = algorithm( 
														data=data, cl.num=cl.num, 
														sample.num=rep.num, ratio=subset.ratio,
														clust.method=clust.alg.pver,
														clust.wrap=supp.cls.methods.list.const[[method.num]]$wrp,
														pred.wrap=supp.pred.methods.list.const[[pred.method.num]]$wrp
													)
						names(result.list)[iter] = supp.cls.methods.vec.const[method.num]
						iter = iter + 1
					}
					else
					{
						# be careful - this line depends on the fact that 
						# first is computed "agnes" (if choosen) and always after "agnes", "hclust" (see constants variables)
						if( method.num == hclust.num.const ) method.type.bool = method.type.bool[1:4]
    				
						if( any(method.type.bool) )
						{
							for( i in 1:length(method.type.bool))
							{
								if( method.type.bool[i] == TRUE )
								{
									clust.alg.hver <- function(data) 
									{ 
										return( supp.cls.methods.list.const[[method.num]]$alg(data, 0, method.type=hierarhical.method.types.vec.const[i], ...) ) 
									}

									cls.result.list[[iter]] = algorithm( 
																	data=data, cl.num=cl.num, 
																	sample.num=rep.num, ratio=subset.ratio,
																	clust.method=clust.alg.hver,
																	clust.wrap=supp.cls.methods.list.const[[method.num]]$wrp, 
				 													pred.wrap=supp.pred.methods.list.const[[pred.method.num]]$wrp
																)
									names(cls.result.list)[iter] = paste(
																	supp.cls.methods.vec.const[method.num],
																	hierarhical.method.types.vec.const[i],
																	sep=".")
    		
									iter = iter + 1
								}
							}
						}
					}
				}
			}
			
			result.list[[i]] = cls.result.list
		}
	}
	
	return(result.list)
}