## --------------------------------------------------------------------------
##
## This file is part of the miRNA-QC-and-Diagnosis software package.
##
## Version 1.1.1 - February 2021
##
##
## The miRNA-QC-and-Diagnosis package is free software; you can use it,
## redistribute it, and/or modify it under the terms of the GNU General
## Public License version 3 as published by the Free Software Foundation.
## The full text of the license can be found in the file LICENSE.txt at the top
## level of the package distribution.
##
## Authors:
##	Michele Castelluzzo (1), Alessio Perinelli (2), Simone Detassis (3),
##	Michela A. Denti (3) and Leonardo Ricci (1,2)
##	(1) Department of Physics, University of Trento, 38123 Trento, Italy
##	(2) CIMeC, Center for Mind/Brain Sciences, University of Trento,
##		38068 Rovereto, Italy
##	(3) Department of Cellular, Computational and Integrative Biology
##		(CIBIO), University of Trento, 38123 Trento, Italy
##
##	michele.castelluzzo@unitn.it
##	alessio.perinelli@unitn.it
##	michela.denti@unitn.it
##	leonardo.ricci@unitn.it
##	https://github.com/LeonardoRicci/
##	https://nse.physics.unitn.it/
##
##
## If you use the miRNA-QC-and-Diagnosis package for your analyses, please cite:
##
##	L. Ricci, V. Del Vescovo, C. Cantaloni, M. Grasso, M. Barbareschi and
##	M. A. Denti, Statistical analysis of a Bayesian classifier based on the
##	expression of miRNAs, BMC Bioinformatics 16:287 (2015).
##	DOI: 10.1186/s12859-015-0715-9
##
##
## --------------------------------------------------------------------------

#' Analysis of features and training of classifiers.
#'
#' This function carries out different tasks depending on the input parameters:
#' --> Analysis mode: analyzes the properties of each miRNA (possibly subtracting a normalizer) in terms of Target/Versus separation, normality, etc. A matrix of correlation coefficients between each pair of miRNAs is also assessed.
#' --> Training mode: trains a Bayesian classifier by assessing the corresponding diagnostic threshold values and the related uncertainties.
#'
#' In order to select between Analysis and Training mode, the input parameters "inputMiRNAList" and "coeffList" have to comply with the following requirements.
#' --> Analysis mode: "coeffList" has to be empty (i.e. omitted in the function call arguments). "inputMiRNAList" can either be empty (i.e. omitted in the function call arguments) or of length 1: in the latter case, the single entry of "inputMiRNAList" is assumed to be the normalizer.
#' --> Training mode: "inputMiRNAList" and "coeffList" have to be non-empty and of the same size.
#'
#' @param inputDataset Dataset (data frame) to be used for the analysis/training. The data frame must comply with the output format of the quality control functions (miRNA_expressionPreprocessing and miRNA_removeOutliers), thus containing the columns 'Subject', 'miRNA', 'Mean', 'StdDev', 'SampleSize', 'Class'. Any other column is ignored, and any missing column forbids execution. Please note that in this case the 'Class' column is mandatory.
#' @param inputTargetList List of classes to use as target for the classification. The chosen target must correspond to at least one of the classes present in the 'Class' column of the inputDataset.
#' @param inputVersusList List of classes to use as versus for the classification. If the argument is left empty, all classes present in the 'Class' column of the inputDataset, minus the Target classes, are used as Versus.
#' @param inputMiRNAList List of miRNAs to be used by the classifier ('Training mode'). The chosen miRNAs must be present in the 'miRNA' column of the inputDataset. In 'Analysis mode', this argument has to be omitted (if no normalizer has to be used) or has to contain a single entry (corresponding to the miRNA to be used as normalizer).
#' @param coeffList List of coefficients for the classifier. In 'Training mode', the number of coefficients must be the same as the number of used miRNAs and listed in the same order. In 'Analysis mode', this argument has to be omitted.
#' @param saveOutputFile Boolean option setting whether results are written to file (TRUE) or not (FALSE). Default is FALSE.
#' @param outputFileBasename Name of the output file where the classifier setup results ('Training mode') or the analysis results ('Analysis mode') are to be stored. If not assigned, a filename is automatically generated. File names of other files created by the function are generated by appending suitable labels to the provided "outputFileBasename".
#' @param sep Field separator character for the output files; the default is a tab character.
#' @param plotFormat String specifying the format of generated graphic files (plots): can either be "pdf" (default) or "png".
#' @param scorePlotAscending Boolean option to set the direction in which samples are ordered: TRUE corresponds to samples ordered by ascending score, FALSE corresponds to samples ordered by descending score. Default is TRUE. This argument is meaningful only if saveOutputFile is set to TRUE and the function is running in 'Training mode'.
#' @param scorePlotParameters String specifying the y-axis parameters of the score plot. If empty, the axis is configured by assessing suitable parameters from the data.  This argument is meaningful only if saveOutputFile is set to TRUE and the function is running in 'Training mode'. The string has to comply with the format "yl_yu_yt", where: yl is the lower y limit; yu is the upper y limit; yt is the interval between tics along the axis.
#' @param histogramParameters String specifying the parameters used to build histograms. If empty, histograms are built by assessing suitable parameters from the data. This parameter is meaningful only if saveOutputFile is set to TRUE. The string has to comply with the following format: "xl_xu_bw", where xl is the lower boundary of the leftmost bin; xu is the upper boundary of the rightmost bin; bw is the bin width.
#' @param colorComplementFlag Boolean option to switch between the default palette (FALSE) and its inverted version (TRUE). Default is FALSE, corresponding to target samples reported in blue and versus samples in red. This argument is meaningful only if saveOutputFile is set to TRUE.
#'
#' Beware! Cross-correlation coefficients, as well as Shapiro-Wilk tests for normality, require at least three data samples. In case of fewer than three samples, those tests are skipped and "NA" (not available) is reported in the corresponding output.
#'
#' @return In 'Analysis mode', a data frame containing the columns 'miRNA', 'Diagnosis', 'NumberOfSubjects', 'Mean', 'StdDev', 'NormalityTest', 't-test'. In 'Training mode', a data frame containing the columns 'Threshold', 'DeltaThreshold', 'ChiUp', 'DChiUp', 'ChiDown', 'DChiDown', 'Accuracy', 'DAccuracy', 'Specificity', 'Sensitivity', 'F1-score', 'DPrime', 'AUC', 'AUCDown', 'AUCUp'.
#'
#' @examples
#' requiredFile = paste(system.file(package="MiRNAQCD"),
#'			"/extdata/test_dataset_alpha_clean.dat", sep='')
#' myDataFrame <- read.table(file=requiredFile, header=TRUE)
#' Target <- c("A")
#' Versus <- c("B", "C")
#' ## Analysis mode
#' miRNAstats <- miRNA_classifierSetup(myDataFrame, Target, Versus)
#' ## Analysis mode, with normalizer
#' miRNAstats <- miRNA_classifierSetup(myDataFrame, Target, Versus, c("FZ"))
#'
#' ## Training mode
#' mirnaToUse <- c("FX", "FZ")
#' coefficientsToUse <- c(1.0, -1.0)
#' threshold <- miRNA_classifierSetup(myDataFrame, Target, Versus,
#'					mirnaToUse, coefficientsToUse)

#' @export
miRNA_classifierSetup <- function(inputDataset, inputTargetList, inputVersusList=character(), inputMiRNAList=character(), coeffList=double(), saveOutputFile=FALSE, outputFileBasename="", sep='\t', plotFormat="pdf", scorePlotAscending=TRUE, scorePlotParameters=character(), histogramParameters=character(), colorComplementFlag=FALSE) {

	## Input validation and pre-processing

	if (!(("Subject" %in% colnames(inputDataset)) & ("miRNA" %in% colnames(inputDataset)) & ("Mean" %in% colnames(inputDataset)) & ("StdDev" %in% colnames(inputDataset)) & ("SampleSize" %in% colnames(inputDataset) & ("Class" %in% colnames(inputDataset)))))  {
		stop("ERROR: unsuitable dataset format. Dataset must contain columns 'Subject', 'miRNA', 'Mean', 'StdDev', 'SampleSize', 'Class'.\n")
	}

	if (length(inputDataset[1,]) > 6) {
		warning("WARNING: more than 6 dataset columns. Columns other than 'Subject', 'miRNA', 'Mean', 'StdDev', 'SampleSize', 'Class' will be ignored.\n")
	}

	availableClasses <- unique(inputDataset$Class)

	# Select subjects based on target list
	listOfTargets <- unique(inputTargetList)
	if (length(listOfTargets) != length(inputTargetList)){
		warning("WARNING: target list presents some duplicates which will be ignored.\n")
	}
	inputTargetList <- unique(inputTargetList)
	listOfTargets <- intersect(listOfTargets, availableClasses)
	if (length(listOfTargets) != length(inputTargetList)){
		warning("WARNING: some of the target classes are not present in the dataset and will be ignored.\n")
	}

	# Select subjects based on versus list
	if (length(inputVersusList) == 0) {
		inputVersusList <- setdiff(availableClasses, listOfTargets)
		inputVersusList <- unique(inputVersusList)
	}
	listOfVersus <- unique(inputVersusList)
	if (length(listOfVersus) != length(inputVersusList)){
		warning("WARNING: versus list presents some duplicates which will be ignored.\n")
	}
	inputVersusList <- unique(inputVersusList)
	listOfVersus <- intersect(listOfVersus, availableClasses)
	if (length(listOfVersus) != length(inputVersusList)){
		warning("WARNING: some of the versus classes are not present in the dataset and will be ignored.\n")
	}

	# Check target and versus list are non-empty and not intersecting
	if (length(listOfTargets)==0 | length(listOfVersus)==0) {
		stop("ERROR: unsuitable function arguments. The requested target and/or versus classes are empty, or not present in the dataset.\n")
	} else if (length(intersect(listOfTargets, listOfVersus))) {
		stop("ERROR: conflicting function arguments; target set and versus set have non-empty intersection.\n")
	}

	targetVersusFrame <- inputDataset[inputDataset$Class %in% c(listOfTargets,listOfVersus), ]
	subjectsTargetVersus <- unique(targetVersusFrame$Subject)

	# Check for duplicates features in input list
	availableFeatures <- unique(targetVersusFrame$miRNA)
	listOfFeature <- unique(inputMiRNAList)
	if (length(listOfFeature) != length(inputMiRNAList)){
		warning("WARNING: features list presents some duplicates which will be ignored.\n")
	}
	inputMiRNAList <- unique(inputMiRNAList)

	# Check for features from the list which are not present in the dataset
	listOfFeature <- intersect(listOfFeature, availableFeatures)
	if (length(listOfFeature) != length(inputMiRNAList)){
		stop("ERROR: some entries of the features list are not present in the dataset.\n")
	}

	# Correct for empty separator (default behavior of write.table is sep=" ")
	if (sep == "")
		sep <- " "

	## Select function behavior

	if ((length(inputMiRNAList) == 1) && (length(coeffList) == 0)) {
		normalizer <- inputMiRNAList
		normalizerFlag <- 1
		classifierFlag <- 0
		message("LOG:\tmiRNA_classifierSetup() is running in Analysis mode (with normalizer).\n")
	} else if ((length(inputMiRNAList) == 0) && (length(coeffList) == 0)) {
		normalizerFlag <- 0
		classifierFlag <- 0
		message("LOG:\tmiRNA_classifierSetup() is running in Analysis mode (without normalizer).\n")
	} else if (length(inputMiRNAList) == length(coeffList)) {
		normalizerFlag <- 0
		classifierFlag <- 1
		message("LOG:\tmiRNA_classifierSetup() is running in Training mode.\n")
	} else {
		stop("ERROR: unsuitable input parameters. The list of features and the list of coefficients do not match any valid pattern.\n\tType help(miRNA_classifierSetup) for help.\n")
	}

	## Actual computation starts here (loop over features; if classifierFlag==1, stops after first iteration)

	for (currentFeature in availableFeatures) {

		if ((normalizerFlag == 1) && (classifierFlag == 0)) {
			if (currentFeature == normalizer)
				next
			listOfFeature <- c(currentFeature, normalizer)
			listOfFeature <- unique(listOfFeature)
			coeffList <- c(1, -1)
		} else if ((normalizerFlag == 0) && (classifierFlag == 0)) {
			listOfFeature <- currentFeature
			coeffList <- 1
		} else if ((normalizerFlag == 0) && (classifierFlag == 1)) {
			listOfFeature <- inputMiRNAList
		}

		## Pre-Processing: Filter subjects by needed features

		if (exists("subjectsToRemove"))
			rm(subjectsToRemove)

		for (subject in subjectsTargetVersus) {
			subjectFrame <- targetVersusFrame[targetVersusFrame$Subject == subject,]
			availableSubjectFeatures <- unique(subjectFrame$miRNA)
			if (length(listOfFeature) != length(intersect(availableSubjectFeatures, listOfFeature))) {
				if (!exists("subjectsToRemove")) {
					subjectsToRemove <- subject
				} else {
					subjectsToRemove <- rbind(subjectsToRemove, subject)
				}
			}
		}
		if (exists("subjectsToRemove")) {
			subjectsToRemove <- unique(subjectsToRemove)
			compliantSubjects <- setdiff(subjectsTargetVersus, subjectsToRemove)
		} else {
			compliantSubjects <- subjectsTargetVersus
		}
		if (length(compliantSubjects) == 0) {
			warning("WARNING: no available subjects for features:\t", listOfFeature, "\n")
			next
		}

		availableDataset <- targetVersusFrame[targetVersusFrame$Subject %in% compliantSubjects, ]
		availableDataset <- availableDataset[availableDataset$miRNA %in% listOfFeature, ]

		if (length(availableDataset[availableDataset$Class %in% listOfTargets, 1]) == 0 | length(availableDataset[availableDataset$Class %in% listOfVersus, 1]) == 0) {
			if (classifierFlag) {
				stop("ERROR: No available target and/or versus subjects for this classifier.\n")
			} else {
				warning("WARNING: No available target and/or versus subjects for feature '", currentFeature, "', continuing to next feature.\n")
				next
			}
		}

		## Pre-Processing: prepare dataframe in suitable format

		dataFrameTemp <- availableDataset[availableDataset$Class %in% listOfTargets, ]
		if (exists("tempFrame"))
			rm(tempFrame)
		for (feature in listOfFeature) {
			columnSubjectMean <- dataFrameTemp[dataFrameTemp$miRNA == feature,]
			columnSubjectMean <- subset(columnSubjectMean, select=c("Subject", "Mean"))

			if (!exists("tempFrame")) {
				tempFrame <- columnSubjectMean
				columnSubjectClass <- dataFrameTemp[dataFrameTemp$miRNA == feature,]
				columnSubjectClass <- subset(columnSubjectClass, select=c("Subject", "Class"))
			} else {
				tempFrame <- merge(tempFrame, columnSubjectMean, by = "Subject")
			}

		}
		tempFrame <- merge(tempFrame, columnSubjectClass, by = "Subject")
		completeDataFrame <- cbind(tempFrame, Diagnosis=rep("target", length(tempFrame[,1])))

		dataFrameTemp <- availableDataset[availableDataset$Class %in% listOfVersus, ]
		if (exists("tempFrame"))
			rm(tempFrame)
		for (feature in listOfFeature) {
			columnSubjectMean <- dataFrameTemp[dataFrameTemp$miRNA == feature,]
			columnSubjectMean <- subset(columnSubjectMean, select=c("Subject", "Mean"))

			if (!exists("tempFrame")) {
				tempFrame <- columnSubjectMean
				columnSubjectClass <- dataFrameTemp[dataFrameTemp$miRNA == feature,]
				columnSubjectClass <- subset(columnSubjectClass, select=c("Subject", "Class"))
			} else
				tempFrame <- merge(tempFrame, columnSubjectMean, by = "Subject")

		}
		tempFrame <- merge(tempFrame, columnSubjectClass, by = "Subject")

		completeDataFrame <- rbind(completeDataFrame, cbind(tempFrame, Diagnosis=rep("versus", length(tempFrame[,1]))))

		names(completeDataFrame) <- c("Subject", listOfFeature, "Class", "Diagnosis")

		## Processing: 1. Compute Score, i.e. linear combination of features

		dataFrameTemp <- completeDataFrame[2:(1+length(listOfFeature))]

		for (feature in listOfFeature)
			dataFrameTemp[,feature] <- dataFrameTemp[,feature] * as.numeric(coeffList[which(listOfFeature == feature)])

		completeDataFrame <- cbind(completeDataFrame, Score=rowSums(dataFrameTemp))
		if (scorePlotAscending) {
			completeDataFrame <- data.frame(completeDataFrame[with(completeDataFrame, order(Score)),])
		} else {
			completeDataFrame <- data.frame(completeDataFrame[with(completeDataFrame, order(-1.0*Score)),])
		}

		## Processing: 2. Means, StdDevs, NormalityTest

		# Target
		nT <- length(completeDataFrame$Score[completeDataFrame$Diagnosis == "target"])
		xT <- mean(completeDataFrame$Score[completeDataFrame$Diagnosis == "target"])
		sT <- stats::sd(completeDataFrame$Score[completeDataFrame$Diagnosis == "target"])
		dxT <- sT/sqrt(nT)
		dsT <- sT/sqrt(2*(nT-1))
		if (!classifierFlag) {
			if (length(completeDataFrame$Score[completeDataFrame$Diagnosis == "target"]) > 2) {
				res <- stats::shapiro.test(completeDataFrame$Score[completeDataFrame$Diagnosis == "target"])
				shapiroWilkTarget <- (res[[2]])
			} else {
				shapiroWilkTarget <- NA
				warning("WARNING: miRNA '", currentFeature, "' has less than 3 Subjects in the target set. Cannot carry out Shapiro-Wilk test.\n")
			}
		}
		# Versus
		nV <- length(completeDataFrame$Score[completeDataFrame$Diagnosis == "versus"])
		xV <- mean(completeDataFrame$Score[completeDataFrame$Diagnosis == "versus"])
		sV <- stats::sd(completeDataFrame$Score[completeDataFrame$Diagnosis == "versus"])
		dxV <- sV/sqrt(nV)
		dsV <- sV/sqrt(2*(nV-1))
		if (!classifierFlag) {
			if (length(completeDataFrame$Score[completeDataFrame$Diagnosis == "versus"]) > 2) {
				res <- stats::shapiro.test(completeDataFrame$Score[completeDataFrame$Diagnosis == "versus"])
				shapiroWilkVersus <- (res[[2]])
			} else {
				shapiroWilkVersus <- NA
				warning("WARNING: miRNA '", currentFeature, "' has less than 3 Subjects in the versus set. Cannot carry out Shapiro-Wilk test.\n")
			}
		}
		# Target and Versus
		nTV <- length(completeDataFrame$Score)
		xTV <- mean(completeDataFrame$Score)
		sTV <- stats::sd(completeDataFrame$Score)
		dxTV <- sTV/sqrt(nTV)
		dsTV <- sTV/sqrt(2*(nTV-1))
		if (!classifierFlag) {
			if (length(completeDataFrame$Score) > 2) {
				res <- stats::shapiro.test(completeDataFrame$Score)
				shapiroWilk <- (res[[2]])
			} else {
				shapiroWilkVersus <- NA
				warning("WARNING: miRNA '", currentFeature, "' has less than 3 Subjects in the dataset. Cannot carry out Shapiro-Wilk test.\n")
			}
		}
		localPrecision <- 1
		local_muAndS_precision <- localPrecision

		## Processing: 3. Student's t Test between target and versus Scores

		tTestRes <- stats::t.test(completeDataFrame$Score[completeDataFrame$Diagnosis == "target"], completeDataFrame$Score[completeDataFrame$Diagnosis == "versus"], var.equal = FALSE)

		## Processing: 4. d'

		d <- xT - xV
		dd <- sqrt((sT*sT + sV*sV)/2.)
		dee <- d/dd
		dDee <- sqrt(((dxT/dd)**2) + ((dxV/dd)**2) + ((dee/dd/dd/2)**2)*(sT*sT*dsT*dsT + sV*sV*dsV*dsV))

		## Processing: 5. In the case of Classification, compute thresholds

		if (classifierFlag) {

			## Auxiliary function: evaluates compute_chi_dChi_aux at the nominal parameters
			## and propagates the parameter uncertainties to each element of its output.
			##   xT, xV     means of the target and versus scores
			##   sT, sV     standard deviations of the target and versus scores
			##   dxT, dxV   uncertainties on xT and xV
			##   dsT, dsV   uncertainties on sT and sV
			##   ratio      forwarded unchanged to compute_chi_dChi_aux
			## Returns the vector produced by compute_chi_dChi_aux followed by a vector of
			## the same length holding the propagated uncertainty of each element.
			compute_chi_dChi <- function(xT, xV, sT, sV, dxT, dxV, dsT, dsV, ratio) {

				# Full width of the symmetric finite-difference stencil
				step <- 1.e-6
				halfStep <- step/2.

				# Values at the nominal parameters
				nominal <- compute_chi_dChi_aux(xT, xV, sT, sV, ratio)

				# Central-difference estimates of the partial derivatives, one parameter at a time
				slopeXT <- (compute_chi_dChi_aux(xT + halfStep, xV, sT, sV, ratio) - compute_chi_dChi_aux(xT - halfStep, xV, sT, sV, ratio))/step
				slopeXV <- (compute_chi_dChi_aux(xT, xV + halfStep, sT, sV, ratio) - compute_chi_dChi_aux(xT, xV - halfStep, sT, sV, ratio))/step
				slopeST <- (compute_chi_dChi_aux(xT, xV, sT + halfStep, sV, ratio) - compute_chi_dChi_aux(xT, xV, sT - halfStep, sV, ratio))/step
				slopeSV <- (compute_chi_dChi_aux(xT, xV, sT, sV + halfStep, ratio) - compute_chi_dChi_aux(xT, xV, sT, sV - halfStep, ratio))/step

				# First-order propagation: the four contributions are summed in quadrature (no cross terms)
				spread <- sqrt(slopeXT*slopeXT*dxT*dxT + slopeXV*slopeXV*dxV*dxV + slopeST*slopeST*dsT*dsT + slopeSV*slopeSV*dsV*dsV)

				c(nominal, spread)
			}

			## Auxiliary function: computes threshold value chi and accuracy (called by compute_chi_dChi)
			##   xT, xV   means of the target and versus scores
			##   sT, sV   standard deviations of the target and versus scores
			##   ratio    weight applied to the target term (also enters the constant coefficient)
			## Returns the roots of the quadratic followed by the accuracy evaluated at each root.
			## NOTE(review): polyroot() ignores trailing zero coefficients, so when the quadratic
			## coefficient is exactly zero (sT == sV) presumably only one chi/accuracy pair is
			## returned instead of two -- confirm that callers can cope with that shape.
			compute_chi_dChi_aux <- function(xT, xV, sT, sV, ratio) {

				# Coefficients of quadTerm*chi^2 - 2*linTerm*chi + constTerm = 0
				quadTerm <- 1/sT/sT - 1/sV/sV
				linTerm <- xT/sT/sT - xV/sV/sV
				constTerm <- xT*xT/sT/sT - xV*xV/sV/sV - 2*log(sV/sT) + 2*log(ratio)

				# polyroot() expects coefficients in increasing order and returns complex roots;
				# the coercion keeps the real parts only
				roots <- polyroot(c(constTerm, -2*linTerm, quadTerm))
				thresholds <- as.numeric(roots)

				# Weighted fraction of correct assignments for each candidate threshold
				hitTarget <- stats::pnorm((xT - thresholds)/sT)
				hitVersus <- stats::pnorm((thresholds - xV)/sV)
				accuracy <- (ratio*hitTarget + hitVersus)/(ratio + 1)

				c(thresholds, accuracy)
			}

			alpha <- 1/sT/sT - 1/sV/sV

			# pv/pt = 1
			chi_dChi <- compute_chi_dChi(xT, xV, sT, sV, dxT, dxV, dsT, dsV, 1)
			chi <- chi_dChi[1:2]
			pc <- chi_dChi[3:4]
			dchi <- chi_dChi[5:6]
			dpc <- chi_dChi[7:8]
			pc <- pc[which.min(alpha*chi)]
			dpc <- dpc[which.min(alpha*chi)]
			dchi <- dchi[which.min(alpha*chi)]
			chi <- chi[which.min(alpha*chi)]

			# pv/pt = likelyhoodRatio: 90% Target threshold
			chi_dChi <- compute_chi_dChi(xT, xV, sT, sV, dxT, dxV, dsT, dsV, 9.)
			chiUp <- chi_dChi[1:2]
			dchiUp <- chi_dChi[5:6]
			dchiUp <- dchiUp[which.min(alpha*chiUp)]
			chiUp <- chiUp[which.min(alpha*chiUp)]

			# pv/pt = likelyhoodRatio: 90% Versus threshold
			chi_dChi <- compute_chi_dChi(xT, xV, sT, sV, dxT, dxV, dsT, dsV, 1./9.)
			chiDown <- chi_dChi[1:2]
			dchiDown <- chi_dChi[5:6]
			dchiDown <- dchiDown[which.min(alpha*chiDown)]
			chiDown <- chiDown[which.min(alpha*chiDown)]

			# ROC and corresponding area under curve
			pred <- completeDataFrame$Score
			obs <- completeDataFrame$Diagnosis
			pROC_data <- suppressMessages(pROC::roc(obs, pred))
			ciAuc <- suppressMessages(pROC::ci.auc(pROC_data))

			# Performance metrics
			real_targets <- completeDataFrame[completeDataFrame$Diagnosis == "target", ]
			real_versus <- completeDataFrame[completeDataFrame$Diagnosis == "versus", ]
			nr_target_true <- NROW(real_targets[real_targets$Score >= chi, ])
			nr_target_false <- NROW(real_targets[real_targets$Score < chi, ])
			nr_versus_true <- NROW(real_versus[real_versus$Score < chi, ])
			nr_versus_false <- NROW(real_versus[real_versus$Score >= chi, ])
			performance_sensitivity <- nr_target_true / (nr_target_true + nr_target_false)
			performance_specificity <- nr_versus_true / (nr_versus_true + nr_versus_false)
			# performance_accuracy <- (nr_target_true + nr_versus_true) / (nr_target_true + nr_versus_true + nr_target_false + nr_versus_false)
			performance_accuracy <- pc
			performance_accuracy_error <- dpc
			performance_F1score <- 2.0*nr_target_true / (2.0*nr_target_true + nr_target_false + nr_versus_false)

			outputThresholdFrame <- data.frame(chi, dchi, chiUp, dchiUp, chiDown, dchiDown, performance_accuracy, performance_accuracy_error, performance_specificity, performance_sensitivity, performance_F1score, dee, ciAuc[2], ciAuc[1], ciAuc[3])
			names(outputThresholdFrame) <- c("Threshold", "DeltaThreshold", "ChiUp", "DChiUp", "ChiDown", "DChiDown", "Accuracy", "DAccuracy", "Specificity", "Sensitivity", "F1-score", "DPrime", "AUC", "AUCDown", "AUCUp")
		}

		## Processing: 5b. In the case of feature analysis (not Classification), prepare output

		completeDataFrame <- subset(completeDataFrame, select = c("Subject", "Diagnosis", "Score", listOfFeature))

		if (!classifierFlag) {
			if (normalizerFlag) {
				rowTarget <- c(paste("D", currentFeature, sep=""), "target", nT, round(xT,local_muAndS_precision), round(sT,local_muAndS_precision), round(shapiroWilkTarget,2), signif(tTestRes[[3]],2))
				rowVersus <- c(paste("D", currentFeature, sep=""), "versus", nV, round(xV,local_muAndS_precision), round(sV,local_muAndS_precision), round(shapiroWilkVersus,2), signif(tTestRes[[3]],2))
			} else {
				rowTarget <- c(currentFeature, "target", nT, round(xT,local_muAndS_precision), round(sT,local_muAndS_precision), round(shapiroWilkTarget,2), signif(tTestRes[[3]],2))
				rowVersus <- c(currentFeature, "versus", nV, round(xV,local_muAndS_precision), round(sV,local_muAndS_precision), round(shapiroWilkVersus,2), signif(tTestRes[[3]],2))
			}

			if (!exists("outputStatisticsFrame")) {
				outputStatisticsFrame <- rowTarget
				outputStatisticsFrame <- rbind(outputStatisticsFrame, rowVersus)
			} else {
				outputStatisticsFrame <- rbind(outputStatisticsFrame, rowTarget)
				outputStatisticsFrame <- rbind(outputStatisticsFrame, rowVersus)
			}
		} else {
			if (!saveOutputFile) {
				confusionMatrixFileName <- tempfile()
				cat("Confusion matrix:\tPredicted\tPredicted\n", append=FALSE, file=confusionMatrixFileName)
				cat("\t\t\tTarget\t\tVersus\n", append=TRUE, file=confusionMatrixFileName)
				cat(paste("Actual Target\t\t", nr_target_true, "\t\t", nr_target_false, "\n", sep=""), append=TRUE, file=confusionMatrixFileName)
				cat(paste("Actual Versus\t\t", nr_versus_false, "\t\t", nr_versus_true, "\n", sep=""), append=TRUE, file=confusionMatrixFileName)
				cat(readLines(confusionMatrixFileName), sep="\n")
			}
		}

		## Output: filename building

		if (exists("classifierLabel")) {
			rm(classifierLabel)
		}
		for (i in 1:length(coeffList)) {
			if(!exists("classifierLabel"))
				classifierLabel <- paste(coeffList[i], listOfFeature[i], sep="_")
			else
				classifierLabel <- paste(classifierLabel, coeffList[i], listOfFeature[i], sep="_")
		}

		targetLabel <- paste(listOfTargets, sep="", collapse="_")
		versusLabel <- paste(listOfVersus, sep="", collapse="_")
		targetVSVersusLabel <- paste(targetLabel, "vs", versusLabel, sep="_", collapse="")

		if (saveOutputFile) {

			## Output: write to file (classifier case)

			if (classifierFlag) {
				if (outputFileBasename == "") {
					thresholdFileName <- paste(targetVSVersusLabel, "u", classifierLabel, sep="_")
					scoresFileName <- paste(thresholdFileName, "_scores", ".dat", sep="")
					confusionMatrixFileName <- paste(thresholdFileName, "_confusion_matrix", ".txt", sep="")
					thresholdFileName <- paste(thresholdFileName, "_thresholds", ".txt", sep="")
				} else {
	 				thresholdFileName <- paste(outputFileBasename, ".txt", sep="")
					scoresFileName <- paste(outputFileBasename, ".dat", sep="")
					confusionMatrixFileName <- paste(outputFileBasename, "_confusion_matrix.txt", sep="")
	 			}

				if (file.exists(thresholdFileName) & file.access(thresholdFileName, mode=2)) {
					stop("ERROR:\tcannot write ", thresholdFileName, ". Check write permission.\n")
				}

				if (file.exists(scoresFileName) & file.access(scoresFileName, mode=2)) {
					stop("ERROR:\tcannot write ", scoresFileName, ". Check write permission.\n")
				}

				if (file.exists(confusionMatrixFileName) & file.access(confusionMatrixFileName, mode=2)) {
					stop("ERROR:\tcannot write ", confusionMatrixFileName, ". Check write permission.\n")
				}

				cat("#classifier: ", file=thresholdFileName)
				for (i in 1:length(coeffList)) {
					cat(paste(coeffList[i], "*", listOfFeature[i], sep=""), " ", file=thresholdFileName, append = TRUE)
				}
				cat("\n", file=thresholdFileName, append = TRUE)

				utils::write.table(format(outputThresholdFrame, drop0trailing=FALSE), file=thresholdFileName, append=TRUE, sep=sep, row.names=FALSE, col.names=TRUE, quote=FALSE)
				utils::write.table(format(subset(completeDataFrame, select=c("Subject", "Diagnosis", "Score")), drop0trailing=FALSE), file=scoresFileName, append=FALSE, sep=sep, row.names=FALSE, col.names=TRUE, quote=FALSE)
				cat("Confusion matrix:\tPredicted\tPredicted\n", append=FALSE, file=confusionMatrixFileName)
				cat("\t\t\tTarget\t\tVersus\n", append=TRUE, file=confusionMatrixFileName)
				cat(paste("Actual Target\t\t", nr_target_true, "\t\t", nr_target_false, "\n", sep=""), append=TRUE, file=confusionMatrixFileName)
				cat(paste("Actual Versus\t\t", nr_versus_false, "\t\t", nr_versus_true, "\n", sep=""), append=TRUE, file=confusionMatrixFileName)

				message("LOG:\tDiagnostic thresholds data frame written to ", thresholdFileName, " successfully.\n", sep="")
				message("LOG:\tScores data frame written to ", scoresFileName, " successfully.\n", sep="")
				message("LOG:\tConfusion matrix written to ", confusionMatrixFileName, " successfully.\n", sep="")
			}

			## Output: plot (all cases)

			if (saveOutputFile) {

				if (outputFileBasename == "") {
					plotFileName <- paste(targetVSVersusLabel, "u", classifierLabel, sep="_")
				} else {
					if (classifierFlag) {
						plotFileName <- outputFileBasename
					} else {
						plotFileName <- paste(outputFileBasename, currentFeature, sep="_")
					}
		 		}

				if (classifierFlag) {
					histPlot <- miRNA_plotHistograms(completeDataFrame, outputThresholdFrame, plotFileName, plotFormat=plotFormat, histogramParameters=histogramParameters, colorComplementFlag=colorComplementFlag)
					message("LOG:\tHistogram plot saved to ", plotFileName, "_histogram.", plotFormat, " successfully.\n", sep="")
					thresholdPlot <- miRNA_plotThresholds(completeDataFrame, outputThresholdFrame, plotFileName, plotFormat=plotFormat, scorePlotParameters=scorePlotParameters, colorComplementFlag=colorComplementFlag)
					message("LOG:\tScore classification plot saved to ", plotFileName, "_score.", plotFormat, " successfully.\n", sep="")
					rocPlot <- miRNA_plotROC(completeDataFrame, plotFileName, plotFormat=plotFormat)
					message("LOG:\tROC plot saved to ", plotFileName, "_ROC.", plotFormat, " successfully.\n", sep="")
				} else {
					histPlot <- miRNA_plotHistograms(completeDataFrame, outputFileLabel=plotFileName, plotFormat=plotFormat, histogramParameters=histogramParameters, colorComplementFlag=colorComplementFlag)
					message("LOG:\tHistogram plot saved to ", plotFileName, "_histogram.", plotFormat, " successfully.\n", sep="")
				}
			}
		}

		if (classifierFlag)
			break
	}

	if ((normalizerFlag == 1) && (classifierFlag == 0)) {
		normalizer <- inputMiRNAList
		listOfFeature <- setdiff(availableFeatures, normalizer)
		normalizerFlag <- 1
	} else if ((normalizerFlag == 0) && (classifierFlag == 0)) {
		listOfFeature <- unique(inputDataset$miRNA)
	} else if ((normalizerFlag == 0) && (classifierFlag == 1)) {
		listOfFeature <- inputMiRNAList
	}

	if (!classifierFlag) {
		if (saveOutputFile) {
			outputStatisticsFrame <- data.frame(outputStatisticsFrame, row.names=NULL)
			names(outputStatisticsFrame) <- c("miRNA", "Diagnosis", "NumberOfSubjects", "Mean", "StdDev", "NormalityTest", "t-test")

			if (outputFileBasename == "") {
				if (normalizerFlag) {
					statisticsFileName <- paste(targetVSVersusLabel, "norm", normalizer, "statistics", sep="_")
				} else {
					statisticsFileName <- paste(targetVSVersusLabel, "statistics", sep="_")
				}
				statisticsFileName <- paste(statisticsFileName, ".txt", sep="")
			} else {
	 			statisticsFileName <- paste(outputFileBasename, ".txt", sep="")
	 		}

			if (file.exists(statisticsFileName) & file.access(statisticsFileName, mode=2)) {
				stop("ERROR: cannot write ", statisticsFileName, ". Check write permission.\n")
			}

			utils::write.table(format(outputStatisticsFrame, drop0trailing=FALSE), file=statisticsFileName, sep=sep, row.names=FALSE, col.names=TRUE, quote=FALSE)
		}

		## Processing: Correlation coefficients

		targetLabel <- paste(listOfTargets, sep="", collapse="_")
		versusLabel <- paste(listOfVersus, sep="", collapse="_")
		targetVSVersusLabel <- paste(targetLabel, "vs", versusLabel, sep="_", collapse="")

		localPrecision <- 2
		if (normalizerFlag) {
			matrixLabels <- paste("D", listOfFeature, sep='')
		} else {
			matrixLabels <- listOfFeature
		}

		if (!saveOutputFile) {
			statisticsFileName <- tempfile()
		}

		cat("\n", append=TRUE, file=statisticsFileName)
		for (classificationLabel in c("target", "versus")) {
			cat("###############################\n\n", append=TRUE, file=statisticsFileName)
			if (classificationLabel=="target")
				cat("## Evaluation on subjects whose diagnosis belongs to the Target set\n", append=TRUE, file=statisticsFileName)
			else
				cat("## Evaluation on subjects whose diagnosis belongs to the Versus set\n", append=TRUE, file=statisticsFileName)
			cat("# Correlation coefficients:\n", append=TRUE, file=statisticsFileName)
			cat("\tr", paste(matrixLabels, sep=sep), sep=sep, append=TRUE, file=statisticsFileName)
			cat("\n", append=TRUE, file=statisticsFileName)
			for (a in listOfFeature) {
				cat(paste(sep="", "\t", matrixLabels[which(listOfFeature == a)]), append=TRUE, file=statisticsFileName)
				subjectsA <- targetVersusFrame[targetVersusFrame$miRNA == a, ]
				if (classificationLabel == "target") {
					subjectsA <- subjectsA[subjectsA$Class %in% listOfTargets, "Subject"]
				} else if (classificationLabel == "versus") {
					subjectsA <- subjectsA[subjectsA$Class %in% listOfVersus, "Subject"]
				}

				for (b in listOfFeature) {

					if (which(listOfFeature == a) < which(listOfFeature == b)) {
						subjectsB <- targetVersusFrame[targetVersusFrame$miRNA == b, "Subject"]
						if (normalizerFlag) {
							subjectsN <- targetVersusFrame[targetVersusFrame$miRNA == normalizer, "Subject"]
							compliantSubjects <- intersect(subjectsA, subjectsB)
							compliantSubjects <- intersect(compliantSubjects, subjectsN)
							filteredFrame <- targetVersusFrame[targetVersusFrame$Subject %in% compliantSubjects,]
							valuesA <- filteredFrame[filteredFrame$miRNA == a, "Mean"] - filteredFrame[filteredFrame$miRNA == normalizer, "Mean"]
							valuesB <- filteredFrame[filteredFrame$miRNA == b, "Mean"] - filteredFrame[filteredFrame$miRNA == normalizer, "Mean"]
						} else {
							compliantSubjects <- intersect(subjectsA, subjectsB)
							filteredFrame <- targetVersusFrame[targetVersusFrame$Subject %in% compliantSubjects,]
							valuesA <- filteredFrame[filteredFrame$miRNA == a, "Mean"]
							valuesB <- filteredFrame[filteredFrame$miRNA == b, "Mean"]
						}
						if ((length(valuesA) == length(valuesB)) && (length(valuesA) > 2)) {
							cat(sep=sep, "", round(stats::cor(valuesA, valuesB), localPrecision), append=TRUE, file=statisticsFileName)
						} else {
							cat(sep=sep, "", NA, append=TRUE, file=statisticsFileName)
							warning("WARNING: Correlation between '", a, "', '", b, "' cannot be computed for the ", classificationLabel, " set: less than 3 values.\n", sep="")
						}
					} else {
						cat(sep=sep, "", "-", append=TRUE, file=statisticsFileName)
					}
				}
				cat("\n", append=TRUE, file=statisticsFileName)
			}
			cat("\n", append=TRUE, file=statisticsFileName)

			cat(sep="", "# Correlation p-values \"", classificationLabel, "\":\n", append=TRUE, file=statisticsFileName)
			cat("\tp", paste(matrixLabels, sep=sep), sep=sep, append=TRUE, file=statisticsFileName)
			cat("\n", append=TRUE, file=statisticsFileName)
			for (a in listOfFeature) {
				cat(paste(sep="", "\t", matrixLabels[which(listOfFeature == a)]), append=TRUE, file=statisticsFileName)
				subjectsA <- targetVersusFrame[targetVersusFrame$miRNA == a, ]
				if (classificationLabel == "target") {
					subjectsA <- subjectsA[subjectsA$Class %in% listOfTargets, "Subject"]
				} else if (classificationLabel == "versus") {
					subjectsA <- subjectsA[subjectsA$Class %in% listOfVersus, "Subject"]
				}

				for (b in listOfFeature) {

					if (which(listOfFeature == a) < which(listOfFeature == b)) {
						subjectsB <- targetVersusFrame[targetVersusFrame$miRNA == b, "Subject"]
						if (normalizerFlag) {
							subjectsN <- targetVersusFrame[targetVersusFrame$miRNA == normalizer, "Subject"]
							compliantSubjects <- intersect(subjectsA, subjectsB)
							compliantSubjects <- intersect(compliantSubjects, subjectsN)
							if (length(compliantSubjects) == 0) next
							filteredFrame <- targetVersusFrame[targetVersusFrame$Subject %in% compliantSubjects,]

							valuesA <- filteredFrame[filteredFrame$miRNA == a, "Mean"] - filteredFrame[filteredFrame$miRNA == normalizer, "Mean"]
							valuesB <- filteredFrame[filteredFrame$miRNA == b, "Mean"] - filteredFrame[filteredFrame$miRNA == normalizer, "Mean"]
						} else {
							compliantSubjects <- intersect(subjectsA, subjectsB)
							if (length(compliantSubjects) == 0) next
							filteredFrame <- targetVersusFrame[targetVersusFrame$Subject %in% compliantSubjects,]

							valuesA <- filteredFrame[filteredFrame$miRNA == a, "Mean"]
							valuesB <- filteredFrame[filteredFrame$miRNA == b, "Mean"]
						}
						if ((length(valuesA) == length(valuesB)) && (length(valuesA) > 2)) {
							ctst <- stats::cor.test(valuesA, valuesB)
							cat(sep=sep, "", signif(ctst[[3]], localPrecision), append=TRUE, file=statisticsFileName)
						} else {
							cat(sep=sep, "", NA, append=TRUE, file=statisticsFileName)
							warning("WARNING: Correlation (p-value) between '", a, "', '", b, "' cannot be computed for the ", classificationLabel, " set: less than 3 values.\n", sep="")
						}
					} else {
						cat(sep=sep, "", "-", append=TRUE, file=statisticsFileName)
					}
				}
				cat("\n", append=TRUE, file=statisticsFileName)
			}
			cat("\n", append=TRUE, file=statisticsFileName)
		}

		cat("###############################\n\n", append=TRUE, file=statisticsFileName)
		cat("##  Evaluation on subjects whose diagnosis belongs to the union of Target and Versus sets\n", append=TRUE, file=statisticsFileName)
		cat("# Correlation coefficients:\n", append=TRUE, file=statisticsFileName)
		cat("\tr", paste(matrixLabels, sep=sep), sep=sep, append=TRUE, file=statisticsFileName)
		cat("\n", append=TRUE, file=statisticsFileName)
		for (a in listOfFeature) {
			cat(paste(sep="", "\t", matrixLabels[which(listOfFeature == a)]), append=TRUE, file=statisticsFileName)
			subjectsA <- targetVersusFrame[targetVersusFrame$miRNA == a, "Subject"]
			for (b in listOfFeature) {
				if (which(listOfFeature == a) < which(listOfFeature == b)) {
					subjectsB <- targetVersusFrame[targetVersusFrame$miRNA == b, "Subject"]
					if (normalizerFlag) {
						subjectsN <- targetVersusFrame[targetVersusFrame$miRNA == normalizer, "Subject"]
						compliantSubjects <- intersect(subjectsA, subjectsB)
						compliantSubjects <- intersect(compliantSubjects, subjectsN)
						filteredFrame <- targetVersusFrame[targetVersusFrame$Subject %in% compliantSubjects,]

						valuesA <- filteredFrame[filteredFrame$miRNA == a, "Mean"] - filteredFrame[filteredFrame$miRNA == normalizer, "Mean"]
						valuesB <- filteredFrame[filteredFrame$miRNA == b, "Mean"] - filteredFrame[filteredFrame$miRNA == normalizer, "Mean"]

					} else {
						compliantSubjects <- intersect(subjectsA, subjectsB)
						filteredFrame <- targetVersusFrame[targetVersusFrame$Subject %in% compliantSubjects,]
						valuesA <- filteredFrame[filteredFrame$miRNA == a, "Mean"]
						valuesB <- filteredFrame[filteredFrame$miRNA == b, "Mean"]
					}
					if ((length(valuesA) == length(valuesB)) && (length(valuesA) > 2)) {
						cat(sep=sep, "", round(stats::cor(valuesA, valuesB), localPrecision), append=TRUE, file=statisticsFileName)
					} else {
						cat(sep=sep, "", NA, append=TRUE, file=statisticsFileName)
						warning("WARNING: Correlation between '", a, "', '", b, "' cannot be computed: less than 3 values.\n", sep="")
					}
				} else {
					cat(sep=sep, "", "-", append=TRUE, file=statisticsFileName)
				}
			}
			cat("\n", append=TRUE, file=statisticsFileName)
		}
		cat("\n", append=TRUE, file=statisticsFileName)

		cat("# Correlation p-values:\n", append=TRUE, file=statisticsFileName)
		cat("\tp", paste(matrixLabels, sep=sep), sep=sep, append=TRUE, file=statisticsFileName)
		cat("\n", append=TRUE, file=statisticsFileName)
		for (a in listOfFeature) {
			cat(paste(sep="", "\t", matrixLabels[which(listOfFeature == a)]), append=TRUE, file=statisticsFileName)
			subjectsA <- targetVersusFrame[targetVersusFrame$miRNA == a, "Subject"]
			for (b in listOfFeature) {

				if (which(listOfFeature == a) < which(listOfFeature == b)) {
					subjectsB <- targetVersusFrame[targetVersusFrame$miRNA == b, "Subject"]
					if (normalizerFlag) {
						subjectsN <- targetVersusFrame[targetVersusFrame$miRNA == normalizer, "Subject"]
						compliantSubjects <- intersect(subjectsA, subjectsB)
						compliantSubjects <- intersect(compliantSubjects, subjectsN)
						if (length(compliantSubjects) == 0) next
						filteredFrame <- targetVersusFrame[targetVersusFrame$Subject %in% compliantSubjects,]

						valuesA <- filteredFrame[filteredFrame$miRNA == a, "Mean"] - filteredFrame[filteredFrame$miRNA == normalizer, "Mean"]
						valuesB <- filteredFrame[filteredFrame$miRNA == b, "Mean"] - filteredFrame[filteredFrame$miRNA == normalizer, "Mean"]
					} else {
						compliantSubjects <- intersect(subjectsA, subjectsB)
						if (length(compliantSubjects) == 0) next
						filteredFrame <- targetVersusFrame[targetVersusFrame$Subject %in% compliantSubjects,]

						valuesA <- filteredFrame[filteredFrame$miRNA == a, "Mean"]
						valuesB <- filteredFrame[filteredFrame$miRNA == b, "Mean"]
					}
					if ((length(valuesA) == length(valuesB)) && (length(valuesA) > 2)) {
						ctst <- stats::cor.test(valuesA, valuesB)
						cat(sep=sep, "", signif(ctst[[3]], localPrecision), append=TRUE, file=statisticsFileName)
					} else {
						cat(sep=sep, "", NA, append=TRUE, file=statisticsFileName)
						warning("WARNING: Correlation (p-value) between '", a, "', '", b, "' cannot be computed: less than 3 values.\n", sep="")
					}
				} else
					cat(sep=sep, "", "-", append=TRUE, file=statisticsFileName)
			}
			cat("\n", append=TRUE, file=statisticsFileName)
		}
		cat("\n", append=TRUE, file=statisticsFileName)

		cat("# Epsilon:\n", append=TRUE, file=statisticsFileName)
		cat("\teps", paste(matrixLabels, sep=sep), sep=sep, append=TRUE, file=statisticsFileName)
		cat("\n", append=TRUE, file=statisticsFileName)
		for (a in listOfFeature) {
			cat(paste(sep="", "\t", matrixLabels[which(listOfFeature == a)]), append=TRUE, file=statisticsFileName)
			subjectsA <- targetVersusFrame[targetVersusFrame$miRNA == a, "Subject"]
			for (b in listOfFeature) {

				if (a != b) {
					subjectsB <- targetVersusFrame[targetVersusFrame$miRNA == b, "Subject"]
					if (normalizerFlag) {
						subjectsN <- targetVersusFrame[targetVersusFrame$miRNA == normalizer, "Subject"]
						compliantSubjects <- intersect(subjectsA, subjectsB)
						compliantSubjects <- intersect(compliantSubjects, subjectsN)
						filteredFrame <- targetVersusFrame[targetVersusFrame$Subject %in% compliantSubjects,]
						valuesA <- filteredFrame[filteredFrame$miRNA == a, "Mean"] - filteredFrame[filteredFrame$miRNA == normalizer, "Mean"]
						valuesB <- filteredFrame[filteredFrame$miRNA == b, "Mean"] - filteredFrame[filteredFrame$miRNA == normalizer, "Mean"]
					} else {
						compliantSubjects <- intersect(subjectsA, subjectsB)
						filteredFrame <- targetVersusFrame[targetVersusFrame$Subject %in% compliantSubjects,]
						valuesA <- filteredFrame[filteredFrame$miRNA == a, "Mean"]
						valuesB <- filteredFrame[filteredFrame$miRNA == b, "Mean"]
					}
					if ((length(valuesA) == length(valuesB)) && (length(valuesA) > 2)) {
						cat(sep=sep, "", round(-stats::cor(valuesA, valuesB)*stats::sd(valuesA)/stats::sd(valuesB), localPrecision), append=TRUE, file=statisticsFileName)
					} else {
						cat(sep=sep, "", NA, append=TRUE, file=statisticsFileName)
						warning("WARNING: Optimized coefficient between '", a, "', '", b, "' cannot be computed: less than 3 values.\n", sep="")
					}
				} else
					cat(sep=sep, "", "-", append=TRUE, file=statisticsFileName)
			}
			cat("\n", append=TRUE, file=statisticsFileName)
		}
		cat("\n", append=TRUE, file=statisticsFileName)

		if (saveOutputFile) {
			message("LOG:\tmiRNA statistics and correlation matrices written to ", statisticsFileName, " successfully.\n", sep="")
		} else {
			cat(readLines(statisticsFileName), sep="\n")
		}
	}

	if (classifierFlag) {
		return(outputThresholdFrame)
	} else {
		return(outputStatisticsFrame)
	}
}
