R version 2.12.0 (2010-10-15) Copyright (C) 2010 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i486-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > x <- array(list(108.35 + ,98.68 + ,100.70 + ,104.38 + ,97.72 + ,15.38 + ,31.27 + ,109.87 + ,99.21 + ,99.62 + ,103.97 + ,98.01 + ,15.03 + ,35.83 + ,111.30 + ,99.36 + ,99.83 + ,103.32 + ,97.78 + ,15.21 + ,37.12 + ,115.50 + ,100.72 + ,100.74 + ,105.01 + ,98.04 + ,15.20 + ,36.77 + ,116.22 + ,102.27 + ,100.84 + ,104.88 + ,98.54 + ,14.60 + ,35.17 + ,116.63 + ,102.62 + ,100.85 + ,104.46 + ,98.39 + ,13.79 + ,37.25 + ,116.84 + ,102.97 + ,99.71 + ,104.71 + ,98.58 + ,14.54 + ,33.77 + ,116.63 + ,102.88 + ,100.80 + ,106.09 + ,98.91 + ,14.31 + ,30.59 + ,117.03 + ,102.90 + ,100.06 + ,106.54 + ,98.68 + ,13.93 + ,33.59 + ,117.00 + ,103.01 + ,100.57 + ,104.36 + ,98.59 + ,14.82 + ,37.24 + ,117.14 + ,103.02 + ,99.79 + ,105.31 + ,99.13 + ,14.46 + ,34.81 + ,116.64 + ,103.73 + ,99.90 + ,105.07 + ,98.70 + ,14.85 + ,34.94 + ,117.24 + ,104.18 + ,100.12 + ,105.39 + ,99.00 + ,14.95 + ,34.47 + ,117.52 + ,103.73 + ,100.40 + ,105.65 + ,98.80 + ,14.43 + ,30.48 + ,117.83 + ,103.78 + ,100.51 + ,108.25 + ,98.80 + ,14.84 + ,30.94 + ,119.79 + ,103.61 + ,100.70 + ,107.71 + ,99.29 + ,14.39 + ,30.60 + ,120.86 + ,103.84 + ,100.62 + ,108.58 + ,99.69 + ,15.70 + ,28.42 + ,120.75 + ,103.86 + ,99.70 + ,108.27 + ,100.01 + ,15.34 + ,25.89 + ,120.63 + ,104.14 + ,99.48 + ,107.62 + ,99.85 + ,13.98 + ,26.32 + ,120.89 + ,104.05 + ,99.36 + ,108.80 + ,99.66 + ,14.75 + ,27.18 + ,120.23 + ,104.01 + ,99.39 + ,109.26 + ,101.18 + ,14.81 + ,25.85 + ,121.19 + ,104.49 + ,99.45 + ,108.58 + ,101.47 + ,14.67 + ,26.32 + ,120.79 + ,104.83 + ,99.28 + ,107.05 + ,101.28 + ,15.03 + ,23.07 + ,120.09 + ,104.78 + ,99.40 + ,109.20 + ,101.80 + ,14.34 + ,20.19 + ,120.86 + ,104.95 + ,99.10 + ,109.52 + ,102.48 + ,12.54 + ,18.65 + ,121.10 + ,105.28 + ,99.48 + ,111.12 + ,102.32 + ,11.37 + ,17.74 + ,121.47 + ,105.28 + ,99.74 + ,108.74 + ,102.30 + ,12.58 + ,17.26 + ,122.01 + ,105.91 + ,100.42 + ,110.53 + ,102.84 + ,13.06 + ,16.01 + ,123.94 + ,106.81 + ,100.80 + ,110.44 + ,102.36 + ,12.50 + ,17.94 + ,125.78 + ,106.39 + ,100.66 + ,111.02 + ,102.16 + ,11.11 + ,15.53 + ,125.31 + ,107.02 + ,101.03 + ,111.13 + ,102.57 + ,12.39 + ,14.49 + ,125.79 + ,106.92 + ,101.22 + ,110.90 + ,102.49 + ,12.34 + ,15.35 + ,126.12 + ,107.01 + ,101.23 + ,111.32 + ,104.11 + ,11.54 + ,14.67 + ,125.57 + ,106.79 + ,100.10 + ,109.37 + ,104.78 + ,10.22 + ,12.95 + ,125.44 + ,107.41 + ,99.98 + ,110.18 + ,104.13 + ,8.50 + ,8.81 + ,126.12 + ,107.13 + ,99.91 + ,110.74 + ,104.22 + ,9.06 + ,9.33 + ,126.01 + ,107.54 + ,99.84 + ,111.70 + ,104.73 + ,9.28 + ,9.31 + ,126.50 + ,108.48 + ,99.68 + ,111.33 + ,104.99 + ,7.24 + ,9.03 + ,126.13 + ,108.50 + ,99.74 + ,110.86 + ,104.70 + ,7.58 + ,10.96 + ,126.66 + ,108.27 + ,99.71 + ,109.48 + ,104.69 + ,7.81 + ,14.26 + ,126.33 + ,109.42 + ,99.35 + ,108.77 + ,104.85 + ,8.54 + ,14.20 + ,126.61 + ,110.09 + ,99.21 + ,109.81 + ,104.24 + ,9.27 + ,13.70 + ,126.36 + ,109.98 + ,99.21 + ,109.15 + ,104.74 + ,10.11 + ,17.46 + ,126.83 + ,109.99 + ,99.16 + ,109.63 + ,104.20 + ,9.21 + ,18.73 + ,125.90 + ,109.54 + ,99.20 + ,111.32 + ,105.62 + ,10.71 + ,20.37 + ,126.29 + ,108.85 + ,99.08 + ,109.75 + ,106.08 + ,10.85 + ,18.72 + ,126.37 + ,106.76 + ,98.16 + ,110.37 + ,105.46 + ,11.77 + ,21.60 + ,125.11 + ,107.56 + ,98.00 + ,108.30 + ,105.42 + ,11.81 + ,22.75) + ,dim=c(7 + ,48) + ,dimnames=list(c('Coffee' + ,'Tea' + ,'Sugar' + ,'Water' + ,'Soda' + ,'SaraLee' + ,'Starbucks') + ,1:48)) > y <- array(NA,dim=c(7,48),dimnames=list(c('Coffee','Tea','Sugar','Water','Soda','SaraLee','Starbucks'),1:48)) > for (i in 1:dim(x)[1]) + { + for (j in 1:dim(x)[2]) + { + y[i,j] <- as.numeric(x[i,j]) + } + } > par4 = 'no' > par3 = '0' > par2 = 'none' > par1 = '1' > #'GNU S' R Code compiled by R2WASP v. 1.0.44 () > #Author: Dr. Ian E. Holliday > #To cite this work: Ian E. Holliday, 2009, YOUR SOFTWARE TITLE (vNUMBER) in Free Statistics Software (v$_version), Office for Research Development and Education, URL http://www.wessa.net/rwasp_YOURPAGE.wasp/ > #Source of accompanying publication: > #Technical description: > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- data.frame(t(y)) > is.data.frame(x) [1] TRUE > x <- x[!is.na(x[,par1]),] > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "Coffee" > x[,par1] [1] 108.35 109.87 111.30 115.50 116.22 116.63 116.84 116.63 117.03 117.00 [11] 117.14 116.64 117.24 117.52 117.83 119.79 120.86 120.75 120.63 120.89 [21] 120.23 121.19 120.79 120.09 120.86 121.10 121.47 122.01 123.94 125.78 [31] 125.31 125.79 126.12 125.57 125.44 126.12 126.01 126.50 126.13 126.66 [41] 126.33 126.61 126.36 126.83 125.90 126.29 126.37 125.11 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) 108.35 109.87 111.3 115.5 116.22 116.63 116.64 116.84 117 117.03 117.14 1 1 1 1 1 2 1 1 1 1 1 117.24 117.52 117.83 119.79 120.09 120.23 120.63 120.75 120.79 120.86 120.89 1 1 1 1 1 1 1 1 1 2 1 121.1 121.19 121.47 122.01 123.94 125.11 125.31 125.44 125.57 125.78 125.79 1 1 1 1 1 1 1 1 1 1 1 125.9 126.01 126.12 126.13 126.29 126.33 126.36 126.37 126.5 126.61 126.66 1 1 2 1 1 1 1 1 1 1 1 126.83 1 > colnames(x) [1] "Coffee" "Tea" "Sugar" "Water" "Soda" "SaraLee" [7] "Starbucks" > colnames(x)[par1] [1] "Coffee" > x[,par1] [1] 108.35 109.87 111.30 115.50 116.22 116.63 116.84 116.63 117.03 117.00 [11] 117.14 116.64 117.24 117.52 117.83 119.79 120.86 120.75 120.63 120.89 [21] 120.23 121.19 120.79 120.09 120.86 121.10 121.47 122.01 123.94 125.78 [31] 125.31 125.79 126.12 125.57 125.44 126.12 126.01 126.50 126.13 126.66 [41] 126.33 126.61 126.36 126.83 125.90 126.29 126.37 125.11 > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/www/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/www/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/1gvbb1293019758.tab") + } + } > m Conditional inference tree with 5 terminal nodes Response: Coffee Inputs: Tea, Sugar, Water, Soda, SaraLee, Starbucks Number of observations: 48 1) Tea <= 105.28; criterion = 1, statistic = 42.801 2) Tea <= 102.88; criterion = 1, statistic = 23.093 3)* weights = 7 2) Tea > 102.88 4) Starbucks <= 28.42; criterion = 0.999, statistic = 13.773 5)* weights = 11 4) Starbucks > 28.42 6)* weights = 9 1) Tea > 105.28 7) SaraLee <= 11.11; criterion = 0.964, statistic = 7.518 8)* weights = 14 7) SaraLee > 11.11 9)* weights = 7 > postscript(file="/var/www/rcomp/tmp/2gvbb1293019758.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/www/rcomp/tmp/3gvbb1293019758.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } Actuals Forecasts Residuals 1 108.35 113.5000 -5.15000000 2 109.87 113.5000 -3.63000000 3 111.30 113.5000 -2.20000000 4 115.50 113.5000 2.00000000 5 116.22 113.5000 2.72000000 6 116.63 113.5000 3.13000000 7 116.84 117.4478 -0.60777778 8 116.63 113.5000 3.13000000 9 117.03 117.4478 -0.41777778 10 117.00 117.4478 -0.44777778 11 117.14 117.4478 -0.30777778 12 116.64 117.4478 -0.80777778 13 117.24 117.4478 -0.20777778 14 117.52 117.4478 0.07222222 15 117.83 117.4478 0.38222222 16 119.79 117.4478 2.34222222 17 120.86 120.8055 0.05454545 18 120.75 120.8055 -0.05545455 19 120.63 120.8055 -0.17545455 20 120.89 120.8055 0.08454545 21 120.23 120.8055 -0.57545455 22 121.19 120.8055 0.38454545 23 120.79 120.8055 -0.01545455 24 120.09 120.8055 -0.71545455 25 120.86 120.8055 0.05454545 26 121.10 120.8055 0.29454545 27 121.47 120.8055 0.66454545 28 122.01 124.9500 -2.94000000 29 123.94 124.9500 -1.01000000 30 125.78 126.1807 -0.40071429 31 125.31 124.9500 0.36000000 32 125.79 124.9500 0.84000000 33 126.12 124.9500 1.17000000 34 125.57 126.1807 -0.61071429 35 125.44 126.1807 -0.74071429 36 126.12 126.1807 -0.06071429 37 126.01 126.1807 -0.17071429 38 126.50 126.1807 0.31928571 39 126.13 126.1807 -0.05071429 40 126.66 126.1807 0.47928571 41 126.33 126.1807 0.14928571 42 126.61 126.1807 0.42928571 43 126.36 126.1807 0.17928571 44 126.83 126.1807 0.64928571 45 125.90 126.1807 -0.28071429 46 126.29 126.1807 0.10928571 47 126.37 124.9500 1.42000000 48 125.11 124.9500 0.16000000 > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } > postscript(file="/var/www/rcomp/tmp/42vay1293019758.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/55wqm1293019758.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/6qxpa1293019758.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/7j6ov1293019758.tab") + } > > try(system("convert tmp/2gvbb1293019758.ps tmp/2gvbb1293019758.png",intern=TRUE)) character(0) > try(system("convert tmp/3gvbb1293019758.ps tmp/3gvbb1293019758.png",intern=TRUE)) character(0) > try(system("convert tmp/42vay1293019758.ps tmp/42vay1293019758.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 2.140 0.740 2.849