R version 2.12.0 (2010-10-15) Copyright (C) 2010 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i486-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > x <- array(list(14450609152 + ,9119000 + ,15369578496 + ,9166000 + ,16440317952 + ,9218000 + ,17674829824 + ,9283000 + ,19782035456 + ,9367000 + ,21531359232 + ,9448000 + ,23118405632 + ,9508000 + ,24779487232 + ,9557000 + ,26495297536 + ,9590000 + ,29388689408 + ,9613000 + ,32676600000 + ,9638000 + ,35799700000 + ,9673000 + ,40077000000 + ,9709000 + ,45536800000 + ,9738000 + ,53409700000 + ,9768000 + ,59108700000 + ,9795000 + ,67180700000 + ,9811000 + ,72656600000 + ,9822000 + ,78026600000 + ,9830000 + ,83450800000 + ,9837000 + ,90756100000 + ,9847000 + ,95153800000 + ,9852000 + ,102966000000 + ,9856000 + ,109085000000 + ,9856000 + ,117854000000 + ,9853000 + ,125345000000 + ,9858000 + ,131200000000 + ,9862000 + ,136486000000 + ,9870000 + ,146033000000 + ,9902000 + ,158348000000 + ,9938000 + ,167909000000 + ,9967400 + ,175906000000 + ,10004500 + ,184714000000 + ,10045000 + ,190243000000 + ,10084500 + ,200495000000 + ,10115600 + ,207782000000 + ,10136800 + ,211399000000 + ,10157000 + ,221184000000 + ,10181000 + ,229572000000 + ,10203000 + ,238248000000 + ,10226000 + ,251741000000 + ,10252000 + ,258883000000 + ,10287000 + ,267652000000 + ,10333000 + ,274726000000 + ,10376080.14 + ,290825000000 + ,10421120.61 + ,302845000000 + ,10478650 + ,318193000000 + ,10547958 + ,334948000000 + ,10625700 + ,344676000000 + ,10708433 + ,337284000000 + ,10788760) + ,dim=c(2 + ,50) + ,dimnames=list(c('GDP' + ,'Population ') + ,1:50)) > y <- array(NA,dim=c(2,50),dimnames=list(c('GDP','Population '),1:50)) > for (i in 1:dim(x)[1]) + { + for (j in 1:dim(x)[2]) + { + y[i,j] <- as.numeric(x[i,j]) + } + } > par4 = 'no' > par3 = '2' > par2 = 'none' > par1 = '2' > #'GNU S' R Code compiled by R2WASP v. 1.0.44 () > #Author: Dr. Ian E. Holliday > #To cite this work: Ian E. Holliday, 2009, YOUR SOFTWARE TITLE (vNUMBER) in Free Statistics Software (v$_version), Office for Research Development and Education, URL http://www.wessa.net/rwasp_YOURPAGE.wasp/ > #Source of accompanying publication: > #Technical description: > library(party) Loading required package: survival Loading required package: splines Loading required package: grid Loading required package: modeltools Loading required package: stats4 Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library(Hmisc) Attaching package: 'Hmisc' The following object(s) are masked from 'package:survival': untangle.specials The following object(s) are masked from 'package:base': format.pval, round.POSIXt, trunc.POSIXt, units > par1 <- as.numeric(par1) > par3 <- as.numeric(par3) > x <- data.frame(t(y)) > is.data.frame(x) [1] TRUE > x <- x[!is.na(x[,par1]),] > k <- length(x[1,]) > n <- length(x[,1]) > colnames(x)[par1] [1] "Population." > x[,par1] [1] 9119000 9166000 9218000 9283000 9367000 9448000 9508000 9557000 [9] 9590000 9613000 9638000 9673000 9709000 9738000 9768000 9795000 [17] 9811000 9822000 9830000 9837000 9847000 9852000 9856000 9856000 [25] 9853000 9858000 9862000 9870000 9902000 9938000 9967400 10004500 [33] 10045000 10084500 10115600 10136800 10157000 10181000 10203000 10226000 [41] 10252000 10287000 10333000 10376080 10421121 10478650 10547958 10625700 [49] 10708433 10788760 > if (par2 == 'kmeans') { + cl <- kmeans(x[,par1], par3) + print(cl) + clm <- matrix(cbind(cl$centers,1:par3),ncol=2) + clm <- clm[sort.list(clm[,1]),] + for (i in 1:par3) { + cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') + } + cl$cluster <- as.factor(cl$cluster) + print(cl$cluster) + x[,par1] <- cl$cluster + } > if (par2 == 'quantiles') { + x[,par1] <- cut2(x[,par1],g=par3) + } > if (par2 == 'hclust') { + hc <- hclust(dist(x[,par1])^2, 'cen') + print(hc) + memb <- cutree(hc, k = par3) + dum <- c(mean(x[memb==1,par1])) + for (i in 2:par3) { + dum <- c(dum, mean(x[memb==i,par1])) + } + hcm <- matrix(cbind(dum,1:par3),ncol=2) + hcm <- hcm[sort.list(hcm[,1]),] + for (i in 1:par3) { + memb[memb==hcm[i,2]] <- paste('C',i,sep='') + } + memb <- as.factor(memb) + print(memb) + x[,par1] <- memb + } > if (par2=='equal') { + ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) + x[,par1] <- as.factor(ed) + } > table(x[,par1]) 9119000 9166000 9218000 9283000 9367000 9448000 1 1 1 1 1 1 9508000 9557000 9590000 9613000 9638000 9673000 1 1 1 1 1 1 9709000 9738000 9768000 9795000 9811000 9822000 1 1 1 1 1 1 9830000 9837000 9847000 9852000 9853000 9856000 1 1 1 1 1 2 9858000 9862000 9870000 9902000 9938000 9967400 1 1 1 1 1 1 10004500 10045000 10084500 10115600 10136800 10157000 1 1 1 1 1 1 10181000 10203000 10226000 10252000 10287000 10333000 1 1 1 1 1 1 10376080.14 10421120.61 10478650 10547958 10625700 10708433 1 1 1 1 1 1 10788760 1 > colnames(x) [1] "GDP" "Population." > colnames(x)[par1] [1] "Population." > x[,par1] [1] 9119000 9166000 9218000 9283000 9367000 9448000 9508000 9557000 [9] 9590000 9613000 9638000 9673000 9709000 9738000 9768000 9795000 [17] 9811000 9822000 9830000 9837000 9847000 9852000 9856000 9856000 [25] 9853000 9858000 9862000 9870000 9902000 9938000 9967400 10004500 [33] 10045000 10084500 10115600 10136800 10157000 10181000 10203000 10226000 [41] 10252000 10287000 10333000 10376080 10421121 10478650 10547958 10625700 [49] 10708433 10788760 > if (par2 == 'none') { + m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) + } > > #Note: the /var/www/rcomp/createtable file can be downloaded at http://www.wessa.net/cretab > load(file="/var/www/rcomp/createtable") > > if (par2 != 'none') { + m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) + if (par4=='yes') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + a<-table.element(a,'Prediction (training)',par3+1,TRUE) + a<-table.element(a,'Prediction (testing)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Actual',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) + a<-table.element(a,'CV',1,TRUE) + a<-table.row.end(a) + for (i in 1:10) { + ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) + m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) + if (i==1) { + m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) + m.ct.i.actu <- x[ind==1,par1] + m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) + m.ct.x.actu <- x[ind==2,par1] + } else { + m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) + m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) + m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) + m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) + } + } + print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) + numer <- numer + m.ct.i.tab[i,i] + } + print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) + print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) + numer <- 0 + for (i in 1:par3) { + print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) + numer <- numer + m.ct.x.tab[i,i] + } + print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) + a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) + for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) + a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) + a<-table.row.end(a) + } + a<-table.row.start(a) + a<-table.element(a,'Overall',1,TRUE) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.i.cp,4)) + for (jjj in 1:par3) a<-table.element(a,'-') + a<-table.element(a,round(m.ct.x.cp,4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/1r2nb1293444696.tab") + } + } > m Conditional inference tree with 4 terminal nodes Response: Population. Input: GDP Number of observations: 50 1) GDP <= 1.75906e+11; criterion = 1, statistic = 44.707 2) GDP <= 24779487232; criterion = 1, statistic = 21.106 3)* weights = 8 2) GDP > 24779487232 4) GDP <= 53409700000; criterion = 1, statistic = 19.859 5)* weights = 7 4) GDP > 53409700000 6)* weights = 17 1) GDP > 1.75906e+11 7)* weights = 18 > postscript(file="/var/www/rcomp/tmp/2r2nb1293444696.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(m) > dev.off() null device 1 > postscript(file="/var/www/rcomp/tmp/3r2nb1293444696.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') > dev.off() null device 1 > if (par2 == 'none') { + forec <- predict(m) + result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) + colnames(result) <- c('Actuals','Forecasts','Residuals') + print(result) + } Actuals Forecasts Residuals 1 9119000 9333250 -214250.000 2 9166000 9333250 -167250.000 3 9218000 9333250 -115250.000 4 9283000 9333250 -50250.000 5 9367000 9333250 33750.000 6 9448000 9333250 114750.000 7 9508000 9333250 174750.000 8 9557000 9333250 223750.000 9 9590000 9675571 -85571.429 10 9613000 9675571 -62571.429 11 9638000 9675571 -37571.429 12 9673000 9675571 -2571.429 13 9709000 9675571 33428.571 14 9738000 9675571 62428.571 15 9768000 9675571 92428.571 16 9795000 9868288 -73288.235 17 9811000 9868288 -57288.235 18 9822000 9868288 -46288.235 19 9830000 9868288 -38288.235 20 9837000 9868288 -31288.235 21 9847000 9868288 -21288.235 22 9852000 9868288 -16288.235 23 9856000 9868288 -12288.235 24 9856000 9868288 -12288.235 25 9853000 9868288 -15288.235 26 9858000 9868288 -10288.235 27 9862000 9868288 -6288.235 28 9870000 9868288 1711.765 29 9902000 9868288 33711.765 30 9938000 9868288 69711.765 31 9967400 9868288 99111.765 32 10004500 9868288 136211.765 33 10045000 10331533 -286533.431 34 10084500 10331533 -247033.431 35 10115600 10331533 -215933.431 36 10136800 10331533 -194733.431 37 10157000 10331533 -174533.431 38 10181000 10331533 -150533.431 39 10203000 10331533 -128533.431 40 10226000 10331533 -105533.431 41 10252000 10331533 -79533.431 42 10287000 10331533 -44533.431 43 10333000 10331533 1466.569 44 10376080 10331533 44546.709 45 10421121 10331533 89587.179 46 10478650 10331533 147116.569 47 10547958 10331533 216424.569 48 10625700 10331533 294166.569 49 10708433 10331533 376899.569 50 10788760 10331533 457226.569 > if (par2 != 'none') { + print(cbind(as.factor(x[,par1]),predict(m))) + myt <- table(as.factor(x[,par1]),predict(m)) + print(myt) + } > postscript(file="/var/www/rcomp/tmp/42tnw1293444696.ps",horizontal=F,onefile=F,pagecentre=F,paper="special",width=8.3333333333333,height=5.5555555555556) > if(par2=='none') { + op <- par(mfrow=c(2,2)) + plot(density(result$Actuals),main='Kernel Density Plot of Actuals') + plot(density(result$Residuals),main='Kernel Density Plot of Residuals') + plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') + plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') + par(op) + } > if(par2!='none') { + plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') + } > dev.off() null device 1 > if (par2 == 'none') { + detcoef <- cor(result$Forecasts,result$Actuals) + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Goodness of Fit',2,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'Correlation',1,TRUE) + a<-table.element(a,round(detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'R-squared',1,TRUE) + a<-table.element(a,round(detcoef*detcoef,4)) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'RMSE',1,TRUE) + a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) + a<-table.row.end(a) + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/5yll51293444696.tab") + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'#',header=TRUE) + a<-table.element(a,'Actuals',header=TRUE) + a<-table.element(a,'Forecasts',header=TRUE) + a<-table.element(a,'Residuals',header=TRUE) + a<-table.row.end(a) + for (i in 1:length(result$Actuals)) { + a<-table.row.start(a) + a<-table.element(a,i,header=TRUE) + a<-table.element(a,result$Actuals[i]) + a<-table.element(a,result$Forecasts[i]) + a<-table.element(a,result$Residuals[i]) + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/69uk81293444696.tab") + } > if (par2 != 'none') { + a<-table.start() + a<-table.row.start(a) + a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) + a<-table.row.end(a) + a<-table.row.start(a) + a<-table.element(a,'',1,TRUE) + for (i in 1:par3) { + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + } + a<-table.row.end(a) + for (i in 1:par3) { + a<-table.row.start(a) + a<-table.element(a,paste('C',i,sep=''),1,TRUE) + for (j in 1:par3) { + a<-table.element(a,myt[i,j]) + } + a<-table.row.end(a) + } + a<-table.end(a) + table.save(a,file="/var/www/rcomp/tmp/7ccid1293444696.tab") + } > > try(system("convert tmp/2r2nb1293444696.ps tmp/2r2nb1293444696.png",intern=TRUE)) character(0) > try(system("convert tmp/3r2nb1293444696.ps tmp/3r2nb1293444696.png",intern=TRUE)) character(0) > try(system("convert tmp/42tnw1293444696.ps tmp/42tnw1293444696.png",intern=TRUE)) character(0) > > > proc.time() user system elapsed 2.060 0.700 2.733