# Code to analyse image data
#
# Reads the training data, fits single-hidden-layer neural networks (nnet)
# that classify each observation as tray / flower / leaf / compost, and
# assesses them with repeated 10-fold cross-validation.

# load library
library(nnet)

# read data ----
filename <- c("viola_train_Tdata.txt")
# Placeholder left by the author: replace with the directory holding the data
# file (including the trailing path separator, since it is pasted directly).
path <- c(...insert....)
filepath <- paste0(path, filename)
imagedat <- read.table(
  filepath,
  col.names = c(
    "SL", "Pack", "Count", "Number", "class", "rchrom", "gchrom",
    "inten", "mrchrom", "mgchrom", "minten", "sdrchrom", "sdgchrom",
    "sdinten", "ypos", "xpos", "height", "error", "hmax", "hmin"
  ),
  header = TRUE
)

# classification factor ----
# Column 5 ("class") holds the class code; it is relabelled and prepended to
# the data frame as `fclass`.
fclass <- factor(
  imagedat[["class"]],
  labels = c("tray", "flower", "leaf", "compost")
)
imagedat <- cbind(fclass, imagedat)

# Model formula shared by every network fitted in this script.
image_formula <- fclass ~ rchrom + gchrom + inten + mrchrom + mgchrom +
  minten + sdrchrom + sdgchrom + sdinten

# output results of cross-validation ----

# Print a summary of cross-validated predictions.
#
# Arguments:
#   x  factor of true classes, one entry per row of `y`.
#   y  numeric matrix of predictions summed over replications, one column
#      per class (as returned by CVnn()).
#   n  number of replications summed into `y`; `y / n` is used as the
#      averaged class-probability matrix.
#
# Prints the confusion table, the misclassification rate for all cases and
# for cases whose largest averaged probability exceeds 0.7, 0.8 and 0.9, and
# the logarithmic score. Returns NULL invisibly.
concv <- function(x, y, n) {
  probs <- y / n

  # Predicted class = column with the largest averaged probability.
  # Note: max.col() breaks ties at random by default.
  r <- max.col(probs)
  r <- factor(
    r,
    levels = c(1, 2, 3, 4),
    labels = c("tray", "flower", "leaf", "compost")
  )

  # Log score against the true classes passed in `x` (previously this read
  # the global `fclass` instead of the function's own argument).
  ecrit <- sum(-class.ind(x) * log(probs))

  # Percentage of off-diagonal (misclassified) cases.
  error_rate <- function(truth, pred) {
    counts <- table(truth, pred)
    diag(counts) <- 0
    round(100 * sum(counts) / length(pred), 2)
  }

  tab <- table(x, r)
  print(tab)
  cat("CV error (all) = ", error_rate(x, r), "%",
      " Sample Size =", length(r), "\n")

  # Confidence of each prediction = largest probability over ALL classes
  # (the original pmax() call omitted the fourth column).
  pprob <- apply(probs, 1, max)

  for (cutoff in c(0.7, 0.8, 0.9)) {
    keep <- pprob > cutoff
    pred_kept <- r[keep]
    cat(paste0("CV error (p>", cutoff, ") = "),
        error_rate(x[keep], pred_kept), "%",
        " Sample Size =", length(pred_kept), "\n")
  }

  cat("CV Log scoring = ", round(ecrit, 1), "\n")
}

# cross-validation ----

# Repeated 10-fold cross-validation of the nnet classifier.
#
# Arguments:
#   nreps    number of replications (random fold assignments).
#   seednos  RNG seeds, one per replication; must have at least `nreps`
#            entries.
#   ...      forwarded to nnet() (e.g. size, decay, maxit).
#
# Reads the globals `imagedat`, `fclass` and `image_formula`.
# Returns a matrix with one row per row of `imagedat` and one column per
# level of `fclass`, holding the held-out predictions summed over the
# replications (divide by `nreps` to average).
CVnn <- function(nreps = 10,
                 seednos = c(3474, 3738, 6369, 6473, 6614,
                             6986, 3716, 2332, 6168, 4560),
                 ...) {
  n_obs <- nrow(imagedat)
  res <- matrix(0, n_obs, nlevels(fclass))
  dimnames(res) <- list(NULL, levels(fclass))

  # Report the settings actually forwarded to nnet() rather than whatever
  # globals named `size` / `decay` happen to exist. nnet() defaults decay
  # to 0 when it is not supplied.
  dots <- list(...)
  size_used <- dots[["size"]]
  decay_used <- if (is.null(dots[["decay"]])) 0 else dots[["decay"]]

  for (rep in seq_len(nreps)) {
    set.seed(seednos[rep])
    rand <- sample(10, n_obs, replace = TRUE)
    cat("Replication =", rep, "\n")
    cat("Randomisation ", rand[1:10], ". . .", "\n")
    cat("Size ", size_used, "and decay ", decay_used, "\n")

    for (i in sort(unique(rand))) {
      cat("fold ", i, "\n", sep = "")
      held_out <- rand == i
      # Fit on every fold except i, then accumulate predictions for fold i.
      cvimage.nn <- nnet(image_formula, imagedat[!held_out, ],
                         trace = FALSE, skip = FALSE, ...)
      res[held_out, ] <- res[held_out, ] +
        predict(cvimage.nn, imagedat[held_out, ])
    }
  }
  res
}

# fit neural network ----
wdecay <- c(0, 0.0001, 0.001, 0.01, 0.1)
nnseeds <- c(3474, 3738, 6369, 6473, 6614, 6986, 3716, 2332, 6168, 4560)
dwtdecay <- vector(mode = "numeric", 10)
dssqwt <- vector(mode = "numeric", 10)
decrit <- vector(mode = "numeric", 10)

# Only the fourth decay value (0.01) is run; widen the range to scan others.
for (i in 4:4) {
  set.seed(2103)
  size <- 3
  decay <- wdecay[i]

  # Refit on the full data from several random starts and record, for each
  # fit, the decay, the sum of squared weights and the log score.
  for (j in seq_along(nnseeds)) {
    set.seed(nnseeds[j])
    image.nn <- nnet(image_formula, imagedat,
                     size = size, decay = decay, skip = FALSE,
                     maxit = 2000, trace = TRUE)
    dwtdecay[j] <- image.nn[["decay"]]
    dssqwt[j] <- sum(image.nn[["wts"]]^2)
    decrit[j] <- sum(-class.ind(fclass) * log(image.nn[["fitted.values"]]))
  }

  # Penalised fit criterion = log score + decay * sum of squared weights.
  dfitcrit <- dssqwt * dwtdecay + decrit
  fitcrit <- mean(dfitcrit)
  ecrit <- mean(decrit)
  ssqwt <- mean(dssqwt)

  cvres.nn <- CVnn(maxit = 2000, size = size, decay = decay)
  cat(" Hidden nodes =", size, " Decay = ", decay, "\n")
  # n must equal the number of replications used by CVnn() (default 10).
  concv(imagedat$fclass, cvres.nn, n = 10)
  cat("Fit criterion = ", round(fitcrit, 1), "\n")
  cat("Logarithmic scoring = ", round(ecrit, 1), "\n")
  cat("Sum squared weights = ", round(ssqwt, 1),
      " Weight decay = ", decay, "\n")
}

# Final fit on the full data with the chosen settings.
image.nn <- nnet(image_formula, imagedat,
                 size = 3, decay = 0.01, skip = FALSE,
                 maxit = 2000, trace = TRUE)