################################################################################# # Bayesian model selection and statistical modeling # Chapman & Hall/CRC Taylor and Francis Group # # Chapter5: Section5.6.2 # # Nonlinear multiclass classification using basis expansion predictors and GBIC # This code classifies the iris data # # Author : Tomohiro Ando # ################################################################################ # library library(mva) library(stats) library(akima) # Data generation data(iris) X <- iris[,1:4] y <- as.numeric(iris[,5]) n <- nrow(X) p <- ncol(X) g <- max(y) Targets <- matrix(0,nrow=n,ncol=g) for(i in 1:n){Targets[i,y[i]] <- 1} # Setting hyperparameters m <- 10 # number of basis fucntions nu <- 10 # hyperparameters lambda <- 10^-2 # smoothing parameter # Posterior inference ig <- rep(1,g-1) iN <- rep(1,n) Dk <- diff(diag(1,m+1),differences=2) K <- t(Dk)%*%Dk Km <- kmeans(X,m) U <- Km$centers W <- Km$withinss Size <- Km$size IN <- matrix(1,nrow=n,ncol=1) P <- matrix(0,nrow=n,ncol=m) for(i in 1:p){P <- P + (X[,i]%*%matrix(1,nrow=1,ncol=m)-IN%*%t(U[,i]))^2} SS <- nu*W/Size Sigma <- diag(1/(2*SS)) E <- cbind(1,exp(-P%*%Sigma)) #Design matrix d <- ncol(E) Beta <- matrix(runif(d*(g-1),-10^-5,10^-5),nrow=d,ncol=(g-1)) DB <- matrix(0,nrow=d*(g-1),ncol=1) H <- matrix(0,nrow=d*(g-1),ncol=d*(g-1)) for(ITERE in 1:100){ Beta.old <- Beta O <- E%*%Beta Soft <- exp(O)/(1+(exp(O)%*%ig)%*%t(ig)) for(gg in 1:(g-1)){ DB[(d*(gg-1)+1):(d*gg),1] <- (t(E)%*%(Targets[,gg]-Soft))[,gg]-n*lambda*Beta[,gg] } for(j in 1:(g-1)){for(k in 1:(g-1)){ L1 <- diag( as.vector(Soft[,j]) ); L2 <- diag( as.vector(Soft[,k]) ) H[(d*(j-1)+1):(d*j),(d*(k-1)+1):(d*k)] <- t(E)%*%L1%*%L2%*%E }} for(k in 1:(g-1)){ L <- diag( as.vector(Soft[,k]) ) H[(d*(k-1)+1):(d*k),(d*(k-1)+1):(d*k)] <- H[(d*(k-1)+1):(d*k),(d*(k-1)+1):(d*k)]-t(E)%*%L%*%E } SVDH <- svd(H-n*lambda*diag(1,d*(g-1))) solveH <- (SVDH$v)%*%solve(diag(as.vector(SVDH$d)))%*%t(SVDH$u) for(i in 1:(g-1)){Beta[,i] <- Beta[,i]-(solveH%*%DB)[(d*(i-1)+1):(d*i)]} if(sum((Beta.old-Beta)^2)<=10^-7){break} } # GBIC score O <- E%*%Beta Soft <- c(1/(exp(O)%*%ig+1))*exp(O) Second <- matrix(0,nrow=d*(g-1),ncol=d*(g-1)) for(j in 1:(g-1)){for(k in 1:(g-1)){ L1 <- diag( as.vector(Soft[,j]) ); L2 <- diag( as.vector(Soft[,k]) ) I.small <- t(E)%*%L1%*%L2%*%E Second[(d*(j-1)+1):(d*j),(d*(k-1)+1):(d*k)] <- I.small if(j==k){ L <- diag( as.vector(Soft[,k]) ) Second[(d*(j-1)+1):(d*j),(d*(k-1)+1):(d*k)]-t(E)%*%L%*%E+n*lambda*diag(1,d) } }} Second <- Second/n O <- E%*%Beta Soft <- exp(O)/(1+(exp(O)%*%ig)%*%t(ig)) Soft <- cbind(Soft,1-Soft%*%ig) LogLikelihood <- sum(log(Soft^Targets)) GBIC <- -2*LogLikelihood+n*lambda*sum(Beta^2)+log(det(Second))-(g-1)*d*log(lambda) print(GBIC) # Posterior probability O <- E%*%Beta Soft <- exp(O)/(1+(exp(O)%*%ig)%*%t(ig)) Postprob <- cbind(Soft,1-Soft%*%ig) print(cbind(Postprob,iris[,5]))