#ST407 Monte Carlo Methods 2012
#Practical 8: Scaling and Convergence Diagnostics
#Question 1
#Part a, mixture density
dmix<-function(x){
#Density of the two-component Gaussian mixture 0.4*N(-1, 0.2^2) + 0.6*N(2, 0.3^2)
#Vectorised in x: returns the mixture density evaluated at each point of x
weights <- c(0.4, 0.6)
weights[1] * dnorm(x, mean = -1, sd = 0.2) + weights[2] * dnorm(x, mean = 2, sd = 0.3)
}
#To plot this density
#x<-(-300:400)/100 #Adam's way of getting a sequence from -3 to 4 (note: length 701, not 100)
x<-seq(-3, 4, len=100) #Evaluation grid covering the region containing both modes
plot(x, dmix(x), type = "l", lwd = 2, col = "red",
     xlab = "x", ylab = "Density", main = "Mixture Distribution")
#The density is bimodal, with one mode per mixture component:
#each mode is centred near its component mean, and its spread reflects the component sd
#Part b, RWMH to produce samples from this distribution
rwmmix<-function(N=1000, sigma=0.4, x0=2){
#Random-walk Metropolis-Hastings sampler targeting the dmix density.
#N: number of samples to return
#sigma: standard deviation of the Gaussian random-walk proposal
#x0: initial state of the Markov chain
#Returns a numeric vector of N samples (the initial state is dropped).
X <- numeric(N + 1)  #Chain storage, including the initial state
X[1] <- x0
for (i in 2:(N + 1)) {
  #Gaussian random-walk proposal centred at the current state
  X[i] <- X[i - 1] + rnorm(1) * sigma
  #The proposal is symmetric, so the MH acceptance ratio reduces to the density ratio
  if (runif(1) > dmix(X[i]) / dmix(X[i - 1])) {
    #Rejection: discard the proposal and carry the current state forward
    X[i] <- X[i - 1]
  }
}
X[-1]  #Drop the initialisation so exactly N samples are returned
}
#Part c, behaviour of chain under various proposal scales
sigma.vec<-c(0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 50) #Proposal standard deviations to compare (sigma, not variances)
#One chain of 5000 samples per proposal sd. vapply (rather than sapply) pins the
#return shape to a 5000 x 9 numeric matrix instead of relying on simplification.
Iterations.mat<-vapply(sigma.vec, FUN=rwmmix, FUN.VALUE=numeric(5000), N=5000, x0=2)
#Rows=Samples
#Columns=Different proposal sds
#Thus interested in each column of samples
par(mfrow=c(3,1), mar=c(4,4,2,2))
#Trace plots for the three smallest proposal sds
for (j in 1:3) {
  plot(Iterations.mat[, j], type = "l", ylab = "X",
       main = paste("Trace plot, sigma=", sigma.vec[j], sep = ""))
}
#At a glance these traces look fine, but closer inspection reveals poor mixing.
#The smaller-sigma chains start in the mode centred at 2 and never reach the mode centred at -1.
#The third chain (sigma=0.8) does explore the mode at -1, but shows sustained "sticking",
#remaining in one mode for long stretches before switching.
#This is bad mixing: the chain does not explore the full support of the target,
#so the draws are far from independent samples of the distribution of interest.
par(mfrow=c(3,1), mar=c(4,4,2,2))
#Trace plots for the mid-range proposal sds
for (j in 4:6) {
  plot(Iterations.mat[, j], type = "l", ylab = "X",
       main = paste("Trace plot, sigma=", sigma.vec[j], sep = ""))
}
#These traces show more encouraging mixing, although as the proposal sd grows
#flat stretches become more frequent, indicating many rejected proposals.
#Good mixing shows as frequent switches between the two modes
#combined with movement within each mode.
par(mfrow=c(3,1), mar=c(4,4,2,2))
#Trace plots for the largest proposal sds
for (j in 7:9) {
  plot(Iterations.mat[, j], type = "l", ylab = "X",
       main = paste("Trace plot, sigma=", sigma.vec[j], sep = ""))
}
#These traces show long runs where the chain stays in the same state for many
#iterations: the overly ambitious proposal sd causes a high rejection rate.
#The chain still visits both modes - the large proposal sd makes jumps between
#modes possible - but most proposals are rejected.
#Overall: a good proposal sd must be large enough to reach every mode of the
#target (problematic when the number of modes is unknown), yet not so large
#that the rejection rate becomes excessive.
#The ergodic average is the running (cumulative) mean of the chain
no.iterations<-nrow(Iterations.mat)
par(mfrow=c(3,1), mar=c(4,4,2,2))
#Ergodic averages for the three smallest proposal sds.
#Titles corrected: these are ergodic averages, not trace plots.
for (j in 1:3) {
  plot(cumsum(Iterations.mat[, j]) / (1:no.iterations), type = "l",
       main = paste("Ergodic average, sigma=", sigma.vec[j], sep = ""),
       ylab = "X (Ergodic average)")
}
#The first two averages may appear to stabilise around a level, but this does
#not imply convergence: those chains have explored only one mode.
#For sigma=0.8 the ergodic average reflects the chain's stickiness, changing
#behaviour whenever the chain switches modes.
par(mfrow=c(3,1), mar=c(4,4,2,2))
#Ergodic averages for the mid-range proposal sds.
#Titles corrected: these are ergodic averages, not trace plots.
for (j in 4:6) {
  plot(cumsum(Iterations.mat[, j]) / (1:no.iterations), type = "l",
       main = paste("Ergodic average, sigma=", sigma.vec[j], sep = ""),
       ylab = "X (Ergodic average)")
}
#These ergodic averages settle down towards the mean of the two-mode target,
#which is consistent with (though does not prove) convergence of these chains.
par(mfrow=c(3,1), mar=c(4,4,2,2))
#Ergodic averages for the largest proposal sds.
#Titles corrected: these are ergodic averages, not trace plots.
for (j in 7:9) {
  plot(cumsum(Iterations.mat[, j]) / (1:no.iterations), type = "l",
       main = paste("Ergodic average, sigma=", sigma.vec[j], sep = ""),
       ylab = "X (Ergodic average)")
}
#These large proposal sds produce erratic fluctuations and drift in the
#ergodic average, driven by the high rejection rate.
#Part d
apply(Iterations.mat, 2, FUN=acf)#Plots the sample autocorrelation function (all lags up to acf's default maximum) for each chain
#For near-independent samples the autocorrelations beyond lag 0 should be close to 0.
#Most chains show high autocorrelation, so they are neither mixing well nor producing near-independent draws.
#The least bad case is sigma=3.2, indicating somewhat better mixing and less dependent samples.
#In general, the autocorrelation on its own is a weak convergence diagnostic for multimodal targets.
#The problem: the proposal scale needed to jump between the two modes differs from the scale needed to move well within one mode.
#Expecting a single proposal sd to explore both within and between modes in one step is ambitious.
#No proposal kernel here achieves both good mixing and full exploration of the target's support.
asjd<-function(X){
#Average squared jumping distance of a chain X:
#the mean of (X[t] - X[t-1])^2 over consecutive pairs.
#Rejected proposals repeat the previous state and contribute 0, so small
#values flag either tiny steps or a high rejection rate.
#diff() replaces the manual X[2:T] - X[1:(T-1)] indexing of the original,
#whose variable name T masked the logical constant TRUE.
mean(diff(X)^2)
}
apply(Iterations.mat,2, FUN=asjd)#Average squared jumping distance for each chain (one value per proposal sd)
#This measure is the mean squared difference between consecutive states, i.e. how far the chain typically moves per iteration.
#The mid-range chains, where convergence looks plausible, should show the largest values, since they move well both within and between modes.
#The smallest and largest proposal sds give smaller values: the former from tiny within-mode steps, the latter from high rejection rates.
#The quantity is useful for comparing chains against each other, but hard to interpret for a single chain without a point of reference.
riemann<-function(x, density = dmix){
#Riemann-sum estimate of the integral of `density` over the range spanned by
#the sample x: sort the points, then sum (gap between consecutive points)
#times the density at the right endpoint of each gap.
#A value near 1 suggests the sample covers the support of the density well.
#x: numeric sample (length >= 2)
#density: density function to integrate; defaults to dmix, so existing
#         one-argument calls behave exactly as before.
n <- length(x)  #renamed from t, which masked base::t()
xx <- sort(x)
#The original indexed with xx[1:t-1], which only works because `:` binds
#tighter than `-` (giving 0:(t-1), with the 0 index silently dropped).
#Written explicitly as 1:(n-1) here.
sum((xx[2:n] - xx[1:(n - 1)]) * density(xx[2:n]))
}
apply(Iterations.mat, 2, FUN=riemann)
#Riemann-sum estimate, per chain, of the target probability mass covered by the generated samples.
#The chains with small proposal sds fail to explore the full support of the target distribution, as suggested by their small values.
#A value close to 1 indicates the chain has explored essentially all of the target's mass (i.e. the state space is covered well).
riemannq<-function(x,q){
#Riemann-sum mass attributed to the q-th quarter (q in 1:4) of the sorted
#sample: sorts x, slices out the q-th quarter of the order statistics, and
#applies riemann() to that slice.
#If the chain mixes well, each quarter should carry roughly 25% of the mass.
#x: numeric sample; q: which quarter (1, 2, 3 or 4).
n <- length(x)  #renamed from t, which masked base::t()
xx <- sort(x)   #riemann() sorts again; harmless
lo <- floor((q - 1) * n / 4) + 1  #first index of the q-th quarter (was S)
hi <- floor(q * n / 4)            #last index (was T, masking TRUE)
riemann(xx[lo:hi])
}
#Row q holds, for every chain, the Riemann mass of the q-th quarter of the sorted samples
Riemann.quartile<-do.call(rbind, lapply(1:4, function(q) {
  apply(Iterations.mat, 2, FUN = riemannq, q = q)
}))
#As the question suggests, a chain carrying about 25% of the mass in each
#quarter is mixing well; this generally happens for the mid-range proposal sds.
#The small and large proposal sds respectively under- and over-shoot the
#suggested 25% benchmark.
#The important lesson about convergence diagnostics:
#Any given convergence diagnostic can detect only a limited set of the possible failure modes of MCMC.
#Apparently good performance on a diagnostic should therefore never be interpreted as proof that the MCMC algorithm is performing as intended.
#Convergence diagnostics are useful for spotting problems, not for certifying that the algorithm is working well.
#In conclusion, always use a mixture of convergence diagnostics to check whether your algorithm is behaving as intended.
#Column-wise running totals: row q holds the mass accumulated over quarters 1..q of each chain
cumulative.mass.Riemann<-apply(Riemann.quartile, 2, cumsum) #MARGIN passed positionally for clarity
#Pad the log-sigma axis at both ends so each polygon closes on the baseline
sigma.log<-c(log(0.2), log(sigma.vec), log(51.2))
#Stacked area chart of the cumulative Riemann mass per quartile
plot(sigma.log, c(0, cumulative.mass.Riemann[4, ], 0), type = 'l', ylim = c(0, 1.5),
     xlab = 'Log Sigma', ylab = 'Cumulative Sum', main = "Cumulative Riemann Sums Quartile")
#Draw the largest total first and overlay progressively smaller ones,
#matching the original blue/green/yellow/red stacking order
quartile.cols <- c('red', 'yellow', 'green', 'blue')
for (q in 4:1) {
  polygon(sigma.log, c(0, cumulative.mass.Riemann[q, ], 0), col = quartile.cols[q])
}
#Pictorial confirmation that the mid-range proposal sds put roughly 25% of the
#mass in each quartile, with their chains' totals close to 1