#Question 1: RANDU #a i # Z[j] = 65539 Z[j-1] mod 2^31 # = (65536 + 3) Z[j-1] mod 2^31 # = (65536 + 3) ((65536+3) Z[j-2] mod 2^31) mod 2^31 # = ((2^16 + 3)(2^16 + 3) Z[j-2] mod 2^31 # = (2^32 + 6*2^16 + 9) Z[j-2] mod 2^31 # = (6*2^16 + 9) Z[j-2] mod 2^31 # = (6* Z[j-1] - 9 Z[j-2]) mod 2^31 #a ii unif <- randu(n=100000,z0=123142) hist(unif) #This looks perfectly reasonable in isolation. #a iii ts.plot(unif[1:1000]) #Again there's no obvious trend here. #a iv plot(unif[1:999],unif[2:1000],xlab='x_{k-1}',ylab='x_k',main='RANDU Lag-1 Plot') #Again, no clear pattern. #a v plot(unif[1:998],unif[3:1000],xlab='x_{k-1}',ylab='x_k',main='RANDU Lag-2 Plot') library('rgl') rgl::plot3d(unif[1:99998],unif[2:99999],unif[3:100000],xlab='x_{k-2}',ylab='x_{k-1}',zlab='x_k',main='RANDU Consecutive Triplets') # Problems: viewed as triples these points clearly don't behave like independent triples of uniform random numbers; they're concentrated on a few planes. #b i box.muller <- function(U1,U2) { X1 <- sqrt(-2*log(U1)) * cos(2*pi*U2) X2 <- sqrt(-2*log(U1)) * sin(2*pi*U2) rbind(X1,X2) } #b ii plot(t(box.muller(unif[seq(1,10000,2)],unif[seq(2,10000,2)]))) #b iii unif.mt <- runif(10000) #b iv plot(t(box.muller(unif.mt[seq(1,10000,2)],unif.mt[seq(2,10000,2)]))) #b v unif.bad <- randu(10000,1371,a=1229,c=1,M=2**11) plot(t(box.muller(unif.bad[seq(1,10000,2)],unif.bad[seq(2,10000,2)]))) #It's actually rather difficult to spot the discrepancies here! #2 a i hist(Nile) boxplot(Nile) stem(Nile) plot(Nile) #It's skewed; more worryingly there's a clear variation over time: the samples aren't independent! #2 b mean(Nile) #[1] 919.35 median(Nile) #[1] 893.5 #Further evidence of skewness. #2 c sigma.hat <- sqrt(var(Nile) / length(Nile)) c(qnorm(0.025),qnorm(0.975)) * sigma.hat + mean(Nile) #[1] 886.182 952.518 #2 d/e/f library(boot) #Function for calculating bootstrap means f.mean <- function (x,i) { mean(x[i]) } boot.mean <- boot::boot(Nile,f.mean,9999,stype='i') boot.ci(boot.mean,type='perc') # (886.7, 952.7 ) # Note slight asymmetry around 919.35 f.median <- function (x,i) { mean(x[i]) } boot.median <- boot::boot(Nile,f.median,9999,stype='i') boot.ci(boot.median,type='perc') # Check sampling variability: with this number of replicates, bootstrap variability is negligible. #3 a ru <- runif(10000) F10 <- ecdf(ru[1:10]) F50 <- ecdf(ru[1:50]) F500 <- ecdf(ru[1:500]) F3000 <- ecdf(ru[1:3000]) F10000 <- ecdf(ru) seq <- seq(0,1,1/1000) plot.ecdf(F10,main='Sample Size n=10') lines(seq,punif(seq),col='red') plot.ecdf(F50,main='Sample Size n=50') lines(seq,punif(seq),col='red') plot.ecdf(F500,main='Sample Size n=500') lines(seq,punif(seq),col='red') plot.ecdf(F3000,main='Sample Size n=3 000') lines(seq,punif(seq),col='red') plot.ecdf(F10000,main='Sample Size n=10 000') lines(seq,punif(seq),col='red') #3b rn <- rnorm(10000) Fn10 <- ecdf(rn[1:10]) Fn50 <- ecdf(rn[1:50]) Fn500 <- ecdf(rn[1:500]) Fn3000 <- ecdf(rn[1:3000]) Fn10000 <- ecdf(rn) seq <- seq(-5,5,1/100) plot.ecdf(Fn10,main='Sample Size n=10') lines(seq,pnorm(seq),col='red') plot.ecdf(Fn50,main='Sample Size n=50') lines(seq,pnorm(seq),col='red') plot.ecdf(Fn500,main='Sample Size n=500') lines(seq,pnorm(seq),col='red') plot.ecdf(Fn3000,main='Sample Size n=3 000') lines(seq,pnorm(seq),col='red') plot.ecdf(Fn10000,main='Sample Size n=10 000') lines(seq,pnorm(seq),col='red') #3c rc <- rcauchy(10000) Fc10 <- ecdf(rc[1:10]) Fc50 <- ecdf(rc[1:50]) Fc500 <- ecdf(rc[1:500]) Fc3000 <- ecdf(rc[1:3000]) Fc10000 <- ecdf(rc) seq <- seq(-15,15,1/100) plot.ecdf(Fc10,main='Sample Size n=10') lines(seq,pcauchy(seq),col='red') plot.ecdf(Fc50,main='Sample Size n=50') lines(seq,pcauchy(seq),col='red') plot.ecdf(Fc500,main='Sample Size n=500') lines(seq,pcauchy(seq),col='red') plot.ecdf(Fc3000,main='Sample Size n=3 000') lines(seq,pcauchy(seq),col='red') plot.ecdf(Fc10000,main='Sample Size n=10 000') lines(seq,pcauchy(seq),col='red') #3d -- Uniform eu <- c(10, max(abs(F10(ru[1:10]) - punif(ru[1:10])))) eu <- rbind(eu, c(50, max(abs(F50(ru[1:50]) - punif(ru[1:50]))))) eu <- rbind(eu, c(500, max(abs(F500(ru[1:500]) - punif(ru[1:500]))))) eu <- rbind(eu, c(3000, max(abs(F3000(ru[1:3000]) - punif(ru[1:3000]))))) eu <- rbind(eu, c(10000, max(abs(F10000(ru[1:10000]) - punif(ru[1:10000]))))) en <- c(10, max(abs(Fn10(rn[1:10]) - pnorm(rn[1:10])))) en <- rbind(en, c(50, max(abs(Fn50(rn[1:50]) - pnorm(rn[1:50]))))) en <- rbind(en, c(500, max(abs(Fn500(rn[1:500]) - pnorm(rn[1:500]))))) en <- rbind(en, c(3000, max(abs(Fn3000(rn[1:3000]) - pnorm(rn[1:3000]))))) en <- rbind(en, c(10000, max(abs(Fn10000(rn[1:10000]) - pnorm(rn[1:10000]))))) ec <- c(10, max(abs(Fc10(rc[1:10]) - pcauchy(rc[1:10])))) ec <- rbind(ec, c(50, max(abs(Fc50(rc[1:50]) - pcauchy(rc[1:50]))))) ec <- rbind(ec, c(500, max(abs(Fc500(rc[1:500]) - pcauchy(rc[1:500]))))) ec <- rbind(ec, c(3000, max(abs(Fc3000(rc[1:3000]) - pcauchy(rc[1:3000]))))) ec <- rbind(ec, c(10000, max(abs(Fc10000(rc[1:10000]) - pcauchy(rc[1:10000]))))) plot(eu,type='l',col='red',main='Convergence of Empirical Distribution Functions',xlab='Sample Size', ylab='Worst Case Absolute Error') lines(en,col='blue') lines(ec,col='green') legend('topright',c('Uniform','Normal','Cauchy'), lty=1, col=c('red','blue','green')