## Q1 revisits the lm failure in the introduction
set.seed(1)
xx <- sort(runif(100))
y <- .2*(xx-.5)+(xx-.5)^2 + rnorm(100)*.1
x <- xx+100

## lm manages
plot(x,y,cex.lab=2,cex.axis=1.8)
b <- lm(y~x+I(x^2))
lines(x,fitted(b),lwd=2)

## direct doesn't
X <- model.matrix(b)
beta.hat <- solve(t(X)%*%X,t(X)%*%y)

## lm has problems
x <- xx+1000
plot(x,y,cex.lab=2,cex.axis=1.8)
b <- lm(y~x+I(x^2))
lines(x,fitted(b),lwd=2)

## examine condition numbers...
d <- svd(X)$d;d[1]/d[3]
X1 <- model.matrix(b)
d <- svd(X1)$d;d[1]/d[3]

## stable alternatives

x1 <- x - mean(x)
b <- lm(y~x1+I(x1^2))
plot(x,y,cex.lab=2,cex.axis=1.8)
lines(x,fitted(b),lwd=2)
X2 <- model.matrix(b)
d <- svd(X2)$d;d[1]/d[3]

b <- lm(y~poly(x,2))
plot(x,y,cex.lab=2,cex.axis=1.8)
lines(x,fitted(b),lwd=2)
X3 <- model.matrix(b)
d <- svd(X3)$d;d[1]/d[3]

## perhaps plot columns too

## Then poly can produce perfectly conditioned model matrices 

X <- cbind(length(x)^-.5,poly(x,2))
t(X)%*%X ## orthogonal

## So QR factorization is trivial! =>
mu <- X%*%(t(X)%*%y)
plot(x,y)
lines(x,mu)



### Q2. Now some mixed modelling...

## simulate some data
n <- 100;n.b <- 10;n.beta <- 5
X <- cbind(1,matrix(runif(n*n.beta-n),n,n.beta-1))
Z <- matrix(runif(n*n.b),n,n.b)
beta <- rep(1,n.beta)
b <- rnorm(n.b)
y <- X%*%beta + Z%*%b + rnorm(n)

logLik <- function(theta,y,X,Z) {
## somewhat plodding linear mixed model log 
## likelihood with theta partitioned 
## [beta,log(sig.b),log(sig)]
   n <- length(y)
   beta <- theta[1:ncol(X)]
   theta <- theta[-(1:ncol(X))]
   V <- diag(n)*exp(theta[2]*2) + Z%*%t(Z)*exp(theta[1]*2)
   R <- chol(V)
   z <- forwardsolve(t(R),y-X%*%beta)
   ll <- -n*log(2*pi)/2 - sum(log(diag(R))) - sum(z*z)/2
   -ll
}

theta <- c(beta,0,0)
optim(theta,logLik,method="BFGS",y=y,X=X,Z=Z)

logLikp <- function(theta,y,X,Z) {
## somewhat plodding linear mixed model log 
## profile likelihood with theta partitioned 
## [log(sig.b),log(sig)]
   n <- length(y)
   V <- diag(n)*exp(theta[2]*2) + Z%*%t(Z)*exp(theta[1]*2)
   R <- chol(V)
   yt <- forwardsolve(t(R),y)
   Xt <- forwardsolve(t(R),X)
   beta <- lm(yt~Xt-1)$coef
   z <- forwardsolve(t(R),y-X%*%beta)
   ll <- -n*log(2*pi)/2 - sum(log(diag(R))) - sum(z*z)/2
   ll <- -ll
   attr(ll,"beta") <- beta
   ll
}

theta <- c(0,0)
fit <- optim(theta,logLikp,method="BFGS",y=y,X=X,Z=Z)
logLikp(fit$par,y,X,Z);fit$par