## -----------------------------------------------------------------------------
x <- runif(1000000)
system.time(xs <- sort(x))


## -----------------------------------------------------------------------------
set.seed(2)
n <- 1000
A <- matrix(runif(n*n),n,n)
system.time(Ai <- solve(A))  # invert A
range(Ai%*%A-diag(n))        # check accuracy of result


## ----eval=FALSE---------------------------------------------------------------
## install.packages("PBSddesolve") ## install package from CRAN

## ----eval=FALSE---------------------------------------------------------------
## library(PBSddesolve)
## 
## bfly <- function(t,y,parms) {
## ## function defining rate of change of blowfly population, y, at
## ## time t, given y, t, lagged values of y form `pastvalue' and
## ## the parameters in `parms'. See `ddesolve' docs for more details.
## 
## ## making names more useful ...
##   P <- parms$P;d <- parms$delta;y0 <- parms$yinit
## 
##   if (t<=1) y1 <- P*y0*exp(-y0)-d*y else { ## initial phase
##     ylag <- pastvalue(t-1)         ## getting lagged y
##     y1 <- P*ylag*exp(-ylag) - d*y  ## the gradient
##   }
##   y1  ## return gradient
## }
## 
## ## set parameters...
## parms <- list(P=150,delta=6,yinit=2.001)
## ## solve model...
## yout <- dde(y=parms$yinit,times=seq(0,15,0.05),func=bfly,parms=parms)
## ## plot result...
## plot(yout$t,yout$y,type="l",ylab="n",xlab="time")


## -----------------------------------------------------------------------------
n <- 3000
A <- matrix(runif(n*n),n,n)
B <- matrix(runif(n*n),n,n)
y <- runif(n)


## -----------------------------------------------------------------------------
system.time(f0 <- A%*%B%*%y)
system.time(f1 <- A%*%(B%*%y))


## -----------------------------------------------------------------------------
library(rbenchmark)
benchmark(
    "ABy" = {
        f0 <- A%*%B%*%y
    },
    "A(By)" = {
        f1 <- A%*%(B%*%y)
    },
    replications = 10,
    columns = c("test", "replications", "elapsed",
                "relative", "user.self", "sys.self"))


## -----------------------------------------------------------------------------
x <- seq(1,2*pi,length=1000)
delta <- 1e-5      # FD interval
dsin.dx <- (sin(x+delta)-sin(x))/delta # FD derivative
error <- dsin.dx-cos(x)     # error in FD derivative


## -----------------------------------------------------------------------------
delta <- 1e-15


## -----------------------------------------------------------------------------
set.seed(1); n <- 100
xx <- sort(runif(n))
y <- .2*(xx-.5)+(xx-.5)^2 + rnorm(n)*.1
x <- xx+100


## -----------------------------------------------------------------------------
b <- lm(y~x+I(x^2))


## -----------------------------------------------------------------------------
X <- model.matrix(b)    # get same model matrix used above
beta.hat <- solve(t(X)%*%X,t(X)%*%y) # direct solution of `normal equations'


## -----------------------------------------------------------------------------
n <- 3000
A <- matrix(runif(n*n),n,n)
B <- matrix(runif(n*n),n,n)
y <- runif(n)
system.time(f0 <- A%*%B%*%y)    # case 1
system.time(f1 <- A%*%(B%*%y))  # case 2


## -----------------------------------------------------------------------------
n <- 5000; m <- 1000
A <- matrix(runif(n*m), n, m)
B <- matrix(runif(n*m), m, n)
benchmark(
    "sdAB" = { sum(diag(A %*% B)) },
    "sdBA" = { sum(diag(B %*% A)) },
    "sABt" = { sum(A*t(B)) },
    replications = 10, order=NULL,
    columns = c("test", "replications", "elapsed",
                "relative", "user.self", "sys.self"))


## -----------------------------------------------------------------------------
La_library()
extSoftVersion()["BLAS"]


## -----------------------------------------------------------------------------
  V <- matrix(c(2,-1,1,-1,2,-1,1,-1,2),3,3)
  mu <- c(1,-1,3)

  R <- chol(V)             ## Cholesky factor of V
  z <- rnorm(3)            ## iid N(0,1) deviates
  y <- t(R)%*%z + mu       ## N(mu,V) deviates
  y


## -----------------------------------------------------------------------------
  Z <- matrix(rnorm(3000),3,1000) ## 1000 N(0,I) 3-vectors
  Y <- t(R)%*%Z + mu              ## 1000 N(mu,V) vectors
  ## and check that they behave as expected...
  rowMeans(Y)                     ## observed mu
  (Y-mu)%*%t(Y-mu)/1000           ## observed V


## -----------------------------------------------------------------------------
  y <- 1:3
  z <- forwardsolve(t(R),y-mu)
  logLik <- -length(y)*log(2*pi)/2 - 
      sum(log(diag(R))) - sum(z*z)/2
  logLik


## -----------------------------------------------------------------------------
  set.seed(1)
  xx <- sort(runif(100))
  x <- xx + 100  # regenerated same x data
  X <- model.matrix(~x + I(x^2)) # and the model matrix
  XtX <- crossprod(X)          # form t(X)%*%X (with half the flop count)
  lambda <- eigen(XtX)$values
  lambda[1]/lambda[3]  # the condition number of X'X


## -----------------------------------------------------------------------------
solve(XtX)


## -----------------------------------------------------------------------------
  D <- diag(1/diag(XtX)^.5)
  DXXD <- D%*%XtX%*%D
  lambda <- eigen(DXXD)$values
  lambda[1]/lambda[3]


## -----------------------------------------------------------------------------
  XtXi <- D%*%solve(DXXD,D) ## computable inverse of X'X
  XtXi %*% XtX              ## how accurate?


## -----------------------------------------------------------------------------
n = 1000
XA = matrix(rnorm(n*n),ncol=n)
XXt = XA %*% t(XA)
system.time(chol(XXt))
system.time(eigen(XXt, symmetric=TRUE))
system.time(eigen(XA))
system.time(svd(XA))
system.time(qr(XA))


## -----------------------------------------------------------------------------
  d <- svd(X)$d # get the singular values of X
  d


## -----------------------------------------------------------------------------
  d[1]/d[3]


## -----------------------------------------------------------------------------
a <- 1e16
b <- 1e16 + pi
d <- b - a


## -----------------------------------------------------------------------------
  d; pi


## -----------------------------------------------------------------------------
  (d-pi)/pi  ## relative error in difference is huge...


## -----------------------------------------------------------------------------
f <- a + b
(f-2e16-pi)/(2e16+pi) ## relative error in sum, is tiny...


## ----eval=FALSE---------------------------------------------------------------
## (x1*x2*sin(x3) + exp(x1*x2))/x3


## -----------------------------------------------------------------------------
ad <- function(x,diff = c(1,1)) {
## create class "ad" object. diff[1] is length of grad
## diff[2] is element of grad to set to 1.
  grad <- rep(0,diff[1])
  if (diff[2]>0 && diff[2]<=diff[1]) grad[diff[2]] <- 1
  attr(x,"grad") <- grad
  class(x) <- "ad"
  x
}


## -----------------------------------------------------------------------------
  x1 <- ad(1,c(3,1))
  x1


## -----------------------------------------------------------------------------
sin.ad <- function(a) {
  grad.a <- attr(a,"grad")
  a <- as.numeric(a) ## avoid infinite recursion - change class
  d <- sin(a)
  attr(d,"grad") <- cos(a) * grad.a ## chain rule
  class(d) <- "ad"
  d
}

## ----echo=FALSE---------------------------------------------------------------
exp.ad <- function(a) {
  grad.a <- attr(a,"grad")
  a <- as.numeric(a) ## avoid infinite recursion - change class
  d <- exp(a)
  attr(d,"grad") <- exp(a) * grad.a ## chain rule
  class(d) <- "ad"
  d
}


## -----------------------------------------------------------------------------
sin(x1)


## -----------------------------------------------------------------------------
"*.ad" <- function(a,b) { ## ad multiplication
  grad.a <- attr(a,"grad")
  grad.b <- attr(b,"grad")
  a <- as.numeric(a)
  b <- as.numeric(b)
  d <- a*b    ## evaluation
  attr(d,"grad") <- a * grad.b + b * grad.a ## product rule
  class(d) <- "ad"
  d
}

## ----echo=FALSE---------------------------------------------------------------
"/.ad" <- function(a,b) { ## ad division
  grad.a <- attr(a,"grad")
  grad.b <- attr(b,"grad")
  a <- as.numeric(a)
  b <- as.numeric(b)
  d <- a/b    ## evaluation
  attr(d,"grad") <- (b * grad.a - a * grad.b)/(b*b) ## quotient rule
  class(d) <- "ad"
  d
}

"+.ad" <- function(a,b) { ## ad addition
  grad.a <- attr(a,"grad")
  grad.b <- attr(b,"grad")
  a <- as.numeric(a)
  b <- as.numeric(b)
  d <- a+b    ## evaluation
  attr(d,"grad") <- (grad.a + grad.b) ## add gradients
  class(d) <- "ad"
  d
}

"-.ad" <- function(a,b) { ## ad subtraction
  grad.a <- attr(a,"grad")
  grad.b <- attr(b,"grad")
  a <- as.numeric(a)
  b <- as.numeric(b)
  d <- a-b    ## evaluation
  attr(d,"grad") <- (grad.a - grad.b) ## subtract gradients
  class(d) <- "ad"
  d
}


## -----------------------------------------------------------------------------
  x1 <- 1; x2 <- 2; x3 <- pi/2
  (x1*x2*sin(x3)+ exp(x1*x2))/x3


## -----------------------------------------------------------------------------
  x1 <- ad(1, c(3,1))
  x2 <- ad(2, c(3,2))
  x3 <- ad(pi/2, c(3,3))
  (x1*x2*sin(x3) + exp(x1*x2))/x3


## -----------------------------------------------------------------------------
  I.true <- exp(1) - 1
  N <- 10
  I.mp <- sum(exp((1:N-.5)/N))/N
  c(I.mp,I.true,(I.true-I.mp)/I.true)


## -----------------------------------------------------------------------------
  N <- 100
  I.mp <- sum(exp((1:N-.5)/N))/N
  c(I.mp,I.true,(I.true-I.mp)/I.true)
  N <- 1000
  I.mp <- sum(exp((1:N-.5)/N))/N
  c(I.mp,I.true,(I.true-I.mp)/I.true)


## -----------------------------------------------------------------------------
library(statmod)
N <- 10
gq <- gauss.quad(N) # get the x_i, w_i
gq # take a look
sum(gq$weights) # for an interval of width 2
I.gq <- sum(gq$weights*exp((gq$nodes+1)/2))/2
c(I.gq, I.true, (I.true-I.gq)/I.true)


## -----------------------------------------------------------------------------
gqi <- function(g, N = 10, a = -1, b = 1) {
    gq = gauss.quad(N)
    w = gq$weights*(b-a)/2
    n = a + (b-a)*(gq$nodes + 1)/2
    sum(w * g(n))
    }

gqi(exp, 10, 0, 1)


## -----------------------------------------------------------------------------
I.gq <- gqi(exp, 5, 0, 1)
c(I.gq, I.true, (I.true-I.gq)/I.true)


## -----------------------------------------------------------------------------
I.gq <- gqi(exp, 3, 0, 1)
c(I.gq, I.true, (I.true-I.gq)/I.true)


## -----------------------------------------------------------------------------
mesh <- function(x,d,w=1/length(x)+x*0) {
  n <- length(x)
  W <- X <- matrix(0,n^d,d)
  for (i in 1:d) {
   X[,i] <- x;W[,i] <- w
   x <- rep(x,rep(n,length(x)))
   w <- rep(w,rep(n,length(w)))
  }
 w <- exp(rowSums(log(W))) ## column product of W gives weights
 list(X=X,w=w) ## each row of X gives co-ordinates of a node
}


## -----------------------------------------------------------------------------
mesh(c(1,2),3)


## -----------------------------------------------------------------------------
fd <- function(x) {exp(rowSums(x))}


## -----------------------------------------------------------------------------
  N <- 10; d <- 2; N^d # number of function evaluations
  I.true <- (exp(1)-exp(-1))^d
  mmp <- mesh((1:N-.5)/N*2-1,d,rep(2/N,N))
  I.mp <- sum(mmp$w*fd(mmp$X))
  c(I.mp,I.true,(I.true-I.mp)/I.true)


## -----------------------------------------------------------------------------
  N <- 10; d <- 5; N^d # number of function evaluations
  I.true <- (exp(1)-exp(-1))^d
  mmp <- mesh((1:N-.5)/N*2-1,d,rep(2/N,N))
  I.mp <- sum(mmp$w*fd(mmp$X))
  c(I.mp,I.true,(I.true-I.mp)/I.true)


## -----------------------------------------------------------------------------
  N <- 4; d <- 2; N^d
  I.true <- (exp(1)-exp(-1))^d
  gq <- gauss.quad(N)
  mgq <- mesh(gq$nodes,d,gq$weights)
  I.gq <- sum(mgq$w*fd(mgq$X))
  c(I.gq,I.true,(I.true-I.gq)/I.true)


## -----------------------------------------------------------------------------
  N <- 4; d <- 5; N^d
  I.true <- (exp(1)-exp(-1))^d
  gq <- gauss.quad(N)
  mgq <- mesh(gq$nodes,d,gq$weights)
  I.gq <- sum(mgq$w*fd(mgq$X))
  c(I.gq,I.true,(I.true-I.gq)/I.true)


## -----------------------------------------------------------------------------
set.seed(0);n <- 50
x <- runif(n)
z1 <- sample(c(0,1),n,replace=TRUE)
z2 <- sample(c(0,1),n,replace=TRUE)
b <- rnorm(4) ## simulated random effects
X <- model.matrix(~x)
Z <- cbind(z1,1-z1,z2,1-z2)
eta <- X%*%c(0,3) + Z%*%b
mu <- exp(eta)
y <- rpois(mu,mu)


## -----------------------------------------------------------------------------
lf.yb0 <- function(y, b, theta, X, Z, lambda) {
  beta <- theta[1:ncol(X)]
  theta <- theta[-(1:ncol(X))]
  eta <- X%*%beta + Z%*%b
  mu <- exp(eta)
  lam <- lambda(theta, ncol(Z))
  sum(y*eta - mu - lfactorial(y)) - sum(lam*b^2)/2 + 
      sum(log(lam))/2 - ncol(Z)*log(2*pi)/2
}

## ----echo=FALSE---------------------------------------------------------------
lf.yb <- function(y, b, theta, X, Z, lambda) {
  beta <- theta[1:ncol(X)]
  theta <- theta[-(1:ncol(X))]
  eta <- Z%*%b + as.vector(X%*%beta)
  mu <- exp(eta)
  lam <- lambda(theta, ncol(Z))
  colSums(y*eta - mu - lfactorial(y)) - colSums(lam*b^2)/2 + 
      sum(log(lam))/2 - ncol(Z)*log(2*pi)/2
}


## -----------------------------------------------------------------------------
var.func <- function(theta, nb) c(theta[1], theta[1], 
                                    theta[2], theta[2])


## -----------------------------------------------------------------------------
  theta <- c(0,3,1,1)
  nm <- 10;m.r <- 10
  bm <- mesh((1:nm-(nm+1)/2)*m.r/nm,4,rep(m.r/nm,nm))
  lf <- lf.yb(y,t(bm$X),theta,X,Z,var.func)
  log(sum(bm$w*exp(lf-max(lf)))) + max(lf)


## ----echo=FALSE---------------------------------------------------------------
  nm <- 20;m.r <- 10
  bm <- mesh((1:nm-(nm+1)/2)*m.r/nm,4,rep(m.r/nm,nm))
  lf <- lf.yb(y,t(bm$X),theta,X,Z,var.func)

## -----------------------------------------------------------------------------
  log(sum(bm$w*exp(lf-max(lf)))) + max(lf)


## -----------------------------------------------------------------------------
  nm <- 10
  gq <- gauss.quad(nm)
  w <- gq$weights*m.r/2 ## rescale to domain
  x <- gq$nodes*m.r/2
  bm <- mesh(x,4,w)
  lf <- lf.yb(y,t(bm$X),theta,X,Z,var.func)
  log(sum(bm$w*exp(lf-max(lf)))) + max(lf)


## ----echo=FALSE---------------------------------------------------------------
  nm <- 20
  gq <- gauss.quad(nm)
  w <- gq$weights*m.r/2 ## rescale to domain
  x <- gq$nodes*m.r/2
  bm <- mesh(x,4,w)
  lf <- lf.yb(y,t(bm$X),theta,X,Z,var.func)

## -----------------------------------------------------------------------------
  log(sum(bm$w*exp(lf-max(lf)))) + max(lf)


## ----echo=FALSE---------------------------------------------------------------
glf.yb <- function(y, b, theta, X, Z, lambda) {
  beta <- theta[1:ncol(X)]
  theta <- theta[-(1:ncol(X))]
  eta <- X%*%beta + Z%*%b
  mu <- exp(eta)
  lam <- lambda(theta, ncol(Z))
  g <- t(Z) %*% (y - mu) - lam*b
  as.vector(g)
}
hlf.yb <- function(y, b, theta, X, Z, lambda) {
  beta <- theta[1:ncol(X)]
  theta <- theta[-(1:ncol(X))]
  eta <- X%*%beta + Z%*%b
  mu <- as.vector(exp(eta))
  lam <- lambda(theta, ncol(Z))
  H <- -(t(Z) %*% (Z*mu)) - outer(lam*b, b)
  H - 0.01*diag(ncol(Z))
}

## -----------------------------------------------------------------------------
b <- rep(0,4)
while(TRUE) {
  b0 <- b
  g <- glf.yb(y, b, theta, X, Z, var.func)
  H <- hlf.yb(y, b, theta, X, Z, var.func)
  b <- b - solve(H, g)
  b <- as.vector(b)
  if (sum(abs(b-b0)) < 1e-5*sum(abs(b)) + 1e-4) break
}


## -----------------------------------------------------------------------------
lf.yb0(y,b,theta,X,Z,var.func) + 2*log(2*pi) -
    0.5*determinant(H,logarithm=TRUE)$modulus


## -----------------------------------------------------------------------------
  N <- 100000
  bm <- matrix((runif(N*4)-.5)*m.r,4,N)
  lf <- lf.yb(y, bm, theta, X, Z, var.func)
  log(sum(exp(lf-max(lf)))) + max(lf) - log(N) + log(m.r^4)

## ----echo=FALSE---------------------------------------------------------------
  bm <- matrix((runif(N*4)-.5)*m.r,4,N)
  lf <- lf.yb(y, bm, theta, X, Z, var.func)
  log(sum(exp(lf-max(lf)))) + max(lf) - log(N) + log(m.r^4)


## -----------------------------------------------------------------------------
lf.ygb <- function(y, b, theta, X, Z, lambda) {
  beta = theta[1:ncol(X)]
  eta = Z%*%b + as.vector(X%*%beta)
  colSums(y*eta - exp(eta) - lfactorial(y))
}
N = 100000
lam = var.func(theta[3:4])
bm = matrix(rnorm(4*N, 0, sqrt(lam)), nrow=4)
llv = lf.ygb(y, bm, theta, X, Z, var.func)
max(llv) + log(mean(exp(llv - max(llv))))

## ----echo=FALSE---------------------------------------------------------------
bm = matrix(rnorm(4*N, 0, sqrt(lam)), nrow=4)
llv = lf.ygb(y, bm, theta, X, Z, var.func)
max(llv) + log(mean(exp(llv - max(llv))))


## -----------------------------------------------------------------------------
N <- 10000
  R <- chol(-H)
  z <- matrix(rnorm(N*4),4,N)
  bm <- b + backsolve(R,z)
  lf <- lf.yb(y,bm,theta,X,Z,var.func)
  zz.2 <- colSums(z*z)/2
  lfz <- lf + zz.2
  log(sum(exp(lfz - max(lfz)))) + max(lfz) + 2 * log(2*pi) -
      sum(log(diag(R))) - log(N)

## ----echo=FALSE---------------------------------------------------------------
  z <- matrix(rnorm(N*4),4,N)
  bm <- b + backsolve(R,z)
  lf <- lf.yb(y,bm,theta,X,Z,var.func)
  zz.2 <- colSums(z*z)/2
  lfz <- lf +zz.2
  log(sum(exp(lfz - max(lfz)))) + max(lfz) + 2 * log(2*pi) -
      sum(log(diag(R))) - log(N)

  z <- matrix(rnorm(N*4),4,N)
  bm <- b + backsolve(R,z)
  lf <- lf.yb(y,bm,theta,X,Z,var.func)
  zz.2 <- colSums(z*z)/2
  lfz <- lf +zz.2
  log(sum(exp(lfz - max(lfz)))) + max(lfz) + 2 * log(2*pi) -
      sum(log(diag(R))) - log(N)


## -----------------------------------------------------------------------------
n <- 100000 ## code NOT for serious use
x <- rep(1,n)
a <- 65539;M <- 2^31;b <- 0 ## Randu
for (i in 2:n) x[i] <- (a*x[i-1]+b)%%M
u <- x/(M-1)

## ----eval=FALSE---------------------------------------------------------------
## qqplot((1:n-.5)/n,sort(u))


## -----------------------------------------------------------------------------
## Create data frame with U at 3 lags...
U <- data.frame(u1=u[1:(n-2)],u2=u[2:(n-1)],u3=u[3:n])

## ----eval=FALSE---------------------------------------------------------------
## plot(U$u1,U$u2,pch=".")


## ----eval=FALSE---------------------------------------------------------------
## library(lattice)
## cloud(u1~u2*u3,U,pch=".",col=1,screen=list(z=40,x=-70,y=0))


## ----eval=FALSE---------------------------------------------------------------
## cloud(u1~u2*u3,U,pch=".",col=1,screen=list(z=40,x=70,y=0))


## -----------------------------------------------------------------------------
  system.time(X <- qnorm(runif(1e6)))


## -----------------------------------------------------------------------------
  system.time(Z <- rnorm(1e6))


## -----------------------------------------------------------------------------
  n <- 100000 ## number of obs
  pa <- 40;pb <- 10 # numbers of factor levels
  a <- factor(sample(1:pa,n,replace=TRUE))
  b <- factor(sample(1:pb,n,replace=TRUE))
  X <- model.matrix(~a*b) # model matrix a + b + a:b
  y <- rnorm(n) # a random response
  object.size(X) # X takes lots of memory!
  sum(X==0)/prod(dim(X)) # proportion of zeros


## -----------------------------------------------------------------------------
  library(Matrix)
  Xs <- Matrix(X) ## a sparse version of X
  ## much reduced memory footprint
  as.numeric(object.size(Xs))/as.numeric(object.size(X)) 


## -----------------------------------------------------------------------------
  system.time(Xy <- t(X)%*%y)
  system.time(Xsy <- t(Xs)%*%y)


## -----------------------------------------------------------------------------
system.time({
    XX <- t(X)%*%X
    R <- chol(XX)
})
  dim(XX)
  sum(XX == 0)/prod(dim(XX))


## ----fig.height=5,fig.width=5-------------------------------------------------
image(XX == 0)


## ----fig.height=4,fig.width=4-------------------------------------------------
  range(t(R)%*%R-XX)
  sum(R != 0) ## the upper triangle is dense (not good)
  image(R == 0)
  R[1:5, 1:5] ## top left corner, for example


## ----fig.height=4,fig.width=4-------------------------------------------------
  system.time({
    XXs <- t(Xs)%*%Xs
    Rs <- chol(XXs,pivot=TRUE) ## pivot to reduce infill
  })
  range(t(Rs)%*%Rs-XXs[Rs@pivot,Rs@pivot]) ## factorization works!
  sum(Rs!=0) ## the upper triangle is sparse (much better)


## ----fig.height=5,fig.width=5-------------------------------------------------
  image(as.matrix(Rs) == 0)


## -----------------------------------------------------------------------------
  Rs[1:5,1:5] ## top left corner, for example


## -----------------------------------------------------------------------------
ht <- new.env(hash=TRUE) 
ht[["foo"]] = "bar"
ht[["baz"]] = 1:10
ht[["foo"]]
ht$baz