# Normal two-way ANOVA

# Data generation

# Choose sample size
n.groups.1 <- 5
n.groups.2 <- 3
nsample <- 12
(n <- n.groups.1*nsample)

# Create factor levels
(groups.1 <- gl(n=n.groups.1, k=nsample, length=n, labels=paste("level.", c(1:n.groups.1), sep="")))

# groups.2 is replicated for each level of groups.1
(groups.2 <- gl(n=n.groups.2, k=nsample/n.groups.2, length=n, labels=paste("level.", c(1:n.groups.2), sep="")))

cbind(groups.1, groups.2)

contrasts(groups.1)
contrasts(groups.2)

# Choose effects
baseline <- 40 # Intercept

# Main effects:
(groups.1.effects <- c(-10, -5, 5, 10)) # groups.1 effects
(groups.2.effects <- c(5, 10)) # groups.2 effects

# Interaction effects -- 8 of them, i.e., (5-1)*(3-1):
(interaction.effects <- c(-2, 3, 0, 4, 4, 0, 3, -2))

(all.effects <- c(baseline, groups.1.effects, groups.2.effects, interaction.effects))
length(all.effects)

# Residuals
sigma <- 3
normal.error <- rnorm(n, 0, sigma)

round(normal.error, 2)

# Create design matrix
X <- as.matrix(model.matrix(~groups.1*groups.2))
dim(X)
head(X)
X

# Create response by putting together the deterministic and stochastic parts of the model
# -- deterministic part
data.frame(groups.1, groups.2, E.y=as.vector(as.matrix(X) %*% as.matrix(all.effects)))

# -- all together
y <- as.numeric(as.matrix(X) %*% as.matrix(all.effects) + normal.error) # recall that as.numeric is ESSENTIAL for WinBUGS later
data.frame(groups.1, groups.2, E.y=as.vector(as.matrix(X) %*% as.matrix(all.effects)), y=round(y, 2))

# Plot of generated data
par(mar=c(5, 6, 4, 2))
boxplot(y~groups.2*groups.1, col="lightgreen", ylab="", xlab="Continuous Response", main="Simulated data set (groups.2*groups.1)", las=1, ylim=c(20, 70), horizontal=TRUE)	
abline(v=40, col="red", lwd=2)
par(mar=c(5, 4, 4, 2))

library("lattice")
xyplot(y~groups.1|groups.2, main = "Relationship between y and groups.1 (by groups.2)")
xyplot(y~groups.2|groups.1, main="Relationship between y and groups.2 (by groups.1)")


## Aside: Using simulation to assess bias and precision of an estimator
print(lm(y~groups.1*groups.2), dig=3)

# Let's compare the "true" values with the estimated values:
data.frame(true=all.effects, estimates=round(lm(y~groups.1*groups.2)$coef, 2), row.names=names(lm(y~groups.1*groups.2)$coef))
print(summary(lm(y~groups.1*groups.2))$coef, dig=2)

# We generate 1000 datasets to assess the estimators:
n.iter <- 1000 # Desired number of iterations
estimates <- array(dim=c(n.iter, length(all.effects))) # Data structure to hold results

for (i in 1:n.iter) { # Run simulation n.iter times
    print(i) # Print iteration number so that we know how far we are (optional)
    normal.error <- rnorm(n, 0, sigma) # residuals
    y <- as.numeric(as.matrix(X) %*% as.matrix(all.effects) + normal.error) # assemble data
    fit.model <- lm(y~groups.1*groups.2) # fit the model
    estimates[i,] <- fit.model$coefficients # keep values of coefs
}
                 
# Compare the true values and the mean estimates taken over the 1000 iterations:
data.frame(true=all.effects, estimates=round(apply(estimates, 2, mean), 2), row.names=names(lm(y~groups.1*groups.2)$coef))


# Analysis using R
# Main effects only:
mainfit <- lm(y~groups.1+groups.2)
summary(mainfit)

# The means parameterization of the interaction model:
intfit <- lm(y~groups.1*groups.2-1-groups.1-groups.2)
summary(intfit)



# Analysis using WinBUGS

# Main-effects ANOVA using WinBUGS

# Define model
cat("model {
# Priors
alpha~dnorm(0, 0.001) # intercept
beta.groups.1[1] <- 0 # set to zero effect of 1st level
beta.groups.1[2]~dnorm(0, 0.001)
beta.groups.1[3]~dnorm(0, 0.001)
beta.groups.1[4]~dnorm(0, 0.001)
beta.groups.1[5]~dnorm(0, 0.001)
beta.groups.2[1] <- 0 # ditto
beta.groups.2[2]~dnorm(0, 0.001)
beta.groups.2[3]~dnorm(0, 0.001)
sigma~dunif(0, 100)
tau <- 1/(sigma*sigma)
# Likelihood
for (i in 1:n) {
    y[i]~dnorm(mean[i], tau)
    mean[i] <- alpha + beta.groups.1[groups.1[i]] + beta.groups.2[groups.2[i]]
}
}", fill=TRUE, file="2w.anova.txt")


# Bundle data
win.data <- list(y=y, groups.1 = as.numeric(groups.1), groups.2=as.numeric(groups.2), n=length(y))

# Inits function
inits <- function() {
    list(alpha=rnorm(1), beta.groups.1=c(NA, rnorm(4)), beta.groups.2=c(NA, rnorm(2)), sigma = rlnorm(1))
}

# Parameters to estimate
params <- c("alpha", "beta.groups.1", "beta.groups.2", "sigma")

# MCMC settings
ni <- 1200; nb <- 200; nt <- 2; nc <- 3

# Start Gibbs sampling
library("R2WinBUGS")
out <- bugs(win.data, inits, params, "2w.anova.txt", n.thin=nt, n.chains=nc, n.burnin=nb, n.iter=ni, debug=TRUE, DIC=TRUE, clearWD=TRUE)

# Print estimates
print(out, dig=3)

# Compare with the MLEs:
print(mainfit$coef, dig=3)
print(summary(mainfit)$sigma, dig=3)
print(-2*logLik(mainfit))
print(out$mean, dig=3)



# Interaction-effects ANOVA using WinBUGS

# Write model
cat("model {
# Priors
for (i in 1:n.groups.1) {
    for (j in 1:n.groups.2) {
        group.mean[i,j]~dnorm(0, 0.0001)
    }
}
sigma~dunif(0, 100)
tau <- 1/(sigma*sigma)
# Likelihood
for (i in 1:n) {
    mean[i] <- group.mean[groups.1[i], groups.2[i]]
    y[i]~dnorm(mean[i], tau)
}
}", fill=TRUE, file="2w.anova2.txt")

# Bundle data
win.data <- list(y=y, groups.1=as.numeric(groups.1), groups.2=as.numeric(groups.2), n=length(y), n.groups.1=length(unique(groups.1)), n.groups.2=length(unique(groups.2)))

# Inits function
inits <- function() {
    list(sigma=rlnorm(1))
}

# Parameters to estimate
params <- c("group.mean", "sigma")

# MCMC settings
ni <- 1200; nb <- 200; nt <- 2; nc <- 3

# Start Gibbs sampling
library("R2WinBUGS")
out <- bugs(win.data, inits, params, "2w.anova2.txt", n.thin=nt, n.chains=nc, n.burnin=nb, n.iter=ni, debug=TRUE, DIC=TRUE, clearWD=TRUE)

# Print estimates
print(out, dig=3)

# Compare with the MLEs:
(new.data <- data.frame(groups.1=gl(n=n.groups.1, k=3, length=15, labels=paste("level.", c(1:n.groups.1), sep="")), groups.2=gl(n=n.groups.2, k=1, length=15, labels=paste("level.", c(1:n.groups.2), sep=""))))

data.frame(new.data, predicts=round(predict(intfit, newdata=new.data), 2))
print(out$mean, dig=3)


# Forming predictions

# -- we present the Bayesian inference for the interaction-effects model in a graph showing the predicted response for each combination of groups.1 and groups.2 levels

# -- this is analogous to the boxplot we had above 



par(mfrow=c(1, 2), mar=c(5, 6, 4, 2))
boxplot(y~groups.2*groups.1, col="lightgreen", ylab="", xlab="y", main="", las=1, ylim=c(20, 70), horizontal=TRUE)	
abline(v=40, col="red", lwd=2)

# -- we select the order of the predictions to match that of the boxplot

ordering <- c(1, 4, 7, 10, 13, 2, 5, 8, 11, 14, 3, 6, 9, 12, 15)
plot(out$mean$group.mean, ordering, ylab="", las=1, xlab="y", cex=1.2, xlim=c(20, 70), col="blue", lab=c(6, 15, 3))
segments(out$mean$group.mean-out$sd$group.mean, ordering, out$mean$group.mean+out$sd$group.mean, ordering, col="black", lwd=1)
abline(v=40, col="red", lwd=2)
par(mfrow=c(1, 1), mar=c(5, 4, 4, 2))




# General linear model (ANCOVA)


# Data generation
n.groups <- 3
n.sample <- 10	
(n <- n.groups*n.sample) # Total number of data points

# Generate the factor
rep(n.sample, n.groups)
(f1 <- rep(1:n.groups, rep(n.sample, n.groups))) # Indicator for groups
(groups <- factor(f1, labels = c("A", "B", "C")))

# Generate the continuous covariate
x <- runif(n, 45, 70)
round(x, 2)


# Generate the linear predictor (i.e., the deterministic part of the model)
Xmat <- model.matrix(~groups*x)
print(Xmat, dig=2) 
beta.vec <- c(-250, 150, 200, 6, -3, -4)

lin.pred <- Xmat %*% beta.vec # lin.predictor
data.frame(f1=f1, x=round(x, 2), E.y=round(lin.pred, 2))

# Add the stochastic part
sigma <- 10
normal.error <- rnorm(n=n, mean=0, sd=sigma) # residuals 
y <- lin.pred+normal.error # response=lin.pred+residual

data.frame(f1=f1, x=round(x, 2), E.y=round(lin.pred, 2), y=round(y, 2))

# Plot the data:
par(mfrow=(c(1, 2)))
hist(y, col="lightgreen", freq=FALSE)
lines(density(y), col="darkred", lwd=2)

matplot(cbind(x[1:10], x[11:20], x[21:30]), cbind(y[1:10], y[11:20], y[21:30]), ylim=c(0, max(y)), ylab="y", xlab="x", col=c("red", "green", "blue"), pch=c("A", "B", "C"), las=1, cex=1.2)
par(mfrow=(c(1, 1)))


# Analysis using R
summary(lm(y~groups*x))

# Compare with the true values:
beta.vec
sigma


# Analysis using WinBUGS (and a cautionary tale about the importance of covariate standardization)

# Define model
cat("model {
# Priors
for (i in 1:n.group) {
    alpha[i]~dnorm(0, 0.001) # Intercepts
    beta[i]~dnorm(0, 0.001) # Slopes
}
sigma~dunif(0, 100)			# Residual standard deviation
tau <- 1/(sigma*sigma)
# Likelihood
for (i in 1:n) {
    mu[i] <- alpha[groups[i]]+beta[groups[i]]*x[i]
    y[i]~dnorm(mu[i], tau)
}
# Derived quantities
# Define effects relative to baseline level
a.effe2 <- alpha[2]-alpha[1] # Intercept B vs. A
a.effe3 <- alpha[3]-alpha[1] # Intercept C vs. A
b.effe2 <- beta[2]-beta[1] # Slope B vs. A
b.effe3 <- beta[3]-beta[1] # Slope C vs. A
# Custom tests
test1 <- beta[3]-beta[2] # Slope C vs. B
}", fill=TRUE, file="lm.txt")

# Bundle data
win.data <- list(y=as.numeric(y), groups=as.numeric(groups), x=x, n.group=max(as.numeric(groups)), n=n)

# Inits function
inits <- function() {
    list(alpha=rnorm(n.groups, 0, 2), beta=rnorm(n.groups, 1, 1), sigma=rlnorm(1))
}

# Parameters to estimate
params <- c("alpha", "beta", "sigma", "a.effe2", "a.effe3", "b.effe2", "b.effe3", "test1")

# MCMC settings
ni <- 1200; nb <- 200; nt <- 2; nc <- 3

# Start Markov chains
library("R2WinBUGS")
out <- bugs(win.data, inits, params, "lm.txt", n.thin=nt, n.chains=nc, n.burnin=nb, n.iter=ni, debug=TRUE, DIC=TRUE, clearWD=TRUE)

print(out, dig=3)			# Bayesian analysis

# Compare Bayesian estimates, ML estimates and the true values
print(out$mean, dig=3)
summary(lm(y~groups*x))	  # The ML solution
beta.vec; sigma           # Truth

# WHAT?? The Bayesian estimates are really off -- although the chains seem to have converged (see Rhat values)! And this is a fairly simple model ...

# This is the reason for the following warning in the WinBUGS User Manual:
# Beware: MCMC sampling can be dangerous!

# Imagine you have to decide whether to approve a drug (that could be widely used) based on the above MCMC simulation.
# But we're lucky -- we're only playing with linguistic corpora and experiments here.

# This example illustrates how useful it is to check the consistency of one’s inference from WinBUGS with other sources:
# -- estimates from a simpler, but similar model run in WinBUGS
# -- maximum likelihood estimates from another software
# -- plots of the estimated regression lines into the observed data to check whether something is wrong

matplot(cbind(x[1:10], x[11:20], x[21:30]), cbind(y[1:10], y[11:20], y[21:30]), ylim=c(0, max(y)), ylab="y", xlab="x", col = c("red", "green", "blue"), pch = c("A", "B", "C"), las=1, cex=1.2)

(alphas <- out$mean$alpha)
(betas <- out$mean$beta)
abline(alphas[1], betas[1], col="red", lty=3, lwd=2)
abline(alphas[2], betas[2], col="green", lty=3, lwd=2)
abline(alphas[3], betas[3], col="blue", lty=3, lwd=2)

(lm.coefs <- lm(y~groups*x)$coef)
abline(lm.coefs[1], lm.coefs[4], col="red", lty=1, lwd=1)
abline(lm.coefs[1]+lm.coefs[2], lm.coefs[4]+lm.coefs[5], col="green", lty=1, lwd=1)
abline(lm.coefs[1]+lm.coefs[3], lm.coefs[4]+lm.coefs[6], col="blue", lty=1, lwd=1)


# The problem: lack of standardization of the covariate length.
# -- in WinBUGS, it is always advantageous to scale covariates so that their extremes are not too far away from zero
# -- otherwise, there may be nonconvergence


# Let's keep the data the same, but increase the burnin significantly
# MCMC settings
ni <- 100000; nb <- 90000; nt <- 5; nc <- 3

# Start Markov chains
library("R2WinBUGS")
out.bis <- bugs(win.data, inits, params, "lm.txt", n.thin=nt, n.chains=nc, n.burnin=nb, n.iter=ni, debug=TRUE, DIC=TRUE, clearWD=TRUE)

print(out.bis, dig=3)			# Bayesian analysis

# Compare Bayesian estimates, ML estimates and the true values
print(out.bis$mean, dig=3)
summary(lm(y~groups*x))	  # The ML solution
beta.vec; sigma           # Truth

# No significant change!


# New data for WinBUGS with standardized covariate x:
win.data2 <- list(y=as.numeric(y), groups=as.numeric(groups), x=as.numeric(scale(x)), n.group=max(as.numeric(groups)), n=n)

ni <- 1200; nb <- 200; nt <- 2; nc <- 3

# Start Markov chains
library("R2WinBUGS")
out2 <- bugs(win.data2, inits, params, "lm.txt", n.thin=nt, n.chains=nc, n.burnin=nb, n.iter=ni, debug=TRUE, DIC=TRUE, clearWD=TRUE)

# Inspect results and compare with MLEs (much better)
print(out2$mean, dig=3)
print(lm(y~groups*as.numeric(scale(x)))$coef, dig=4)


# We should always consider transforming all covariates for WinBUGS even if that slightly complicates presentation of results afterwards (for instance, in graphics).

# Transforming:
# 1. centering, i.e., subtracting the mean, which changes the intercept only (not the slope)
scale(x, scale=FALSE)

# 2. normalizing / standardizing, i.e., subtracting the mean and dividing the result by the standard deviation of the original covariate values
scale(x)


# Simply centering the data also works for the above ANCOVA example:

# -- new data for WinBUGS with centered covariate x
win.data3 <- list(y=as.numeric(y), groups=as.numeric(groups), x=as.numeric(scale(x, scale=FALSE)), n.group=max(as.numeric(groups)), n=n)

# -- start Markov chains
library("R2WinBUGS")
out3 <- bugs(win.data3, inits, params, "lm.txt", n.thin=nt, n.chains=nc, n.burnin=nb, n.iter=ni, debug=FALSE, DIC=TRUE, clearWD=TRUE)

# -- inspect results and compare with MLEs
print(out3$mean, dig=3)
print(lm(y~groups*as.numeric(scale(x, scale=FALSE)))$coef, dig=3)
     
 
    

# Linear mixed-effects model

# Mixed-effects or mixed models contain factors, or more generally covariates, with both fixed and random effects.

# The dataset and model we consider are a generalization of the ANCOVA model we just discussed:
# -- we now constrain the values for at least one set of effects (intercepts and/or slopes) to come from a normal distribution
# -- this is what the random-effects assumption means (usually; in general, the random effects can come from any distribution)

# There are at least three sets of assumptions that one may make about the random effects for the intercept and/or the slope of regression lines that are fitted to grouped data:

## 1. only intercepts are random, but slopes are identical for all groups

## 2. both intercepts and slopes are random, but they are independent

## 3. both intercepts and slopes are random and there is a correlation between them

## (an additional case, where slopes are random and intercepts are fixed, is not a sensible model in most circumstances)


# Model No. 1 is often called a random-intercepts model.
# Both models No. 2 and 3 are called random-coefficients models.
# Model No. 3 is the default in R’s function lmer() in package lme4 when fitting a random-coefficients model.


# The plan:
# -- first, we generate a random-coefficients dataset under model No. 2, where both intercepts and slopes are uncorrelated random effects; we then fit both a random-intercepts (No. 1) and a random-coefficients model without correlation (No. 2) to this dataset

# -- next, we generate a second data set that includes a correlation between random intercepts and random slopes and adopt the random-coefficients model with correlation between intercepts and slopes (No. 3) to analyze it


# A close examination of how such a dataset can be assembled (i.e., simulated) will be of great help for understanding how analogous datasets are broken down (i.e., analyzed) using mixed models.
# -- as Kery (2010) puts it: very few strategies can be more effective to understand this type of mixed model than the combination of simulating data sets and describing the models fitted in WinBUGS syntax 
# (well, we could always write up the Metropolis-Hastings algorithm / Gibbs sampler by hand and that definitely is the best :-))



# Data generation

# The factor for groups:
n.groups <- 56				# Number of groups
n.sample <- 10				# Number of observations in each group
(n <- n.groups*n.sample) 		# Total number of data points
(groups <- gl(n=n.groups, k=n.sample)) 	# Indicator for population


# Continuous predictor x
original.x <- runif(n, 45, 70)
summary(original.x)

# We standardize it
x <- scale(original.x)
round(x, 2)

(mn <- mean(original.x))
(st.dev <- sd(original.x))
x <- (original.x - mn)/st.dev
round(x, 2)

hist(x, col="lightgreen", freq=FALSE)
lines(density(x), col="darkred", lwd=2)


# This is the model matrix for the means parametrization of the interaction model between the groups and the continuous covariate x:

Xmat <- model.matrix(~groups*x-1-x)
dim(Xmat)
head(Xmat)

# -- there are 560 observations (rows) and 112 regression terms / variables (columns)
dimnames(Xmat)[[1]]
dimnames(Xmat)[[2]]

# -- there are 56 terms for groups, the coefficients of which will provide the group-specific intercepts (i.e., the intercept random effects, or intercept effects for short)
# -- there are 56 terms for interactions between each group and the continuous covariate x, the coefficients of which will provide the group-specific slopes (i.e., the slope random effects, or slope effects for short)

round(Xmat[1, ], 2) 		# Print the top row for each column
Xmat[, 1]               # Print all rows for column 1 (group 1)
round(Xmat[, 57], 2)    # Print all rows for column 57 (group 1:x)
                                    
                                    
# Parameters for the distributions of the random coefficients / random effects (note that the intercepts and slopes comes from two independent Gaussian distributions):

intercept.mean <- 230 # mu_alpha
intercept.sd <- 20 # sigma_alpha

slope.mean <- 60 # mu_beta
slope.sd <- 30 # sigma_beta


# Generate the random coefficients:

intercept.effects <- rnorm(n=n.groups, mean=intercept.mean, sd=intercept.sd)

par(mfrow=c(1, 2))
hist(intercept.effects, col="lightgreen", freq=FALSE)
lines(density(intercept.effects), col="darkred", lwd=2)

slope.effects <- rnorm(n=n.groups, mean=slope.mean, sd=slope.sd)

hist(slope.effects, col="lightgreen", freq=FALSE)
lines(density(slope.effects), col="darkred", lwd=2)
par(mfrow=c(1, 1))

all.effects <- c(intercept.effects, slope.effects) # Put them all together
round(all.effects, 2)

# -- thus, we have two stochastic components in our model IN ADDITION TO the usual stochastic component for the individual-level responses, to which we now turn


# Generating the continuous response variable:

# -- the deterministic part
lin.pred <- Xmat %*% all.effects # Value of lin.predictor
str(lin.pred)

# -- the stochastic part
sigma <- 30
normal.error <- rnorm(n=n, mean=0, sd=sigma) # residuals 
str(normal.error)

# -- put the two together
y <- lin.pred+normal.error
str(y)
# or, alternatively
y <- rnorm(n=n, mean=lin.pred, sd=sigma)
str(y)

# We take a look at the response variable
hist(y, col="lightgreen", freq=FALSE)
lines(density(y), col="darkred", lwd=2)
summary(y)

library("lattice")
xyplot(y~x|groups)



# Analysis under a random-intercepts model

# REML analysis using R

library("lme4")
(lme.fit1 <- lmer(y~x+(1|groups)))
fixef(lme.fit1)
ranef(lme.fit1)
coef(lme.fit1)

# Compare with true values:
data.frame(intercept.mean=intercept.mean, slope.mean=slope.mean, intercept.sd=intercept.sd, slope.sd=slope.sd, sigma=sigma)
lme.fit1

# Bayesian analysis using WinBUGS

# Write model
cat("model {
# Priors
mu.int~dnorm(0, 0.001) # Mean hyperparameter for random intercepts
sigma.int~dunif(0, 100) # SD hyperparameter for random intercepts
tau.int <- 1/(sigma.int*sigma.int)
for (i in 1:ngroups) {
    alpha[i]~dnorm(mu.int, tau.int) # Random intercepts
}
beta~dnorm(0, 0.001) # Common slope
sigma~dunif(0, 100) # Residual standard deviation
tau <- 1/(sigma*sigma)
# Likelihood
for (i in 1:n) {
    mu[i] <- alpha[groups[i]]+beta*x[i] # Expectation
    y[i]~dnorm(mu[i], tau) # The actual (random) responses
}
}", fill=TRUE, file="lme.model1.txt")

# Bundle data
win.data <- list(y=as.numeric(y), groups=as.numeric(groups), x=as.numeric(x), ngroups=max(as.numeric(groups)), n=as.numeric(n))
# use as.numeric across the board for the data passed to WinBUGS; omitting this can yield an "expected collection operator c" error ...

# Inits function
inits <- function() {
    list(alpha=rnorm(n.groups, 0, 2), beta=rnorm(1, 1, 1), mu.int=rnorm(1, 0, 1), sigma.int=rlnorm(1), sigma=rlnorm(1))
}

# Parameters to estimate
params <- c("alpha", "beta", "mu.int", "sigma.int", "sigma")

# MCMC settings
ni <- 2000; nb <- 500; nt <- 2; nc <- 3

# Start Gibbs sampling
library("R2WinBUGS")
out <- bugs(win.data, inits, params, "lme.model1.txt", n.thin=nt, n.chains=nc, n.burnin=nb, n.iter=ni, debug=TRUE, DIC=TRUE, clearWD=TRUE)

# Inspect results
print(out, dig=3)

# Compare with true values and MLEs:
data.frame(intercept.mean=intercept.mean, slope.mean=slope.mean, intercept.sd=intercept.sd, slope.sd=slope.sd, sigma=sigma)
print(out$mean, dig=3)
lme.fit1



# Analysis under a random-coefficients model without correlation between intercept and slope

# REML analysis using R
library("lme4")
(lme.fit2 <- lmer(y~x+(1|groups)+(0+x|groups)))
fixef(lme.fit2)
ranef(lme.fit2)
coef(lme.fit2)


# Compare with true values:
data.frame(intercept.mean=intercept.mean, slope.mean=slope.mean, intercept.sd=intercept.sd, slope.sd=slope.sd, sigma=sigma)
lme.fit2


# Bayesian analysis using WinBUGS
# Define model
cat("model {
# Priors
mu.int~dnorm(0, 0.001) # Mean hyperparameter for random intercepts
sigma.int~dunif(0, 100) # SD hyperparameter for random intercepts
tau.int <- 1/(sigma.int*sigma.int)
mu.slope~dnorm(0, 0.001) # Mean hyperparameter for random slopes
sigma.slope~dunif(0, 100) # SD hyperparameter for slopes
tau.slope <- 1/(sigma.slope*sigma.slope)
for (i in 1:ngroups) {
    alpha[i]~dnorm(mu.int, tau.int) # Random intercepts
    beta[i]~dnorm(mu.slope, tau.slope) # Random slopes
}
sigma~dunif(0, 100) # Residual standard deviation
tau <- 1/(sigma*sigma) # Residual precision
# Likelihood
for (i in 1:n) {
    mu[i] <- alpha[groups[i]]+beta[groups[i]]*x[i]
    y[i]~dnorm(mu[i], tau)    
}
}", fill=TRUE, file="lme.model2.txt")

# Bundle data
win.data <- list(y=as.numeric(y), groups=as.numeric(groups), x=as.numeric(x), ngroups=max(as.numeric(groups)), n=as.numeric(n))

# Inits function
inits <- function() {
    list(alpha=rnorm(n.groups, 0, 2), beta=rnorm(n.groups, 10, 2), mu.int=rnorm(1, 0, 1), sigma.int=rlnorm(1), mu.slope=rnorm(1, 0, 1), sigma.slope=rlnorm(1), sigma=rlnorm(1))
}

# Parameters to estimate
params <- c("alpha", "beta", "mu.int", "sigma.int", "mu.slope", "sigma.slope", "sigma")

# MCMC settings
ni <- 2000; nb <- 500; nt <- 2; nc <- 3

# Start Gibbs sampling
library("R2WinBUGS")
out <- bugs(win.data, inits, params, "lme.model2.txt", n.thin=nt, n.chains=nc, n.burnin=nb, n.iter=ni, debug=TRUE, DIC=TRUE, clearWD=TRUE)

print(out, dig=2)

# Compare with true values and MLEs:
data.frame(intercept.mean=intercept.mean, slope.mean=slope.mean, intercept.sd=intercept.sd, slope.sd=slope.sd, sigma=sigma)
print(out$mean, dig=2)
lme.fit2

# Using simulated data and successfully recovering the input values makes us fairly confident that the WinBUGS analysis has been correctly specified.

# This is very helpful for more complex models b/c it's easy to make mistakes:
# -- a good way to check the WinBUGS analysis for a custom model that is needed for a particular phenomenon is to simulate the data and run the WinBUGS model on that data



# The random-coefficients model with correlation between intercept and slope

# Data generation

# Group factor:
n.groups <- 56
n.sample <- 10
(n <- n.groups*n.sample)
(groups <- gl(n=n.groups, k=n.sample))

# Standardized continuous covariate:
original.x <- runif(n, 45, 70)
x <- scale(original.x)

hist(x, col="lightgreen", freq=FALSE)
lines(density(x), col="darkred", lwd=2)

# Design matrix:
Xmat <- model.matrix(~groups*x-1-x)

# -- there are 560 observations (rows) and 112 regression terms / variables (columns), just as before
dimnames(Xmat)[[1]]
dimnames(Xmat)[[2]]

round(Xmat[1, ], 2) # Print the top row for each column
                                
# Generate the correlated random effects for intercept and slope:
       
library("MASS") # Load MASS
?mvrnorm

# Assembling the parameters for the multivariate normal distribution

intercept.mean <- 230 # Values for five hyperparameters
intercept.sd <- 20
slope.mean <- 60
slope.sd <- 30
intercept.slope.covariance <- 10

(mu.vector <- c(intercept.mean, slope.mean))
(var.covar.matrix <- matrix(c(intercept.sd^2, intercept.slope.covariance, intercept.slope.covariance, slope.sd^2), 2, 2))

# Generating the correlated random effects for intercepts and slopes:

effects <- mvrnorm(n=n.groups, mu=mu.vector, Sigma=var.covar.matrix)
round(effects, 2)

par(mfrow=c(1, 2))
hist(effects[, 1], col="lightgreen", freq=FALSE)
lines(density(effects[, 1]), col="darkred", lwd=2)

hist(effects[, 2], col="lightgreen", freq=FALSE)
lines(density(effects[, 2]), col="darkred", lwd=2)
par(mfrow=c(1, 1))

# Plotting the bivariate distribution:
effects.kde <- kde2d(effects[, 1], effects[, 2], n=50) # kernel density estimate
par(mfrow=c(1, 3))
contour(effects.kde)
image(effects.kde)
persp(effects.kde, phi=45, theta=30)

# even better:
par(mfrow=c(1, 2))
image(effects.kde); contour(effects.kde, add=T)
persp(effects.kde, phi=45, theta=-30, shade=.1, border=NULL, col="lightblue", ticktype="detailed", xlab="", ylab="", zlab="")
par(mfrow=c(1, 1))

apply(effects, 2, mean)
apply(effects, 2, sd)
apply(effects, 2, var)
cov(effects[, 1], effects[, 2])
var(effects)


# Sampling error for intercept-slope covariance (200 samples of 50, 500, 5000 and 50000 group/random effects each)

par(mfrow=c(1, 4))
cov.temp1 <- numeric()
for (i in 1:200) {
    temp1 <- mvrnorm(50, mu=mu.vector, Sigma=var.covar.matrix)
    cov.temp1[i] <- var(temp1)[1, 2]    
}
hist(cov.temp1, col="lightgreen", freq=FALSE, main="n=50")
lines(density(cov.temp1), col="darkred", lwd=2)

cov.temp2 <- numeric()
for (i in 1:200) {
    temp2 <- mvrnorm(500, mu=mu.vector, Sigma=var.covar.matrix)
    cov.temp2[i] <- var(temp2)[1, 2]    
}
hist(cov.temp2, col="lightgreen", freq=FALSE, main="n=500")
lines(density(cov.temp2), col="darkred", lwd=2)

cov.temp3 <- numeric()
for (i in 1:200) {
    temp3 <- mvrnorm(5000, mu=mu.vector, Sigma=var.covar.matrix)
    cov.temp3[i] <- var(temp3)[1, 2]    
}
hist(cov.temp3, col="lightgreen", freq=FALSE, main="n=5000")
lines(density(cov.temp3), col="darkred", lwd=2)

cov.temp4 <- numeric()
for (i in 1:200) {
    temp4 <- mvrnorm(50000, mu=mu.vector, Sigma=var.covar.matrix)
    cov.temp4[i] <- var(temp4)[1, 2]    
}
hist(cov.temp4, col="lightgreen", freq=FALSE, main="n=50000")
lines(density(cov.temp4), col="darkred", lwd=2)
par(mfrow=c(1, 1))


intercept.effects <- effects[, 1]
round(intercept.effects, 2)
slope.effects <- effects[, 2]
round(slope.effects, 2)
all.effects <- c(intercept.effects, slope.effects) # Put them all together
round(all.effects, 2)


# Generate the response variable:
# -- the deterministic part
lin.pred <- Xmat %*% all.effects
round(as.vector(lin.pred), 2)

# -- the stochastic part
sigma <- 30
(normal.error <- rnorm(n=n, mean=0, sd=sigma))	# residuals

# -- add them together
y <- lin.pred+normal.error
# or, in one go:
y <- rnorm(n=n, mean=lin.pred, sd=sigma)

hist(y, col="lightgreen", freq=FALSE)
lines(density(y), col="darkred", lwd=2)

library("lattice")
xyplot(y~x|groups, pch=20)


# REML analysis using R
library("lme4")
lme.fit3 <- lmer(y~x+(x|groups))
print(lme.fit3, cor=FALSE)

# Compare with the true values:
data.frame(intercept.mean=intercept.mean, slope.mean=slope.mean, intercept.sd=intercept.sd, slope.sd=slope.sd, intercept.slope.correlation=intercept.slope.covariance/(intercept.sd*slope.sd) , sigma=sigma)  



# Bayesian analysis using WinBUGS

# This is one way in which we can specify a Bayesian analysis of the random-coefficients model with correlation.
# For a different and more general way to allow for correlation among two or more sets of random effects in a model, see Gelman and Hill (2007: 376-377).

# Define model
cat("model {
# Priors
mu.int~dnorm(0, 0.001) # mean for random intercepts
mu.slope~dnorm(0, 0.001) # mean for random slopes
sigma.int~dunif(0, 100) # SD of intercepts
sigma.slope~dunif(0, 100) # SD of slopes
rho~dunif(-1, 1) # correlation between intercepts and slopes
Sigma.B[1, 1] <- pow(sigma.int, 2) # We start assembling the var-covar matrix for the random effects
Sigma.B[2, 2] <- pow(sigma.slope, 2)
Sigma.B[1, 2] <- rho*sigma.int*sigma.slope
Sigma.B[2, 1] <- Sigma.B[1, 2]
covariance <- Sigma.B[1, 2]
Tau.B[1:2, 1:2] <- inverse(Sigma.B[,])
for (i in 1:ngroups) {
    B.hat[i, 1] <- mu.int
    B.hat[i, 2] <- mu.slope
    B[i, 1:2]~dmnorm(B.hat[i, ], Tau.B[,]) # the pairs of correlated random effects
    alpha[i] <- B[i, 1] # random intercept
    beta[i] <- B[i, 2] # random slope
}
sigma~dunif(0, 100) # Residual standard deviation
tau <- 1/(sigma*sigma)
# Likelihood
for (i in 1:n) {
    mu[i] <- alpha[groups[i]]+beta[groups[i]]*x[i]
    y[i]~dnorm(mu[i], tau)
}
}", fill=TRUE, file="lme.model3.txt")

# Bundle data
win.data <- list(y=as.numeric(y), groups=as.numeric(groups), x=as.numeric(x), ngroups=max(as.numeric(groups)), n=as.numeric(n))

# Inits function
inits <- function() {
    list(mu.int=rnorm(1, 0, 1), sigma.int=rlnorm(1), mu.slope=rnorm(1, 0, 1), sigma.slope=rlnorm(1), rho=runif(1, -1, 1), sigma=rlnorm(1))
}

# Parameters to estimate
params <- c("alpha", "beta", "mu.int", "sigma.int", "mu.slope", "sigma.slope", "rho", "covariance", "sigma")

# MCMC settings
ni <- 2000; nb <- 500; nt <- 2; nc <- 3

# Start Gibbs sampler
library("R2WinBUGS")
out <- bugs(win.data, inits, params, "lme.model3.txt", n.thin=nt, n.chains=nc, n.burnin=nb, n.iter=ni, debug=TRUE, DIC=TRUE, clearWD=TRUE)

print(out, dig=2)

# Compare with the MLEs and the true values:
print(out$mean, dig=2)
print(lme.fit3, cor=FALSE)
data.frame(intercept.mean=intercept.mean, slope.mean=slope.mean, intercept.sd=intercept.sd, slope.sd=slope.sd, intercept.slope.correlation=intercept.slope.covariance/(intercept.sd*slope.sd) , sigma=sigma)  

# Note the very large SD for the posterior distribution of the covariance (relative to the mean):
print(out$mean$covariance, dig=2)
print(out$sd$covariance, dig=2)

print(out$mean$rho, dig=2)
print(out$sd$rho, dig=2)


# -- R does not even provide an SE for the covariance estimator (equivalently, for the correlation of random effects)
# -- covariances are even harder to reliably estimate than variances, which are harder than mean estimators (it's easy to estimate measures of center / location, harder to estimate measures of dispersion and even harder to estimate measures of association)
