Mixed model for modeling ultrassônicas wave

Modeling the data with Mixed Model

if(!require(rstan)) install.packages("rstan");require(rstan)

if(!require(coda)) install.packages("coda");require(coda) 

if(!require(ggplot2)) install.packages("ggplot2");require(ggplot2)

Data

For each frame and wall, 10 ultrassonic waves were emitted and received in a total of 240 waves. Each wave contains six points of measurements.

## download from github
caminho = "U:/Drives compartilhados/Artigo Mixto e PG/Resultados/"
dados<-read.table(paste(caminho,"dados_com_e_sem_vazios.csv",sep=""),h=T,sep=",")

Selecting \(m=231\) observed curves. Nine waves were not captured by the receiver in any distance and they are not in the dataset. One sequence appears with only 4 measurements instead of 6 and it is also omitted.

tira<-which(dados[,4]==0 & dados[,5]==0)
dados<-dados[-tira,]
tira_2 <- which(dados[,2]=="223") #this wave does not contain six points and is left out
dados<-dados[-tira_2,]
indiv<-as.numeric(dados[,2])
#
#
m=length(table(indiv))    #total of individuals
repl=table(indiv) # total of observations for each individual 
n=length(indiv)   # total of measurements

Factors (inputs) for the model

Covariates for fixed effects

X<-as.matrix(dados[,c(5,1,6)])
X<-cbind(rep(1,n),X)
X<-as.matrix(X)
#
parede<-dados[,1]
quadrante<-dados[,3]
######################
##dummies construction
######################
####################
tijolo_sb<-which(quadrante %in% c(2,3) & parede==2 | (quadrante %in% c(5,6,7,8,10) & parede==1))
tijolo_cb<-which(quadrante %in% c(1,5,6,10) & parede==2)
argamassa<-which((quadrante %in% c(1,2,3,11) & parede==1) | (quadrante %in% c(7,8) & parede==2))
interface<-which(quadrante %in% c(4,9,12) | (quadrante %in% c(11) & parede==2))
####################
####################
##condition of the walls at each propagation time measurement location.
categoria<-NULL
categoria[argamassa]<-"Argamassa"  ### measurement made on grout as reference 
categoria[interface]<-"Interface"  ### dummy indicating if the measurement is made on the brick–grout interface or not
categoria[tijolo_cb]<-"Tij_com_buraco" ### dummy indicating if the measurement is made on the brick with void or not
categoria[tijolo_sb]<-"Tij_sem_buraco" ### dummy indicating if the measurement is made on the brick without void or not
#####################
#####################
elem<-model.matrix(dados$Valor ~ factor(categoria))[,-1]
elem<-matrix(as.numeric(elem),ncol=3)
####################
DummiesCategory = categoria
####################

print(table(DummiesCategory))

DummiesCategory
     Argamassa      Interface Tij_com_buraco Tij_sem_buraco 
           360            414            240            366

Covariates for random-effects

####################
Vt<-c(10,15,20,25,30,35) ## six distances
aux<-matrix(round(poly(Vt, degree=3),3),ncol=3) ## orthogonal polynomials for each individual
#
aux<-cbind(1,aux)
Z<-NULL
for (i in 1:m) Z<-rbind(Z,aux[1:repl[i],])
#
Xfinal<-cbind(X,elem,Z[,-1])
#
Y<-as.matrix(dados[,4],ncol=1) ## column vector for response variable
####################
## removing measurements at 35cm for fitting the model and using then for validating it
####################
tira<-which(Xfinal[,2]==35)
Xmod<-Xfinal[-tira,-2]
Ymod<-matrix(Y[-tira,],ncol=1)
Zmod<-Z[-tira,]
Data<-dados[-tira,]
#
Xvalid<-Xfinal[tira,-2]
Yvalid<-matrix(Y[tira,],ncol=1)
Zvalid<-Z[tira,]
Datavalid<-dados[tira,]
#
Amostramod<-NULL
for (i in 1:length(table(indiv))){
    Amostramod<-c(Amostramod,rep(i,repl[i]-1))}
Amostravalid<-1:230
####################

Fitting the Model

Stan code for estimating the Mixed Effect Model

stanmodelcode=" data {
  int<lower=0> m; // total of individuals
  int<lower=0> N; // total of observations
  int<lower=0> t; // total of parameters for fixed-effect 
  int<lower=0> q; // total of parameters for random-effect 
  int<lower=1, upper=m> amostra[N]; //index for individuals in the fitting dataset
  vector[q] u; // mean vector for random-effect
  matrix[N,t] x; // model matrix for fixed-effect
  matrix[m,t] xvalid; // model matrix for fixed-effect
  matrix[N,q] z; // model matrix for random-effect
  matrix[m,q] zvalid; // model matrix for random-effect
  matrix[q,q] W; // covariance matrix Inverse−Wishart
  matrix[N,1] y; // vector of response variable
  vector[m] yvalid;
  int<lower=1, upper=m> amostravalid[m]; //index for individuals in the test dataset
}

parameters {
  real<lower=0> sigma; // variance for random-errors
  vector[t] beta; // coefficients vector for fixed-effect
  matrix[q,m] b; // parameters matrix for random-effect
  cov_matrix[q] G; // covariance matrix for Inverse−Wishart
}

model {
  real media; //mean vector of response variable
  beta ~ normal(0,1000); // prior for coefficients of fixed-effect
  1/sigma ~ gamma(0.1,0.1); //prior for the variance of random-errors
  G ~ inv_wishart(5,W); // prior for var-con matrix of random-effects
  for(i in 1:m){
    b[,i] ~ multi_normal(u,G); //distribution of random-effects
  }
  for(j in 1:N){
    media = x[j,]*beta+z[j,]*b[,amostra[j]]; //mean of each observation
    y[j] ~ normal(media, sqrt(sigma)); //likelihood function
  }
}
 
generated quantities{
 real med;
  vector[m] yest;
    real Log;
   Log = 0;
    for(l in 1:N){
    med = x[l,]*beta+z[l,]*b[,amostra[l]]; //mean
    Log =+ normal_lpdf(y[l]|med, sqrt(sigma)); //log likelihood
    }
    for(l in 1:m){
    yest[l] = xvalid[l,]*beta+zvalid[l,]*b[,amostravalid[l]]; 
    }  
}
"

Run Stan code for estimating the parameters of Mixed Effect Model

Configuration for the arguments of Stan code

Chain= 1
Iter = 20000
Warmup = 5000
Thin = 10
Seed = 1000

Fitting the model with Stan

### Configuration for running stan code

m = m
N = length(Ymod)
q = ncol(Zmod)
amostra = Amostramod
u = rep(0,q)
x = Xmod[,-2]
t = ncol(x)
z = Zmod
diagonal_W = rep(100,q)
W = diag(diagonal_W)
y = Ymod
v = rep(0,N)

### running the code
dta = list(m=m, N=N, t=t, q=q, amostra=amostra, u=u, x=x, xvalid=Xvalid[,-2], z=z, zvalid=Zvalid, W=W, y=y, yvalid=as.vector(Yvalid), amostravalid=Amostravalid)


fit1 <- stan(model_code =stanmodelcode,
            data = dta,
            chains = Chain, iter = Iter, warmup = Warmup, thin = Thin, seed=Seed)

Checking convergence of the chain

Checking the convergence of the MCMC chain using the log-likelihood

cadeia<-as.numeric(extract(fit1, pars = "Log")$Log)
gewek<-geweke.diag(mcmc(cadeia))
TestConvergency = numeric()
TestConvergency["Zscore"] = gewek[1]
TestConvergency["Pvalue"] = data.frame(pnorm(as.numeric(abs(gewek$z[1])),lower.tail=FALSE)*2)

TestConvergency

$Zscore
    var1 
2.378093 

$Pvalue
[1] 0.01740245

plot(mcmc(exp(cadeia)),type='l')

Estimation

Regression coefficients estimates (point and interval estimates – 90% credibility interval)

cadeia<-data.frame(extract(fit1, pars = "beta")[[1]])

names(cadeia) = c("X0",names(cadeia)[-ncol(cadeia)])

cadeia["sigma"]<-data.frame(extract(fit1, pars = "sigma")[[1]])

###########################
## HPD
##########################

pvalue <- apply(cadeia, 2, function(x){HPDinterval(mcmc(x),0.90)})
P = length(pvalue[1,])

t(pvalue)

             [,1]       [,2]
X0    119.6939346 134.905444
X1      5.7377233  13.551403
X2      1.8133374   8.304671
X3      5.7844225  13.031493
X4      4.3730528  10.584401
X5    176.5347192 213.653661
X6     20.7069109  44.444784
X7      0.7912736  11.816798
sigma 155.5919946 187.956400

###########################
## Quantil
##########################

ICQM <- data.frame(Mean=apply(cadeia, 2, function(x){mean(x)}))
ICQM[c("5%","50%","95%")] <- t(apply(cadeia, 2, function(x){quantile(x,c(0.05,0.5,0.95))}))

ICQM

            Mean          5%        50%        95%
X0    127.309289 119.3576176 127.319479 134.659681
X1      9.541335   5.6062020   9.602768  13.423544
X2      5.238042   1.9533510   5.229519   8.507884
X3      9.786947   6.0581806   9.799893  13.423537
X4      7.593563   4.4378868   7.591157  10.681477
X5    195.383261 176.6582329 195.578618 213.890872
X6     31.793349  19.8679950  31.818365  43.776441
X7      5.874922   0.3036433   5.927226  11.439720
sigma 172.492441 156.4354594 172.314596 188.965367

Final Analysis

Model fitted without the non relevant coefficient X7 (cubic distance)

### Configuration for running stan code

m = m
N = length(Ymod)
q = ncol(Zmod)
amostra = Amostramod
u = rep(0,q)
x = Xmod[,-c(2,ncol(Xmod))]
xv= Xvalid[,-c(2,ncol(Xmod))]
t = ncol(x)
z = Zmod
diagonal_W = rep(100,q)
W = diag(diagonal_W)
y = Ymod
v = rep(0,N)

### running the code
dta = list(m=m, N=N, t=t, q=q, amostra=amostra, u=u, x=x, xvalid=xv, z=z, zvalid=Zvalid, W=W, y=y, yvalid=as.vector(Yvalid), amostravalid=Amostravalid)


fit2 <- stan(model_code =stanmodelcode,
            data = dta,
            chains = Chain, iter = Iter, warmup = Warmup, thin = Thin, seed=Seed)

Regression coefficients estimates (point and interval estimates – 90% credibility inter-val)

cadeia<-data.frame(extract(fit2, pars = "beta")[[1]])
names(cadeia) = c("X0",names(cadeia)[-ncol(cadeia)])
cadeia["sigma"]<-data.frame(extract(fit2, pars = "sigma")[[1]])


###########################
## HPD
##########################
hpd <- apply(cadeia, 2, function(x){HPDinterval(mcmc(x),0.90)})


###########################
## Quantil
##########################
CI <- data.frame(Mean=apply(cadeia, 2, function(x){mean(x)}))
CI[c("5%","50%","95%")] <- t(apply(cadeia, 2, function(x){quantile(x,c(0.05,0.5,0.95))}))

CI

            Mean         5%        50%        95%
X0    122.582543 116.604757 122.439266 128.794805
X1      9.494461   5.358483   9.422558  13.555108
X2      5.202603   1.907584   5.273785   8.279321
X3      9.728295   6.182982   9.704540  13.411491
X4      7.512091   4.487555   7.481163  10.532986
X5    179.799562 169.144846 179.720493 190.317715
X6     20.210583  15.673914  20.179750  25.026658
sigma 172.291011 156.179155 172.179651 189.948725

Barplot with the estimate for some fixed-effects

coeffs = c(3,4,5)
R=data.frame(Coefficients = rownames(CI)[coeffs],Value=CI[coeffs,1])


ggplot(R, aes(x = Coefficients)) +
  geom_col(aes(y = Value,), fill = "gray") +
  theme_minimal()  +  xlab('Coefficients in Mixed Model') +
theme( plot.title=element_text(size=16),
         axis.text=element_text(size=18),
         axis.title=element_text(size=20))

Obs: all covariates seem to reduce the wave speed, considering that they contribute to its propagation in a longer time. Specifically, the covariate X3 (brick with voids) acts more strongly, reducing the wave speed more strongly.

Prediction in the wall without voids (Panel 1), and with voids (Panel 2)

cadeia=data.frame(extract(fit2, pars = "yest")[[1]])

dif =  numeric()
for (j in 1:m){
dif = cbind(dif,cadeia[,j] - Yvalid[j])}

dif=data.frame(dif)

EQM1=data.frame(MSE=sqrt(colMeans((dif)^2)))

EQM_P1 = EQM1[Xvalid[,2]==1,]
EQM_P1 = data.frame(Index = 1:length(EQM_P1),Panel=as.factor(rep(0,length(EQM_P1))),MSE=EQM_P1)
EQM_P2 = EQM1[Xvalid[,2]==2,]
EQM_P2 = data.frame(Index = 1:length(EQM_P2),Panel=as.factor(rep(1,length(EQM_P2))),MSE=EQM_P2)

EQM = rbind(EQM_P1,EQM_P2)

EQMtotal = mean(EQM[,3])

paste("EQM total:",round(EQMtotal,2))

[1] "EQM total: 108.29"

# Visualization
ggplot(EQM, aes(x = Index, y = MSE)) + 
  geom_line(aes(color = Panel, linetype = Panel)) + 
ylab("RMSE (Mixed model)")+
 xlab("Individual Index")+
  scale_color_manual(values = c("gray", "black"))+ theme_minimal()+
theme( plot.title=element_text(size=16),
         axis.text=element_text(size=18),
         axis.title=element_text(size=20))

Plot for predicted and real values

B = t(colMeans(extract(fit2, pars = "b")$b ))
Betas = matrix(CI[-8,1],ncol = 1)

ypred = numeric()
for(i in 1:m){
ypred= rbind(ypred, matrix(x[which(amostra==i),],nrow = 5)%*%Betas +  matrix(z[which(amostra==i),],nrow = 5)%*%matrix(B[i,],ncol=1)) 
}


plt_df = data.frame(ind=amostra, x = Data$D_m, ypred, Y=c(t(y)), Panel = Data$parede_n  )


p <- ggplot(data = plt_df, aes(x = x, y = Y, group = ind, colour = 'Observed data'))


p + geom_line(aes(group=ind ),size=1)+
  labs(x = "Time", y = " Response (s) ")+
  geom_point(aes(colour = 'Observed data'), size=1) +  
  geom_line(data = plt_df, aes(x = x, y = ypred, colour = 'Predicted value'), size=1, alpha=0.6) +
  theme_bw() + theme(legend.position="bottom") +
  #scale_color_manual(name = '', values = c('Observed data'='black')) +
  xlab('Distance') +
  ylab('Propagation time')

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Residual for Mixed Model

Res = c(t(y))-ypred


datares = data.frame(Index = 1:N,Res = Res)

# Visualization
ggplot(datares, aes(x = Index, y = Res)) + 
  geom_point(aes(), size=1) + 
  ylab("Residual (Mixed Model)")+
  xlab("Index")+
  scale_color_manual(values = c("darkblue", "red"))+ theme_minimal()+
  theme( plot.title=element_text(size=16),
         axis.text=element_text(size=18),
         axis.title=element_text(size=12))

Panel_val = Xvalid[,2]

Panel_val[Xvalid[,2]==0] = 0
Panel_val[Xvalid[,2]==1] = 1

yest = numeric()
for (i in 1:m){
  yest= rbind(yest, matrix(xv[which(Amostravalid==i),],nrow = 1)%*%Betas +  matrix(Zvalid[which(Amostravalid==i),],nrow = 1)%*%matrix(B[i,],ncol=1)) }

Forecasting<- data.frame(ind =1:m ,x=rep(35,m),ypred = yest,Y=Yvalid, Panel = Panel_val)

plt_df2 = rbind(plt_df,Forecasting) 

or = order(plt_df2$ind)
plt_df2 = plt_df2[or,]


p <- ggplot(data = plt_df2, aes(x = x, y = Y, group = ind,colour = 'Observed data'))


p + geom_line(aes(group=ind),size=1)+ ylim(0, 1600)+
  geom_point(aes(colour = 'Observed data'), size=1) +  
  geom_line(data = plt_df2, aes(x = x, y = ypred, colour = 'Predicted value'), size=1, alpha=0.6) +
  theme_bw() +
  #scale_color_manual(name = '', values = c('Observed data'='black')) +
  xlab('Distance') +
  ylab('Propagation time') +theme( plot.title=element_text(size=20),
                                   axis.text=element_text(size=10),
                                   axis.title=element_text(size=12),
                                   legend.position="bottom")

plt_df3=plt_df2
plt_df3$Panel[plt_df2$Panel==0]= "Panel0"
plt_df3$Panel[plt_df2$Panel==1]= "Panel1"

Plot for predicted and real values followed by the real and estimated value for distance 35 cm - Panel=1 and Panel =2

plt_df3 = plt_df2[plt_df2$Panel==1,]

p <- ggplot(data = plt_df3, aes(x = x, y = Y, group = ind,colour = 'Observed data'))


p + geom_line(aes(group=ind ),size=1)+
  labs(x = "Time", y = " Response (s) ")+
  geom_point(aes(colour = 'Observed data'), size=1) +  
  geom_line(data = plt_df3, aes(x = x, y = ypred, colour = 'Predicted value'), size=1, alpha=0.6) +
  theme_bw() + theme(legend.position="bottom") +
  #scale_color_manual(name = '', values = c('Realized data'='black')) +
  xlab('Distance') +
  ylab('Propagation time')

plt_df3 = plt_df2[plt_df2$Panel==2,]

p <- ggplot(data = plt_df3, aes(x = x, y = Y, group = ind,colour = 'Observed data'))


p + geom_line(aes(group=ind ),size=1)+
  labs(x = "Time", y = " Response (s) ")+
  geom_point(aes(colour = 'Observed data'), size=1) +  
  geom_line(data = plt_df3, aes(x = x, y = ypred, colour = 'Predicted value'), size=1, alpha=0.6) +
  theme_bw() + theme(legend.position="bottom") +
  #scale_color_manual(name = '', values = c('Realized data'='black')) +
  xlab('Distance') +
  ylab('Propagation time')