getwd()
## [1] "C:/Users/USUARIO/OneDrive - University of Texas at El Paso/Documents/Exam 2"
setwd("C:\\Users\\USUARIO\\OneDrive - University of Texas at El Paso\\Documents\\Exam 2")
mydat<-read.csv("trauma.csv")
#mydat
str(mydat)
## 'data.frame': 300 obs. of 5 variables:
## $ x1: int 1 9 29 10 2 9 25 2 11 16 ...
## $ x2: num 7.84 7.84 2.93 7.84 7.84 ...
## $ x3: int 25 63 32 61 43 27 30 25 42 80 ...
## $ x4: int 1 0 0 0 1 0 1 0 1 1 ...
## $ y : int 0 0 0 0 0 0 1 0 0 0 ...
Transformation
Fit the above model after transforming the covariates appropriately (standardizing quantitative covariates and subtracting 0.5 from a binary covariate)
#Inspection
table(mydat$y) #categorical
##
## 0 1
## 278 22
table(mydat$x1) #numeric
##
## 1 2 3 4 5 6 8 9 10 11 12 13 14 16 17 18 19 20 22 25 26 27 29 30 33 34
## 18 11 2 26 10 2 10 52 20 6 4 24 8 15 5 8 2 11 5 18 2 4 12 1 1 11
## 35 38 43 45 50 54 75
## 1 4 1 1 3 1 1
table(mydat$x2) #numeric
##
## 0 1.4652 2.1978 2.9304 3.2212 3.512 3.8672 4.0936 4.2112 4.4488 4.6196
## 1 2 1 4 1 1 3 2 2 1 1
## 4.804 5.0304 5.2346 5.3856 5.6764 5.7408 5.8806 5.9672 6.0848 6.3756 6.6132
## 2 2 1 2 1 1 1 8 1 2 2
## 6.6776 6.8174 6.904 7.1082 7.55 7.8408
## 2 3 12 9 25 207
table(mydat$x3) #numeric
##
## 1 2 3 4 5 6 7 8 9 10 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
## 3 4 5 1 1 2 1 2 2 3 1 2 2 6 10 5 8 10 8 11 9 13 10 9 12 4
## 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 55
## 9 7 10 5 9 4 5 7 5 5 7 8 3 4 7 3 4 1 4 3 3 3 4 1 3 2
## 56 59 61 62 63 64 65 66 68 69 70 71 73 74 80 82 84 90 94
## 4 1 3 1 2 1 3 2 1 1 1 1 1 3 1 1 1 1 1
table(mydat$x4) #categorical
##
## 0 1
## 225 75
#Transformation
data2=mydat
#Transforming categorical covariates
vector<-(rep(c(0.5),300))
data2$x4<-data2$x4-vector
#Standarizing quantitative covariates
data2$x1 <-(data2$x1-mean(data2$x1))/sd(data2$x1)
data2$x2 <-(data2$x2-mean(data2$x2))/sd(data2$x2)
data2$x3 <-(data2$x3-mean(data2$x3))/sd(data2$x3)
hist(mydat$x1)
hist(data2$x1 )
hist(mydat$x2)
hist(data2$x2 )
hist(mydat$x3)
hist(data2$x4 )
mod0=glm(factor(y)~x1+x2+x3+x4+x3*x4,data=data2, family = binomial)
summary(mod0)
##
## Call:
## glm(formula = factor(y) ~ x1 + x2 + x3 + x4 + x3 * x4, family = binomial,
## data = data2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5388 -0.2921 -0.1912 -0.1135 2.9019
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.30747 0.38868 -8.509 < 2e-16 ***
## x1 0.90796 0.30214 3.005 0.00265 **
## x2 -0.69337 0.21431 -3.235 0.00121 **
## x3 0.86934 0.29009 2.997 0.00273 **
## x4 1.18054 0.71156 1.659 0.09710 .
## x3:x4 -0.08554 0.54082 -0.158 0.87433
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 157.31 on 299 degrees of freedom
## Residual deviance: 100.03 on 294 degrees of freedom
## AIC: 112.03
##
## Number of Fisher Scoring iterations: 7
Make an R plot with two panels, (a) and (b) (using par(mfrow=c(1,2))), of the predicted probbilities
confint(mod0)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -4.1718833 -2.6273739
## x1 0.3192227 1.5220521
## x2 -1.1353558 -0.2906976
## x3 0.2487803 1.4425669
## x4 -0.2651208 2.5933910
## x3:x4 -1.2716927 0.9533441
#(a)
newdata=data.frame(x1=3.34,x2=3.34, x3=60,x4=0)
newdata
## x1 x2 x3 x4
## 1 3.34 3.34 60 0
newdata1<-predict(mod0, newdata,type = "response")
newdata1
## 1
## 1
#OR
p.a.0=predict(mod0, newdata=data.frame(x1=3.34,x2=3.34, x3=60,x4=0), se.fit=TRUE,interval="prediction", level=0.95)
p.a.1=predict(mod0, newdata=data.frame(x1=3.34,x2=3.34, x3=60,x4=1), se.fit=TRUE,interval="prediction", level=0.95)
cbind(p.a.0,p.a.1)
## p.a.0 p.a.1
## fit 49.56974 45.61793
## se.fit 17.42206 43.84999
## residual.scale 1 1
#When x3=60, the expected value is 49.6 for x4=0
#When x3=60, the expected value is 45.61 for x4=1
#(b)
newdat=data.frame(x1=3.34,x2=3.34, x3=10,x4=0)
newdat
## x1 x2 x3 x4
## 1 3.34 3.34 10 0
newdat1<-predict(mod0, newdat,type = "response")
newdat1
## 1
## 0.9977681
#OR
p.b.0=predict(mod0, newdata=data.frame(x1=3.34,x2=3.34, x3=10,x4=0), se.fit=TRUE,interval="prediction", level=0.95)
p.b.1=predict(mod0, newdata=data.frame(x1=3.34,x2=3.34, x3=10,x4=1), se.fit=TRUE,interval="prediction", level=0.95)
cbind(p.b.0,p.b.1)
## p.b.0 p.b.1
## fit 6.102683 6.42783
## se.fit 3.187436 7.449392
## residual.scale 1 1
#When x3=10, the expected value is 6.10 for x4=0
#When x3=10, the expected value is 6.42 for x4=1
#plot1
par(mfrow=c(1,2))
plot(jitter(y,0.1) ~ x1+x2+x3+x4+x3*x4, xlim=c(-100,100), pch=16, ylab=" ", data=data2)
data.plot <- data.frame(x1=3.34,x2=3.34, x3=60,x4=1)
lp <- predict(mod0, newdata=data.plot, se.fit=TRUE)
pred.prob <- exp(lp$fit)/(1 + exp(lp$fit))
LB <- lp$fit - qnorm(0.975)*lp$se.fit
UB <- lp$fit + qnorm(0.975)*lp$se.fit
LB.p <- exp(LB)/(1 + exp(LB)); UB.p <- exp(UB)/(1 + exp(UB))
#lines(-100:100,pred.prob)
#lines(0.60, LB.p, col="red"); lines(0.60, UB.p, col="blue")
#plot2
plot(jitter(y,0.4) ~ x1+x2+x3+x4+x3*x4, xlim=c(0,60), pch=16, ylab=" ", data=data2)
data.plot <- data.frame(x1=3.34,x2=3.34, x3=10,x4=1)
lp <- predict(mod0, newdata=data.plot, se.fit=TRUE)
pred.prob <- exp(lp$fit)/(1 + exp(lp$fit))
LB <- lp$fit - qnorm(0.975)*lp$se.fit
UB <- lp$fit + qnorm(0.975)*lp$se.fit
LB.p <- exp(LB)/(1 + exp(LB)); UB.p <- exp(UB)/(1 + exp(UB))
#lines(0:300,pred.prob)
#lines(0.60, LB.p, col="red"); lines(0.60, UB.p, col="blue")
4.3
library(MCMCpack)
## Warning: package 'MCMCpack' was built under R version 4.1.3
## Loading required package: coda
## Loading required package: MASS
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2022 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
b.fit<-MCMClogit(factor(y)~x1+x2+x3+x4+x3*x4,mcmc=10000,b0=0,B0=.01,data=mydat)
## Warning in model.response(mf, "numeric"): using type = "numeric" with a factor
## response will be ignored
summary(b.fit)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## (Intercept) -2.81628 1.61141 0.0161141 0.0695247
## x1 0.08585 0.02708 0.0002708 0.0011620
## x2 -0.59286 0.18064 0.0018064 0.0080025
## x3 0.05777 0.01666 0.0001666 0.0007218
## x4 1.59209 1.38875 0.0138875 0.0622860
## x3:x4 -0.01177 0.03335 0.0003335 0.0015448
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## (Intercept) -6.13394 -3.83361 -2.74213 -1.726095 0.28704
## x1 0.03215 0.06623 0.08576 0.103893 0.13995
## x2 -0.97084 -0.71120 -0.58062 -0.469704 -0.24680
## x3 0.02702 0.04619 0.05689 0.068558 0.09166
## x4 -1.23334 0.67421 1.62540 2.516548 4.33532
## x3:x4 -0.07880 -0.03324 -0.01209 0.009471 0.05496
4.4
if(!require(bayesplot)){install.packages("bayesplot")}
## Loading required package: bayesplot
## This is bayesplot version 1.8.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
posterior <- as.array(b.fit)
color_scheme_set("red")
mcmc_intervals(posterior, pars = c("x1", "x3"))
mcmc_areas(
posterior,
pars = c("x1", "x3"),
prob = 0.8, # 80% intervals
prob_outer = 0.99, # 99%
point_est = "mean"
)
```