getwd()
## [1] "C:/Users/USUARIO/OneDrive - University of Texas at El Paso/Documents/Exam 2"
setwd("C:\\Users\\USUARIO\\OneDrive - University of Texas at El Paso\\Documents\\Exam 2")
mydat<-read.csv("trauma.csv")
#mydat


str(mydat)
## 'data.frame':    300 obs. of  5 variables:
##  $ x1: int  1 9 29 10 2 9 25 2 11 16 ...
##  $ x2: num  7.84 7.84 2.93 7.84 7.84 ...
##  $ x3: int  25 63 32 61 43 27 30 25 42 80 ...
##  $ x4: int  1 0 0 0 1 0 1 0 1 1 ...
##  $ y : int  0 0 0 0 0 0 1 0 0 0 ...

Transformation

Fit the above model after transforming the covariates appropriately (standardizing quantitative covariates and subtracting 0.5 from a binary covariate)

#Inspection

table(mydat$y) #categorical
## 
##   0   1 
## 278  22
table(mydat$x1) #numeric
## 
##  1  2  3  4  5  6  8  9 10 11 12 13 14 16 17 18 19 20 22 25 26 27 29 30 33 34 
## 18 11  2 26 10  2 10 52 20  6  4 24  8 15  5  8  2 11  5 18  2  4 12  1  1 11 
## 35 38 43 45 50 54 75 
##  1  4  1  1  3  1  1
table(mydat$x2) #numeric
## 
##      0 1.4652 2.1978 2.9304 3.2212  3.512 3.8672 4.0936 4.2112 4.4488 4.6196 
##      1      2      1      4      1      1      3      2      2      1      1 
##  4.804 5.0304 5.2346 5.3856 5.6764 5.7408 5.8806 5.9672 6.0848 6.3756 6.6132 
##      2      2      1      2      1      1      1      8      1      2      2 
## 6.6776 6.8174  6.904 7.1082   7.55 7.8408 
##      2      3     12      9     25    207
table(mydat$x3) #numeric
## 
##  1  2  3  4  5  6  7  8  9 10 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 
##  3  4  5  1  1  2  1  2  2  3  1  2  2  6 10  5  8 10  8 11  9 13 10  9 12  4 
## 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 55 
##  9  7 10  5  9  4  5  7  5  5  7  8  3  4  7  3  4  1  4  3  3  3  4  1  3  2 
## 56 59 61 62 63 64 65 66 68 69 70 71 73 74 80 82 84 90 94 
##  4  1  3  1  2  1  3  2  1  1  1  1  1  3  1  1  1  1  1
table(mydat$x4) #categorical
## 
##   0   1 
## 225  75
#Transformation

data2=mydat

#Transforming categorical covariates
vector<-(rep(c(0.5),300))
data2$x4<-data2$x4-vector

#Standarizing quantitative covariates
data2$x1 <-(data2$x1-mean(data2$x1))/sd(data2$x1)
data2$x2 <-(data2$x2-mean(data2$x2))/sd(data2$x2)
data2$x3 <-(data2$x3-mean(data2$x3))/sd(data2$x3)

hist(mydat$x1)

hist(data2$x1 )

hist(mydat$x2)

hist(data2$x2 )

hist(mydat$x3)

hist(data2$x4 )

mod0=glm(factor(y)~x1+x2+x3+x4+x3*x4,data=data2,  family = binomial)
summary(mod0)
## 
## Call:
## glm(formula = factor(y) ~ x1 + x2 + x3 + x4 + x3 * x4, family = binomial, 
##     data = data2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5388  -0.2921  -0.1912  -0.1135   2.9019  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.30747    0.38868  -8.509  < 2e-16 ***
## x1           0.90796    0.30214   3.005  0.00265 ** 
## x2          -0.69337    0.21431  -3.235  0.00121 ** 
## x3           0.86934    0.29009   2.997  0.00273 ** 
## x4           1.18054    0.71156   1.659  0.09710 .  
## x3:x4       -0.08554    0.54082  -0.158  0.87433    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 157.31  on 299  degrees of freedom
## Residual deviance: 100.03  on 294  degrees of freedom
## AIC: 112.03
## 
## Number of Fisher Scoring iterations: 7

Make an R plot with two panels, (a) and (b) (using par(mfrow=c(1,2))), of the predicted probbilities

confint(mod0)
## Waiting for profiling to be done...
##                  2.5 %     97.5 %
## (Intercept) -4.1718833 -2.6273739
## x1           0.3192227  1.5220521
## x2          -1.1353558 -0.2906976
## x3           0.2487803  1.4425669
## x4          -0.2651208  2.5933910
## x3:x4       -1.2716927  0.9533441
#(a)
newdata=data.frame(x1=3.34,x2=3.34, x3=60,x4=0)
newdata
##     x1   x2 x3 x4
## 1 3.34 3.34 60  0
newdata1<-predict(mod0, newdata,type = "response")
newdata1
## 1 
## 1
#OR
p.a.0=predict(mod0, newdata=data.frame(x1=3.34,x2=3.34, x3=60,x4=0), se.fit=TRUE,interval="prediction", level=0.95) 
p.a.1=predict(mod0, newdata=data.frame(x1=3.34,x2=3.34, x3=60,x4=1), se.fit=TRUE,interval="prediction", level=0.95) 
cbind(p.a.0,p.a.1)
##                p.a.0    p.a.1   
## fit            49.56974 45.61793
## se.fit         17.42206 43.84999
## residual.scale 1        1
#When x3=60, the expected value is 49.6 for x4=0
#When x3=60, the expected value is 45.61 for x4=1

#(b)
newdat=data.frame(x1=3.34,x2=3.34, x3=10,x4=0)
newdat
##     x1   x2 x3 x4
## 1 3.34 3.34 10  0
newdat1<-predict(mod0, newdat,type = "response")
newdat1
##         1 
## 0.9977681
#OR
p.b.0=predict(mod0, newdata=data.frame(x1=3.34,x2=3.34, x3=10,x4=0), se.fit=TRUE,interval="prediction", level=0.95) 
p.b.1=predict(mod0, newdata=data.frame(x1=3.34,x2=3.34, x3=10,x4=1), se.fit=TRUE,interval="prediction", level=0.95) 

cbind(p.b.0,p.b.1)
##                p.b.0    p.b.1   
## fit            6.102683 6.42783 
## se.fit         3.187436 7.449392
## residual.scale 1        1
#When x3=10, the expected value is 6.10 for x4=0
#When x3=10, the expected value is 6.42 for x4=1
#plot1
par(mfrow=c(1,2))
plot(jitter(y,0.1) ~ x1+x2+x3+x4+x3*x4, xlim=c(-100,100), pch=16, ylab=" ", data=data2)

data.plot <- data.frame(x1=3.34,x2=3.34, x3=60,x4=1)
lp <- predict(mod0, newdata=data.plot, se.fit=TRUE)

pred.prob <- exp(lp$fit)/(1 + exp(lp$fit))
LB <- lp$fit - qnorm(0.975)*lp$se.fit
UB <- lp$fit + qnorm(0.975)*lp$se.fit
LB.p <- exp(LB)/(1 + exp(LB)); UB.p <- exp(UB)/(1 + exp(UB))
#lines(-100:100,pred.prob)
#lines(0.60, LB.p, col="red"); lines(0.60, UB.p, col="blue")

#plot2

plot(jitter(y,0.4) ~ x1+x2+x3+x4+x3*x4, xlim=c(0,60), pch=16, ylab=" ", data=data2)

data.plot <- data.frame(x1=3.34,x2=3.34, x3=10,x4=1)
lp <- predict(mod0, newdata=data.plot, se.fit=TRUE)

pred.prob <- exp(lp$fit)/(1 + exp(lp$fit))
LB <- lp$fit - qnorm(0.975)*lp$se.fit
UB <- lp$fit + qnorm(0.975)*lp$se.fit
LB.p <- exp(LB)/(1 + exp(LB)); UB.p <- exp(UB)/(1 + exp(UB))
#lines(0:300,pred.prob)
#lines(0.60, LB.p, col="red"); lines(0.60, UB.p, col="blue")

4.3

library(MCMCpack)
## Warning: package 'MCMCpack' was built under R version 4.1.3
## Loading required package: coda
## Loading required package: MASS
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2022 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
b.fit<-MCMClogit(factor(y)~x1+x2+x3+x4+x3*x4,mcmc=10000,b0=0,B0=.01,data=mydat)
## Warning in model.response(mf, "numeric"): using type = "numeric" with a factor
## response will be ignored
summary(b.fit)
## 
## Iterations = 1001:11000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                 Mean      SD  Naive SE Time-series SE
## (Intercept) -2.81628 1.61141 0.0161141      0.0695247
## x1           0.08585 0.02708 0.0002708      0.0011620
## x2          -0.59286 0.18064 0.0018064      0.0080025
## x3           0.05777 0.01666 0.0001666      0.0007218
## x4           1.59209 1.38875 0.0138875      0.0622860
## x3:x4       -0.01177 0.03335 0.0003335      0.0015448
## 
## 2. Quantiles for each variable:
## 
##                 2.5%      25%      50%       75%    97.5%
## (Intercept) -6.13394 -3.83361 -2.74213 -1.726095  0.28704
## x1           0.03215  0.06623  0.08576  0.103893  0.13995
## x2          -0.97084 -0.71120 -0.58062 -0.469704 -0.24680
## x3           0.02702  0.04619  0.05689  0.068558  0.09166
## x4          -1.23334  0.67421  1.62540  2.516548  4.33532
## x3:x4       -0.07880 -0.03324 -0.01209  0.009471  0.05496

4.4

if(!require(bayesplot)){install.packages("bayesplot")}
## Loading required package: bayesplot
## This is bayesplot version 1.8.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
posterior <- as.array(b.fit)
color_scheme_set("red")
mcmc_intervals(posterior, pars = c("x1", "x3"))

mcmc_areas(
  posterior, 
  pars = c("x1", "x3"),
  prob = 0.8, # 80% intervals
  prob_outer = 0.99, # 99%
  point_est = "mean"
)

```