Outline

We’d like to include multiple predictor variables in a logistic regression model and show the fitted model in a ggplot2 plot.

  1. Build the logistic regression model
  2. Construct new data for prediction
  3. Plot with ggplot2 using two layers (predicted probabilities from the new data, `geom_point()` from the original data)

1 Model

# Fit a binomial GLM with a logit link: aval is modelled on dlevel and d2,
# using the rows of df2. The fitted object is kept as mod1 for later prediction.
mod1 <- glm(
  formula = aval ~ dlevel + d2,
  family = binomial(link = "logit"),
  data = df2
)
# Print the coefficient table, deviances and AIC of the fit.
summary(mod1)
## 
## Call:
## glm(formula = aval ~ dlevel + d2, family = binomial(link = "logit"), 
##     data = df2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9356  -1.3161   0.7763   0.9124   1.0685  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  4.56336    4.41640   1.033    0.301
## dlevel      -0.42683    0.61162  -0.698    0.485
## d2           0.01059    0.02055   0.515    0.606
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 87.148  on 69  degrees of freedom
## Residual deviance: 84.682  on 67  degrees of freedom
## AIC: 90.682
## 
## Number of Fisher Scoring iterations: 4

2 Plot

Modification

inv_logit <- function(y) 1 / (1 + exp(-y))

#define new data frame that contains predictor variable
dlevel <- seq(min(df2$dlevel),max(df2$dlevel),length=100)
newdata <- data.frame(dlevel,d2=dlevel^2)
#use fitted model to predict values
preddata <- predict(mod1, newdata, se.fit=TRUE)
preddata$LoCI <- preddata$fit - 1.96*preddata$se.fit
preddata$HiCI <- preddata$fit + 1.96*preddata$se.fit

newdata$prob.Prediction <- inv_logit(preddata$fit)
newdata$prob.LoCI <- inv_logit(preddata$LoCI)
newdata$prob.HiCI <- inv_logit(preddata$HiCI)

# Inverse logit (logistic) transform: maps a value y on the log-odds scale to
# 1 / (1 + exp(-y)). Works elementwise on numeric vectors.
inv_logit <- function(y) {
  neg_exp <- exp(-y)
  1 / (1 + neg_exp)
}

# Build a prediction grid over the observed range of dlevel, predict from mod1
# on the link scale, and store back-transformed probabilities with a Wald-type
# confidence band in newdata.

# 100 evenly spaced dlevel values between the observed minimum and maximum.
# na.rm = TRUE keeps the endpoints defined when dlevel contains missing values;
# length.out is spelled out instead of relying on partial matching of `length`.
dlevel <- seq(
  min(df2$dlevel, na.rm = TRUE),
  max(df2$dlevel, na.rm = TRUE),
  length.out = 100
)
# The grid must carry both predictors named in mod1's formula; d2 is supplied
# here as the square of dlevel.
newdata <- data.frame(dlevel = dlevel, d2 = dlevel^2)

# Predict on the link (log-odds) scale with standard errors. The interval is
# formed on this scale and transformed afterwards, so the bounds passed through
# inv_logit() below always land inside [0, 1].
preddata <- predict(mod1, newdata = newdata, type = "link", se.fit = TRUE)
# fit +/- 1.96 standard errors (1.96 ~ 97.5% standard normal quantile, i.e. an
# approximate 95% interval).
preddata$LoCI <- preddata$fit - 1.96 * preddata$se.fit
preddata$HiCI <- preddata$fit + 1.96 * preddata$se.fit

# Back-transform the point prediction and both bounds to the probability scale.
newdata$prob.Prediction <- inv_logit(preddata$fit)
newdata$prob.LoCI <- inv_logit(preddata$LoCI)
newdata$prob.HiCI <- inv_logit(preddata$HiCI)

Plot without modification

# Baseline plot built directly from df2: the smoother is drawn against
# dlevel + d2 using geom_smooth()'s defaults, while the blue points are drawn
# against dlevel alone.
ggplot(data = df2, mapping = aes(x = dlevel + d2, y = aval)) +
  geom_smooth() +
  geom_point(
    data = df2,
    mapping = aes(x = dlevel, y = aval),
    color = "blue"
  ) +
  labs(x = "DLEVEL", y = "response", title = "p_title") +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Plot with newdata

# Plot the model-based curve from newdata: prob.Prediction is the line and
# prob.LoCI / prob.HiCI give the ribbon. stat = "identity" makes geom_smooth()
# draw these precomputed values instead of fitting its own smoother. The
# observed aval values from df2 are overlaid as blue points.
ggplot(data = newdata, mapping = aes(x = dlevel, y = prob.Prediction)) +
  geom_smooth(
    mapping = aes(ymin = prob.LoCI, ymax = prob.HiCI),
    stat = "identity",
    color = "#6d8bc3",
    fill = "#bbd1e7"
  ) +
  geom_point(
    data = df2,
    mapping = aes(x = dlevel, y = aval),
    color = "blue"
  ) +
  labs(x = "DLEVEL", y = "response", title = "p_title") +
  theme_minimal()

Happy Coding!

Reference

Y.Liu