packages

library(googlesheets4)
library(googledrive)
library(ggplot2)
library(DescTools)
library(lme4)
library(AICcmodavg)

Admission to UCA-Berkley - DATA

ss= "https://docs.google.com/spreadsheets/d/19vTKfCXP8CgodnIbG5wgHCj15uLypwNhdzUox-Zl7fQ/edit?usp=sharing"
hoja= "admit"
rango = "D4:G404"
#
gs4_deauth()
admit <- read_sheet(ss,
                   sheet=hoja,
                   range=rango,
                   col_names=TRUE
                   )
## Reading from "Factores de riesgo"
## Range "'admit'!D4:G404"
admit$admit.F <- as.factor(admit$admit)
admit$gre <- as.numeric(admit$gre)
admit$gpa <- as.numeric(admit$gpa)
admit$rank <- as.factor(admit$rank)
admit$admit <- as.numeric(admit$admit)

ggPLOTS

ggplot(aes(x= gre, y= admit , 
           col= gre ), 
       data= admit ) +
  geom_point(size= 0.7, 
               position= position_jitter(width= 1.4 , height =0.1 )) + 
  geom_smooth(method="glm", method.args=list("binomial"), se=TRUE)
## `geom_smooth()` using formula 'y ~ x'

#
ggplot(aes(x= gpa, y= admit , 
           col= gpa), 
       data= admit ) +
  geom_point(size= 0.7, 
               position= position_jitter(width= 0.2 , height =0.1 )) + 
  geom_smooth(method="glm", method.args=list("binomial"), se=TRUE)
## `geom_smooth()` using formula 'y ~ x'

#
ggplot(aes(x= as.factor(rank), y= admit , 
           fill= as.factor(rank) ), 
       data= admit ) +
  geom_col() +
  geom_point(size= 0.7, 
               position= position_jitter(width= 0.4 , height =0.1 ))

Models

m1 <- glm(admit ~ gre + gpa + rank, 
          data=admit, family= binomial )
m2 <- glm(admit ~ gre + gpa       , 
          data=admit, family= binomial)
m3 <- glm(admit ~ gre +       rank, 
          data=admit, family= binomial)
m4 <- glm(admit ~       gpa + rank, 
          data=admit, family= binomial)
m5 <- glm(admit ~ gre, 
          data=admit, family= binomial)
m6 <- glm(admit ~ gpa, 
          data=admit, family= binomial)
m7 <- glm(admit ~ rank, 
          data=admit, family= binomial)

AIC, AICc, Wi

n.= length(admit$admit)
## extracting AICc
extractAIC(m1)[2]
## [1] 470.5175
k=AICc(m1, return.K = TRUE)
AICc.m1 <- extractAIC(m1)[2] + (((2*k)*(k+1))/(n.-k-1))
k1=AICc(m1, return.K = TRUE)

extractAIC(m2)[2]
## [1] 486.344
k=AICc(m2, return.K = TRUE)
AICc.m2 <- extractAIC(m2)[2] + (((2*k)*(k+1))/(n.-k-1))
k2=AICc(m2, return.K = TRUE)

extractAIC(m3)[2]
## [1] 474.5318
k=AICc(m3, return.K = TRUE)
AICc.m3 <- extractAIC(m3)[2] + (((2*k)*(k+1))/(n.-k-1))
k3=AICc(m3, return.K = TRUE)

extractAIC(m4)[2]
## [1] 472.8753
k=AICc(m4, return.K = TRUE)
AICc.m4 <- extractAIC(m4)[2] + (((2*k)*(k+1))/(n.-k-1))
k4=AICc(m4, return.K = TRUE)

extractAIC(m5)[2]
## [1] 490.0561
k=AICc(m5, return.K = TRUE)
AICc.m5 <- extractAIC(m5)[2] + (((2*k)*(k+1))/(n.-k-1))
k5=AICc(m5, return.K = TRUE)

extractAIC(m6)[2]
## [1] 490.9676
k=AICc(m6, return.K = TRUE)
AICc.m6 <- extractAIC(m6)[2] + (((2*k)*(k+1))/(n.-k-1))
k6=AICc(m6, return.K = TRUE)

extractAIC(m7)[2]
## [1] 482.9667
k=AICc(m7, return.K = TRUE)
AICc.m7 <- extractAIC(m7)[2] + (((2*k)*(k+1))/(n.-k-1))
k7=AICc(m7, return.K = TRUE)

########## AIC table #####
aics<- data.frame(paste("m",1:7,sep=""),
                  c(AICc.m1,AICc.m2,AICc.m3, AICc.m4, AICc.m5, 
                    AICc.m6, AICc.m7),
                  c(k1,k2,k3,k4,k5,k6,k7),
                  row.names=NULL) 
colnames(aics) <- c("model","AICc", "k") 
# sort according to AIC
aics<-aics[order(aics$AICc),] 

wi (weights)

for(i in 1:dim(aics)[1]){
  aics$deltaAIC[i]<-aics$AICc[1]-aics$AICc[i]}
wi<-exp(aics$deltaAIC/2)
wi <- wi/sum(wi)
aics$wi <- round(wi/sum(wi), 3)
aics
##   model     AICc k   deltaAIC    wi
## 1    m1 470.7312 6   0.000000 0.686
## 4    m4 473.0275 5  -2.296303 0.218
## 3    m3 474.6841 5  -3.952839 0.095
## 7    m7 483.0680 4 -12.336751 0.001
## 2    m2 486.4046 3 -15.673355 0.000
## 5    m5 490.0864 2 -19.355132 0.000
## 6    m6 490.9978 2 -20.266616 0.000

Delta AICs. Cuánto “pierde” el modelo sacando cada factor?

Deviance Gain Method [ loss of GOF]

###
####

m1$formula; AICc(m1)          #########   GLobal 
## admit ~ gre + gpa + rank
## [1] 470.7312
##########################  Contribution  ####
m2$formula; AICc(m2); AICc(m1) - AICc(m2)         ### 'Rank' contribution
## admit ~ gre + gpa
## [1] 486.4046
## [1] -15.67335
m3$formula; AICc(m3); AICc(m1) - AICc(m3)         ### 'GPA' contribution
## admit ~ gre + rank
## [1] 474.6841
## [1] -3.952839
m4$formula; AICc(m4); AICc(m1) - AICc(m4)         ### 'GRE' contribution
## admit ~ gpa + rank
## [1] 473.0275
## [1] -2.296303
#
DeltaAics <- as.data.frame(cbind("Model"= c("m1", "m2", "m3", "m4"),
      "Delta.AIC"= c(0,AICc(m1)-AICc(m2),AICc(m1)-AICc(m3), AICc(m1)-AICc(m4))))

DeltaAics$Delta.AIC <- as.numeric(DeltaAics$Delta.AIC)
DeltaAics$Delta.AIC <- round(DeltaAics$Delta.AIC,2)
DeltaAics
##   Model Delta.AIC
## 1    m1      0.00
## 2    m2    -15.67
## 3    m3     -3.95
## 4    m4     -2.30