packages
Cargar paquetes

library(tidyverse)
library(googlesheets4); gs4_deauth()
library(MASS)
library(binom)
library(lme4)
library(scales)
library(AICcmodavg)

Import data

Data

Tabla en hoja LICO2 del Google Sheets [en G-Drive]

La Hoja LICO3 muestra las 2158 unidades muestrales (para efectos de revisar si hay “overdispersion”)

summary(mask)
##    ageclass       Age       mask.on        none.improp         Gender 
##  a.age3:4   Adult   :4   Min.   :  2.00   Min.   :  3.00   females:8  
##  age1  :4   Elder   :4   1st Qu.: 12.00   1st Qu.:  6.00   males  :8  
##  age2  :4   Juvenile:4   Median : 21.50   Median : 17.00              
##  age4  :4   Kid     :4   Mean   : 79.56   Mean   : 55.31              
##                          3rd Qu.: 70.25   3rd Qu.: 62.75              
##                          Max.   :460.00   Max.   :352.00              
##     Decree 
##  a.Pre :8  
##  b.Post:8  
##            
##            
##            
## 

Agregando columnas computadas

mask$pr_maskOn <- mask$mask.on / (mask$none.improp+ mask$mask.on) ## maskUse Proportion
mask$totals <- mask$none.improp+ mask$mask.on ## maskUse totals 

Pooled tables

## pooled by Gender
by_Gender <- mask %>% group_by(Gender)
mask_by_Gender<- as.data.frame(by_Gender %>% summarise( mask.on = sum(mask.on), none.improp = sum(none.improp)))
## Pooled by Ageclass
by_ageclass <- mask %>% group_by(ageclass)
mask_by_ageclass <- 
  as.data.frame(by_ageclass %>% 
                  summarise( mask.on = sum(mask.on),
                             none.improp = sum(none.improp)))
## Pooled by Decree Time
by_Decree <- mask %>% group_by(Decree)
mask_by_Decree <- as.data.frame(by_Decree %>% summarise( mask.on = sum(mask.on), none.improp = sum(none.improp)))
### Pooled by Age Gender and Decree
## Age.Decree
by_age.Decree <- mask %>% group_by(ageclass,Decree)
mask_by_age.Decree <- as.data.frame(by_age.Decree %>% 
                                      summarise( mask.on = sum(mask.on), none.improp = sum(none.improp)))
## Age.Gender
by_age.Gender <- mask %>% group_by(ageclass,Gender)
mask_by_age.Gender <- as.data.frame(by_age.Gender %>% summarise( mask.on = sum(mask.on), none.improp = sum(none.improp)))

Exploratory pooled plots

Se utiliza el Wilson Score (Agresti & Coull, 1998) para calcular los intervalos de confianza de las proporciones.

size=3
## age plot ####
mask_by_ageclass$pr_maskOn <- 
  mask_by_ageclass$mask.on/(mask_by_ageclass$mask.on + 
                              mask_by_ageclass$none.improp)
mask_by_ageclass$totals <- 
  mask_by_ageclass$none.improp + 
  mask_by_ageclass$mask.on           ## maskUse totals 
CIs <- binom.confint(x=mask_by_ageclass$mask.on, 
                     n=mask_by_ageclass$totals, 
                     methods="wilson")
mask_by_ageclass$lower <- CIs[,5]
mask_by_ageclass$upper<- CIs[,6]
mask_by_ageclass
##   ageclass mask.on none.improp pr_maskOn totals     lower     upper
## 1   a.age3    1043         662 0.6117302   1705 0.5883707 0.6345874
## 2     age1      39          40 0.4936709     79 0.3863027 0.6016260
## 3     age2      84          86 0.4941176    170 0.4199266 0.5685687
## 4     age4     107          97 0.5245098    204 0.4561616 0.5919520
p <- ggplot(aes(x=ageclass , y=pr_maskOn, ymin = lower, ymax = upper), 
            data=mask_by_ageclass)
p + geom_point(position= position_dodge(width=0.2), size= size) +
  geom_linerange(position = position_dodge(width=0.2)) + 
  ylim(0.2,0.8) +
  scale_x_discrete(breaks=c("a.age3","age1","age2", "age4"),
        labels=c("Adults", "Children", "Juveniles", "Elders"),
        name= "Age Class") +
  scale_y_continuous(n.breaks = 3, labels=c("20%","40%", "60%", "80%"),
                     breaks=c(0.2,0.4, 0.6,0.8), limits = c(0.2,0.8),
                     name= "Proportion of individuals using mask (%)")

## age.Gender plot ####
mask_by_age.Gender$pr_maskOn <- 
  mask_by_age.Gender$mask.on/(mask_by_age.Gender$mask.on + 
                             mask_by_age.Gender$none.improp)
mask_by_age.Gender$totals <- 
  mask_by_age.Gender$none.improp + 
  mask_by_age.Gender$mask.on           ## maskUse totals 
CIs <- binom.confint(x=mask_by_age.Gender$mask.on, 
                     n=mask_by_age.Gender$totals, 
                     methods="wilson")
mask_by_age.Gender$lower <- CIs[,5]
mask_by_age.Gender$upper<- CIs[,6]
mask_by_age.Gender
##   ageclass  Gender mask.on none.improp pr_maskOn totals     lower     upper
## 1   a.age3 females     353         185 0.6561338    538 0.6150168 0.6950370
## 2   a.age3   males     690         477 0.5912596   1167 0.5628000 0.6191205
## 3     age1 females      14          15 0.4827586     29 0.3138609 0.6556898
## 4     age1   males      25          25 0.5000000     50 0.3664451 0.6335549
## 5     age2 females      15          20 0.4285714     35 0.2798457 0.5914259
## 6     age2   males      69          66 0.5111111    135 0.4276552 0.5939522
## 7     age4 females      34           9 0.7906977     43 0.6479436 0.8857717
## 8     age4   males      73          88 0.4534161    161 0.3784978 0.5305057
p <- ggplot(aes(x=ageclass , y=pr_maskOn, col= Gender,
                ymin = lower, ymax = upper), 
            data=mask_by_age.Gender)
p + geom_hline(yintercept = mask_by_age.Gender$pr_maskOn[1],
               linetype="dashed") +
  geom_point(position= position_dodge(width=0.2), 
               size=size) +
  geom_linerange(position = position_dodge(width=0.2)) +
  ylim(0,1) +
  scale_x_discrete(breaks=c("a.age3","age1","age2", "age4"),
        labels=c("Adults", "Children", "Juveniles", "Elders"),
        name= "Age Class") +
  scale_y_continuous(n.breaks = 5, labels=c("0%","25%", "50%", "75%", "100%"),
                     breaks=c(0, 0.25, 0.50,0.75, 1), limits = c(0,1),
                     name= "Proportion of individuals using mask (%)")

## age.Decree plot
mask_by_age.Decree$pr_maskOn <- mask_by_age.Decree$mask.on/
  (mask_by_age.Decree$mask.on + mask_by_age.Decree$none.improp)

mask_by_age.Decree$totals <- 
  mask_by_age.Decree$none.improp + 
  mask_by_age.Decree$mask.on           ## maskUse totals 
CIs <- binom.confint(x=mask_by_age.Decree$mask.on, 
                     n=mask_by_age.Decree$totals, 
                     methods="wilson")
mask_by_age.Decree$lower <- CIs[,5]
mask_by_age.Decree$upper<- CIs[,6]
mask_by_age.Decree
##   ageclass Decree mask.on none.improp pr_maskOn totals     lower     upper
## 1   a.age3  a.Pre     373         495 0.4297235    868 0.3971718 0.4628945
## 2   a.age3 b.Post     670         167 0.8004779    837 0.7720580 0.8261523
## 3     age1  a.Pre      10          26 0.2777778     36 0.1584834 0.4399249
## 4     age1 b.Post      29          14 0.6744186     43 0.5251621 0.7950670
## 5     age2  a.Pre      36          77 0.3185841    113 0.2398556 0.4092416
## 6     age2 b.Post      48           9 0.8421053     57 0.7263682 0.9146420
## 7     age4  a.Pre      57          76 0.4285714    133 0.3476376 0.5135156
## 8     age4 b.Post      50          21 0.7042254     71 0.5898146 0.7976711
p <- ggplot(aes(x=ageclass , y=pr_maskOn, col= Decree, ymin = lower, ymax = upper), 
            data=mask_by_age.Decree)
p + geom_hline(yintercept = mask_by_age.Decree$pr_maskOn[1],
               linetype= "dashed", col="dark gray") +
  geom_point(position= position_dodge(width=0.2), size=size, aes(size=3) ) +
  geom_linerange(position = position_dodge(width=0.2)) +
  ylim(0,1) +
  scale_x_discrete(breaks=c("a.age3","age1","age2", "age4"),
        labels=c("Adults", "Children", "Juveniles", "Elders"),
        name= "Age Class") +
  scale_y_continuous(n.breaks = 5, labels=c("0%","25%", "50%", "75%", "100%"),
                     breaks=c(0, 0.25, 0.50,0.75, 1), limits = c(0,1),
                     name= "Proportion of individuals using mask (%)")

## age.Decree.Gender Plot
CIs <- binom.confint(x=mask$mask.on, n=mask$totals, methods="wilson")
mask$lower <- CIs[,5]
mask$upper<- CIs[,6]
mask
## # A tibble: 16 x 10
##    ageclass Age   mask.on none.improp Gender Decree pr_maskOn totals lower upper
##    <fct>    <fct>   <dbl>       <dbl> <fct>  <fct>      <dbl>  <dbl> <dbl> <dbl>
##  1 a.age3   Adult     143         143 femal~ a.Pre      0.5      286 0.442 0.558
##  2 a.age3   Adult     230         352 males  a.Pre      0.395    582 0.356 0.435
##  3 a.age3   Adult     210          42 femal~ b.Post     0.833    252 0.782 0.874
##  4 a.age3   Adult     460         125 males  b.Post     0.786    585 0.751 0.818
##  5 age1     Kid         5           9 femal~ a.Pre      0.357     14 0.163 0.612
##  6 age1     Kid         5          17 males  a.Pre      0.227     22 0.101 0.434
##  7 age1     Kid         9           6 femal~ b.Post     0.6       15 0.357 0.802
##  8 age1     Kid        20           8 males  b.Post     0.714     28 0.529 0.847
##  9 age2     Juve~      13          17 femal~ a.Pre      0.433     30 0.274 0.608
## 10 age2     Juve~      23          60 males  a.Pre      0.277     83 0.192 0.382
## 11 age2     Juve~       2           3 femal~ b.Post     0.4        5 0.118 0.769
## 12 age2     Juve~      46           6 males  b.Post     0.885     52 0.770 0.946
## 13 age4     Elder      16           5 femal~ a.Pre      0.762     21 0.549 0.894
## 14 age4     Elder      41          71 males  a.Pre      0.366    112 0.283 0.458
## 15 age4     Elder      18           4 femal~ b.Post     0.818     22 0.615 0.927
## 16 age4     Elder      32          17 males  b.Post     0.653     49 0.513 0.771
width=0.4
p <- ggplot(aes(x=ageclass , y=pr_maskOn, 
                col=Decree, shape= Gender,
                ymin = lower, ymax = upper), 
            data=mask)
p + geom_hline(yintercept = mask$pr_maskOn[1],
               linetype= "dashed", col="dark gray") +
  geom_point(position= position_dodge(width=width), 
               size=size) +
  geom_linerange(position = position_dodge(width=width)) +
  ylim(0,1) +
  scale_x_discrete(breaks=c("a.age3","age1","age2", "age4"),
        labels=c("Adults", "Children", "Juveniles", "Elders"),
        name= "Age category") +
  scale_y_continuous(n.breaks = 5, labels=c("0%","25%", "50%", "75%", "100%"),
                     breaks=c(0, 0.25, 0.50,0.75, 1), limits = c(0,1),
                     name= "Proportion of individuals using face mask (%)") +
  scale_colour_grey(start=0, end=0.6) + 
  theme_bw(base_size = 12)

GLMs. Generalized logistic regression models (Agresti, 2007)

Additive model. Prediction for use of mask , by factor

library(lme4)
## response var (binary)
Y <- cbind(mask$mask.on,mask$none.improp)

## 
glm.glob <- glm(mask ~  Decree + ageclass + Gender, 
             family=binomial, data= mask2)
summary(glm.glob)
## 
## Call:
## glm(formula = mask ~ Decree + ageclass + Gender, family = binomial, 
##     data = mask2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9275  -1.0046   0.5825   0.7779   1.6719  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.008761   0.096503  -0.091   0.9277    
## Decreepost    1.696804   0.099457  17.061   <2e-16 ***
## ageclassage1 -0.692747   0.252131  -2.748   0.0060 ** 
## ageclassage2 -0.235467   0.173846  -1.354   0.1756    
## ageclassage4 -0.108664   0.162065  -0.670   0.5025    
## Gendermales  -0.412245   0.105991  -3.889   0.0001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2921.5  on 2157  degrees of freedom
## Residual deviance: 2569.5  on 2152  degrees of freedom
## AIC: 2581.5
## 
## Number of Fisher Scoring iterations: 4
glm.wo.Decree <- glm(mask ~ ageclass + Gender, 
             family=binomial, data= mask2)
summary(glm.wo.Decree)
## 
## Call:
## glm(formula = mask ~ ageclass + Gender, family = binomial, data = mask2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4712  -1.3335   0.9097   1.0290   1.2368  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.66837    0.08483   7.879  3.3e-15 ***
## ageclassage1 -0.49828    0.23119  -2.155  0.03114 *  
## ageclassage2 -0.44659    0.16184  -2.759  0.00579 ** 
## ageclassage4 -0.32598    0.14932  -2.183  0.02903 *  
## Gendermales  -0.30873    0.09796  -3.152  0.00162 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2921.5  on 2157  degrees of freedom
## Residual deviance: 2895.2  on 2153  degrees of freedom
## AIC: 2905.2
## 
## Number of Fisher Scoring iterations: 4
glm.wo.ageclass <- glm(mask ~ Decree + Gender, 
             family=binomial, data= mask2)
summary(glm.wo.ageclass)
## 
## Call:
## glm(formula = mask ~ Decree + Gender, family = binomial, data = mask2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9035  -0.9826   0.5973   0.7198   1.3856  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.06226    0.09370  -0.664    0.506    
## Decreepost   1.69542    0.09851  17.210  < 2e-16 ***
## Gendermales -0.41487    0.10522  -3.943 8.05e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2921.5  on 2157  degrees of freedom
## Residual deviance: 2578.5  on 2155  degrees of freedom
## AIC: 2584.5
## 
## Number of Fisher Scoring iterations: 4
glm.wo.Gender <- glm(mask ~ Decree + ageclass, 
             family=binomial, data= mask2)
summary(glm.wo.Gender)
## 
## Call:
## glm(formula = mask ~ Decree + ageclass, family = binomial, data = mask2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7960  -1.0595   0.6668   0.7534   1.5981  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.28371    0.06586  -4.308 1.65e-05 ***
## Decreepost    1.67417    0.09872  16.959  < 2e-16 ***
## ageclassage1 -0.66637    0.25120  -2.653  0.00798 ** 
## ageclassage2 -0.27619    0.17375  -1.590  0.11194    
## ageclassage4 -0.15843    0.16023  -0.989  0.32280    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2921.5  on 2157  degrees of freedom
## Residual deviance: 2584.8  on 2153  degrees of freedom
## AIC: 2594.8
## 
## Number of Fisher Scoring iterations: 4

Delta AICs. How much the additive model “losses” with each variable removal

AICc(glm.glob)
## [1] 2581.522
AICc(glm.wo.Decree)
## [1] 2905.212
AICc(glm.wo.ageclass)
## [1] 2584.543
AICc(glm.wo.Gender)
## [1] 2594.861
DeltaAics <- as.data.frame(cbind("Model"= c("Global", "Without.Decree", "Without.Age", "Without.Gender"),
      "Delta.AIC"= c(0,AICc(glm.glob)-AICc(glm.wo.Decree),AICc(glm.glob)-AICc(glm.wo.ageclass), AICc(glm.glob)-AICc(glm.wo.Gender))))

DeltaAics$Delta.AIC <- as.numeric(DeltaAics$Delta.AIC)
DeltaAics$Delta.AIC <- round(DeltaAics$Delta.AIC,2)
DeltaAics
##            Model Delta.AIC
## 1         Global      0.00
## 2 Without.Decree   -323.69
## 3    Without.Age     -3.02
## 4 Without.Gender    -13.34

ORs table for global additive model (intercepts not included)

glm = glm.glob
glob.estim <- c(coef(glm)[2:6] )
glob.ee <- sqrt(diag(vcov(glm)))[2:6]   ## Standar Errors for glm.1
##### IC.95%
glob.ul <- glob.estim + (1.96* glob.ee)
glob.ll <- glob.estim - (1.96* glob.ee)
##### OR <- exp(estim) 
glob.ORs <- exp(glob.estim)
glob.OR.ul <- exp(glob.ul)
glob.OR.ll <- exp(glob.ll)
ORs <- as.data.frame(cbind(
  "Variable"= c("Post-Decree", "Children", "Juveniles" ,"Elders", "Males"),
  "Lower"=as.numeric( round(c(glob.OR.ll),1)),
  "Odds Ratio"=as.numeric( round(c(glob.ORs),1)), 
  "Upper"=as.numeric( round(c(glob.OR.ul),1))))
ORs                               ##### for Table 1.
##      Variable Lower Odds Ratio Upper
## 1 Post-Decree   4.5        5.5   6.6
## 2    Children   0.3        0.5   0.8
## 3   Juveniles   0.6        0.8   1.1
## 4      Elders   0.7        0.9   1.2
## 5       Males   0.5        0.7   0.8

Interaction model

glm.1 <- glm(mask ~  Decree * ageclass * Gender, 
             family=binomial, data= mask2)
summary(glm.1)
## 
## Call:
## glm(formula = mask ~ Decree * ageclass * Gender, family = binomial, 
##     data = mask2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0782  -1.0028   0.6039   0.6934   1.7214  
## 
## Coefficients:
##                                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                          2.112e-14  1.183e-01   0.000  1.00000    
## Decreepost                           1.609e+00  2.063e-01   7.802 6.11e-15 ***
## ageclassage1                        -5.878e-01  5.702e-01  -1.031  0.30259    
## ageclassage2                        -2.683e-01  3.870e-01  -0.693  0.48814    
## ageclassage4                         1.163e+00  5.258e-01   2.212  0.02696 *  
## Gendermales                         -4.256e-01  1.455e-01  -2.924  0.00345 ** 
## Decreepost:ageclassage1             -6.162e-01  7.946e-01  -0.775  0.43808    
## Decreepost:ageclassage2             -1.747e+00  1.006e+00  -1.737  0.08246 .  
## Decreepost:ageclassage4             -1.269e+00  7.814e-01  -1.623  0.10452    
## Decreepost:Gendermales               1.190e-01  2.448e-01   0.486  0.62679    
## ageclassage1:Gendermales            -2.104e-01  7.688e-01  -0.274  0.78431    
## ageclassage2:Gendermales            -2.650e-01  4.659e-01  -0.569  0.56945    
## ageclassage4:Gendermales            -1.287e+00  5.676e-01  -2.267  0.02339 *  
## Decreepost:ageclassage1:Gendermales  1.028e+00  1.040e+00   0.988  0.32326    
## Decreepost:ageclassage2:Gendermales  3.014e+00  1.130e+00   2.667  0.00766 ** 
## Decreepost:ageclassage4:Gendermales  7.217e-01  8.698e-01   0.830  0.40670    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2921.5  on 2157  degrees of freedom
## Residual deviance: 2544.4  on 2142  degrees of freedom
## AIC: 2576.4
## 
## Number of Fisher Scoring iterations: 4

References

  1. Agresti, A. (2007). An Introduction to Categorical Data Analysis. JohnWiley and Sons.

  2. Agresti, A., & Coull, B. A. (1998). Approximate is better than “exact” for interval estimation of binomial proportions. The American Statistician, 52(2), 119–126.