Mit diesem Code und dem über GESIS frei verfügbaren Datensatz zum Flash Eurobarometer 404 können die Analysen im Aufsatz “Gesundheitsbezogene Internet-Nutzung in Europa: Second-level Digital Divides?”, der zur Tagung der Ad-Hoc-Gruppe Gesundheitskommunikation 2015 in Lugano erscheint, repliziert werden.

Pakete

require(haven)
require(tidyr)
require(stringi)
require(ggplot2); theme_set(theme_bw())
require(lme4)
require(arm)
require(broom)
require(survey)
require(srvyr)
require(scales)
require(ggstance)
require(dplyr)

sessionInfo()
## R version 3.2.3 (2015-12-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.3 (El Capitan)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] dplyr_0.4.3         ggstance_0.0.0.9000 scales_0.4.0       
##  [4] srvyr_0.1.0         survey_3.30-3       broom_0.4.0        
##  [7] arm_1.8-6           MASS_7.3-45         lme4_1.1-11        
## [10] Matrix_1.2-4        ggplot2_2.1.0       stringi_1.0-1      
## [13] tidyr_0.4.1         haven_0.2.0        
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.3      formatR_1.3      nloptr_1.0.4     plyr_1.8.3      
##  [5] tools_3.2.3      digest_0.6.9     evaluate_0.8.3   gtable_0.2.0    
##  [9] nlme_3.1-125     lattice_0.20-33  psych_1.5.8      DBI_0.3.1       
## [13] yaml_2.1.13      parallel_3.2.3   coda_0.18-1      stringr_1.0.0   
## [17] knitr_1.12.3     R6_2.1.2         rmarkdown_0.9.5  minqa_1.2.4     
## [21] purrr_0.2.1      reshape2_1.4.1   magrittr_1.5     htmltools_0.3   
## [25] splines_3.2.3    assertthat_0.1   abind_1.4-3      mnormt_1.5-3    
## [29] colorspace_1.2-6 munsell_0.4.3

Funktionen

remove_label = function(x) {
  attr(x, "value.labels") <- NULL
  attr(x, "label") <- NULL
  attr(x, "labels") <- NULL
  class(x) <- NULL
  x
}

bivariat = function(group_var) {
  out = dd_w %>% group_by_("land", group_var) %>% summarise_each(funs(survey_mean), starts_with("dv")) %>% select(-ends_with("_se")) %>% gather(variable, P, -(1:2)) %>% group_by_(group_var, "variable") %>% summarise(P = mean(P)) %>% mutate(iv = group_var)
  names(out)[1] = "iv_level"
  out
}

Aufbereiten des Datensatzes

d = read_sav("ZA5948_v1-0-0.sav") %>% mutate(land = stri_trim(isocntry), q1 = ifelse(q1 == 8, NA, ifelse(q1 == 7, 6, q1)) * -1 + 6, q2 = ifelse(q2 > 6, NA, q2) * -1 + 6)
d[] = d %>% lapply(remove_label)

dd = d %>% filter(q1 > 0) %>% mutate(iv_age = cut_interval(d1, n = 4), iv_age = plyr::mapvalues(iv_age, from = levels(iv_age), to = levels(iv_age)[c(1:3,3)]), iv_age = factor(iv_age, labels=c("15-35", "36-55", "56+")), iv_edu = d4r2, iv_edu = ifelse(iv_edu == 4 & d1 <= 15, 1, ifelse(iv_edu == 4 & d1 <= 19, 2, ifelse(iv_edu == 4 & d1 > 19, 3, ifelse(iv_edu == 5, 1, ifelse(iv_edu > 5, NA, iv_edu))))), iv_edu = factor(iv_edu, labels=c("-15", "16-19", "20+")), iv_sex = factor(d2, labels=c("männlich", "weiblich")), iv_urban = ifelse(d13 > 3, NA, d13), iv_urban = factor(iv_urban, labels=c("Land", "Kleinstadt", "Großstadt")), iv_health = ifelse(q33 > 4, NA, q33), iv_health = plyr::mapvalues(iv_health, from = 1:4, to = c(1:3, 3)), iv_health = factor(iv_health, labels=c("Sehr gut", "Ziemlich gut", "Schlecht")), iv_know = ifelse(q36 > 4, NA, q36), iv_know = plyr::mapvalues(iv_know, from = 1:4, to = c(1:3,3)), iv_know = factor(iv_know, labels=c("Sehr gut", "Ziemlich gut", "Schlecht")), iv_inet = plyr::mapvalues(q1, from = 1:5, to = c(1,1,1,1,2)), iv_inet = factor(iv_inet, labels=c("seltener", "(fast) täglich")), dv_health_use = ifelse(q2 > 0, 1, 0), dv_serious = ifelse(q3.2 == 1 | q3.3 == 1 | q3.4 == 1, 1, 0), dv_serious = ifelse(dv_health_use == 0, 0, dv_serious), dv_fun = q3.1, dv_fun = ifelse(dv_health_use == 0, 0, dv_fun)) %>% select(uniqid, land, w1, starts_with("iv"), starts_with("dv")) %>% na.omit
dd %>% summary
##      uniqid             land                 w1           iv_age    
##  Min.   : 1000018   Length:18035       Min.   :0.3000   15-35:4810  
##  1st Qu.: 7005398   Class :character   1st Qu.:0.6284   36-55:7127  
##  Median :13016348   Mode  :character   Median :0.9435   56+  :6098  
##  Mean   :20049358                      Mean   :1.1160               
##  3rd Qu.:36000914                      3rd Qu.:1.4065               
##  Max.   :46033155                      Max.   :3.0672               
##    iv_edu          iv_sex            iv_urban           iv_health    
##  -15  :1252   männlich: 7715   Land      :5439   Sehr gut    : 5163  
##  16-19:6949   weiblich:10320   Kleinstadt:6803   Ziemlich gut:11347  
##  20+  :9834                    Großstadt :5793   Schlecht    : 1525  
##                                                                      
##                                                                      
##                                                                      
##          iv_know                iv_inet      dv_health_use   
##  Sehr gut    : 2328   seltener      : 4886   Min.   :0.0000  
##  Ziemlich gut:13138   (fast) täglich:13149   1st Qu.:0.0000  
##  Schlecht    : 2569                          Median :1.0000  
##                                              Mean   :0.7279  
##                                              3rd Qu.:1.0000  
##                                              Max.   :1.0000  
##    dv_serious         dv_fun      
##  Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1.0000   Median :0.0000  
##  Mean   :0.5137   Mean   :0.3899  
##  3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000
dd_w = dd %>% select(-dv_health_use) %>% as_survey_design(ids = uniqid, weights = w1)
dd_w
## Independent Sampling design (with replacement)
## [1] "called via srvyr"
## Sampling variables:
##  - ids: uniqid
##  - weights: w1
## Data variables: uniqid (dbl), land (chr), w1 (dbl), iv_age (fctr), iv_edu
##   (fctr), iv_sex (fctr), iv_urban (fctr), iv_health (fctr), iv_know
##   (fctr), iv_inet (fctr), dv_serious (dbl), dv_fun (dbl)

Abbildung 1: Gesundheitsbezogene Internet-Nutzung im Ländervergleich

vd1_1 = dd_w %>% group_by(land) %>% summarise_each(funs(survey_mean), starts_with("dv")) %>% select(-ends_with("_se")) %>% gather(variable, P, -land) %>% mutate(variable = factor(variable, labels=c("Allgemeine Informationen zu Gesundheitsthemen", "Spezifische Informationen zu Gesundheitsproblemen")))
vd1_2 = vd1_1 %>% group_by(variable) %>% summarise(P=mean(P))
vd1_1 %>% mutate(land = factor(land, levels=sort(unique(land), decreasing = T))) %>% ggplot(aes(P, land, label=land)) + geom_vline(data = vd1_2, aes(xintercept=P), linetype=2, color="grey20") + geom_text() + scale_x_continuous(labels = scales::percent, limits = c(.2, .8)) + facet_wrap("variable") + labs(x=NULL, y=NULL)

Abbildung 2: Interindividuelle Unterschiede in der gesundheitsbezogenen Internet-Nutzung: Bivariate Anteile

vd2 = dd %>% select(starts_with("iv")) %>% names %>% lapply(bivariat) %>% bind_rows()
vd2 %>% mutate(iv = factor(iv, levels=unique(iv)[c(1,3,4,2,6,5,7)], labels=c("Alter", "Geschl.", "Urbanität", "Bildung", "Gesundheits-\nwissen", "Gesundheit", "Internet-\nnutzung")), iv_level = factor(iv_level, levels=unique(iv_level)[c(3:1, 6:4, 8:7, 11:9, 12:14, 16:15)]), variable = factor(variable, labels=c("Allgemeine Informationen zu Gesundheitsthemen", "Spezifische Informationen zu Gesundheitsproblemen"))) %>% ggplot(aes(P, iv_level)) + geom_point() + facet_grid(iv~variable, scales = "free_y", space = "free_y", switch = "y") + scale_x_continuous(labels = scales::percent, limits = c(.2, .65)) + labs(x=NULL, y=NULL)

Abbildung 3: Interindividuelle Unterschiede in der gesundheitsbezogenen Internet-Nutzung: Multivariates Mehrebenen-Modell

ddd = dd %>% mutate(iv_age. = relevel(iv_age, ref="36-55"), iv_sex. = relevel(iv_sex, ref="männlich"), iv_urban.=relevel(iv_urban, ref="Kleinstadt"), iv_health.=relevel(iv_health, ref="Ziemlich gut"), iv_edu. = relevel(iv_edu, ref="16-19"), iv_know.=relevel(iv_know, ref="Ziemlich gut"), iv_inet.=relevel(iv_inet, ref="(fast) täglich"))
m3_ser = glmer(dv_serious ~ iv_age. + iv_sex. + iv_urban. + iv_edu. + iv_health. + iv_know. + iv_inet. + (1 | land), family = binomial, data = ddd, nAGQ = 1)
m3_ser %>% display(detail=T)
## glmer(formula = dv_serious ~ iv_age. + iv_sex. + iv_urban. + 
##     iv_edu. + iv_health. + iv_know. + iv_inet. + (1 | land), 
##     data = ddd, family = binomial, nAGQ = 1)
##                    coef.est coef.se z value Pr(>|z|)
## (Intercept)          0.03     0.07    0.43    0.66  
## iv_age.15-35         0.00     0.04   -0.08    0.94  
## iv_age.56+          -0.38     0.04  -10.00    0.00  
## iv_sex.weiblich      0.47     0.03   14.82    0.00  
## iv_urban.Land       -0.10     0.04   -2.66    0.01  
## iv_urban.Großstadt   0.04     0.04    1.05    0.29  
## iv_edu.-15          -0.34     0.07   -5.17    0.00  
## iv_edu.20+           0.30     0.03    9.01    0.00  
## iv_health.Sehr gut  -0.35     0.04   -9.78    0.00  
## iv_health.Schlecht   0.46     0.06    7.93    0.00  
## iv_know.Sehr gut    -0.02     0.05   -0.48    0.63  
## iv_know.Schlecht    -0.17     0.05   -3.76    0.00  
## iv_inet.seltener    -0.67     0.04  -18.17    0.00  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  land     (Intercept) 0.26    
##  Residual             1.00    
## ---
## number of obs: 18035, groups: land, 28
## AIC = 23788.6, DIC = 23576.1
## deviance = 23668.3
m3_fun = glmer(dv_fun ~ iv_age. + iv_sex. + iv_urban. + iv_edu. + iv_health. + iv_know. + iv_inet. + (1 | land), family = binomial, data = ddd, nAGQ = 1)
m3_ser %>% display(detail=T)
## glmer(formula = dv_serious ~ iv_age. + iv_sex. + iv_urban. + 
##     iv_edu. + iv_health. + iv_know. + iv_inet. + (1 | land), 
##     data = ddd, family = binomial, nAGQ = 1)
##                    coef.est coef.se z value Pr(>|z|)
## (Intercept)          0.03     0.07    0.43    0.66  
## iv_age.15-35         0.00     0.04   -0.08    0.94  
## iv_age.56+          -0.38     0.04  -10.00    0.00  
## iv_sex.weiblich      0.47     0.03   14.82    0.00  
## iv_urban.Land       -0.10     0.04   -2.66    0.01  
## iv_urban.Großstadt   0.04     0.04    1.05    0.29  
## iv_edu.-15          -0.34     0.07   -5.17    0.00  
## iv_edu.20+           0.30     0.03    9.01    0.00  
## iv_health.Sehr gut  -0.35     0.04   -9.78    0.00  
## iv_health.Schlecht   0.46     0.06    7.93    0.00  
## iv_know.Sehr gut    -0.02     0.05   -0.48    0.63  
## iv_know.Schlecht    -0.17     0.05   -3.76    0.00  
## iv_inet.seltener    -0.67     0.04  -18.17    0.00  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  land     (Intercept) 0.26    
##  Residual             1.00    
## ---
## number of obs: 18035, groups: land, 28
## AIC = 23788.6, DIC = 23576.1
## deviance = 23668.3
vd3 = list(ser = m3_ser, fun = m3_fun) %>% lapply(function(x) tidy(x, effects="fixed", conf.int=T)) %>% bind_rows(.id = "variable") %>% filter(term != "(Intercept)") %>% extract(term, c("iv", "iv_level"), "([[:alnum:]\\_]+).([[:graph:][:space:]]+)")
vd3 %>% mutate(variable = factor(variable, labels=c("Allgemeine Informationen zu Gesundheitsthemen", "Spezifische Informationen zu Gesundheitsproblemen")), iv = factor(iv, levels=unique(iv)[c(1,2,3,4,6,5,7)], labels=c("Alter", "Gesc.", "Urbanität", "Bildung", "Gesundheits-\nwissen", "Gesundheit", "Inter-\nnet")), iv_level = factor(iv_level, levels=unique(iv_level)[c(2,1,3,5,4,7,6,8:10)]), ref_cat = plyr::mapvalues(iv_level, from = unique(iv_level), to = c("vs. 36-55", "vs. 36-55", "vs. männlich", "vs. Kleinstadt", "vs. Kleinstadt", "vs. 16-19", "vs. 16-19", "vs. Ziemlich gut", "vs. Ziemlich gut", "vs. (fast) täglich")), ref_cat = stri_c(iv_level, "\n(", ref_cat, ")"), ref_cat = factor(ref_cat, levels=unique(ref_cat)[c(2,1,3,5,4,7,6,8:10)])) %>% ggplot(aes(estimate, ref_cat, xmin=conf.low, xmax=conf.high)) + geom_vline(xintercept = 0, color="grey20") + geom_pointrangeh() + facet_grid(iv~variable, scales = "free_y", space = "free_y", switch = "y") + labs(x = "log(Odds)", y = NULL)