Mit diesem Code und dem über GESIS frei verfügbaren Datensatz zum Flash Eurobarometer 404 können die Analysen im Aufsatz “Gesundheitsbezogene Internet-Nutzung in Europa: Second-level Digital Divides?”, der zur Tagung der Ad-Hoc-Gruppe Gesundheitskommunikation 2015 in Lugano erscheint, repliziert werden.
require(haven)
require(tidyr)
require(stringi)
require(ggplot2); theme_set(theme_bw())
require(lme4)
require(arm)
require(broom)
require(survey)
require(srvyr)
require(scales)
require(ggstance)
require(dplyr)
sessionInfo()
## R version 3.2.3 (2015-12-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.3 (El Capitan)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] dplyr_0.4.3 ggstance_0.0.0.9000 scales_0.4.0
## [4] srvyr_0.1.0 survey_3.30-3 broom_0.4.0
## [7] arm_1.8-6 MASS_7.3-45 lme4_1.1-11
## [10] Matrix_1.2-4 ggplot2_2.1.0 stringi_1.0-1
## [13] tidyr_0.4.1 haven_0.2.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.3 formatR_1.3 nloptr_1.0.4 plyr_1.8.3
## [5] tools_3.2.3 digest_0.6.9 evaluate_0.8.3 gtable_0.2.0
## [9] nlme_3.1-125 lattice_0.20-33 psych_1.5.8 DBI_0.3.1
## [13] yaml_2.1.13 parallel_3.2.3 coda_0.18-1 stringr_1.0.0
## [17] knitr_1.12.3 R6_2.1.2 rmarkdown_0.9.5 minqa_1.2.4
## [21] purrr_0.2.1 reshape2_1.4.1 magrittr_1.5 htmltools_0.3
## [25] splines_3.2.3 assertthat_0.1 abind_1.4-3 mnormt_1.5-3
## [29] colorspace_1.2-6 munsell_0.4.3
remove_label = function(x) {
attr(x, "value.labels") <- NULL
attr(x, "label") <- NULL
attr(x, "labels") <- NULL
class(x) <- NULL
x
}
bivariat = function(group_var) {
out = dd_w %>% group_by_("land", group_var) %>% summarise_each(funs(survey_mean), starts_with("dv")) %>% select(-ends_with("_se")) %>% gather(variable, P, -(1:2)) %>% group_by_(group_var, "variable") %>% summarise(P = mean(P)) %>% mutate(iv = group_var)
names(out)[1] = "iv_level"
out
}
d = read_sav("ZA5948_v1-0-0.sav") %>% mutate(land = stri_trim(isocntry), q1 = ifelse(q1 == 8, NA, ifelse(q1 == 7, 6, q1)) * -1 + 6, q2 = ifelse(q2 > 6, NA, q2) * -1 + 6)
d[] = d %>% lapply(remove_label)
dd = d %>% filter(q1 > 0) %>% mutate(iv_age = cut_interval(d1, n = 4), iv_age = plyr::mapvalues(iv_age, from = levels(iv_age), to = levels(iv_age)[c(1:3,3)]), iv_age = factor(iv_age, labels=c("15-35", "36-55", "56+")), iv_edu = d4r2, iv_edu = ifelse(iv_edu == 4 & d1 <= 15, 1, ifelse(iv_edu == 4 & d1 <= 19, 2, ifelse(iv_edu == 4 & d1 > 19, 3, ifelse(iv_edu == 5, 1, ifelse(iv_edu > 5, NA, iv_edu))))), iv_edu = factor(iv_edu, labels=c("-15", "16-19", "20+")), iv_sex = factor(d2, labels=c("männlich", "weiblich")), iv_urban = ifelse(d13 > 3, NA, d13), iv_urban = factor(iv_urban, labels=c("Land", "Kleinstadt", "Großstadt")), iv_health = ifelse(q33 > 4, NA, q33), iv_health = plyr::mapvalues(iv_health, from = 1:4, to = c(1:3, 3)), iv_health = factor(iv_health, labels=c("Sehr gut", "Ziemlich gut", "Schlecht")), iv_know = ifelse(q36 > 4, NA, q36), iv_know = plyr::mapvalues(iv_know, from = 1:4, to = c(1:3,3)), iv_know = factor(iv_know, labels=c("Sehr gut", "Ziemlich gut", "Schlecht")), iv_inet = plyr::mapvalues(q1, from = 1:5, to = c(1,1,1,1,2)), iv_inet = factor(iv_inet, labels=c("seltener", "(fast) täglich")), dv_health_use = ifelse(q2 > 0, 1, 0), dv_serious = ifelse(q3.2 == 1 | q3.3 == 1 | q3.4 == 1, 1, 0), dv_serious = ifelse(dv_health_use == 0, 0, dv_serious), dv_fun = q3.1, dv_fun = ifelse(dv_health_use == 0, 0, dv_fun)) %>% select(uniqid, land, w1, starts_with("iv"), starts_with("dv")) %>% na.omit
dd %>% summary
## uniqid land w1 iv_age
## Min. : 1000018 Length:18035 Min. :0.3000 15-35:4810
## 1st Qu.: 7005398 Class :character 1st Qu.:0.6284 36-55:7127
## Median :13016348 Mode :character Median :0.9435 56+ :6098
## Mean :20049358 Mean :1.1160
## 3rd Qu.:36000914 3rd Qu.:1.4065
## Max. :46033155 Max. :3.0672
## iv_edu iv_sex iv_urban iv_health
## -15 :1252 männlich: 7715 Land :5439 Sehr gut : 5163
## 16-19:6949 weiblich:10320 Kleinstadt:6803 Ziemlich gut:11347
## 20+ :9834 Großstadt :5793 Schlecht : 1525
##
##
##
## iv_know iv_inet dv_health_use
## Sehr gut : 2328 seltener : 4886 Min. :0.0000
## Ziemlich gut:13138 (fast) täglich:13149 1st Qu.:0.0000
## Schlecht : 2569 Median :1.0000
## Mean :0.7279
## 3rd Qu.:1.0000
## Max. :1.0000
## dv_serious dv_fun
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000
## Mean :0.5137 Mean :0.3899
## 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
dd_w = dd %>% select(-dv_health_use) %>% as_survey_design(ids = uniqid, weights = w1)
dd_w
## Independent Sampling design (with replacement)
## [1] "called via srvyr"
## Sampling variables:
## - ids: uniqid
## - weights: w1
## Data variables: uniqid (dbl), land (chr), w1 (dbl), iv_age (fctr), iv_edu
## (fctr), iv_sex (fctr), iv_urban (fctr), iv_health (fctr), iv_know
## (fctr), iv_inet (fctr), dv_serious (dbl), dv_fun (dbl)
vd1_1 = dd_w %>% group_by(land) %>% summarise_each(funs(survey_mean), starts_with("dv")) %>% select(-ends_with("_se")) %>% gather(variable, P, -land) %>% mutate(variable = factor(variable, labels=c("Allgemeine Informationen zu Gesundheitsthemen", "Spezifische Informationen zu Gesundheitsproblemen")))
vd1_2 = vd1_1 %>% group_by(variable) %>% summarise(P=mean(P))
vd1_1 %>% mutate(land = factor(land, levels=sort(unique(land), decreasing = T))) %>% ggplot(aes(P, land, label=land)) + geom_vline(data = vd1_2, aes(xintercept=P), linetype=2, color="grey20") + geom_text() + scale_x_continuous(labels = scales::percent, limits = c(.2, .8)) + facet_wrap("variable") + labs(x=NULL, y=NULL)
vd2 = dd %>% select(starts_with("iv")) %>% names %>% lapply(bivariat) %>% bind_rows()
vd2 %>% mutate(iv = factor(iv, levels=unique(iv)[c(1,3,4,2,6,5,7)], labels=c("Alter", "Geschl.", "Urbanität", "Bildung", "Gesundheits-\nwissen", "Gesundheit", "Internet-\nnutzung")), iv_level = factor(iv_level, levels=unique(iv_level)[c(3:1, 6:4, 8:7, 11:9, 12:14, 16:15)]), variable = factor(variable, labels=c("Allgemeine Informationen zu Gesundheitsthemen", "Spezifische Informationen zu Gesundheitsproblemen"))) %>% ggplot(aes(P, iv_level)) + geom_point() + facet_grid(iv~variable, scales = "free_y", space = "free_y", switch = "y") + scale_x_continuous(labels = scales::percent, limits = c(.2, .65)) + labs(x=NULL, y=NULL)
ddd = dd %>% mutate(iv_age. = relevel(iv_age, ref="36-55"), iv_sex. = relevel(iv_sex, ref="männlich"), iv_urban.=relevel(iv_urban, ref="Kleinstadt"), iv_health.=relevel(iv_health, ref="Ziemlich gut"), iv_edu. = relevel(iv_edu, ref="16-19"), iv_know.=relevel(iv_know, ref="Ziemlich gut"), iv_inet.=relevel(iv_inet, ref="(fast) täglich"))
m3_ser = glmer(dv_serious ~ iv_age. + iv_sex. + iv_urban. + iv_edu. + iv_health. + iv_know. + iv_inet. + (1 | land), family = binomial, data = ddd, nAGQ = 1)
m3_ser %>% display(detail=T)
## glmer(formula = dv_serious ~ iv_age. + iv_sex. + iv_urban. +
## iv_edu. + iv_health. + iv_know. + iv_inet. + (1 | land),
## data = ddd, family = binomial, nAGQ = 1)
## coef.est coef.se z value Pr(>|z|)
## (Intercept) 0.03 0.07 0.43 0.66
## iv_age.15-35 0.00 0.04 -0.08 0.94
## iv_age.56+ -0.38 0.04 -10.00 0.00
## iv_sex.weiblich 0.47 0.03 14.82 0.00
## iv_urban.Land -0.10 0.04 -2.66 0.01
## iv_urban.Großstadt 0.04 0.04 1.05 0.29
## iv_edu.-15 -0.34 0.07 -5.17 0.00
## iv_edu.20+ 0.30 0.03 9.01 0.00
## iv_health.Sehr gut -0.35 0.04 -9.78 0.00
## iv_health.Schlecht 0.46 0.06 7.93 0.00
## iv_know.Sehr gut -0.02 0.05 -0.48 0.63
## iv_know.Schlecht -0.17 0.05 -3.76 0.00
## iv_inet.seltener -0.67 0.04 -18.17 0.00
##
## Error terms:
## Groups Name Std.Dev.
## land (Intercept) 0.26
## Residual 1.00
## ---
## number of obs: 18035, groups: land, 28
## AIC = 23788.6, DIC = 23576.1
## deviance = 23668.3
m3_fun = glmer(dv_fun ~ iv_age. + iv_sex. + iv_urban. + iv_edu. + iv_health. + iv_know. + iv_inet. + (1 | land), family = binomial, data = ddd, nAGQ = 1)
m3_ser %>% display(detail=T)
## glmer(formula = dv_serious ~ iv_age. + iv_sex. + iv_urban. +
## iv_edu. + iv_health. + iv_know. + iv_inet. + (1 | land),
## data = ddd, family = binomial, nAGQ = 1)
## coef.est coef.se z value Pr(>|z|)
## (Intercept) 0.03 0.07 0.43 0.66
## iv_age.15-35 0.00 0.04 -0.08 0.94
## iv_age.56+ -0.38 0.04 -10.00 0.00
## iv_sex.weiblich 0.47 0.03 14.82 0.00
## iv_urban.Land -0.10 0.04 -2.66 0.01
## iv_urban.Großstadt 0.04 0.04 1.05 0.29
## iv_edu.-15 -0.34 0.07 -5.17 0.00
## iv_edu.20+ 0.30 0.03 9.01 0.00
## iv_health.Sehr gut -0.35 0.04 -9.78 0.00
## iv_health.Schlecht 0.46 0.06 7.93 0.00
## iv_know.Sehr gut -0.02 0.05 -0.48 0.63
## iv_know.Schlecht -0.17 0.05 -3.76 0.00
## iv_inet.seltener -0.67 0.04 -18.17 0.00
##
## Error terms:
## Groups Name Std.Dev.
## land (Intercept) 0.26
## Residual 1.00
## ---
## number of obs: 18035, groups: land, 28
## AIC = 23788.6, DIC = 23576.1
## deviance = 23668.3
vd3 = list(ser = m3_ser, fun = m3_fun) %>% lapply(function(x) tidy(x, effects="fixed", conf.int=T)) %>% bind_rows(.id = "variable") %>% filter(term != "(Intercept)") %>% extract(term, c("iv", "iv_level"), "([[:alnum:]\\_]+).([[:graph:][:space:]]+)")
vd3 %>% mutate(variable = factor(variable, labels=c("Allgemeine Informationen zu Gesundheitsthemen", "Spezifische Informationen zu Gesundheitsproblemen")), iv = factor(iv, levels=unique(iv)[c(1,2,3,4,6,5,7)], labels=c("Alter", "Gesc.", "Urbanität", "Bildung", "Gesundheits-\nwissen", "Gesundheit", "Inter-\nnet")), iv_level = factor(iv_level, levels=unique(iv_level)[c(2,1,3,5,4,7,6,8:10)]), ref_cat = plyr::mapvalues(iv_level, from = unique(iv_level), to = c("vs. 36-55", "vs. 36-55", "vs. männlich", "vs. Kleinstadt", "vs. Kleinstadt", "vs. 16-19", "vs. 16-19", "vs. Ziemlich gut", "vs. Ziemlich gut", "vs. (fast) täglich")), ref_cat = stri_c(iv_level, "\n(", ref_cat, ")"), ref_cat = factor(ref_cat, levels=unique(ref_cat)[c(2,1,3,5,4,7,6,8:10)])) %>% ggplot(aes(estimate, ref_cat, xmin=conf.low, xmax=conf.high)) + geom_vline(xintercept = 0, color="grey20") + geom_pointrangeh() + facet_grid(iv~variable, scales = "free_y", space = "free_y", switch = "y") + labs(x = "log(Odds)", y = NULL)