options(digits = 2)
options(contrasts=c("contr.treatment", "contr.treatment"))
library(pacman)
p_load(kirkegaard, readr, haven, polycor, rms, ggeffects, correlation, osfr)
theme_set(theme_bw())
#read
d = haven::read_spss('data/GSS7218_R1.sav')
#table of variables
d_table = df_var_table(d)
#convert variable names
meaningful_var_names = purrr::map2_chr(d, names(d), function(x, x_orig) {
#get label, replace illegal characters
x_label = attr(x, "label")
x_label_nice = str_replace_all(x_label, pattern = "[ \\-\\/]", replacement = "_") %>%
str_replace_all(pattern = "[\\(\\)]", replacement = "")
#if no label, use orignal
if (are_equal(x_label_nice, character())) return(x_orig)
x_label_nice
})
#get rid of labelled type
converter_labelled = function(x) {
#does object have labels?
has_labels = has_attr(x, "labels")
#if labels, convert to factor
if (has_labels) {
y = as_factor(x)
} else {
#convert to ordinary numeric
y = as.vector(x)
}
y
}
#convert each variable
d = map_df(d, converter_labelled)
#recode
d$age_coded = d$AGE %>% as.character() %>% mapvalues("89 OR OLDER", "89") %>% as.numeric()
#politics
d$polviews_coded = d$POLVIEWS %>%
str_to_lower() %>%
mapvalues(from = c("extrmly conservative", "slghtly conservative"),
to = c("extremely conservative", "slightly conservative")) %>%
ordered(levels = c("extremely liberal", "liberal", "slightly liberal", "moderate", "slightly conservative", "conservative", "extremely conservative"))
d$polviews_coded_num = d$polviews_coded %>% as.numeric()
#party affiliation
d$party = d$PARTYID %>% mapvalues(from = c("STRONG DEMOCRAT", "NOT STR DEMOCRAT", "IND,NEAR DEM", "INDEPENDENT", "IND,NEAR REP", "NOT STR REPUBLICAN", "STRONG REPUBLICAN"),
to = c("Strong Democrat", "Democrat", "Lean Democrat", "Independent", "Lean Republican", "Republican", "Strong Republican")) %>% ordered(levels = c("Strong Democrat", "Democrat", "Lean Democrat", "Independent", "Lean Republican", "Republican", "Strong Republican"))
d$party_num = d$party %>% as.numeric()
d$party %>% table2()
#R HAS EMOTIONAL OR MENTAL DISABILITY
d$has_emo_mental_disorder = (d$DISABLD5 == "Yes")
#DAYS OF POOR MENTAL HEALTH PAST 30 DAYS
d$days_poor_health = d$MNTLHLTH %>% as.character() %>% as.numeric()
#COUNSELLNG FOR MENTAL PROBLEMS
d$got_counseling_last_year = d$HLTH2 == "YES"
#EVER HAD MENTAL HEALTH PROBLEM
d$ever_had_mental_problem = d$EVMHP == "YES"
#R Have ever personally received treatment for mental health problem
d$ever_received_treatment_mental_health = d$MHTRTSLF == "Yes"
#happiness
d$happy = d$HAPPY %>% str_to_lower() %>% ordered(levels = c("not too happy", "pretty happy", "very happy"))
d$happy2 = d$HAPPY7 %>% str_to_lower() %>% ordered(levels = c("completely unhappy", "very unhappy", "fairly unhappy", "neither happy nor unhappy", "fairly happy", "very happy", "completely happy"))
#numerical, reversed
d$happy_num = d$happy %>% as.numeric() %>% reverse_scale()
d$happy2_num = d$happy2 %>% as.numeric() %>% reverse_scale()
#reverse coded above, binary
d$not_happy = (d$happy == "not too happy")
d$not_happy %>% table2(include_NA = F)
d$not_happy2 = (as.numeric(d$happy2) < 5)
d$not_happy2 %>% table2(include_NA = F)
#rename
d$year = d$YEAR
d$decade = (d$year %>% str_sub(end = 3)) + "0s"
d$sex = d$SEX %>% str_to_lower()
#items
variables = c("DISABLD5", "MNTLHLTH", "HLTH2", "EVMHP", "MHTRTSLF")
outcomes = c("has_emo_mental_disorder", "days_poor_health", "got_counseling_last_year", "ever_had_mental_problem", "ever_received_treatment_mental_health")
nice_names = c("% has emotional or mental disability",
"Mean days of poor mental health last 30 days",
"% underwent counseling for mental or emotional problems last 12 months",
"% ever had mental health problem",
"% ever personally received treatment for mental health problem")
names(nice_names) = outcomes
#complete sample
nrow(d)
## [1] 64814
#by item
map_df(outcomes, function(v) {
# browser()
v2 = as.symbol(v)
tibble(
variable = v,
n = d[[v]] %>% na.omit() %>% length(),
years = d %>% filter(!is.na(!!v2)) %>% pull(year) %>% table() %>% names() %>% {str_c(., collapse = ", ")}
)
})
#political distribution by year
ideo_by_year = d %>%
select(ID, year, polviews_coded) %>%
plyr::ddply("year", function(dd) {
#proportion of each
counts = table(dd$polviews_coded)
props = counts %>% prop.table()
tibble(
position = names(props) %>% factor(levels = levels(d$polviews_coded)),
n = counts,
proportion = props
)
})
ideo_by_year
#plot
ideo_by_year %>%
filter(!is.na(proportion)) %>%
ggplot(aes(year, proportion, color = position)) +
geom_path() +
scale_y_continuous("Percent", labels = scales::percent) +
scale_x_continuous("Year", breaks = seq(0, 3000, by = 5)) +
scale_color_discrete("Political ideology")
GG_save("figs/dist_ideo_by_year.png")
#happiness items
table(d$polviews_coded, d$happy)
##
## not too happy pretty happy very happy
## extremely liberal 286 848 494
## liberal 826 3662 1815
## slightly liberal 846 4043 1913
## moderate 2614 11934 6131
## slightly conservative 896 4803 2754
## conservative 853 4120 2999
## extremely conservative 280 750 733
table(d$polviews_coded, d$happy) %>% sum()
## [1] 53600
table(d$polviews_coded, d$happy2)
##
## completely unhappy very unhappy fairly unhappy
## extremely liberal 0 2 5
## liberal 0 3 3
## slightly liberal 0 2 3
## moderate 2 5 16
## slightly conservative 0 1 2
## conservative 0 1 5
## extremely conservative 1 1 0
##
## neither happy nor unhappy fairly happy very happy
## extremely liberal 3 11 21
## liberal 4 67 76
## slightly liberal 11 49 58
## moderate 36 187 191
## slightly conservative 5 52 81
## conservative 7 52 89
## extremely conservative 2 4 25
##
## completely happy
## extremely liberal 7
## liberal 15
## slightly liberal 8
## moderate 46
## slightly conservative 21
## conservative 31
## extremely conservative 11
table(d$polviews_coded, d$happy2) %>% sum()
## [1] 1222
#sample sizes
map_df(c("happy", "happy2"), function(v) {
# browser()
v2 = as.symbol(v)
tibble(
variable = v,
n = d[[v]] %>% na.omit() %>% length(),
years = d %>% filter(!is.na(!!v2)) %>% pull(year) %>% table() %>% names() %>% {str_c(., collapse = ", ")}
)
})
#line splitter
pol_labels = c("extremely liberal", "liberal", "slightly liberal", "moderate", "slightly conservative", "conservative", "extremely conservative") %>% str_replace(" ", "\n")
#scales
x_scale = scale_x_discrete("Political ideology (self-rated)", label = pol_labels)
fill_scale = scale_fill_manual("Sex", values = c("blue", "red"))
#plot each item
GG_group_means(d, "has_emo_mental_disorder", groupvar = "polviews_coded", subgroupvar = "sex") +
scale_y_continuous(nice_names["has_emo_mental_disorder"], labels = scales::percent) +
x_scale +
fill_scale
## Proportion variable detected, using `prop.test()`
## Missing values were removed.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
GG_save("figs/has emotional or mental disability.png")
GG_group_means(d, "days_poor_health", groupvar = "polviews_coded", subgroupvar = "sex") +
scale_y_continuous(nice_names["days_poor_health"]) +
x_scale +
fill_scale
## Missing values were removed.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
GG_save("figs/mean days of poor mental health last 30 days.png")
GG_group_means(d, "got_counseling_last_year", groupvar = "polviews_coded", subgroupvar = "sex") +
scale_y_continuous(nice_names["got_counseling_last_year"], labels = scales::percent) +
x_scale +
fill_scale
## Proportion variable detected, using `prop.test()`
## Missing values were removed.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
GG_save("figs/underwent counseling for mental or emotional problems last 12 months.png")
GG_group_means(d, "ever_had_mental_problem", groupvar = "polviews_coded", subgroupvar = "sex") +
scale_y_continuous(nice_names["ever_had_mental_problem"], labels = scales::percent) +
x_scale +
fill_scale
## Proportion variable detected, using `prop.test()`
## Missing values were removed.
## Warning in prop.test(sum(dd[[var]] == 1), length(dd[[var]])): Chi-squared
## approximation may be incorrect
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
GG_save("figs/ever had mental health problem.png")
GG_group_means(d, "ever_received_treatment_mental_health", groupvar = "polviews_coded", subgroupvar = "sex") +
scale_y_continuous(nice_names["ever_received_treatment_mental_health"], labels = scales::percent) +
x_scale +
fill_scale
## Proportion variable detected, using `prop.test()`
## Missing values were removed.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
GG_save("figs/ever personally received treatment for mental health problem.png")
#happy
GG_group_means(d, "happy", groupvar = "polviews_coded", subgroupvar = "sex", type = "point") +
scale_y_continuous("If you were to consider your life in general these days, how happy or unhappy would you say you are, on the whole…" %>% kirkegaard::add_newlines(60)) +
x_scale +
fill_scale
## Missing values were removed.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
GG_save("figs/happy.png")
GG_group_means(d, "happy2", groupvar = "polviews_coded", subgroupvar = "sex", type = "point") +
scale_y_continuous("If you were to consider your life in general, how happy or unhappy would you say you are, on the whole?" %>% kirkegaard::add_newlines(60)) +
x_scale +
fill_scale
## Missing values were removed.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
GG_save("figs/happy2.png")
#correlation sex and ideology
lrm(sex ~ polviews_coded, data = d)
## Frequencies of Missing Values Due to Each Variable
## sex polviews_coded
## 0 9486
##
## Logistic Regression Model
##
## lrm(formula = sex ~ polviews_coded, data = d)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 55328 LR chi2 195.35 R2 0.005 C 0.533
## female 30597 d.f. 6 g 0.132 Dxy 0.066
## male 24731 Pr(> chi2) <0.0001 gr 1.141 gamma 0.085
## max |deriv| 4e-11 gp 0.033 tau-a 0.033
## Brier 0.246
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -0.1382 0.0489 -2.83 0.0047
## polviews_coded=liberal -0.1044 0.0549 -1.90 0.0573
## polviews_coded=slightly liberal -0.0761 0.0545 -1.40 0.1625
## polviews_coded=moderate -0.2034 0.0508 -4.00 <0.0001
## polviews_coded=slightly conservative 0.0870 0.0534 1.63 0.1030
## polviews_coded=conservative 0.0521 0.0536 0.97 0.3316
## polviews_coded=extremely conservative 0.1098 0.0676 1.62 0.1046
##
lrm(polviews_coded ~ sex, data = d)
## Frequencies of Missing Values Due to Each Variable
## polviews_coded sex
## 9486 0
##
## Logistic Regression Model
##
## lrm(formula = polviews_coded ~ sex, data = d)
##
##
## Frequencies of Responses
##
## extremely liberal liberal slightly liberal
## 1682 6514 7010
## moderate slightly conservative conservative
## 21370 8690 8230
## extremely conservative
## 1832
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 55328 LR chi2 60.50 R2 0.001 C 0.512
## max |deriv| 3e-10 d.f. 1 g 0.059 Dxy 0.024
## Pr(> chi2) <0.0001 gr 1.061 gamma 0.048
## gp 0.012 tau-a 0.018
## Brier 0.199
##
## Coef S.E. Wald Z Pr(>|Z|)
## y>=liberal 3.4108 0.0256 133.15 <0.0001
## y>=slightly liberal 1.6977 0.0137 124.30 <0.0001
## y>=moderate 0.9186 0.0116 79.30 <0.0001
## y>=slightly conservative -0.7213 0.0113 -63.76 <0.0001
## y>=conservative -1.5581 0.0131 -119.13 <0.0001
## y>=extremely conservative -3.4292 0.0248 -138.18 <0.0001
## sex=male 0.1196 0.0154 7.78 <0.0001
##
#pairwise counts
for (v in outcomes) {
print(v)
table(d$polviews_coded, d[[v]]) %>% print()
}
## [1] "has_emo_mental_disorder"
##
## FALSE TRUE
## extremely liberal 76 11
## liberal 306 16
## slightly liberal 301 17
## moderate 981 57
## slightly conservative 364 22
## conservative 402 20
## extremely conservative 104 5
## [1] "days_poor_health"
##
## 0 1 2 3 4 5 6 7 8 9 10
## extremely liberal 175 21 23 23 15 22 2 6 0 0 17
## liberal 585 63 101 77 27 78 9 21 5 2 47
## slightly liberal 581 67 98 71 34 77 5 22 5 0 43
## moderate 1957 159 282 174 95 193 21 78 16 3 150
## slightly conservative 764 70 123 70 45 87 5 20 4 0 44
## conservative 868 73 96 63 34 66 8 24 5 0 43
## extremely conservative 232 25 10 14 6 16 2 3 0 0 5
##
## 11 12 14 15 16 17 18 19 20 21 22
## extremely liberal 0 1 1 12 0 0 1 0 17 0 0
## liberal 0 5 8 29 0 0 1 0 29 1 1
## slightly liberal 0 2 7 37 0 0 1 0 17 3 0
## moderate 2 11 27 109 3 0 1 0 70 3 2
## slightly conservative 0 4 9 38 0 0 0 0 22 2 1
## conservative 0 4 8 35 1 3 0 0 19 1 0
## extremely conservative 0 0 2 8 0 0 0 0 5 0 0
##
## 23 24 25 26 27 28 29 30
## extremely liberal 0 0 2 0 1 2 3 21
## liberal 0 0 7 1 0 1 1 39
## slightly liberal 0 1 6 0 1 1 1 33
## moderate 1 2 19 1 1 4 2 149
## slightly conservative 1 0 8 1 1 1 1 42
## conservative 0 0 6 0 1 0 0 31
## extremely conservative 0 0 0 0 0 1 0 11
## [1] "got_counseling_last_year"
##
## FALSE TRUE
## extremely liberal 55 15
## liberal 193 20
## slightly liberal 268 29
## moderate 828 57
## slightly conservative 344 24
## conservative 358 10
## extremely conservative 75 5
## [1] "ever_had_mental_problem"
##
## FALSE TRUE
## extremely liberal 14 6
## liberal 77 10
## slightly liberal 102 14
## moderate 364 18
## slightly conservative 154 13
## conservative 170 10
## extremely conservative 37 2
## [1] "ever_received_treatment_mental_health"
##
## FALSE TRUE
## extremely liberal 31 11
## liberal 132 39
## slightly liberal 132 26
## moderate 480 78
## slightly conservative 153 24
## conservative 179 19
## extremely conservative 47 5
#cohen d
SMD_matrix(d$days_poor_health, d$polviews_coded)
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA 0.2087 0.248 0.1993
## liberal 0.21 NA 0.039 -0.0093
## slightly liberal 0.25 0.0392 NA -0.0485
## moderate 0.20 -0.0093 -0.049 NA
## slightly conservative 0.28 0.0676 0.028 0.0769
## conservative 0.36 0.1546 0.115 0.1640
## extremely conservative 0.39 0.1825 0.143 0.1918
## slightly conservative conservative
## extremely liberal 0.276 0.363
## liberal 0.068 0.155
## slightly liberal 0.028 0.115
## moderate 0.077 0.164
## slightly conservative NA 0.087
## conservative 0.087 NA
## extremely conservative 0.115 0.028
## extremely conservative
## extremely liberal 0.391
## liberal 0.183
## slightly liberal 0.143
## moderate 0.192
## slightly conservative 0.115
## conservative 0.028
## extremely conservative NA
SMD_matrix(d$happy %>% fct_rev() %>% as.numeric(), d$polviews_coded)
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA 4.6e-02 4.6e-02 0.067
## liberal 0.046 NA -6.9e-05 0.021
## slightly liberal 0.046 -6.9e-05 NA 0.021
## moderate 0.067 2.1e-02 2.1e-02 NA
## slightly conservative 0.146 1.0e-01 1.0e-01 0.079
## conservative 0.224 1.8e-01 1.8e-01 0.157
## extremely conservative 0.204 1.6e-01 1.6e-01 0.138
## slightly conservative conservative
## extremely liberal 0.146 0.224
## liberal 0.100 0.178
## slightly liberal 0.100 0.178
## moderate 0.079 0.157
## slightly conservative NA 0.078
## conservative 0.078 NA
## extremely conservative 0.059 -0.019
## extremely conservative
## extremely liberal 0.204
## liberal 0.158
## slightly liberal 0.158
## moderate 0.138
## slightly conservative 0.059
## conservative -0.019
## extremely conservative NA
SMD_matrix(d$happy2 %>% fct_rev() %>% as.numeric(), d$polviews_coded)
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA 0.20 0.0649 0.0734
## liberal 0.198 NA -0.1329 -0.1245
## slightly liberal 0.065 -0.13 NA 0.0085
## moderate 0.073 -0.12 0.0085 NA
## slightly conservative 0.371 0.17 0.3060 0.2975
## conservative 0.395 0.20 0.3297 0.3212
## extremely conservative 0.555 0.36 0.4906 0.4821
## slightly conservative conservative
## extremely liberal 0.371 0.395
## liberal 0.173 0.197
## slightly liberal 0.306 0.330
## moderate 0.298 0.321
## slightly conservative NA 0.024
## conservative 0.024 NA
## extremely conservative 0.185 0.161
## extremely conservative
## extremely liberal 0.56
## liberal 0.36
## slightly liberal 0.49
## moderate 0.48
## slightly conservative 0.18
## conservative 0.16
## extremely conservative NA
#correlation
GG_scatter(d, "polviews_coded_num", "days_poor_health") +
geom_count() +
ylim(NA, 32)
## `geom_smooth()` using formula 'y ~ x'
#cor
cor.test(d$polviews_coded_num, d$days_poor_health)
##
## Pearson's product-moment correlation
##
## data: d$polviews_coded_num and d$days_poor_health
## t = -6, df = 9241, p-value = 3e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.086 -0.045
## sample estimates:
## cor
## -0.066
cor.test(d$polviews_coded_num, d$happy %>% as.numeric())
##
## Pearson's product-moment correlation
##
## data: d$polviews_coded_num and d$happy %>% as.numeric()
## t = 14, df = 53598, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.051 0.068
## sample estimates:
## cor
## 0.06
cor.test(d$polviews_coded_num, d$happy2 %>% as.numeric())
##
## Pearson's product-moment correlation
##
## data: d$polviews_coded_num and d$happy2 %>% as.numeric()
## t = 4, df = 1220, p-value = 1e-04
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.054 0.165
## sample estimates:
## cor
## 0.11
#latent correlations
polycor::hetcor(d %>% select(polviews_coded, !!outcomes, happy, happy2), use = "pairwise")
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 2, 4
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 2, 5
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 3, 5
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 4, 5
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 3, 6
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 4, 6
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 5, 6
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 6, 7
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 2, 8
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 4, 8
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 5, 8
## Warning in hetcor.data.frame(d %>% select(polviews_coded, !!outcomes, happy, :
## no cases for pair 6, 8
##
## Two-Step Estimates
##
## Correlations/Type of Correlation:
## polviews_coded has_emo_mental_disorder
## polviews_coded 1 Polychoric
## has_emo_mental_disorder -0.0587 1
## days_poor_health -0.0688 0.308
## got_counseling_last_year -0.199 <NA>
## ever_had_mental_problem -0.182 <NA>
## ever_received_treatment_mental_health -0.173 0.766
## happy 0.0703 -0.357
## happy2 0.134 <NA>
## days_poor_health got_counseling_last_year
## polviews_coded Polyserial Polychoric
## has_emo_mental_disorder Polyserial
## days_poor_health 1 Polyserial
## got_counseling_last_year 0.317 1
## ever_had_mental_problem <NA> <NA>
## ever_received_treatment_mental_health <NA> <NA>
## happy -0.305 -0.263
## happy2 -0.308 <NA>
## ever_had_mental_problem
## polviews_coded Polychoric
## has_emo_mental_disorder
## days_poor_health
## got_counseling_last_year
## ever_had_mental_problem 1
## ever_received_treatment_mental_health <NA>
## happy -0.213
## happy2 <NA>
## ever_received_treatment_mental_health
## polviews_coded Polychoric
## has_emo_mental_disorder Polychoric
## days_poor_health
## got_counseling_last_year
## ever_had_mental_problem
## ever_received_treatment_mental_health 1
## happy <NA>
## happy2 <NA>
## happy happy2
## polviews_coded Polychoric Polychoric
## has_emo_mental_disorder Polychoric
## days_poor_health Polyserial Polyserial
## got_counseling_last_year Polychoric
## ever_had_mental_problem Polychoric
## ever_received_treatment_mental_health
## happy 1 Polychoric
## happy2 0.682 1
##
## Standard Errors/Numbers of Observations:
## polviews_coded has_emo_mental_disorder
## polviews_coded 55328 2682
## has_emo_mental_disorder 0.0404 2777
## days_poor_health 0.0107 0.0539
## got_counseling_last_year 0.0393 <NA>
## ever_had_mental_problem 0.0582 <NA>
## ever_received_treatment_mental_health 0.0419 0.0362
## happy 0.00503 0.0593
## happy2 0.0309 <NA>
## days_poor_health got_counseling_last_year
## polviews_coded 9243 2281
## has_emo_mental_disorder 811 0
## days_poor_health 11338 863
## got_counseling_last_year 0.0451 2345
## ever_had_mental_problem <NA> <NA>
## ever_received_treatment_mental_health <NA> <NA>
## happy 0.0104 0.0424
## happy2 0.024 <NA>
## ever_had_mental_problem
## polviews_coded 991
## has_emo_mental_disorder 0
## days_poor_health 0
## got_counseling_last_year 0
## ever_had_mental_problem 1053
## ever_received_treatment_mental_health <NA>
## happy 0.0637
## happy2 <NA>
## ever_received_treatment_mental_health
## polviews_coded 1356
## has_emo_mental_disorder 1410
## days_poor_health 0
## got_counseling_last_year 0
## ever_had_mental_problem 0
## ever_received_treatment_mental_health 1413
## happy <NA>
## happy2 <NA>
## happy happy2
## polviews_coded 53600 1222
## has_emo_mental_disorder 1358 0
## days_poor_health 9493 1497
## got_counseling_last_year 2332 0
## ever_had_mental_problem 1045 0
## ever_received_treatment_mental_health 0 0
## happy 60054 1283
## happy2 0.019 2444
##
## P-values for Tests of Bivariate Normality:
## polviews_coded has_emo_mental_disorder
## polviews_coded
## has_emo_mental_disorder 0.396
## days_poor_health 0 9.09e-256
## got_counseling_last_year 0.0882 <NA>
## ever_had_mental_problem 0.0621 <NA>
## ever_received_treatment_mental_health 0.88 <NA>
## happy 2.51e-43 0.112
## happy2 0.00701 <NA>
## days_poor_health got_counseling_last_year
## polviews_coded
## has_emo_mental_disorder
## days_poor_health
## got_counseling_last_year 1.05e-202
## ever_had_mental_problem <NA> <NA>
## ever_received_treatment_mental_health <NA> <NA>
## happy 0 0.963
## happy2 0 <NA>
## ever_had_mental_problem
## polviews_coded
## has_emo_mental_disorder
## days_poor_health
## got_counseling_last_year
## ever_had_mental_problem
## ever_received_treatment_mental_health <NA>
## happy 0.293
## happy2 <NA>
## ever_received_treatment_mental_health
## polviews_coded
## has_emo_mental_disorder
## days_poor_health
## got_counseling_last_year
## ever_had_mental_problem
## ever_received_treatment_mental_health
## happy <NA>
## happy2 <NA>
## happy
## polviews_coded
## has_emo_mental_disorder
## days_poor_health
## got_counseling_last_year
## ever_had_mental_problem
## ever_received_treatment_mental_health
## happy
## happy2 1.21e-09
#adjust for reliability issue
wtd.cor(d$polviews_coded_num, d$days_poor_health)[1] / sqrt(.7 * .7)
## [1] -0.094
#bad days, by year
days_poor_health_by_year = d %>%
filter(!is.na(polviews_coded), !is.na(days_poor_health)) %>%
plyr::ddply(c("year", "polviews_coded"), function(dd) {
sumstats = describe(dd$days_poor_health)
# browser()
tibble(
mean = sumstats$mean,
se = sumstats$se
)
})
#plot
days_poor_health_by_year %>%
ggplot(aes(factor(year), mean, fill = polviews_coded)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymax = mean + 2*se, ymin = mean - 2*se), position = "dodge") +
scale_y_continuous(nice_names["days_poor_health"]) +
scale_x_discrete("Year") +
labs(fill = "Political ideology")
GG_save("figs/bad_days_by_year.png")
#happiness correlations
cor.test(d$polviews_coded_num, d$happy %>% as.numeric())
##
## Pearson's product-moment correlation
##
## data: d$polviews_coded_num and d$happy %>% as.numeric()
## t = 14, df = 53598, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.051 0.068
## sample estimates:
## cor
## 0.06
cor.test(d$polviews_coded_num, d$happy2 %>% as.numeric())
##
## Pearson's product-moment correlation
##
## data: d$polviews_coded_num and d$happy2 %>% as.numeric()
## t = 4, df = 1220, p-value = 1e-04
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.054 0.165
## sample estimates:
## cor
## 0.11
#factor outcome
#otherwise we get errors...
d$has_emo_mental_disorder2 = factor(d$has_emo_mental_disorder)
d$got_counseling_last_year2 = factor(d$got_counseling_last_year)
d$ever_had_mental_problem2 = factor(d$ever_had_mental_problem)
d$ever_received_treatment_mental_health2 = factor(d$ever_received_treatment_mental_health)
#univariate models
list(
lrm(has_emo_mental_disorder2 ~ polviews_coded, data = d),
ols(days_poor_health ~ polviews_coded, data = d),
lrm(got_counseling_last_year2 ~ polviews_coded, data = d),
lrm(ever_had_mental_problem2 ~ polviews_coded, data = d),
lrm(ever_received_treatment_mental_health2 ~ polviews_coded, data = d)
)
## [[1]]
## Frequencies of Missing Values Due to Each Variable
## has_emo_mental_disorder2 polviews_coded
## 62037 9486
##
## Logistic Regression Model
##
## lrm(formula = has_emo_mental_disorder2 ~ polviews_coded, data = d)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 2682 LR chi2 7.26 R2 0.008 C 0.539
## FALSE 2534 d.f. 6 g 0.132 Dxy 0.078
## TRUE 148 Pr(> chi2) 0.2971 gr 1.141 gamma 0.101
## max |deriv| 2e-12 gp 0.008 tau-a 0.008
## Brier 0.052
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -1.9328 0.3226 -5.99 <0.0001
## polviews_coded=liberal -1.0182 0.4121 -2.47 0.0135
## polviews_coded=slightly liberal -0.9411 0.4077 -2.31 0.0210
## polviews_coded=moderate -0.9127 0.3502 -2.61 0.0092
## polviews_coded=slightly conservative -0.8733 0.3902 -2.24 0.0252
## polviews_coded=conservative -1.0679 0.3957 -2.70 0.0070
## polviews_coded=extremely conservative -1.1021 0.5601 -1.97 0.0491
##
##
## [[2]]
## Frequencies of Missing Values Due to Each Variable
## days_poor_health polviews_coded
## 53476 9486
##
## Linear Regression Model
##
## ols(formula = days_poor_health ~ polviews_coded, data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 9243 LR chi2 58.58 R2 0.006
## sigma6.9990 d.f. 6 R2 adj 0.006
## d.f. 9236 Pr(> chi2) 0.0000 g 0.561
##
## Residuals
##
## Min 1Q Median 3Q Max
## -5.3260 -3.8656 -2.7833 0.1344 27.4118
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 5.3260 0.3663 14.54 <0.0001
## polviews_coded=liberal -1.4605 0.4210 -3.47 0.0005
## polviews_coded=slightly liberal -1.7348 0.4222 -4.11 <0.0001
## polviews_coded=moderate -1.3951 0.3848 -3.63 0.0003
## polviews_coded=slightly conservative -1.9335 0.4125 -4.69 <0.0001
## polviews_coded=conservative -2.5427 0.4117 -6.18 <0.0001
## polviews_coded=extremely conservative -2.7378 0.5275 -5.19 <0.0001
##
##
## [[3]]
## Frequencies of Missing Values Due to Each Variable
## got_counseling_last_year2 polviews_coded
## 62469 9486
##
## Logistic Regression Model
##
## lrm(formula = got_counseling_last_year2 ~ polviews_coded, data = d)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 2281 LR chi2 33.80 R2 0.037 C 0.613
## FALSE 2121 d.f. 6 g 0.469 Dxy 0.225
## TRUE 160 Pr(> chi2) <0.0001 gr 1.598 gamma 0.286
## max |deriv| 8e-11 gp 0.029 tau-a 0.029
## Brier 0.064
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -1.2993 0.2913 -4.46 <0.0001
## polviews_coded=liberal -0.9677 0.3742 -2.59 0.0097
## polviews_coded=slightly liberal -0.9244 0.3508 -2.64 0.0084
## polviews_coded=moderate -1.3767 0.3219 -4.28 <0.0001
## polviews_coded=slightly conservative -1.3633 0.3598 -3.79 0.0002
## polviews_coded=conservative -2.2787 0.4332 -5.26 <0.0001
## polviews_coded=extremely conservative -1.4088 0.5461 -2.58 0.0099
##
##
## [[4]]
## Frequencies of Missing Values Due to Each Variable
## ever_had_mental_problem2 polviews_coded
## 63761 9486
##
## Logistic Regression Model
##
## lrm(formula = ever_had_mental_problem2 ~ polviews_coded, data = d)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 991 LR chi2 19.86 R2 0.049 C 0.632
## FALSE 918 d.f. 6 g 0.465 Dxy 0.264
## TRUE 73 Pr(> chi2) 0.0029 gr 1.592 gamma 0.326
## max |deriv| 5e-11 gp 0.036 tau-a 0.036
## Brier 0.066
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -0.8473 0.4880 -1.74 0.0825
## polviews_coded=liberal -1.1939 0.5925 -2.01 0.0439
## polviews_coded=slightly liberal -1.1386 0.5651 -2.01 0.0439
## polviews_coded=moderate -2.1595 0.5444 -3.97 <0.0001
## polviews_coded=slightly conservative -1.6247 0.5670 -2.87 0.0042
## polviews_coded=conservative -1.9859 0.5865 -3.39 0.0007
## polviews_coded=extremely conservative -2.0705 0.8747 -2.37 0.0179
##
##
## [[5]]
## Frequencies of Missing Values Due to Each Variable
## ever_received_treatment_mental_health2 polviews_coded
## 63401 9486
##
## Logistic Regression Model
##
## lrm(formula = ever_received_treatment_mental_health2 ~ polviews_coded,
## data = d)
##
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 1356 LR chi2 18.18 R2 0.023 C 0.584
## FALSE 1154 d.f. 6 g 0.328 Dxy 0.167
## TRUE 202 Pr(> chi2) 0.0058 gr 1.388 gamma 0.217
## max |deriv| 4e-10 gp 0.042 tau-a 0.042
## Brier 0.125
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -1.0361 0.3510 -2.95 0.0032
## polviews_coded=liberal -0.1831 0.3955 -0.46 0.6433
## polviews_coded=slightly liberal -0.5886 0.4113 -1.43 0.1524
## polviews_coded=moderate -0.7810 0.3716 -2.10 0.0356
## polviews_coded=slightly conservative -0.8163 0.4140 -1.97 0.0486
## polviews_coded=conservative -1.2069 0.4259 -2.83 0.0046
## polviews_coded=extremely conservative -1.2046 0.5869 -2.05 0.0401
##
#fit models
mh_models = list(
lrm(has_emo_mental_disorder2 ~ polviews_coded + sex + rcs(age_coded), data = d),
ols(days_poor_health ~ polviews_coded + sex + rcs(age_coded), data = d),
lrm(got_counseling_last_year2 ~ polviews_coded + sex + rcs(age_coded), data = d),
lrm(ever_had_mental_problem2 ~ polviews_coded + sex + rcs(age_coded), data = d),
lrm(ever_received_treatment_mental_health2 ~ polviews_coded + sex + rcs(age_coded), data = d)
)
#whites only
d_whites = d %>% filter(RACE == "WHITE")
mh_models_white = list(
lrm(has_emo_mental_disorder2 ~ polviews_coded + sex + rcs(age_coded), data = d_whites),
ols(days_poor_health ~ polviews_coded + sex + rcs(age_coded), data = d_whites),
lrm(got_counseling_last_year2 ~ polviews_coded + sex + rcs(age_coded), data = d_whites),
lrm(ever_had_mental_problem2 ~ polviews_coded + sex + rcs(age_coded), data = d_whites),
lrm(ever_received_treatment_mental_health2 ~ polviews_coded + sex + rcs(age_coded), data = d_whites)
)
#testing the marginal plots
if (F) {
ggeffect(ols(days_poor_health ~ polviews_coded + sex + rcs(age_coded), data = d), "polviews_coded")
ggpredict(ols(days_poor_health ~ polviews_coded + sex + rcs(age_coded), data = d), "polviews_coded") %>% plot()
ggemmeans(ols(days_poor_health ~ polviews_coded + sex + rcs(age_coded), data = d), "polviews_coded")
ggeffect(lrm(has_emo_mental_disorder2 ~ polviews_coded + sex + rcs(age_coded), data = d), "polviews_coded")
ggeffect(lrm(has_emo_mental_disorder2 ~ polviews_coded + sex + rcs(age_coded), data = d %>% filter(!is.na(has_emo_mental_disorder2))), "polviews_coded")
ggpredict(lrm(has_emo_mental_disorder2 ~ polviews_coded + sex + rcs(age_coded), data = d), "polviews_coded") %>% plot()
ggemmeans(lrm(has_emo_mental_disorder2 ~ polviews_coded + sex + rcs(age_coded), data = d), "polviews_coded")
}
#plot and save
#loop
for (v in outcomes) {
#all races
i = which(v == outcomes)
#y label
if (v == "days_poor_health") y_label = "Count" else y_label = "Probability"
#plot
y = ggpredict(mh_models[[i]], "polviews_coded") %>%
plot() +
ggtitle(nice_names[v]) +
ylab(y_label) +
xlab("Political ideology") +
theme(
axis.text = element_text(color = "black"),
axis.title = element_text(color = "black")
)
print(y)
GG_save(str_glue("figs/model_{v}.png"))
#whites only
y = ggpredict(mh_models_white[[i]], "polviews_coded") %>%
plot() +
ggtitle(nice_names[v],
"Whites only") +
ylab(y_label) +
xlab("Political ideology") +
theme(
axis.text = element_text(color = "black"),
axis.title = element_text(color = "black")
)
print(y)
GG_save(str_glue("figs/whites_model_{v}.png"))
}
#plot mental illness and happiness as function of political ideology by decade
#by wave is probably too noisy
d %>%
plyr::ddply("decade", function(dd) {
#compute group stats
desc_mi = describeBy(dd$days_poor_health, dd$polviews_coded, mat = T)
desc_h = describeBy(dd$happy %>% as.numeric(), dd$polviews_coded, mat = T)
#save
bind_rows(
desc_mi, desc_h
) %>%
select(n, mean, sd, se, group1) %>%
mutate(
decade = dd$decade[1],
outcome = c(rep("days_poor_health", 7), rep("happiness", 7)),
group1 = ordered(group1, levels = d$polviews_coded %>% levels())
)
}) %>%
ggplot(aes(decade, mean, fill = group1)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = mean - 2*se, ymax = mean + 2*se), position = "dodge") +
scale_fill_manual("Political ideology", values = RColorBrewer::brewer.pal(n = 7, name = "BrBG")) +
facet_wrap("outcome", nrow = 2, scales = "free")
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning in min(x, na.rm = na.rm): no non-missing arguments to min; returning Inf
## Warning in max(x, na.rm = na.rm): no non-missing arguments to max; returning -
## Inf
## Warning: Removed 21 rows containing missing values (geom_bar).
GG_save("figs/temporal_pattern.png")
## Warning: Removed 21 rows containing missing values (geom_bar).
#numerical sizes
d %>%
plyr::dlply("decade", function(dd) {
#compute group stats
list(
days_poor_health = SMD_matrix(dd$days_poor_health, group = dd$polviews_coded),
happiness = SMD_matrix(dd$happy %>% as.numeric(), group = dd$polviews_coded)
)
})
## $`1970s`
## $`1970s`$days_poor_health
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA NA NA NA
## liberal NA NA NA NA
## slightly liberal NA NA NA NA
## moderate NA NA NA NA
## slightly conservative NA NA NA NA
## conservative NA NA NA NA
## extremely conservative NA NA NA NA
## slightly conservative conservative
## extremely liberal NA NA
## liberal NA NA
## slightly liberal NA NA
## moderate NA NA
## slightly conservative NA NA
## conservative NA NA
## extremely conservative NA NA
## extremely conservative
## extremely liberal NA
## liberal NA
## slightly liberal NA
## moderate NA
## slightly conservative NA
## conservative NA
## extremely conservative NA
##
## $`1970s`$happiness
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA -0.017 -0.055 -0.0896
## liberal -0.017 NA -0.037 -0.0721
## slightly liberal -0.055 -0.037 NA -0.0347
## moderate -0.090 -0.072 -0.035 NA
## slightly conservative -0.152 -0.134 -0.097 -0.0620
## conservative -0.149 -0.131 -0.094 -0.0590
## extremely conservative -0.083 -0.066 -0.029 0.0062
## slightly conservative conservative
## extremely liberal -0.1515 -0.1486
## liberal -0.1341 -0.1312
## slightly liberal -0.0967 -0.0937
## moderate -0.0620 -0.0590
## slightly conservative NA 0.0029
## conservative 0.0029 NA
## extremely conservative 0.0682 0.0652
## extremely conservative
## extremely liberal -0.0834
## liberal -0.0659
## slightly liberal -0.0285
## moderate 0.0062
## slightly conservative 0.0682
## conservative 0.0652
## extremely conservative NA
##
##
## $`1980s`
## $`1980s`$days_poor_health
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA NA NA NA
## liberal NA NA NA NA
## slightly liberal NA NA NA NA
## moderate NA NA NA NA
## slightly conservative NA NA NA NA
## conservative NA NA NA NA
## extremely conservative NA NA NA NA
## slightly conservative conservative
## extremely liberal NA NA
## liberal NA NA
## slightly liberal NA NA
## moderate NA NA
## slightly conservative NA NA
## conservative NA NA
## extremely conservative NA NA
## extremely conservative
## extremely liberal NA
## liberal NA
## slightly liberal NA
## moderate NA
## slightly conservative NA
## conservative NA
## extremely conservative NA
##
## $`1980s`$happiness
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA -0.021 -0.034 -0.046
## liberal -0.021 NA -0.013 -0.025
## slightly liberal -0.034 -0.013 NA -0.012
## moderate -0.046 -0.025 -0.012 NA
## slightly conservative -0.121 -0.100 -0.087 -0.075
## conservative -0.240 -0.219 -0.206 -0.194
## extremely conservative -0.224 -0.203 -0.190 -0.178
## slightly conservative conservative
## extremely liberal -0.121 -0.240
## liberal -0.100 -0.219
## slightly liberal -0.087 -0.206
## moderate -0.075 -0.194
## slightly conservative NA -0.119
## conservative -0.119 NA
## extremely conservative -0.103 0.016
## extremely conservative
## extremely liberal -0.224
## liberal -0.203
## slightly liberal -0.190
## moderate -0.178
## slightly conservative -0.103
## conservative 0.016
## extremely conservative NA
##
##
## $`1990s`
## $`1990s`$days_poor_health
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA NA NA NA
## liberal NA NA NA NA
## slightly liberal NA NA NA NA
## moderate NA NA NA NA
## slightly conservative NA NA NA NA
## conservative NA NA NA NA
## extremely conservative NA NA NA NA
## slightly conservative conservative
## extremely liberal NA NA
## liberal NA NA
## slightly liberal NA NA
## moderate NA NA
## slightly conservative NA NA
## conservative NA NA
## extremely conservative NA NA
## extremely conservative
## extremely liberal NA
## liberal NA
## slightly liberal NA
## moderate NA
## slightly conservative NA
## conservative NA
## extremely conservative NA
##
## $`1990s`$happiness
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA -0.142 -0.066 -0.097
## liberal -0.142 NA 0.076 0.045
## slightly liberal -0.066 0.076 NA -0.031
## moderate -0.097 0.045 -0.031 NA
## slightly conservative -0.193 -0.051 -0.127 -0.096
## conservative -0.265 -0.123 -0.199 -0.168
## extremely conservative -0.299 -0.157 -0.233 -0.202
## slightly conservative conservative
## extremely liberal -0.193 -0.265
## liberal -0.051 -0.123
## slightly liberal -0.127 -0.199
## moderate -0.096 -0.168
## slightly conservative NA -0.072
## conservative -0.072 NA
## extremely conservative -0.106 -0.034
## extremely conservative
## extremely liberal -0.299
## liberal -0.157
## slightly liberal -0.233
## moderate -0.202
## slightly conservative -0.106
## conservative -0.034
## extremely conservative NA
##
##
## $`2000s`
## $`2000s`$days_poor_health
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA 0.316 0.268 0.285
## liberal 0.32 NA -0.048 -0.031
## slightly liberal 0.27 -0.048 NA 0.017
## moderate 0.29 -0.031 0.017 NA
## slightly conservative 0.43 0.118 0.166 0.149
## conservative 0.49 0.175 0.223 0.206
## extremely conservative 0.49 0.174 0.222 0.205
## slightly conservative conservative
## extremely liberal 0.434 0.49139
## liberal 0.118 0.17534
## slightly liberal 0.166 0.22339
## moderate 0.149 0.20630
## slightly conservative NA 0.05747
## conservative 0.057 NA
## extremely conservative 0.057 -0.00089
## extremely conservative
## extremely liberal 0.49050
## liberal 0.17445
## slightly liberal 0.22250
## moderate 0.20541
## slightly conservative 0.05658
## conservative -0.00089
## extremely conservative NA
##
## $`2000s`$happiness
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA 0.011 -0.050 -0.084
## liberal 0.011 NA -0.061 -0.094
## slightly liberal -0.050 -0.061 NA -0.033
## moderate -0.084 -0.094 -0.033 NA
## slightly conservative -0.159 -0.170 -0.109 -0.076
## conservative -0.304 -0.315 -0.253 -0.220
## extremely conservative -0.302 -0.313 -0.252 -0.218
## slightly conservative conservative
## extremely liberal -0.159 -0.3036
## liberal -0.170 -0.3145
## slightly liberal -0.109 -0.2533
## moderate -0.076 -0.2200
## slightly conservative NA -0.1442
## conservative -0.144 NA
## extremely conservative -0.143 0.0016
## extremely conservative
## extremely liberal -0.3021
## liberal -0.3130
## slightly liberal -0.2517
## moderate -0.2185
## slightly conservative -0.1427
## conservative 0.0016
## extremely conservative NA
##
##
## $`2010s`
## $`2010s`$days_poor_health
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA 0.1592 0.249 0.1604
## liberal 0.16 NA 0.090 0.0012
## slightly liberal 0.25 0.0902 NA -0.0890
## moderate 0.16 0.0012 -0.089 NA
## slightly conservative 0.19 0.0316 -0.059 0.0304
## conservative 0.30 0.1406 0.050 0.1394
## extremely conservative 0.35 0.1859 0.096 0.1847
## slightly conservative conservative
## extremely liberal 0.191 0.300
## liberal 0.032 0.141
## slightly liberal -0.059 0.050
## moderate 0.030 0.139
## slightly conservative NA 0.109
## conservative 0.109 NA
## extremely conservative 0.154 0.045
## extremely conservative
## extremely liberal 0.345
## liberal 0.186
## slightly liberal 0.096
## moderate 0.185
## slightly conservative 0.154
## conservative 0.045
## extremely conservative NA
##
## $`2010s`$happiness
## extremely liberal liberal slightly liberal moderate
## extremely liberal NA -0.018 0.010 -0.0068
## liberal -0.0184 NA 0.029 0.0116
## slightly liberal 0.0105 0.029 NA -0.0173
## moderate -0.0068 0.012 -0.017 NA
## slightly conservative -0.0705 -0.052 -0.081 -0.0637
## conservative -0.1252 -0.107 -0.136 -0.1184
## extremely conservative -0.0819 -0.063 -0.092 -0.0751
## slightly conservative conservative
## extremely liberal -0.070 -0.125
## liberal -0.052 -0.107
## slightly liberal -0.081 -0.136
## moderate -0.064 -0.118
## slightly conservative NA -0.055
## conservative -0.055 NA
## extremely conservative -0.011 0.043
## extremely conservative
## extremely liberal -0.082
## liberal -0.063
## slightly liberal -0.092
## moderate -0.075
## slightly conservative -0.011
## conservative 0.043
## extremely conservative NA
##
##
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
## decade
## 1 1970s
## 2 1980s
## 3 1990s
## 4 2000s
## 5 2010s
We combine the outcomes into relative rates, and then we can meta-analyze these for greater statistical precision.
#compute relative rates for each outcome
RR_outcomes = d %>%
select(polviews_coded, !!!outcomes) %>%
gather(key = outcome, value = value, -polviews_coded) %>%
filter(!is.na(polviews_coded)) %>%
plyr::ddply(c("polviews_coded", "outcome"), function(dd) {
describe(dd$value, skew = F)
}) %>%
#convert to RR
plyr::ddply("outcome", function(dd) {
tibble(
polviews = dd$polviews,
RR = dd$mean / dd$mean[4],
RR_upper = (dd$mean + dd$se*2) / dd$mean[4],
RR_lower = (dd$mean - dd$se*2) / dd$mean[4],
n = dd$n
)
})
#plot
RR_outcomes %>%
ggplot(aes(outcome, RR, fill = polviews)) +
geom_bar(position = "dodge", stat = "identity") +
geom_errorbar(mapping = aes(ymin = RR_lower, ymax = RR_upper), position = "dodge", stat = "identity") +
scale_x_discrete(labels = str_clean) +
theme(axis.text.x = element_text(angle = -10, hjust = 0)) +
labs(y = "Relative risk (moderates = 1)",
fill = "Political ideology")
GG_save("figs/ideology_outcomes.png")
#overall RR
RR_outcomes_meta = RR_outcomes %>%
plyr::ddply("polviews", function(dd) {
desc = describe(dd$RR)
tibble(
mean = wtd_mean(dd$RR, dd$n %>% sqrt()),
n = dd$n %>% sum()
)
})
#plot
RR_outcomes_meta %>%
ggplot(aes(polviews, mean)) +
geom_bar(stat = "identity") +
geom_label(aes(label = n, y = 0)) +
labs(x = "Political ideology",
y = "Mean relative risk\nmoderates = 1")
#bootstrap
RR_outcomes_meta_replicates = cache_object(filename = "RR_outcomes_meta_replicates.rds", {
RR_outcomes_meta_replicates = tibble(
i = 1:1000,
"extremely liberal" = NA,
"liberal" = NA,
"slightly liberal" = NA,
"moderate" = NA,
"slightly conservative" = NA,
"conservative" = NA,
"extremely conservative" = NA
)
#go!
set.seed(1)
for (i in 1:1000) {
#message progress
if (i %% 10 == 0) message(i)
#compute
i_d = d %>%
#remove cases without data
filter(!is.na(polviews_coded)) %>%
#resample
sample_frac(replace = T) %>%
select(polviews_coded, !!!outcomes) %>%
#to long format
gather(key = outcome, value = value, -polviews_coded) %>%
plyr::ddply(c("polviews_coded", "outcome"), function(dd) {
describe(dd$value, skew = F)
}) %>%
#convert to RR
plyr::ddply("outcome", function(dd) {
tibble(
polviews = dd$polviews,
RR = dd$mean / dd$mean[4],
n = dd$n
)
}) %>%
#and average across outcomes
plyr::ddply("polviews", function(dd) {
desc = describe(dd$RR)
tibble(
mean = wtd_mean(dd$RR, dd$n %>% sqrt()),
n = dd$n %>% sum()
)
})
#save
RR_outcomes_meta_replicates[i, d$polviews_coded %>% levels()] = i_d$mean
}
RR_outcomes_meta_replicates
})
## Cache found, reading object from disk
#plot result
RR_outcomes_meta_replicates %>%
gather(key = polviews, value = value, -i) %>%
plyr::ddply("polviews", function(dd) {
tibble(
mean = mean(dd$value),
se = sd(dd$value)
)
}) %>%
print() %>%
mutate(polviews = ordered(polviews, levels = levels(d$polviews_coded))) %>%
ggplot(aes(polviews, mean)) +
geom_bar(stat = "identity") +
geom_errorbar(aes(ymin = mean - 2*se, ymax = mean + 2*se)) +
geom_label(data = RR_outcomes_meta, aes(polviews, y = 0, label = n)) +
labs(x = "Political ideology") +
scale_y_continuous("Mean relative risk\nmoderates = 1", breaks = seq(0, 100, .5))
## polviews mean se
## 1 conservative 0.76 0.087
## 2 extremely conservative 0.83 0.163
## 3 extremely liberal 2.48 0.382
## 4 liberal 1.32 0.146
## 5 moderate 1.00 0.000
## 6 slightly conservative 1.06 0.114
## 7 slightly liberal 1.29 0.147
GG_save("figs/meta_negative_bootstrapped.png")
Add the 2 happiness variables, and reverse code.
#variables
outcomes_all = c(outcomes, "happy_num", "happy2_num")
#compute relative rates for each outcome
RR_outcomes_all = d %>%
select(polviews_coded, !!!outcomes_all) %>%
gather(key = outcome, value = value, -polviews_coded) %>%
filter(!is.na(polviews_coded)) %>%
plyr::ddply(c("polviews_coded", "outcome"), function(dd) {
describe(dd$value, skew = F)
}) %>%
#convert to RR
plyr::ddply("outcome", function(dd) {
tibble(
polviews = dd$polviews,
RR = dd$mean / dd$mean[4],
RR_upper = (dd$mean + dd$se*2) / dd$mean[4],
RR_lower = (dd$mean - dd$se*2) / dd$mean[4],
n = dd$n
)
})
#plot
RR_outcomes_all %>%
ggplot(aes(outcome, RR, fill = polviews)) +
geom_bar(position = "dodge", stat = "identity") +
geom_errorbar(mapping = aes(ymin = RR_lower, ymax = RR_upper), position = "dodge", stat = "identity") +
scale_x_discrete(labels = str_clean) +
theme(axis.text.x = element_text(angle = -10, hjust = 0)) +
labs(y = "Relative risk (moderates = 1)",
fill = "Political ideology")
GG_save("figs/ideology_outcomes_all.png")
#overall RR
RR_outcomes_meta_all = RR_outcomes_all %>%
plyr::ddply("polviews", function(dd) {
desc = describe(dd$RR)
tibble(
mean = wtd_mean(dd$RR, dd$n %>% sqrt()),
n = dd$n %>% sum()
)
})
#plot
RR_outcomes_meta_all %>%
ggplot(aes(polviews, mean)) +
geom_bar(stat = "identity") +
geom_label(aes(label = n, y = 0)) +
labs(x = "Political ideology",
y = "Mean relative risk\nmoderates = 1")
#bootstrap
RR_outcomes_meta_replicates_all = cache_object(filename = "RR_outcomes_meta_replicates_all.rds", {
RR_outcomes_meta_replicates_all = tibble(
i = 1:1000,
"extremely liberal" = NA,
"liberal" = NA,
"slightly liberal" = NA,
"moderate" = NA,
"slightly conservative" = NA,
"conservative" = NA,
"extremely conservative" = NA
)
#go!
set.seed(1)
for (i in 1:1000) {
#message progress
if (i %% 10 == 0) message(i)
#compute
i_d = d %>%
#remove cases without data
filter(!is.na(polviews_coded)) %>%
#resample
sample_frac(replace = T) %>%
select(polviews_coded, !!!outcomes_all) %>%
#to long format
gather(key = outcome, value = value, -polviews_coded) %>%
plyr::ddply(c("polviews_coded", "outcome"), function(dd) {
describe(dd$value, skew = F)
}) %>%
#convert to RR
plyr::ddply("outcome", function(dd) {
tibble(
polviews = dd$polviews,
RR = dd$mean / dd$mean[4],
n = dd$n
)
}) %>%
#and average across outcomes
plyr::ddply("polviews", function(dd) {
desc = describe(dd$RR)
tibble(
mean = wtd_mean(dd$RR, dd$n %>% sqrt()),
n = dd$n %>% sum()
)
})
#save
RR_outcomes_meta_replicates_all[i, d$polviews_coded %>% levels()] = i_d$mean
}
RR_outcomes_meta_replicates_all
})
## Cache found, reading object from disk
#plot result
RR_outcomes_meta_replicates_all %>%
gather(key = polviews, value = value, -i) %>%
plyr::ddply("polviews", function(dd) {
tibble(
mean = mean(dd$value),
se = sd(dd$value)
)
}) %>%
print() %>%
mutate(polviews = ordered(polviews, levels = levels(d$polviews_coded))) %>%
ggplot(aes(polviews, mean)) +
geom_bar(stat = "identity") +
geom_errorbar(aes(ymin = mean - 2*se, ymax = mean + 2*se)) +
geom_label(data = RR_outcomes_meta_all, aes(polviews, y = 0, label = n)) +
labs(x = "Political ideology") +
scale_y_continuous("Mean relative risk\nmoderates = 1", breaks = seq(0, 100, .5))
## polviews mean se
## 1 conservative 0.85 0.044
## 2 extremely conservative 0.88 0.084
## 3 extremely liberal 1.76 0.192
## 4 liberal 1.16 0.072
## 5 moderate 1.00 0.000
## 6 slightly conservative 1.01 0.056
## 7 slightly liberal 1.15 0.073
GG_save("figs/meta_negative_bootstrapped_all.png")
#alternative political measure
#party affiliation
#compute relative rates for each outcome
RR_outcomes_all_party = d %>%
select(party, !!!outcomes) %>%
gather(key = outcome, value = value, -party) %>%
filter(!is.na(party)) %>%
plyr::ddply(c("party", "outcome"), function(dd) {
describe(dd$value, skew = F)
}) %>%
#convert to RR
plyr::ddply("outcome", function(dd) {
tibble(
party = dd$party,
RR = dd$mean / dd$mean[4],
RR_upper = (dd$mean + dd$se*2) / dd$mean[4],
RR_lower = (dd$mean - dd$se*2) / dd$mean[4],
n = dd$n
)
})
#plot
RR_outcomes_all_party %>%
ggplot(aes(outcome, RR, fill = party)) +
geom_bar(position = "dodge", stat = "identity") +
geom_errorbar(mapping = aes(ymin = RR_lower, ymax = RR_upper), position = "dodge", stat = "identity") +
scale_x_discrete(labels = str_clean) +
theme(axis.text.x = element_text(angle = -10, hjust = 0)) +
labs(y = "Relative risk (independent = 1)",
fill = "Party affiliation")
GG_save("figs/party_outcomes.png")
#overall RR
RR_outcomes_meta_all_party = RR_outcomes_all_party %>%
plyr::ddply("party", function(dd) {
desc = describe(dd$RR)
tibble(
mean = wtd_mean(dd$RR, dd$n %>% sqrt()),
n = dd$n %>% sum()
)
})
#plot
RR_outcomes_meta_all_party %>%
ggplot(aes(party, mean)) +
geom_bar(stat = "identity") +
geom_label(aes(label = n, y = 0)) +
labs("Party affiliation") +
scale_y_continuous("Relative risk (independent = 1)", breaks = seq(0, 10, .2))
#versions
write_sessioninfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] osfr_0.2.8 correlation_0.2.1 ggeffects_0.14.3
## [4] rms_5.1-4 SparseM_1.78 polycor_0.7-10
## [7] haven_2.2.0 kirkegaard_2020-05-18 metafor_2.4-0
## [10] Matrix_1.2-18 psych_1.9.12.31 magrittr_1.5
## [13] assertthat_0.2.1 weights_1.0.1 mice_3.9.0
## [16] gdata_2.18.0 Hmisc_4.4-0 Formula_1.2-3
## [19] survival_3.1-12 lattice_0.20-41 forcats_0.5.0
## [22] stringr_1.4.0 dplyr_0.8.99.9003 purrr_0.3.4
## [25] readr_1.3.1 tidyr_1.0.3 tibble_3.0.1
## [28] ggplot2_3.3.0 tidyverse_1.3.0 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] TH.data_1.0-10 colorspace_1.4-1 ellipsis_0.3.1
## [4] sjlabelled_1.1.4 htmlTable_1.13.3 parameters_0.7.0
## [7] base64enc_0.1-3 fs_1.4.1 httpcode_0.3.0
## [10] rstudioapi_0.11 farver_2.0.3 MatrixModels_0.4-1
## [13] fansi_0.4.1 mvtnorm_1.1-0 lubridate_1.7.8
## [16] xml2_1.3.2 codetools_0.2-16 splines_4.0.0
## [19] mnormt_1.5-7 knitr_1.28 jsonlite_1.6.1
## [22] broom_0.5.6 cluster_2.1.0 dbplyr_1.4.3
## [25] png_0.1-7 effectsize_0.3.1 compiler_4.0.0
## [28] httr_1.4.1 backports_1.1.7 cli_2.0.2
## [31] acepack_1.4.1 htmltools_0.4.0 quantreg_5.55
## [34] tools_4.0.0 gtable_0.3.0 glue_1.4.1
## [37] Rcpp_1.0.4.6 cellranger_1.1.0 vctrs_0.3.0
## [40] crul_0.9.0 nlme_3.1-147 multilevel_2.6
## [43] insight_0.8.4 xfun_0.13 rvest_0.3.5
## [46] lifecycle_0.2.0 gtools_3.8.2 polspline_1.1.19
## [49] MASS_7.3-51.6 zoo_1.8-8 scales_1.1.1
## [52] hms_0.5.3 parallel_4.0.0 sandwich_2.5-1
## [55] RColorBrewer_1.1-2 psychometric_2.2 curl_4.3
## [58] yaml_2.2.1 memoise_1.1.0 gridExtra_2.3
## [61] rpart_4.1-15 latticeExtra_0.6-29 stringi_1.4.6
## [64] bayestestR_0.6.0 checkmate_2.0.0 rlang_0.4.6
## [67] pkgconfig_2.0.3 evaluate_0.14 labeling_0.3
## [70] htmlwidgets_1.5.1 tidyselect_1.1.0 plyr_1.8.6
## [73] R6_2.4.1 generics_0.0.2 multcomp_1.4-13
## [76] DBI_1.1.0 mgcv_1.8-31 pillar_1.4.4
## [79] foreign_0.8-76 withr_2.2.0 nnet_7.3-14
## [82] modelr_0.1.7 crayon_1.3.4 rmarkdown_2.1
## [85] jpeg_0.1-8.1 grid_4.0.0 readxl_1.3.1
## [88] data.table_1.12.8 reprex_0.3.0 digest_0.6.25
## [91] munsell_0.5.0 viridisLite_0.3.0
#update OSF files
osfr::osf_auth(read_lines("~/.config/osf_token"))
## Registered PAT from the provided token
osf_repo = osf_retrieve_node("https://osf.io/fhpxm/")
#upload all figs + notebook files
proj_files = c("figs", "notebook.html", "notebook.Rmd")
osf_upload(osf_repo, path = proj_files, conflicts = "overwrite")
## Searching for conflicting files on OSF
## Retrieving 28 of 28 available items:
## ..retrieved 10 items
## ..retrieved 20 items
## ..retrieved 28 items
## ..done
## Updating 30 existing file(s) on OSF