options(Ncores = 12)
library(tidyverse, quietly = T)
library(haven, quietly = T)
require(foreign)
require(nnet)
require(ggplot2)
require(reshape2)PCA, Multinomial, Poisson
gss <- haven::read_dta(
unz("2021_stata.zip",
filename = "GSS2021.dta")
)Recode
library(tidylog, quietly = T)
Attaching package: 'tidylog'
The following objects are masked from 'package:dplyr':
add_count, add_tally, anti_join, count, distinct, distinct_all,
distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
full_join, group_by, group_by_all, group_by_at, group_by_if,
inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
relocate, rename, rename_all, rename_at, rename_if, rename_with,
right_join, sample_frac, sample_n, select, select_all, select_at,
select_if, semi_join, slice, slice_head, slice_max, slice_min,
slice_sample, slice_tail, summarise, summarise_all, summarise_at,
summarise_if, summarize, summarize_all, summarize_at, summarize_if,
tally, top_frac, top_n, transmute, transmute_all, transmute_at,
transmute_if, ungroup
The following objects are masked from 'package:tidyr':
drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
spread, uncount
The following object is masked from 'package:stats':
filter
gss_sub0 <- gss %>%
haven::zap_labels() %>%
mutate(
chldidel_fctr1 = factor(case_when(chldidel %in% c(0:1) ~ "1small (0-1)",
chldidel %in% c(2:3) ~ "2normative (2-3)",
chldidel %in% c(4:7) ~ "3large (4+)",
chldidel %in% c(8) ~ "4As many")),
chldidel_fctr1 = relevel(chldidel_fctr1, ref = "2normative (2-3)"),
chldidel_fctr2 = factor(case_when(chldidel %in% c(0:1) ~ "1small (0-1)",
chldidel %in% c(2:3) ~ "2normative (2-3)",
chldidel %in% c(4:7) ~ "3large (4+)")),
chldidel_fctr2 = relevel(chldidel_fctr2, ref = "2normative (2-3)"),
chldidel_rc = if_else(chldidel == 8, NA_real_, chldidel),
grngroup_rc = if_else(grngroup == 2, 0, grngroup),
grnsign_rc = if_else(grnsign == 2, 0, grnsign),
grnmoney_rc = if_else(grnmoney == 2, 0, grnmoney),
grndemo_rc = if_else(grndemo == 2, 0, grndemo),
# Reverse code
grwthelp_rc = 6-grwthelp,
grnexagg_rc = 6-grnexagg,
grnprog_rc = 6-grnprog,
naturdev_rc = 6-naturdev,
impgrn_rc = 6-impgrn,
grnecon_rc = 6-grnecon,
age_cat = case_when(
age %in% c(18:29) ~ "18-29",
age %in% c(30:39) ~ "30-39",
age %in% c(40:49) ~ "40-49",
age %in% c(50:64) ~ "50-64",
age %in% c(65:89) ~ "65-89"
),
race1 = if_else(is.na(racecen1) == F, 1, 0),
race2 = if_else(is.na(racecen2) == F, 1, 0),
race3 = if_else(is.na(racecen3) == F, 1, 0),
race_sum = race1 + race2 + race3,
race_two = if_else(race_sum > 1, 1, 0),
raceth = case_when(
race_two == 0 & racecen1 == 1 ~ "NH White",
race_two == 0 & racecen1 == 2 ~ "NH Black",
race_two == 0 & racecen1 == 3 ~ "AIAN",
race_two == 0 & racecen1 %in% c(4:10) ~ "Asian",
race_two == 0 & racecen1 %in% c(11:14) ~ "NHPI",
race_two == 0 & racecen1 == 15 ~ "Other",
race_two == 0 & racecen1 == 16 ~ "Hispanic",
race_two ==1 ~ "Two or More"),
race_d = if_else(raceth == 1, 1, 0),
# grneffme_rc = case_when(grneffme %in% c(1:2) ~ "1Agree",
# grneffme %in% c(3) ~ "2Neutral",
# grneffme %in% c(3:4) ~ "3Disagree"),
educ = case_when(degree %in% 0 ~ "1Less than HS",
degree %in% 1 ~ "2High School",
degree %in% 2 ~ "3Associate's",
degree %in% 3 ~ "4Bachelor's",
degree %in% 4 ~ "5Graduate"),
educ_d = if_else(educ > 2, 1, 0),
sex_rc = if_else(sex == 1, "Male", "Female"),
relitenv_rc = 5-relitenv
) %>%
select(id, chldidel, chldidel_fctr1, chldidel_rc, age, age_cat, polviews, scigrn:recycle, impgrn:grnexagg, grncon, helpharm:nobuygrn,
clmtcaus:watergen1, nukegen1, grngroup_rc:relitenv_rc)mutate: converted 'educ' from double to character (43 fewer NA)
new variable 'chldidel_fctr1' (factor) with 5 unique values and 33% NA
new variable 'chldidel_fctr2' (factor) with 4 unique values and 54% NA
new variable 'chldidel_rc' (double) with 9 unique values and 54% NA
new variable 'grngroup_rc' (double) with 3 unique values and 55% NA
new variable 'grnsign_rc' (double) with 3 unique values and 55% NA
new variable 'grnmoney_rc' (double) with 3 unique values and 55% NA
new variable 'grndemo_rc' (double) with 3 unique values and 55% NA
new variable 'grwthelp_rc' (double) with 6 unique values and 56% NA
new variable 'grnexagg_rc' (double) with 6 unique values and 56% NA
new variable 'grnprog_rc' (double) with 6 unique values and 56% NA
new variable 'naturdev_rc' (double) with 6 unique values and 56% NA
new variable 'impgrn_rc' (double) with 6 unique values and 56% NA
new variable 'grnecon_rc' (double) with 6 unique values and 55% NA
new variable 'age_cat' (character) with 6 unique values and 8% NA
new variable 'race1' (double) with 2 unique values and 0% NA
new variable 'race2' (double) with 2 unique values and 0% NA
new variable 'race3' (double) with 2 unique values and 0% NA
new variable 'race_sum' (double) with 4 unique values and 0% NA
new variable 'race_two' (double) with 2 unique values and 0% NA
new variable 'raceth' (character) with 9 unique values and 1% NA
new variable 'race_d' (double) with 2 unique values and 1% NA
new variable 'educ_d' (double) with 3 unique values and 1% NA
new variable 'sex_rc' (character) with 3 unique values and 2% NA
new variable 'relitenv_rc' (double) with 5 unique values and 37% NA
select: dropped 701 variables (year, wrkslf, wrkgovt, occ10, prestg10, …)
Subset
# Filter data sets one that includes "as many children as you want and another that excludes that category
gss_sub1 <- gss_sub0 %>%
select(id, age, sex_rc, race_d, educ_d, chldidel_fctr1) %>%
filter(complete.cases(.))select: dropped 52 variables (chldidel, chldidel_rc, age_cat, polviews, scigrn, …)
filter: removed 1,614 rows (40%), 2,418 rows remaining
gss_sub2 <- gss_sub0 %>%
select(id, age, sex_rc, race_d, educ_d, chldidel_rc) %>%
filter(complete.cases(.))select: dropped 52 variables (chldidel, chldidel_fctr1, age_cat, polviews, scigrn, …)
filter: removed 2,338 rows (58%), 1,694 rows remaining
pca <- gss_sub0 %>%
select(id, grnecon_rc, grnprog_rc, grwthelp_rc, ihlpgrn, impgrn_rc, grnexagg_rc, grneffme) %>%
filter(complete.cases(.))select: dropped 50 variables (chldidel, chldidel_fctr1, chldidel_rc, age, age_cat, …)
filter: removed 2,388 rows (59%), 1,644 rows remaining
Principal Component Analysis
Agree/Disagree response with loading values greater than .1
#pca1b <- pca1 %>% select(grnecon_rc, grnprog_rc, grwthelp_rc, ihlpgrn, impgrn_rc, grnexagg_rc, grneffme)
pc_test <- prcomp(formula = ~., data = pca[,-c(1)], center = T, scale. = T)
summary(pc_test)Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
Standard deviation 1.7902 1.0262 0.8752 0.8362 0.78119 0.59518 0.55901
Proportion of Variance 0.4578 0.1504 0.1094 0.0999 0.08718 0.05061 0.04464
Cumulative Proportion 0.4578 0.6082 0.7177 0.8176 0.90475 0.95536 1.00000
biplot(pc_test, scale = 0)pc_test_tibble <- pc_test$rotation %>%
as_tibble(rownames = "predictor")
pc_test_tibble %>%
select(predictor:PC3) %>%
pivot_longer(PC1:PC3, names_to = "component", values_to = "value") %>%
ggplot(aes(x = value, y = predictor)) +
geom_col(fill = "darkgreen", color = "darkgreen", alpha = 0.5) +
facet_wrap(~component) +
labs(y = NULL, x = "Value") +
theme_minimal()select: dropped 4 variables (PC4, PC5, PC6, PC7)
pivot_longer: reorganized (PC1, PC2, PC3) into (component, value) [was 7x4, now 21x3]
scores<-data.frame(pc_test$x)
pca_join <- cbind(pca,scores) %>%
select(id, PC1)select: dropped 13 variables (grnecon_rc, grnprog_rc, grwthelp_rc, ihlpgrn, impgrn_rc, …)
# merge with analysis dataframe
gss_sub1 <- left_join(gss_sub1, pca_join, by = c("id" = "id"))left_join: added one column (PC1)
> rows only in x 891
> rows only in y ( 117)
> matched rows 1,527
> =======
> rows total 2,418
gss_sub2 <- left_join(gss_sub2, pca_join, by = c("id" = "id"))left_join: added one column (PC1)
> rows only in x 621
> rows only in y ( 571)
> matched rows 1,073
> =======
> rows total 1,694
Multinomial Regression with dummy variables
test <- multinom(chldidel_fctr1 ~ PC1 + sex_rc + race_d + educ_d + scale(age), data = gss_sub1)# weights: 28 (18 variable)
initial value 2116.871489
iter 10 value 1488.702951
iter 20 value 1424.868755
final value 1424.846097
converged
summary(test)Warning in sqrt(diag(vc)): NaNs produced
Call:
multinom(formula = chldidel_fctr1 ~ PC1 + sex_rc + race_d + educ_d +
scale(age), data = gss_sub1)
Coefficients:
(Intercept) PC1 sex_rcMale race_d educ_d scale(age)
1small (0-1) -3.8687053 0.2874626 -0.25178468 0 0.9949492 -0.49610156
3large (4+) -1.8184204 -0.2370428 0.05344611 0 -0.6511560 -0.17578455
4As many -0.8515122 -0.0601730 -0.41973882 0 0.3432283 -0.08076576
Std. Errors:
(Intercept) PC1 sex_rcMale race_d educ_d scale(age)
1small (0-1) 1.0198825 0.08631703 0.2794492 NaN 1.0268337 0.14541860
3large (4+) 0.3779090 0.06162215 0.2241608 NaN 0.3873315 0.11356737
4As many 0.2811257 0.03281668 0.1172944 0 0.2863948 0.05878833
Residual Deviance: 2849.692
AIC: 2879.692
exp(coef(test)) (Intercept) PC1 sex_rcMale race_d educ_d scale(age)
1small (0-1) 0.02088539 1.3330407 0.7774121 1 2.7045870 0.6088998
3large (4+) 0.16228188 0.7889575 1.0549001 1 0.5214426 0.8387987
4As many 0.42676909 0.9416016 0.6572185 1 1.4094905 0.9224097
z <- summary(test)$coefficients/summary(test)$standard.errorsWarning in sqrt(diag(vc)): NaNs produced
Warning in sqrt(diag(vc)): NaNs produced
p <- (1 - pnorm(abs(z), 0, 1)) * 2
p (Intercept) PC1 sex_rcMale race_d educ_d
1small (0-1) 1.486671e-04 0.0008674900 0.3675864633 NaN 0.33257076
3large (4+) 1.495808e-06 0.0001197125 0.8115494938 NaN 0.09273692
4As many 2.454158e-03 0.0667119288 0.0003455644 NaN 0.23074407
scale(age)
1small (0-1) 0.000645967
3large (4+) 0.121659907
4As many 0.169491340
Poisson Regression
fit1 <- glm(chldidel_rc~PC1 + sex_rc + race_d + educ_d + scale(age), data = gss_sub2, family = poisson)
summary(fit1)
Call:
glm(formula = chldidel_rc ~ PC1 + sex_rc + race_d + educ_d +
scale(age), family = poisson, data = gss_sub2)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3682 -0.2791 -0.1671 0.3467 2.0614
Coefficients: (1 not defined because of singularities)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.992372 0.082294 12.059 < 2e-16 ***
PC1 -0.036827 0.011313 -3.255 0.00113 **
sex_rcMale -0.001873 0.039806 -0.047 0.96247
race_d NA NA NA NA
educ_d -0.133038 0.083803 -1.588 0.11240
scale(age) 0.007374 0.020235 0.364 0.71554
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 349.57 on 1072 degrees of freedom
Residual deviance: 335.79 on 1068 degrees of freedom
(621 observations deleted due to missingness)
AIC: 3236.4
Number of Fisher Scoring iterations: 4