## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
# Checking Missing Completely At Random (MCAR) for each outcome variable.
# Each section builds a subset of df1 (covariates + one outcome) and passes it
# to mcar_test(); the top-level calls are kept explicit so results auto-print.

# Large penalty on scientific notation so results print in fixed notation.
options(scipen = 999)

# Covariates shared by every MCAR test below. COHORT is handled separately so
# it can be left out of the "without COHORT" checks.
mcar_covariates <- c(
  "gender", "race", "W1POVERTYCAT",
  "sexual_orientation", "W1CONVERSION",
  "US_born", "GEDUC2", "ACE_index"
)

# Select the columns for one MCAR test from df1, in the order:
# COHORT (optional), the shared covariates, then the outcome column.
#   outcome:     name of the outcome column (character scalar)
#   with_cohort: include the COHORT column? (default TRUE)
mcar_subset <- function(outcome, with_cohort = TRUE) {
  cols <- c(if (with_cohort) "COHORT", mcar_covariates, outcome)
  df1 %>% select(all_of(cols))
}

# Variable 1: Age coming out to family
mcar1 <- mcar_subset("age_LGB_comingout_family")
mcar_test(mcar1)

# Variable 2: Age coming out to friend
mcar2 <- mcar_subset("age_LGB_comingout_friend")
mcar_test(mcar2)
# Without COHORT
mcar2b <- mcar_subset("age_LGB_comingout_friend", with_cohort = FALSE)
mcar_test(mcar2b)

# Variable 3: Age outed to family
mcar3 <- mcar_subset("age_LGB_outed_to_family")
mcar_test(mcar3)
# Without COHORT
mcar3b <- mcar_subset("age_LGB_outed_to_family", with_cohort = FALSE)
mcar_test(mcar3b)

# Variable 4: Came out to friend
mcar4 <- mcar_subset("came_out_friend")
mcar_test(mcar4)

# Variable 5: Came out to family
mcar5 <- mcar_subset("came_out_family")
mcar_test(mcar5)

# Variable 6: Outed to family
mcar6 <- mcar_subset("outed_to_family")
mcar_test(mcar6)
The MCAR tests show that patterns of missing data arise when Cohort is included for 2 of the outcome variables of interest: (2) coming out to a friend and (3) outed to family without consent. For the “(1) Coming out to Family” variable, the MCAR test was not significant, meaning that the data can be assumed to be Missing Completely at Random.
For the 2 other variables, when the COHORT variable is removed from Little’s MCAR test, the test is no longer significant. This may mean that our Cohort variable is the covariate that has an impact on the missingness pattern.
# Missing data for the dependent (y) age variables of interest.
# Keep every row where at least one of the three age outcomes is NA or coded 98
# (98 appears to be the "don't recall" code, per the plot caption below --
# TODO confirm against the codebook).
missing_age_data <- df1 %>%
  filter(
    is.na(age_LGB_comingout_friend) | age_LGB_comingout_friend == 98 |
      is.na(age_LGB_comingout_family) | age_LGB_comingout_family == 98 |
      is.na(age_LGB_outed_to_family) | age_LGB_outed_to_family == 98
  )

# Bar chart: number of such rows in each COHORT.
ggplot(missing_age_data, aes(x = COHORT, fill = COHORT)) +
  geom_bar() +
  labs(
    title = "Missing Data by Age Group",
    caption = "this data includes 'don't recall' age missing data"
  ) +
  theme_minimal()
# Covariates to compare between rows with observed vs. missing outcome values.
explanatory <- c(
  "COHORT", "gender",
  "lesbian_gay", "bisexual", "other_sexual_minority",
  "white", "black", "latino",
  "W1POVERTYCAT", "US_born", "W1CONVERSION", "GEDUC2",
  "ACE_index"
)

# Outcome whose missingness is examined first.
dependent <- "age_LGB_comingout_friend"

# NOTE(review): presumably finalfit::missing_compare -- confirm which package
# provides it. The result auto-prints at top level.
missing_compare(df1, dependent, explanatory)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
# Repeat the missingness comparison for the "age outed to family" outcome,
# reusing the `explanatory` covariate vector defined earlier in the script.
dependent <- "age_LGB_outed_to_family"
missing_compare(df1, dependent, explanatory)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
Missing data does have a pattern for two of the variables of interest, and that pattern is related to age/COHORT.
The Middle and Older cohorts have more missing data. More time has passed since the event took place for these cohorts, which may explain their higher frequency of missing data.