Exploratory Data Analysis
Data summary

| | |
|---|---|
| Name | df2 |
| Number of rows | 1416 |
| Number of columns | 16 |
| _______________________ | |
| Column type frequency: | |
| factor | 15 |
| numeric | 1 |
| ________________________ | |
| Group variables | None |

Variable type: factor

| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| COHORT | 0 | 1 | FALSE | 3 | You: 629, Old: 442, Mid: 345 |
| gender | 0 | 1 | FALSE | 3 | Wom: 707, Man: 617, Non: 92 |
| lesbian_gay | 0 | 1 | FALSE | 2 | Yes: 774, No: 642 |
| bisexual | 0 | 1 | FALSE | 2 | No: 942, Yes: 474 |
| other_sexual_minority | 0 | 1 | FALSE | 2 | No: 1248, Yes: 168 |
| white | 0 | 1 | FALSE | 2 | Yes: 929, No: 487 |
| black | 0 | 1 | FALSE | 2 | No: 1203, Yes: 213 |
| latino | 0 | 1 | FALSE | 2 | No: 1142, Yes: 274 |
| W1POVERTYCAT | 0 | 1 | FALSE | 4 | 300: 773, 100: 268, <10: 193, 200: 182 |
| US_born | 0 | 1 | FALSE | 2 | Yes: 1331, No: 85 |
| W1CONVERSION | 0 | 1 | FALSE | 2 | No: 1313, Yes: 103 |
| GEDUC2 | 0 | 1 | FALSE | 2 | Mor: 1133, Hig: 283 |
| came_out_friend | 0 | 1 | FALSE | 2 | Yes: 1360, No: 56 |
| came_out_family | 0 | 1 | FALSE | 2 | Yes: 1209, No: 207 |
| outed_to_family | 0 | 1 | FALSE | 2 | Yes: 825, No: 591 |

Variable type: numeric

| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| ACE_index | 0 | 1 | 3.17 | 2.14 | 0 | 1 | 3 | 5 | 8 | ▇▇▃▆▂ |

# Restore the objects saved in the two .rda files. df3 (used below) is
# presumably restored by the second load() call.
# NOTE(review): nothing from df1.rda is referenced in this chunk -- confirm
# that the first load() is still needed.
load("~/Desktop/M.A. Demography/7. Thesis/LGBT_coming out/R/df1.rda")
load("~/Desktop/M.A. Demography/7. Thesis/LGBT_coming out/R/df3.rda")

# Keep only the three age columns that are summarised next.
df <- dplyr::select(
  df3,
  age_LGB_comingout_friend,
  age_LGB_comingout_family,
  age_LGB_outed_to_family
)

# Recode the value 99 to NA in every age column so it is left out of the
# summary statistics (presumably 99 is a missing-data code -- confirm against
# the codebook).
df <- mutate(
  df,
  age_LGB_comingout_friend = ifelse(
    age_LGB_comingout_friend == 99, NA, age_LGB_comingout_friend
  ),
  age_LGB_comingout_family = ifelse(
    age_LGB_comingout_family == 99, NA, age_LGB_comingout_family
  ),
  age_LGB_outed_to_family = ifelse(
    age_LGB_outed_to_family == 99, NA, age_LGB_outed_to_family
  )
)

# Print the skim summary of the recoded columns.
skim(df)
Data summary

| | |
|---|---|
| Name | df |
| Number of rows | 1340 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 3 |
| ________________________ | |
| Group variables | None |

Variable type: numeric

| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age_LGB_comingout_friend | 55 | 0.96 | 19.74 | 6.85 | 4 | 16 | 18 | 21 | 55 | ▂▇▂▁▁ |
| age_LGB_comingout_family | 200 | 0.85 | 21.39 | 7.47 | 3 | 17 | 19 | 24 | 57 | ▁▇▂▁▁ |
| age_LGB_outed_to_family | 580 | 0.57 | 19.54 | 7.72 | 2 | 15 | 18 | 22 | 56 | ▁▇▂▁▁ |

# Median of each age column, with NA values dropped before computing.
median(df[["age_LGB_comingout_family"]], na.rm = TRUE)
## [1] 19
median(df[["age_LGB_comingout_friend"]], na.rm = TRUE)
## [1] 18
median(df[["age_LGB_outed_to_family"]], na.rm = TRUE)
## [1] 18
Outcome Variables of Interest
# One frequency table per outcome column, each with a totals row appended.
df2 %>%
  tabyl(came_out_friend) %>%
  adorn_totals()

df2 %>%
  tabyl(came_out_family) %>%
  adorn_totals()

df2 %>%
  tabyl(outed_to_family) %>%
  adorn_totals()

# Combine the objects a, b and c with `+` and print the result.
# NOTE(review): a, b and c are not defined in the visible code; presumably
# they are plots built in a chunk that is not shown -- confirm.
a + b + c
## Warning: Removed 55 rows containing non-finite values (stat_bin).
## Warning: Removed 200 rows containing non-finite values (stat_bin).
## Warning: Removed 580 rows containing non-finite values (stat_bin).

Independent Variables of Interest
Categorical Variables
# Drop the three outcome columns from df2, then pass the remaining columns to
# inspect_cat() and keep the result in x for plotting.
x <- df2 %>%
  dplyr::select(-came_out_family, -came_out_friend, -outed_to_family) %>%
  inspect_cat()

# Print the column names of df2.
names(df2)
## [1] "COHORT" "gender" "lesbian_gay"
## [4] "bisexual" "other_sexual_minority" "white"
## [7] "black" "latino" "W1POVERTYCAT"
## [10] "US_born" "W1CONVERSION" "GEDUC2"
## [13] "ACE_index" "came_out_friend" "came_out_family"
## [16] "outed_to_family"
# Plot the inspect_cat() result stored in x.
show_plot(x)

Univariate Statistics
| gender | n | percent |
|---|---|---|
| Woman | 707 | 0.499 |
| Man | 617 | 0.436 |
| Non-binary/genderqueer | 92 | 0.065 |
| Total | 1416 | 1.000 |

| COHORT | n | percent |
|---|---|---|
| Younger | 629 | 0.444 |
| Middle | 345 | 0.244 |
| Older | 442 | 0.312 |
| Total | 1416 | 1.000 |

| lesbian_gay | n | percent |
|---|---|---|
| Yes | 774 | 0.547 |
| No | 642 | 0.453 |
| Total | 1416 | 1.000 |

| white | n | percent |
|---|---|---|
| No | 487 | 0.344 |
| Yes | 929 | 0.656 |
| Total | 1416 | 1.000 |

| W1POVERTYCAT | n | percent |
|---|---|---|
| <100% | 193 | 0.136 |
| 100-199% | 268 | 0.189 |
| 200-299% | 182 | 0.129 |
| 300%+ | 773 | 0.546 |
| Total | 1416 | 1.000 |

| US_born | n | percent |
|---|---|---|
| Yes | 1331 | 0.94 |
| No | 85 | 0.06 |
| Total | 1416 | 1.00 |

| W1CONVERSION | n | percent |
|---|---|---|
| No | 1313 | 0.927 |
| Yes | 103 | 0.073 |
| Total | 1416 | 1.000 |

| GEDUC2 | n | percent |
|---|---|---|
| High school less | 283 | 0.2 |
| More than high school | 1133 | 0.8 |
| Total | 1416 | 1.0 |

Continuous Variable
# Frequency table of ACE_index values with a totals row, rounded to three
# decimals (half-up), rendered as a kable with the "classic" theme.
# full_width is spelled FALSE rather than F: F is an ordinary variable that
# can be reassigned, whereas FALSE is a reserved constant.
df2 %>%
  tabyl(ACE_index) %>%
  adorn_totals() %>%
  adorn_rounding(digits = 3, rounding = "half up") %>%
  kable() %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
| ACE_index | n | percent |
|---|---|---|
| 0 | 140 | 0.099 |
| 1 | 245 | 0.173 |
| 2 | 221 | 0.156 |
| 3 | 219 | 0.155 |
| 4 | 183 | 0.129 |
| 5 | 180 | 0.127 |
| 6 | 114 | 0.081 |
| 7 | 77 | 0.054 |
| 8 | 37 | 0.026 |
| Total | 1416 | 1.000 |

Data summary

| | |
|---|---|
| Name | df2$ACE_index |
| Number of rows | 1416 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | None |

Variable type: numeric

| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| data | 0 | 1 | 3.17 | 2.14 | 0 | 1 | 3 | 5 | 8 | ▇▇▃▆▂ |

__