# Entropy plot - the elbow indicates the number of classes that should be
# considered.
# Drop the first column of d_std before clustering.
d <- subset(d_std, select = -c(1))
# NOTE(review): `output` must already exist for clustCombiOptim() below; the
# call that creates it is commented out (presumably cached because it is
# slow - confirm). Uncomment it on a fresh session.
# output <- clustCombi(data = d)
output2 <- clustCombiOptim(output, plot = TRUE)
# Calculate and print LRT scores.
# BLRT - bootstrap likelihood ratio test: compares the same model with
# different numbers of classes. A significant result indicates that the model
# with more classes fits better than the one with fewer (should be
# significant).
# NOTE(review): `lrt` must already exist for print() below; the call that
# creates it is commented out (presumably cached because the bootstrap is
# slow - confirm).
# lrt <- mclustBootstrapLRT(d, mclust.options("emModelNames"))
print(lrt)
-------------------------------------------------------------
Bootstrap sequential LRT for the number of mixture components
-------------------------------------------------------------
Model = EII
Replications = 999
LRTS bootstrap p-value
1 vs 2 4.504357e+02 0.001
2 vs 3 7.954769e+01 0.001
3 vs 4 1.784418e+02 0.001
4 vs 5 7.649047e+01 0.001
5 vs 6 5.237173e+01 0.001
6 vs 7 5.490722e+01 0.001
7 vs 8 4.971794e+01 0.001
8 vs 9 1.254765e-03 0.803
Calculate and Plot BIC Scores
Code
# Calculate and plot BIC scores.
# Sample-adjusted BIC: in the literature the lowest BIC equals the
# best-fitting model (specific to Mplus); in R, higher BIC is better.
# NOTE(review): `BIC` must already exist for the plot call below; the call
# that creates it is commented out (presumably cached - confirm).
# BIC <- mclustBIC(d, G = 1:9, mclust.options("emModelNames"))
plot.mclustBIC(BIC)
Calculate and Plot ICL Scores
Code
# Calculate and plot ICL scores.
# ICL is the BIC penalized by the estimated mean entropy; it is not great for
# datasets with overlapping classes. As with BIC, higher values are better.
# NOTE(review): `icl` must already exist for the plot call below; the call
# that creates it is commented out (presumably cached - confirm).
# icl <- mclustICL(d, G = 1:9, mclust.options("emModelNames"))
plot.mclustICL(icl)
# Summarise and visualise a fitted mixture-model solution.
#
# Prints the model summary and then draws, in order:
#   1. a dodged bar chart of the mean of each variable by class, with
#      +/- 1 SE error bars;
#   2. density curves of each variable, coloured by class;
#   3. for every variable, its density faceted by class;
#   4. for every class, a jittered scatter highlighting that class's members;
#   5. the density of the posterior assignment probability
#      (1 - classification uncertainty).
#
# Args:
#   orig: fitted model object; its `classification` and `uncertainty`
#         components are read, and it is passed to summary.Mclust().
#
# Relies on the global data frame `d_std` having columns s_std, r_std and
# e_std, with rows in the same order as orig$classification. Only a local
# copy of d_std is modified.
#
# Returns NULL invisibly; called for its printed output and plots.
viz2 <- function(orig) {
  mcm <- orig
  print(summary.Mclust(mcm))

  # Attach the class assignment and reshape to one row per (case, variable).
  d_std$class <- mcm$classification
  df_long <- gather(
    d_std,
    key = "variable", value = "value",
    s_std, r_std, e_std
  )

  # summarySE() builds the table (mean, se, ...) needed for the bar chart.
  summ <- summarySE(
    df_long,
    measurevar = "value",
    groupvars = c("variable", "class"),
    na.rm = TRUE
  )

  # Bar plot of the means of each variable by class.
  means <- ggplot(summ, aes(x = class, y = value, fill = variable)) +
    geom_bar(stat = "summary", fun = "mean", position = "dodge") +
    geom_errorbar(
      aes(ymin = value - se, ymax = value + se),
      width = .2,
      position = position_dodge(.9)
    )
  print(means)

  df_long$class <- as.factor(df_long$class)

  # Layers shared by every density plot below.
  density_layers <- list(
    geom_density(alpha = 0.4, bw = .20),
    geom_vline(xintercept = 0, size = .5, color = "black"),
    xlim(-4.5, 2)
  )

  dist <- ggplot(df_long, aes(x = value, fill = class)) +
    density_layers +
    facet_wrap(~variable)
  print(dist)

  # One density figure per variable, faceted by class.
  for (v in unique(df_long$variable)) {
    print(v)
    one_var <- df_long[df_long$variable == v, ]
    print(
      ggplot(one_var, aes(x = value, fill = class)) +
        density_layers +
        facet_wrap(~class)
    )
  }

  # One scatter per class, highlighting the members of that class. Iterate
  # over the class labels that actually occur (instead of assuming they are
  # 1..K) and build the mask from df_long itself so its length always matches.
  for (k in levels(df_long$class)) {
    df_long$class_spec <- as.factor(as.integer(df_long$class == k))
    scatter <- ggplot(
      df_long,
      aes(x = variable, y = value, color = class_spec)
    ) +
      geom_jitter()
    print(paste("Class", k))
    print(scatter)
  }

  # Density of the posterior probability of the assigned class.
  # plot() draws as a side effect and returns NULL, so its result is not
  # printed (the original printed a stray "NULL").
  posterior_prob <- 1 - round(mcm$uncertainty, 5)
  plot(density(posterior_prob))

  invisible(NULL)
}
Five-Class Model
Code
# Print the summary and draw all diagnostic plots for the five-class model.
viz2(m5)
----------------------------------------------------
Gaussian finite mixture model fitted by EM algorithm
----------------------------------------------------
Mclust VII (spherical, varying volume) model with 5 components:
log-likelihood n df BIC ICL
-4123.645 1133 24 -8416.073 -8812.255
Clustering table:
1 2 3 4 5
226 179 630 65 33
[1] "s_std"
[1] "r_std"
[1] "e_std"
[1] "Class 1"
[1] "Class 2"
[1] "Class 3"
[1] "Class 4"
[1] "Class 5"
NULL
Six-Class Model
Code
# Print the summary and draw all diagnostic plots for the six-class model.
viz2(m6)
----------------------------------------------------
Gaussian finite mixture model fitted by EM algorithm
----------------------------------------------------
Mclust VII (spherical, varying volume) model with 6 components:
log-likelihood n df BIC ICL
-4099.492 1133 29 -8402.93 -8845.879
Clustering table:
1 2 3 4 5 6
231 198 554 78 39 33
A zorplon from Sagitarius-7
1
Alpha Male
1
An ether sniffing robot sex toy on wheels
1
Attack helicopter
1
Attack Helicopter
1
Hairdresser
1
HOKIE
1
Non-binary
1
Not relevant
1
Not Relevant
1
questioning
1
Sexist question
1
there are only two genders
1
there are only two genders fuck off
1
This is a silly option. Your biology determines this. That's science, that's fact. A better option is "what I feel my gender is".
1
Turtle
1
XX, XY, XO, XXY, XXX, or XYY the rest is made up
1
Code
# The only legitimate write-in response is "Non-binary" (already recoded as gq); all other write-ins are recoded as NA.
# Recode the numeric race/ethnicity code in df$race into a label, df$race_fin.
# Survey codes:
#   1 = "American Indian or Alaska Native"
#   2 = "Asian"
#   3 = "Black or African American"
#   4 = "Hispanic, Latino/Latina/Latinx, or Spanish origin"
#   5 = "Middle Eastern or North African"
#   6 = "Native Hawaiian or Other Pacific Islander"
#   7 = "White"
#   8 = "Another race or ethnicity not listed above" (write-in optional)
#
# NOTE(review): code "1" has no entry below, so a respondent who selected
# only "American Indian or Alaska Native" is labelled "Bi/Multiracial". This
# matches the original recode; confirm whether it is intended before adding
# "1" to the lookup (the URM grouping further down would need it as well).
race_labels <- c(
  "2" = "Asian",
  "3" = "Black or African American",
  "4" = "Hispanic, Latino/Latina/Latinx, or Spanish origin",
  "5" = "Middle Eastern or North African",
  "6" = "Native Hawaiian or Other Pacific Islander",
  "7" = "White",
  "8" = "Another race or ethnicity not listed above"
)

# Single recognised codes get their label; a missing race stays NA.
df$race_fin <- unname(race_labels[as.character(df$race)])
# Every other non-missing value is treated as bi/multiracial.
df$race_fin[!is.na(df$race) & is.na(df$race_fin)] <- "Bi/Multiracial"

# df$race_fin_temp <- 1
# df$race_fin_temp[!is.na(df$race_fin)] <- 2
# table(df$race, df$race_fin_temp)

# Inspect the free-text race/ethnicity write-ins.
table(df$racetxt)
--
1
AS THE DRIVEN SNOW
1
Asian
1
Basque
1
Black African/French
1
Caucasian (Pakistani)
1
Demographics are racist
1
Earthling
1
HOKIE
1
I highlighted groups that are closely related to mine throughout history/migration. If places are going to put "white", put european american as well. It's only being consistent. Speaking of which, hispanics are europeans, caucasian, white etc. This is all silly. Race/ethnicity is a personal identification, not a tool or weapon which this question aims to use it as.
1
I identify as a race and I don't want to divulge that info
1
Indian
2
Iranian
1
Israelite
1
Nepalese
1
Not relevant
1
South American Indian
1
south asian
1
South asian
1
South Asian
2
southern asian
1
this question is racist
1
Unaffiliated
1
unknown
1
Another race or ethnicity not listed above
18
Asian
482
Bi/Multiracial
123
Black or African American
37
Hispanic, Latino/Latina/Latinx, or Spanish origin
53
Middle Eastern or North African
49
Native Hawaiian or Other Pacific Islander
1
White
846
<NA>
145
Code
# Build df$race_fin2: a copy of df$race_fin in which the smaller categories
# listed below are collapsed into the single label "URM". All other values
# (including NA) are carried over unchanged.
urm_categories <- c(
  "Another race or ethnicity not listed above",
  "Native Hawaiian or Other Pacific Islander",
  "Black or African American",
  "Hispanic, Latino/Latina/Latinx, or Spanish origin",
  "Middle Eastern or North African"
)

df$race_fin2 <- df$race_fin
# %in% never yields NA, so rows with a missing race_fin are left untouched.
df$race_fin2[df$race_fin %in% urm_categories] <- "URM"

table(df$race_fin2, useNA = "always")
Asian Bi/Multiracial URM White <NA>
482 123 158 846 145
Call:corr.test(x = df$dadp, y = df$itp)
Correlation matrix
[1] -0.11
Sample Size
[1] 1718
These are the unadjusted probability values.
The probability values adjusted for multiple tests are in the p.adj object.
[1] 0
To see confidence intervals of the correlations, print with the short=FALSE option
female gq male <NA>
0.35127979 0.01323919 0.61076787 0.02471315
Code
# Counts per race/ethnicity label in d, with an explicit NA count.
table(d$race_fin, useNA ="always")
Another race or ethnicity not listed above
10
Asian
315
Bi/Multiracial
90
Black or African American
23
Hispanic, Latino/Latina/Latinx, or Spanish origin
36
Middle Eastern or North African
44
Native Hawaiian or Other Pacific Islander
1
White
581
<NA>
33
Code
# Same table expressed as proportions; NA is included in the denominator.
prop.table(table(d$race_fin, useNA ="always"))
Another race or ethnicity not listed above
0.0088261253
Asian
0.2780229479
Bi/Multiracial
0.0794351280
Black or African American
0.0203000883
Hispanic, Latino/Latina/Latinx, or Spanish origin
0.0317740512
Middle Eastern or North African
0.0388349515
Native Hawaiian or Other Pacific Islander
0.0008826125
White
0.5127978817
<NA>
0.0291262136
Code
# Counts per collapsed race/ethnicity label in d, with an explicit NA count.
table(d$race_fin2, useNA ="always")
Asian Bi/Multiracial URM White <NA>
315 90 114 581 33
Code
# Counts per value of d$intl_fin, with an explicit NA count.
table(d$intl_fin, useNA ="always")
domestic international <NA>
718 405 10
Code
# Same table expressed as proportions; NA is included in the denominator.
prop.table(table(d$intl_fin, useNA ="always"))
domestic international <NA>
0.633715799 0.357458076 0.008826125
Chi-Square Analyses
Discipline
Code
# Counts per value of d$maj_fin, with an explicit NA count.
table(d$maj_fin, useNA ="always")
Aerospace Engineering Agricultural and Biological Engineering
28 27
Bio Engineering Chemical Engineering
102 149
Civil Engineering Computer Engineering
100 19
Computer Science Computer Science and Engineering
55 21
Elec Engineering Environmental Engineering
157 28
Industrial Engineering Material Science and Engineering
29 162
Mechanical Engineering Nuclear Engineering
165 44
Other Engineering <NA>
34 13
Bio Engineering Chemical Engineering
Aerospace Engineering 0 0
Agricultural and Biological Engineering 27 0
Bio Engineering 102 0
Chemical Engineering 0 149
Civil Engineering 0 0
Computer Engineering 0 0
Computer Science 0 0
Computer Science and Engineering 0 0
Elec Engineering 0 0
Environmental Engineering 0 0
Industrial Engineering 0 0
Material Science and Engineering 0 0
Mechanical Engineering 0 0
Nuclear Engineering 0 0
Other Engineering 0 0
<NA> 0 0
Civil Engineering
Aerospace Engineering 0
Agricultural and Biological Engineering 0
Bio Engineering 0
Chemical Engineering 0
Civil Engineering 100
Computer Engineering 0
Computer Science 0
Computer Science and Engineering 0
Elec Engineering 0
Environmental Engineering 0
Industrial Engineering 0
Material Science and Engineering 0
Mechanical Engineering 0
Nuclear Engineering 0
Other Engineering 0
<NA> 0
Computer Science and Engineering
Aerospace Engineering 0
Agricultural and Biological Engineering 0
Bio Engineering 0
Chemical Engineering 0
Civil Engineering 0
Computer Engineering 19
Computer Science 55
Computer Science and Engineering 21
Elec Engineering 0
Environmental Engineering 0
Industrial Engineering 0
Material Science and Engineering 0
Mechanical Engineering 0
Nuclear Engineering 0
Other Engineering 0
<NA> 0
Elec Engineering
Aerospace Engineering 0
Agricultural and Biological Engineering 0
Bio Engineering 0
Chemical Engineering 0
Civil Engineering 0
Computer Engineering 0
Computer Science 0
Computer Science and Engineering 0
Elec Engineering 157
Environmental Engineering 0
Industrial Engineering 0
Material Science and Engineering 0
Mechanical Engineering 0
Nuclear Engineering 0
Other Engineering 0
<NA> 0
Material Science and Engineering
Aerospace Engineering 0
Agricultural and Biological Engineering 0
Bio Engineering 0
Chemical Engineering 0
Civil Engineering 0
Computer Engineering 0
Computer Science 0
Computer Science and Engineering 0
Elec Engineering 0
Environmental Engineering 0
Industrial Engineering 0
Material Science and Engineering 162
Mechanical Engineering 0
Nuclear Engineering 0
Other Engineering 0
<NA> 0
Mechanical Engineering
Aerospace Engineering 0
Agricultural and Biological Engineering 0
Bio Engineering 0
Chemical Engineering 0
Civil Engineering 0
Computer Engineering 0
Computer Science 0
Computer Science and Engineering 0
Elec Engineering 0
Environmental Engineering 0
Industrial Engineering 0
Material Science and Engineering 0
Mechanical Engineering 165
Nuclear Engineering 0
Other Engineering 0
<NA> 0
Other Engineering <NA>
Aerospace Engineering 28 0
Agricultural and Biological Engineering 0 0
Bio Engineering 0 0
Chemical Engineering 0 0
Civil Engineering 0 0
Computer Engineering 0 0
Computer Science 0 0
Computer Science and Engineering 0 0
Elec Engineering 0 0
Environmental Engineering 28 0
Industrial Engineering 29 0
Material Science and Engineering 0 0
Mechanical Engineering 0 0
Nuclear Engineering 44 0
Other Engineering 34 0
<NA> 13 0
Code
# Counts per value of d$maj_fin2, with an explicit NA count.
table(d$maj_fin2, useNA ="always")
Bio Engineering Chemical Engineering
129 149
Civil Engineering Computer Science and Engineering
100 95
Elec Engineering Material Science and Engineering
157 162
Mechanical Engineering Other Engineering
165 176
<NA>
0
Bio Engineering Chemical Engineering Civil Engineering
domestic 100 111 46
international 28 37 54
Computer Science and Engineering Elec Engineering
domestic 47 66
international 48 91
Material Science and Engineering Mechanical Engineering
domestic 140 87
international 21 76
Other Engineering
domestic 121
international 50
Code
# Show the `stdres` component of `chiout`.
# NOTE(review): `chiout` is presumably the result of a chisq.test() call that
# is not shown here, making these standardized residuals - confirm.
chiout$stdres
Bio Engineering Chemical Engineering Civil Engineering
domestic 3.551637 3.008327 -3.913496
international -3.551637 -3.008327 3.913496
Computer Science and Engineering Elec Engineering
domestic -3.068171 -6.160828
international 3.068171 6.160828
Material Science and Engineering Mechanical Engineering
domestic 6.572379 -3.037176
international -6.572379 3.037176
Other Engineering
domestic 2.018462
international -2.018462
Bio Engineering Chemical Engineering
Aerospace Engineering 0 0
Agricultural and Biological Engineering 27 0
Bio Engineering 102 0
Chemical Engineering 0 149
Civil Engineering 0 0
Computer Engineering 0 0
Computer Science 0 0
Computer Science and Engineering 0 0
Elec Engineering 0 0
Environmental Engineering 0 0
Industrial Engineering 0 0
Material Science and Engineering 0 0
Mechanical Engineering 0 0
Nuclear Engineering 0 0
Other Engineering 0 0
<NA> 0 0
Civil Engineering
Aerospace Engineering 0
Agricultural and Biological Engineering 0
Bio Engineering 0
Chemical Engineering 0
Civil Engineering 100
Computer Engineering 0
Computer Science 0
Computer Science and Engineering 0
Elec Engineering 0
Environmental Engineering 0
Industrial Engineering 0
Material Science and Engineering 0
Mechanical Engineering 0
Nuclear Engineering 0
Other Engineering 0
<NA> 0
Computer Science and Engineering
Aerospace Engineering 0
Agricultural and Biological Engineering 0
Bio Engineering 0
Chemical Engineering 0
Civil Engineering 0
Computer Engineering 19
Computer Science 55
Computer Science and Engineering 21
Elec Engineering 0
Environmental Engineering 0
Industrial Engineering 0
Material Science and Engineering 0
Mechanical Engineering 0
Nuclear Engineering 0
Other Engineering 0
<NA> 0
Electrical Engineering
Aerospace Engineering 0
Agricultural and Biological Engineering 0
Bio Engineering 0
Chemical Engineering 0
Civil Engineering 0
Computer Engineering 0
Computer Science 0
Computer Science and Engineering 0
Elec Engineering 157
Environmental Engineering 0
Industrial Engineering 0
Material Science and Engineering 0
Mechanical Engineering 0
Nuclear Engineering 0
Other Engineering 0
<NA> 0
Material Science and Engineering
Aerospace Engineering 0
Agricultural and Biological Engineering 0
Bio Engineering 0
Chemical Engineering 0
Civil Engineering 0
Computer Engineering 0
Computer Science 0
Computer Science and Engineering 0
Elec Engineering 0
Environmental Engineering 0
Industrial Engineering 0
Material Science and Engineering 162
Mechanical Engineering 0
Nuclear Engineering 0
Other Engineering 0
<NA> 0
Mechanical Engineering
Aerospace Engineering 0
Agricultural and Biological Engineering 0
Bio Engineering 0
Chemical Engineering 0
Civil Engineering 0
Computer Engineering 0
Computer Science 0
Computer Science and Engineering 0
Elec Engineering 0
Environmental Engineering 0
Industrial Engineering 0
Material Science and Engineering 0
Mechanical Engineering 165
Nuclear Engineering 0
Other Engineering 0
<NA> 0
Other Engineering <NA>
Aerospace Engineering 28 0
Agricultural and Biological Engineering 0 0
Bio Engineering 0 0
Chemical Engineering 0 0
Civil Engineering 0 0
Computer Engineering 0 0
Computer Science 0 0
Computer Science and Engineering 0 0
Elec Engineering 0 0
Environmental Engineering 28 0
Industrial Engineering 29 0
Material Science and Engineering 0 0
Mechanical Engineering 0 0
Nuclear Engineering 44 0
Other Engineering 34 0
<NA> 13 0
Code
# Counts per value of d$maj_fin2, with an explicit NA count.
table(d$maj_fin2, useNA ="always")
Bio Engineering Chemical Engineering
129 149
Civil Engineering Computer Science and Engineering
100 95
Electrical Engineering Material Science and Engineering
157 162
Mechanical Engineering Other Engineering
165 176
<NA>
0