1 Load Libraries

library(reshape2) # for colsplit()
library(afex)
library(emmeans)
library(sjPlot)
library(nnet)
library(ggplot2)
library(psych)
library(dplyr)
library(tidyr)

2 Load Data

# Read the raw survey export. Empty strings, single spaces and the literal
# "NA" are all treated as missing values.
import <- read.csv(
  file = "finaldata.csv",
  header = TRUE, # spelled out: `T` is an ordinary variable that can be reassigned
  na.strings = c("", " ", "NA")
)

# Columns kept for the analysis, written as new_name = "column name in import".
# Pairing each source column with its new name in one place keeps the selection
# and the renaming from drifting out of sync.
keep_cols <- c(
  R_IDs     = "R_IDs",
  Major     = "Major",
  start     = "Start.Year.Month",
  progress  = "Q19.3",
  gender    = "Q19.16",
  gendertxt = "Q19.16_7_TEXT",
  race      = "Q19.13",
  racetxt   = "Q19.13_8_TEXT",
  dadp      = "Q13.1_1",
  itp       = "Q13.1_2",
  country   = "Country"
)
df <- import[, unname(keep_cols), drop = FALSE]
colnames(df) <- names(keep_cols)

# Read the class assignments and attach the respondent ID (R_IDs) from the
# survey export, matching rows on the shared column X.
# NOTE(review): X is presumably a row-index column present in both files -
# confirm it identifies the same respondent in each.
classes <- read.csv(file = "classes.csv", header = TRUE)

# merge() defaults to an inner join (all = FALSE): rows whose X value has no
# match in the other table are dropped from the result.
classes <- merge(classes, subset(import, select = c(R_IDs, X)), by = "X")

# Swap the labels of classes 2 and 3. The original labels are kept in
# class_bu, and both masks are computed from that backup so the first
# reassignment cannot feed into the second.
classes$class_bu <- classes$class
was_two <- classes$class_bu == "2"
was_three <- classes$class_bu == "3"
classes$class[was_two] <- "3"
classes$class[was_three] <- "2"

2.1 Clean Major

# Frequency of every raw Major label; useNA = "always" adds a count of
# missing values even when there are none.
table(df$Major, useNA = "always")
## 
##                        Acoustic Engineering 
##                                           2 
##                       Aerospace Engineering 
##                                          50 
##     Agricultural and Biological Engineering 
##                                          47 
##                      Biological Engineering 
##                                           2 
##                                  Biomedical 
##                                         128 
##                      Biomedical Engineering 
##                                           3 
##       Chemical and Biomolecular Engineering 
##                                          18 
##          Chemical and Materials Engineering 
##                                           2 
##                        Chemical Engineering 
##                                         170 
##         Civil and Environmental Engineering 
##                                          18 
##                           Civil Engineering 
##                                         142 
##                        Computer Engineering 
##                                          33 
##                            Computer Science 
##                                          93 
##            Computer Science and Engineering 
##                                          21 
##                Computer Science Engineering 
##                                           4 
##           Electrical & Computer Engineering 
##                                          62 
##       Electrical and Electronic Engineering 
##                                           2 
##                      Electrical Engineering 
##                                         200 
## Electrical Engineering and Computer Science 
##                                           2 
##                                 Engineering 
##                                           2 
##                       Engineering Education 
##                                           1 
##                      Engineering Management 
##                                           4 
##                         Engineering Physics 
##                                           5 
##                               Environmental 
##                                          51 
##                   Environmental Engineering 
##                                           1 
##                                         Geo 
##                                           6 
##                                  Industrial 
##                                          40 
##            Material Science and Engineering 
##                                         185 
##                      Mechanical Engineering 
##                                         260 
##                         Nuclear Engineering 
##                                          59 
##                           Ocean Engineering 
##                                          11 
##                           Other Engineering 
##                                           3 
##                                   Petroleum 
##                                          17 
##                         Systems Engineering 
##                                          10 
##                                        <NA> 
##                                         100
# Collapse the raw Major labels into broader groups, stored in df$maj_fin.
# Names are raw labels from df$Major, values are the grouped label; the n in
# each trailing comment is the raw count from the frequency table above.
# Labels that are not listed here (and missing values) are carried over as-is.
major_groups <- c(
  # Small or miscellaneous programmes
  "Acoustic Engineering" = "Other Engineering", # n = 2
  "Engineering" = "Other Engineering", # n = 2
  "Engineering Education" = "Other Engineering", # n = 1
  "Engineering Management" = "Other Engineering", # n = 4
  "Engineering Physics" = "Other Engineering", # n = 5
  "Geo" = "Other Engineering", # n = 6
  "Other Engineering" = "Other Engineering", # n = 3
  "Ocean Engineering" = "Other Engineering", # n = 11
  "Petroleum" = "Other Engineering", # n = 17
  "Systems Engineering" = "Other Engineering", # n = 10

  # Bio
  "Biological Engineering" = "Bio Engineering", # n = 2
  "Biomedical" = "Bio Engineering", # n = 128
  "Biomedical Engineering" = "Bio Engineering", # n = 3

  # Chemical
  "Chemical and Biomolecular Engineering" = "Chemical Engineering", # n = 18
  "Chemical and Materials Engineering" = "Chemical Engineering", # n = 2

  # Civil
  "Civil and Environmental Engineering" = "Civil Engineering", # n = 18

  # Computer science
  "Computer Science Engineering" = "Computer Science and Engineering", # n = 4

  # Electrical
  "Electrical & Computer Engineering" = "Elec Engineering", # n = 62
  "Electrical and Electronic Engineering" = "Elec Engineering", # n = 2
  "Electrical Engineering" = "Elec Engineering", # n = 200
  "Electrical Engineering and Computer Science" = "Elec Engineering", # n = 2

  # Label clean-up only
  "Environmental" = "Environmental Engineering", # n = 51
  "Industrial" = "Industrial Engineering" # n = 40
)

df$maj_fin <- df$Major

# Every lookup is keyed on the raw df$Major value, never on an already recoded
# one, so the order of the entries above does not matter.
needs_recode <- df$Major %in% names(major_groups)
df$maj_fin[needs_recode] <- unname(
  major_groups[as.character(df$Major[needs_recode])]
)


# Cross-tabulate raw Major (rows) against recoded maj_fin (columns) to check
# that each raw label landed in the intended group; useNA = "always" keeps a
# row and a column for missing values.
table(df$Major, df$maj_fin, useNA = "always")
##                                              
##                                               Aerospace Engineering
##   Acoustic Engineering                                            0
##   Aerospace Engineering                                          50
##   Agricultural and Biological Engineering                         0
##   Biological Engineering                                          0
##   Biomedical                                                      0
##   Biomedical Engineering                                          0
##   Chemical and Biomolecular Engineering                           0
##   Chemical and Materials Engineering                              0
##   Chemical Engineering                                            0
##   Civil and Environmental Engineering                             0
##   Civil Engineering                                               0
##   Computer Engineering                                            0
##   Computer Science                                                0
##   Computer Science and Engineering                                0
##   Computer Science Engineering                                    0
##   Electrical & Computer Engineering                               0
##   Electrical and Electronic Engineering                           0
##   Electrical Engineering                                          0
##   Electrical Engineering and Computer Science                     0
##   Engineering                                                     0
##   Engineering Education                                           0
##   Engineering Management                                          0
##   Engineering Physics                                             0
##   Environmental                                                   0
##   Environmental Engineering                                       0
##   Geo                                                             0
##   Industrial                                                      0
##   Material Science and Engineering                                0
##   Mechanical Engineering                                          0
##   Nuclear Engineering                                             0
##   Ocean Engineering                                               0
##   Other Engineering                                               0
##   Petroleum                                                       0
##   Systems Engineering                                             0
##   <NA>                                                            0
##                                              
##                                               Agricultural and Biological Engineering
##   Acoustic Engineering                                                              0
##   Aerospace Engineering                                                             0
##   Agricultural and Biological Engineering                                          47
##   Biological Engineering                                                            0
##   Biomedical                                                                        0
##   Biomedical Engineering                                                            0
##   Chemical and Biomolecular Engineering                                             0
##   Chemical and Materials Engineering                                                0
##   Chemical Engineering                                                              0
##   Civil and Environmental Engineering                                               0
##   Civil Engineering                                                                 0
##   Computer Engineering                                                              0
##   Computer Science                                                                  0
##   Computer Science and Engineering                                                  0
##   Computer Science Engineering                                                      0
##   Electrical & Computer Engineering                                                 0
##   Electrical and Electronic Engineering                                             0
##   Electrical Engineering                                                            0
##   Electrical Engineering and Computer Science                                       0
##   Engineering                                                                       0
##   Engineering Education                                                             0
##   Engineering Management                                                            0
##   Engineering Physics                                                               0
##   Environmental                                                                     0
##   Environmental Engineering                                                         0
##   Geo                                                                               0
##   Industrial                                                                        0
##   Material Science and Engineering                                                  0
##   Mechanical Engineering                                                            0
##   Nuclear Engineering                                                               0
##   Ocean Engineering                                                                 0
##   Other Engineering                                                                 0
##   Petroleum                                                                         0
##   Systems Engineering                                                               0
##   <NA>                                                                              0
##                                              
##                                               Bio Engineering
##   Acoustic Engineering                                      0
##   Aerospace Engineering                                     0
##   Agricultural and Biological Engineering                   0
##   Biological Engineering                                    2
##   Biomedical                                              128
##   Biomedical Engineering                                    3
##   Chemical and Biomolecular Engineering                     0
##   Chemical and Materials Engineering                        0
##   Chemical Engineering                                      0
##   Civil and Environmental Engineering                       0
##   Civil Engineering                                         0
##   Computer Engineering                                      0
##   Computer Science                                          0
##   Computer Science and Engineering                          0
##   Computer Science Engineering                              0
##   Electrical & Computer Engineering                         0
##   Electrical and Electronic Engineering                     0
##   Electrical Engineering                                    0
##   Electrical Engineering and Computer Science               0
##   Engineering                                               0
##   Engineering Education                                     0
##   Engineering Management                                    0
##   Engineering Physics                                       0
##   Environmental                                             0
##   Environmental Engineering                                 0
##   Geo                                                       0
##   Industrial                                                0
##   Material Science and Engineering                          0
##   Mechanical Engineering                                    0
##   Nuclear Engineering                                       0
##   Ocean Engineering                                         0
##   Other Engineering                                         0
##   Petroleum                                                 0
##   Systems Engineering                                       0
##   <NA>                                                      0
##                                              
##                                               Chemical Engineering
##   Acoustic Engineering                                           0
##   Aerospace Engineering                                          0
##   Agricultural and Biological Engineering                        0
##   Biological Engineering                                         0
##   Biomedical                                                     0
##   Biomedical Engineering                                         0
##   Chemical and Biomolecular Engineering                         18
##   Chemical and Materials Engineering                             2
##   Chemical Engineering                                         170
##   Civil and Environmental Engineering                            0
##   Civil Engineering                                              0
##   Computer Engineering                                           0
##   Computer Science                                               0
##   Computer Science and Engineering                               0
##   Computer Science Engineering                                   0
##   Electrical & Computer Engineering                              0
##   Electrical and Electronic Engineering                          0
##   Electrical Engineering                                         0
##   Electrical Engineering and Computer Science                    0
##   Engineering                                                    0
##   Engineering Education                                          0
##   Engineering Management                                         0
##   Engineering Physics                                            0
##   Environmental                                                  0
##   Environmental Engineering                                      0
##   Geo                                                            0
##   Industrial                                                     0
##   Material Science and Engineering                               0
##   Mechanical Engineering                                         0
##   Nuclear Engineering                                            0
##   Ocean Engineering                                              0
##   Other Engineering                                              0
##   Petroleum                                                      0
##   Systems Engineering                                            0
##   <NA>                                                           0
##                                              
##                                               Civil Engineering
##   Acoustic Engineering                                        0
##   Aerospace Engineering                                       0
##   Agricultural and Biological Engineering                     0
##   Biological Engineering                                      0
##   Biomedical                                                  0
##   Biomedical Engineering                                      0
##   Chemical and Biomolecular Engineering                       0
##   Chemical and Materials Engineering                          0
##   Chemical Engineering                                        0
##   Civil and Environmental Engineering                        18
##   Civil Engineering                                         142
##   Computer Engineering                                        0
##   Computer Science                                            0
##   Computer Science and Engineering                            0
##   Computer Science Engineering                                0
##   Electrical & Computer Engineering                           0
##   Electrical and Electronic Engineering                       0
##   Electrical Engineering                                      0
##   Electrical Engineering and Computer Science                 0
##   Engineering                                                 0
##   Engineering Education                                       0
##   Engineering Management                                      0
##   Engineering Physics                                         0
##   Environmental                                               0
##   Environmental Engineering                                   0
##   Geo                                                         0
##   Industrial                                                  0
##   Material Science and Engineering                            0
##   Mechanical Engineering                                      0
##   Nuclear Engineering                                         0
##   Ocean Engineering                                           0
##   Other Engineering                                           0
##   Petroleum                                                   0
##   Systems Engineering                                         0
##   <NA>                                                        0
##                                              
##                                               Computer Engineering
##   Acoustic Engineering                                           0
##   Aerospace Engineering                                          0
##   Agricultural and Biological Engineering                        0
##   Biological Engineering                                         0
##   Biomedical                                                     0
##   Biomedical Engineering                                         0
##   Chemical and Biomolecular Engineering                          0
##   Chemical and Materials Engineering                             0
##   Chemical Engineering                                           0
##   Civil and Environmental Engineering                            0
##   Civil Engineering                                              0
##   Computer Engineering                                          33
##   Computer Science                                               0
##   Computer Science and Engineering                               0
##   Computer Science Engineering                                   0
##   Electrical & Computer Engineering                              0
##   Electrical and Electronic Engineering                          0
##   Electrical Engineering                                         0
##   Electrical Engineering and Computer Science                    0
##   Engineering                                                    0
##   Engineering Education                                          0
##   Engineering Management                                         0
##   Engineering Physics                                            0
##   Environmental                                                  0
##   Environmental Engineering                                      0
##   Geo                                                            0
##   Industrial                                                     0
##   Material Science and Engineering                               0
##   Mechanical Engineering                                         0
##   Nuclear Engineering                                            0
##   Ocean Engineering                                              0
##   Other Engineering                                              0
##   Petroleum                                                      0
##   Systems Engineering                                            0
##   <NA>                                                           0
##                                              
##                                               Computer Science
##   Acoustic Engineering                                       0
##   Aerospace Engineering                                      0
##   Agricultural and Biological Engineering                    0
##   Biological Engineering                                     0
##   Biomedical                                                 0
##   Biomedical Engineering                                     0
##   Chemical and Biomolecular Engineering                      0
##   Chemical and Materials Engineering                         0
##   Chemical Engineering                                       0
##   Civil and Environmental Engineering                        0
##   Civil Engineering                                          0
##   Computer Engineering                                       0
##   Computer Science                                          93
##   Computer Science and Engineering                           0
##   Computer Science Engineering                               0
##   Electrical & Computer Engineering                          0
##   Electrical and Electronic Engineering                      0
##   Electrical Engineering                                     0
##   Electrical Engineering and Computer Science                0
##   Engineering                                                0
##   Engineering Education                                      0
##   Engineering Management                                     0
##   Engineering Physics                                        0
##   Environmental                                              0
##   Environmental Engineering                                  0
##   Geo                                                        0
##   Industrial                                                 0
##   Material Science and Engineering                           0
##   Mechanical Engineering                                     0
##   Nuclear Engineering                                        0
##   Ocean Engineering                                          0
##   Other Engineering                                          0
##   Petroleum                                                  0
##   Systems Engineering                                        0
##   <NA>                                                       0
##                                              
##                                               Computer Science and Engineering
##   Acoustic Engineering                                                       0
##   Aerospace Engineering                                                      0
##   Agricultural and Biological Engineering                                    0
##   Biological Engineering                                                     0
##   Biomedical                                                                 0
##   Biomedical Engineering                                                     0
##   Chemical and Biomolecular Engineering                                      0
##   Chemical and Materials Engineering                                         0
##   Chemical Engineering                                                       0
##   Civil and Environmental Engineering                                        0
##   Civil Engineering                                                          0
##   Computer Engineering                                                       0
##   Computer Science                                                           0
##   Computer Science and Engineering                                          21
##   Computer Science Engineering                                               4
##   Electrical & Computer Engineering                                          0
##   Electrical and Electronic Engineering                                      0
##   Electrical Engineering                                                     0
##   Electrical Engineering and Computer Science                                0
##   Engineering                                                                0
##   Engineering Education                                                      0
##   Engineering Management                                                     0
##   Engineering Physics                                                        0
##   Environmental                                                              0
##   Environmental Engineering                                                  0
##   Geo                                                                        0
##   Industrial                                                                 0
##   Material Science and Engineering                                           0
##   Mechanical Engineering                                                     0
##   Nuclear Engineering                                                        0
##   Ocean Engineering                                                          0
##   Other Engineering                                                          0
##   Petroleum                                                                  0
##   Systems Engineering                                                        0
##   <NA>                                                                       0
##                                              
##                                               Elec Engineering
##   Acoustic Engineering                                       0
##   Aerospace Engineering                                      0
##   Agricultural and Biological Engineering                    0
##   Biological Engineering                                     0
##   Biomedical                                                 0
##   Biomedical Engineering                                     0
##   Chemical and Biomolecular Engineering                      0
##   Chemical and Materials Engineering                         0
##   Chemical Engineering                                       0
##   Civil and Environmental Engineering                        0
##   Civil Engineering                                          0
##   Computer Engineering                                       0
##   Computer Science                                           0
##   Computer Science and Engineering                           0
##   Computer Science Engineering                               0
##   Electrical & Computer Engineering                         62
##   Electrical and Electronic Engineering                      2
##   Electrical Engineering                                   200
##   Electrical Engineering and Computer Science                2
##   Engineering                                                0
##   Engineering Education                                      0
##   Engineering Management                                     0
##   Engineering Physics                                        0
##   Environmental                                              0
##   Environmental Engineering                                  0
##   Geo                                                        0
##   Industrial                                                 0
##   Material Science and Engineering                           0
##   Mechanical Engineering                                     0
##   Nuclear Engineering                                        0
##   Ocean Engineering                                          0
##   Other Engineering                                          0
##   Petroleum                                                  0
##   Systems Engineering                                        0
##   <NA>                                                       0
##                                              
##                                               Environmental Engineering
##   Acoustic Engineering                                                0
##   Aerospace Engineering                                               0
##   Agricultural and Biological Engineering                             0
##   Biological Engineering                                              0
##   Biomedical                                                          0
##   Biomedical Engineering                                              0
##   Chemical and Biomolecular Engineering                               0
##   Chemical and Materials Engineering                                  0
##   Chemical Engineering                                                0
##   Civil and Environmental Engineering                                 0
##   Civil Engineering                                                   0
##   Computer Engineering                                                0
##   Computer Science                                                    0
##   Computer Science and Engineering                                    0
##   Computer Science Engineering                                        0
##   Electrical & Computer Engineering                                   0
##   Electrical and Electronic Engineering                               0
##   Electrical Engineering                                              0
##   Electrical Engineering and Computer Science                         0
##   Engineering                                                         0
##   Engineering Education                                               0
##   Engineering Management                                              0
##   Engineering Physics                                                 0
##   Environmental                                                      51
##   Environmental Engineering                                           1
##   Geo                                                                 0
##   Industrial                                                          0
##   Material Science and Engineering                                    0
##   Mechanical Engineering                                              0
##   Nuclear Engineering                                                 0
##   Ocean Engineering                                                   0
##   Other Engineering                                                   0
##   Petroleum                                                           0
##   Systems Engineering                                                 0
##   <NA>                                                                0
##                                              
##                                               Industrial Engineering
##   Acoustic Engineering                                             0
##   Aerospace Engineering                                            0
##   Agricultural and Biological Engineering                          0
##   Biological Engineering                                           0
##   Biomedical                                                       0
##   Biomedical Engineering                                           0
##   Chemical and Biomolecular Engineering                            0
##   Chemical and Materials Engineering                               0
##   Chemical Engineering                                             0
##   Civil and Environmental Engineering                              0
##   Civil Engineering                                                0
##   Computer Engineering                                             0
##   Computer Science                                                 0
##   Computer Science and Engineering                                 0
##   Computer Science Engineering                                     0
##   Electrical & Computer Engineering                                0
##   Electrical and Electronic Engineering                            0
##   Electrical Engineering                                           0
##   Electrical Engineering and Computer Science                      0
##   Engineering                                                      0
##   Engineering Education                                            0
##   Engineering Management                                           0
##   Engineering Physics                                              0
##   Environmental                                                    0
##   Environmental Engineering                                        0
##   Geo                                                              0
##   Industrial                                                      40
##   Material Science and Engineering                                 0
##   Mechanical Engineering                                           0
##   Nuclear Engineering                                              0
##   Ocean Engineering                                                0
##   Other Engineering                                                0
##   Petroleum                                                        0
##   Systems Engineering                                              0
##   <NA>                                                             0
##                                              
##                                               Material Science and Engineering
##   Acoustic Engineering                                                       0
##   Aerospace Engineering                                                      0
##   Agricultural and Biological Engineering                                    0
##   Biological Engineering                                                     0
##   Biomedical                                                                 0
##   Biomedical Engineering                                                     0
##   Chemical and Biomolecular Engineering                                      0
##   Chemical and Materials Engineering                                         0
##   Chemical Engineering                                                       0
##   Civil and Environmental Engineering                                        0
##   Civil Engineering                                                          0
##   Computer Engineering                                                       0
##   Computer Science                                                           0
##   Computer Science and Engineering                                           0
##   Computer Science Engineering                                               0
##   Electrical & Computer Engineering                                          0
##   Electrical and Electronic Engineering                                      0
##   Electrical Engineering                                                     0
##   Electrical Engineering and Computer Science                                0
##   Engineering                                                                0
##   Engineering Education                                                      0
##   Engineering Management                                                     0
##   Engineering Physics                                                        0
##   Environmental                                                              0
##   Environmental Engineering                                                  0
##   Geo                                                                        0
##   Industrial                                                                 0
##   Material Science and Engineering                                         185
##   Mechanical Engineering                                                     0
##   Nuclear Engineering                                                        0
##   Ocean Engineering                                                          0
##   Other Engineering                                                          0
##   Petroleum                                                                  0
##   Systems Engineering                                                        0
##   <NA>                                                                       0
##                                              
##                                               Mechanical Engineering
##   Acoustic Engineering                                             0
##   Aerospace Engineering                                            0
##   Agricultural and Biological Engineering                          0
##   Biological Engineering                                           0
##   Biomedical                                                       0
##   Biomedical Engineering                                           0
##   Chemical and Biomolecular Engineering                            0
##   Chemical and Materials Engineering                               0
##   Chemical Engineering                                             0
##   Civil and Environmental Engineering                              0
##   Civil Engineering                                                0
##   Computer Engineering                                             0
##   Computer Science                                                 0
##   Computer Science and Engineering                                 0
##   Computer Science Engineering                                     0
##   Electrical & Computer Engineering                                0
##   Electrical and Electronic Engineering                            0
##   Electrical Engineering                                           0
##   Electrical Engineering and Computer Science                      0
##   Engineering                                                      0
##   Engineering Education                                            0
##   Engineering Management                                           0
##   Engineering Physics                                              0
##   Environmental                                                    0
##   Environmental Engineering                                        0
##   Geo                                                              0
##   Industrial                                                       0
##   Material Science and Engineering                                 0
##   Mechanical Engineering                                         260
##   Nuclear Engineering                                              0
##   Ocean Engineering                                                0
##   Other Engineering                                                0
##   Petroleum                                                        0
##   Systems Engineering                                              0
##   <NA>                                                             0
##                                              
##                                               Nuclear Engineering
##   Acoustic Engineering                                          0
##   Aerospace Engineering                                         0
##   Agricultural and Biological Engineering                       0
##   Biological Engineering                                        0
##   Biomedical                                                    0
##   Biomedical Engineering                                        0
##   Chemical and Biomolecular Engineering                         0
##   Chemical and Materials Engineering                            0
##   Chemical Engineering                                          0
##   Civil and Environmental Engineering                           0
##   Civil Engineering                                             0
##   Computer Engineering                                          0
##   Computer Science                                              0
##   Computer Science and Engineering                              0
##   Computer Science Engineering                                  0
##   Electrical & Computer Engineering                             0
##   Electrical and Electronic Engineering                         0
##   Electrical Engineering                                        0
##   Electrical Engineering and Computer Science                   0
##   Engineering                                                   0
##   Engineering Education                                         0
##   Engineering Management                                        0
##   Engineering Physics                                           0
##   Environmental                                                 0
##   Environmental Engineering                                     0
##   Geo                                                           0
##   Industrial                                                    0
##   Material Science and Engineering                              0
##   Mechanical Engineering                                        0
##   Nuclear Engineering                                          59
##   Ocean Engineering                                             0
##   Other Engineering                                             0
##   Petroleum                                                     0
##   Systems Engineering                                           0
##   <NA>                                                          0
##                                              
##                                               Other Engineering <NA>
##   Acoustic Engineering                                        2    0
##   Aerospace Engineering                                       0    0
##   Agricultural and Biological Engineering                     0    0
##   Biological Engineering                                      0    0
##   Biomedical                                                  0    0
##   Biomedical Engineering                                      0    0
##   Chemical and Biomolecular Engineering                       0    0
##   Chemical and Materials Engineering                          0    0
##   Chemical Engineering                                        0    0
##   Civil and Environmental Engineering                         0    0
##   Civil Engineering                                           0    0
##   Computer Engineering                                        0    0
##   Computer Science                                            0    0
##   Computer Science and Engineering                            0    0
##   Computer Science Engineering                                0    0
##   Electrical & Computer Engineering                           0    0
##   Electrical and Electronic Engineering                       0    0
##   Electrical Engineering                                      0    0
##   Electrical Engineering and Computer Science                 0    0
##   Engineering                                                 2    0
##   Engineering Education                                       1    0
##   Engineering Management                                      4    0
##   Engineering Physics                                         5    0
##   Environmental                                               0    0
##   Environmental Engineering                                   0    0
##   Geo                                                         6    0
##   Industrial                                                  0    0
##   Material Science and Engineering                            0    0
##   Mechanical Engineering                                      0    0
##   Nuclear Engineering                                         0    0
##   Ocean Engineering                                          11    0
##   Other Engineering                                           3    0
##   Petroleum                                                  17    0
##   Systems Engineering                                        10    0
##   <NA>                                                        0  100
# Frequency check of the recoded majors; missing values get their own column.
table(df[["maj_fin"]], useNA = "always")
## 
##                   Aerospace Engineering Agricultural and Biological Engineering 
##                                      50                                      47 
##                         Bio Engineering                    Chemical Engineering 
##                                     133                                     190 
##                       Civil Engineering                    Computer Engineering 
##                                     160                                      33 
##                        Computer Science        Computer Science and Engineering 
##                                      93                                      25 
##                        Elec Engineering               Environmental Engineering 
##                                     266                                      52 
##                  Industrial Engineering        Material Science and Engineering 
##                                      40                                     185 
##                  Mechanical Engineering                     Nuclear Engineering 
##                                     260                                      59 
##                       Other Engineering                                    <NA> 
##                                      61                                     100

2.2 Clean Start Date

# The survey closed on 7/25/2018, so elapsed time is counted back from 2018.

# Split the start string on the space into a year part and a month part.
spl <- colsplit(df[["start"]], pattern = " ", names = c("year", "month"))
spl[["year"]] <- as.numeric(spl[["year"]])

# Calendar-year difference between start year and survey year (month unused).
spl[["years"]] <- 2018 - spl[["year"]]
df[["years"]] <- spl[["years"]]

2.3 Clean Milestones

# Raw milestone responses (multi-select, stored as comma-separated codes).
table(df$progress, useNA = "always")
## 
##     1   1,2 1,2,3   1,3   1,4     2   2,3   2,4     3     4  <NA> 
##   398   254    41     1     3    36     4     3     9   803   202
# VALUES: 1 through 4, multiples possible;
# 1 = “Comprehensive Exam (Written or Oral)”, 2 = “Dissertation Proposal”,
# 3 = “Dissertation Defense”, 4 = “Not applicable”
#
# Collapse each response to the highest milestone code selected (0 = only
# "Not applicable"); a 4 combined with a real milestone is ignored.
prog_map <- c(
  "4" = 0,
  "1" = 1, "1,4" = 1,
  "2" = 2, "1,2" = 2, "2,4" = 2,
  "3" = 3, "1,3" = 3, "2,3" = 3, "1,2,3" = 3
)
# Missing responses (and any combination not listed above) stay NA.
df$prog_fin <- unname(prog_map[as.character(df$progress)])

# Check the recode using the full column name. `df$prog` partially matches both
# `progress` and `prog_fin`, so `$` returns NULL and the table comes out empty.
table(df$prog_fin, useNA = "always")
## 
##    0    1    2    3 <NA> 
##  803  401  293   55  202

2.4 Clean Gender

# Raw gender responses (multi-select, stored as comma-separated codes).
table(df[["gender"]], useNA = "always")
## 
##             1 1,2,3,4,5,6,7           1,3         1,3,5           1,4 
##           541             1             3             1             1 
##           1,5           1,6             2           2,3         2,3,5 
##             1            23           987             5             1 
##           2,5           2,6           2,7             3             5 
##             1            33             6             2             1 
##             6             7          <NA> 
##             1            12           134
# VALUES: 1 through 7, multiples possible; 1 = “Female”, 2 = “Male”, 3 = “Genderqueer”
# 4 = “Agender”, 5 = “Transgender”, 6 = “Cisgender”, 7 = “A gender not listed” (Write in response optional)
#
# Collapse the selections into three groups. Female/male alone or paired with
# transgender/cisgender keep the binary label; any selection involving
# genderqueer, agender, or "not listed" (except the all-options response)
# becomes "gq". Everything else (5 alone, 6 alone, all seven options,
# missing) is left as NA.
female_codes <- c("1", "1,6", "1,5")
male_codes <- c("2", "2,6", "2,5")
gq_codes <- c("3", "4", "7", "1,3", "2,3", "1,4", "2,7", "1,3,5", "2,3,5")

df$gen_fin <- NA
df$gen_fin[df$gender %in% female_codes] <- "female"
df$gen_fin[df$gender %in% male_codes] <- "male"
df$gen_fin[df$gender %in% gq_codes] <- "gq"

# Cross-check the collapsed variable against the raw codes.
table(df[["gen_fin"]], df[["gender"]], useNA = "always")
##         
##            1 1,2,3,4,5,6,7 1,3 1,3,5 1,4 1,5 1,6   2 2,3 2,3,5 2,5 2,6 2,7   3
##   female 541             0   0     0   0   1  23   0   0     0   0   0   0   0
##   gq       0             0   3     1   1   0   0   0   5     1   0   0   6   2
##   male     0             0   0     0   0   0   0 987   0     0   1  33   0   0
##   <NA>     0             1   0     0   0   0   0   0   0     0   0   0   0   0
##         
##            5   6   7 <NA>
##   female   0   0   0    0
##   gq       0   0  12    0
##   male     0   0   0    0
##   <NA>     1   1   0  134
# Distribution of the collapsed gender variable, missing values included.
table(df[["gen_fin"]], useNA = "always")
## 
## female     gq   male   <NA> 
##    565     31   1021    137
# Free-text answers from the gender write-in field.
table(df[["gendertxt"]])
## 
##                                                                                                       A zorplon from Sagitarius-7 
##                                                                                                                                 1 
##                                                                                                                        Alpha Male 
##                                                                                                                                 1 
##                                                                                         An ether sniffing robot sex toy on wheels 
##                                                                                                                                 1 
##                                                                                                                 Attack helicopter 
##                                                                                                                                 1 
##                                                                                                                 Attack Helicopter 
##                                                                                                                                 1 
##                                                                                                                       Hairdresser 
##                                                                                                                                 1 
##                                                                                                                             HOKIE 
##                                                                                                                                 1 
##                                                                                                                        Non-binary 
##                                                                                                                                 1 
##                                                                                                                      Not relevant 
##                                                                                                                                 1 
##                                                                                                                      Not Relevant 
##                                                                                                                                 1 
##                                                                                                                       questioning 
##                                                                                                                                 1 
##                                                                                                                   Sexist question 
##                                                                                                                                 1 
##                                                                                                        there are only two genders 
##                                                                                                                                 1 
##                                                                                              there are only two genders fuck off  
##                                                                                                                                 1 
## This is a silly option. Your biology determines this. That's science, that's fact. A better option is "what I feel my gender is". 
##                                                                                                                                 1 
##                                                                                                                            Turtle 
##                                                                                                                                 1 
##                                                                                  XX, XY, XO, XXY, XXX, or XYY the rest is made up 
##                                                                                                                                 1
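# Legitimate write-in responses: "Non-binary" only; selecting code 7 is already recoded as "gq".
# All other write-ins should be treated as NA.
# NOTE(review): no code in this section sets gen_fin to NA for those write-ins - confirm it is handled elsewhere.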

2.5 Clean Race

# Raw race/ethnicity responses (multi-select, stored as comma-separated codes).
table(df$race, useNA = "always")
## 
##       1,2       1,3     1,3,4     1,3,7 1,4,5,7,8     1,4,7       1,7         2 
##         1         1         1         1         1         3         4       474 
##   2,3,5,7       2,4   2,4,6,7     2,4,7     2,5,7       2,6     2,6,7       2,7 
##         1         3         1         3         2         1         1        30 
##       2,8         3       3,4       3,6       3,7       3,8         4       4,7 
##         1        37         2         1         2         1        53        36 
##     4,7,8       4,8         5       5,7         6       6,7         7       7,8 
##         1         2        49        21         1         1       846         2 
##         8      <NA> 
##        25       145
# 1 = “American Indian or Alaska Native”, 2 = “Asian”, 3 = “Black or African American”
# 4 = “Hispanic, Latino/Latina/Latinx, or Spanish origin”, 5 = “Middle Eastern or North African”
# 6 = “Native Hawaiian or Other Pacific Islander”, 7 = “White”,
# 8 = “Another race or ethnicity not listed above” (Write in response optional)
#
# Labels for respondents who selected exactly one option. Code 1 is included so
# that a single "American Indian or Alaska Native" selection is not mislabelled
# as "Bi/Multiracial" (no such response occurs in the table above).
race_labels <- c(
  "1" = "American Indian or Alaska Native",
  "2" = "Asian",
  "3" = "Black or African American",
  "4" = "Hispanic, Latino/Latina/Latinx, or Spanish origin",
  "5" = "Middle Eastern or North African",
  "6" = "Native Hawaiian or Other Pacific Islander",
  "7" = "White",
  "8" = "Another race or ethnicity not listed above"
)

race_chr <- as.character(df$race)

# Single selections get their label; multi-selections and missing give NA here.
df$race_fin <- unname(race_labels[race_chr])

# Any answered response that is not a single code is a multiple selection.
# Truly missing responses stay NA.
df$race_fin[is.na(df$race_fin) & !is.na(race_chr)] <- "Bi/Multiracial"

# Free-text answers from the race/ethnicity write-in field.
table(df$racetxt)
## 
##                                                                                                                                                                                                                                                                                                                                                                               -- 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                               AS THE DRIVEN SNOW 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                            Asian 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                           Basque 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                             Black African/French 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                            Caucasian (Pakistani) 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                          Demographics are racist 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                        Earthling 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                            HOKIE 
##                                                                                                                                                                                                                                                                                                                                                                                1 
## I highlighted groups that are closely related to mine throughout history/migration. If places are going to put "white", put european american as well. It's only being consistent. Speaking of which, hispanics are europeans, caucasian, white etc. This is all silly. Race/ethnicity is a personal identification, not a tool or weapon which this question aims to use it as. 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                       I identify as a race and I don't want to divulge that info 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                           Indian 
##                                                                                                                                                                                                                                                                                                                                                                                2 
##                                                                                                                                                                                                                                                                                                                                                                          Iranian 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                        Israelite 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                         Nepalese 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                     Not relevant 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                            South American Indian 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                      south asian 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                      South asian 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                      South Asian 
##                                                                                                                                                                                                                                                                                                                                                                                2 
##                                                                                                                                                                                                                                                                                                                                                                   southern asian 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                         this question is racist  
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                     Unaffiliated 
##                                                                                                                                                                                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                                                                                                                                                                          unknown 
##                                                                                                                                                                                                                                                                                                                                                                                1
# legitimate entries: "Basque", "Black African/French", "Caucasian (Pakistani)", "Indian", "Iranian", "Israelite", "Nepalese", "South American Indian", "south asian", "South asian", "South Asian", "southern asian"

# Fold the South Asian write-in answers into the "Asian" category.
# %in% never yields NA, so rows without a write-in are left untouched.
df$race_fin[df$racetxt %in% c(
  "Indian", "Nepalese",
  "south asian", "South asian", "South Asian", "southern asian"
)] <- "Asian"

# Category counts after the recode, with missing values shown
table(df$race_fin, useNA = "always")
## 
##        Another race or ethnicity not listed above 
##                                                18 
##                                             Asian 
##                                               482 
##                                    Bi/Multiracial 
##                                               123 
##                         Black or African American 
##                                                37 
## Hispanic, Latino/Latina/Latinx, or Spanish origin 
##                                                53 
##                   Middle Eastern or North African 
##                                                49 
##         Native Hawaiian or Other Pacific Islander 
##                                                 1 
##                                             White 
##                                               846 
##                                              <NA> 
##                                               145
# race_fin2 starts as a copy of race_fin; the five categories listed below are
# then merged into a single "URM" group. Every other value (NA included) is
# carried over unchanged.
df$race_fin2 <- df$race_fin
df$race_fin2[df$race_fin %in% c(
  "Another race or ethnicity not listed above",
  "Native Hawaiian or Other Pacific Islander",
  "Black or African American",
  "Hispanic, Latino/Latina/Latinx, or Spanish origin",
  "Middle Eastern or North African"
)] <- "URM"
table(df$race_fin2, useNA = "always")
## 
##          Asian Bi/Multiracial            URM          White           <NA> 
##            482            123            158            846            145

2.6 Clean Country

# Tabulate the reported country values, including missing responses
table(df$country, useNA = "always")
## 
##              Afghanistan                  Albania                Argentina 
##                        1                        1                        1 
##                Australia               Bangladesh                  Belgium 
##                        4                       24                        1 
##                  Bermuda                  Bolivia                   Brazil 
##                        1                        1                        4 
##                 Cameroon                   Canada                    Chile 
##                        1                        8                        5 
##                    China                 Colombia                  Croatia 
##                      172                       11                        1 
##                   Cyprus           Czech Republic       Dominican Republic 
##                        1                        1                        3 
##                    Egypt              El Salvador                  Equador 
##                        6                        2                        4 
##                  Eritrea                 Ethiopia               ficticious 
##                        1                        1                        1 
##                   France                  Germany                    Ghana 
##                        3                        2                        2 
##                   Greece                    Haiti                Hong Kong 
##                        2                        1                        2 
##                    India                Indonesia                     Iran 
##                      143                        2                       48 
##                     Iraq                    Italy                    Japan 
##                        8                        5                        4 
##                   Jordan               Kazakhstan                    Kenya 
##                        3                        1                        1 
##                   Kuwait                  Lebanon                    Libya 
##                        1                        3                        1 
##                Lithuania                Macedonia                 Malaysia 
##                        1                        1                        3 
##                   Mexico                  Morocco            Multinational 
##                        9                        1                        7 
##                    Nepal              Netherlands              New Zealand 
##                       11                        1                        1 
##                  Nigeria                No Answer                 Pakistan 
##                        8                        3                        9 
##                 Paraguay                     Peru              Philippines 
##                        1                        1                        3 
##                 Portugal              Puerto Rico                  Romania 
##                        2                        5                        1 
##                   Russia             Saudi Arabia             South Africa 
##                        3                        5                        1 
##              South Korea              South Sudan                    Spain 
##                       17                        1                        2 
##                Sri Lanka                    Syria                   Taiwan 
##                        2                        1                       13 
##                 Thailand                   Turkey                   Uganda 
##                        4                       10                        1 
##                  Ukraine           United Kingdom United States of America 
##                        1                        3                      924 
##                Venezuela                  Vietnam                     <NA> 
##                        2                        3                      205
# Classify each respondent as "domestic" (United States of America) or
# "international" (any other reported country). ifelse() yields NA where
# country is missing; the two placeholder answers are blanked out afterwards.
df$intl_fin <- ifelse(
  df$country == "United States of America", "domestic", "international"
)
df$intl_fin[df$country %in% c("No Answer", "ficticious")] <- NA

table(df$intl_fin, useNA = "always")
## 
##      domestic international          <NA> 
##           924           621           209

2.7 Clean DADP and ITP

# Response distribution of dadp, including missing values
table(df$dadp, useNA = "always")
## 
##    1    2    3    4    5 <NA> 
##  271  441  382  402  222   36
# Response distribution of itp, including missing values
table(df$itp, useNA = "always")
## 
##    1    2    3    4    5 <NA> 
##   18   22   69  221 1389   35
# Correlation between the two items before they are combined into one score
corr.test(df$dadp, df$itp)
## Call:corr.test(x = df$dadp, y = df$itp)
## Correlation matrix 
## [1] -0.11
## Sample Size 
## [1] 1718
## These are the unadjusted probability values.
##   The probability values  adjusted for multiple tests are in the p.adj object. 
## [1] 0
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option
# Reverse-score itp (1 <-> 5, 2 <-> 4, 3 stays 3). match() returns NA for any
# value that is not one of 1..5 (including NA), so those rows stay NA.
df$itp_rc <- c(5, 4, 3, 2, 1)[match(df$itp, 1:5)]

# uncer is the mean of dadp and the reverse-scored itp
df$uncer <- (df$dadp + df$itp_rc) / 2

2.8 Finalize Dataset

# List the columns currently in df before selecting the final set
names(df)
##  [1] "R_IDs"     "Major"     "start"     "progress"  "gender"    "gendertxt"
##  [7] "race"      "racetxt"   "dadp"      "itp"       "country"   "maj_fin"  
## [13] "years"     "prog_fin"  "gen_fin"   "race_fin"  "race_fin2" "intl_fin" 
## [19] "itp_rc"    "uncer"
# Keep the respondent ID plus the cleaned analysis variables
df2 <- df[, c(
  "R_IDs", "maj_fin", "years", "prog_fin", "gen_fin", "race_fin",
  "race_fin2", "intl_fin", "dadp", "itp", "uncer"
)]

# Attach the class assignments by respondent ID
d <- merge(x = df2, y = classes, by = "R_IDs")

# Remove the intermediate objects; only d is used from here on
rm(list = c("import", "spl", "df", "df2", "classes"))

# write.csv(d, file="auxvar.csv", row.names = F)

3 Chi-Square Analyses

3.1 Discipline

# Discipline counts in the merged dataset, including missing values
table(d$maj_fin, useNA = "always")
## 
##                   Aerospace Engineering Agricultural and Biological Engineering 
##                                      28                                      27 
##                         Bio Engineering                    Chemical Engineering 
##                                     102                                     149 
##                       Civil Engineering                    Computer Engineering 
##                                     100                                      19 
##                        Computer Science        Computer Science and Engineering 
##                                      55                                      21 
##                        Elec Engineering               Environmental Engineering 
##                                     157                                      28 
##                  Industrial Engineering        Material Science and Engineering 
##                                      29                                     162 
##                  Mechanical Engineering                     Nuclear Engineering 
##                                     165                                      44 
##                       Other Engineering                                    <NA> 
##                                      34                                      13
# Collapse the smaller disciplines into broader groups. maj_fin2 starts as a
# copy of maj_fin and only the categories named below are reassigned.
d$maj_fin2 <- d$maj_fin

# Computing majors -> "Computer Science and Engineering"
d$maj_fin2[d$maj_fin %in% c("Computer Engineering", "Computer Science")] <-
  "Computer Science and Engineering"

# Agricultural and Biological -> "Bio Engineering"
d$maj_fin2[d$maj_fin %in% "Agricultural and Biological Engineering"] <-
  "Bio Engineering"

# Remaining small disciplines, plus respondents with no major, -> "Other Engineering"
d$maj_fin2[
  d$maj_fin %in% c(
    "Aerospace Engineering", "Environmental Engineering",
    "Industrial Engineering", "Nuclear Engineering"
  ) | is.na(d$maj_fin)
] <- "Other Engineering"

# Cross-tabulate old against new categories to verify the mapping
table(d$maj_fin, d$maj_fin2, useNA = "always")
##                                          
##                                           Bio Engineering Chemical Engineering
##   Aerospace Engineering                                 0                    0
##   Agricultural and Biological Engineering              27                    0
##   Bio Engineering                                     102                    0
##   Chemical Engineering                                  0                  149
##   Civil Engineering                                     0                    0
##   Computer Engineering                                  0                    0
##   Computer Science                                      0                    0
##   Computer Science and Engineering                      0                    0
##   Elec Engineering                                      0                    0
##   Environmental Engineering                             0                    0
##   Industrial Engineering                                0                    0
##   Material Science and Engineering                      0                    0
##   Mechanical Engineering                                0                    0
##   Nuclear Engineering                                   0                    0
##   Other Engineering                                     0                    0
##   <NA>                                                  0                    0
##                                          
##                                           Civil Engineering
##   Aerospace Engineering                                   0
##   Agricultural and Biological Engineering                 0
##   Bio Engineering                                         0
##   Chemical Engineering                                    0
##   Civil Engineering                                     100
##   Computer Engineering                                    0
##   Computer Science                                        0
##   Computer Science and Engineering                        0
##   Elec Engineering                                        0
##   Environmental Engineering                               0
##   Industrial Engineering                                  0
##   Material Science and Engineering                        0
##   Mechanical Engineering                                  0
##   Nuclear Engineering                                     0
##   Other Engineering                                       0
##   <NA>                                                    0
##                                          
##                                           Computer Science and Engineering
##   Aerospace Engineering                                                  0
##   Agricultural and Biological Engineering                                0
##   Bio Engineering                                                        0
##   Chemical Engineering                                                   0
##   Civil Engineering                                                      0
##   Computer Engineering                                                  19
##   Computer Science                                                      55
##   Computer Science and Engineering                                      21
##   Elec Engineering                                                       0
##   Environmental Engineering                                              0
##   Industrial Engineering                                                 0
##   Material Science and Engineering                                       0
##   Mechanical Engineering                                                 0
##   Nuclear Engineering                                                    0
##   Other Engineering                                                      0
##   <NA>                                                                   0
##                                          
##                                           Elec Engineering
##   Aerospace Engineering                                  0
##   Agricultural and Biological Engineering                0
##   Bio Engineering                                        0
##   Chemical Engineering                                   0
##   Civil Engineering                                      0
##   Computer Engineering                                   0
##   Computer Science                                       0
##   Computer Science and Engineering                       0
##   Elec Engineering                                     157
##   Environmental Engineering                              0
##   Industrial Engineering                                 0
##   Material Science and Engineering                       0
##   Mechanical Engineering                                 0
##   Nuclear Engineering                                    0
##   Other Engineering                                      0
##   <NA>                                                   0
##                                          
##                                           Material Science and Engineering
##   Aerospace Engineering                                                  0
##   Agricultural and Biological Engineering                                0
##   Bio Engineering                                                        0
##   Chemical Engineering                                                   0
##   Civil Engineering                                                      0
##   Computer Engineering                                                   0
##   Computer Science                                                       0
##   Computer Science and Engineering                                       0
##   Elec Engineering                                                       0
##   Environmental Engineering                                              0
##   Industrial Engineering                                                 0
##   Material Science and Engineering                                     162
##   Mechanical Engineering                                                 0
##   Nuclear Engineering                                                    0
##   Other Engineering                                                      0
##   <NA>                                                                   0
##                                          
##                                           Mechanical Engineering
##   Aerospace Engineering                                        0
##   Agricultural and Biological Engineering                      0
##   Bio Engineering                                              0
##   Chemical Engineering                                         0
##   Civil Engineering                                            0
##   Computer Engineering                                         0
##   Computer Science                                             0
##   Computer Science and Engineering                             0
##   Elec Engineering                                             0
##   Environmental Engineering                                    0
##   Industrial Engineering                                       0
##   Material Science and Engineering                             0
##   Mechanical Engineering                                     165
##   Nuclear Engineering                                          0
##   Other Engineering                                            0
##   <NA>                                                         0
##                                          
##                                           Other Engineering <NA>
##   Aerospace Engineering                                  28    0
##   Agricultural and Biological Engineering                 0    0
##   Bio Engineering                                         0    0
##   Chemical Engineering                                    0    0
##   Civil Engineering                                       0    0
##   Computer Engineering                                    0    0
##   Computer Science                                        0    0
##   Computer Science and Engineering                        0    0
##   Elec Engineering                                        0    0
##   Environmental Engineering                              28    0
##   Industrial Engineering                                 29    0
##   Material Science and Engineering                        0    0
##   Mechanical Engineering                                  0    0
##   Nuclear Engineering                                    44    0
##   Other Engineering                                      34    0
##   <NA>                                                   13    0
# Counts for the collapsed discipline variable, including missing values
table(d$maj_fin2, useNA = "always")
## 
##                  Bio Engineering             Chemical Engineering 
##                              129                              149 
##                Civil Engineering Computer Science and Engineering 
##                              100                               95 
##                 Elec Engineering Material Science and Engineering 
##                              157                              162 
##           Mechanical Engineering                Other Engineering 
##                              165                              176 
##                             <NA> 
##                                0
# Store the collapsed discipline variable as a factor
d$maj_fin2 <- as.factor(d$maj_fin2)

# Pearson chi-squared test of association between discipline and class;
# the result is kept in chiout so its components can be inspected below
chiout <- chisq.test(d$maj_fin2, d$class)
chiout
## 
##  Pearson's Chi-squared test
## 
## data:  d$maj_fin2 and d$class
## X-squared = 83.451, df = 35, p-value = 7.788e-06
# Observed discipline-by-class counts used by the test
chiout$observed
##                                   d$class
## d$maj_fin2                          1  2  3  4  5  6
##   Bio Engineering                  23 63 26 12  1  4
##   Chemical Engineering             27 66 35 12  2  7
##   Civil Engineering                19 59 13  2  6  1
##   Computer Science and Engineering 20 46 16  8  4  1
##   Elec Engineering                 36 71 28  6  9  7
##   Material Science and Engineering 34 80 19 26  2  1
##   Mechanical Engineering           33 76 37  0 11  8
##   Other Engineering                39 93 24 12  4  4
# Standardized residuals for each discipline-by-class cell
chiout$stdres
##                                   d$class
## d$maj_fin2                                   1           2           3
##   Bio Engineering                  -0.76632958 -0.01436740  0.85125079
##   Chemical Engineering             -0.73720274 -1.20570018  2.07434150
##   Civil Engineering                -0.36089782  2.11671464 -1.23429794
##   Computer Science and Engineering  0.16790022 -0.09690145 -0.16990277
##   Elec Engineering                  0.85165847 -0.99218269  0.12750361
##   Material Science and Engineering  0.20451787  0.13366525 -2.08075393
##   Mechanical Engineering           -0.13395615 -0.78846217  1.81086296
##   Other Engineering                 0.63444081  1.13895670 -1.45937256
##                                   d$class
## d$maj_fin2                                   4           5           6
##   Bio Engineering                   1.15224878 -1.76503822  0.13499964
##   Chemical Engineering              0.60491992 -1.50868838  1.39063837
##   Civil Engineering                -2.02036770  1.46933969 -1.19116080
##   Computer Science and Engineering  0.61803981  0.42915979 -1.12632892
##   Elec Engineering                 -1.63306619  1.69597826  1.24113665
##   Material Science and Engineering  4.97681996 -1.66485458 -1.87666165
##   Mechanical Engineering           -3.77868178  2.45791909  1.59981798
##   Other Engineering                -0.03774010 -0.92595575 -0.54928788
# Long-format standardized residuals: one row per discipline-by-class cell,
# with the residual stored in column "sr"
sr_di <- as.data.frame(chiout$stdres, responseName = "sr")
names(sr_di)[1:2] <- c("discipline", "class")

3.1.1 Discipline Plot

# Share of each discipline within each class.
# The earlier version grouped by race_fin, so the "Discipline" plot actually
# showed race. It now uses maj_fin2, the discipline variable tested in the
# chi-square analysis above.
# NOTE(review): confirm maj_fin2 (not an uncollapsed major column) is the
# intended grouping for this figure.
# Wide result: one row per class, one column per discipline, rows sum to 1;
# discipline/class combinations that never occur are filled with 0.
disc_counts <- d %>%
  group_by(class, maj_fin2) %>%
  summarise(count = n()) %>%
  group_by(class) %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup() %>%
  select(-count) %>%
  spread(key = maj_fin2, value = proportion, fill = 0)

# Reshape the data for plotting (the key column keeps its original name,
# maj_fin, so the plotting call is unchanged)
disc_counts_long <- gather(disc_counts, key = "maj_fin", value = "proportion", -class)

# Create a stacked bar plot
ggplot(disc_counts_long, aes(x = as.factor(class), y = proportion, fill = maj_fin)) +
  geom_bar(stat = "identity") +
  labs(x = "Class", y = "Proportion", fill = "Discipline") +
  ggtitle("Disciplinary Distribution by Class")

3.2 Race/Ethnicity

# Treat race/ethnicity as a factor with "White" as the reference level.
d$race_fin2 <- relevel(as.factor(d$race_fin2), ref = "White")

# Disabled alternative coding that folds Bi/Multiracial into URM:
# d$race_fin3 <- d$race_fin2
# d$race_fin3[d$race_fin == "Bi/Multiracial"] <- "URM"
# d$race_fin3 <- droplevels(d$race_fin3)

# Chi-square test of independence between race/ethnicity and class.
chiout <- chisq.test(table(d$race_fin2, d$class))
chiout
## 
##  Pearson's Chi-squared test
## 
## data:  table(d$race_fin2, d$class)
## X-squared = 23.989, df = 15, p-value = 0.06529
chiout$observed
##                 
##                    1   2   3   4   5   6
##   White          102 286 110  50  15  18
##   Asian           80 151  41  17  17   9
##   Bi/Multiracial  15  47  16   7   2   3
##   URM             25  58  22   4   4   1
chiout$stdres
##                 
##                            1           2           3           4           5
##   White          -2.29585560 -0.03316766  1.62893368  2.07117563 -1.67706174
##   Asian           2.72986170 -0.56152680 -2.32024014 -1.38666511  2.23443253
##   Bi/Multiracial -0.86710142  0.58409099  0.15641425  0.26494185 -0.66806724
##   URM             0.49115918  0.36192342  0.63272688 -1.57388305  0.03348577
##                 
##                            6
##   White           0.59356084
##   Asian           0.04946184
##   Bi/Multiracial  0.30818740
##   URM            -1.32268471
# Long-format copy of the standardized residuals: one row per
# race/ethnicity x class cell, with the residual in column `sr`.
sr_re <- setNames(data.frame(chiout$stdres), c("raceethnicity", "class", "sr"))

3.2.1 Race/Ethnicity Plot

# Share of each race category within each class.
# Proportions are computed per class directly from the counts, which avoids
# the deprecated funs()/mutate_at() step and the temporary `total` column,
# and returns an ungrouped table.
# Wide result: one row per class, one column per race category, rows sum
# to 1; race/class combinations that never occur are filled with 0.
race_counts <- d %>%
  group_by(class, race_fin) %>%
  summarise(count = n()) %>%
  group_by(class) %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup() %>%
  select(-count) %>%
  spread(key = race_fin, value = proportion, fill = 0)

# Reshape the data for plotting
race_counts_long <- gather(race_counts, key = "race_fin", value = "proportion", -class)

# Create a stacked bar plot
ggplot(race_counts_long, aes(x = as.factor(class), y = proportion, fill = race_fin)) +
  geom_bar(stat = "identity") +
  labs(x = "Class", y = "Proportion", fill = "Race") +
  ggtitle("Race Distribution by Class")

3.3 Gender

table(d$gen_fin)
## 
## female     gq   male 
##    398     15    692
# Collapse "female" and "gq" into a single "fgq" category; every other
# value (including missing) is carried over unchanged from gen_fin.
is_fgq <- d$gen_fin %in% c("gq", "female")
d$gen_fin2 <- d$gen_fin
d$gen_fin2[is_fgq] <- "fgq"
d$gen_fin2 <- as.factor(d$gen_fin2)
table(d$gen_fin2)
## 
##  fgq male 
##  413  692
# Make "male" the reference level of the collapsed gender factor.
d$gen_fin2 <- relevel(as.factor(d$gen_fin2), ref = "male")

# Chi-square test of independence between gender and class.
chiout <- chisq.test(table(d$gen_fin2, d$class))
chiout
## 
##  Pearson's Chi-squared test
## 
## data:  table(d$gen_fin2, d$class)
## X-squared = 5.5919, df = 5, p-value = 0.348
chiout$observed
##       
##          1   2   3   4   5   6
##   male 137 333 130  45  25  22
##   fgq   87 212  60  33  12   9
chiout$stdres
##       
##                 1          2          3          4          5          6
##   male -0.5071185 -1.0326826  1.8149150 -0.9339242  0.6321686  0.9739402
##   fgq   0.5071185  1.0326826 -1.8149150  0.9339242 -0.6321686 -0.9739402
# Long-format copy of the standardized residuals: one row per
# gender x class cell, with the residual in column `sr`.
sr_ge <- setNames(data.frame(chiout$stdres), c("gender", "class", "sr"))

3.3.1 Gender Plot

# Share of each gender category within each class.
# Proportions are computed per class directly from the counts, which avoids
# the deprecated funs()/mutate_at() step and the temporary `total` column,
# and returns an ungrouped table.
# Wide result: one row per class, one column per gender category, rows sum
# to 1; gender/class combinations that never occur are filled with 0.
gender_counts <- d %>%
  group_by(class, gen_fin) %>%
  summarise(count = n()) %>%
  group_by(class) %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup() %>%
  select(-count) %>%
  spread(key = gen_fin, value = proportion, fill = 0)

# Reshape the data for plotting.
# NOTE(review): the key column is named gen_fin2 but holds the uncollapsed
# gen_fin categories (the grouping above uses gen_fin). The name is kept so
# existing references keep working; confirm which coding the figure should
# show.
gender_counts_long <- gather(gender_counts, key = "gen_fin2", value = "proportion", -class)

# Create a stacked bar plot
ggplot(gender_counts_long, aes(x = as.factor(class), y = proportion, fill = gen_fin2)) +
  geom_bar(stat = "identity") +
  labs(x = "Class", y = "Proportion", fill = "Gender") +
  ggtitle("Gender Distribution by Class")

3.4 Degree Progress

table(d$prog_fin)
## 
##   0   1   2   3 
## 324 373 286  51
# Degree progress is stored as codes; treat it as categorical.
d[["prog_fin"]] <- as.factor(d[["prog_fin"]])

# Chi-square test of independence between degree progress and class.
chiout <- chisq.test(table(d$prog_fin, d$class))
chiout
## 
##  Pearson's Chi-squared test
## 
## data:  table(d$prog_fin, d$class)
## X-squared = 28.679, df = 15, p-value = 0.01769
chiout$observed
##    
##       1   2   3   4   5   6
##   0  62 173  55  14  11   9
##   1  89 172  63  23  17   9
##   2  53 139  48  32   5   9
##   3   5  24  11   4   5   2
chiout$stdres
##    
##               1           2           3           4           5           6
##   0 -0.58253899  1.85338691 -0.08228287 -2.32267728 -0.32325106 -0.03534427
##   1  2.19415971 -1.45775355 -0.14615610 -0.84279759  1.13311296 -0.57317542
##   2 -0.83244866 -0.21007906 -0.17671955  3.20493329 -2.03623281  0.41212118
##   3 -1.89841666 -0.30338656  0.86543503  0.22393674  2.38587841  0.49548634
# Long-format copy of the standardized residuals: one row per
# degree-progress x class cell, with the residual in column `sr`.
sr_pr <- setNames(data.frame(chiout$stdres), c("degreeprog", "class", "sr"))

3.4.1 Degree Progress Plot

# Share of each degree-progress level within each class.
# Proportions are computed per class directly from the counts, which avoids
# the deprecated funs()/mutate_at() step and the temporary `total` column,
# and returns an ungrouped table.
# Wide result: one row per class, one column per degree-progress level, rows
# sum to 1; level/class combinations that never occur are filled with 0.
prog_counts <- d %>%
  group_by(class, prog_fin) %>%
  summarise(count = n()) %>%
  group_by(class) %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup() %>%
  select(-count) %>%
  spread(key = prog_fin, value = proportion, fill = 0)

# Reshape the data for plotting
prog_counts_long <- gather(prog_counts, key = "prog_fin", value = "proportion", -class)

# Create a stacked bar plot
ggplot(prog_counts_long, aes(x = as.factor(class), y = proportion, fill = prog_fin)) +
  geom_bar(stat = "identity") +
  labs(x = "Class", y = "Proportion", fill = "Degree Progress") +
  ggtitle("Degree Progress Distribution by Class")

4 Multivariate Chi Square

# https://www.sas.upenn.edu/~allison/combchi.sas
# MACRO COMBCHI combines chi-square statistics from an analysis of several data
# sets created by multiple imputation, using the method described on p. 115 of
# 
# Schafer, J.L. (1997) Analysis of Incomplete Multivariate Data.  London: 
#    Chapman and Hall. 
# 
# The chi-square statistics can be either Wald statistics or likelihood ratio
# statistics.  All that's needed are the several chi-square values and the
# degrees of freedom. 
# 
# COMBCHI requires the installation of IML.
# 
# Example of usage:
# 
# Suppose a 3 d.f. test on four data sets produces chi-squares of 5.8, 7.2,
# 6.1 and 8.5.  Submit the statement:
# 
#    %combchi(df=3, chi=5.8 7.2 6.1 8.5)
# 
# The following output is printed
# 
#                  F        DF       DDF
# 
#          2.1201613         3 342.22381
# 
# 
#                       P
# 
#                    0.0974097
# 
# The macro calculates an F-statistic of 2.12 with 3 and 342 degrees of freedom.  
# The associated p-value is .097.
# 
# *****************************************************************************/
# proc iml;
#   df=&df;
#   g2={&chi};
#   m=ncol(g2);
#   g=sqrt(g2);
#   mg2=sum(g2)/m;
#   r=(1+1/m)*(ssq(g)-(sum(g)**2)/m)/(m-1);
#   f=(mg2/df - r*(m-1)/(m+1))/(1+r);
#   ddf=(m-1)*(1+1/r)**2/df**(3/m);
#   p=1-probf(f,df,ddf);
#   print f df ddf;
#   print p;
# run;
# %mend combchi;

# from chat-gpt
# library(statmod)
# 
# combchi <- function(df, chi) {
#   g2 <- chi
#   m <- length(g2)
#   g <- sqrt(g2)
#   mg2 <- sum(g2) / m
#   r <- (1 + 1 / m) * ((sum(g^2) - sum(g)^2 / m) / (m - 1))
#   f <- (mg2 / df - r * (m - 1) / (m + 1)) / (1 + r)
#   ddf <- (m - 1) * (1 + 1 / r)^2 / df^(3 / m)
#   p <- 1-pf(f, df, ddf) #by me
#   
#   cat("f:", f, "\ndf:", df, "\nddf:", ddf, "\np:", p, "\n")
# }
# 
# # Example usage
# combchi(df=3, chi=c(5.8, 7.2, 6.1, 8.5))
# 
# #                  F        DF       DDF
# # 
# #          2.1201613         3 342.22381
# # 
# # 
# #                       P
# # 
# #                    0.0974097
# # The macro calculates an F-statistic of 2.12 with 3 and 342 degrees of freedom.  
# # The associated p-value is .097.
# 
# combchi(df=3, chi=c(97.68,23.99,5.59,28.68))

5 Regression Analyses

5.1 All Variables Predicting Uncertainty

# Reference level for each categorical predictor used in the models.
reference_levels <- c(
  maj_fin2 = "Mechanical Engineering",
  race_fin2 = "White",
  gen_fin2 = "male",
  prog_fin = "0"
)
for (predictor in names(reference_levels)) {
  d[[predictor]] <- relevel(d[[predictor]], ref = reference_levels[[predictor]])
}

# Give the numeric class codes descriptive labels; "moderate" is the
# reference level of the resulting factor.
class_labels <- c(
  "1" = "disorganized",
  "2" = "moderate",
  "3" = "strong",
  "4" = "mle",
  "5" = "mls",
  "6" = "max"
)
for (code in names(class_labels)) {
  d$class_fin[d$class == code] <- class_labels[[code]]
}
d$class_fin <- relevel(as.factor(d$class_fin), ref = "moderate")

# Drop the "Other Engineering" rows. subset() also discards rows where
# maj_fin2 is missing, and d stays filtered for everything that follows.
d <- subset(d, maj_fin2 != "Other Engineering")

# Model 1: uncertainty regressed on the demographic predictors only.
regout1 <- lm(uncer ~ maj_fin2 + gen_fin2 + race_fin2 + prog_fin, data = d)
plot_model(regout1, type = "diag")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# Normal Q-Q plot of the raw outcome.
# NOTE(review): this checks d$uncer itself, not the residuals of regout1.
qqnorm(d$uncer)
qqline(d$uncer)

# Base-R diagnostic panels for model 1, drawn in the order 4, 5, 3.
for (panel in c(4, 5, 3)) {
  plot(regout1, which = panel)
}

summary(regout1)
## 
## Call:
## lm(formula = uncer ~ maj_fin2 + gen_fin2 + race_fin2 + prog_fin, 
##     data = d)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.45562 -0.60741 -0.02873  0.51139  2.85647 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                               2.114591   0.080499  26.269  < 2e-16
## maj_fin2Bio Engineering                   0.076006   0.094226   0.807 0.420098
## maj_fin2Chemical Engineering              0.109113   0.089550   1.218 0.223389
## maj_fin2Civil Engineering                -0.007770   0.099651  -0.078 0.937869
## maj_fin2Computer Science and Engineering  0.110718   0.101366   1.092 0.275029
## maj_fin2Elec Engineering                 -0.009890   0.086934  -0.114 0.909455
## maj_fin2Material Science and Engineering  0.178011   0.090555   1.966 0.049649
## gen_fin2fgq                               0.008089   0.053504   0.151 0.879868
## race_fin2Asian                           -0.125985   0.058473  -2.155 0.031474
## race_fin2Bi/Multiracial                   0.150457   0.097234   1.547 0.122145
## race_fin2URM                             -0.037927   0.086344  -0.439 0.660589
## prog_fin1                                 0.154928   0.062119   2.494 0.012819
## prog_fin2                                -0.035878   0.067304  -0.533 0.594123
## prog_fin3                                -0.484187   0.124213  -3.898 0.000105
##                                             
## (Intercept)                              ***
## maj_fin2Bio Engineering                     
## maj_fin2Chemical Engineering                
## maj_fin2Civil Engineering                   
## maj_fin2Computer Science and Engineering    
## maj_fin2Elec Engineering                    
## maj_fin2Material Science and Engineering *  
## gen_fin2fgq                                 
## race_fin2Asian                           *  
## race_fin2Bi/Multiracial                     
## race_fin2URM                                
## prog_fin1                                *  
## prog_fin2                                   
## prog_fin3                                ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.74 on 851 degrees of freedom
##   (92 observations deleted due to missingness)
## Multiple R-squared:  0.05328,    Adjusted R-squared:  0.03881 
## F-statistic: 3.684 on 13 and 851 DF,  p-value: 1.009e-05
# Regression table for model 1: standard errors and test statistics shown,
# confidence intervals hidden. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.
tab_model(regout1, show.ci = FALSE, show.se = TRUE, show.stat = TRUE)
  uncer
Predictors Estimates std. Error Statistic p
(Intercept) 2.11 0.08 26.27 <0.001
maj fin2 [Bio
Engineering]
0.08 0.09 0.81 0.420
maj fin2 [Chemical
Engineering]
0.11 0.09 1.22 0.223
maj fin2 [Civil
Engineering]
-0.01 0.10 -0.08 0.938
maj fin2 [Computer
Science and Engineering]
0.11 0.10 1.09 0.275
maj fin2 [Elec
Engineering]
-0.01 0.09 -0.11 0.909
maj fin2 [Material
Science and Engineering]
0.18 0.09 1.97 0.050
gen fin2 [fgq] 0.01 0.05 0.15 0.880
race fin2 [Asian] -0.13 0.06 -2.15 0.031
race fin2
[Bi/Multiracial]
0.15 0.10 1.55 0.122
race fin2 [URM] -0.04 0.09 -0.44 0.661
prog fin [1] 0.15 0.06 2.49 0.013
prog fin [2] -0.04 0.07 -0.53 0.594
prog fin [3] -0.48 0.12 -3.90 <0.001
Observations 865
R2 / R2 adjusted 0.053 / 0.039
# Coefficient plot for model 1.
plot_model(regout1)

# Model 2: model 1 plus class membership as an additional predictor.
regout2 <- lm(uncer ~ class_fin + maj_fin2 + gen_fin2 + race_fin2 + prog_fin, data = d)
# plot_model(regout2, type="diag")

# Base-R diagnostic panels for model 2, drawn in the order 4, 5, 3.
for (panel in c(4, 5, 3)) {
  plot(regout2, which = panel)
}

summary(regout2)
## 
## Call:
## lm(formula = uncer ~ class_fin + maj_fin2 + gen_fin2 + race_fin2 + 
##     prog_fin, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6042 -0.5065  0.0292  0.5301  2.5510 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                               2.18624    0.08102  26.984  < 2e-16
## class_findisorganized                     0.29786    0.06450   4.618 4.48e-06
## class_finmax                             -0.34474    0.15632  -2.205 0.027698
## class_finmle                             -0.35397    0.09907  -3.573 0.000373
## class_finmls                             -0.15477    0.12962  -1.194 0.232821
## class_finstrong                          -0.36062    0.06671  -5.406 8.40e-08
## maj_fin2Bio Engineering                   0.09572    0.09066   1.056 0.291364
## maj_fin2Chemical Engineering              0.13283    0.08602   1.544 0.122915
## maj_fin2Civil Engineering                -0.03557    0.09552  -0.372 0.709726
## maj_fin2Computer Science and Engineering  0.09546    0.09724   0.982 0.326522
## maj_fin2Elec Engineering                 -0.02458    0.08316  -0.296 0.767657
## maj_fin2Material Science and Engineering  0.15759    0.08858   1.779 0.075578
## gen_fin2fgq                              -0.01115    0.05123  -0.218 0.827791
## race_fin2Asian                           -0.17972    0.05628  -3.193 0.001459
## race_fin2Bi/Multiracial                   0.15798    0.09294   1.700 0.089536
## race_fin2URM                             -0.05939    0.08253  -0.720 0.471985
## prog_fin1                                 0.14463    0.05952   2.430 0.015310
## prog_fin2                                -0.02076    0.06441  -0.322 0.747280
## prog_fin3                                -0.40026    0.11952  -3.349 0.000847
##                                             
## (Intercept)                              ***
## class_findisorganized                    ***
## class_finmax                             *  
## class_finmle                             ***
## class_finmls                                
## class_finstrong                          ***
## maj_fin2Bio Engineering                     
## maj_fin2Chemical Engineering                
## maj_fin2Civil Engineering                   
## maj_fin2Computer Science and Engineering    
## maj_fin2Elec Engineering                    
## maj_fin2Material Science and Engineering .  
## gen_fin2fgq                                 
## race_fin2Asian                           ** 
## race_fin2Bi/Multiracial                  .  
## race_fin2URM                                
## prog_fin1                                *  
## prog_fin2                                   
## prog_fin3                                ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7066 on 846 degrees of freedom
##   (92 observations deleted due to missingness)
## Multiple R-squared:  0.1418, Adjusted R-squared:  0.1236 
## F-statistic: 7.767 on 18 and 846 DF,  p-value: < 2.2e-16
# Regression table for model 2: standard errors and test statistics shown,
# confidence intervals hidden. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.
tab_model(regout2, show.ci = FALSE, show.se = TRUE, show.stat = TRUE)
  uncer
Predictors Estimates std. Error Statistic p
(Intercept) 2.19 0.08 26.98 <0.001
class fin [disorganized] 0.30 0.06 4.62 <0.001
class fin [max] -0.34 0.16 -2.21 0.028
class fin [mle] -0.35 0.10 -3.57 <0.001
class fin [mls] -0.15 0.13 -1.19 0.233
class fin [strong] -0.36 0.07 -5.41 <0.001
maj fin2 [Bio
Engineering]
0.10 0.09 1.06 0.291
maj fin2 [Chemical
Engineering]
0.13 0.09 1.54 0.123
maj fin2 [Civil
Engineering]
-0.04 0.10 -0.37 0.710
maj fin2 [Computer
Science and Engineering]
0.10 0.10 0.98 0.327
maj fin2 [Elec
Engineering]
-0.02 0.08 -0.30 0.768
maj fin2 [Material
Science and Engineering]
0.16 0.09 1.78 0.076
gen fin2 [fgq] -0.01 0.05 -0.22 0.828
race fin2 [Asian] -0.18 0.06 -3.19 0.001
race fin2
[Bi/Multiracial]
0.16 0.09 1.70 0.090
race fin2 [URM] -0.06 0.08 -0.72 0.472
prog fin [1] 0.14 0.06 2.43 0.015
prog fin [2] -0.02 0.06 -0.32 0.747
prog fin [3] -0.40 0.12 -3.35 0.001
Observations 865
R2 / R2 adjusted 0.142 / 0.124
# Coefficient plot for model 2.
plot_model(regout2)

# F-test comparing the nested models: does adding class_fin improve on
# the demographics-only model?
anova(regout1, regout2)
## Analysis of Variance Table
## 
## Model 1: uncer ~ maj_fin2 + gen_fin2 + race_fin2 + prog_fin
## Model 2: uncer ~ class_fin + maj_fin2 + gen_fin2 + race_fin2 + prog_fin
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    851 465.99                                  
## 2    846 422.41  5    43.579 17.456 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1