library(reshape2) # for colsplit()
library(afex)
library(emmeans)
library(sjPlot)
library(nnet)
library(ggplot2)
library(psych)
library(dplyr)
library(tidyr)
# Load the raw survey export; empty and single-space cells are read as NA.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
import <- read.csv(file = "finaldata.csv", header = TRUE, na.strings = c("", " ", "NA"))
# Keep only the items used below and give them short working names
# (same order as the select list).
df <- subset(import, select = c(R_IDs, Major, Start.Year.Month, Q19.3, Q19.16, Q19.16_7_TEXT, Q19.13, Q19.13_8_TEXT, Q13.1_1, Q13.1_2, Country))
colnames(df) <- c("R_IDs", "Major", "start", "progress", "gender", "gendertxt", "race", "racetxt", "dadp", "itp", "country")
# Class assignments are keyed by X; merging on X attaches the matching R_IDs.
classes <- read.csv(file = "classes.csv", header = TRUE)
classes <- merge(classes, subset(import, select = c(R_IDs, X)), by = "X")
# Swap class labels 2 and 3. Both reassignments test the untouched backup copy
# (class_bu), so the second one cannot undo the first.
classes$class_bu <- classes$class
classes$class[classes$class_bu == "2"] <- "3"
classes$class[classes$class_bu == "3"] <- "2"
# Raw distribution of majors before recoding.
table(df$Major, useNA = "always")
##
## Acoustic Engineering
## 2
## Aerospace Engineering
## 50
## Agricultural and Biological Engineering
## 47
## Biological Engineering
## 2
## Biomedical
## 128
## Biomedical Engineering
## 3
## Chemical and Biomolecular Engineering
## 18
## Chemical and Materials Engineering
## 2
## Chemical Engineering
## 170
## Civil and Environmental Engineering
## 18
## Civil Engineering
## 142
## Computer Engineering
## 33
## Computer Science
## 93
## Computer Science and Engineering
## 21
## Computer Science Engineering
## 4
## Electrical & Computer Engineering
## 62
## Electrical and Electronic Engineering
## 2
## Electrical Engineering
## 200
## Electrical Engineering and Computer Science
## 2
## Engineering
## 2
## Engineering Education
## 1
## Engineering Management
## 4
## Engineering Physics
## 5
## Environmental
## 51
## Environmental Engineering
## 1
## Geo
## 6
## Industrial
## 40
## Material Science and Engineering
## 185
## Mechanical Engineering
## 260
## Nuclear Engineering
## 59
## Ocean Engineering
## 11
## Other Engineering
## 3
## Petroleum
## 17
## Systems Engineering
## 10
## <NA>
## 100
# Collapse the raw Major labels into the analysis categories stored in maj_fin.
# Every rule tests the untouched df$Major column, so the rules are independent
# of one another; majors not listed keep their label and missing majors stay NA.
df$maj_fin <- df$Major

# Small programs pooled into "Other Engineering".
df$maj_fin[df$Major %in% c(
  "Acoustic Engineering",   # n = 2
  "Engineering",            # n = 2
  "Engineering Education",  # n = 1
  "Engineering Management", # n = 4
  "Engineering Physics",    # n = 5
  "Geo",                    # n = 6
  "Other Engineering",      # n = 3
  "Ocean Engineering",
  "Petroleum",
  "Systems Engineering"
)] <- "Other Engineering"

# Label variants merged into one category per discipline.
df$maj_fin[df$Major %in% c(
  "Biological Engineering",
  "Biomedical",
  "Biomedical Engineering"
)] <- "Bio Engineering"
df$maj_fin[df$Major %in% c(
  "Chemical and Biomolecular Engineering",
  "Chemical and Materials Engineering"
)] <- "Chemical Engineering"
df$maj_fin[df$Major %in% "Civil and Environmental Engineering"] <- "Civil Engineering"
df$maj_fin[df$Major %in% "Computer Science Engineering"] <- "Computer Science and Engineering"
df$maj_fin[df$Major %in% c(
  "Electrical & Computer Engineering",
  "Electrical and Electronic Engineering",
  "Electrical Engineering",
  "Electrical Engineering and Computer Science"
)] <- "Elec Engineering"
df$maj_fin[df$Major %in% "Environmental"] <- "Environmental Engineering"
df$maj_fin[df$Major %in% "Industrial"] <- "Industrial Engineering"

# Cross-tabulate raw against recoded labels to verify the mapping.
table(df[["Major"]], df[["maj_fin"]], useNA = "always")
##
## Aerospace Engineering
## Acoustic Engineering 0
## Aerospace Engineering 50
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Agricultural and Biological Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 47
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Bio Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 2
## Biomedical 128
## Biomedical Engineering 3
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Chemical Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 18
## Chemical and Materials Engineering 2
## Chemical Engineering 170
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Civil Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 18
## Civil Engineering 142
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Computer Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 33
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Computer Science
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 93
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Computer Science and Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 21
## Computer Science Engineering 4
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Elec Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 62
## Electrical and Electronic Engineering 2
## Electrical Engineering 200
## Electrical Engineering and Computer Science 2
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Environmental Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 51
## Environmental Engineering 1
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Industrial Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 40
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Material Science and Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 185
## Mechanical Engineering 0
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Mechanical Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 260
## Nuclear Engineering 0
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Nuclear Engineering
## Acoustic Engineering 0
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Biological Engineering 0
## Biomedical 0
## Biomedical Engineering 0
## Chemical and Biomolecular Engineering 0
## Chemical and Materials Engineering 0
## Chemical Engineering 0
## Civil and Environmental Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Computer Science Engineering 0
## Electrical & Computer Engineering 0
## Electrical and Electronic Engineering 0
## Electrical Engineering 0
## Electrical Engineering and Computer Science 0
## Engineering 0
## Engineering Education 0
## Engineering Management 0
## Engineering Physics 0
## Environmental 0
## Environmental Engineering 0
## Geo 0
## Industrial 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 59
## Ocean Engineering 0
## Other Engineering 0
## Petroleum 0
## Systems Engineering 0
## <NA> 0
##
## Other Engineering <NA>
## Acoustic Engineering 2 0
## Aerospace Engineering 0 0
## Agricultural and Biological Engineering 0 0
## Biological Engineering 0 0
## Biomedical 0 0
## Biomedical Engineering 0 0
## Chemical and Biomolecular Engineering 0 0
## Chemical and Materials Engineering 0 0
## Chemical Engineering 0 0
## Civil and Environmental Engineering 0 0
## Civil Engineering 0 0
## Computer Engineering 0 0
## Computer Science 0 0
## Computer Science and Engineering 0 0
## Computer Science Engineering 0 0
## Electrical & Computer Engineering 0 0
## Electrical and Electronic Engineering 0 0
## Electrical Engineering 0 0
## Electrical Engineering and Computer Science 0 0
## Engineering 2 0
## Engineering Education 1 0
## Engineering Management 4 0
## Engineering Physics 5 0
## Environmental 0 0
## Environmental Engineering 0 0
## Geo 6 0
## Industrial 0 0
## Material Science and Engineering 0 0
## Mechanical Engineering 0 0
## Nuclear Engineering 0 0
## Ocean Engineering 11 0
## Other Engineering 3 0
## Petroleum 17 0
## Systems Engineering 10 0
## <NA> 0 100
# Final counts per recoded major category, including missing.
table(df[["maj_fin"]], useNA = "always")
##
## Aerospace Engineering Agricultural and Biological Engineering
## 50 47
## Bio Engineering Chemical Engineering
## 133 190
## Civil Engineering Computer Engineering
## 160 33
## Computer Science Computer Science and Engineering
## 93 25
## Elec Engineering Environmental Engineering
## 266 52
## Industrial Engineering Material Science and Engineering
## 40 185
## Mechanical Engineering Nuclear Engineering
## 260 59
## Other Engineering <NA>
## 61 100
# survey closed 7/25/2018
# Split the start string on the space into "year" and "month" columns, then
# count years as 2018 minus the start year (the month is not used).
spl <- colsplit(df$start, pattern = " ", names = c("year", "month"))
spl[["year"]] <- as.numeric(spl[["year"]])
spl[["years"]] <- 2018 - spl[["year"]]
df[["years"]] <- spl[["years"]]
# Raw distribution of the multi-select progress item before recoding.
table(df[["progress"]], useNA = "always")
##
## 1 1,2 1,2,3 1,3 1,4 2 2,3 2,4 3 4 <NA>
## 398 254 41 1 3 36 4 3 9 803 202
# VALUES: 1 through 4, multiples possible; 1 = “Comprehensive Exam (Written or Oral)”,
# 2 = “Dissertation Proposal”, 3 = “Dissertation Defense”, 4 = “Not applicable”
# Collapse the multi-select milestone item into one number per respondent:
# the highest-numbered milestone ticked (1 = comprehensive exam, 2 = proposal,
# 3 = defense), or 0 when only "Not applicable" (4) was ticked. A 4 ticked
# together with a milestone is ignored. Missing responses stay NA.
df$prog_fin <- NA
df$prog_fin[df$progress %in% "4"] <- 0
df$prog_fin[df$progress %in% c("1", "1,4")] <- 1
df$prog_fin[df$progress %in% c("2", "1,2", "2,4")] <- 2
df$prog_fin[df$progress %in% c("3", "1,3", "2,3", "1,2,3")] <- 3
# Tabulate the recoded column by its full name. `df$prog` partially matches
# both `progress` and `prog_fin`, so `$` returns NULL and the table comes back
# empty instead of showing the recode.
table(df$prog_fin, useNA = "always")
## NOTE: the output below was not re-knitted; the counts are derived from the
## table(df$progress) tabulation above and the recode rules.
##
## 0 1 2 3 <NA>
## 803 401 293 55 202
# Raw distribution of the multi-select gender item before recoding.
table(df[["gender"]], useNA = "always")
##
## 1 1,2,3,4,5,6,7 1,3 1,3,5 1,4
## 541 1 3 1 1
## 1,5 1,6 2 2,3 2,3,5
## 1 23 987 5 1
## 2,5 2,6 2,7 3 5
## 1 33 6 2 1
## 6 7 <NA>
## 1 12 134
# VALUES: 1 through 7, multiples possible; 1 = “Female”, 2 = “Male”, 3 = “Genderqueer”
# 4 = “Agender”, 5 = “Transgender”, 6 = “Cisgender”, 7 = “A gender not listed” (Write in response optional)
# Recode the multi-select gender item into three groups. Response patterns not
# listed here (e.g. "5", "6", "1,2,3,4,5,6,7") and missing responses stay NA.
df$gen_fin <- NA
# Female or male alone, or combined only with transgender (5) or cisgender (6).
df$gen_fin[df$gender %in% c("1", "1,5", "1,6")] <- "female"
df$gen_fin[df$gender %in% c("2", "2,5", "2,6")] <- "male"
# Every listed pattern that includes genderqueer (3), agender (4) or
# "a gender not listed" (7).
df$gen_fin[df$gender %in% c(
  "3", "4", "7",
  "1,3", "1,4", "2,3", "2,7",
  "1,3,5", "2,3,5"
)] <- "gq"
# Cross-tabulate recoded against raw responses to verify the mapping.
table(df[["gen_fin"]], df[["gender"]], useNA = "always")
##
## 1 1,2,3,4,5,6,7 1,3 1,3,5 1,4 1,5 1,6 2 2,3 2,3,5 2,5 2,6 2,7 3
## female 541 0 0 0 0 1 23 0 0 0 0 0 0 0
## gq 0 0 3 1 1 0 0 0 5 1 0 0 6 2
## male 0 0 0 0 0 0 0 987 0 0 1 33 0 0
## <NA> 0 1 0 0 0 0 0 0 0 0 0 0 0 0
##
## 5 6 7 <NA>
## female 0 0 0 0
## gq 0 0 12 0
## male 0 0 0 0
## <NA> 1 1 0 134
# Final counts per recoded gender group, including missing.
table(df[["gen_fin"]], useNA = "always")
##
## female gq male <NA>
## 565 31 1021 137
# Free-text answers given with the "a gender not listed" option.
table(df[["gendertxt"]])
##
## A zorplon from Sagitarius-7
## 1
## Alpha Male
## 1
## An ether sniffing robot sex toy on wheels
## 1
## Attack helicopter
## 1
## Attack Helicopter
## 1
## Hairdresser
## 1
## HOKIE
## 1
## Non-binary
## 1
## Not relevant
## 1
## Not Relevant
## 1
## questioning
## 1
## Sexist question
## 1
## there are only two genders
## 1
## there are only two genders fuck off
## 1
## This is a silly option. Your biology determines this. That's science, that's fact. A better option is "what I feel my gender is".
## 1
## Turtle
## 1
## XX, XY, XO, XXY, XXX, or XYY the rest is made up
## 1
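# legitimate responses: "Non-binary" - already recoded as gq, all else as NA
# NOTE(review): the gen_fin recode above never looks at gendertxt, so the other write-ins are not set to NA by this script (the crosstab shows every gender "7" and "2,7" row coded gq) - confirm whether that is intended.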
# Raw distribution of the multi-select race/ethnicity item before recoding.
table(df[["race"]], useNA = "always")
##
## 1,2 1,3 1,3,4 1,3,7 1,4,5,7,8 1,4,7 1,7 2
## 1 1 1 1 1 3 4 474
## 2,3,5,7 2,4 2,4,6,7 2,4,7 2,5,7 2,6 2,6,7 2,7
## 1 3 1 3 2 1 1 30
## 2,8 3 3,4 3,6 3,7 3,8 4 4,7
## 1 37 2 1 2 1 53 36
## 4,7,8 4,8 5 5,7 6 6,7 7 7,8
## 1 2 49 21 1 1 846 2
## 8 <NA>
## 25 145
# 1 = “American Indian or Alaska Native”, 2 = “Asian”, 3 = “Black or African American”
# 4 = “Hispanic, Latino/Latina/Latinx, or Spanish origin”, 5 = “Middle Eastern or North African”
# 6 = “Native Hawaiian or Other Pacific Islander”, 7 = “White”, 8 = “Another race or ethnicity not listed above” (Write in response optional)
# Recode race/ethnicity. Everyone starts as "Bi/Multiracial"; the single-choice
# codes handled below are then overwritten with their own label, and missing
# responses are reset to NA. Any response not matched below (every
# multi-selection in the data) therefore keeps the "Bi/Multiracial" default.
df$race_fin <- "Bi/Multiracial"
df$race_fin[df$race %in% "2"] <- "Asian"
df$race_fin[df$race %in% "3"] <- "Black or African American"
df$race_fin[df$race %in% "4"] <- "Hispanic, Latino/Latina/Latinx, or Spanish origin"
df$race_fin[df$race %in% "5"] <- "Middle Eastern or North African"
df$race_fin[df$race %in% "6"] <- "Native Hawaiian or Other Pacific Islander"
df$race_fin[df$race %in% "7"] <- "White"
df$race_fin[df$race %in% "8"] <- "Another race or ethnicity not listed above"
df$race_fin[is.na(df$race)] <- NA
# Free-text answers given with the "another race or ethnicity" option.
table(df[["racetxt"]])
##
## --
## 1
## AS THE DRIVEN SNOW
## 1
## Asian
## 1
## Basque
## 1
## Black African/French
## 1
## Caucasian (Pakistani)
## 1
## Demographics are racist
## 1
## Earthling
## 1
## HOKIE
## 1
## I highlighted groups that are closely related to mine throughout history/migration. If places are going to put "white", put european american as well. It's only being consistent. Speaking of which, hispanics are europeans, caucasian, white etc. This is all silly. Race/ethnicity is a personal identification, not a tool or weapon which this question aims to use it as.
## 1
## I identify as a race and I don't want to divulge that info
## 1
## Indian
## 2
## Iranian
## 1
## Israelite
## 1
## Nepalese
## 1
## Not relevant
## 1
## South American Indian
## 1
## south asian
## 1
## South asian
## 1
## South Asian
## 2
## southern asian
## 1
## this question is racist
## 1
## Unaffiliated
## 1
## unknown
## 1
# legitimate entries: "Basque", "Black African/French", "Caucasian (Pakistani)", "Indian", "Iranian", "Israelite", "Nepalese", "South American Indian", "south asian", "South asian", "South Asian", "southern asian"
# Write-in answers that are recoded as "Asian". This overrides whatever
# race_fin those rows received from the checkbox recode.
df$race_fin[df$racetxt %in% c(
  "Indian",
  "Nepalese",
  "south asian",
  "South asian",
  "South Asian",
  "southern asian"
)] <- "Asian"
# Final counts per race/ethnicity category, including missing.
table(df[["race_fin"]], useNA = "always")
##
## Another race or ethnicity not listed above
## 18
## Asian
## 482
## Bi/Multiracial
## 123
## Black or African American
## 37
## Hispanic, Latino/Latina/Latinx, or Spanish origin
## 53
## Middle Eastern or North African
## 49
## Native Hawaiian or Other Pacific Islander
## 1
## White
## 846
## <NA>
## 145
# Coarser race variable: the five categories listed below are pooled into
# "URM"; Asian, Bi/Multiracial, White and NA carry over unchanged.
df$race_fin2 <- df$race_fin
df$race_fin2[df$race_fin %in% c(
  "Another race or ethnicity not listed above",
  "Native Hawaiian or Other Pacific Islander",
  "Black or African American",
  "Hispanic, Latino/Latina/Latinx, or Spanish origin",
  "Middle Eastern or North African"
)] <- "URM"
table(df[["race_fin2"]], useNA = "always")
##
## Asian Bi/Multiracial URM White <NA>
## 482 123 158 846 145
# Raw distribution of the country item before the domestic/international recode.
table(df[["country"]], useNA = "always")
##
## Afghanistan Albania Argentina
## 1 1 1
## Australia Bangladesh Belgium
## 4 24 1
## Bermuda Bolivia Brazil
## 1 1 4
## Cameroon Canada Chile
## 1 8 5
## China Colombia Croatia
## 172 11 1
## Cyprus Czech Republic Dominican Republic
## 1 1 3
## Egypt El Salvador Equador
## 6 2 4
## Eritrea Ethiopia ficticious
## 1 1 1
## France Germany Ghana
## 3 2 2
## Greece Haiti Hong Kong
## 2 1 2
## India Indonesia Iran
## 143 2 48
## Iraq Italy Japan
## 8 5 4
## Jordan Kazakhstan Kenya
## 3 1 1
## Kuwait Lebanon Libya
## 1 3 1
## Lithuania Macedonia Malaysia
## 1 1 3
## Mexico Morocco Multinational
## 9 1 7
## Nepal Netherlands New Zealand
## 11 1 1
## Nigeria No Answer Pakistan
## 8 3 9
## Paraguay Peru Philippines
## 1 1 3
## Portugal Puerto Rico Romania
## 2 5 1
## Russia Saudi Arabia South Africa
## 3 5 1
## South Korea South Sudan Spain
## 17 1 2
## Sri Lanka Syria Taiwan
## 2 1 13
## Thailand Turkey Uganda
## 4 10 1
## Ukraine United Kingdom United States of America
## 1 3 924
## Venezuela Vietnam <NA>
## 2 3 205
# Domestic vs. international flag: "United States of America" is domestic,
# every other country value is international. Non-answers ("No Answer", the
# "ficticious" entry, and missing values) become NA.
# NOTE(review): "Puerto Rico" and "Multinational" end up as international
# under this rule - confirm that is intended.
df$intl_fin <- "international"
df$intl_fin[df$country %in% "United States of America"] <- "domestic"
df$intl_fin[df$country %in% c("No Answer", "ficticious")] <- NA
df$intl_fin[is.na(df$country)] <- NA
table(df[["intl_fin"]], useNA = "always")
##
## domestic international <NA>
## 924 621 209
# Response distributions of the two 1-5 items (dadp = Q13.1_1, itp = Q13.1_2
# in the raw export), including missing.
table(df$dadp, useNA = "always")
##
## 1 2 3 4 5 <NA>
## 271 441 382 402 222 36
table(df$itp, useNA = "always")
##
## 1 2 3 4 5 <NA>
## 18 22 69 221 1389 35
# Correlation between the two items. The call is left exactly as written
# because corr.test echoes it in its printed output.
corr.test(df$dadp, df$itp)
## Call:corr.test(x = df$dadp, y = df$itp)
## Correlation matrix
## [1] -0.11
## Sample Size
## [1] 1718
## These are the unadjusted probability values.
## The probability values adjusted for multiple tests are in the p.adj object.
## [1] 0
##
## To see confidence intervals of the correlations, print with the short=FALSE option
# Reverse-score itp (1 <-> 5, 2 <-> 4, 3 stays 3) via a lookup; any value that
# is not one of 1..5, including NA, yields NA.
df$itp_rc <- c(5, 4, 3, 2, 1)[match(df$itp, 1:5)]
# uncer is the mean of dadp and the reverse-scored itp; NA if either is missing.
df$uncer <- (df$dadp + df$itp_rc) / 2
# List the columns now available before selecting the final set.
colnames(df)
## [1] "R_IDs" "Major" "start" "progress" "gender" "gendertxt"
## [7] "race" "racetxt" "dadp" "itp" "country" "maj_fin"
## [13] "years" "prog_fin" "gen_fin" "race_fin" "race_fin2" "intl_fin"
## [19] "itp_rc" "uncer"
# Keep the respondent id plus the recoded/derived variables only.
df2 <- df[c(
  "R_IDs", "maj_fin", "years", "prog_fin", "gen_fin", "race_fin",
  "race_fin2", "intl_fin", "dadp", "itp", "uncer"
)]
# Attach class assignments; merge() keeps only R_IDs present in both tables.
d <- merge(df2, classes, by = "R_IDs")
# Drop the intermediate objects so only `d` is carried forward.
rm(list = c("import", "spl", "df", "df2", "classes"))
# write.csv(d, file="auxvar.csv", row.names = F)
# Major counts within the merged analysis sample.
table(d[["maj_fin"]], useNA = "always")
##
## Aerospace Engineering Agricultural and Biological Engineering
## 28 27
## Bio Engineering Chemical Engineering
## 102 149
## Civil Engineering Computer Engineering
## 100 19
## Computer Science Computer Science and Engineering
## 55 21
## Elec Engineering Environmental Engineering
## 157 28
## Industrial Engineering Material Science and Engineering
## 29 162
## Mechanical Engineering Nuclear Engineering
## 165 44
## Other Engineering <NA>
## 34 13
# Second-level collapse of majors into the eight groups used in the
# chi-squared test. All rules test the untouched d$maj_fin column.
d$maj_fin2 <- d$maj_fin
d$maj_fin2[d$maj_fin %in% c(
  "Computer Engineering",
  "Computer Science"
)] <- "Computer Science and Engineering"
d$maj_fin2[d$maj_fin %in% "Agricultural and Biological Engineering"] <- "Bio Engineering"
d$maj_fin2[d$maj_fin %in% c(
  "Aerospace Engineering",
  "Environmental Engineering",
  "Industrial Engineering",
  "Nuclear Engineering"
)] <- "Other Engineering"
# Respondents with a missing major are also placed in "Other Engineering".
# NOTE(review): this treats unknown majors as a known category - confirm intended.
d$maj_fin2[is.na(d$maj_fin)] <- "Other Engineering"
# Cross-tabulate first-level against second-level categories to verify.
table(d[["maj_fin"]], d[["maj_fin2"]], useNA = "always")
##
## Bio Engineering Chemical Engineering
## Aerospace Engineering 0 0
## Agricultural and Biological Engineering 27 0
## Bio Engineering 102 0
## Chemical Engineering 0 149
## Civil Engineering 0 0
## Computer Engineering 0 0
## Computer Science 0 0
## Computer Science and Engineering 0 0
## Elec Engineering 0 0
## Environmental Engineering 0 0
## Industrial Engineering 0 0
## Material Science and Engineering 0 0
## Mechanical Engineering 0 0
## Nuclear Engineering 0 0
## Other Engineering 0 0
## <NA> 0 0
##
## Civil Engineering
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Bio Engineering 0
## Chemical Engineering 0
## Civil Engineering 100
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Elec Engineering 0
## Environmental Engineering 0
## Industrial Engineering 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Other Engineering 0
## <NA> 0
##
## Computer Science and Engineering
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Bio Engineering 0
## Chemical Engineering 0
## Civil Engineering 0
## Computer Engineering 19
## Computer Science 55
## Computer Science and Engineering 21
## Elec Engineering 0
## Environmental Engineering 0
## Industrial Engineering 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Other Engineering 0
## <NA> 0
##
## Elec Engineering
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Bio Engineering 0
## Chemical Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Elec Engineering 157
## Environmental Engineering 0
## Industrial Engineering 0
## Material Science and Engineering 0
## Mechanical Engineering 0
## Nuclear Engineering 0
## Other Engineering 0
## <NA> 0
##
## Material Science and Engineering
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Bio Engineering 0
## Chemical Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Elec Engineering 0
## Environmental Engineering 0
## Industrial Engineering 0
## Material Science and Engineering 162
## Mechanical Engineering 0
## Nuclear Engineering 0
## Other Engineering 0
## <NA> 0
##
## Mechanical Engineering
## Aerospace Engineering 0
## Agricultural and Biological Engineering 0
## Bio Engineering 0
## Chemical Engineering 0
## Civil Engineering 0
## Computer Engineering 0
## Computer Science 0
## Computer Science and Engineering 0
## Elec Engineering 0
## Environmental Engineering 0
## Industrial Engineering 0
## Material Science and Engineering 0
## Mechanical Engineering 165
## Nuclear Engineering 0
## Other Engineering 0
## <NA> 0
##
## Other Engineering <NA>
## Aerospace Engineering 28 0
## Agricultural and Biological Engineering 0 0
## Bio Engineering 0 0
## Chemical Engineering 0 0
## Civil Engineering 0 0
## Computer Engineering 0 0
## Computer Science 0 0
## Computer Science and Engineering 0 0
## Elec Engineering 0 0
## Environmental Engineering 28 0
## Industrial Engineering 29 0
## Material Science and Engineering 0 0
## Mechanical Engineering 0 0
## Nuclear Engineering 44 0
## Other Engineering 34 0
## <NA> 13 0
# Frequency table of the collapsed discipline variable, with an explicit NA count.
table(d[["maj_fin2"]], useNA = "always")
##
## Bio Engineering Chemical Engineering
## 129 149
## Civil Engineering Computer Science and Engineering
## 100 95
## Elec Engineering Material Science and Engineering
## 157 162
## Mechanical Engineering Other Engineering
## 165 176
## <NA>
## 0
# Chi-squared test of independence between collapsed discipline and class.
# The result is stored in `chiout`, which later sections overwrite.
d$maj_fin2 <- as.factor(d$maj_fin2)
chiout <- chisq.test(x = d$maj_fin2, y = d$class)
chiout
##
## Pearson's Chi-squared test
##
## data: d$maj_fin2 and d$class
## X-squared = 83.451, df = 35, p-value = 7.788e-06
# Observed discipline-by-class cell counts.
chiout[["observed"]]
## d$class
## d$maj_fin2 1 2 3 4 5 6
## Bio Engineering 23 63 26 12 1 4
## Chemical Engineering 27 66 35 12 2 7
## Civil Engineering 19 59 13 2 6 1
## Computer Science and Engineering 20 46 16 8 4 1
## Elec Engineering 36 71 28 6 9 7
## Material Science and Engineering 34 80 19 26 2 1
## Mechanical Engineering 33 76 37 0 11 8
## Other Engineering 39 93 24 12 4 4
# Standardized residuals per cell of the discipline-by-class table.
chiout[["stdres"]]
## d$class
## d$maj_fin2 1 2 3
## Bio Engineering -0.76632958 -0.01436740 0.85125079
## Chemical Engineering -0.73720274 -1.20570018 2.07434150
## Civil Engineering -0.36089782 2.11671464 -1.23429794
## Computer Science and Engineering 0.16790022 -0.09690145 -0.16990277
## Elec Engineering 0.85165847 -0.99218269 0.12750361
## Material Science and Engineering 0.20451787 0.13366525 -2.08075393
## Mechanical Engineering -0.13395615 -0.78846217 1.81086296
## Other Engineering 0.63444081 1.13895670 -1.45937256
## d$class
## d$maj_fin2 4 5 6
## Bio Engineering 1.15224878 -1.76503822 0.13499964
## Chemical Engineering 0.60491992 -1.50868838 1.39063837
## Civil Engineering -2.02036770 1.46933969 -1.19116080
## Computer Science and Engineering 0.61803981 0.42915979 -1.12632892
## Elec Engineering -1.63306619 1.69597826 1.24113665
## Material Science and Engineering 4.97681996 -1.66485458 -1.87666165
## Mechanical Engineering -3.77868178 2.45791909 1.59981798
## Other Engineering -0.03774010 -0.92595575 -0.54928788
# Standardized residuals of the discipline-by-class test as a three-column
# data frame (one row per discipline x class cell).
sr_di <- data.frame(chiout$stdres)
colnames(sr_di) <- c("discipline", "class", "sr")

# Count respondents in each discipline within each class (wide format: one
# column per discipline).
# FIX: this previously grouped by `race_fin` (copied from the race section),
# so the "Disciplinary Distribution by Class" plot actually displayed race.
# NOTE(review): `maj_fin2` is used because it is the variable in the
# chi-squared test above; switch to `maj_fin` if the uncollapsed majors were
# intended for the plot.
disc_counts <- d %>%
  group_by(class, maj_fin2) %>%
  summarise(count = n()) %>%
  ungroup() %>% # drop grouping so `class` can be excluded by name below
  spread(key = maj_fin2, value = count, fill = 0)

# Total number of respondents in each class (sum over all discipline columns;
# column 1 is `class`).
disc_counts$total <- rowSums(disc_counts[, -1])

# Convert counts to within-class proportions.
# A formula lambda replaces the deprecated `funs()`.
disc_counts <- disc_counts %>%
  mutate_at(vars(-class, -total), ~ . / total) %>%
  select(-total)

# Reshape to long format for plotting. The key column keeps its original name
# (`maj_fin`) so the shape of `disc_counts_long` is unchanged.
disc_counts_long <- gather(disc_counts, key = "maj_fin", value = "proportion", -class)

# Stacked bar plot: share of each discipline within each class.
ggplot(disc_counts_long, aes(x = as.factor(class), y = proportion, fill = maj_fin)) +
  geom_bar(stat = "identity") +
  labs(x = "Class", y = "Proportion", fill = "Discipline") +
  ggtitle("Disciplinary Distribution by Class")
# Collapsed race/ethnicity as a factor with "White" as the first (reference)
# level, then a chi-squared test of independence against class.
d$race_fin2 <- relevel(as.factor(d$race_fin2), ref = "White")
# d$race_fin3 <- d$race_fin2
# d$race_fin3[d$race_fin == "Bi/Multiracial"] <- "URM"
# d$race_fin3 <- droplevels(d$race_fin3)
chiout <- chisq.test(table(d$race_fin2, d$class))
chiout
##
## Pearson's Chi-squared test
##
## data: table(d$race_fin2, d$class)
## X-squared = 23.989, df = 15, p-value = 0.06529
# Observed race-by-class cell counts.
chiout[["observed"]]
##
## 1 2 3 4 5 6
## White 102 286 110 50 15 18
## Asian 80 151 41 17 17 9
## Bi/Multiracial 15 47 16 7 2 3
## URM 25 58 22 4 4 1
# Standardized residuals per cell of the race-by-class table.
chiout[["stdres"]]
##
## 1 2 3 4 5
## White -2.29585560 -0.03316766 1.62893368 2.07117563 -1.67706174
## Asian 2.72986170 -0.56152680 -2.32024014 -1.38666511 2.23443253
## Bi/Multiracial -0.86710142 0.58409099 0.15641425 0.26494185 -0.66806724
## URM 0.49115918 0.36192342 0.63272688 -1.57388305 0.03348577
##
## 6
## White 0.59356084
## Asian 0.04946184
## Bi/Multiracial 0.30818740
## URM -1.32268471
# Standardized residuals of the race-by-class test as a three-column data
# frame (one row per race x class cell).
sr_re <- data.frame(chiout$stdres)
colnames(sr_re) <- c("raceethnicity", "class", "sr")

# Count respondents of each race within each class (wide format: one column
# per race).
# NOTE(review): the plot uses `race_fin`, whereas the chi-squared test uses
# the collapsed `race_fin2` - confirm this difference is intended.
race_counts <- d %>%
  group_by(class, race_fin) %>%
  summarise(count = n()) %>%
  ungroup() %>% # drop grouping so `class` can be excluded by name below
  spread(key = race_fin, value = count, fill = 0)

# Total number of respondents in each class (column 1 is `class`).
race_counts$total <- rowSums(race_counts[, -1])

# Convert counts to within-class proportions.
# A formula lambda replaces the deprecated `funs()`.
race_counts <- race_counts %>%
  mutate_at(vars(-class, -total), ~ . / total) %>%
  select(-total)

# Reshape to long format for plotting.
race_counts_long <- gather(race_counts, key = "race_fin", value = "proportion", -class)

# Stacked bar plot: share of each race within each class.
ggplot(race_counts_long, aes(x = as.factor(class), y = proportion, fill = race_fin)) +
  geom_bar(stat = "identity") +
  labs(x = "Class", y = "Proportion", fill = "Race") +
  ggtitle("Race Distribution by Class")

# Frequency table of the gender variable before collapsing.
table(d$gen_fin)
##
## female gq male
## 398 15 692
# Collapse gender into two groups: "gq" and "female" are merged into "fgq";
# every other value is carried over from `gen_fin` unchanged.
d$gen_fin2 <- d$gen_fin
d$gen_fin2[d$gen_fin %in% c("gq", "female")] <- "fgq"
d$gen_fin2 <- as.factor(d$gen_fin2)
table(d$gen_fin2)
##
## fgq male
## 413 692
# Make "male" the reference level, then run a chi-squared test of independence
# between collapsed gender and class.
d$gen_fin2 <- relevel(as.factor(d$gen_fin2), ref = "male")
chiout <- chisq.test(table(d$gen_fin2, d$class))
chiout
##
## Pearson's Chi-squared test
##
## data: table(d$gen_fin2, d$class)
## X-squared = 5.5919, df = 5, p-value = 0.348
# Observed gender-by-class cell counts.
chiout[["observed"]]
##
## 1 2 3 4 5 6
## male 137 333 130 45 25 22
## fgq 87 212 60 33 12 9
# Standardized residuals per cell of the gender-by-class table.
chiout[["stdres"]]
##
## 1 2 3 4 5 6
## male -0.5071185 -1.0326826 1.8149150 -0.9339242 0.6321686 0.9739402
## fgq 0.5071185 1.0326826 -1.8149150 0.9339242 -0.6321686 -0.9739402
# Standardized residuals of the gender-by-class test as a three-column data
# frame (one row per gender x class cell).
sr_ge <- data.frame(chiout$stdres)
colnames(sr_ge) <- c("gender", "class", "sr")

# Count respondents of each gender within each class (wide format: one column
# per gender category).
# NOTE(review): counts are taken over the uncollapsed `gen_fin`, although the
# long-format key below is named `gen_fin2` and the chi-squared test uses
# `gen_fin2` - confirm which variable the plot should show.
gender_counts <- d %>%
  group_by(class, gen_fin) %>%
  summarise(count = n()) %>%
  ungroup() %>% # drop grouping so `class` can be excluded by name below
  spread(key = gen_fin, value = count, fill = 0)

# Total number of respondents in each class (column 1 is `class`).
gender_counts$total <- rowSums(gender_counts[, -1])

# Convert counts to within-class proportions.
# A formula lambda replaces the deprecated `funs()`.
gender_counts <- gender_counts %>%
  mutate_at(vars(-class, -total), ~ . / total) %>%
  select(-total)

# Reshape to long format for plotting.
gender_counts_long <- gather(gender_counts, key = "gen_fin2", value = "proportion", -class)

# Stacked bar plot: share of each gender category within each class.
ggplot(gender_counts_long, aes(x = as.factor(class), y = proportion, fill = gen_fin2)) +
  geom_bar(stat = "identity") +
  labs(x = "Class", y = "Proportion", fill = "Gender") +
  ggtitle("Gender Distribution by Class")

# Frequency table of the degree-progress variable.
table(d$prog_fin)
##
## 0 1 2 3
## 324 373 286 51
# Chi-squared test of independence between degree progress (as a factor) and
# class.
d[["prog_fin"]] <- as.factor(d[["prog_fin"]])
chiout <- chisq.test(table(d$prog_fin, d$class))
chiout
##
## Pearson's Chi-squared test
##
## data: table(d$prog_fin, d$class)
## X-squared = 28.679, df = 15, p-value = 0.01769
# Observed degree-progress-by-class cell counts.
chiout[["observed"]]
##
## 1 2 3 4 5 6
## 0 62 173 55 14 11 9
## 1 89 172 63 23 17 9
## 2 53 139 48 32 5 9
## 3 5 24 11 4 5 2
# Standardized residuals per cell of the degree-progress-by-class table.
chiout[["stdres"]]
##
## 1 2 3 4 5 6
## 0 -0.58253899 1.85338691 -0.08228287 -2.32267728 -0.32325106 -0.03534427
## 1 2.19415971 -1.45775355 -0.14615610 -0.84279759 1.13311296 -0.57317542
## 2 -0.83244866 -0.21007906 -0.17671955 3.20493329 -2.03623281 0.41212118
## 3 -1.89841666 -0.30338656 0.86543503 0.22393674 2.38587841 0.49548634
# Standardized residuals of the degree-progress-by-class test as a
# three-column data frame (one row per progress x class cell).
sr_pr <- data.frame(chiout$stdres)
colnames(sr_pr) <- c("degreeprog", "class", "sr")

# Count respondents at each degree-progress level within each class (wide
# format: one column per progress level).
prog_counts <- d %>%
  group_by(class, prog_fin) %>%
  summarise(count = n()) %>%
  ungroup() %>% # drop grouping so `class` can be excluded by name below
  spread(key = prog_fin, value = count, fill = 0)

# Total number of respondents in each class (column 1 is `class`).
prog_counts$total <- rowSums(prog_counts[, -1])

# Convert counts to within-class proportions.
# A formula lambda replaces the deprecated `funs()`.
prog_counts <- prog_counts %>%
  mutate_at(vars(-class, -total), ~ . / total) %>%
  select(-total)

# Reshape to long format for plotting.
prog_counts_long <- gather(prog_counts, key = "prog_fin", value = "proportion", -class)

# Stacked bar plot: share of each degree-progress level within each class.
ggplot(prog_counts_long, aes(x = as.factor(class), y = proportion, fill = prog_fin)) +
  geom_bar(stat = "identity") +
  labs(x = "Class", y = "Proportion", fill = "Degree Progress") +
  ggtitle("Degree Progress Distribution by Class")
# https://www.sas.upenn.edu/~allison/combchi.sas
# MACRO COMBCHI combines chi-square statistics from an analysis of several data
# sets created by multiple imputation, using the method described on p. 115 of
#
# Schafer, J.L. (1997) Analysis of Incomplete Multivariate Data. London:
# Chapman and Hall.
#
# The chi-square statistics can be either Wald statistics or likelihood ratio
# statistics. All that's needed are the several chi-square values and the
# degrees of freedom.
#
# COMBCHI requires the installation of IML.
#
# Example of usage:
#
# Suppose a 3 d.f. test on four data sets produces chi-squares of 5.8, 7.2,
# 6.1 and 8.5. Submit the statement:
#
# %combchi(df=3, chi=5.8 7.2 6.1 8.5)
#
# The following output is printed
#
# F DF DDF
#
# 2.1201613 3 342.22381
#
#
# P
#
# 0.0974097
#
# The macro calculates an F-statistic of 2.12 with 3 and 342 degrees of freedom.
# The associated p-value is .097.
#
# *****************************************************************************/
# proc iml;
# df=&df;
# g2={&chi};
# m=ncol(g2);
# g=sqrt(g2);
# mg2=sum(g2)/m;
# r=(1+1/m)*(ssq(g)-(sum(g)**2)/m)/(m-1);
# f=(mg2/df - r*(m-1)/(m+1))/(1+r);
# ddf=(m-1)*(1+1/r)**2/df**(3/m);
# p=1-probf(f,df,ddf);
# print f df ddf;
# print p;
# run;
# %mend combchi;
# from chat-gpt
# library(statmod)
#
# combchi <- function(df, chi) {
# g2 <- chi
# m <- length(g2)
# g <- sqrt(g2)
# mg2 <- sum(g2) / m
# r <- (1 + 1 / m) * ((sum(g^2) - sum(g)^2 / m) / (m - 1))
# f <- (mg2 / df - r * (m - 1) / (m + 1)) / (1 + r)
# ddf <- (m - 1) * (1 + 1 / r)^2 / df^(3 / m)
# p <- 1-pf(f, df, ddf) #by me
#
# cat("f:", f, "\ndf:", df, "\nddf:", ddf, "\np:", p, "\n")
# }
#
# # Example usage
# combchi(df=3, chi=c(5.8, 7.2, 6.1, 8.5))
#
# # F DF DDF
# #
# # 2.1201613 3 342.22381
# #
# #
# # P
# #
# # 0.0974097
# # The macro calculates an F-statistic of 2.12 with 3 and 342 degrees of freedom.
# # The associated p-value is .097.
#
# combchi(df=3, chi=c(97.68,23.99,5.59,28.68))
# Reference (first) levels for the categorical predictors in the regressions.
d$maj_fin2 <- relevel(d$maj_fin2, ref = "Mechanical Engineering")
d$race_fin2 <- relevel(d$race_fin2, ref = "White")
d$gen_fin2 <- relevel(d$gen_fin2, ref = "male")
d$prog_fin <- relevel(d$prog_fin, ref = "0")

# Attach a descriptive label to each numeric class code, then use "moderate"
# as the reference level.
class_labels <- c(
  "1" = "disorganized",
  "2" = "moderate",
  "3" = "strong",
  "4" = "mle",
  "5" = "mls",
  "6" = "max"
)
for (class_code in names(class_labels)) {
  d$class_fin[d$class == class_code] <- class_labels[[class_code]]
}
d$class_fin <- relevel(as.factor(d$class_fin), ref = "moderate")

# Drop "Other Engineering" respondents. `d` itself is overwritten, so every
# later statement works on the reduced data.
d <- subset(d, maj_fin2 != "Other Engineering")

# Baseline model: uncertainty on demographics only (no class membership).
regout1 <- lm(
  uncer ~ maj_fin2 + gen_fin2 + race_fin2 + prog_fin,
  data = d
)
plot_model(regout1, type = "diag")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
# Normal Q-Q plot of the raw outcome variable (not of the model residuals).
qqnorm(d$uncer)
qqline(d$uncer)

# Base-R diagnostic panels 4, 5 and 3 of plot.lm for the baseline model.
plot(regout1, which = 4)
plot(regout1, which = 5)
plot(regout1, which = 3)

# Coefficient table and fit statistics.
summary(regout1)
##
## Call:
## lm(formula = uncer ~ maj_fin2 + gen_fin2 + race_fin2 + prog_fin,
## data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.45562 -0.60741 -0.02873 0.51139 2.85647
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.114591 0.080499 26.269 < 2e-16
## maj_fin2Bio Engineering 0.076006 0.094226 0.807 0.420098
## maj_fin2Chemical Engineering 0.109113 0.089550 1.218 0.223389
## maj_fin2Civil Engineering -0.007770 0.099651 -0.078 0.937869
## maj_fin2Computer Science and Engineering 0.110718 0.101366 1.092 0.275029
## maj_fin2Elec Engineering -0.009890 0.086934 -0.114 0.909455
## maj_fin2Material Science and Engineering 0.178011 0.090555 1.966 0.049649
## gen_fin2fgq 0.008089 0.053504 0.151 0.879868
## race_fin2Asian -0.125985 0.058473 -2.155 0.031474
## race_fin2Bi/Multiracial 0.150457 0.097234 1.547 0.122145
## race_fin2URM -0.037927 0.086344 -0.439 0.660589
## prog_fin1 0.154928 0.062119 2.494 0.012819
## prog_fin2 -0.035878 0.067304 -0.533 0.594123
## prog_fin3 -0.484187 0.124213 -3.898 0.000105
##
## (Intercept) ***
## maj_fin2Bio Engineering
## maj_fin2Chemical Engineering
## maj_fin2Civil Engineering
## maj_fin2Computer Science and Engineering
## maj_fin2Elec Engineering
## maj_fin2Material Science and Engineering *
## gen_fin2fgq
## race_fin2Asian *
## race_fin2Bi/Multiracial
## race_fin2URM
## prog_fin1 *
## prog_fin2
## prog_fin3 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.74 on 851 degrees of freedom
## (92 observations deleted due to missingness)
## Multiple R-squared: 0.05328, Adjusted R-squared: 0.03881
## F-statistic: 3.684 on 13 and 851 DF, p-value: 1.009e-05
# Formatted regression table for the baseline model: standard errors and test
# statistics shown, confidence intervals hidden.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
tab_model(regout1, show.ci = FALSE, show.se = TRUE, show.stat = TRUE)
| uncer | ||||
|---|---|---|---|---|
| Predictors | Estimates | std. Error | Statistic | p |
| (Intercept) | 2.11 | 0.08 | 26.27 | <0.001 |
|
maj fin2 [Bio Engineering] |
0.08 | 0.09 | 0.81 | 0.420 |
|
maj fin2 [Chemical Engineering] |
0.11 | 0.09 | 1.22 | 0.223 |
|
maj fin2 [Civil Engineering] |
-0.01 | 0.10 | -0.08 | 0.938 |
|
maj fin2 [Computer Science and Engineering] |
0.11 | 0.10 | 1.09 | 0.275 |
|
maj fin2 [Elec Engineering] |
-0.01 | 0.09 | -0.11 | 0.909 |
|
maj fin2 [Material Science and Engineering] |
0.18 | 0.09 | 1.97 | 0.050 |
| gen fin2 [fgq] | 0.01 | 0.05 | 0.15 | 0.880 |
| race fin2 [Asian] | -0.13 | 0.06 | -2.15 | 0.031 |
|
race fin2 [Bi/Multiracial] |
0.15 | 0.10 | 1.55 | 0.122 |
| race fin2 [URM] | -0.04 | 0.09 | -0.44 | 0.661 |
| prog fin [1] | 0.15 | 0.06 | 2.49 | 0.013 |
| prog fin [2] | -0.04 | 0.07 | -0.53 | 0.594 |
| prog fin [3] | -0.48 | 0.12 | -3.90 | <0.001 |
| Observations | 865 | |||
| R2 / R2 adjusted | 0.053 / 0.039 | |||
# Coefficient plot for the baseline model.
plot_model(regout1)

# Full model: the baseline predictors plus class membership (`class_fin`).
regout2 <- lm(
  uncer ~ class_fin + maj_fin2 + gen_fin2 + race_fin2 + prog_fin,
  data = d
)
# plot_model(regout2, type="diag")

# Base-R diagnostic panels 4, 5 and 3 of plot.lm for the full model.
plot(regout2, which = 4)
plot(regout2, which = 5)
plot(regout2, which = 3)

# Coefficient table and fit statistics.
summary(regout2)
##
## Call:
## lm(formula = uncer ~ class_fin + maj_fin2 + gen_fin2 + race_fin2 +
## prog_fin, data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6042 -0.5065 0.0292 0.5301 2.5510
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.18624 0.08102 26.984 < 2e-16
## class_findisorganized 0.29786 0.06450 4.618 4.48e-06
## class_finmax -0.34474 0.15632 -2.205 0.027698
## class_finmle -0.35397 0.09907 -3.573 0.000373
## class_finmls -0.15477 0.12962 -1.194 0.232821
## class_finstrong -0.36062 0.06671 -5.406 8.40e-08
## maj_fin2Bio Engineering 0.09572 0.09066 1.056 0.291364
## maj_fin2Chemical Engineering 0.13283 0.08602 1.544 0.122915
## maj_fin2Civil Engineering -0.03557 0.09552 -0.372 0.709726
## maj_fin2Computer Science and Engineering 0.09546 0.09724 0.982 0.326522
## maj_fin2Elec Engineering -0.02458 0.08316 -0.296 0.767657
## maj_fin2Material Science and Engineering 0.15759 0.08858 1.779 0.075578
## gen_fin2fgq -0.01115 0.05123 -0.218 0.827791
## race_fin2Asian -0.17972 0.05628 -3.193 0.001459
## race_fin2Bi/Multiracial 0.15798 0.09294 1.700 0.089536
## race_fin2URM -0.05939 0.08253 -0.720 0.471985
## prog_fin1 0.14463 0.05952 2.430 0.015310
## prog_fin2 -0.02076 0.06441 -0.322 0.747280
## prog_fin3 -0.40026 0.11952 -3.349 0.000847
##
## (Intercept) ***
## class_findisorganized ***
## class_finmax *
## class_finmle ***
## class_finmls
## class_finstrong ***
## maj_fin2Bio Engineering
## maj_fin2Chemical Engineering
## maj_fin2Civil Engineering
## maj_fin2Computer Science and Engineering
## maj_fin2Elec Engineering
## maj_fin2Material Science and Engineering .
## gen_fin2fgq
## race_fin2Asian **
## race_fin2Bi/Multiracial .
## race_fin2URM
## prog_fin1 *
## prog_fin2
## prog_fin3 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7066 on 846 degrees of freedom
## (92 observations deleted due to missingness)
## Multiple R-squared: 0.1418, Adjusted R-squared: 0.1236
## F-statistic: 7.767 on 18 and 846 DF, p-value: < 2.2e-16
# Formatted regression table for the full model: standard errors and test
# statistics shown, confidence intervals hidden.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
tab_model(regout2, show.ci = FALSE, show.se = TRUE, show.stat = TRUE)
| uncer | ||||
|---|---|---|---|---|
| Predictors | Estimates | std. Error | Statistic | p |
| (Intercept) | 2.19 | 0.08 | 26.98 | <0.001 |
| class fin [disorganized] | 0.30 | 0.06 | 4.62 | <0.001 |
| class fin [max] | -0.34 | 0.16 | -2.21 | 0.028 |
| class fin [mle] | -0.35 | 0.10 | -3.57 | <0.001 |
| class fin [mls] | -0.15 | 0.13 | -1.19 | 0.233 |
| class fin [strong] | -0.36 | 0.07 | -5.41 | <0.001 |
|
maj fin2 [Bio Engineering] |
0.10 | 0.09 | 1.06 | 0.291 |
|
maj fin2 [Chemical Engineering] |
0.13 | 0.09 | 1.54 | 0.123 |
|
maj fin2 [Civil Engineering] |
-0.04 | 0.10 | -0.37 | 0.710 |
|
maj fin2 [Computer Science and Engineering] |
0.10 | 0.10 | 0.98 | 0.327 |
|
maj fin2 [Elec Engineering] |
-0.02 | 0.08 | -0.30 | 0.768 |
|
maj fin2 [Material Science and Engineering] |
0.16 | 0.09 | 1.78 | 0.076 |
| gen fin2 [fgq] | -0.01 | 0.05 | -0.22 | 0.828 |
| race fin2 [Asian] | -0.18 | 0.06 | -3.19 | 0.001 |
|
race fin2 [Bi/Multiracial] |
0.16 | 0.09 | 1.70 | 0.090 |
| race fin2 [URM] | -0.06 | 0.08 | -0.72 | 0.472 |
| prog fin [1] | 0.14 | 0.06 | 2.43 | 0.015 |
| prog fin [2] | -0.02 | 0.06 | -0.32 | 0.747 |
| prog fin [3] | -0.40 | 0.12 | -3.35 | 0.001 |
| Observations | 865 | |||
| R2 / R2 adjusted | 0.142 / 0.124 | |||
# Coefficient plot for the full model.
plot_model(regout2)

# F-test comparing the nested models, i.e. whether adding `class_fin` to the
# baseline predictors improves the fit.
anova(regout1, regout2)
## Analysis of Variance Table
##
## Model 1: uncer ~ maj_fin2 + gen_fin2 + race_fin2 + prog_fin
## Model 2: uncer ~ class_fin + maj_fin2 + gen_fin2 + race_fin2 + prog_fin
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 851 465.99
## 2 846 422.41 5 43.579 17.456 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1