suppressMessages(suppressWarnings(library(tidyr)))
suppressMessages(suppressWarnings(library(dplyr)))
suppressMessages(suppressWarnings(library(psych)))
suppressMessages(suppressWarnings(library(stringr)))
# Load the FiveThirtyEight "recent grads" table from GitHub and order the rows
# by major category.
url1 <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/recent-grads.csv"
recent_grads <- url1 %>%
  read.csv(stringsAsFactors = FALSE) %>%
  as_tibble() %>% # tbl_df() is deprecated since dplyr 1.0.0
  arrange(Major_category)
# NOTE(review): despite the `_ag` suffix, this subset holds the "Engineering"
# rows -- confirm that the name/filter pairing is intended.
recent_grads_ag <- recent_grads %>%
  filter(Major_category == "Engineering")
head(recent_grads_ag)
## # A tibble: 6 x 21
## Rank Major_code Major Total Men Women Major_category ShareWomen
## <int> <int> <chr> <int> <int> <int> <chr> <dbl>
## 1 1 2419 PETR~ 2339 2057 282 Engineering 0.121
## 2 2 2416 MINI~ 756 679 77 Engineering 0.102
## 3 3 2415 META~ 856 725 131 Engineering 0.153
## 4 4 2417 NAVA~ 1258 1123 135 Engineering 0.107
## 5 5 2405 CHEM~ 32260 21239 11021 Engineering 0.342
## 6 6 2418 NUCL~ 2573 2200 373 Engineering 0.145
## # ... with 13 more variables: Sample_size <int>, Employed <int>,
## # Full_time <int>, Part_time <int>, Full_time_year_round <int>,
## # Unemployed <int>, Unemployment_rate <dbl>, Median <int>, P25th <int>,
## # P75th <int>, College_jobs <int>, Non_college_jobs <int>,
## # Low_wage_jobs <int>
# Split the recent-grads table into one subset per major category.
rct_art <- filter(recent_grads, Major_category == "Arts")
rct_bio <- filter(recent_grads, Major_category == "Biology & Life Science")
rct_bsn <- filter(recent_grads, Major_category == "Business")
rct_cj <- filter(
  recent_grads,
  Major_category == "Communications & Journalism"
)
rct_com <- filter(recent_grads, Major_category == "Computers & Mathematics")
rct_ed <- filter(recent_grads, Major_category == "Education")
rct_hlt <- filter(recent_grads, Major_category == "Health")
# Humanities & Liberal Arts and Interdisciplinary majors share one subset.
rct_la <- filter(
  recent_grads,
  Major_category %in% c("Humanities & Liberal Arts", "Interdisciplinary")
)
rct_ia <- filter(
  recent_grads,
  Major_category == "Industrial Arts & Consumer Services"
)
rct_law <- filter(recent_grads, Major_category == "Law & Public Policy")
rct_sci <- filter(recent_grads, Major_category == "Physical Sciences")
rct_psy <- filter(recent_grads, Major_category == "Psychology & Social Work")
rct_ssc <- filter(recent_grads, Major_category == "Social Science")
# Load the FiveThirtyEight "grad students" table from GitHub and order the rows
# by major category.
url2 <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/grad-students.csv"
grad_stdnt <- url2 %>%
  read.csv(stringsAsFactors = FALSE) %>%
  as_tibble() %>% # tbl_df() is deprecated since dplyr 1.0.0
  arrange(Major_category)
# NOTE(review): despite the `_ag` suffix, this subset holds the "Engineering"
# rows -- confirm that the name/filter pairing is intended.
grad_ag <- grad_stdnt %>%
  filter(Major_category == "Engineering")
head(grad_ag)
## # A tibble: 6 x 22
## Major_code Major Major_category Grad_total Grad_sample_size Grad_employed
## <int> <chr> <chr> <int> <int> <int>
## 1 2504 MECH~ Engineering 6065 111 4442
## 2 2599 MISC~ Engineering 14816 315 12433
## 3 2503 INDU~ Engineering 19885 408 14752
## 4 2502 ELEC~ Engineering 28155 521 22501
## 5 2500 ENGI~ Engineering 11724 219 9471
## 6 2403 ARCH~ Engineering 6466 143 4857
## # ... with 16 more variables: Grad_full_time_year_round <int>,
## # Grad_unemployed <int>, Grad_unemployment_rate <dbl>,
## # Grad_median <dbl>, Grad_P25 <int>, Grad_P75 <dbl>,
## # Nongrad_total <int>, Nongrad_employed <int>,
## # Nongrad_full_time_year_round <int>, Nongrad_unemployed <int>,
## # Nongrad_unemployment_rate <dbl>, Nongrad_median <dbl>,
## # Nongrad_P25 <int>, Nongrad_P75 <dbl>, Grad_share <dbl>,
## # Grad_premium <dbl>
# Split the grad-students table into one subset per major category.
grad_art <- filter(grad_stdnt, Major_category == "Arts")
grad_bio <- filter(grad_stdnt, Major_category == "Biology & Life Science")
grad_bsn <- filter(grad_stdnt, Major_category == "Business")
grad_cj <- filter(
  grad_stdnt,
  Major_category == "Communications & Journalism"
)
grad_com <- filter(grad_stdnt, Major_category == "Computers & Mathematics")
grad_ed <- filter(grad_stdnt, Major_category == "Education")
grad_hlt <- filter(grad_stdnt, Major_category == "Health")
# Humanities & Liberal Arts and Interdisciplinary majors share one subset.
grad_la <- filter(
  grad_stdnt,
  Major_category %in% c("Humanities & Liberal Arts", "Interdisciplinary")
)
grad_ia <- filter(
  grad_stdnt,
  Major_category == "Industrial Arts & Consumer Services"
)
grad_law <- filter(grad_stdnt, Major_category == "Law & Public Policy")
grad_sci <- filter(grad_stdnt, Major_category == "Physical Sciences")
grad_psy <- filter(grad_stdnt, Major_category == "Psychology & Social Work")
grad_ssc <- filter(grad_stdnt, Major_category == "Social Science")
Research Question
Which college majors offer the best prospects in terms of employment rate and salary?
Cases
Each case represents majors offered by colleges and universities in the US. There are 173 majors represented. These data include both undergrads and grad students.
Grad Students
Each case represents majors offered by colleges and universities in the US. There are 173 majors represented. These data include only grad students aged 25+ years.
Recent Grads
Each case represents majors offered by colleges and universities in the US. There are 173 majors represented. These data include only undergraduate students aged <28 years.
Data collection
Type of Study
This is an observational study.
Explanatory Variables
The explanatory variables are the counts of employed and unemployed college degree holders and the statistics of their income. These data are numerical.
Relevant Summary Statistics
First we will look at the overall number of employed graduates for the two categories: recent grads and grad students.
# Five-number summary (plus mean) of the employed counts in each data set.
recent_grads %>% pull(Employed) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 3608 11797 31193 31433 307933
grad_stdnt %>% pull(Grad_employed) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1008 12659 28930 94037 109944 915341
# Distribution of employed counts per major; titled and labelled in the same
# way as the median-income histograms further down.
hist(
  recent_grads$Employed,
  main = "Histogram for Employed Recent Grads",
  xlab = "Employed by Major Recent Grads (count)"
)

hist(
  grad_stdnt$Grad_employed,
  main = "Histogram for Employed Grad Students",
  xlab = "Employed by Major Grad Student (count)"
)

# Five-number summary (plus mean) of the median income in each data set.
recent_grads %>% pull(Median) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 22000 33000 36000 40151 45000 110000
grad_stdnt %>% pull(Grad_median) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 47000 65000 75000 76756 90000 135000
# Distribution of per-major median income for recent grads.
hist(
  recent_grads[["Median"]],
  main = "Histogram for Median Income Recent Grads",
  xlab = "Median Income by Major Recent Grads (USD)",
  col = "yellow"
)

# Distribution of per-major median income for grad students.
hist(
  grad_stdnt[["Grad_median"]],
  main = "Histogram for Median Income Grad Students",
  xlab = "Median Income by Major Grad Student (USD)",
  col = "dark blue"
)
