library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(stringr)
# Load the all-ages majors data, sort it by major category, and preview the
# "Agriculture & Natural Resources" majors.
data_url <- "https://raw.githubusercontent.com/jgarcia71/Data-606-Assignments/master/all-ages.csv"
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
all_ages <- data_url %>%
  read.csv(stringsAsFactors = FALSE) %>%
  as_tibble() %>%
  arrange(Major_category)
all_ages_degree <- all_ages %>%
  filter(Major_category == "Agriculture & Natural Resources")
head(all_ages_degree)
## # A tibble: 6 x 11
## Major_code Major Major_category Total Employed Employed_full_t~
## <int> <chr> <chr> <int> <int> <int>
## 1 1100 GENE~ Agriculture &~ 128148 90245 74078
## 2 1101 AGRI~ Agriculture &~ 95326 76865 64240
## 3 1102 AGRI~ Agriculture &~ 33955 26321 22810
## 4 1103 ANIM~ Agriculture &~ 103549 81177 64937
## 5 1104 FOOD~ Agriculture &~ 24280 17281 12722
## 6 1105 PLAN~ Agriculture &~ 79409 63043 51077
## # ... with 5 more variables: Unemployed <int>, Unemployment_rate <dbl>,
## # Median <int>, P25th <int>, P75th <dbl>
# Load the grad-students majors data, sort it by major category, and preview
# the "Agriculture & Natural Resources" majors.
data_url <- "https://raw.githubusercontent.com/jgarcia71/Data-606-Assignments/master/grad-students.csv"
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
grad_stdnt <- data_url %>%
  read.csv(stringsAsFactors = FALSE) %>%
  as_tibble() %>%
  arrange(Major_category)
grad_college_degree <- grad_stdnt %>%
  filter(Major_category == "Agriculture & Natural Resources")
head(grad_college_degree)
## # A tibble: 6 x 22
## Major_code Major Major_category Grad_total Grad_sample_size Grad_employed
## <int> <chr> <chr> <int> <int> <int>
## 1 1101 AGRI~ Agriculture &~ 17488 386 13104
## 2 1100 GENE~ Agriculture &~ 44306 764 28930
## 3 1302 FORE~ Agriculture &~ 24713 487 16831
## 4 1303 NATU~ Agriculture &~ 29357 659 23394
## 5 1105 PLAN~ Agriculture &~ 30983 624 22782
## 6 1102 AGRI~ Agriculture &~ 14800 305 10592
## # ... with 16 more variables: Grad_full_time_year_round <int>,
## # Grad_unemployed <int>, Grad_unemployment_rate <dbl>,
## # Grad_median <dbl>, Grad_P25 <int>, Grad_P75 <dbl>,
## # Nongrad_total <int>, Nongrad_employed <int>,
## # Nongrad_full_time_year_round <int>, Nongrad_unemployed <int>,
## # Nongrad_unemployment_rate <dbl>, Nongrad_median <dbl>,
## # Nongrad_P25 <int>, Nongrad_P75 <dbl>, Grad_share <dbl>,
## # Grad_premium <dbl>
# Load the recent-grads majors data, sort it by major category, and preview
# the "Agriculture & Natural Resources" majors.
data_url <- "https://raw.githubusercontent.com/jgarcia71/Data-606-Assignments/master/recent-grads.csv"
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
rct_grad <- data_url %>%
  read.csv(stringsAsFactors = FALSE) %>%
  as_tibble() %>%
  arrange(Major_category)
rct_grad_degrees <- rct_grad %>%
  filter(Major_category == "Agriculture & Natural Resources")
head(rct_grad_degrees)
## # A tibble: 6 x 21
## Rank Major_code Major Major_category Total Sample_size Men Women
## <int> <int> <chr> <chr> <int> <int> <int> <int>
## 1 22 1104 FOOD~ Agriculture &~ 4361 36 99743 28576
## 2 64 1101 AGRI~ Agriculture &~ 14240 273 7426 10874
## 3 65 1100 GENE~ Agriculture &~ 10399 158 1761 1874
## 4 72 1102 AGRI~ Agriculture &~ 2439 44 10624 15270
## 5 108 1303 NATU~ Agriculture &~ 13773 152 27015 35037
## 6 112 1302 FORE~ Agriculture &~ 3607 48 32041 71439
## # ... with 13 more variables: ShareWomen <dbl>, Employed <int>,
## # Full_time <int>, Part_time <int>, Full_time_year_round <int>,
## # Unemployed <int>, Unemployment_rate <dbl>, Median <int>, P25th <int>,
## # P75th <int>, College_jobs <int>, Non_college_jobs <int>,
## # Low_wage_jobs <int>
Which college majors offer the best opportunities in terms of unemployment rate and salary?
What are the cases, and how many are there? All_ages: Each case represents a major offered by colleges and universities in the US. There are 173 majors represented. These data include both undergrads and grad students.
Grad Students: Each case represents a major offered by colleges and universities in the US. There are 173 majors represented. These data include only grad students aged 25+ years.
Recent Grads: Each case represents a major offered by colleges and universities in the US. There are 173 majors represented. These data include only undergraduate students aged <28 years. These data also include gender statistics.
Describe the method of data collection. These data were collated by the following website: https://data.world/
What type of study is this (observational/experiment)? This is an observational study.
What is the explanatory variable, and what type is it (numerical/categorical)? The explanatory variables are the counts of employed and unemployed college degree holders and the statistics of their income. These data are numerical.
Provide summary statistics relevant to your research question. For example, if you’re comparing means across groups provide means, SDs, sample sizes of each group. This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed. First, we will look at the overall unemployment rate for the three categories: all ages, recent grads, and grad students.
# Min / quartiles / median / mean / max of the per-major unemployment rate in
# each of the three data sets (printed output kept below each call).
all_ages %>% pull(Unemployment_rate) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.04626 0.05472 0.05736 0.06904 0.15615
rct_grad %>% pull(Unemployment_rate) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00000 0.05072 0.06827 0.06859 0.08760 0.17723 1
grad_stdnt %>% pull(Grad_unemployment_rate) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.02607 0.03665 0.03934 0.04805 0.13851
# Compare the mean per-major unemployment rate of the three data sets.
# cbind() places the three rate vectors side by side as matrix columns.
unempl <- cbind(
  all_ages$Unemployment_rate,
  rct_grad$Unemployment_rate,
  grad_stdnt$Grad_unemployment_rate
)
# Plot one bar per data set at its column mean. na.rm = TRUE is required
# because rct_grad$Unemployment_rate contains a missing value; passing the raw
# matrix to barplot() stacks the rows and the NA truncates that bar.
mean_unempl <- colMeans(unempl, na.rm = TRUE)
barplot(
  mean_unempl,
  names.arg = c("All", "Recent Grad", "Grad Student"),
  ylab = "Mean Unemployment Rate",
  col = terrain.colors(length(mean_unempl))
)
# Summary statistics and histogram of the per-major median income (all ages).
all_ages %>% pull(Median) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 35000 46000 53000 56816 65000 125000
hist(
  x = all_ages[["Median"]],
  main = "Histogram for Median Income All Ages",
  xlab = "Median Income by Major All Ages (USD)",
  col = "green",
  border = "blue"
)
# Summary statistics and histogram of the per-major median income
# (grad students).
grad_stdnt %>% pull(Grad_median) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 47000 65000 75000 76756 90000 135000
hist(
  x = grad_stdnt[["Grad_median"]],
  main = "Histogram for Median Income Grad Students",
  xlab = "Median Income by Major Grad Student (USD)",
  col = "yellow",
  border = "green"
)
# Compare the mean of the per-major median salaries across the three data sets.
# cbind() places the three salary vectors side by side as matrix columns.
medsal <- cbind(
  all_ages$Median,
  rct_grad$Median,
  grad_stdnt$Grad_median
)
# Plot one bar per data set at its column mean rather than stacking every row
# of the matrix; na.rm = TRUE keeps a missing salary from truncating a bar.
mean_medsal <- colMeans(medsal, na.rm = TRUE)
barplot(
  mean_medsal,
  names.arg = c("All", "Recent Grad", "Grad Student"),
  ylab = "Mean Median Salary (USD)",
  col = topo.colors(length(mean_medsal))
)