This data set accompanies "The Economic Guide to Picking a College Major."
The data shows the number of graduates in each college major together with their employment figures. We use it to examine the employment rate of each college major.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.1
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.1.1
## Warning: package 'readr' was built under R version 4.1.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
This reads the file from GitHub into a data frame and checks the first few rows to get an idea of the data.
# Raw GitHub URL of the CSV file to load.
url <- "https://raw.githubusercontent.com/omocharly/DATA-607/main/grad-students.csv"
# read.csv() returns a base data.frame (not a tibble).
grad_students <- read.csv(url)
head(grad_students) # show the first six rows
## Major_code Major
## 1 5601 CONSTRUCTION SERVICES
## 2 6004 COMMERCIAL ART AND GRAPHIC DESIGN
## 3 6211 HOSPITALITY MANAGEMENT
## 4 2201 COSMETOLOGY SERVICES AND CULINARY ARTS
## 5 2001 COMMUNICATION TECHNOLOGIES
## 6 3201 COURT REPORTING
## Major_category Grad_total Grad_sample_size Grad_employed
## 1 Industrial Arts & Consumer Services 9173 200 7098
## 2 Arts 53864 882 40492
## 3 Business 24417 437 18368
## 4 Industrial Arts & Consumer Services 5411 72 3590
## 5 Computers & Mathematics 9109 171 7512
## 6 Law & Public Policy 1542 22 1008
## Grad_full_time_year_round Grad_unemployed Grad_unemployment_rate Grad_median
## 1 6511 681 0.08754339 75000
## 2 29553 2482 0.05775585 60000
## 3 14784 1465 0.07386679 65000
## 4 2701 316 0.08090118 47000
## 5 5622 466 0.05841063 57000
## 6 860 0 0.00000000 75000
## Grad_P25 Grad_P75 Nongrad_total Nongrad_employed Nongrad_full_time_year_round
## 1 53000 110000 86062 73607 62435
## 2 40000 89000 461977 347166 250596
## 3 45000 100000 179335 145597 113579
## 4 24500 85000 37575 29738 23249
## 5 40600 83700 53819 43163 34231
## 6 55000 120000 8921 6967 6063
## Nongrad_unemployed Nongrad_unemployment_rate Nongrad_median Nongrad_P25
## 1 3928 0.05066099 65000 47000
## 2 25484 0.06838588 48000 34000
## 3 7409 0.04842294 50000 35000
## 4 1661 0.05289977 41600 29000
## 5 3389 0.07280031 52000 36000
## 6 518 0.06920508 50000 34000
## Nongrad_P75 Grad_share Grad_premium
## 1 98000 0.09631963 0.15384615
## 2 71000 0.10441977 0.25000000
## 3 75000 0.11983686 0.30000000
## 4 60000 0.12587819 0.12980769
## 5 78000 0.14475273 0.09615385
## 6 75000 0.14737647 0.50000000
Explore the data set to see the name of each column.
# Print the column names of grad_students.
colnames(grad_students)
## [1] "Major_code" "Major"
## [3] "Major_category" "Grad_total"
## [5] "Grad_sample_size" "Grad_employed"
## [7] "Grad_full_time_year_round" "Grad_unemployed"
## [9] "Grad_unemployment_rate" "Grad_median"
## [11] "Grad_P25" "Grad_P75"
## [13] "Nongrad_total" "Nongrad_employed"
## [15] "Nongrad_full_time_year_round" "Nongrad_unemployed"
## [17] "Nongrad_unemployment_rate" "Nongrad_median"
## [19] "Nongrad_P25" "Nongrad_P75"
## [21] "Grad_share" "Grad_premium"
Check the structure of the data.
# Display each column's type and its first few values.
str(grad_students)
## 'data.frame': 173 obs. of 22 variables:
## $ Major_code : int 5601 6004 6211 2201 2001 3201 6206 1101 2101 1904 ...
## $ Major : chr "CONSTRUCTION SERVICES" "COMMERCIAL ART AND GRAPHIC DESIGN" "HOSPITALITY MANAGEMENT" "COSMETOLOGY SERVICES AND CULINARY ARTS" ...
## $ Major_category : chr "Industrial Arts & Consumer Services" "Arts" "Business" "Industrial Arts & Consumer Services" ...
## $ Grad_total : int 9173 53864 24417 5411 9109 1542 190996 17488 5611 33928 ...
## $ Grad_sample_size : int 200 882 437 72 171 22 3738 386 98 688 ...
## $ Grad_employed : int 7098 40492 18368 3590 7512 1008 151570 13104 4716 28517 ...
## $ Grad_full_time_year_round : int 6511 29553 14784 2701 5622 860 123045 11207 3981 22523 ...
## $ Grad_unemployed : int 681 2482 1465 316 466 0 8324 473 119 899 ...
## $ Grad_unemployment_rate : num 0.0875 0.0578 0.0739 0.0809 0.0584 ...
## $ Grad_median : num 75000 60000 65000 47000 57000 75000 80000 67000 85000 60000 ...
## $ Grad_P25 : int 53000 40000 45000 24500 40600 55000 50000 41600 56000 43000 ...
## $ Grad_P75 : num 110000 89000 100000 85000 83700 120000 120000 100000 114000 90000 ...
## $ Nongrad_total : int 86062 461977 179335 37575 53819 8921 1029181 89169 28314 163435 ...
## $ Nongrad_employed : int 73607 347166 145597 29738 43163 6967 817906 71781 22024 127832 ...
## $ Nongrad_full_time_year_round: int 62435 250596 113579 23249 34231 6063 662346 61335 18381 100330 ...
## $ Nongrad_unemployed : int 3928 25484 7409 1661 3389 518 45519 1869 2222 8706 ...
## $ Nongrad_unemployment_rate : num 0.0507 0.0684 0.0484 0.0529 0.0728 ...
## $ Nongrad_median : num 65000 48000 50000 41600 52000 50000 60000 55000 60000 51000 ...
## $ Nongrad_P25 : int 47000 34000 35000 29000 36000 34000 40000 38000 40000 37800 ...
## $ Nongrad_P75 : num 98000 71000 75000 60000 78000 75000 91500 80000 85000 78000 ...
## $ Grad_share : num 0.0963 0.1044 0.1198 0.1259 0.1448 ...
## $ Grad_premium : num 0.1538 0.25 0.3 0.1298 0.0962 ...
Subset the original data “grad_students” using dplyr's select(), keeping the columns from Major_code through Grad_unemployment_rate.
# Keep the contiguous run of columns from Major_code through
# Grad_unemployment_rate, then preview the first rows of the result.
grad_students_Subset <- grad_students %>%
  select(Major_code:Grad_unemployment_rate)
head(grad_students_Subset)
## Major_code Major
## 1 5601 CONSTRUCTION SERVICES
## 2 6004 COMMERCIAL ART AND GRAPHIC DESIGN
## 3 6211 HOSPITALITY MANAGEMENT
## 4 2201 COSMETOLOGY SERVICES AND CULINARY ARTS
## 5 2001 COMMUNICATION TECHNOLOGIES
## 6 3201 COURT REPORTING
## Major_category Grad_total Grad_sample_size Grad_employed
## 1 Industrial Arts & Consumer Services 9173 200 7098
## 2 Arts 53864 882 40492
## 3 Business 24417 437 18368
## 4 Industrial Arts & Consumer Services 5411 72 3590
## 5 Computers & Mathematics 9109 171 7512
## 6 Law & Public Policy 1542 22 1008
## Grad_full_time_year_round Grad_unemployed Grad_unemployment_rate
## 1 6511 681 0.08754339
## 2 29553 2482 0.05775585
## 3 14784 1465 0.07386679
## 4 2701 316 0.08090118
## 5 5622 466 0.05841063
## 6 860 0 0.00000000
Check the descriptive statistics summary of the full data set.
# Summarise every column of the full grad_students data frame
# (not the grad_students_Subset created earlier).
summary(grad_students)
## Major_code Major Major_category Grad_total
## Min. :1100 Length:173 Length:173 Min. : 1542
## 1st Qu.:2403 Class :character Class :character 1st Qu.: 15284
## Median :3608 Mode :character Mode :character Median : 37872
## Mean :3880 Mean : 127672
## 3rd Qu.:5503 3rd Qu.: 148255
## Max. :6403 Max. :1184158
## Grad_sample_size Grad_employed Grad_full_time_year_round Grad_unemployed
## Min. : 22 Min. : 1008 Min. : 770 Min. : 0
## 1st Qu.: 314 1st Qu.: 12659 1st Qu.: 9894 1st Qu.: 453
## Median : 688 Median : 28930 Median : 22523 Median : 1179
## Mean : 2251 Mean : 94037 Mean : 72861 Mean : 3506
## 3rd Qu.: 2528 3rd Qu.:109944 3rd Qu.: 80794 3rd Qu.: 3329
## Max. :21994 Max. :915341 Max. :703347 Max. :35718
## Grad_unemployment_rate Grad_median Grad_P25 Grad_P75
## Min. :0.00000 Min. : 47000 Min. :24500 Min. : 65000
## 1st Qu.:0.02607 1st Qu.: 65000 1st Qu.:45000 1st Qu.: 93000
## Median :0.03665 Median : 75000 Median :50000 Median :108000
## Mean :0.03934 Mean : 76756 Mean :52597 Mean :112087
## 3rd Qu.:0.04805 3rd Qu.: 90000 3rd Qu.:60000 3rd Qu.:130000
## Max. :0.13851 Max. :135000 Max. :85000 Max. :294000
## Nongrad_total Nongrad_employed Nongrad_full_time_year_round
## Min. : 2232 Min. : 1328 Min. : 980
## 1st Qu.: 20564 1st Qu.: 15914 1st Qu.: 11755
## Median : 68993 Median : 50092 Median : 38384
## Mean : 214720 Mean : 154554 Mean : 120737
## 3rd Qu.: 184971 3rd Qu.: 129179 3rd Qu.: 103629
## Max. :2996892 Max. :2253649 Max. :1882507
## Nongrad_unemployed Nongrad_unemployment_rate Nongrad_median Nongrad_P25
## Min. : 0 Min. :0.00000 Min. : 37000 Min. :25000
## 1st Qu.: 880 1st Qu.:0.04198 1st Qu.: 48700 1st Qu.:34000
## Median : 3157 Median :0.05103 Median : 55000 Median :38000
## Mean : 8486 Mean :0.05395 Mean : 58584 Mean :40078
## 3rd Qu.: 7409 3rd Qu.:0.06439 3rd Qu.: 65000 3rd Qu.:44000
## Max. :136978 Max. :0.16091 Max. :126000 Max. :80000
## Nongrad_P75 Grad_share Grad_premium
## Min. : 48000 Min. :0.09632 Min. :-0.0250
## 1st Qu.: 72000 1st Qu.:0.26757 1st Qu.: 0.2308
## Median : 80000 Median :0.39875 Median : 0.3208
## Mean : 84333 Mean :0.40059 Mean : 0.3285
## 3rd Qu.: 97000 3rd Qu.:0.49912 3rd Qu.: 0.4000
## Max. :215000 Max. :0.93117 Max. : 1.6471