Title: CUNY SPS MDS Data_607_Assignment1

Author: Charles Ugiagbe

Date: 8/29/2021

Introduction

This data set comes from "The Economic Guide to Picking a College Major".
It shows the number of graduates from each college major along with their employment figures; we use it to examine the employment rate of each college major.

Load the required libraries

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.1
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.1.1
## Warning: package 'readr' was built under R version 4.1.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Read Files

This reads the file from GitHub into a data frame and checks the head to get an idea of the data.

# Raw CSV of the graduate-student data, hosted on GitHub.
url <- "https://raw.githubusercontent.com/omocharly/DATA-607/main/grad-students.csv"
# read.csv() returns a base data.frame (not a tibble).
grad_students <- read.csv(url)
# Preview the first rows to get a feel for the columns.
head(grad_students)
##   Major_code                                  Major
## 1       5601                  CONSTRUCTION SERVICES
## 2       6004      COMMERCIAL ART AND GRAPHIC DESIGN
## 3       6211                 HOSPITALITY MANAGEMENT
## 4       2201 COSMETOLOGY SERVICES AND CULINARY ARTS
## 5       2001             COMMUNICATION TECHNOLOGIES
## 6       3201                        COURT REPORTING
##                        Major_category Grad_total Grad_sample_size Grad_employed
## 1 Industrial Arts & Consumer Services       9173              200          7098
## 2                                Arts      53864              882         40492
## 3                            Business      24417              437         18368
## 4 Industrial Arts & Consumer Services       5411               72          3590
## 5             Computers & Mathematics       9109              171          7512
## 6                 Law & Public Policy       1542               22          1008
##   Grad_full_time_year_round Grad_unemployed Grad_unemployment_rate Grad_median
## 1                      6511             681             0.08754339       75000
## 2                     29553            2482             0.05775585       60000
## 3                     14784            1465             0.07386679       65000
## 4                      2701             316             0.08090118       47000
## 5                      5622             466             0.05841063       57000
## 6                       860               0             0.00000000       75000
##   Grad_P25 Grad_P75 Nongrad_total Nongrad_employed Nongrad_full_time_year_round
## 1    53000   110000         86062            73607                        62435
## 2    40000    89000        461977           347166                       250596
## 3    45000   100000        179335           145597                       113579
## 4    24500    85000         37575            29738                        23249
## 5    40600    83700         53819            43163                        34231
## 6    55000   120000          8921             6967                         6063
##   Nongrad_unemployed Nongrad_unemployment_rate Nongrad_median Nongrad_P25
## 1               3928                0.05066099          65000       47000
## 2              25484                0.06838588          48000       34000
## 3               7409                0.04842294          50000       35000
## 4               1661                0.05289977          41600       29000
## 5               3389                0.07280031          52000       36000
## 6                518                0.06920508          50000       34000
##   Nongrad_P75 Grad_share Grad_premium
## 1       98000 0.09631963   0.15384615
## 2       71000 0.10441977   0.25000000
## 3       75000 0.11983686   0.30000000
## 4       60000 0.12587819   0.12980769
## 5       78000 0.14475273   0.09615385
## 6       75000 0.14737647   0.50000000
Columns

Explore the data set to see the names of its columns.

# List the column names of the data frame.
names(grad_students)
##  [1] "Major_code"                   "Major"                       
##  [3] "Major_category"               "Grad_total"                  
##  [5] "Grad_sample_size"             "Grad_employed"               
##  [7] "Grad_full_time_year_round"    "Grad_unemployed"             
##  [9] "Grad_unemployment_rate"       "Grad_median"                 
## [11] "Grad_P25"                     "Grad_P75"                    
## [13] "Nongrad_total"                "Nongrad_employed"            
## [15] "Nongrad_full_time_year_round" "Nongrad_unemployed"          
## [17] "Nongrad_unemployment_rate"    "Nongrad_median"              
## [19] "Nongrad_P25"                  "Nongrad_P75"                 
## [21] "Grad_share"                   "Grad_premium"

Structure

Check the Structure of the data

# Compact overview: dimensions, column types and the first few values.
utils::str(grad_students)
## 'data.frame':    173 obs. of  22 variables:
##  $ Major_code                  : int  5601 6004 6211 2201 2001 3201 6206 1101 2101 1904 ...
##  $ Major                       : chr  "CONSTRUCTION SERVICES" "COMMERCIAL ART AND GRAPHIC DESIGN" "HOSPITALITY MANAGEMENT" "COSMETOLOGY SERVICES AND CULINARY ARTS" ...
##  $ Major_category              : chr  "Industrial Arts & Consumer Services" "Arts" "Business" "Industrial Arts & Consumer Services" ...
##  $ Grad_total                  : int  9173 53864 24417 5411 9109 1542 190996 17488 5611 33928 ...
##  $ Grad_sample_size            : int  200 882 437 72 171 22 3738 386 98 688 ...
##  $ Grad_employed               : int  7098 40492 18368 3590 7512 1008 151570 13104 4716 28517 ...
##  $ Grad_full_time_year_round   : int  6511 29553 14784 2701 5622 860 123045 11207 3981 22523 ...
##  $ Grad_unemployed             : int  681 2482 1465 316 466 0 8324 473 119 899 ...
##  $ Grad_unemployment_rate      : num  0.0875 0.0578 0.0739 0.0809 0.0584 ...
##  $ Grad_median                 : num  75000 60000 65000 47000 57000 75000 80000 67000 85000 60000 ...
##  $ Grad_P25                    : int  53000 40000 45000 24500 40600 55000 50000 41600 56000 43000 ...
##  $ Grad_P75                    : num  110000 89000 100000 85000 83700 120000 120000 100000 114000 90000 ...
##  $ Nongrad_total               : int  86062 461977 179335 37575 53819 8921 1029181 89169 28314 163435 ...
##  $ Nongrad_employed            : int  73607 347166 145597 29738 43163 6967 817906 71781 22024 127832 ...
##  $ Nongrad_full_time_year_round: int  62435 250596 113579 23249 34231 6063 662346 61335 18381 100330 ...
##  $ Nongrad_unemployed          : int  3928 25484 7409 1661 3389 518 45519 1869 2222 8706 ...
##  $ Nongrad_unemployment_rate   : num  0.0507 0.0684 0.0484 0.0529 0.0728 ...
##  $ Nongrad_median              : num  65000 48000 50000 41600 52000 50000 60000 55000 60000 51000 ...
##  $ Nongrad_P25                 : int  47000 34000 35000 29000 36000 34000 40000 38000 40000 37800 ...
##  $ Nongrad_P75                 : num  98000 71000 75000 60000 78000 75000 91500 80000 85000 78000 ...
##  $ Grad_share                  : num  0.0963 0.1044 0.1198 0.1259 0.1448 ...
##  $ Grad_premium                : num  0.1538 0.25 0.3 0.1298 0.0962 ...

Subset the data

Subset the original data “grad_students” using dplyr, keeping the columns Major_code through Grad_unemployment_rate.

# Keep the contiguous run of columns from Major_code through
# Grad_unemployment_rate, dropping everything after it.
grad_students_Subset <- grad_students %>%
  select(Major_code:Grad_unemployment_rate)
# Preview the first rows of the subset.
head(grad_students_Subset)
##   Major_code                                  Major
## 1       5601                  CONSTRUCTION SERVICES
## 2       6004      COMMERCIAL ART AND GRAPHIC DESIGN
## 3       6211                 HOSPITALITY MANAGEMENT
## 4       2201 COSMETOLOGY SERVICES AND CULINARY ARTS
## 5       2001             COMMUNICATION TECHNOLOGIES
## 6       3201                        COURT REPORTING
##                        Major_category Grad_total Grad_sample_size Grad_employed
## 1 Industrial Arts & Consumer Services       9173              200          7098
## 2                                Arts      53864              882         40492
## 3                            Business      24417              437         18368
## 4 Industrial Arts & Consumer Services       5411               72          3590
## 5             Computers & Mathematics       9109              171          7512
## 6                 Law & Public Policy       1542               22          1008
##   Grad_full_time_year_round Grad_unemployed Grad_unemployment_rate
## 1                      6511             681             0.08754339
## 2                     29553            2482             0.05775585
## 3                     14784            1465             0.07386679
## 4                      2701             316             0.08090118
## 5                      5622             466             0.05841063
## 6                       860               0             0.00000000

Check the Summary

Check the descriptive statistics summary of the data.

# Descriptive statistics for every column of the full data set.
# NOTE(review): this summarises grad_students, not grad_students_Subset
# created above - confirm that is intended.
summary(object = grad_students)
##    Major_code      Major           Major_category       Grad_total     
##  Min.   :1100   Length:173         Length:173         Min.   :   1542  
##  1st Qu.:2403   Class :character   Class :character   1st Qu.:  15284  
##  Median :3608   Mode  :character   Mode  :character   Median :  37872  
##  Mean   :3880                                         Mean   : 127672  
##  3rd Qu.:5503                                         3rd Qu.: 148255  
##  Max.   :6403                                         Max.   :1184158  
##  Grad_sample_size Grad_employed    Grad_full_time_year_round Grad_unemployed
##  Min.   :   22    Min.   :  1008   Min.   :   770            Min.   :    0  
##  1st Qu.:  314    1st Qu.: 12659   1st Qu.:  9894            1st Qu.:  453  
##  Median :  688    Median : 28930   Median : 22523            Median : 1179  
##  Mean   : 2251    Mean   : 94037   Mean   : 72861            Mean   : 3506  
##  3rd Qu.: 2528    3rd Qu.:109944   3rd Qu.: 80794            3rd Qu.: 3329  
##  Max.   :21994    Max.   :915341   Max.   :703347            Max.   :35718  
##  Grad_unemployment_rate  Grad_median        Grad_P25        Grad_P75     
##  Min.   :0.00000        Min.   : 47000   Min.   :24500   Min.   : 65000  
##  1st Qu.:0.02607        1st Qu.: 65000   1st Qu.:45000   1st Qu.: 93000  
##  Median :0.03665        Median : 75000   Median :50000   Median :108000  
##  Mean   :0.03934        Mean   : 76756   Mean   :52597   Mean   :112087  
##  3rd Qu.:0.04805        3rd Qu.: 90000   3rd Qu.:60000   3rd Qu.:130000  
##  Max.   :0.13851        Max.   :135000   Max.   :85000   Max.   :294000  
##  Nongrad_total     Nongrad_employed  Nongrad_full_time_year_round
##  Min.   :   2232   Min.   :   1328   Min.   :    980             
##  1st Qu.:  20564   1st Qu.:  15914   1st Qu.:  11755             
##  Median :  68993   Median :  50092   Median :  38384             
##  Mean   : 214720   Mean   : 154554   Mean   : 120737             
##  3rd Qu.: 184971   3rd Qu.: 129179   3rd Qu.: 103629             
##  Max.   :2996892   Max.   :2253649   Max.   :1882507             
##  Nongrad_unemployed Nongrad_unemployment_rate Nongrad_median    Nongrad_P25   
##  Min.   :     0     Min.   :0.00000           Min.   : 37000   Min.   :25000  
##  1st Qu.:   880     1st Qu.:0.04198           1st Qu.: 48700   1st Qu.:34000  
##  Median :  3157     Median :0.05103           Median : 55000   Median :38000  
##  Mean   :  8486     Mean   :0.05395           Mean   : 58584   Mean   :40078  
##  3rd Qu.:  7409     3rd Qu.:0.06439           3rd Qu.: 65000   3rd Qu.:44000  
##  Max.   :136978     Max.   :0.16091           Max.   :126000   Max.   :80000  
##   Nongrad_P75       Grad_share       Grad_premium    
##  Min.   : 48000   Min.   :0.09632   Min.   :-0.0250  
##  1st Qu.: 72000   1st Qu.:0.26757   1st Qu.: 0.2308  
##  Median : 80000   Median :0.39875   Median : 0.3208  
##  Mean   : 84333   Mean   :0.40059   Mean   : 0.3285  
##  3rd Qu.: 97000   3rd Qu.:0.49912   3rd Qu.: 0.4000  
##  Max.   :215000   Max.   :0.93117   Max.   : 1.6471

R Markdown