Load Data (w/ Bonus)
# Location of the CPS1988 wage data (CSV hosted on GitHub).
urldata <- "https://raw.githubusercontent.com/kglan/MS-Data-Science-Bridge-/main/R/CPS1988.csv"
# read_csv() accepts a URL string directly, so no explicit url() connection
# is required. The file's unnamed first column is auto-named `...1` on import.
wagedatarough <- read_csv(urldata)
## New names:
## * `` -> ...1
## Rows: 28155 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (4): ethnicity, smsa, region, parttime
## dbl (4): ...1, wage, education, experience
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show a preview of the raw import.
print(wagedatarough)
## # A tibble: 28,155 x 8
## ...1 wage education experience ethnicity smsa region parttime
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 1 355. 7 45 cauc yes northeast no
## 2 2 123. 12 1 cauc yes northeast yes
## 3 3 370. 9 9 cauc yes northeast no
## 4 4 755. 11 46 cauc yes northeast no
## 5 5 594. 12 36 cauc yes northeast no
## 6 6 377. 16 22 cauc yes northeast no
## 7 7 285. 8 51 cauc yes northeast no
## 8 8 561. 12 34 cauc yes northeast no
## 9 9 264. 12 0 cauc yes northeast no
## 10 10 1644. 14 18 cauc yes northeast no
## # ... with 28,145 more rows
Question 1
# Per-column summary of the raw data.
wagedatarough %>% summary()
## ...1 wage education experience
## Min. : 1 Min. : 50.05 Min. : 0.00 Min. :-4.0
## 1st Qu.: 7040 1st Qu.: 308.64 1st Qu.:12.00 1st Qu.: 8.0
## Median :14078 Median : 522.32 Median :12.00 Median :16.0
## Mean :14078 Mean : 603.73 Mean :13.07 Mean :18.2
## 3rd Qu.:21117 3rd Qu.: 783.48 3rd Qu.:15.00 3rd Qu.:27.0
## Max. :28155 Max. :18777.20 Max. :18.00 Max. :63.0
## ethnicity smsa region parttime
## Length:28155 Length:28155 Length:28155 Length:28155
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
# Mean and median of education and experience on the raw data.
meanedu1 <- mean(wagedatarough[["education"]])
mededu1 <- median(wagedatarough[["education"]])
meanexp1 <- mean(wagedatarough[["experience"]])
medexp1 <- median(wagedatarough[["experience"]])
meanedu1
## [1] 13.06787
mededu1
## [1] 12
meanexp1
## [1] 18.19993
medexp1
## [1] 16
Questions 2, 3, 5
# List the raw column names before renaming them.
names(wagedatarough)
## [1] "...1" "wage" "education" "experience" "ethnicity"
## [6] "smsa" "region" "parttime"
# Build the cleaned analysis table from the raw import:
#   * drop the auto-named index column `...1`
#   * give the remaining columns descriptive names
#   * add Salary (weekly wage * 52), placed just before Education_yrs
#   * keep only rows whose part-time flag is "no", then drop that flag
#   * relabel ethnicity codes "cauc" -> "White" and "afam" -> "Black"
wagedata <- wagedatarough %>%
  select(-"...1") %>%
  rename(
    Wage_week = "wage",
    Education_yrs = "education",
    Experience_yrs = "experience",
    Ethnicity = "ethnicity",
    Urban_Area = "smsa",
    Region = "region",
    PartTime = "parttime"
  ) %>%
  # Wage_week is already numeric, so the product needs no as.numeric().
  mutate(Salary = Wage_week * 52, .before = Education_yrs) %>%
  # Filter on the piped column rather than wagedatarough$parttime: the
  # external vector only lines up while no earlier step changes the number
  # or order of rows, and would silently mis-filter otherwise.
  filter(PartTime == "no") %>%
  select(-PartTime) %>%
  # Within one mutate(), the second assignment sees the result of the first.
  mutate(
    Ethnicity = str_replace(Ethnicity, "cauc", "White"),
    Ethnicity = str_replace(Ethnicity, "afam", "Black")
  )
wagedata
## # A tibble: 25,631 x 7
## Wage_week Salary Education_yrs Experience_yrs Ethnicity Urban_Area Region
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 355. 18457. 7 45 White yes northeast
## 2 370. 19259. 9 9 White yes northeast
## 3 755. 39257. 11 46 White yes northeast
## 4 594. 30864. 12 36 White yes northeast
## 5 377. 19616. 16 22 White yes northeast
## 6 285. 14815. 8 51 White yes northeast
## 7 561. 29179. 12 34 White yes northeast
## 8 264. 13731. 12 0 White yes northeast
## 9 1644. 85479. 14 18 White yes northeast
## 10 475. 24691. 12 17 White yes northeast
## # ... with 25,621 more rows
Question #4
# Per-column summary of the cleaned data.
wagedata %>% summary()
## Wage_week Salary Education_yrs Experience_yrs
## Min. : 50.39 Min. : 2620 Min. : 0.00 Min. :-4.00
## 1st Qu.: 356.13 1st Qu.: 18519 1st Qu.:12.00 1st Qu.: 9.00
## Median : 567.23 Median : 29496 Median :12.00 Median :16.00
## Mean : 640.16 Mean : 33288 Mean :13.08 Mean :18.59
## 3rd Qu.: 826.21 3rd Qu.: 42963 3rd Qu.:16.00 3rd Qu.:27.00
## Max. :18777.20 Max. :976414 Max. :18.00 Max. :63.00
## Ethnicity Urban_Area Region
## Length:25631 Length:25631 Length:25631
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
# Mean and median of education and experience on the cleaned data.
meanedu2 <- mean(wagedata[["Education_yrs"]])
mededu2 <- median(wagedata[["Education_yrs"]])
meanexp2 <- mean(wagedata[["Experience_yrs"]])
medexp2 <- median(wagedata[["Experience_yrs"]])
meanedu2
## [1] 13.07627
mededu2
## [1] 12
meanexp2
## [1] 18.58656
medexp2
## [1] 16
# Check each statistic computed on the raw data (first argument) against the
# same statistic computed on the cleaned data (second argument).
# NOTE(review): `compare()` is not defined in the visible source; presumably it
# comes from an attached package. Confirm which one, and whether it applies a
# numeric tolerance when comparing the floating-point means.
compare(meanedu1,meanedu2)
## FALSE
compare(mededu1, mededu2)
## TRUE
compare(meanexp1, meanexp2)
## FALSE
compare(medexp1,medexp2)
## TRUE
Questions 5, 6
# Display the cleaned data ordered from most to least years of experience.
wagedata %>%
  arrange(desc(Experience_yrs))
## # A tibble: 25,631 x 7
## Wage_week Salary Education_yrs Experience_yrs Ethnicity Urban_Area Region
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 370. 19259. 0 63 White yes south
## 2 166. 8642. 2 61 Black no south
## 3 356. 18519. 0 60 Black yes midwest
## 4 368 19136 0 58 White yes midwest
## 5 206. 10700. 3 58 Black no south
## 6 286. 14894. 7 57 White yes south
## 7 712. 37037 3 57 White yes south
## 8 92.6 4815. 4 57 White yes south
## 9 237. 12346. 2 57 White yes south
## 10 353. 18376. 5 57 White yes west
## # ... with 25,621 more rows