Previously.
install.packages("dplyr", repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/Uriel/Documents/R/win-library/3.4'
## (as 'lib' is unspecified)
## package 'dplyr' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Uriel\AppData\Local\Temp\RtmpAfJ0v3\downloaded_packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
install.packages("haven", repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/Uriel/Documents/R/win-library/3.4'
## (as 'lib' is unspecified)
## package 'haven' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Uriel\AppData\Local\Temp\RtmpAfJ0v3\downloaded_packages
library(haven)
install.packages("curl", repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/Uriel/Documents/R/win-library/3.4'
## (as 'lib' is unspecified)
## package 'curl' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Uriel\AppData\Local\Temp\RtmpAfJ0v3\downloaded_packages
# Read the Stata (.dta) file from GitHub into `acs`. The `?raw=true` query
# requests the file contents rather than GitHub's HTML page for the blob.
acs <- read_dta(
  "https://github.com/coreysparks/data/blob/master/usa_00045.dta?raw=true"
)
2) Calculate these summary statistics by sex and level of completed education, for people over age 25 who are currently in the labor force.
# Summarise wage income (mean, median, sd, and row count) by sex and
# completed-education category for rows with labforce == 2 and age >= 25.
acs %>%
  mutate(
    # 999998 and 999999 are recoded to NA so they are not averaged in as
    # dollar amounts.
    incwage2 = ifelse(incwage %in% c(999998, 999999), NA, incwage),
    sex2 = ifelse(sex == 1, "Male", "Female"),
    # Use the bare column `educd` (data masking) rather than `acs$educd`:
    # `acs$educd` always reads the full original table, so it silently depends
    # on this step running before any filtering or reordering of rows.
    # Codes outside the listed ranges fall through to NA.
    # NOTE(review): labels spell "collage"; kept as-is so results match the
    # previously recorded output.
    edurec = case_when(
      educd %in% 0:61 ~ "No High school",
      educd %in% 62:64 ~ "High school",
      educd %in% 65:100 ~ "Some collage",
      educd %in% 101:116 ~ "Collage graduate",
      educd == 999 ~ "Missing"
    )
  ) %>%
  # NOTE(review): presumably labforce == 2 codes "in the labor force" --
  # confirm against the codebook. `age >= 25` includes 25-year-olds, while the
  # task text says "over age 25" -- confirm which is intended.
  filter(labforce == 2, age >= 25) %>%
  group_by(sex2, edurec) %>%
  summarise(
    # na.rm = TRUE keeps one missing wage from turning a whole group's
    # statistic into NA. The backticked names reproduce the column headers the
    # unnamed calls generated before, so the printed table is unchanged.
    `mean(incwage2)` = mean(incwage2, na.rm = TRUE),
    `median(incwage2)` = median(incwage2, na.rm = TRUE),
    `sd(incwage2)` = sd(incwage2, na.rm = TRUE),
    # n counts every row in the group, including rows with missing wages.
    n = n()
  )
## # A tibble: 8 x 6
## # Groups: sex2 [?]
## sex2 edurec `mean(incwage2)` `median(incwage2)`
## <chr> <chr> <dbl> <dbl>
## 1 Female Collage graduate 57775.23 48000
## 2 Female High school 25607.74 21600
## 3 Female No High school 18332.41 15000
## 4 Female Some collage 32798.44 28000
## 5 Male Collage graduate 92236.55 70000
## 6 Male High school 37929.02 32000
## 7 Male No High school 28148.86 23000
## 8 Male Some collage 48098.73 40000
## # ... with 2 more variables: `sd(incwage2)` <dbl>, n <int>