# Load data from Sparks' GitHub data file usa_00045.dta
# Compare the mean of wage income before and after recoding the
# 999998/999999 codes in `incwage` (treated here as missing) to NA.
#   meanold - mean of the raw `incwage` column, codes included
#   meannew - mean of the recoded `mywage` column, NAs dropped
#   n       - number of rows in `ipums`
ipums %>%
  mutate(mywage = ifelse(incwage %in% c(999998, 999999), NA, incwage)) %>%
  summarise(
    meanold = mean(incwage),
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    meannew = mean(mywage, na.rm = TRUE),
    n = n()
  )
## # A tibble: 1 x 3
## meanold meannew n
## <dbl> <dbl> <int>
## 1 205672.4 27489.69 300552
# Compare the median of wage income before and after recoding the
# 999998/999999 codes in `incwage` (treated here as missing) to NA.
#   medianold - median of the raw `incwage` column, codes included
#   mediannew - median of the recoded `mywage` column, NAs dropped
#   n         - number of rows in `ipums`
ipums %>%
  mutate(mywage = ifelse(incwage %in% c(999998, 999999), NA, incwage)) %>%
  summarise(
    medianold = median(incwage),
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    mediannew = median(mywage, na.rm = TRUE),
    n = n()
  )
## # A tibble: 1 x 3
## medianold mediannew n
## <dbl> <dbl> <int>
## 1 20000 7000 300552
# Compare the standard deviation of wage income before and after recoding
# the 999998/999999 codes in `incwage` (treated here as missing) to NA.
#   sdold - standard deviation of the raw `incwage` column, codes included
#   sdnew - standard deviation of the recoded `mywage` column, NAs dropped
#   n     - number of rows in `ipums`
ipums %>%
  mutate(mywage = ifelse(incwage %in% c(999998, 999999), NA, incwage)) %>%
  summarise(
    sdold = sd(incwage),
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    sdnew = sd(mywage, na.rm = TRUE),
    n = n()
  )
## # A tibble: 1 x 3
## sdold sdnew n
## <dbl> <dbl> <int>
## 1 378988.6 50665.1 300552
# Mean, median and standard deviation of wage income by sex and education,
# restricted to rows with labforce == 2 and age >= 25.
# NOTE(review): labforce == 2 is presumably "in the labor force" in the
# IPUMS coding -- confirm against the codebook.
#
# Derived columns:
#   mywage - `incwage` with the 999998/999999 codes recoded to NA
#   edurec - education category built from `educd`; values matching none of
#            the ranges below become NA (case_when has no fallback branch)
#   sexrec - "male" when sex == 1, "female" for any other non-missing value
#
# The summary columns drop NA wages; `n` counts all rows in the group,
# including those whose wage is NA.
ipums %>%
  mutate(
    mywage = ifelse(incwage %in% c(999998, 999999), NA, incwage),
    # Bare column names instead of `.$educd`: the `.` pronoun bypasses
    # dplyr's data masking and would ignore any grouping on the input.
    edurec = case_when(
      educd %in% 0:61 ~ "nohs",
      educd %in% 62:64 ~ "hs",
      educd %in% 65:100 ~ "somecoll",
      educd %in% 101:116 ~ "collgrad",
      educd == 999 ~ "missing"
    ),
    sexrec = ifelse(sex == 1, "male", "female")
  ) %>%
  filter(labforce == 2, age >= 25) %>%
  group_by(sexrec, edurec) %>%
  # summarise() peels off only the last grouping level, so the result is
  # still grouped by `sexrec`.
  summarise(
    meaninc = mean(mywage, na.rm = TRUE),
    medianinc = median(mywage, na.rm = TRUE),
    sdinc = sd(mywage, na.rm = TRUE),
    n = n()
  )
## # A tibble: 8 x 6
## # Groups: sexrec [?]
## sexrec edurec meaninc medianinc sdinc n
## <chr> <chr> <dbl> <dbl> <dbl> <int>
## 1 female collgrad 57775.23 48000 56722.86 23539
## 2 female hs 25607.74 21600 26361.37 13454
## 3 female nohs 18332.41 15000 23212.05 3797
## 4 female somecoll 32798.44 28000 31673.19 19404
## 5 male collgrad 92236.55 70000 98040.42 23860
## 6 male hs 37929.02 32000 37056.63 17412
## 7 male nohs 28148.86 23000 32879.64 6394
## 8 male somecoll 48098.73 40000 47090.02 19201