Create the required chart
library("ggplot2")
# First look at wage income against labor-force status, using the raw columns
# of `data` (special codes in INCWAGE have not been recoded at this point).
ggplot(data, aes(LABFORCE, INCWAGE)) +
  geom_point()
## Warning: Removed 2602318 rows containing missing values (`geom_point()`).

# Special codes carried by INCWAGE:
#   99999999 = N.I.U. (Not in Universe)
#   99999998 = Missing (1962-1966 only)
# Work on a column subset of `data` so the original object stays untouched,
# and keep a separate copy of INCWAGE (`inc`) for cleaning.
df <- data[c(1, 3, 12:16)]
df[["inc"]] <- df[["INCWAGE"]]
library("psych")
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Summary statistics for income before the special codes are recoded.
describe(df[["inc"]])
## vars n mean sd median trimmed mad min max range skew
## X1 1 474234 20945796 40657777 33000 20945796 48925.8 0 1e+08 1e+08 1.43
## kurtosis se
## X1 0.05 59040.13
# Turn both special codes (N.I.U. and Missing) into NA in a single pass,
# then summarise the cleaned income values.
df$inc[df$inc == 99999999 | df$inc == 99999998] <- NA
describe(df[["inc"]])
## vars n mean sd median trimmed mad min max range skew
## X1 1 375035 35487.1 66514.04 15000 35487.1 22239 0 2099999 2099999 7.67
## kurtosis se
## X1 103.9 108.61
# List the value/label pairs attached to LABFORCE.
ipums_val_labels(df[["LABFORCE"]])
## # A tibble: 3 × 2
## val lbl
## <int> <chr>
## 1 0 NIU
## 2 1 No, not in the labor force
## 3 2 Yes, in the labor force
# Scatter of cleaned wage income by labor-force status with descriptive
# axis labels.
# LABFORCE holds numeric codes (0/1/2). A discrete scale only applies its
# `labels` to discrete values, so the codes are converted to character inside
# aes(); with the raw numeric column the custom labels are not applied.
ggplot(
  data = df,
  mapping = aes(x = as.character(LABFORCE), y = inc)
) +
  geom_point() +
  scale_x_discrete(
    name = "LABFORCE", # keep the axis title the raw column name would give
    labels = c(
      "0" = "Not in Universe",
      "1" = "No, not in the Labor Force",
      "2" = "Yes, In the Labor Force"
    )
  )
## Warning: Removed 2701517 rows containing missing values (`geom_point()`).

# Scatter of cleaned wage income by labor-force status with abbreviated
# axis labels (NIU / NILF / ILF).
# LABFORCE holds numeric codes (0/1/2). The discrete scale matches `breaks`
# against discrete values, so the codes are converted to character inside
# aes(); with the raw numeric column the breaks and labels are not applied.
ggplot(
  data = df,
  mapping = aes(x = as.character(LABFORCE), y = inc)
) +
  geom_point() +
  scale_x_discrete(
    name = "LABFORCE", # keep the axis title the raw column name would give
    breaks = c("0", "1", "2"),
    labels = c("NIU", "NILF", "ILF")
  )
## Warning: Removed 2701517 rows containing missing values (`geom_point()`).

# Build a character version of LABFORCE with short status labels. Codes
# without an entry in the lookup are left as their original text.
df$labforce <- local({
  codes <- as.character(df$LABFORCE)
  short <- c("0" = "NIU", "1" = "NILF", "2" = "ILF")
  known <- codes %in% names(short)
  codes[known] <- unname(short[codes[known]])
  codes
})

# Final chart: cleaned income by labelled labor-force status, with title and
# axis names.
ggplot(df, aes(x = labforce, y = inc)) +
  geom_point() +
  labs(
    title = "Income Wage by Labor Force Status \n Current Population Survey",
    x = "Labor Force Status",
    y = "Personal Income"
  )
## Warning: Removed 2701517 rows containing missing values (`geom_point()`).
