library(readxl)
library(dplyr)
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>     filter, lag
#> The following objects are masked from 'package:base':
#>     intersect, setdiff, setequal, union
# Read the wage/gender workbook into `data`. The path is relative, so it is
# resolved against the current working directory.
data <- read_excel("ECON465_DataScience/data/Wage_GenderDS.xlsx")

# Print a compact overview of the columns. The first line of the printed
# output had been fused onto this call, which made the line a syntax error;
# it is kept below as an output comment.
glimpse(data)
#> Rows: 500
#> Columns: 6
#> $ Observation <dbl> 119, 2, 41, 65, 246, 254, 74, 12, 9, 237, 79, 294, 182, 25…
#> $ Wage <dbl> 32, 34, 37, 38, 38, 38, 39, 40, 42, 43, 44, 45, 46, 46, 47…
#> $ Female <dbl> 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1…
#> $ Age <dbl> 31, 42, 31, 33, 21, 28, 31, 28, 25, 25, 44, 25, 31, 42, 38…
#> $ Educ <dbl> 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1…
#> $ Parttime <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1…
# Build `wage_data`: keep Wage, Female, Age, Educ and Parttime (dropping
# every other column of `data`), then retain only the rows in which none of
# those five values is missing.
wage_data <- data |>
  select(Wage, Female, Age, Educ, Parttime) |>
  filter(complete.cases(Wage, Female, Age, Educ, Parttime))