Load the libraries and view the “flights” dataset
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(nycflights13)
library(RColorBrewer)
# Take a working copy of the dataset straight from the package namespace.
# Reading `nycflights13::flights` explicitly (rather than the bare name
# `flights`) means a re-run always starts from the packaged data, even if an
# object called `flights` already exists in the workspace.
flights <- nycflights13::flights
Remove observations with missing (NA) values in distance or arr_delay
# Keep only the rows where both `distance` and `arr_delay` are present.
# Notice the number of rows changes from 336,776 to 327,346.
flights_nona <- flights %>%
  drop_na(distance, arr_delay)