Load the libraries and view the “flights” dataset

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(nycflights13)
library(RColorBrewer)
# Copy the flights data set exported by nycflights13 into the workspace
# so it exists as a regular object under the name `flights`.
flights <- nycflights13::flights

Remove observations with missing (NA) values in distance or arr_delay

# Keep only the flights for which both distance and arr_delay are recorded.
# Row count drops from 336,776 to 327,346 after removing the NA rows.
flights_nona <- drop_na(flights, distance, arr_delay)