nycflights 23

Load the libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(nycflights23)
library(RColorBrewer)
library(dplyr)

## Load the data

# Load the flights dataset shipped with the nycflights23 package.
data(flights, package = "nycflights23")

View some of the data

# Preview the first six rows of the flights table.
head(flights, n = 6L)
# A tibble: 6 × 19
   year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
  <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
1  2023     1     1        1           2038       203      328              3
2  2023     1     1       18           2300        78      228            135
3  2023     1     1       31           2344        47      500            426
4  2023     1     1       33           2140       173      238           2352
5  2023     1     1       36           2048       228      223           2252
6  2023     1     1      503            500         3      808            815
# ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
#   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
#   hour <dbl>, minute <dbl>, time_hour <dttm>

Recode carrier codes to airline names

# Lookup table from two-letter carrier code to the display name used in this
# analysis. It is the single source of truth for the recode below; the labels
# of the five recoded carriers are exactly the values written into
# flights$carrier, so later comparisons against those names keep working.
carrier_names <- c(
  "UA" = "United Airlines",
  "AA" = "American Airlines",
  "DL" = "Delta Airlines",
  "B6" = "Jet Blue",
  "F9" = "Frontier Airlines",
  "HA" = "Hawaiian Airlines",
  "NK" = "Spirit Airlines",
  "AS" = "Alaska Airlines",
  "WN" = "Southwest Airlines"
)

# Only these five carriers are relabelled; every other carrier keeps its
# two-letter code. `%in%` never yields NA, so the mask is safe to index with.
recoded_carriers <- c("UA", "AA", "DL", "B6", "F9")
is_recoded <- flights$carrier %in% recoded_carriers
flights$carrier[is_recoded] <-
  unname(carrier_names[flights$carrier[is_recoded]])

# Print the carrier code/name table that ships with the package for reference
# (a predicate-free filter() call returns its input unchanged, so print it
# directly).
airlines
# A tibble: 14 × 2
   carrier name                  
   <chr>   <chr>                 
 1 9E      Endeavor Air Inc.     
 2 AA      American Airlines Inc.
 3 AS      Alaska Airlines Inc.  
 4 B6      JetBlue Airways       
 5 DL      Delta Air Lines Inc.  
 6 F9      Frontier Airlines Inc.
 7 G4      Allegiant Air         
 8 HA      Hawaiian Airlines Inc.
 9 MQ      Envoy Air             
10 NK      Spirit Air Lines      
11 OO      SkyWest Airlines Inc. 
12 UA      United Air Lines Inc. 
13 WN      Southwest Airlines Co.
14 YX      Republic Airline      
# Display names of the carriers of interest, spelled exactly like the recoded
# values stored in flights$carrier ("Jet Blue", not "JetBlue"), so that a
# membership test such as `carrier %in% selected_airlines` matches all five.
selected_airlines <- c(
  "United Airlines",
  "American Airlines",
  "Delta Airlines",
  "Jet Blue",
  "Frontier Airlines"
)

# Box plot of arr_time for every carrier in `flights`; rows with a missing
# arr_time are dropped by ggplot2 with a warning. The title and y-axis label
# describe the variable that is actually mapped.
# NOTE(review): the earlier title mentioned delays and the y label said
# "Air time"; if delays were intended, map `arr_delay` instead and relabel.
# NOTE(review): `selected_airlines` is not applied to this plot - confirm
# whether the plot should be restricted to those carriers.
ggplot(flights, aes(x = carrier, y = arr_time)) +
  geom_boxplot() +
  labs(
    title = "Box plot of arrival times by carrier",
    x = "carrier",
    y = "Arrival time (HHMM)"
  ) +
  theme_dark()
Warning: Removed 11453 rows containing non-finite outside the scale range
(`stat_boxplot()`).