title: "Emrick NYC flights"

format: html

editor: visual


#install.packages("nycflights13")

library(nycflights13)

library(RColorBrewer)

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Attach the packages that supply the data set and the filtering verb.
library(nycflights13)
library(dplyr)

# Make the `flights` table available in the workspace.
data(flights)

# Keep only flights that were more than 20 minutes late both on departure
# and on arrival.
filtered_flights <- filter(flights, dep_delay > 20 & arr_delay > 20)

# Preview the first rows of the filtered table.
head(filtered_flights)
## # A tibble: 6 × 19
##    year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##   <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
## 1  2013     1     1      732            645        47     1011            941
## 2  2013     1     1      749            710        39      939            850
## 3  2013     1     1      811            630       101     1047            830
## 4  2013     1     1      826            715        71     1136           1045
## 5  2013     1     1      848           1835       853     1001           1950
## 6  2013     1     1      903            820        43     1045            955
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>
library(ggplot2)

# `filtered_flights` is a data frame created earlier in this script, not a
# data set shipped with a package, so it must not be loaded with data();
# doing so only produces a "data set not found" warning.

# Scatterplot of departure delay (x) against arrival delay (y), in minutes,
# for the flights kept by the > 20 minute filter.
ggplot(filtered_flights, aes(x = dep_delay, y = arr_delay)) +
  geom_point() +
  labs(
    title = "Departure Delay vs Arrival Delay",
    x = "Departure Delay (minutes)",
    y = "Arrival Delay (minutes)"
  )

library(RColorBrewer)
library(ggplot2)

# `filtered_flights` is a data frame created earlier in this script, not a
# data set shipped with a package, so it must not be loaded with data().

# The Set1 palette defines only 9 colors, but the data contain more carriers
# than that. With scale_color_brewer() the extra carriers get no color and
# their points are silently dropped from the plot. Interpolating the 9 base
# colors yields exactly one color per carrier, so every point is drawn.
carrier_levels <- sort(unique(filtered_flights$carrier))
carrier_colors <- setNames(
  colorRampPalette(brewer.pal(9, "Set1"))(length(carrier_levels)),
  carrier_levels
)

# Scatterplot of arrival delay (x) against departure delay (y), colored by
# carrier with an RColorBrewer-derived palette.
ggplot(
  filtered_flights,
  aes(x = arr_delay, y = dep_delay, color = carrier)
) +
  geom_point() +
  # Named values are matched to carriers by name, not by position.
  scale_color_manual(values = carrier_colors) +
  labs(
    title = "Scatterplot of Arrival Delay vs Departure Delay",
    x = "Arrival Delay",
    y = "Departure Delay",
    color = "Carrier"
  )

# Write a brief paragraph that describes the visualization you have created and at least one aspect of the plot that you would like to highlight

The first scatterplot visualizes departure delay versus arrival delay, in minutes, for flights that both departed and arrived more than 20 minutes late. The second scatterplot shows the same flights with the axes swapped and with color added using the RColorBrewer package; the points are color coded by carrier.