getwd()[1] "/Users/bettyovalle/Desktop/College/007 – Spring 2026/DATA 110/week 5"
getwd()[1] "/Users/bettyovalle/Desktop/College/007 – Spring 2026/DATA 110/week 5"
library(nycflights23)
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.0 ✔ readr 2.1.6
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ ggplot2 4.0.2 ✔ tibble 3.3.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.2
✔ purrr 1.2.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(conflicted)
library(dplyr)
# Load the flights and airlines datasets into the workspace
# (presumably the copies shipped with nycflights23, attached above — confirm).
data(flights)
data(airlines)
conflicts_prefer(dplyr::filter)[conflicted] Will prefer dplyr::filter over any other package.
# Keep only flights with a recorded distance, arrival delay, and departure
# delay; a row missing any one of the three is dropped.
flights_nona <- flights |>
  filter(
    !is.na(distance),
    !is.na(arr_delay),
    !is.na(dep_delay)
  )
# Tally the flights to each destination: one row per `dest` with its total in
# the `count` column, returned as an ungrouped tibble.
by_dest <- flights_nona |>
  count(dest, name = "count")
head(by_dest)# A tibble: 6 × 2
dest count
<chr> <int>
1 ABQ 218
2 ACK 875
3 AGS 20
4 ALB 1510
5 ANC 91
6 ATL 17234
# Attach the full airline name to every flight by matching on the carrier code.
flights2 <- left_join(flights_nona, airlines, by = "carrier")
# Strip the corporate suffixes "Inc." and "Co." from the airline names, then
# trim the whitespace the removal leaves behind (a name such as
# "Some Airline Inc." would otherwise become "Some Airline " with a trailing
# space, which then leaks into group keys and plot labels).
flights2$name <- trimws(gsub("Inc\\.|Co\\.", "", flights2$name))
# Summarise the flights bound for the three DMV-area airports (DCA, IAD, BWI):
# one row per airline/destination pair.
dmv_flights <- flights2 |>
  filter(dest %in% c("DCA", "IAD", "BWI")) |>
  group_by(name, dest) |>
  summarise(
    total_flights = n(), # number of flights in the group
    # Mean arrival delay for the group.
    # TODO: approach taken from a YouTube video by Brian Byrne; ask the
    # professor for a bit more detail.
    avg_arr_delay = mean(arr_delay, na.rm = TRUE),
    .groups = "drop" # return an ungrouped tibble
  )
library(RColorBrewer)
library(treemap)
# Treemap of airlines flying from NYC to the DMV-area airports: each rectangle
# is one airline, sized by its number of flights and coloured by its average
# arrival delay.
treemap(
  dmv_flights,
  index = c("name"),
  vSize = "total_flights",
  vColor = "avg_arr_delay",
  type = "value",
  # dmv_flights has one row per airline/destination pair, but the map is
  # indexed by airline only, so rows are aggregated. The default aggregation
  # is a sum, which would add the per-destination averages together; a mean
  # weighted by vSize (the flight counts) gives each airline's actual average
  # arrival delay, matching the legend.
  fun.aggregate = "weighted.mean",
  palette = "Pastel2",
  title = "Flights from NYC to DMV Area airports", # title
  title.legend = "Average Arrival Delay (min)" # legend label
)
# For the Heatmaps, Treemaps, and Alluvials assignment, we worked with the NYC flights dataset to explore flight data. I focused on flights from New York City to the DMV area airports, including DCA, IAD, and BWI.
I began by cleaning the dataset: I removed rows with missing values and joined the flights with the airline names. I then summarized the data to calculate the total number of flights and the average arrival delay for each airline.
Finally, I created a treemap to display this information. In the treemap, the size of each rectangle represents the number of flights, and the color shows the average arrival delay. This visualization makes it easy to see at a glance which airlines have the most flights and which ones tend to have longer delays. It provides a clear and simple way to understand each airline's performance.