# A tibble: 10 × 2
dest count
<chr> <int>
1 ATL 16898
2 ORD 16642
3 LAX 16076
4 BOS 15049
5 MCO 13982
6 CLT 13698
7 SFO 13230
8 FLL 11934
9 MIA 11633
10 DCA 9157
Filter flights_nona to include only the top 10 destinations.
# Restrict flights_nona to the ten destinations listed in the count table.
top10_nona <- flights_nona |>
  filter(
    dest %in% c(
      "ATL", "ORD", "LAX", "BOS", "MCO",
      "CLT", "SFO", "FLL", "MIA", "DCA"
    )
  )
Create a bar graph
# Dodged bar chart of departure delay against scheduled departure time,
# filled by destination, for the top 10 destinations.
plot1 <- ggplot(data = top10_nona) +
  geom_bar(
    mapping = aes(x = sched_dep_time, y = dep_delay, fill = dest),
    stat = "identity",
    position = "dodge"
  ) +
  labs(
    title = "NYC Departure Delays by Scheduled Departure Time",
    y = "Delay of Departure",
    fill = "Destination",
    caption = "Source: NYCFlights13 Dataset"
  )

# Display the chart.
plot1
This graph is hard to read since it is too cluttered. I redid the bar graph to include only the top 3 destinations: ATL, ORD, and LAX.
# Keep flights to the three busiest destinations with a departure delay of at
# most 700. The destination test is a single %in% condition so the delay cap
# applies to all three destinations: in `a | b | c & d`, `&` binds tighter
# than `|`, which would restrict the cap to the last destination only.
top3_nona <- flights_nona |>
  filter(
    dest %in% c("ATL", "ORD", "LAX"),
    dep_delay <= 700
  )

# Dodged bar chart of departure delay against scheduled departure time,
# filled by destination.
plot4 <- top3_nona |>
  ggplot() +
  geom_bar(
    aes(x = sched_dep_time, y = dep_delay, fill = dest),
    position = "dodge",
    stat = "identity"
  ) +
  labs(
    fill = "Destination",
    x = "Scheduled Departure Time",
    y = "Delay of Departure",
    title = "Delays to Top 3 Destinations from NYC by Scheduled Departure Time",
    caption = "Source: NYCFlights13 Dataset"
  )

# Zoom both axes in one coord_cartesian() call. Adding a second coordinate
# system replaces the first, which would silently discard the x limits.
plot4 +
  coord_cartesian(xlim = c(500, 2250), ylim = c(0, 650))
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.
This visualization shows the departure delays of flights to the top 3 destinations from NYC, plotted against their scheduled departure times. In general, midday and afternoon flights appear to be more delayed upon departure. This might be because there are more incoming flights as the day progresses and these planes might have to wait longer before being cleared for takeoff. There are probably fewer delays in the evening since there are fewer flights then. It is hard to tell whether there are significant differences in delays based on time of departure among the different destinations. They all generally seem to follow the same pattern described above.
The graph above is clearer than the first one, but still cluttered since there is a lot of data. If I had more time I would have liked to group each set of flights by 30-minute or 60-minute periods, or take the mean delay to each destination within these time periods. This would have made the visualization easier to interpret.
Other visualizations/data manipulation I attempted but was not successful with:
I started a heatmap to look at all flights and their departure times but realized that it would not work with the data I had intended to use.
# Abandoned heatmap attempt over all flights; kept commented out for reference.
# NOTE(review): heatmap() is passed flights_nona rather than
# flights_nona_matrix below -- possibly unintended; confirm before reviving.
#flights_nona <- flights_nona[order(flights_nona$dest),]
#row.names(flights_nona) <- flights_nona$sched_dep_time
#flights_nona_matrix <- data.matrix(flights_nona)
#flights_nona_heatmap <- heatmap(flights_nona,
#Rowv=NA,
#Colv=NA,
#col = cm.colors(20),
#scale="column",
#margins=c(5,10),
#xlab = "Scheduled Time of Departure",
#ylab = "Destination",
#main = "NYC Flight Delays by Time of Departure in 2013")
I was also unable to create a heatmap using the top 10 destinations for the same reason.
# Abandoned heatmap attempt for the top 10 destinations; kept commented out
# for reference.
# NOTE(review): top10_nona[6,14] indexes row 6, column 14 (a single cell);
# presumably a column selection was intended -- confirm before reviving.
#top10_nona <- top10_nona[order(top10_nona$sched_dep_time),]
#top10_nona <- top10_nona[6,14]
#top10_nona_matrix <- data.matrix(top10_nona)
#top10_nona_heatmap <- heatmap(top10_nona_matrix,
#Rowv=NA,
#Colv=NA,
#col = cm.colors(20),
#scale="column",
#margins=c(5,10),
#xlab = "Destination",
#ylab = "Scheduled Time of Departure",
#main = "NYC Flight Delays by Time of Departure in 2013")
I made a treemap but it is confusing to me, even though I was the one who made it. It would probably not make sense to anyone else.
I tried to make a streamgraph but couldn’t get it to display properly.
# Install streamgraph from GitHub only when it is not already available, so
# that running this document does not contact GitHub every time.
if (!requireNamespace("streamgraph", quietly = TRUE)) {
  devtools::install_github("hrbrmstr/streamgraph")
}
Skipping install of 'streamgraph' from a github remote, the SHA1 (76f7173e) has not changed since last install.
Use `force = TRUE` to force installation
Warning in widget_html(name, package, id = x$id, style = css(width =
validateCssUnit(sizeInfo$width), : streamgraph_html returned an object of class
`list` instead of a `shiny.tag`.
Warning: `bindFillRole()` only works on htmltools::tag() objects (e.g., div(),
p(), etc.), not objects of type 'list'.
I also wanted to create an alluvial plot, but I was not able to do so.
# Convert top10_nona_all to lodes form, naming the key, value and id columns
# after the scheduled departure time, departure delay and destination.
top10_nona_lodes <- to_lodes_form(
  top10_nona_all,
  key = "sched_dep_time",
  value = "dep_delay",
  id = "dest"
)
Plot the alluvial
# Build (but do not print) an alluvial plot for the top 10 destinations.
# NOTE(review): the aes() inside geom_alluvium() swaps x and y relative to the
# aes() given to ggplot(); the layer-level mapping is the one that layer uses.
# Confirm which orientation is intended, since the x limits below look like
# clock times while the layer maps x to dep_delay.
ggalluv <- top10_nona |>
  ggplot(aes(x = sched_dep_time, y = dep_delay, alluvium = dest)) +
  theme_bw() +
  geom_alluvium(
    aes(x = dep_delay, y = sched_dep_time, fill = dest),
    color = "white",
    width = .1,
    alpha = .8,
    decreasing = FALSE
  ) +
  scale_fill_brewer(palette = "Spectral") +
  # Spell out `limits` in full rather than relying on partial matching of `lim`.
  scale_x_continuous(limits = c(540, 2255)) +
  labs(
    title = "Flight Delays by Time of Departure\n (Top 10 Destinations from NYC Airports)",
    y = "Scheduled Departure Time",
    fill = "Destination",
    caption = "Source: NYCFlights13 Dataset"
  )
{r}# ggalluv
(Tried this earlier when exploring data but did not use this dataframe)
Create a dataframe with summary statistics. I want to look at whether scheduled departure time, destination, and the mean delay of departure might be related.