Load libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(nycflights13)

Mutate the Data to Create “speed” variable

# Add `speed`: distance divided by air time expressed in hours
# (air_time / 60). Rows with a missing air_time get an NA speed.
flights_speed <- flights %>%
  mutate(speed = distance / (air_time / 60))

# Column-by-column overview of the augmented data, including NA counts.
summary(flights_speed)
##       year          month             day           dep_time    sched_dep_time
##  Min.   :2013   Min.   : 1.000   Min.   : 1.00   Min.   :   1   Min.   : 106  
##  1st Qu.:2013   1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.: 907   1st Qu.: 906  
##  Median :2013   Median : 7.000   Median :16.00   Median :1401   Median :1359  
##  Mean   :2013   Mean   : 6.549   Mean   :15.71   Mean   :1349   Mean   :1344  
##  3rd Qu.:2013   3rd Qu.:10.000   3rd Qu.:23.00   3rd Qu.:1744   3rd Qu.:1729  
##  Max.   :2013   Max.   :12.000   Max.   :31.00   Max.   :2400   Max.   :2359  
##                                                  NA's   :8255                 
##    dep_delay          arr_time    sched_arr_time   arr_delay       
##  Min.   : -43.00   Min.   :   1   Min.   :   1   Min.   : -86.000  
##  1st Qu.:  -5.00   1st Qu.:1104   1st Qu.:1124   1st Qu.: -17.000  
##  Median :  -2.00   Median :1535   Median :1556   Median :  -5.000  
##  Mean   :  12.64   Mean   :1502   Mean   :1536   Mean   :   6.895  
##  3rd Qu.:  11.00   3rd Qu.:1940   3rd Qu.:1945   3rd Qu.:  14.000  
##  Max.   :1301.00   Max.   :2400   Max.   :2359   Max.   :1272.000  
##  NA's   :8255      NA's   :8713                  NA's   :9430      
##    carrier              flight       tailnum             origin         
##  Length:336776      Min.   :   1   Length:336776      Length:336776     
##  Class :character   1st Qu.: 553   Class :character   Class :character  
##  Mode  :character   Median :1496   Mode  :character   Mode  :character  
##                     Mean   :1972                                        
##                     3rd Qu.:3465                                        
##                     Max.   :8500                                        
##                                                                         
##      dest              air_time        distance         hour      
##  Length:336776      Min.   : 20.0   Min.   :  17   Min.   : 1.00  
##  Class :character   1st Qu.: 82.0   1st Qu.: 502   1st Qu.: 9.00  
##  Mode  :character   Median :129.0   Median : 872   Median :13.00  
##                     Mean   :150.7   Mean   :1040   Mean   :13.18  
##                     3rd Qu.:192.0   3rd Qu.:1389   3rd Qu.:17.00  
##                     Max.   :695.0   Max.   :4983   Max.   :23.00  
##                     NA's   :9430                                  
##      minute        time_hour                          speed      
##  Min.   : 0.00   Min.   :2013-01-01 05:00:00.00   Min.   : 76.8  
##  1st Qu.: 8.00   1st Qu.:2013-04-04 13:00:00.00   1st Qu.:358.1  
##  Median :29.00   Median :2013-07-03 10:00:00.00   Median :404.2  
##  Mean   :26.23   Mean   :2013-07-03 05:22:54.64   Mean   :394.3  
##  3rd Qu.:44.00   3rd Qu.:2013-10-01 07:00:00.00   3rd Qu.:438.8  
##  Max.   :59.00   Max.   :2013-12-31 23:00:00.00   Max.   :703.4  
##                                                   NA's   :9430

Change month column to names of months instead of numbers

# Recode the numeric month (1-12) as month names in one vectorised step.
# `month.name` is base R's constant c("January", ..., "December"), so indexing
# it by the month number gives the matching name. Storing the result as a
# factor with levels in calendar order keeps plots, legends and tables in
# January-December order; a plain character column would sort alphabetically
# (April, August, December, ...).
flights_speed$month <- factor(
  month.name[flights_speed$month],
  levels = month.name
)

Histogram of Frequencies of Flight Speed for Each Month

# Overlaid histograms of flight speed, one semi-transparent layer per month.
flights_speed_histogram <- flights_speed %>%
  # Make sure the months are a factor in calendar order. Without this, month
  # names stored as character are ordered alphabetically on the fill scale.
  mutate(month = factor(month, levels = month.name)) %>%
  ggplot(aes(x = speed, fill = month)) +
  geom_histogram(
    position = "identity",  # draw the months on top of each other, not stacked
    alpha = 0.2,            # low opacity so every month stays visible
    binwidth = 25,          # 25 mph wide bins
    color = "black"
  ) +
  # Legend labels come from the factor levels. Passing a hard-coded `labels`
  # vector here would be matched to the breaks by position and could label
  # one month's data with another month's name.
  scale_fill_ordinal(name = "Month") +
  ggtitle("Flight Speed Distribution of Flights for the Months of 2013") +
  labs(y = "Frequency", x = "Speed(mph)")
flights_speed_histogram
## Warning: Removed 9430 rows containing non-finite values (`stat_bin()`).

Visualization Description

This visualization uses a new variable mutated into the data called “speed”, which is calculated as distance/(air_time/60) to give speed in miles per hour. The histogram shows the frequency of flight speeds for each month of 2013. It reveals that most flights’ speeds are between 300 mph and 500 mph, with the highest frequency of flights flying at between 450 mph and 475 mph. The data appear to be roughly normally distributed, though slightly skewed left. One aspect of this histogram I would like to draw attention to is the opacity, or alpha value, of the bars. Since there are twelve months, there are up to 12 separate bars of frequency at each speed, one representing each month. This posed a problem for me in trying to find an opacity at which all of the data was visible but the histogram was still visually appealing. Eventually, I settled on an alpha value of 0.2, as it shows all of the data, whereas greater values hide more of it and lower values are less visually appealing. Overall, I believe that a histogram of this variety has its limitations in showing as much data as I tried to in this case, and there are probably better methods than this plot for handling data of this size.