library(tidyverse)
library(openintro)
data(nycflights)

Exercise 1

Across the three histograms, the shape of the distribution becomes more defined with each choice of bin width, which in turn allows the viewer to focus specifically on the mode of the departure delays.

# List the column names available in the flights data.
names(nycflights)
##  [1] "year"      "month"     "day"       "dep_time"  "dep_delay" "arr_time" 
##  [7] "arr_delay" "carrier"   "tailnum"   "flight"    "origin"    "dest"     
## [13] "air_time"  "distance"  "hour"      "minute"
# Histogram of departure delays using the default binning.
ggplot(nycflights, aes(x = dep_delay)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with a bin width of 15.
ggplot(nycflights, aes(x = dep_delay)) +
  geom_histogram(binwidth = 15)

# Same histogram with a bin width of 150.
ggplot(nycflights, aes(x = dep_delay)) +
  geom_histogram(binwidth = 150)

Exercise 2

Based on the data frame that includes flights headed to SFO in February, a total of 68 flights meet these criteria.

# Keep only the rows whose destination is SFO and whose month is 2 (February).
sfo_feb_flights <- nycflights %>%
  filter(dest == "SFO", month == 2)

### Exercise 3

The triangular shape of the histogram suggests a mode, while the spread of data at both extremes indicates a high standard deviation and a wide interquartile range, implying multiple outliers. Thus, the most appropriate statistical tools for analysis would be the mean and median.

# Distribution of arrival delays for the SFO February flights (bin width 5).
ggplot(sfo_feb_flights, aes(x = arr_delay)) +
  geom_histogram(binwidth = 5)

# Mean and median arrival delay, plus the number of flights, in one row.
sfo_feb_flights %>%
  summarise(
    mean_ad = mean(arr_delay),
    median_ad = median(arr_delay),
    n = n()
  )
## # A tibble: 1 × 3
##   mean_ad median_ad     n
##     <dbl>     <dbl> <int>
## 1    -4.5       -11    68

### Exercise 4

Based on the executed code, the arrival delays exhibit the highest variability for carriers DL and UA, while carrier B6 demonstrates the least variability, evidenced by its smallest interquartile range in the dataset.

# Median and interquartile range of arrival delay for each carrier
# among the SFO February flights.
sfo_feb_flights %>%
  group_by(carrier) %>%
  summarise(
    median_ad = median(arr_delay),
    IQR_ad = IQR(arr_delay)
  )
## # A tibble: 5 × 3
##   carrier median_ad IQR_ad
##   <chr>       <dbl>  <dbl>
## 1 AA            5     17.5
## 2 B6          -10.5   12.2
## 3 DL          -15     22  
## 4 UA          -10     22  
## 5 VX          -22.5   21.2

### Exercise 5

Using mean departure delays provides less information about the probability of a delay occurring than using median departure delays. The median more accurately represents the likelihood of encountering a delay. In the dataset relying on median departure delays, delays might be less frequent, but when they do occur, they tend to be significant. Conversely, in the dataset relying on mean departure delays, delays are more frequent but generally smaller in magnitude. Therefore, choosing between mean and median departure delays depends on whether one prioritizes the frequency or the magnitude of delays.

# Mean departure delay per month, largest mean first.
nycflights %>%
  group_by(month) %>%
  summarise(mean_dd = mean(dep_delay)) %>%
  arrange(desc(mean_dd))
## # A tibble: 12 × 2
##    month mean_dd
##    <int>   <dbl>
##  1     7   20.8 
##  2     6   20.4 
##  3    12   17.4 
##  4     4   14.6 
##  5     3   13.5 
##  6     5   13.3 
##  7     8   12.6 
##  8     2   10.7 
##  9     1   10.2 
## 10     9    6.87
## 11    11    6.10
## 12    10    5.88
# Median departure delay per month, largest median first.
nycflights %>%
  group_by(month) %>%
  summarise(median_dd = median(dep_delay)) %>%
  arrange(desc(median_dd))
## # A tibble: 12 × 2
##    month median_dd
##    <int>     <dbl>
##  1    12         1
##  2     6         0
##  3     7         0
##  4     3        -1
##  5     5        -1
##  6     8        -1
##  7     1        -2
##  8     2        -2
##  9     4        -2
## 10    11        -2
## 11     9        -3
## 12    10        -3

### Exercise 6

Based on the on-time departure rates calculated here, I would choose to fly out of LGA, which has the highest on-time departure rate of the three airports (about 72.8%).

# Label every flight "on time" when its departure delay is below 5,
# and "delayed" otherwise.
nycflights <- nycflights %>%
  mutate(dep_type = ifelse(dep_delay < 5, "on time", "delayed"))

# Proportion of on-time departures per origin airport, highest rate first.
nycflights %>%
  group_by(origin) %>%
  summarise(ot_dep_rate = sum(dep_type == "on time") / n()) %>%
  arrange(desc(ot_dep_rate))
## # A tibble: 3 × 2
##   origin ot_dep_rate
##   <chr>        <dbl>
## 1 LGA          0.728
## 2 JFK          0.694
## 3 EWR          0.637

### Exercise 7

# Add avg_speed = distance / (air_time / 60) as a new column.
# NOTE(review): this reads as miles per hour only if distance is in miles
# and air_time is in minutes -- confirm against the dataset documentation.
nycflights <- nycflights %>%
  mutate(avg_speed = distance / (air_time / 60))

### Exercise 8

# Scatterplot of avg_speed vs. distance. By the usual "y vs. x" convention the
# explanatory variable (distance) goes on the x-axis and the response
# (avg_speed, which is computed from distance) goes on the y-axis.
ggplot(nycflights, aes(x = distance, y = avg_speed)) +
  geom_point()

### Exercise 9

# Restrict the data to the three carriers AA, DL and UA.
flights <- nycflights %>%
  filter(carrier %in% c("AA", "DL", "UA"))

# Arrival delay against departure delay, one colour per carrier.
ggplot(flights, aes(x = dep_delay, y = arr_delay, color = carrier)) +
  geom_point()

# Median and standard deviation of departure delay for every distinct
# arrival-delay value, ordered from the largest standard deviation down.
# NOTE(review): grouping by arr_delay creates one group per distinct delay
# value; confirm this is intended rather than grouping by carrier.
flights %>%
  group_by(arr_delay) %>%
  summarise(
    median_dd = median(dep_delay),
    sd_dd = sd(dep_delay)
  ) %>%
  arrange(desc(sd_dd))
## # A tibble: 339 × 3
##    arr_delay median_dd sd_dd
##        <dbl>     <dbl> <dbl>
##  1       236     198.  104. 
##  2       242     224.   98.3
##  3       104      56    87.7
##  4       124      69    81.7
##  5       194     201    78.6
##  6       164      99    63.3
##  7        96      44.5  62.5
##  8       256     254.   62.5
##  9       255     230.   60.1
## 10       122     110    58.9
## # ℹ 329 more rows
LS0tDQp0aXRsZTogIkxhYiAyOiBJbnRyb2R1Y3Rpb24gdG8gRGF0YSINCmF1dGhvcjogIkNvbGluIFMuIg0KZGF0ZTogIjIvMTgvMjAyNCINCm91dHB1dDogb3BlbmludHJvOjpsYWJfcmVwb3J0DQotLS0NCg0KYGBge3IgbG9hZC1wYWNrYWdlcywgbWVzc2FnZT1GQUxTRX0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShvcGVuaW50cm8pDQpkYXRhKG55Y2ZsaWdodHMpDQoNCmBgYA0KDQojIyMgRXhlcmNpc2UgMQ0KDQpBZnRlciBvYnNlcnZpbmcgdGhlIHRocmVlIGhpc3RvZ3JhbXMsIHRoZXkgYmVjb21lIG1vcmUgZGVmaW5lZCBlYWNoIHRpbWUuIEluIHR1cm4sIGFsbG93aW5nIGl0cyB2aWV3ZXIgdG8gc3BlY2lmaWNhbGx5IGZvY3VzIG9uIHRoZSBtb2RlIG9mIHRoZSBkZXBhcnR1cmUgZGVsYXlzLg0KDQpgYGB7ciBjb2RlLWNodW5rLWxhYmVsfQ0KbmFtZXMobnljZmxpZ2h0cykNCmdncGxvdChkYXRhPW55Y2ZsaWdodHMsIGFlcyh4PWRlcF9kZWxheSkpK2dlb21faGlzdG9ncmFtKCkNCmdncGxvdChkYXRhPW55Y2ZsaWdodHMsIGFlcyh4PWRlcF9kZWxheSkpK2dlb21faGlzdG9ncmFtKGJpbndpZHRoPTE1KQ0KZ2dwbG90KGRhdGE9bnljZmxpZ2h0cywgYWVzKHg9ZGVwX2RlbGF5KSkrZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGg9MTUwKQ0KYGBgDQoNCiMjIyBFeGVyY2lzZSAyDQoNCkJhc2VkIG9uIHRoZSBkYXRhIGZyYW1lIHRoYXQgaW5jbHVkZXMgZmxpZ2h0cyBoZWFkZWQgdG8gU0ZPIGluIEZlYnJ1YXJ5LCBhIHRvdGFsIG9mIDY4IGZsaWdodHMgbWVldCB0aGVzZSBjcml0ZXJpYS4NCg0KYGBge3J9DQpzZm9fZmViX2ZsaWdodHM8LW55Y2ZsaWdodHMlPiVmaWx0ZXIoZGVzdD09IlNGTyIsIG1vbnRoPT0yKQ0KYGBgDQoNCiMjI0V4ZXJjaXNlIDMNCg0KVGhlIHRyaWFuZ3VsYXIgc2hhcGUgb2YgdGhlIGhpc3RvZ3JhbSBzdWdnZXN0cyBhIG1vZGUsIHdoaWxlIHRoZSBzcHJlYWQgb2YgZGF0YSBhdCBib3RoIGV4dHJlbWVzIGluZGljYXRlcyBhIGhpZ2ggc3RhbmRhcmQgZGV2aWF0aW9uIGFuZCBhIHdpZGUgaW50ZXJxdWFydGlsZSByYW5nZSwgaW1wbHlpbmcgbXVsdGlwbGUgb3V0bGllcnMuIFRodXMsIHRoZSBtb3N0IGFwcHJvcHJpYXRlIHN0YXRpc3RpY2FsIHRvb2xzIGZvciBhbmFseXNpcyB3b3VsZCBiZSB0aGUgbWVhbiBhbmQgbWVkaWFuLg0KDQpgYGB7cn0NCmdncGxvdChkYXRhPXNmb19mZWJfZmxpZ2h0cywgYWVzKHg9YXJyX2RlbGF5KSkrZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGg9NSkNCg0Kc2ZvX2ZlYl9mbGlnaHRzJT4lc3VtbWFyaXplKG1lYW5fYWQ9bWVhbihhcnJfZGVsYXkpLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lZGlhbl9hZD1tZWRpYW4oYXJyX2RlbGF5KSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBuPW4oKSkNCmBgYA0KDQojIyNFeGVyY2lzZSA0DQoNCkJhc2VkIG9uIHRoZSBleGVjdXRlZCBjb2RlLCB0aGUgYXJyaXZhbCBkZWxheXMgZXhoaWJpdCB0aGUgaGlnaGVzdCB2
YXJpYWJpbGl0eSBmb3IgY2FycmllcnMgREwgYW5kIFVBLCB3aGlsZSBjYXJyaWVyIEI2IGRlbW9uc3RyYXRlcyB0aGUgbGVhc3QgdmFyaWFiaWxpdHksIGV2aWRlbmNlZCBieSBpdHMgc21hbGxlc3QgaW50ZXJxdWFydGlsZSByYW5nZSBpbiB0aGUgZGF0YXNldC4NCg0KDQoNCmBgYHtyfQ0Kc2ZvX2ZlYl9mbGlnaHRzJT4lDQogIGdyb3VwX2J5KGNhcnJpZXIpJT4lDQogIHN1bW1hcml6ZShtZWRpYW5fYWQ9bWVkaWFuKGFycl9kZWxheSksDQogICAgICAgICAgICBJUVJfYWQ9SVFSKGFycl9kZWxheSkpDQpgYGANCg0KIyMjRXhlcmNpc2UgNQ0KDQpVc2luZyBtZWFuIGRlcGFydHVyZSBkZWxheXMgcHJvdmlkZXMgbGVzcyBpbmZvcm1hdGlvbiBhYm91dCB0aGUgcHJvYmFiaWxpdHkgb2YgYSBkZWxheSBvY2N1cnJpbmcgY29tcGFyZWQgdG8gdXNpbmcgbWVkaWFuIGRlcGFydHVyZSBkZWxheXMuIFRoZSBtZWRpYW4gYmV0dGVyIHJlcHJlc2VudHMgdGhlIGxpa2VsaWhvb2Qgb2YgZW5jb3VudGVyaW5nIGEgZGVsYXkgYWNjdXJhdGVseS4gSW4gdGhlIGRhdGFzZXQgcmVseWluZyBvbiBtZWRpYW4gZGVwYXJ0dXJlIGRlbGF5cywgZGVsYXlzIG1pZ2h0IGJlIGxlc3MgZnJlcXVlbnQsIGJ1dCB3aGVuIHRoZXkgZG8gb2NjdXIsIHRoZXkgdGVuZCB0byBiZSBzaWduaWZpY2FudC4gQ29udmVyc2VseSwgaW4gdGhlIGRhdGFzZXQgcmVseWluZyBvbiBtZWFuIGRlcGFydHVyZSBkZWxheXMsIGRlbGF5cyBhcmUgbW9yZSBmcmVxdWVudCBidXQgZ2VuZXJhbGx5IHNtYWxsZXIgaW4gbWFnbml0dWRlLiBUaGVyZWZvcmUsIGNob29zaW5nIG1lYW4gYW5kIG1lZGlhbiBkZXBhcnR1cmUgZGVsYXlzIGRlcGVuZHMgb24gd2hldGhlciBvbmUgcHJpb3JpdGl6ZXMgdGhlIGZycXVlbmN5IG9mIG1hZ25pdHVkZSBvZiBkZWxheXMuIA0KDQpgYGB7cn0NCm55Y2ZsaWdodHMlPiUNCiAgZ3JvdXBfYnkobW9udGgpJT4lDQogIHN1bW1hcml6ZShtZWFuX2RkPW1lYW4oZGVwX2RlbGF5KSklPiUNCiAgYXJyYW5nZShkZXNjKG1lYW5fZGQpKQ0KDQpueWNmbGlnaHRzJT4lDQogIGdyb3VwX2J5KG1vbnRoKSU+JQ0KICBzdW1tYXJpemUobWVkaWFuX2RkPW1lZGlhbihkZXBfZGVsYXkpKSU+JQ0KICBhcnJhbmdlKGRlc2MobWVkaWFuX2RkKSkNCmBgYA0KIyMjRXhlcmNpc2UgNg0KDQpJbiBteSBvcGluaW9uLCBJIHdvdWxkIG1ha2UgdGhlIGRlY2lzaW9uIHRvIGZseSBvdXQgb2YgdGhlIExHQSBhaXJwb3J0IGJhc2VkIG9uIHRoZSBwZXJjZW50YWdlIGNhbGN1bGF0ZWQgaGVyZS4NCg0KYGBge3J9DQpueWNmbGlnaHRzPC1ueWNmbGlnaHRzJT4lDQogIG11dGF0ZShkZXBfdHlwZT1pZmVsc2UoZGVwX2RlbGF5PDUsICJvbiB0aW1lIiwgImRlbGF5ZWQiKSkNCm55Y2ZsaWdodHMlPiUNCiAgZ3JvdXBfYnkob3JpZ2luKSU+JQ0KICBzdW1tYXJpemUob3RfZGVwX3JhdGU9c3VtKGRlcF90eXBlPT0ib24gdGltZSIpL24oKSklPiUNCiAgYXJyYW5nZShkZXNjKG90X2RlcF9yYXRlKSkN
CmBgYA0KIyMjRXhlcmNpc2UgNw0KYGBge3J9DQpueWNmbGlnaHRzPC1ueWNmbGlnaHRzJT4lDQogIG11dGF0ZShhdmdfc3BlZWQ9ZGlzdGFuY2UvKGFpcl90aW1lLzYwKSkNCmBgYA0KIyMjRXhlcmNpc2UgOA0KYGBge3J9DQpnZ3Bsb3QoZGF0YT1ueWNmbGlnaHRzLCBhZXMoeD1hdmdfc3BlZWQsIHk9ZGlzdGFuY2UpKStnZW9tX3BvaW50KCkNCmBgYA0KIyMjRXhlcmNpc2UgOQ0KYGBge3J9DQpmbGlnaHRzPC1ueWNmbGlnaHRzJT4lZmlsdGVyKGNhcnJpZXI9PSJBQSJ8Y2Fycmllcj09IkRMInxjYXJyaWVyPT0iVUEiKQ0KZ2dwbG90KGRhdGE9ZmxpZ2h0cywgYWVzKHg9ZGVwX2RlbGF5LHk9YXJyX2RlbGF5LCBjb2xvcj1jYXJyaWVyKSkrZ2VvbV9wb2ludCgpDQoNCmZsaWdodHMlPiUNCiAgZ3JvdXBfYnkoYXJyX2RlbGF5KSU+JQ0KICBzdW1tYXJpemUobWVkaWFuX2RkPW1lZGlhbihkZXBfZGVsYXkpLA0KICAgICAgICAgICAgc2RfZGQ9c2QoZGVwX2RlbGF5KSklPiUNCiAgYXJyYW5nZShkZXNjKHNkX2RkKSkNCmBgYA==