library(tidyverse)
library(openintro)

Exercise 1

The histograms below show the count of flights in each departure delay window. The second graph has a smaller bin width than the other two, allowing you to see a more accurate representation of the number of flights by departure window. The third has too large a bin width to support any useful analysis.

# Load the nycflights dataset into the session and list its column names.
data("nycflights")
colnames(nycflights)
##  [1] "year"      "month"     "day"       "dep_time"  "dep_delay" "arr_time" 
##  [7] "arr_delay" "carrier"   "tailnum"   "flight"    "origin"    "dest"     
## [13] "air_time"  "distance"  "hour"      "minute"
# Open the dataset's help page, then preview each column's type and values.
help("nycflights")
nycflights %>%
  glimpse()
## Rows: 32,735
## Columns: 16
## $ year      <int> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, …
## $ month     <int> 6, 5, 12, 5, 7, 1, 12, 8, 9, 4, 6, 11, 4, 3, 10, 1, 2, 8, 10…
## $ day       <int> 30, 7, 8, 14, 21, 1, 9, 13, 26, 30, 17, 22, 26, 25, 21, 23, …
## $ dep_time  <int> 940, 1657, 859, 1841, 1102, 1817, 1259, 1920, 725, 1323, 940…
## $ dep_delay <dbl> 15, -3, -1, -4, -3, -3, 14, 85, -10, 62, 5, 5, -2, 115, -4, …
## $ arr_time  <int> 1216, 2104, 1238, 2122, 1230, 2008, 1617, 2032, 1027, 1549, …
## $ arr_delay <dbl> -4, 10, 11, -34, -8, 3, 22, 71, -8, 60, -4, -2, 22, 91, -6, …
## $ carrier   <chr> "VX", "DL", "DL", "DL", "9E", "AA", "WN", "B6", "AA", "EV", …
## $ tailnum   <chr> "N626VA", "N3760C", "N712TW", "N914DL", "N823AY", "N3AXAA", …
## $ flight    <int> 407, 329, 422, 2391, 3652, 353, 1428, 1407, 2279, 4162, 20, …
## $ origin    <chr> "JFK", "JFK", "JFK", "JFK", "LGA", "LGA", "EWR", "JFK", "LGA…
## $ dest      <chr> "LAX", "SJU", "LAX", "TPA", "ORF", "ORD", "HOU", "IAD", "MIA…
## $ air_time  <dbl> 313, 216, 376, 135, 50, 138, 240, 48, 148, 110, 50, 161, 87,…
## $ distance  <dbl> 2475, 1598, 2475, 1005, 296, 733, 1411, 228, 1096, 820, 264,…
## $ hour      <dbl> 9, 16, 8, 18, 11, 18, 12, 19, 7, 13, 9, 13, 8, 20, 12, 20, 6…
## $ minute    <dbl> 40, 57, 59, 41, 2, 17, 59, 20, 25, 23, 40, 20, 9, 54, 17, 24…
# Histogram of dep_delay using ggplot2's default binning.
nycflights %>%
  ggplot(aes(x = dep_delay)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with a bin width of 15.
nycflights %>%
  ggplot(aes(x = dep_delay)) +
  geom_histogram(binwidth = 15)

# Same histogram with a bin width of 150.
nycflights %>%
  ggplot(aes(x = dep_delay)) +
  geom_histogram(binwidth = 150)

Exercise 2

There are 68 observations in the subset sfo_feb_flights (flights with destination SFO in month 2).

# Keep only the rows whose destination is SFO and whose month is 2.
sfo_feb_flights <- filter(nycflights, dest == "SFO" & month == 2)

# Report the number of rows in the subset as a one-cell tibble.
sfo_feb_flights %>%
  count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1    68

Exercise 3

# Histogram of arr_delay for the sfo_feb_flights subset, bin width 10.
sfo_feb_flights %>%
  ggplot(aes(x = arr_delay)) +
  geom_histogram(binwidth = 10)

# Centre (mean, median) and spread (sd, IQR, variance) of arr_delay in
# the sfo_feb_flights subset; each value is printed on its own.
with(sfo_feb_flights, mean(arr_delay))
## [1] -4.5
with(sfo_feb_flights, median(arr_delay))
## [1] -11
with(sfo_feb_flights, sd(arr_delay))
## [1] 36.28062
with(sfo_feb_flights, IQR(arr_delay))
## [1] 23.25
with(sfo_feb_flights, var(arr_delay))
## [1] 1316.284

Exercise 4

Based on the IQR in the table below, DL and UA have the most variable arrival delays (IQR of 22 each), with VX close behind at about 21.2.

# Per carrier: median and IQR of arr_delay, plus the number of flights.
summarise(
  group_by(sfo_feb_flights, carrier),
  median_ad = median(arr_delay),
  iqr_ad = IQR(arr_delay),
  n_flights = n()
)
## # A tibble: 5 × 4
##   carrier median_ad iqr_ad n_flights
##   <chr>       <dbl>  <dbl>     <int>
## 1 AA            5     17.5        10
## 2 B6          -10.5   12.2         6
## 3 DL          -15     22          19
## 4 UA          -10     22          21
## 5 VX          -22.5   21.2        12

Exercise 5

When deciding which month to travel in based on departure delays, I would use the mean rather than the median.

# Mean and median dep_delay for each month, sorted so the month with the
# largest mean delay comes first.
group_by(nycflights, month) %>%
  summarise(
    mean_dd = mean(dep_delay),
    median_dd = median(dep_delay)
  ) %>%
  arrange(desc(mean_dd))
## # A tibble: 12 × 3
##    month mean_dd median_dd
##    <int>   <dbl>     <dbl>
##  1     7   20.8          0
##  2     6   20.4          0
##  3    12   17.4          1
##  4     4   14.6         -2
##  5     3   13.5         -1
##  6     5   13.3         -1
##  7     8   12.6         -1
##  8     2   10.7         -2
##  9     1   10.2         -2
## 10     9    6.87        -3
## 11    11    6.10        -2
## 12    10    5.88        -3

Exercise 6

Based on the findings below, I would pick LGA as my airport to fly out of. Its on-time departure rate is about 0.73; that is, roughly 73% of flights from LGA are “on time”.

# Add dep_type: "on time" when dep_delay is below 5, otherwise "delayed".
nycflights <- mutate(
  nycflights,
  dep_type = if_else(dep_delay < 5, "on time", "delayed")
)

# Fraction of "on time" departures for each origin, highest rate first.
group_by(nycflights, origin) %>%
  summarise(
    ot_dep_rate = sum(dep_type == "on time") / n()
  ) %>%
  arrange(desc(ot_dep_rate))
## # A tibble: 3 × 2
##   origin ot_dep_rate
##   <chr>        <dbl>
## 1 LGA          0.728
## 2 JFK          0.694
## 3 EWR          0.637
# Bar chart of flight counts per origin, filled by dep_type.
nycflights %>%
  ggplot(aes(x = origin, fill = dep_type)) +
  geom_bar()

Exercise 7

# Add avg_speed: distance divided by air_time, where air_time is first
# divided by 60.
nycflights <- mutate(nycflights, avg_speed = distance / (air_time / 60))

Exercise 8

# Scatter plot of avg_speed against distance.
nycflights %>%
  ggplot(aes(x = distance, y = avg_speed)) +
  geom_point()

Exercise 9

At a departure delay of about 80 minutes, you have reached the point where you can no longer expect to arrive on time, as seen in the plot below.

# Keep only flights operated by carriers AA, DL or UA.
maj_carrier_flights <- filter(nycflights, carrier %in% c("AA", "DL", "UA"))

# Scatter plot of arr_delay against dep_delay, coloured by carrier.
ggplot(
  data = maj_carrier_flights,
  aes(x = dep_delay, y = arr_delay, color = factor(carrier))
) +
  geom_point()

LS0tCnRpdGxlOiAiTGFiIDIiCmF1dGhvcjogIk5laWwgSG9kZ2tpbnNvbiIKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydAotLS0KCmBgYHtyIGxvYWQtcGFja2FnZXMsIG1lc3NhZ2U9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KG9wZW5pbnRybykKCmBgYAoKIyMjIEV4ZXJjaXNlIDEKClRoZSBiZWxvdyBoaXN0b2dyYW1zIHNob3cgdGhlbiBjb3VudCBvZiBmbGlnaHRzIGluIGNlcnRhaW4gZGVwYXJ0dXJlIHdpbmRvd3MuIFRoZSBzZWNvbmQgZ3JhcGggaGFzIGEgc21hbGwgYmluIHdpZHRoIHRoYW4gdGhlIG90aGVyIHR3byBhbGxvd2luZyB5b3UgdG8gc2VlIGEgbW9yZSBhY2N1cmF0ZSByZXByZXNlbnRhdGlvbiBvZiB0aGUgbnVtYmVyIG9mIGZsaWdodHMgYnkgZGVwYXJ0dXJlIHdpbmRvdy4gVGhlIHRoaXJkIGhhcyB0b28gbGFyZ2Ugb2YgYSBiaW4gd2lkdGggdG8gZ2V0IHVzZWZ1bCBhbmFseXNpcyBmcm9tLgoKYGBge3IgY29kZS1jaHVuay1lMX0KZGF0YShueWNmbGlnaHRzKQpuYW1lcyhueWNmbGlnaHRzKQo/bnljZmxpZ2h0cwpnbGltcHNlKG55Y2ZsaWdodHMpCgpnZ3Bsb3QoZGF0YSA9IG55Y2ZsaWdodHMsIGFlcyh4ID0gZGVwX2RlbGF5KSkgKwogIGdlb21faGlzdG9ncmFtKCkKZ2dwbG90KGRhdGEgPSBueWNmbGlnaHRzLCBhZXMoeCA9IGRlcF9kZWxheSkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDE1KQpnZ3Bsb3QoZGF0YSA9IG55Y2ZsaWdodHMsIGFlcyh4ID0gZGVwX2RlbGF5KSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMTUwKQpgYGAKCiMjIyBFeGVyY2lzZSAyClRoZXJlIGFyZSA2OCBvYnNlcnZhdGlvbnMgaW4gdGhlIHN1YnNldCBzZm9fZmViX2ZsaWdodHMKYGBge3IgY29kZS1jaHVuay1lMn0Kc2ZvX2ZlYl9mbGlnaHRzIDwtIG55Y2ZsaWdodHMgJT4lCiAgZmlsdGVyKGRlc3QgPT0gIlNGTyIsIG1vbnRoID09IDIpCgpjb3VudChzZm9fZmViX2ZsaWdodHMpCmBgYAojIyMgRXhlcmNpc2UgMwouLi4KYGBge3IgY29kZS1jaHVuay1lM30KCmdncGxvdChkYXRhID0gc2ZvX2ZlYl9mbGlnaHRzLCBhZXMoeCA9IGFycl9kZWxheSkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEwKQoKbWVhbihzZm9fZmViX2ZsaWdodHMkYXJyX2RlbGF5KQptZWRpYW4oc2ZvX2ZlYl9mbGlnaHRzJGFycl9kZWxheSkKc2Qoc2ZvX2ZlYl9mbGlnaHRzJGFycl9kZWxheSkKSVFSKHNmb19mZWJfZmxpZ2h0cyRhcnJfZGVsYXkpCnZhcihzZm9fZmViX2ZsaWdodHMkYXJyX2RlbGF5KQoKCmBgYAoKCgojIyMgRXhlcmNpc2UgNApWWCBoYXMgdGhlIG1vc3QgdmFyaWFibGUgYXJyaXZhbCBkZWxheXMuCgpgYGB7ciBjb2RlLWNodW5rLWU0fQoKc2ZvX2ZlYl9mbGlnaHRzICU+JQogIGdyb3VwX2J5KGNhcnJpZXIpICU+JQogIHN1bW1hcmlzZShtZWRpYW5fYWQgPSBtZWRpYW4oYXJyX2RlbGF5KSwgaXFyX2FkID0gSVFSKGFycl9kZWxheSksIG5f
ZmxpZ2h0cyA9IG4oKSkKYGBgCgoKCiMjIyBFeGVyY2lzZSA1CldoZW4gZGVjaWRpbmcgb24gd2hpY2ggbW9udGggdG8gdHJhdmVsIGJhc2VkIG9uIGRlcGFydHVyZSBJIHdvdWxkIHVzZSB0aGUgbWVhbiBvdmVyIHRoZSBtZWRpYW4uIApgYGB7ciBjb2RlLWNodW5rLWU1fQpueWNmbGlnaHRzICU+JQogIGdyb3VwX2J5KG1vbnRoKSAlPiUKICBzdW1tYXJpc2UobWVhbl9kZCA9IG1lYW4oZGVwX2RlbGF5KSwgbWVkaWFuX2RkID0gbWVkaWFuKGRlcF9kZWxheSkpICU+JQogIGFycmFuZ2UoZGVzYyhtZWFuX2RkKSkKYGBgCgojIyMgRXhlcmNpc2UgNgpCYXNlZCBvbiB0aGUgYmVsb3cgZmluZGluZ3MgSSB3b3VsZCBwaWNrIExHQSBhcyBteSBhaXJwb3J0IHRvIGZseSBvdXQgb2YuIFRoZSByYXRlIG9mIG9uIHRpbWUgZmxpZ2h0cyBpcyBhYm91dCAuNzMgb3IgNzMlIG9mIGZsaWdodHMgZnJvbSBMR0EgYXJlICJvbiB0aW1lIgpgYGB7ciBjb2RlLWNodW5rLWU2fQpueWNmbGlnaHRzIDwtIG55Y2ZsaWdodHMgJT4lCiAgbXV0YXRlKGRlcF90eXBlID0gaWZlbHNlKGRlcF9kZWxheSA8IDUsICJvbiB0aW1lIiwgImRlbGF5ZWQiKSkKCm55Y2ZsaWdodHMgJT4lCiAgZ3JvdXBfYnkob3JpZ2luKSAlPiUKICBzdW1tYXJpc2Uob3RfZGVwX3JhdGUgPSBzdW0oZGVwX3R5cGUgPT0gIm9uIHRpbWUiKSAvIG4oKSkgJT4lCiAgYXJyYW5nZShkZXNjKG90X2RlcF9yYXRlKSkKCmdncGxvdChkYXRhID0gbnljZmxpZ2h0cywgYWVzKHggPSBvcmlnaW4sIGZpbGwgPSBkZXBfdHlwZSkpICsKICBnZW9tX2JhcigpCmBgYAoKCiMjIyBFeGVyY2lzZSA3CgpgYGB7ciBjb2RlLWNodW5rLWU3fQpueWNmbGlnaHRzIDwtIG55Y2ZsaWdodHMgJT4lCiAgbXV0YXRlKGF2Z19zcGVlZCA9IChkaXN0YW5jZS8oYWlyX3RpbWUvNjApKSkKYGBgCgoKIyMjIEV4ZXJjaXNlIDgKCmBgYHtyIGNvZGUtY2h1bmstZTh9CmdncGxvdChkYXRhID0gbnljZmxpZ2h0cywgYWVzKHggPSBkaXN0YW5jZSwgeSA9IGF2Z19zcGVlZCkpICsKZ2VvbV9wb2ludCgpCmBgYAoKCiMjIyBFeGVyY2lzZSA5CkF0IGFib3V0IDgwIG1pbnV0ZXMgeW91IGhhdmUgcmVhY2hlZCB0aGUgcG9pbnQgd2hlcmUgeW91IGNhbiBubyBsb25nZXIgZXhwZWN0IHRvIGFycml2ZSBvbiB0aW1lIGFzIHNlZW4gYmVsb3cgb24gdGhlIHBsb3QuCgpgYGB7ciBjb2RlLWNodW5rLWU5fQoKbWFqX2NhcnJpZXJfZmxpZ2h0cyA8LSBueWNmbGlnaHRzICU+JQogIGZpbHRlcihjYXJyaWVyICVpbiUgYygiQUEiLCAiREwiLCAiVUEiKSkKCmdncGxvdChkYXRhID0gbWFqX2NhcnJpZXJfZmxpZ2h0cywgYWVzKHggPSBkZXBfZGVsYXksIHkgPSBhcnJfZGVsYXkpKSArCmdlb21fcG9pbnQoYWVzKGNvbG9yID0gZmFjdG9yKGNhcnJpZXIpKSkKYGBgCg==