library(tidyverse)
library(openintro)

Exercise 1

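As the binwidth gets larger, each bar covers a wider range of delays, so the histogram shows the overall shape of the distribution more clearly but hides the finer detail (the tallest bar also holds a larger count).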

# Flights whose destination is LAX, followed by a histogram of their
# departure delays (default binning).
lax_flights <- filter(nycflights, dest == "LAX")

lax_flights %>%
  ggplot(aes(x = dep_delay)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Mean and median departure delay, and the number of flights, for the
# LAX subset.
summarise(
  lax_flights,
  mean_dd = mean(dep_delay),
  median_dd = median(dep_delay),
  n = n()
)
## # A tibble: 1 × 3
##   mean_dd median_dd     n
##     <dbl>     <dbl> <int>
## 1    9.78        -1  1583

Exercise 2

There are 68 flights headed to SFO in February.

# Flights to SFO that departed in month 2 (February).
sfo_feb_flights <- filter(nycflights, dest == "SFO" & month == 2)

Exercise 3

# Histogram of arrival delays for February flights to SFO.
# The `month == 2` condition is kept so the data frame still holds what its
# name says; without it, this plot and every later use of `sfo_feb_flights`
# would silently cover all SFO flights in the data set.
sfo_feb_flights <- nycflights %>%
  filter(dest == "SFO", month == 2)

ggplot(data = sfo_feb_flights, aes(x = arr_delay)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Exercise 4

# Per-carrier median and interquartile range of arrival delay, together
# with the number of flights for each carrier.
sfo_feb_flights %>%
  group_by(carrier) %>%
  summarise(
    median_dd = median(arr_delay),
    iqr_dd = IQR(arr_delay),
    n_flights = n()
  )
## # A tibble: 5 × 4
##   carrier median_dd iqr_dd n_flights
##   <chr>       <dbl>  <dbl>     <int>
## 1 AA           -0.5   43.8       136
## 2 B6           -9     35.2        96
## 3 DL          -12     27         205
## 4 UA           -4     34         685
## 5 VX          -12     33         223

Exercise 5

Pros: Travelling in the month with the lowest mean departure delay reduces our expected waiting time.

Cons: In the month with the highest median departure delay, we are likely to arrive late at our destination and our travel schedule will be disrupted.

# Mean departure delay for each month, sorted from the largest mean to the
# smallest.
nycflights %>%
  group_by(month) %>%
  summarise(mean_dd = mean(dep_delay)) %>%
  arrange(-mean_dd)
## # A tibble: 12 × 2
##    month mean_dd
##    <int>   <dbl>
##  1     7   20.8 
##  2     6   20.4 
##  3    12   17.4 
##  4     4   14.6 
##  5     3   13.5 
##  6     5   13.3 
##  7     8   12.6 
##  8     2   10.7 
##  9     1   10.2 
## 10     9    6.87
## 11    11    6.10
## 12    10    5.88
# Mean and median departure delay for each month, side by side.
# mean() and median() already return a single value per group, so wrapping
# them in min()/max() does nothing; the columns are named after the statistic
# they actually contain rather than `min`/`max`.
nycflights %>%
  group_by(month) %>%
  summarise(
    mean_dd = mean(dep_delay),
    median_dd = median(dep_delay)
  )
## # A tibble: 12 × 3
##    month   min   max
##    <int> <dbl> <dbl>
##  1     1 10.2     -2
##  2     2 10.7     -2
##  3     3 13.5     -1
##  4     4 14.6     -2
##  5     5 13.3     -1
##  6     6 20.4      0
##  7     7 20.8      0
##  8     8 12.6     -1
##  9     9  6.87    -3
## 10    10  5.88    -3
## 11    11  6.10    -2
## 12    12 17.4      1

Exercise 6

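EWR has the lowest on-time departure rate (0.637), so it is the airport to avoid; choosing on on-time departure percentage alone, LGA, with the highest rate (0.728), should be chosen.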

# Tag every flight by departure status: a dep_delay of 5 or more is
# "delayed", anything below 5 is "on time".
nycflights <- mutate(
  nycflights,
  dep_type = ifelse(dep_delay >= 5, "delayed", "on time")
)

# Proportion of "on time" departures per origin airport, highest first.
nycflights %>%
  group_by(origin) %>%
  summarise(ot_dep_rate = sum(dep_type == "on time") / n()) %>%
  arrange(desc(ot_dep_rate))
## # A tibble: 3 × 2
##   origin ot_dep_rate
##   <chr>        <dbl>
## 1 LGA          0.728
## 2 JFK          0.694
## 3 EWR          0.637
# Bar chart of flight counts per origin airport, filled by departure status.
nycflights %>%
  ggplot(aes(x = origin, fill = dep_type)) +
  geom_bar()

Exercise 7

# Average of the per-flight speed (distance * 60 / air_time) for each origin
# airport, fastest first. The factor of 60 treats air_time as minutes, giving
# a per-hour figure.
nycflights %>%
  mutate(speed_mph = distance * 60 / air_time) %>%
  group_by(origin) %>%
  summarise(avg_speed_mph = sum(speed_mph) / n()) %>%
  arrange(desc(avg_speed_mph))
## # A tibble: 3 × 2
##   origin avg_speed_mph
##   <chr>          <dbl>
## 1 JFK             398.
## 2 EWR             396.
## 3 LGA             387.

Exercise 8

# Add each flight's own average speed: distance divided by air time, with the
# factor of 60 converting a per-minute rate to a per-hour one (air_time is
# treated as minutes, as in the per-airport summary).
# No division by n() here: inside an ungrouped mutate() n() is the total row
# count, which would shrink every speed by that constant factor.
nycflights <- nycflights %>%
  mutate(avg_speed = distance * 60 / air_time)

# Scatterplot of average speed against distance flown.
ggplot(data = nycflights, aes(x = distance, y = avg_speed)) +
  geom_point()

Exercise 9

The cutoff point is a departure delay of about 5: below it, we can still expect to arrive on time.

# List the columns now present, including the added dep_type and avg_speed.
colnames(nycflights)
##  [1] "year"      "month"     "day"       "dep_time"  "dep_delay" "arr_time" 
##  [7] "arr_delay" "carrier"   "tailnum"   "flight"    "origin"    "dest"     
## [13] "air_time"  "distance"  "hour"      "minute"    "dep_type"  "avg_speed"
# Arrival delay against departure delay, coloured first by carrier ...
ggplot(nycflights, aes(x = dep_delay, y = arr_delay, color = carrier)) +
  geom_point()

# ... and then by departure status.
ggplot(nycflights, aes(x = dep_delay, y = arr_delay, color = dep_type)) +
  geom_point()

LS0tCnRpdGxlOiAiTGFiIDIgSW50cm9kdWN0aW9uIHRvIERhdGEiCmF1dGhvcjogIkx3aW4gTmFuZGFyIFNod2UiCmRhdGU6ICJgMDItMTctMjNgIgpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydAotLS0KCmBgYHtyIGxvYWQtcGFja2FnZXMsIG1lc3NhZ2U9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KG9wZW5pbnRybykKYGBgCgojIyMgRXhlcmNpc2UgMQoKQXMgdGhlIHZhbHVlIG9mIGJhbmR3aWR0aCBpcyBiaWdnZXIsIHRoZSBoaWdoZXN0IGNvdW50IG9mIGRhdGEgdmlzdWFsaXphdGlvbiBjYW4gYmUgc2VlbiBpbiBhIHdpZGVyIHZpZXcuCgpgYGB7ciBkZWxheS1kZXBhcnR1cmV9CmxheF9mbGlnaHRzIDwtIG55Y2ZsaWdodHMgJT4lCiAgZmlsdGVyKGRlc3QgPT0gIkxBWCIpCmdncGxvdChkYXRhID0gbGF4X2ZsaWdodHMsIGFlcyh4ID0gZGVwX2RlbGF5KSkgKyBnZW9tX2hpc3RvZ3JhbSgpCmxheF9mbGlnaHRzICU+JSAKICBzdW1tYXJpc2UobWVhbl9kZCA9IG1lYW4oZGVwX2RlbGF5KSwgCiAgICAgICAgICAgIG1lZGlhbl9kZCA9IG1lZGlhbihkZXBfZGVsYXkpLAogICAgICAgICAgICAgICAgICAgIG4gPSBuKCkpCmBgYAoKIyMjIEV4ZXJjaXNlIDIKClRoZXJlIGFyZSA2OCBmbGlnaHRzIGhlYWRlZCB0byBTRk8gaW4gRmVicnVhcnkKCmBgYHtSIGZlYi1zZm99CnNmb19mZWJfZmxpZ2h0cyA8LSBueWNmbGlnaHRzICU+JQogIGZpbHRlcihkZXN0ID09ICJTRk8iLCBtb250aCA9PSAyKQpgYGAKCgojIyMgRXhlcmNpc2UgMwoKYGBge3IgZGVsYXktc2ZvfQpzZm9fZmViX2ZsaWdodHMgPC0gbnljZmxpZ2h0cyAlPiUKICBmaWx0ZXIoZGVzdCA9PSAiU0ZPIikKZ2dwbG90KGRhdGEgPSBzZm9fZmViX2ZsaWdodHMsIGFlcyh4ID0gYXJyX2RlbGF5KSkgKyBnZW9tX2hpc3RvZ3JhbSgpCmBgYAoKIyMjIEV4ZXJjaXNlIDQKCmBgYHtSIGZpbmQtZGQtcmFuZ2V9CnNmb19mZWJfZmxpZ2h0cyAlPiUKICBncm91cF9ieSAoY2FycmllcikgJT4lCiAgc3VtbWFyaXNlKG1lZGlhbl9kZCA9IG1lZGlhbihhcnJfZGVsYXkpLCBpcXJfZGQgPSBJUVIoYXJyX2RlbGF5KSwgbl9mbGlnaHRzID0gbigpKQpgYGAKCiMjIyBFeGVyY2lzZSA1CgpQcm9zOiBJbiB0aGUgbW9udGggb2YgbG93ZXN0IG1lYW4gZGVwYXJ0dXJlIGRlbGF5LCB3ZSBzYXZlIHdhaXRpbmctdGltZSB3aGVuIHdlIHNjaGVkdWxlIGZvciB0cmF2ZWwuCgpDb25zOiBJbiB0aGUgbW9udGggb2YgaGlnaGVzdCBtZWRpYW4gZGVwYXJ0dXJlIGRlbGF5LCB0aGUgYXJyaXZhbCB0aW1lIHRvIG91ciBkZXN0aW5hdGlvbiB3aWxsIGJlIGxhdGUgYW5kIHRyYXZhbGxpbmcgc2NoZWR1bGUgd2lsbCBiZSBkaXNydXB0ZWQuCgoKYGBge1IgaGlnaGVzdC1sb3dlc3R9Cm55Y2ZsaWdodHMgJT4lCiAgZ3JvdXBfYnkobW9udGgpICU+JQogIHN1bW1hcmlzZShtZWFuX2RkID0gbWVhbihkZXBfZGVsYXkpKSAlPiUKICBhcnJhbmdlKGRlc2MobWVhbl9kZCkpCm55Y2ZsaWdodHMgJT4l
CiAgZ3JvdXBfYnkobW9udGgpICU+JQogIHN1bW1hcmlzZShtaW4gPSBtaW4obWVhbihkZXBfZGVsYXkpKSwgbWF4ID0gbWF4KG1lZGlhbihkZXBfZGVsYXkpKSkKYGBgCgojIyMgRXhlcmNpc2UgNgoKVGhlIEVXUiBhaXIgcG9ydCB3aXRoIG1pbmltdW0gb250aW1lIGRlcGFydHVyZSByYXRlIHNob3VsZCBiZSBjaG9zZW4uCgpgYGB7ciBvbnRpbWUtZGVsYXl9Cm55Y2ZsaWdodHMgPC0gbnljZmxpZ2h0cyAlPiUKICBtdXRhdGUoZGVwX3R5cGUgPSBpZmVsc2UoZGVwX2RlbGF5IDwgNSwgIm9uIHRpbWUiLCAiZGVsYXllZCIpKQpueWNmbGlnaHRzICU+JQogIGdyb3VwX2J5KG9yaWdpbikgJT4lCiAgc3VtbWFyaXNlKG90X2RlcF9yYXRlID0gc3VtKGRlcF90eXBlID09ICJvbiB0aW1lIikgLyBuKCkpICU+JQogIGFycmFuZ2UoZGVzYyhvdF9kZXBfcmF0ZSkpCmdncGxvdChkYXRhID0gbnljZmxpZ2h0cywgYWVzKHggPSBvcmlnaW4sIGZpbGwgPSBkZXBfdHlwZSkpICsgZ2VvbV9iYXIoKQoKYGBgCgojIyMgRXhlcmNpc2UgNwoKYGBge1Igc3BlZWQtbXBofQpueWNmbGlnaHRzICU+JQpncm91cF9ieShvcmlnaW4pICU+JQogIHN1bW1hcmlzZShhdmdfc3BlZWRfbXBoID0gc3VtKGRpc3RhbmNlICogNjAgLyBhaXJfdGltZSkgLyBuKCkpICU+JQogIGFycmFuZ2UoZGVzYyhhdmdfc3BlZWRfbXBoKSkKYGBgCgojIyMgRXhlcmNpc2UgOAoKYGBge1IgcmVsYXRpb25zaGlwLWJldC1kaXMtc3BlZWR9Cm55Y2ZsaWdodHMgPC0gbnljZmxpZ2h0cyAlPiUKICBtdXRhdGUoYXZnX3NwZWVkID0gZGlzdGFuY2UgKiA2MCAvIGFpcl90aW1lIC8gbigpKQoKZ2dwbG90KGRhdGEgPSBueWNmbGlnaHRzLCBhZXMoeCA9IGRpc3RhbmNlLCB5ID0gYXZnX3NwZWVkKSkgKyBnZW9tX3BvaW50KCkKYGBgCgoKIyMjIEV4ZXJjaXNlIDkKVGhlIGN1dG9mZiBwb2ludCBpcyA1IGZvciBkZXBhcnR1cmUgZGVsYXlzIHRoYXQgd2UgZXN0aW1hdGUgb3VyIGFycml2YWwgdGltZSBpcyBvbiB0aW1lLgoKYGBge3IgY3V0b2ZmLXZhbHVlfQpuYW1lcyhueWNmbGlnaHRzKQpnZ3Bsb3QoZGF0YSA9IG55Y2ZsaWdodHMpICsgZ2VvbV9wb2ludChtYXBwaW5nID0gYWVzKHggPSBkZXBfZGVsYXksIHkgPSBhcnJfZGVsYXksIGNvbG9yID0gY2FycmllcikpCmdncGxvdChkYXRhID0gbnljZmxpZ2h0cykgKyBnZW9tX3BvaW50KG1hcHBpbmcgPSBhZXMoeCA9IGRlcF9kZWxheSwgeSA9IGFycl9kZWxheSwgY29sb3IgPSBkZXBfdHlwZSkpCmBgYAo=