library(tidyverse)
library(openintro)
# Load the nycflights data set and list the variables it contains.
data(nycflights)
colnames(nycflights)
## [1] "year" "month" "day" "dep_time" "dep_delay" "arr_time"
## [7] "arr_delay" "carrier" "tailnum" "flight" "origin" "dest"
## [13] "air_time" "distance" "hour" "minute"
Exercise 1
Look carefully at these three histograms. How do they compare? Are
features revealed in one that are obscured in another?
These three histograms use different bin widths and therefore reveal
different features. The first histogram, with no bin width specified,
falls back on the default of 30 bins and gives a general view of the
data distribution. The second plot, with a bin width of 15, divides the
data into narrower bins that show detailed information about the
distribution and smaller variations in departure delays. The third plot,
with a much broader bin width of 150, shows only the high-level shape of
the distribution and lumps the departure delays into a few large groups,
obscuring the detail visible in the other two.
# Departure-delay histogram with default binning: no binwidth is given, so
# stat_bin() uses 30 bins and prints a message suggesting `binwidth`.
nycflights %>%
  ggplot(aes(x = dep_delay)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Departure-delay histogram with 15-minute bins.
nycflights %>%
  ggplot(aes(x = dep_delay)) +
  geom_histogram(binwidth = 15)

# Departure-delay histogram with 150-minute bins.
nycflights %>%
  ggplot(aes(x = dep_delay)) +
  geom_histogram(binwidth = 150)

Exercise 2
Create a new data frame that includes flights headed to SFO in
February, and save this data frame as sfo_feb_flights. How many flights
meet these criteria? There are 68 flights.
# Keep only the February flights bound for San Francisco (SFO).
sfo_feb_flights <- nycflights %>%
  filter(dest == "SFO", month == 2)

# Report how many flights meet the criteria. The previous `view()` call only
# opens an interactive data viewer, so it contributes nothing to a knitted
# report and never actually produced the count.
nrow(sfo_feb_flights)
Exercise 3
Describe the distribution of the arrival delays of these flights
using a histogram and appropriate summary statistics. Hint: The summary
statistics you use should depend on the shape of the distribution.
The histogram shows a positively (right) skewed distribution of
arrival delays: the mean and median do not sit at the center of the
range, and some data points deviate significantly to the far right.
# Mean and median arrival delay for the SFO February flights, together with
# the number of flights summarised.
sfo_feb_flights %>%
  summarise(
    mean_ad = mean(arr_delay),
    median_ad = median(arr_delay),
    n = n()
  )
## # A tibble: 1 × 3
## mean_ad median_ad n
## <dbl> <dbl> <int>
## 1 -4.5 -11 68
# Arrival-delay histogram for the SFO February flights, 15-minute bins.
sfo_feb_flights %>%
  ggplot(aes(x = arr_delay)) +
  geom_histogram(binwidth = 15)

Exercise 4
Calculate the median and interquartile range for arr_delays of
flights in the sfo_feb_flights data frame, grouped by carrier. Which
carrier has the most variable arrival delays?
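Variability is measured by the IQR rather than the median. DL and UA
are tied for the largest IQR (22), so they have the most variable
arrival delays. AA has the largest median (5), but its IQR is only 17.5.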
# Per-carrier centre (mean, median) and spread (IQR) of arrival delay for the
# SFO February flights, plus the number of flights per carrier.
# dplyr is already attached by library(tidyverse) at the top of the report,
# so no separate library(dplyr) call is needed here.
sfo_feb_flights %>%
  group_by(carrier) %>%
  summarise(
    mean_ad = mean(arr_delay),
    median_ad = median(arr_delay),
    iqr_ad = IQR(arr_delay),
    n = n()
  )
## # A tibble: 5 × 5
## carrier mean_ad median_ad iqr_ad n
## <chr> <dbl> <dbl> <dbl> <int>
## 1 AA 11.5 5 17.5 10
## 2 B6 -6.33 -10.5 12.2 6
## 3 DL -13.5 -15 22 19
## 4 UA 1.81 -10 22 21
## 5 VX -13.8 -22.5 21.2 12
Exercise 5
Suppose you really dislike departure delays and you want to schedule
your travel in a month that minimizes your potential departure delay
leaving NYC. One option is to choose the month with the lowest mean
departure delay. Another option is to choose the month with the lowest
median departure delay. What are the pros and cons of these two
choices?
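Pros: saves time, high flight availability, smoother travel. Cons:
earlier schedule constraints; may not accurately reflect the delay
experienced by most travelers. The mean takes every flight into
account, so a few very long delays can inflate it, whereas the median
describes the typical flight but ignores how bad the worst delays are.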
# Group data by month and calculate mean and median departure delays.
# The exercise is about *departure* delay, so summarise dep_delay (the
# earlier version summarised arr_delay by mistake).
# NOTE(review): printed output captured before this fix is stale; re-knit
# the report to refresh it.
month_delays <- nycflights %>%
  group_by(month) %>%
  summarise(
    mean_dd = mean(dep_delay),
    median_dd = median(dep_delay),
    n = n()
  )

# Find the month with the lowest mean departure delay. slice_min() keeps
# ties and avoids comparing doubles with `==`.
min_mean_delay_month <- month_delays %>%
  slice_min(mean_dd, n = 1)

# Find the month with the lowest median departure delay.
min_median_delay_month <- month_delays %>%
  slice_min(median_dd, n = 1)

min_mean_delay_month
min_median_delay_month
## # A tibble: 1 × 4
## month mean_ad median_ad n
## <int> <dbl> <dbl> <int>
## 1 9 -4.03 -12 2681
## # A tibble: 1 × 4
## month mean_ad median_ad n
## <int> <dbl> <dbl> <int>
## 1 9 -4.03 -12 2681
Exercise 6
If you were selecting an airport simply based on on time departure
percentage, which NYC airport would you choose to fly out of?
LGA (LaGuardia Airport) can be chosen.
# On-time departure rate for each NYC origin airport, best airport first.
# The exercise asks for the on-time departure percentage, which the earlier
# version never computed; it only wrapped mean()/median() in a no-op min().
# NOTE(review): "on time" is taken here as a departure delay under 5 minutes
# (the cutoff is an assumption -- confirm against the lab's definition).
# NOTE(review): printed output captured before this fix is stale; re-knit
# the report to refresh it.
on_time_cutoff <- 5
ot_dept_delay <- nycflights %>%
  group_by(origin) %>%
  summarise(
    ot_dep_rate = mean(dep_delay < on_time_cutoff),
    mean_dd = mean(dep_delay),
    median_dd = median(dep_delay),
    n = n()
  ) %>%
  arrange(desc(ot_dep_rate))
ot_dept_delay
## # A tibble: 3 × 4
## origin min_mean_ad min_median_ad n
## <chr> <dbl> <dbl> <int>
## 1 EWR 15.3 -1 11771
## 2 JFK 12.3 -1 10897
## 3 LGA 10.1 -3 10067
Exercise 7
Mutate the data frame so that it includes a new variable that
contains the average speed, avg_speed traveled by the plane for each
flight (in mph). Hint: Average speed can be calculated as distance
divided by number of hours of travel, and note that air_time is given in
minutes.
# Add avg_speed in miles per hour: distance per minute of air time,
# scaled by 60 because air_time is recorded in minutes.
nycflights <- nycflights %>%
  mutate(
    avg_speed = (distance / air_time) * 60
  )
Exercise 8
Make a scatter plot of avg_speed vs. distance. Describe the
relationship between average speed and distance. Hint: Use
geom_point().
# Scatter plot of average speed against flight distance, one point per
# flight. The earlier version plotted dep_delay vs. arr_delay, which does not
# answer this exercise. Relies on the avg_speed column added by the mutate()
# in Exercise 7.
ggplot(data = nycflights, aes(x = distance, y = avg_speed)) +
  geom_point()

Exercise 9
Replicate the following plot. Hint: The data frame plotted only
contains flights from American Airlines, Delta Airlines, and United
Airlines, and the points are colored by carrier. Once you replicate the
plot, determine (roughly) what the cutoff point is for departure delays
where you can still expect to get to your destination on time.
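Points below the dashed diagonal line (where the arrival delay equals
the departure delay) are flights that made up time in the air. For the
specified carriers, the cutoff is roughly the largest departure delay at
which most points still show an arrival delay at or below zero, i.e.
where we can still expect to get to our destination on time.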
# Restrict the data to the three carriers in the target plot:
# American (AA), Delta (DL) and United (UA).
nycflights_3carriers <- nycflights %>%
  filter(carrier %in% c("AA", "DL", "UA"))

# Arrival delay against departure delay, coloured by carrier. The dashed
# y = x reference line marks flights whose arrival delay equals their
# departure delay.
ggplot(
  nycflights_3carriers,
  aes(x = dep_delay, y = arr_delay, color = carrier)
) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  labs(x = "Departure Delay (minutes)", y = "Arrival Delay (minutes)") +
  theme_minimal()

LS0tCnRpdGxlOiAiREFUQSA2MDYgTGFiIDI6IEludHJvIHRvIERhdGEiCmF1dGhvcjogIkx3aW4gU2h3ZSIKZGF0ZTogIlNlcHQgMTAgMjAyMyIKb3V0cHV0OiBvcGVuaW50cm86OmxhYl9yZXBvcnQKLS0tCgpgYGB7ciBsb2FkLXBhY2thZ2VzLCBtZXNzYWdlPUZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShvcGVuaW50cm8pCmRhdGEoIm55Y2ZsaWdodHMiKQpuYW1lcyhueWNmbGlnaHRzKQpgYGAKCiMjIEV4ZXJjaXNlIDEgCgpMb29rIGNhcmVmdWxseSBhdCB0aGVzZSB0aHJlZSBoaXN0b2dyYW1zLiBIb3cgZG8gdGhleSBjb21wYXJlPyBBcmUgZmVhdHVyZXMgcmV2ZWFsZWQgaW4gb25lIHRoYXQgYXJlIG9ic2N1cmVkIGluIGFub3RoZXI/CgpUaGVzZSB0aHJlZSBoaXN0b2dyYW1zIGhhdmUgZGlmZmVyZW50IGJpbiB3aWR0aHMgYW5kIGluc2lnaHRzLiBUaGUgZmlyc3QgaGlzdG9ncmFtIHdpdGhvdXQgYW55IHNwZWNpZmllZCBiaW4gd2lkdGggYWp1c3QgYXV0b21hdGljYWxseSB0byB0aGUgYmVzdCBwcmVzZW50IHRoZSBkYXRhIGRpc3RyaWJ1dGlvbiBmb3IgaWRlbnRpZnlpbmcgc3BlY2lmaWMgcGF0dGVybnMuClRoZSBzZWNvbmQgcGxvdCB3aXRoIGJpbiB3aWR0aCBvZiAxNSBkaXZpZGVzIHRoZSBkYXRhIGludG8gbmFycm93ZXIgYmlucyB0aGF0IHNob3dzIGRldGFpbGVkIGluZm9ybWF0aW9uIGFib3V0IGRpc3RyaWJ1dGlvbiBhbmQgc21hbGxlciB2YXJpYXRpb25zIGluIGRlcGFydHVyZSBkZWxheXMuIApUaGUgdGhpcmQgcGxvdCB3aXRoIGJyb2FkZXIgYmluIHdpZHRoIG9mIDE1MCBjYW4gYmUgc2VlbiB0aGUgaGlnaCBsZXZlbCBpbnNpZ2h0cyBvZiBkYXRhIGRpc3RyaWJ1dGlvbiBhbmQgbGFyZ2VyIGdyb3VwcyBvZiBkZXBhcnR1cmUgZGVsYXkuCgpgYGB7ciB0aGUtZGF0YX0KZ2dwbG90KGRhdGEgPSBueWNmbGlnaHRzLCBhZXMoeCA9IGRlcF9kZWxheSkpICsKICBnZW9tX2hpc3RvZ3JhbSgpCmdncGxvdChkYXRhID0gbnljZmxpZ2h0cywgYWVzKHggPSBkZXBfZGVsYXkpKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSAxNSkKZ2dwbG90KGRhdGEgPSBueWNmbGlnaHRzLCBhZXMoeCA9IGRlcF9kZWxheSkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDE1MCkKYGBgCgoKIyMjIEV4ZXJjaXNlIDIgCgpDcmVhdGUgYSBuZXcgZGF0YSBmcmFtZSB0aGF0IGluY2x1ZGVzIGZsaWdodHMgaGVhZGVkIHRvIFNGTyBpbiBGZWJydWFyeSwgYW5kIHNhdmUgdGhpcyBkYXRhIGZyYW1lIGFzIHNmb19mZWJfZmxpZ2h0cy4gSG93IG1hbnkgZmxpZ2h0cyBtZWV0IHRoZXNlIGNyaXRlcmlhPwpUaGVyZSBhcmUgNjggZmxpZ2h0cy4KCmBgYHtyIHRyZW5kLWdpcmxzfQpzZm9fZmViX2ZsaWdodHMgPC0gbnljZmxpZ2h0cyAlPiUKICBmaWx0ZXIoZGVzdCA9PSAiU0ZPIiwgbW9udGggPT0gMikKc2ZvX2ZlYl9mbGlnaHRzIDwtIG55Y2ZsaWdodHMgJT4lIGZpbHRlcihkZXN0ID09ICJTRk8iLCBtb250aCA9PSAyKQp2
aWV3KHNmb19mZWJfZmxpZ2h0cykKYGBgCgojIyBFeGVyY2lzZSAzCgpEZXNjcmliZSB0aGUgZGlzdHJpYnV0aW9uIG9mIHRoZSBhcnJpdmFsIGRlbGF5cyBvZiB0aGVzZSBmbGlnaHRzIHVzaW5nIGEgaGlzdG9ncmFtIGFuZCBhcHByb3ByaWF0ZSBzdW1tYXJ5IHN0YXRpc3RpY3MuIEhpbnQ6IFRoZSBzdW1tYXJ5IHN0YXRpc3RpY3MgeW91IHVzZSBzaG91bGQgZGVwZW5kIG9uIHRoZSBzaGFwZSBvZiB0aGUgZGlzdHJpYnV0aW9uLgoKVGhlIGhpc3RvZ3JhbSBzZWVtaW5nbHkgbG9va3MgcG9zaXRpdmUgc2tld25lc3MgZGlzdHJpYnV0aW9uIG9mIGFycml2YWwgZGVsYXlzIHdpdGggbWVhbiBhbmQgbWVkaXVtIGRvZXMgbm90IGV4aXQgYXQgIHRoZSBjZW50ZXIsIHNvbWUgZGF0YSBwb2ludHMgZGV2aWF0ZSBzaWduaWZpY2FudGx5IHRvIHRoZSBmYXIgcmlnaHQuCgpgYGB7ciBhcnJpdmFsLWRlbGF5c30Kc2ZvX2ZlYl9mbGlnaHRzICU+JQogIHN1bW1hcmlzZShtZWFuX2FkICAgPSBtZWFuKGFycl9kZWxheSksIAogICAgICAgICAgICBtZWRpYW5fYWQgPSBtZWRpYW4oYXJyX2RlbGF5KSwgbiA9IG4oKSkKZ2dwbG90KGRhdGEgPSBzZm9fZmViX2ZsaWdodHMsIGFlcyh4ID0gYXJyX2RlbGF5KSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMTUpCgpgYGAKCgojIyBFeGVyY2lzZSA0CgpDYWxjdWxhdGUgdGhlIG1lZGlhbiBhbmQgaW50ZXJxdWFydGlsZSByYW5nZSBmb3IgYXJyX2RlbGF5cyBvZiBmbGlnaHRzIGluIGluIHRoZSBzZm9fZmViX2ZsaWdodHMgZGF0YSBmcmFtZSwgZ3JvdXBlZCBieSBjYXJyaWVyLiBXaGljaCBjYXJyaWVyIGhhcyB0aGUgbW9zdCB2YXJpYWJsZSBhcnJpdmFsIGRlbGF5cz8KClRoZSBjYXJyaWVyIHdpdGggdGhlIGxhcmdlc3QgbWVkaWFuIEFBIGhhcyB0aGUgbW9zdGUgdmFyaWFibGUgYXJyaXZhbCBkZWxheXMuIAoKYGBge3IgZmluZC1tZWQtSVFSfQpsaWJyYXJ5KGRwbHlyKQpzZm9fZmViX2ZsaWdodHMgJT4lCiAgZ3JvdXBfYnkoY2FycmllcikgJT4lCiAgc3VtbWFyaXNlKG1lYW5fYWQgICA9IG1lYW4oYXJyX2RlbGF5KSwgCiAgICAgICAgICAgIG1lZGlhbl9hZCA9IG1lZGlhbihhcnJfZGVsYXkpLAogICAgICAgICAgICBpcXJfYWQgPSBJUVIoYXJyX2RlbGF5KSwgbiA9IG4oKSkKYGBgCgojIyBFeGVyY2lzZSA1IAoKU3VwcG9zZSB5b3UgcmVhbGx5IGRpc2xpa2UgZGVwYXJ0dXJlIGRlbGF5cyBhbmQgeW91IHdhbnQgdG8gc2NoZWR1bGUgeW91ciB0cmF2ZWwgaW4gYSBtb250aCB0aGF0IG1pbmltaXplcyB5b3VyIHBvdGVudGlhbCBkZXBhcnR1cmUgZGVsYXkgbGVhdmluZyBOWUMuIE9uZSBvcHRpb24gaXMgdG8gY2hvb3NlIHRoZSBtb250aCB3aXRoIHRoZSBsb3dlc3QgbWVhbiBkZXBhcnR1cmUgZGVsYXkuIEFub3RoZXIgb3B0aW9uIGlzIHRvIGNob29zZSB0aGUgbW9udGggd2l0aCB0aGUgbG93ZXN0IG1lZGlhbiBkZXBhcnR1cmUgZGVsYXkuIFdoYXQgYXJlIHRoZSBwcm9zIGFuZCBjb25zIG9m
IHRoZXNlIHR3byBjaG9pY2VzPwoKICBQcm86IFNhdmUgdGltZSwgaGlnaCBmbGlnaHQgYXZhaWxhYmlsaXR5LCBzbW9vdGhseSB0cmF2ZWwKICBDb25zOiBlYXJsaWVyIHNjaGVkdWxlIGNvbnN0cmFpbnQsIG1heSBub3QgYWNjdXJhdGUgZGVsYXkgZXhwZXJpZW5jZWQgYnkgbW9zdCB0cmF2ZWxlcnMuCgpgYGB7ciBvbnRpbWUtZGVwdC1yYXRlfQoKIyBHcm91cCBkYXRhIGJ5IG1vbnRoIGFuZCBjYWxjdWxhdGUgbWVhbiBhbmQgbWVkaWFuIGRlcGFydHVyZSBkZWxheXMKbW9udGhfZGVsYXlzIDwtIG55Y2ZsaWdodHMgJT4lCiAgZ3JvdXBfYnkobW9udGgpICU+JQogIHN1bW1hcmlzZShtZWFuX2FkICAgPSBtZWFuKGFycl9kZWxheSksIAogICAgICAgICAgICBtZWRpYW5fYWQgPSBtZWRpYW4oYXJyX2RlbGF5KSwgbiA9IG4oKSkKCiMgRmluZCB0aGUgbW9udGggd2l0aCB0aGUgbG93ZXN0IG1lYW4gZGVwYXJ0dXJlIGRlbGF5Cm1pbl9tZWFuX2RlbGF5X21vbnRoIDwtIG1vbnRoX2RlbGF5cyAlPiUKICBmaWx0ZXIobWVhbl9hZCA9PSBtaW4obWVhbl9hZCkpCgojIEZpbmQgdGhlIG1vbnRoIHdpdGggdGhlIGxvd2VzdCBtZWRpYW4gZGVwYXJ0dXJlIGRlbGF5Cm1pbl9tZWRpYW5fZGVsYXlfbW9udGggPC0gbW9udGhfZGVsYXlzICU+JQogIGZpbHRlcihtZWRpYW5fYWQgPT0gbWluKG1lZGlhbl9hZCkpCgptaW5fbWVhbl9kZWxheV9tb250aAptaW5fbWVkaWFuX2RlbGF5X21vbnRoCgpgYGAKCgojIyBFeGVyY2lzZSA2CgpJZiB5b3Ugd2VyZSBzZWxlY3RpbmcgYW4gYWlycG9ydCBzaW1wbHkgYmFzZWQgb24gb24gdGltZSBkZXBhcnR1cmUgcGVyY2VudGFnZSwgd2hpY2ggTllDIGFpcnBvcnQgd291bGQgeW91IGNob29zZSB0byBmbHkgb3V0IG9mPwoKTEdBIChMYUd1YXJkaWEgQWlycG9ydCkgY2FuIGJlIGNob3Nlbi4KCmBgYHtyIGNob29zZS1haXJwb3J0fQpvdF9kZXB0X2RlbGF5IDwtIG55Y2ZsaWdodHMgJT4lCiAgZ3JvdXBfYnkob3JpZ2luKSAlPiUKICBzdW1tYXJpc2UobWluX21lYW5fYWQgICA9IG1pbihtZWFuKGRlcF9kZWxheSkpLCAKICAgICAgICAgICAgbWluX21lZGlhbl9hZCA9IG1pbihtZWRpYW4oZGVwX2RlbGF5KSksIG4gPSBuKCkpCm90X2RlcHRfZGVsYXkKYGBgCgoKIyMgRXhlcmNpc2UgNyAKCk11dGF0ZSB0aGUgZGF0YSBmcmFtZSBzbyB0aGF0IGl0IGluY2x1ZGVzIGEgbmV3IHZhcmlhYmxlIHRoYXQgY29udGFpbnMgdGhlIGF2ZXJhZ2Ugc3BlZWQsIGF2Z19zcGVlZCB0cmF2ZWxlZCBieSB0aGUgcGxhbmUgZm9yIGVhY2ggZmxpZ2h0IChpbiBtcGgpLiBIaW50OiBBdmVyYWdlIHNwZWVkIGNhbiBiZSBjYWxjdWxhdGVkIGFzIGRpc3RhbmNlIGRpdmlkZWQgYnkgbnVtYmVyIG9mIGhvdXJzIG9mIHRyYXZlbCwgYW5kIG5vdGUgdGhhdCBhaXJfdGltZSBpcyBnaXZlbiBpbiBtaW51dGVzLgoKCmBgYHtyIGF2ZXJhZ2Utc3BlZWR9Cm55Y2ZsaWdodHMgPC0gbnljZmxpZ2h0cyAlPiUKICBtdXRhdGUoYXZnX3NwZWVkID0gZGlz
dGFuY2UgLyBhaXJfdGltZSAqIDYwKQpgYGAKCiMjIEV4ZXJjaXNlIDgKCk1ha2UgYSBzY2F0dGVyIHBsb3Qgb2YgYXZnX3NwZWVkIHZzLiBkaXN0YW5jZS4gRGVzY3JpYmUgdGhlIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIGF2ZXJhZ2Ugc3BlZWQgYW5kIGRpc3RhbmNlLiBIaW50OiBVc2UgZ2VvbV9wb2ludCgpLgoKYGBge3IgZ3JhcGh9CmdncGxvdChkYXRhID0gbnljZmxpZ2h0cywgYWVzKHggPSBkZXBfZGVsYXksIHkgPSBhcnJfZGVsYXksIGNvbG9yPSBjYXJyaWVyKSkgKyBnZW9tX3BvaW50KCkKYGBgCgojIyBFeGVyY2lzZSA5CgpSZXBsaWNhdGUgdGhlIGZvbGxvd2luZyBwbG90LiBIaW50OiBUaGUgZGF0YSBmcmFtZSBwbG90dGVkIG9ubHkgY29udGFpbnMgZmxpZ2h0cyBmcm9tIEFtZXJpY2FuIEFpcmxpbmVzLCBEZWx0YSBBaXJsaW5lcywgYW5kIFVuaXRlZCBBaXJsaW5lcywgYW5kIHRoZSBwb2ludHMgYXJlIGNvbG9yZWQgYnkgY2Fycmllci4gT25jZSB5b3UgcmVwbGljYXRlIHRoZSBwbG90LCBkZXRlcm1pbmUgKHJvdWdobHkpIHdoYXQgdGhlIGN1dG9mZiBwb2ludCBpcyBmb3IgZGVwYXJ0dXJlIGRlbGF5cyB3aGVyZSB5b3UgY2FuIHN0aWxsIGV4cGVjdCB0byBnZXQgdG8geW91ciBkZXN0aW5hdGlvbiBvbiB0aW1lLgoKTW9zdCBwb2ludHMgYmVsb3cgdGhlIGRhc2hlZCBkaWFnb25hbCBsaW5lIGFyZSBzYWlkIHRvIGJlIGN1dG9mZiBwb2ludHMgd2hlcmUgd2UgY2FuIHN0aWxsIGV4cGVjdCB0byBnZXQgdG8gb3VyIGRlc3RpbmF0aW9uIG9uIHRpbWUgZm9yIHNwZWNpZmllZCBjYXJyaWVycy4KCmBgYHtyIGN1dG9mZi1wb2ludHN9Cm55Y2ZsaWdodHNfM2NhcnJpZXJzIDwtIG55Y2ZsaWdodHMgJT4lCiAgZmlsdGVyKGNhcnJpZXIgPT0gIkFBIiB8IGNhcnJpZXIgPT0gIkRMIiB8IGNhcnJpZXIgPT0gIlVBIikKZ2dwbG90KGRhdGEgPSBueWNmbGlnaHRzXzNjYXJyaWVycywgYWVzKHggPSBkZXBfZGVsYXksIHkgPSBhcnJfZGVsYXksIGNvbG9yPSBjYXJyaWVyKSkgKyBnZW9tX3BvaW50KCkgKwogIGdlb21fYWJsaW5lKGludGVyY2VwdCA9IDAsIHNsb3BlID0gMSwgbGluZXR5cGUgPSAiZGFzaGVkIikgKwogIGxhYnMoeCA9ICJEZXBhcnR1cmUgRGVsYXkgKG1pbnV0ZXMpIiwgeSA9ICJBcnJpdmFsIERlbGF5IChtaW51dGVzKSIpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCg==