# Load the nycflights data set and take a first look at its structure.
# NOTE(review): no library() calls are visible in this file; data(),
# glimpse() and ggplot() presumably rely on openintro and the tidyverse
# being attached already - confirm.
data(nycflights)
names(nycflights)
?nycflights
glimpse(nycflights)

# Histogram of departure delays using ggplot2's default binning.
ggplot(data = nycflights, aes(x = dep_delay)) +
  geom_histogram()

# Same histogram with narrow bins (binwidth = 15).
ggplot(data = nycflights, aes(x = dep_delay)) +
  geom_histogram(binwidth = 15)

# Same histogram with wide bins (binwidth = 150).
ggplot(data = nycflights, aes(x = dep_delay)) +
  geom_histogram(binwidth = 150)
# Exercise 1 - The three histograms show the same data but look
# different because of the bin width. The histogram with a 15-minute
# binwidth shows more detail in the distribution of delays, while the
# 150-minute binwidth groups too many values together and hides important
# patterns. The default histogram falls somewhere in between, showing the
# overall shape without as much detail as the smaller bins.
# Keep only the flights whose destination is LAX and plot their
# departure delays.
lax_flights <- nycflights |>
  filter(dest == "LAX")

ggplot(data = lax_flights, aes(x = dep_delay)) +
  geom_histogram()

# Mean and median departure delay of the LAX flights, plus the row count.
lax_flights |>
  summarise(
    mean_dd = mean(dep_delay),
    median_dd = median(dep_delay),
    n = n()
  )

# Flights with destination SFO in month 2; reused by the later summaries.
sfo_feb_flights <- nycflights |>
  filter(dest == "SFO", month == 2)
# Exercise 2 - 68 flights meet these criteria.
# Exercise 3 - The histogram peaks just before 0 and declines
# from there.
# Arrival-delay histogram for sfo_feb_flights with binwidth = 15.
ggplot(sfo_feb_flights, aes(x = arr_delay)) +
  geom_histogram(binwidth = 15)

# Median and IQR of departure delay, with the flight count, per origin.
sfo_feb_flights |>
  group_by(origin) |>
  summarise(
    median_dd = median(dep_delay),
    iqr_dd = IQR(dep_delay),
    n_flights = n()
  )

# Mean departure delay of all flights per month, largest mean first.
nycflights |>
  group_by(month) |>
  summarise(mean_dd = mean(dep_delay)) |>
  arrange(desc(mean_dd))

# Median and IQR of arrival delay, with the flight count, per carrier.
# The output columns keep the median_dd / iqr_dd names used above.
sfo_feb_flights |>
  group_by(carrier) |>
  summarise(
    median_dd = median(arr_delay),
    iqr_dd = IQR(arr_delay),
    n_flights = n()
  )
# Exercise 4 - American Airlines had the most variable arrival delays
# with 5
# TODO: the sentence above is cut off - state what the 5 measures.
# Rank the months by mean departure delay, highest mean first.
nycflights |>
  group_by(month) |>
  summarise(mean_dd = mean(dep_delay)) |>
  arrange(desc(mean_dd))
# Exercise 5 - Choosing the month with the lowest mean departure delay
# accounts for all flights, including rare extreme delays, but can be
# skewed by outliers. Choosing the month with the lowest median reflects
# the typical flight experience and ignores extreme delays, though it
# might miss occasional very long delays.
# Label each flight "on time" when dep_delay is below 5, otherwise
# "delayed", and store the label in a new dep_type column.
nycflights <- nycflights |>
  mutate(dep_type = ifelse(dep_delay < 5, "on time", "delayed"))

# Proportion of "on time" flights per origin, highest proportion first.
nycflights |>
  group_by(origin) |>
  summarise(ot_dep_rate = sum(dep_type == "on time") / n()) |>
  arrange(desc(ot_dep_rate))
# Exercise 6 - Based on the data, LaGuardia Airport has the highest
# on-time departure rate.
# Exercise 7 - Average speed is calculated as distance / (air_time / 60):
# dividing air_time by 60 converts minutes into hours, so the result is
# a speed in mph.
# Add avg_speed: distance divided by air_time converted to hours
# (air_time / 60).
nycflights <- nycflights |>
  mutate(avg_speed = distance / (air_time / 60))

# Scatterplot of average speed against distance.
ggplot(nycflights, aes(x = distance, y = avg_speed)) +
  geom_point()
# Exercise 8 - The scatterplot shows that average speed generally
# increases as distance increases.
# Arrival delay against departure delay for carriers AA, DL and UA,
# with one point colour per carrier.
nycflights |>
  filter(carrier %in% c("AA", "DL", "UA")) |>
  ggplot(aes(x = dep_delay, y = arr_delay, color = carrier)) +
  geom_point()
# Exercise 9 - From the scatterplot, it appears that flights with a
# departure delay of about 20–30 minutes or less can still sometimes
# arrive on time because flights may make up time in the air.
LS0tCnRpdGxlOiAiQ2hhcHRlciA2IEVEQSBGbGlnaHQgRGVsYXlzIgphdXRob3I6ICJCZW5qYW1pbiBUcmFjeSIKZGF0ZTogImBNYXJjaCAyMywgMjAyNmAiCm91dHB1dDogb3BlbmludHJvOjpsYWJfcmVwb3J0Ci0tLQoKZGF0YShueWNmbGlnaHRzKQoKbmFtZXMobnljZmxpZ2h0cykKP255Y2ZsaWdodHMKCmdsaW1wc2UobnljZmxpZ2h0cykKCmdncGxvdChkYXRhID0gbnljZmxpZ2h0cywgYWVzKHggPSBkZXBfZGVsYXkpKSArCiAgZ2VvbV9oaXN0b2dyYW0oKQogIApnZ3Bsb3QoZGF0YSA9IG55Y2ZsaWdodHMsIGFlcyh4ID0gZGVwX2RlbGF5KSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMTUpCiAgCmdncGxvdChkYXRhID0gbnljZmxpZ2h0cywgYWVzKHggPSBkZXBfZGVsYXkpKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSAxNTApCiAgCiNleGVyY2lzZSAxLSBUaGUgdGhyZWUgaGlzdG9ncmFtcyBzaG93IHRoZSBzYW1lIGRhdGEgYnV0IGxvb2sgZGlmZmVyZW50IGJlY2F1c2Ugb2YgdGhlIGJpbiB3aWR0aC4gVGhlIGhpc3RvZ3JhbSB3aXRoIGEgMTUtbWludXRlIGJpbndpZHRoIHNob3dzIG1vcmUgZGV0YWlsIGluIHRoZSBkaXN0cmlidXRpb24gb2YgZGVsYXlzLCB3aGlsZSB0aGUgMTUwLW1pbnV0ZSBiaW53aWR0aCBncm91cHMgdG9vIG1hbnkgdmFsdWVzIHRvZ2V0aGVyIGFuZCBoaWRlcyBpbXBvcnRhbnQgcGF0dGVybnMuIFRoZSBkZWZhdWx0IGhpc3RvZ3JhbSBmYWxscyBzb21ld2hlcmUgaW4gYmV0d2Vlbiwgc2hvd2luZyB0aGUgb3ZlcmFsbCBzaGFwZSB3aXRob3V0IGFzIG11Y2ggZGV0YWlsIGFzIHRoZSBzbWFsbGVyIGJpbnMuCiAgCmxheF9mbGlnaHRzIDwtIG55Y2ZsaWdodHMgfD4KICBmaWx0ZXIoZGVzdCA9PSAiTEFYIikKZ2dwbG90KGRhdGEgPSBsYXhfZmxpZ2h0cywgYWVzKHggPSBkZXBfZGVsYXkpKSArCiAgZ2VvbV9oaXN0b2dyYW0oKQogIApsYXhfZmxpZ2h0cyB8PgogIHN1bW1hcmlzZShtZWFuX2RkICAgPSBtZWFuKGRlcF9kZWxheSksIAogICAgICAgICAgICBtZWRpYW5fZGQgPSBtZWRpYW4oZGVwX2RlbGF5KSwgCiAgICAgICAgICAgIG4gICAgICAgICA9IG4oKSkKc2ZvX2ZlYl9mbGlnaHRzIDwtIG55Y2ZsaWdodHMgfD4KICBmaWx0ZXIoZGVzdCA9PSAiU0ZPIiwgbW9udGggPT0gMikKCiNFeGVyY2lzZSAyLSA2OCBmbGlnaHRzIG1lZXQgdGhpcyBjcml0ZXJpYQoKI0V4ZXJjaXNlIDMtIFRoZSBzaGFwZSBvZiB0aGUgaGlzdG9ncmFtIHBlYWtzIGJlZm9yZSAwIGFuZCBkZWNsaW5lcyBmcm9tIHRoZXJlLiAKCmdncGxvdChkYXRhID0gc2ZvX2ZlYl9mbGlnaHRzLCBhZXMoeCA9IGFycl9kZWxheSkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDE1KQoKc2ZvX2ZlYl9mbGlnaHRzIHw+CiAgZ3JvdXBfYnkob3JpZ2luKSB8PgogIHN1bW1hcmlzZShtZWRpYW5fZGQgPSBtZWRpYW4oZGVwX2RlbGF5KSwgaXFyX2RkID0gSVFSKGRlcF9kZWxheSksIG5fZmxpZ2h0cyA9IG4oKSkKCm55
Y2ZsaWdodHMgfD4KICBncm91cF9ieShtb250aCkgfD4KICBzdW1tYXJpc2UobWVhbl9kZCA9IG1lYW4oZGVwX2RlbGF5KSkgfD4KICBhcnJhbmdlKGRlc2MobWVhbl9kZCkpCgpzZm9fZmViX2ZsaWdodHMgfD4KICBncm91cF9ieShjYXJyaWVyKSB8PgogIHN1bW1hcmlzZShtZWRpYW5fZGQgPSBtZWRpYW4oYXJyX2RlbGF5KSwgaXFyX2RkID0gSVFSKGFycl9kZWxheSksIG5fZmxpZ2h0cyA9IG4oKSkKICAKI0V4ZXJjaXNlIDQtIEFtZXJpY2FuIEFpcmxpbmVzIGhhZCB0aGUgbW9zdCB2YXJpYWJsZSBhcnJpdmFsIGRlbGF5cyB3aXRoIDUKCm55Y2ZsaWdodHMgfD4KICBncm91cF9ieShtb250aCkgfD4KICBzdW1tYXJpc2UobWVhbl9kZCA9IG1lYW4oZGVwX2RlbGF5KSkgfD4KICBhcnJhbmdlKGRlc2MobWVhbl9kZCkpCiAgCiNFeGVyY2lzZSA1LSBDaG9vc2luZyB0aGUgbW9udGggd2l0aCB0aGUgbG93ZXN0IG1lYW4gZGVwYXJ0dXJlIGRlbGF5IGFjY291bnRzIGZvciBhbGwgZmxpZ2h0cywgaW5jbHVkaW5nIHJhcmUgZXh0cmVtZSBkZWxheXMsIGJ1dCBjYW4gYmUgc2tld2VkIGJ5IG91dGxpZXJzLiBDaG9vc2luZyB0aGUgbW9udGggd2l0aCB0aGUgbG93ZXN0IG1lZGlhbiByZWZsZWN0cyB0aGUgdHlwaWNhbCBmbGlnaHQgZXhwZXJpZW5jZSBhbmQgaWdub3JlcyBleHRyZW1lIGRlbGF5cywgdGhvdWdoIGl0IG1pZ2h0IG1pc3Mgb2NjYXNpb25hbCB2ZXJ5IGxvbmcgZGVsYXlzLgoKbnljZmxpZ2h0cyA8LSBueWNmbGlnaHRzIHw+CiAgbXV0YXRlKGRlcF90eXBlID0gaWZlbHNlKGRlcF9kZWxheSA8IDUsICJvbiB0aW1lIiwgImRlbGF5ZWQiKSkKCm55Y2ZsaWdodHMgfD4KICBncm91cF9ieShvcmlnaW4pIHw+CiAgc3VtbWFyaXNlKG90X2RlcF9yYXRlID0gc3VtKGRlcF90eXBlID09ICJvbiB0aW1lIikgLyBuKCkpIHw+CiAgYXJyYW5nZShkZXNjKG90X2RlcF9yYXRlKSkKICAKI0V4ZXJjaXNlIDYtIEJhc2VkIG9uIHRoZSBkYXRhLCBMYUd1YXJkaWEgQWlycG9ydCBoYXMgdGhlIG1vc3Qgb24gdGltZSBkZXBhcnR1cmVzLgoKI0V4ZXJjaXNlIDctIEF2ZXJhZ2Ugc3BlZWQgaXMgY2FsY3VsYXRlZCBhcyBkaXN0YW5jZSDDtyAoYWlyX3RpbWUgLyA2MCkgdG8gY29udmVydCBtaW51dGVzIGludG8gaG91cnMgYW5kIHByb2R1Y2Ugc3BlZWQgaW4gbXBoLgoKbnljZmxpZ2h0cyA8LSBueWNmbGlnaHRzICU+JQogIG11dGF0ZShhdmdfc3BlZWQgPSBkaXN0YW5jZSAvIChhaXJfdGltZSAvIDYwKSkKCmdncGxvdChkYXRhID0gbnljZmxpZ2h0cywgYWVzKHggPSBkaXN0YW5jZSwgeSA9IGF2Z19zcGVlZCkpICsKICBnZW9tX3BvaW50KCkKCiNFeGVyY2lzZSA4LSBUaGUgc2NhdHRlcnBsb3Qgc2hvd3MgdGhhdCBhdmVyYWdlIHNwZWVkIGdlbmVyYWxseSBpbmNyZWFzZXMgYXMgZGlzdGFuY2UgaW5jcmVhc2VzCgpueWNmbGlnaHRzICU+JQogIGZpbHRlcihjYXJyaWVyICVpbiUgYygiQUEiLCAiREwiLCAiVUEiKSkgJT4lCiAgZ2dwbG90KGFlcyh4ID0g
ZGVwX2RlbGF5LCB5ID0gYXJyX2RlbGF5LCBjb2xvciA9IGNhcnJpZXIpKSArCiAgZ2VvbV9wb2ludCgpCgojRXhlcmNpc2UgOS0gRnJvbSB0aGUgc2NhdHRlcnBsb3QsIGl0IGFwcGVhcnMgdGhhdCBmbGlnaHRzIHdpdGggYSBkZXBhcnR1cmUgZGVsYXkgb2YgYWJvdXQgMjDigJMzMCBtaW51dGVzIG9yIGxlc3MgY2FuIHN0aWxsIHNvbWV0aW1lcyBhcnJpdmUgb24gdGltZSBiZWNhdXNlIGZsaWdodHMgbWF5IG1ha2UgdXAgdGltZSBpbiB0aGUgYWlyLg==