10 Sept 2020
## Warning: package 'tidyverse' was built under R version 4.0.2
## Warning: package 'tibble' was built under R version 4.0.2
## Warning: package 'tidyr' was built under R version 4.0.2
## Warning: package 'dplyr' was built under R version 4.0.2
## Warning: package 'openintro' was built under R version 4.0.2
## Warning: package 'airports' was built under R version 4.0.2
## Warning: package 'cherryblossom' was built under R version 4.0.2
## Warning: package 'usdata' was built under R version 4.0.2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Keep only the flights whose destination is LAX.
lax_flights <- filter(nycflights, dest == "LAX")

# Histogram of departure delays. No binwidth is given, so ggplot2 falls back
# to its default of 30 bins (and says so in a message).
ggplot(lax_flights, mapping = aes(x = dep_delay)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Rebuild the LAX subset so this chunk does not depend on an earlier one.
lax_flights <- filter(nycflights, dest == "LAX")

# Same histogram of departure delays, now with an explicit bin width of 5.
lax_flights %>%
  ggplot(mapping = aes(x = dep_delay)) +
  geom_histogram(binwidth = 5)
## # A tibble: 1 x 3
## mean_dd median_dd n
## <dbl> <dbl> <int>
## 1 9.78 -1 1583
## year month day dep_time dep_delay
## Min. :2013 Min. :2 Min. : 1.00 Min. : 613 Min. :-10.0
## 1st Qu.:2013 1st Qu.:2 1st Qu.: 7.00 1st Qu.: 943 1st Qu.: -5.0
## Median :2013 Median :2 Median :16.00 Median :1268 Median : -2.0
## Mean :2013 Mean :2 Mean :15.26 Mean :1298 Mean : 10.5
## 3rd Qu.:2013 3rd Qu.:2 3rd Qu.:22.50 3rd Qu.:1742 3rd Qu.: 9.0
## Max. :2013 Max. :2 Max. :28.00 Max. :2159 Max. :209.0
## arr_time arr_delay carrier tailnum
## Min. : 118 Min. :-66.00 Length:68 Length:68
## 1st Qu.:1233 1st Qu.:-21.25 Class :character Class :character
## Median :1497 Median :-11.00 Mode :character Mode :character
## Mean :1607 Mean : -4.50
## 3rd Qu.:2062 3rd Qu.: 2.00
## Max. :2256 Max. :196.00
## flight origin dest air_time
## Min. : 11.0 Length:68 Length:68 Min. :317.0
## 1st Qu.: 85.0 Class :character Class :character 1st Qu.:345.0
## Median : 641.0 Mode :character Mode :character Median :354.0
## Mean : 795.1 Mean :351.9
## 3rd Qu.:1487.2 3rd Qu.:360.0
## Max. :2126.0 Max. :376.0
## distance hour minute
## Min. :2565 Min. : 6.00 Min. : 1.00
## 1st Qu.:2586 1st Qu.: 9.00 1st Qu.:25.00
## Median :2586 Median :12.50 Median :33.50
## Mean :2584 Mean :12.62 Mean :36.35
## 3rd Qu.:2586 3rd Qu.:17.00 3rd Qu.:54.00
## Max. :2586 Max. :21.00 Max. :59.00
Now I need to start over.
I tried to make the histogram with the sfo_feb_flights file I created before class. It didn’t work. I got the following error message:
Error: `data` must be a data frame, or other object coercible by `fortify()`, not a character vector
## year month day dep_time dep_delay
## Min. :2013 Min. :2 Min. : 1.00 Min. : 613 Min. :-10.0
## 1st Qu.:2013 1st Qu.:2 1st Qu.: 7.00 1st Qu.: 943 1st Qu.: -5.0
## Median :2013 Median :2 Median :16.00 Median :1268 Median : -2.0
## Mean :2013 Mean :2 Mean :15.26 Mean :1298 Mean : 10.5
## 3rd Qu.:2013 3rd Qu.:2 3rd Qu.:22.50 3rd Qu.:1742 3rd Qu.: 9.0
## Max. :2013 Max. :2 Max. :28.00 Max. :2159 Max. :209.0
## arr_time arr_delay carrier tailnum
## Min. : 118 Min. :-66.00 Length:68 Length:68
## 1st Qu.:1233 1st Qu.:-21.25 Class :character Class :character
## Median :1497 Median :-11.00 Mode :character Mode :character
## Mean :1607 Mean : -4.50
## 3rd Qu.:2062 3rd Qu.: 2.00
## Max. :2256 Max. :196.00
## flight origin dest air_time
## Min. : 11.0 Length:68 Length:68 Min. :317.0
## 1st Qu.: 85.0 Class :character Class :character 1st Qu.:345.0
## Median : 641.0 Mode :character Mode :character Median :354.0
## Mean : 795.1 Mean :351.9
## 3rd Qu.:1487.2 3rd Qu.:360.0
## Max. :2126.0 Max. :376.0
## distance hour minute
## Min. :2565 Min. : 6.00 Min. : 1.00
## 1st Qu.:2586 1st Qu.: 9.00 1st Qu.:25.00
## Median :2586 Median :12.50 Median :33.50
## Mean :2584 Mean :12.62 Mean :36.35
## 3rd Qu.:2586 3rd Qu.:17.00 3rd Qu.:54.00
## Max. :2586 Max. :21.00 Max. :59.00
### Then I re-ran the Exercise 3 histogram
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -10.0 -5.0 -2.0 10.5 9.0 209.0
# Summarise departure delays for each origin airport in sfo_feb_flights_2
# (defined outside this chunk): the median delay, the interquartile range,
# and the number of flights behind each figure.
sfo_feb_flights_2 %>%
  group_by(origin) %>%
  summarise(
    median_dd = median(dep_delay),
    # The original call was IQR(dep_delay,) -- the stray trailing comma passed
    # an empty argument and only worked because na.rm has a default.
    iqr_dd = IQR(dep_delay),
    n_flights = n()
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 4
## origin median_dd iqr_dd n_flights
## <chr> <dbl> <dbl> <int>
## 1 EWR 0.5 5.75 8
## 2 JFK -2.5 15.2 60
# Departure-delay summary per carrier (median, IQR, flight count), listed
# from the carrier with the widest IQR down to the narrowest.
sfo_feb_flights_2 %>%
  group_by(carrier) %>%
  summarise(
    median_dd = median(dep_delay),
    iqr_dd = IQR(dep_delay),
    n_flights = n()
  ) %>%
  arrange(desc(iqr_dd))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 4
## carrier median_dd iqr_dd n_flights
## <chr> <dbl> <dbl> <int>
## 1 AA 13 32.8 10
## 2 VX -3.5 16.8 12
## 3 UA -2 13 21
## 4 DL -3 6.5 19
## 5 B6 -2 3.5 6
# Mean departure delay for each month, highest mean first.
nycflights %>%
  group_by(month) %>%
  summarise(mean_dd = mean(dep_delay)) %>%
  arrange(desc(mean_dd))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 12 x 2
## month mean_dd
## <int> <dbl>
## 1 7 20.8
## 2 6 20.4
## 3 12 17.4
## 4 4 14.6
## 5 3 13.5
## 6 5 13.3
## 7 8 12.6
## 8 2 10.7
## 9 1 10.2
## 10 9 6.87
## 11 11 6.10
## 12 10 5.88
# Median departure delay for each month, highest median first. The median is
# less affected by a few very long delays than the mean is.
nycflights %>%
  group_by(month) %>%
  summarise(median_dd = median(dep_delay)) %>%
  arrange(desc(median_dd))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 12 x 2
## month median_dd
## <int> <dbl>
## 1 12 1
## 2 6 0
## 3 7 0
## 4 3 -1
## 5 5 -1
## 6 8 -1
## 7 1 -2
## 8 2 -2
## 9 4 -2
## 10 11 -2
## 11 9 -3
## 12 10 -3
# Label every flight: a dep_delay below 5 counts as "on time", anything else
# as "delayed". The new dep_type column is stored back into nycflights.
nycflights <- mutate(
  nycflights,
  dep_type = ifelse(dep_delay < 5, "on time", "delayed")
)

# Share of on-time departures per origin airport, best airport first.
nycflights %>%
  group_by(origin) %>%
  summarise(ot_dep_rate = sum(dep_type == "on time") / n()) %>%
  arrange(desc(ot_dep_rate))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
## origin ot_dep_rate
## <chr> <dbl>
## 1 LGA 0.728
## 2 JFK 0.694
## 3 EWR 0.637
### Exercise 9
# Scatterplot of arrival delay against departure delay for the carriers
# AA, UA and DL, with points coloured by carrier.
plot2 <- nycflights %>%
  filter(carrier %in% c("AA", "UA", "DL")) %>%
  ggplot(mapping = aes(x = dep_delay, y = arr_delay, color = carrier)) +
  geom_point()

# Typing the object name at top level prints (draws) the plot.
plot2