#Load Tidyverse
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
#Load NYC Flights Dataset
library(nycflights13)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Open the raw data in the interactive viewer, then print descriptive
# statistics for every column that can be summarised numerically.
view(flights)
# time_hour is a date-time (<dttm>) column; describe() cannot summarise it and
# only produced min/max warnings plus a row of NaN/Inf, so it is dropped first.
# Columns marked with * in the output are character columns that describe()
# recodes to integers, so their statistics are not meaningful on their own.
describe(select(flights, -time_hour))
## vars n mean sd median trimmed mad min max
## year 1 336776 2013.00 0.00 2013 2013.00 0.00 2013 2013
## month 2 336776 6.55 3.41 7 6.56 4.45 1 12
## day 3 336776 15.71 8.77 16 15.70 11.86 1 31
## dep_time 4 328521 1349.11 488.28 1401 1346.82 634.55 1 2400
## sched_dep_time 5 336776 1344.25 467.34 1359 1341.60 613.80 106 2359
## dep_delay 6 328521 12.64 40.21 -2 3.32 5.93 -43 1301
## arr_time 7 328063 1502.05 533.26 1535 1526.42 619.73 1 2400
## sched_arr_time 8 336776 1536.38 497.46 1556 1550.67 618.24 1 2359
## arr_delay 9 327346 6.90 44.63 -5 -1.03 20.76 -86 1272
## carrier* 10 336776 7.14 4.14 6 7.00 5.93 1 16
## flight 11 336776 1971.92 1632.47 1496 1830.51 1608.62 1 8500
## tailnum* 12 334264 1814.32 1199.75 1798 1778.21 1587.86 1 4043
## origin* 13 336776 1.95 0.82 2 1.94 1.48 1 3
## dest* 14 336776 50.03 28.12 50 49.56 32.62 1 105
## air_time 15 327346 150.69 93.69 129 140.03 75.61 20 695
## distance 16 336776 1039.91 733.23 872 955.27 569.32 17 4983
## hour 17 336776 13.18 4.66 13 13.15 5.93 1 23
## minute 18 336776 26.23 19.30 29 25.64 23.72 0 59
## range skew kurtosis se
## year 0 NaN NaN 0.00
## month 11 -0.01 -1.19 0.01
## day 30 0.01 -1.19 0.02
## dep_time 2399 -0.02 -1.09 0.85
## sched_dep_time 2253 -0.01 -1.20 0.81
## dep_delay 1344 4.80 43.95 0.07
## arr_time 2399 -0.47 -0.19 0.93
## sched_arr_time 2358 -0.35 -0.38 0.86
## arr_delay 1358 3.72 29.23 0.08
## carrier* 15 0.36 -1.21 0.01
## flight 8499 0.66 -0.85 2.81
## tailnum* 4042 0.17 -1.24 2.08
## origin* 2 0.09 -1.50 0.00
## dest* 104 0.13 -1.08 0.05
## air_time 675 1.07 0.86 0.16
## distance 4966 1.13 1.19 1.26
## hour 22 0.00 -1.21 0.01
## minute 59 0.09 -1.24 0.03
#Filter: flights operated by carrier code "DL"
# NOTE(review): this object is named `Atlanta` and was originally labelled as
# a Hartsfield-Jackson Atlanta International Airport filter, but the condition
# selects on the carrier column, not on an airport. "DL" is presumably Delta
# Air Lines (see nycflights13::airlines) -- confirm. If flights bound for
# Atlanta were intended, the condition would presumably be dest == "ATL".
Atlanta <- filter(flights, carrier == "DL")
Atlanta
## # A tibble: 48,110 x 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 554 600 -6 812 837
## 2 2013 1 1 602 610 -8 812 820
## 3 2013 1 1 606 610 -4 837 845
## 4 2013 1 1 615 615 0 833 842
## 5 2013 1 1 653 700 -7 936 1009
## 6 2013 1 1 655 655 0 1021 1030
## 7 2013 1 1 655 700 -5 1037 1045
## 8 2013 1 1 655 700 -5 1002 1020
## 9 2013 1 1 657 700 -3 959 1013
## 10 2013 1 1 658 700 -2 944 939
## # ... with 48,100 more rows, and 11 more variables: arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Keep only the rows of `Atlanta` whose month column equals 3 (March),
# then print the resulting tibble.
MarchATL <- Atlanta %>%
  filter(month == 3)
MarchATL
## # A tibble: 4,189 x 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 3 1 558 600 -2 750 759
## 2 2013 3 1 600 600 0 848 837
## 3 2013 3 1 609 615 -6 759 825
## 4 2013 3 1 611 615 -4 838 842
## 5 2013 3 1 624 630 -6 857 859
## 6 2013 3 1 652 700 -8 1016 1019
## 7 2013 3 1 653 655 -2 955 1029
## 8 2013 3 1 656 700 -4 1018 953
## 9 2013 3 1 656 700 -4 1003 1014
## 10 2013 3 1 657 700 -3 953 1034
## # ... with 4,179 more rows, and 11 more variables: arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Reduce the March subset to the four timing columns: departure and arrival
# times together with their delay columns.
AtlSelect <- MarchATL %>%
  select(dep_time, dep_delay, arr_time, arr_delay)
AtlSelect
## # A tibble: 4,189 x 4
## dep_time dep_delay arr_time arr_delay
## <int> <dbl> <int> <dbl>
## 1 558 -2 750 -9
## 2 600 0 848 11
## 3 609 -6 759 -26
## 4 611 -4 838 -4
## 5 624 -6 857 -2
## 6 652 -8 1016 -3
## 7 653 -2 955 -34
## 8 656 -4 1018 25
## 9 656 -4 1003 -11
## 10 657 -3 953 -41
## # ... with 4,179 more rows
# Quartiles, mean and NA count of the arr_time column for the March subset.
# NOTE(review): arr_time values presumably encode clock time as HHMM, so the
# mean is not an average time of day -- confirm before interpreting it.
AtlSumStats <- summary(MarchATL[["arr_time"]])
AtlSumStats
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1 1205 1652 1598 2019 2400 60
# Frequency table: number of March rows for each distinct sched_arr_time value.
ATLSchTable <- table(MarchATL[["sched_arr_time"]])
ATLSchTable
##
## 31 32 759 800 814 816 825 829 830 837 842 844 849 859 901 907
## 1 25 6 20 5 20 1 5 25 1 5 25 25 1 4 15
## 908 916 918 928 931 933 938 944 951 953 1004 1007 1009 1010 1011 1012
## 10 5 25 1 20 5 1 5 25 1 2 5 25 10 25 25
## 1014 1018 1019 1020 1021 1022 1024 1027 1029 1030 1031 1032 1034 1039 1040 1042
## 1 5 1 30 2 20 2 1 1 25 5 25 1 1 5 1
## 1043 1044 1045 1051 1055 1056 1057 1100 1102 1104 1105 1106 1108 1112 1113 1114
## 5 25 26 4 25 4 1 21 4 5 31 30 25 6 4 19
## 1115 1116 1117 1120 1122 1123 1124 1126 1127 1129 1130 1131 1133 1135 1136 1140
## 31 21 1 5 4 26 24 5 2 26 5 26 1 1 5 25
## 1142 1143 1145 1205 1207 1210 1212 1213 1214 1216 1220 1221 1224 1227 1228 1231
## 5 25 1 1 1 5 4 26 6 25 25 5 26 1 1 5
## 1232 1234 1237 1239 1246 1247 1248 1251 1254 1300 1305 1306 1307 1308 1309 1314
## 30 30 1 1 5 5 23 5 1 5 26 5 3 24 1 4
## 1315 1316 1323 1331 1334 1335 1337 1339 1341 1342 1345 1348 1359 1402 1407 1409
## 25 16 1 25 25 6 1 25 1 1 1 2 5 50 6 2
## 1410 1412 1413 1414 1416 1419 1420 1421 1424 1430 1434 1438 1440 1444 1446 1447
## 6 10 5 50 30 5 1 25 1 1 25 29 2 2 12 28
## 1449 1450 1451 1453 1454 1459 1508 1511 1529 1534 1535 1537 1547 1549 1552 1554
## 25 30 25 1 1 1 5 25 1 25 5 1 1 5 25 5
## 1556 1606 1613 1629 1630 1631 1632 1634 1635 1636 1638 1641 1645 1649 1651 1653
## 25 1 5 30 10 24 25 1 1 1 2 4 25 3 18 25
## 1654 1655 1701 1737 1738 1742 1748 1749 1751 1753 1758 1801 1802 1803 1805 1815
## 25 1 2 25 5 1 1 2 11 25 5 40 10 25 1 2
## 1819 1820 1822 1823 1824 1825 1830 1831 1832 1834 1837 1838 1840 1842 1843 1845
## 55 24 4 1 25 6 1 6 25 25 1 25 3 5 5 1
## 1847 1850 1853 1856 1857 1859 1900 1901 1903 1904 1907 1909 1912 1916 1917 1919
## 49 27 25 25 5 1 6 31 1 24 25 1 1 5 2 9
## 1920 1921 1922 1924 1925 1926 1930 1931 1933 1934 1935 1936 1939 1942 1943 1944
## 1 30 25 54 1 5 6 25 30 25 1 5 25 25 1 6
## 1951 2002 2003 2006 2008 2014 2015 2017 2019 2020 2024 2025 2027 2028 2029 2031
## 1 5 30 5 25 5 2 25 1 3 25 5 35 25 1 1
## 2033 2034 2035 2036 2037 2040 2041 2042 2043 2044 2045 2046 2049 2050 2052 2058
## 6 5 25 26 25 26 1 1 25 50 25 6 26 24 1 1
## 2100 2101 2104 2107 2110 2117 2130 2131 2132 2133 2134 2136 2139 2140 2142 2143
## 24 1 4 3 19 1 7 25 4 1 25 1 28 1 1 1
## 2145 2148 2149 2154 2155 2157 2200 2202 2203 2204 2205 2206 2207 2209 2210 2212
## 1 5 24 6 5 29 5 19 5 1 25 25 1 1 11 2
## 2218 2220 2221 2222 2223 2226 2227 2229 2230 2231 2232 2235 2236 2237 2238 2239
## 5 21 4 2 3 25 5 2 1 25 6 25 5 5 1 30
## 2240 2242 2243 2245 2246 2247 2248 2250 2251 2256 2257 2302 2303 2306 2316 2318
## 25 30 75 1 2 27 1 1 5 25 2 6 1 1 1 25
## 2327 2328 2334 2342
## 17 1 1 25
#Head and Tails
# First and last six arr_time values of the March subset (head()/tail()
# default to six elements).
AtlHead <- head(MarchATL$arr_time)
Atltail <- tail(MarchATL$arr_time)

# Stacked bar chart of actual vs. scheduled arrival.
# Cross-tabulating the raw values yields one bar per distinct scheduled time,
# each stacked from one segment per distinct actual time, which is unreadable.
# Both columns are therefore binned to the hour first.
# NOTE(review): assumes both columns encode clock time as HHMM integers, so
# integer division by 100 gives the hour -- confirm.
arr_hour <- MarchATL$arr_time %/% 100
sched_arr_hour <- MarchATL$sched_arr_time %/% 100
barplot(
  table(arr_hour, sched_arr_hour),
  xlab = "Scheduled arrival hour",
  ylab = "Number of flights",
  legend.text = TRUE,
  args.legend = list(title = "Actual arrival hour", cex = 0.6)
)