TORNADOES
Question 1
(A) How many tornadoes occurred in WA (Washington)
and (Mississippi)?
20 Tornadoes occurred in Washington while in Mississippi,
there were 510 Tornadoes. This makes a total of 530 Tornadoes
occurrences in both Washington and Mississippi.
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/tornado.csv",
header = TRUE)
head( csvData )
## yr mo dy date time tz st stf stn f
## 1 2007 1 4 2007-01-04 15:45:00 3 LA 22 1 EF-1
## 2 2007 1 4 2007-01-04 16:35:00 3 LA 22 2 EF-1
## 3 2007 1 5 2007-01-05 00:27:00 3 MS 28 1 EF-1
## 4 2007 1 5 2007-01-05 00:40:00 3 MS 28 2 EF-0
## 5 2007 1 5 2007-01-05 00:57:00 3 MS 28 3 EF-1
## 6 2007 1 5 2007-01-05 01:07:00 3 MS 28 4 EF-1
sum(csvData$st == "WA")
## [1] 20
sum(csvData$st == "MS")
## [1] 510
sum(csvData$st == "WA" | csvData$st =="MS")
## [1] 530
(B) How many Tornadoes occurred in Washington before
2012?
13 Tornadoes occurred in Washington before
2012.
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/tornado.csv",
header = TRUE)
sum(csvData$st == "WA" & csvData$date <= "2012")
## [1] 13
(C) All Tornadoes that occurred in Washington in
2011, 2013 and 2014.
Nine Tornadoes occurred in Washington as at 2011, seven in
2013 and five in 2014. This makes a total of 21 Tornadoes that occurred
in 2011, 2013 and 2014.
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/tornado.csv",
header = TRUE)
sum(csvData$st == "WA" & csvData$date >= "2011")
## [1] 9
sum(csvData$st == "WA" & csvData$date >= "2013")
## [1] 7
sum(csvData$st == "WA" & csvData$date >= "2014")
## [1] 5
(D) Which ‘mo’(month) had the most tornadoes in
HI(Hawaii) and how do you know?
The code below shows the frequent occurrence of tornadoes in
Hawaii, in some specific months. In February(2), Hawaii experienced two
tornadoes i.e., twice, once in September(9), once in December(12), once
in march(3) and once in April(4). Based on this, it is evident that
February had the most tornadoes in Hawaii.
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/tornado.csv",
header = TRUE)
Hawaii <- (csvData$st == "HI")
unique(csvData[Hawaii, ])
## yr mo dy date time tz st stf stn f
## 2766 2008 9 23 2008-09-23 18:27:00 3 HI 15 0 EF-0
## 2842 2008 12 13 2008-12-13 16:50:00 3 HI 15 0 EF-0
## 2878 2009 2 11 2009-02-11 17:05:00 3 HI 15 0 EF-0
## 5371 2011 2 11 2011-02-11 18:15:00 3 HI 15 1 EF-0
## 7359 2012 3 9 2012-03-09 11:10:00 3 HI 15 0 EF-0
## 10093 2015 4 23 2015-04-23 18:56:00 3 HI 15 0 EF-0
csvData[Hawaii, c(2)]
## [1] 9 12 2 2 3 4
(E) Get the count of Tornadoes by year.
As shown below, there were tornado occurrences from
2007-2015, and within this 9-year period, 11,141 Tornadoes
occurred.
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/tornado.csv",
header = TRUE)
unique(csvData$yr)
## [1] 2007 2008 2009 2010 2011 2012 2013 2014 2015
sum(csvData$yr == 2007)
## [1] 1116
sum(csvData$yr == 2008)
## [1] 1738
sum(csvData$yr == 2009)
## [1] 1182
sum(csvData$yr == 2010)
## [1] 1315
sum(csvData$yr == 2011)
## [1] 1777
sum(csvData$yr == 2012)
## [1] 956
sum(csvData$yr == 2013)
## [1] 939
sum(csvData$yr == 2014)
## [1] 906
sum(csvData$yr == 2015)
## [1] 1212
sum(csvData$yr == 2007 | csvData$yr == 2008 | csvData$yr == 2009 | csvData$yr == 2010 | csvData$yr == 2011 | csvData$yr == 2012 | csvData$yr == 2013 | csvData$yr == 2014 | csvData$yr == 2015)
## [1] 11141
AIRLINES
Question 2
(A)
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/airline.csv",
header = TRUE)
head( csvData )
## Year Month DayofMonth DayOfWeek DepTime CRSDepTime ArrTime CRSArrTime
## 1 2004 3 25 4 1118 1125 1231 1236
## 2 2004 3 25 4 810 815 1016 1033
## 3 2004 3 25 4 1529 1530 2101 2104
## 4 2004 3 25 4 1119 1125 1423 1441
## 5 2004 3 25 4 1156 1155 1452 1444
## 6 2004 3 25 4 903 905 1047 1054
## UniqueCarrier FlightNum TailNum ActualElapsedTime CRSElapsedTime AirTime
## 1 UA 425 N840UA 73 71 54
## 2 UA 425 N840UA 246 258 211
## 3 UA 426 N335UA 212 214 191
## 4 UA 426 N335UA 124 136 111
## 5 UA 427 N567UA 236 229 199
## 6 UA 427 N567UA 164 169 143
## ArrDelay DepDelay Origin Dest Distance TaxiIn TaxiOut Cancelled
## 1 -5 -7 DEN ABQ 349 3 16 0
## 2 -17 -5 PHL DEN 1557 5 30 0
## 3 -3 -1 DEN EWR 1605 7 14 0
## 4 -18 -6 SAN DEN 853 4 9 0
## 5 8 1 ORD PHX 1440 7 30 0
## 6 -7 -2 TPA ORD 1012 5 16 0
## CancellationCode Diverted CarrierDelay WeatherDelay NASDelay SecurityDelay
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
## LateAircraftDelay
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
Departafter9pm <- (csvData$DepTime > 2100 & csvData$Dest == "BNA")
na.omit(unique(csvData[Departafter9pm, c(5, 9, 10, 17, 18) ]))
## DepTime UniqueCarrier FlightNum Origin Dest
## 2588 2105 WN 2181 BWI BNA
## 7021 2312 OH 5421 CVG BNA
## 10692 2157 DL 938 ATL BNA
## 17401 2217 AA 2435 ORD BNA
## 19478 2317 DH 7332 ORD BNA
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/airline.csv",
header = TRUE)
Departafter9pm2 <- (csvData$DepTime > 2100 & csvData$Origin == "BNA")
na.omit(unique(csvData[Departafter9pm2, c(5, 9, 10, 17, 18) ]))
## DepTime UniqueCarrier FlightNum Origin Dest
## 2392 2218 WN 1447 BNA AUS
## 2406 2150 WN 2658 BNA BWI
## 2446 2150 WN 1511 BNA MDW
## 2469 2125 WN 1732 BNA RDU
## 2472 2110 WN 2769 BNA SAT
## 2474 2125 WN 459 BNA TPA
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/airline.csv",
header = TRUE)
Departafter9pm2A <- (csvData$DepTime > 2100 & csvData$Dest == "MEM")
na.omit (unique(csvData[Departafter9pm2A, c(5, 9, 10, 17, 18)]))
## DepTime UniqueCarrier FlightNum Origin Dest
## 8169 2201 OO 6838 ORD MEM
## 10405 2217 DL 579 ATL MEM
## 12744 2110 FL 1723 ATL MEM
## 15214 2103 NW 279 DTW MEM
## 19352 2313 DH 6270 CVG MEM
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/airline.csv",
header = TRUE)
Departafter9pm2B <- (csvData$DepTime > 2100 & csvData$Origin == "BNA" & csvData$Dest == "MEM")
unique(csvData[Departafter9pm2B, c(5, 9, 10, 17, 18)])
## [1] DepTime UniqueCarrier FlightNum Origin Dest
## <0 rows> (or 0-length row.names)
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/airline.csv",
header = TRUE)
Early <- csvData$ArrDelay < 0 & csvData$ArrTime < 8
na.omit( csvData[ Early, c(5, 7, 9, 10, 17, 18) ] )
## DepTime ArrTime UniqueCarrier FlightNum Origin Dest
## 2714 2150 1 WN 1033 BWI TPA
## 6047 2253 1 OH 5235 CVG CHO
## 6048 2304 5 OH 5736 CVG SCE
## 6049 2340 6 OH 5932 ATL TYS
## 6050 2258 7 OH 5589 CVG MDT
## 9754 2126 2 UA 205 IAD SFO
## 12664 2243 5 FL 218 ATL CAK
## 16935 2118 3 AA 1742 SJU JFK
## 17667 2140 6 AS 197 SEA ANC
## 19167 2250 5 DH 6135 CVG CAE
## 19275 2304 3 DH 6197 CVG CHA
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/airline.csv",
header = TRUE)
Twohourslate <- (csvData$ArrDelay >= 120 & csvData$DepDelay <= 1)
na.omit(csvData[Twohourslate, c(5, 9, 10, 15, 16, 17, 18)])
## DepTime UniqueCarrier FlightNum ArrDelay DepDelay Origin Dest
## 9495 1135 TZ 505 193 -8 PIE MDW
(B)
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/airline.csv",
header = TRUE)
Fiveleastdelayed1 <- order(csvData$DepDelay)
head (csvData[ Fiveleastdelayed1, c(5, 9, 10, 15, 16, 17, 18), 5 ])
## DepTime UniqueCarrier FlightNum ArrDelay DepDelay Origin Dest
## 14202 2047 MQ 3647 -2 -26 DFW SGF
## 6293 911 OH 5260 -16 -24 RIC CVG
## 5109 1053 NW 707 -30 -22 GTF FCA
## 16874 2109 AA 1685 -19 -21 HOU AUS
## 5983 1015 NW 1926 -17 -20 MDW DTW
## 6628 1410 OH 5474 -22 -20 BGR CVG
csvData <- read.csv(file = "http://latul.be/mbaa_531/data/airline.csv",
header = TRUE)
Fiveleastdelayed2 <- order(csvData$ArrDelay)
head (csvData[ Fiveleastdelayed2, c(5, 9, 10, 15, 16, 17, 18), 5 ])
## DepTime UniqueCarrier FlightNum ArrDelay DepDelay Origin Dest
## 6128 600 OH 5071 -52 -5 BUF ATL
## 6099 557 OH 5710 -49 -3 BGM CVG
## 6688 1530 OH 5082 -49 -10 BUF ATL
## 8118 1355 OO 6704 -48 0 MFR DEN
## 10935 1647 DL 1181 -48 -4 BOS DFW
## 15908 1240 AA 577 -48 -5 BOS DFW
BONUS Section
(A). Add a new column reporting departure delays minus the median departure delay to the new data frame
new_col=median(csvData$DepDelay)
new_col=sapply(csvData$DepDelay,median)
head(new_col)
## [1] -7 -5 -1 -6 1 -2
tail(new_col)
## [1] -8 -1 -6 -2 -7 -5
csvData$DepMed_newcolumn <- csvData$DepDelay - sapply(csvData$DepDelay,median)
head(csvData$DepMed_newcolumn)
## [1] 0 0 0 0 0 0
(B). Convert the departure and arrival delays columns from minutes into hours
minutes_to_hours <- csvData$DepDelay / 60
head(csvData[minutes_to_hours, c(1, 2, 15, 16), 5])
## Year Month ArrDelay DepDelay
## 1 2004 3 -5 -7
## 1.1 2004 3 -5 -7
## 1.2 2004 3 -5 -7
## 2 2004 3 -17 -5
## 2.1 2004 3 -17 -5
## 2.2 2004 3 -17 -5
(C). Add a column with the average flight speed (in mph)
csvData$average_flight_speed <- (csvData$Distance/csvData$AirTime) * 1000
head(csvData$average_flight_speed)
## [1] 6462.963 7379.147 8403.141 7684.685 7236.181 7076.923
(D).
summary (csvData$DepMed_newcolumn)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 0 0 0 0 0 183
summary (csvData$average_flight_speed)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1736 5571 6603 6336 7307 237500 207
dim(csvData)
## [1] 19928 31