Dataset Tabs

Tornadoes

This project will showcase how R can perform statistical analysis to gain information from data.

How many tornadoes occurred in WA or MS?

# Load the tornado records; header = TRUE takes column names from the first row.
data <- read.csv(
  file = "http://latul.be/mbaa_531/data/tornado.csv",
  header = TRUE
)

# Logical mask: TRUE where the `st` column is "WA" or "MS".
index1 <- (data$st == "WA") | (data$st == "MS")

# Summing a logical vector counts its TRUE entries.
print(paste("Tronadoes recoreded in the WA or MS =", sum(index1)))
## [1] "Tronadoes recoreded in the WA or MS = 530"

How many tornadoes occurred in WA after 2012?

# Count tornadoes that are BOTH in WA and later than 2012.
# The two conditions are combined with `&` (element-wise AND); `|` would count
# every WA tornado plus every tornado after 2012 in any state.
index2 <- data$st == "WA" & data$yr > 2012
print(paste("Tronadoes recoreded in WA After 2012 =",
            sum(index2)))
## (re-run to refresh this output: the previously recorded value of 3070 was
##  produced by an OR condition, i.e. WA tornadoes or any tornado after 2012)

Get all tornadoes that occurred in WA in 2012, 2013, and 2014

# Reload the tornado records so this chunk can run on its own.
data <- read.csv(file = "http://latul.be/mbaa_531/data/tornado.csv", header = TRUE)

# WA tornadoes from 2012, 2013 or 2014. `%in%` replaces three repeated
# `st == "WA" & yr == <year>` clauses and never returns NA for a missing year.
index <- data$st == "WA" & data$yr %in% c(2012, 2013, 2014)

# which() keeps only the TRUE positions, so an NA in the mask (e.g. a missing
# `st`) cannot show up as an all-NA row in the result.
data[which(index), c("yr", "mo", "dy", "date", "time", "tz", "st", "stf", "f")]

Return the ’mo’ (month), ’yr’ (year), and ’f’ (F-scale) for all tornadoes that occurred in HI.

# Logical mask: TRUE where the `st` column equals "HI".
index1 <- data$st == "HI"

# Columns to display for the matching rows: state, month, year and F-scale.
hi_columns <- c("st", "mo", "yr", "f")
data[index1, hi_columns]

Arrange the tornadoes by date and time. What state had the most recent tornado?

# Load the tornado records into `meta` for sorting.
meta <- read.csv(file = "http://latul.be/mbaa_531/data/tornado.csv", header = TRUE)

# Build the row order from `meta` itself (not from the separate `data` object),
# sorting by date and breaking ties by time. decreasing = TRUE puts the most
# recent tornado first, so head() answers "what state had the most recent one".
# NOTE(review): assumes `date` and `time` sort chronologically as stored
# (e.g. "yyyy-mm-dd" / "HH:MM:SS" text) - confirm against the CSV.
ordertype <- order(meta$date, meta$time, decreasing = TRUE)
head(meta[ordertype, ])

Get the count of tornadoes by months

# Print one line per calendar month (1 to 12) with the number of rows whose
# `mo` value equals that month.
for (mo_num in seq_len(12)) {
  n_in_month <- sum(data$mo == mo_num)
  print(paste("month=", mo_num, "Tornadoes=", n_in_month))
}
## [1] "month= 1 Tornadoes= 355"
## [1] "month= 2 Tornadoes= 463"
## [1] "month= 3 Tornadoes= 751"
## [1] "month= 4 Tornadoes= 2171"
## [1] "month= 5 Tornadoes= 2500"
## [1] "month= 6 Tornadoes= 1899"
## [1] "month= 7 Tornadoes= 861"
## [1] "month= 8 Tornadoes= 517"
## [1] "month= 9 Tornadoes= 401"
## [1] "month= 10 Tornadoes= 525"
## [1] "month= 11 Tornadoes= 350"
## [1] "month= 12 Tornadoes= 348"

Airlines

# Load the airline records; header = TRUE takes column names from the first row.
airlineData <- read.csv(
  file = "http://latul.be/mbaa_531/data/airline.csv",
  header = TRUE
)

A-Find the carrier (UniqueCarrier), the flight number (FlightNum), and origin (Origin) of all the flights that:

1. Departed (DepTime) after 10pm and flew (Dest) to Nashville (’BNA’).

# Columns to report for each matching flight.
col <- c("UniqueCarrier", "FlightNum", "Origin")

# Flights with DepTime later than 2200 whose destination is BNA.
row <- airlineData$DepTime > 2200 & airlineData$Dest == "BNA"

# A missing DepTime makes the comparison NA, and indexing a data frame with NA
# returns an all-NA row. which() keeps only the positions that are TRUE.
print(airlineData[which(row), col])
##       UniqueCarrier FlightNum Origin
## 7021             OH      5421    CVG
## 17401            AA      2435    ORD
## 19478            DH      7332    ORD

2. Departed (DepTime) after 10pm, and either originated (Origin) from Nashville ’BNA’ or landed (Dest) in Memphis ’MEM’. If you don’t get a list of four flights, something is wrong.

# Columns to report, including the fields used by the filter.
col2 <- c("UniqueCarrier", "FlightNum", "Origin", "DepTime", "Dest")

# Late departure (DepTime > 2200) AND (origin BNA OR destination MEM).
# The shared DepTime test is factored out rather than repeated on both sides
# of the OR; the two forms select the same rows.
row2 <- (airlineData$DepTime > 2200) &
  (airlineData$Origin == "BNA" | airlineData$Dest == "MEM")

# which() drops NA mask entries (e.g. missing DepTime) so they do not appear
# as extra all-NA rows in the listing.
airlineData[which(row2), col2]

3. Were delayed (ArrDelay) by more than two hours. Note that a negative delay is a flight that arrived early.

# Columns to report for flights with a long arrival delay.
col3 <- c("UniqueCarrier", "FlightNum", "Origin", "ArrDelay")

# "More than two hours" is 120 when the delay is measured in minutes.
# NOTE(review): ArrDelay is assumed to be recorded in minutes, as in the
# standard airline on-time data - confirm against the CSV.
row3 <- airlineData$ArrDelay > 120

# which() drops NA mask entries (missing ArrDelay) instead of printing them
# as all-NA rows; head() shows the first 20 matches.
head(airlineData[which(row3), col3], n = 20)

4. arrived (ArrDelay) more than two hours late, but didn’t leave late (DepDelay)

# Columns to report, including both delay fields used by the filter.
col4 <- c("UniqueCarrier", "FlightNum", "Origin", "ArrDelay", "DepDelay")

# Arrived more than two hours late but did not leave late (DepDelay <= 0).
# NOTE(review): ArrDelay is assumed to be recorded in minutes, so two hours is
# 120 - confirm against the CSV.
row4 <- airlineData$ArrDelay > 120 & airlineData$DepDelay <= 0

# which() drops NA mask entries (missing delays) instead of printing them as
# all-NA rows; head() shows the first 20 matches.
head(airlineData[which(row4), col4], n = 20)

B- Make use of minus sign in front of the function to reverse order() and the function head() to get the relevant part: (Use the Airline data)

1. Sort flights to find the 5 most delayed (DepDelay) flights

# Columns to report for the most delayed departures.
delay_cols <- c("UniqueCarrier", "FlightNum", "Origin", "DepDelay")

# Negating DepDelay makes order() sort from largest delay to smallest.
# The result is stored under its own name so it does not mask base::order().
most_delayed_first <- order(-airlineData$DepDelay)

# First five rows of the sorted data = the five largest departure delays.
head(airlineData[most_delayed_first, delay_cols], n = 5)

2. Sort flights to find the 5 least delayed (DepDelay) flights

# Columns to report for the least delayed departures.
delay_cols <- c("UniqueCarrier", "FlightNum", "Origin", "DepDelay")

# Ascending sort on DepDelay: smallest (possibly negative) delays come first.
# The result is stored under its own name so it does not mask base::order().
least_delayed_first <- order(airlineData$DepDelay)

# First five rows of the sorted data = the five smallest departure delays.
head(airlineData[least_delayed_first, delay_cols], n = 5)

3. Sort flights by destination (Dest) and break ties by descending arrival delay (ArrDelay)

# Columns to report when listing flights by destination.
dest_cols <- c("UniqueCarrier", "FlightNum", "Origin", "Dest", "ArrDelay")

# Primary key: Dest ascending. Ties are broken by ArrDelay descending, which
# the minus sign achieves. The result is stored under its own name so it does
# not mask base::order().
by_dest_then_delay <- order(airlineData$Dest, -airlineData$ArrDelay)

# Show the first 30 rows of the sorted data.
head(airlineData[by_dest_then_delay, dest_cols], n = 30)