Raw Data Importation and Subsetting

library(ggplot2)
library(data.table)

# Load the raw storm records from StormData.csv in the working directory.
D <- fread(input = "StormData.csv")
## 
Read 0.0% of 967216 rows
Read 31.0% of 967216 rows
Read 51.7% of 967216 rows
Read 70.3% of 967216 rows
Read 79.6% of 967216 rows
Read 92.0% of 967216 rows
Read 902297 rows and 37 (of 37) columns from 0.523 GB file in 00:00:09
# Convert to a data.table and keep only the columns used in the analysis:
# the event type, the casualty counts, and the damage amounts with their
# exponent codes.
Dt <- as.data.table(D)[
  ,
  c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG",
    "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"),
  with = FALSE
]

# D is no longer needed; drop the name to conserve memory.
rm(D)

Data Processing

# Keep only events that caused at least one injury or fatality, or a non-zero
# amount of property or crop damage.
Dt <- subset(
  Dt,
  subset = FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0
)

# Blank entries in the casualty and damage-amount columns are set to 0.
for (amount_col in c("FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")) {
  amounts <- Dt[[amount_col]]
  amounts[amounts == ""] <- 0
  set(Dt, j = amount_col, value = amounts)
}

# Translate the damage exponent codes into powers of ten:
# H -> 2, K -> 3, M -> 6, B -> 9. The symbols "+", "-" and "?" as well as
# missing or blank codes become 0. Any other value is left unchanged.
exponent_for <- c(H = 2, K = 3, M = 6, B = 9, "+" = 0, "-" = 0, "?" = 0)
for (exp_col in c("PROPDMGEXP", "CROPDMGEXP")) {
  # Upper-case first so that lower-case codes match the lookup names.
  codes <- toupper(as.character(Dt[[exp_col]]))
  codes[is.na(codes) | codes == ""] <- 0
  for (symbol in names(exponent_for)) {
    codes[codes == symbol] <- exponent_for[[symbol]]
  }
  set(Dt, j = exp_col, value = codes)
}

# Remove the loop temporaries so only Dt is left behind.
rm(amount_col, amounts, exponent_for, exp_col, codes, symbol)

Check how many NA values there are

# Collect every row that has an NA in at least one column and preview it.
na_Dt <- Dt[!complete.cases(Dt), ]
head(na_Dt)
## [1] EVTYPE     FATALITIES INJURIES   PROPDMG    PROPDMGEXP CROPDMG   
## [7] CROPDMGEXP
## <0 rows> (or 0-length row.names)

The check returns zero rows, so no record contains an NA value and nothing needs to be omitted.

Set as integers for easy data processing

# Store both damage exponent columns as integers.
for (exp_col in c("PROPDMGEXP", "CROPDMGEXP")) {
  set(Dt, j = exp_col, value = as.integer(Dt[[exp_col]]))
}
rm(exp_col)

Calculate Total Property and Crop Damage

# Scale each damage amount by ten raised to its exponent column.
Dt$PROPDMGTOT <- Dt$PROPDMG * 10^Dt$PROPDMGEXP
Dt$CROPDMGTOT <- Dt$CROPDMG * 10^Dt$CROPDMGEXP

# Total damage is property damage plus crop damage. (Previously the property
# total was added to itself, so crop damage was never counted.)
Dt$TOTDMG <- Dt$PROPDMGTOT + Dt$CROPDMGTOT

Aggregate Everything

# Sum one column of Dt per event type. The result is a data frame with the
# columns EVTYPE and the summed measure, named after the measure.
sum_by_event <- function(measure) {
  aggregate(as.formula(paste(measure, "~ EVTYPE")), data = Dt, FUN = sum)
}

Fatalities     <- sum_by_event("FATALITIES")
Injuries       <- sum_by_event("INJURIES")
PropertyDamage <- sum_by_event("PROPDMGTOT")
CropDamage     <- sum_by_event("CROPDMGTOT")
TotalDamage    <- sum_by_event("TOTDMG")

rm(sum_by_event)

Create a summary set by merging together the sets by event type

# Fold the per-event totals into one table with a full outer join on EVTYPE,
# so an event type present in any of the inputs is kept.
Summary <- Reduce(
  function(left, right) merge(left, right, by = "EVTYPE", all = TRUE),
  list(Fatalities, Injuries, PropertyDamage, CropDamage, TotalDamage)
)

R Summary

# Rank Summary by one measure, largest first, and keep the first ten rows.
top_ten_by <- function(measure) {
  ranked <- Summary[order(Summary[[measure]], decreasing = TRUE), ]
  ranked[1:10, ]
}

# One top-ten table per measure, used by the plots below.
Fatalities     <- top_ten_by("FATALITIES")
Injuries       <- top_ten_by("INJURIES")
PropertyDamage <- top_ten_by("PROPDMGTOT")
CropDamage     <- top_ten_by("CROPDMGTOT")
TotalDamage    <- top_ten_by("TOTDMG")

rm(top_ten_by)

Results

Plot Fatalities and Injuries

# Two stacked bar charts: total fatalities per event type on top, total
# injuries per event type below. Bar labels are drawn vertically (las = 3).
par(mfrow=c(2,1), mar=c(5,4,4,2), oma=c(4,2,2,2), cex=0.7)
barplot(Fatalities$FATALITIES, names.arg=Fatalities$EVTYPE, las=3,
        cex.names=0.7, xlab="", ylab="Total Fatalities", col="blue",
        main="Weather Events with Highest Fatalities")
# The title is written on one line: the earlier literal was wrapped in the
# source, which embedded a newline and indentation spaces in the rendered title.
barplot(Injuries$INJURIES, names.arg=Injuries$EVTYPE, las=3, cex.names=0.7,
        xlab="", ylab="Total Injuries", col="blue",
        main="Weather Events with Highest Injuries")

Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

The bar charts show that Tornadoes, Excessive Heat, Flash Floods, Heat, and Lightning are the weather events with the highest fatalities. The weather events with the highest injuries are Tornadoes, TSTM (thunderstorm) Wind, Floods, Excessive Heat, and Lightning.

Plot Damages (Property and Crops)

# Two stacked bar charts of damage in millions of USD: property damage on top,
# crop damage below. The order of the par() arguments is kept as-is because
# changing the layout (mfrow) resets cex.
par(mfrow = c(2, 1), mar = c(5, 4, 4, 2), oma = c(4, 2, 2, 2), cex = 0.7)

barplot(
  PropertyDamage$PROPDMGTOT / 1e6,
  names.arg = PropertyDamage$EVTYPE,
  main = "Weather Events with Highest Property Damage Cost",
  ylab = "Total Property Damage in USD (Millions)",
  xlab = "",
  col = "blue",
  las = 3,
  cex.names = 0.7
)

barplot(
  CropDamage$CROPDMGTOT / 1e6,
  names.arg = CropDamage$EVTYPE,
  main = "Weather Events with Highest Crop Damage Cost",
  ylab = "Total Crop Damage in USD (Millions)",
  xlab = "",
  col = "blue",
  las = 3,
  cex.names = 0.7
)

Across the United States, which types of events have the greatest economic consequences?

The bar charts show that Floods, Hurricanes, Tornadoes, Storm Surges, and Flash Floods are the weather events with the highest property damage. The weather events with the highest crop damage are Droughts, Floods, River Floods, Ice Storms, and Hail.