Synopsis

This analysis examines the health and economic consequences of storm events, by event type, in US counties from 1950 to 2011. The health consequences are split into fatalities and injuries. The data show that tornadoes have caused the greatest harm to health, in both fatalities and injuries. In contrast, the greatest economic harm, measured here by the recorded crop damage, was caused not by tornadoes but by hail.

Data processing

The data were downloaded from the internet as a bzip2-compressed CSV file (“.csv.bz2”) and read into R directly with read.csv, which reads the compressed file without it having to be unpacked first.

# Load the storm data into P2 unless P2 already exists in the workspace.
if (!exists("P2")) {
  storm_file <- "P2.csv.bz2"
  # Reuse an archive that is already on disk rather than downloading it again.
  if (!file.exists(storm_file)) {
    tryCatch(
      # The default download method is used so the script does not depend on
      # an external curl binary; mode = "wb" keeps the compressed file intact
      # on platforms that distinguish text and binary transfers.
      download.file(
        url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
        destfile = storm_file,
        mode = "wb"
      ),
      error = function(e) {
        # Remove a partial download so a later run does not read a truncated
        # archive, then re-raise the original error.
        unlink(storm_file)
        stop(e)
      }
    )
  }
  # read.csv reads the bzip2-compressed file directly.
  P2 <- read.csv(storm_file)
}

Results

# Fatalities by event type ----
# Show large numbers in fixed notation rather than scientific notation.
options(scipen = 999)

# Sum FATALITIES within each EVTYPE; the result has the columns Group.1
# (event type) and x (total fatalities).
fatal <- aggregate(P2$FATALITIES, by = list(P2$EVTYPE), FUN = sum)

# Lower-cased name of the event type with the largest fatality total.
max_fatal <- tolower(as.character(fatal$Group.1[which.max(fatal$x)]))

# Totals as a vector named by event type, then only those above 300.
x <- setNames(fatal$x, fatal$Group.1)
fatal2 <- x[x > 300]

# Set up a layout of two stacked panels; this bar plot is the first one drawn.
par(mfrow = c(2, 1), mgp = c(5, 1, 0), oma = c(2, 2, 0, 0))
barplot(
  fatal2,
  main = "Cumulative number of fatalities from 1950-2011",
  ylim = c(0, 6000),
  yaxp = c(0, 6000, 3),
  space = 0,
  las = 2,
  cex.names = 0.5
)
mtext(
  expression(italic("Only includes events with cum fatalities of 300+")),
  cex = 0.8
)

# Injuries by event type ----
# Sum INJURIES within each EVTYPE; the result has the columns Group.1
# (event type) and x (total injuries).
injur <- aggregate(P2$INJURIES, by = list(P2$EVTYPE), FUN = sum)

# Lower-cased name of the event type with the largest injury total.
max_injury <- tolower(as.character(injur$Group.1[which.max(injur$x)]))

# Totals as a vector named by event type, then only those above 6000.
y <- setNames(injur$x, injur$Group.1)
injur2 <- y[y > 6000]

# Drawn with whatever graphics layout is currently active.
barplot(
  injur2,
  main = "Cumulative number of injuries from 1950-2011",
  ylim = c(0, 100000),
  yaxp = c(0, 100000, 2),
  space = 0,
  las = 2,
  cex.names = 0.5
)
mtext(
  expression(italic("Only includes events with cum injuries of 6k+")),
  cex = 0.8
)

# Crop damage by event type ----
# Show large numbers in fixed notation rather than scientific notation.
options(scipen = 999)

# Single-panel layout with wider bottom/left margins for the rotated labels.
par(mfrow = c(1, 1), mar = c(7, 6, 4, 2), oma = c(2, 2, 0, 0), mgp = c(5, 1, 0))

# Sum CROPDMG within each EVTYPE; the result has the columns Group.1
# (event type) and x (total crop damage).
# NOTE(review): only the CROPDMG column is summed here; no other damage
# columns are included -- confirm this is the intended "economic" measure.
economic <- with(P2, aggregate(CROPDMG, by = list(EVTYPE), FUN = sum))

# Lower-cased name of the event type with the largest crop-damage total.
max_econ <- tolower(as.character(economic[which.max(economic$x), ]$Group.1))

# Label the totals with the event types from this aggregation itself, so the
# names cannot drift out of step with the values.
z <- economic$x
names(z) <- economic$Group.1

# Keep only event types whose total exceeds 100000.
economic2 <- z[z > 100000]
barplot(economic2, cex.names = 0.5, las = 2, space = 0, ylim = c(0, 600000), main = "Cumulative number of crop damage from 1950-2011", xlab = "Event type", ylab = "Cumulative crop damage")
mtext(expression(italic("Only includes events with cum dmg of 100k+")), cex = 0.8)

# State-level summary tables ----
library("xtable")
library("plyr")

# Per-state totals; each result has the columns Group.1 (state) and x (total).
fat_sum <- aggregate(P2$FATALITIES, list(P2$STATE), sum)
inj_sum <- aggregate(P2$INJURIES, list(P2$STATE), sum)
eco_sum <- aggregate(P2$CROPDMG, list(P2$STATE), sum)

# Join the three totals on the state column and give the columns readable
# names.
all_sum <- join_all(list(fat_sum, inj_sum, eco_sum), by = "Group.1")
names(all_sum) <- c("State", "Fatalities", "Injuries", "Econdmg")

# Keep the states that lie above the 75% quantile on all three measures.
above_q3 <- function(v) v > quantile(v)[["75%"]]
new_sum <- all_sum[
  above_q3(all_sum$Fatalities) &
    above_q3(all_sum$Injuries) &
    above_q3(all_sum$Econdmg),
]

# One row of summary statistics per measure, printed as an HTML table.
sum_row <- rbind(summary(fat_sum$x), summary(inj_sum$x), summary(eco_sum$x))
rownames(sum_row) <- c("Fatalities", "Injuries", "Econ dmg")
print(xtable(sum_row, digits = 0), type = "html")
Min. 1st Qu. Median Mean 3rd Qu. Max.
Fatalities 0 12 86 210 314 1421
Injuries 0 44 520 1952 3217 17667
Econ dmg 0 24 3490 19136 15813 303950
# Print the table of states selected in new_sum as HTML, with no decimals.
print(
  xtable(new_sum, digits = 0),
  type = "html"
)
State Fatalities Injuries Econdmg
5 AR 530 5550 25819
8 CA 550 3278 21152
22 KS 356 3449 139799
37 MO 754 8998 21087
38 MS 555 6675 56078
40 NC 398 3415 22246
48 OH 403 7112 28328
63 TX 1366 17667 156169