This analysis examines both the health and economic consequences of storm events, by event type, across US counties from 1950 to 2011. Starting with the health consequences, we divide the data into deaths and injuries. The data show that the event type that has caused the greatest harm (in both fatalities and injuries) is tornadoes. In contrast, the greatest economic harm has not been caused by tornadoes but has been dominated by hail.
The data were loaded into R by downloading the file from the internet and reading the compressed “.csv.bz2” file directly with `read.csv`.
# Load the storm data set into `P2`, at most once per R session.
#
# The compressed archive is fetched only when it is not already on disk, so
# re-running the script after clearing the workspace does not trigger another
# download. read.csv() reads the .bz2 file directly.
if (!exists("P2")) {
  storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  storm_file <- "P2.csv.bz2"
  if (!file.exists(storm_file)) {
    # mode = "wb" keeps the binary archive intact on every platform; the
    # default download method avoids requiring an external curl executable.
    download.file(url = storm_url, destfile = storm_file, mode = "wb")
  }
  P2 <- read.csv(storm_file)
}
# ---- Health impact by event type ----
# Totals FATALITIES and INJURIES per EVTYPE, records the event type with the
# largest total of each, and draws one bar chart per measure limited to the
# event types above a fixed cut-off.

# Strongly prefer fixed over scientific notation when numbers are printed.
options(scipen = 999)

# aggregate() returns one row per event type with columns Group.1 (the event
# type) and x (the total).
fatal <- with(P2, aggregate(FATALITIES, by = list(EVTYPE), FUN = sum))
injur <- with(P2, aggregate(INJURIES, by = list(EVTYPE), FUN = sum))

# Lower-cased name of the event type with the highest total.
max_fatal <- tolower(as.character(fatal$Group.1[which.max(fatal$x)]))
max_injury <- tolower(as.character(injur$Group.1[which.max(injur$x)]))

# Named vectors (event type -> total), then only the totals above the cut-off.
x <- setNames(fatal$x, fatal$Group.1)
y <- setNames(injur$x, injur$Group.1)
fatal2 <- x[x > 300]
injur2 <- y[y > 6000]

# Two panels stacked vertically: fatalities on top, injuries below.
par(mfrow = c(2, 1), mgp = c(5, 1, 0), oma = c(2, 2, 0, 0))

barplot(
  fatal2,
  cex.names = 0.5, las = 2, space = 0,
  ylim = c(0, 6000), yaxp = c(0, 6000, 3),
  main = "Cumulative number of fatalities from 1950-2011"
)
mtext(expression(italic("Only includes events with cum fatalities of 300+")), cex = 0.8)

barplot(
  injur2,
  cex.names = 0.5, las = 2, space = 0,
  ylim = c(0, 100000), yaxp = c(0, 100000, 2),
  main = "Cumulative number of injuries from 1950-2011"
)
mtext(expression(italic("Only includes events with cum injuries of 6k+")), cex = 0.8)
# ---- Crop damage by event type ----
# Totals CROPDMG per EVTYPE, records the event type with the largest total,
# and plots the event types whose total exceeds 100000.
#
# NOTE(review): only CROPDMG is summed here. If the data set carries a
# separate magnitude/exponent column for damage amounts, the totals below
# would need scaling first -- confirm against the data documentation.

# Strongly prefer fixed over scientific notation when numbers are printed.
options(scipen = 999)

# Single panel with a wide bottom margin for the rotated event-type labels.
par(mfrow = c(1, 1), mar = c(7, 6, 4, 2), oma = c(2, 2, 0, 0), mgp = c(5, 1, 0))

# aggregate() returns columns Group.1 (event type) and x (total crop damage).
economic <- with(P2, aggregate(CROPDMG, by = list(EVTYPE), FUN = sum))
max_econ <- tolower(as.character(economic[which.max(economic$x), ]$Group.1))

z <- economic$x
# Label the totals with this table's own event types rather than the ones
# from the injuries table, so the labels cannot drift from the values.
names(z) <- economic$Group.1
economic2 <- z[z > 100000]

barplot(economic2, cex.names = 0.5, las = 2, space = 0, ylim = c(0, 600000), main = "Cumulative number of crop damage from 1950-2011", xlab = "Event type", ylab = "Cumulative crop damage")
mtext(expression(italic("Only includes events with cum dmg of 100k+")), cex = 0.8)
# ---- State-level summary ----
# Totals fatalities, injuries and crop damage per STATE, keeps the states that
# exceed the 75th percentile on all three measures, and prints a table of
# summary statistics for the per-state totals.
library("xtable")
library("plyr")

# Per-state totals; each result has columns Group.1 (state) and x (total).
fat_sum <- aggregate(P2$FATALITIES, by = list(P2$STATE), FUN = sum)
inj_sum <- aggregate(P2$INJURIES, by = list(P2$STATE), FUN = sum)
eco_sum <- aggregate(P2$CROPDMG, by = list(P2$STATE), FUN = sum)

# One row per state with the three totals side by side.
all_sum <- join_all(list(fat_sum, inj_sum, eco_sum), by = "Group.1")
names(all_sum) <- c("State", "Fatalities", "Injuries", "Econdmg")

# A state is kept only when every measure is above that measure's 75th
# percentile across states.
impact_cols <- c("Fatalities", "Injuries", "Econdmg")
above_q3 <- lapply(all_sum[impact_cols], function(v) v > quantile(v)[["75%"]])
new_sum <- all_sum[Reduce(`&`, above_q3), ]

# Six-number summary of each measure, one measure per row.
sum_row <- do.call(rbind, lapply(list(fat_sum$x, inj_sum$x, eco_sum$x), summary))
row.names(sum_row) <- c("Fatalities", "Injuries", "Econ dmg")
print(xtable(sum_row, digits = 0), type = "html")
| | Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|---|
| Fatalities | 0 | 12 | 86 | 210 | 314 | 1421 |
| Injuries | 0 | 44 | 520 | 1952 | 3217 | 17667 |
| Econ dmg | 0 | 24 | 3490 | 19136 | 15813 | 303950 |
# Render the filtered per-state table (new_sum) as HTML, with numbers shown
# without decimal places.
state_table <- xtable(new_sum, digits = 0)
print(state_table, type = "html")
| | State | Fatalities | Injuries | Econdmg |
|---|---|---|---|---|
| 5 | AR | 530 | 5550 | 25819 |
| 8 | CA | 550 | 3278 | 21152 |
| 22 | KS | 356 | 3449 | 139799 |
| 37 | MO | 754 | 8998 | 21087 |
| 38 | MS | 555 | 6675 | 56078 |
| 40 | NC | 398 | 3415 | 22246 |
| 48 | OH | 403 | 7112 | 28328 |
| 63 | TX | 1366 | 17667 | 156169 |