This analysis used the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database to explore the impact of severe weather events on population health and the economy. Specifically, it showed the top 10 causes of fatalities and injuries, as well as of property and crop damage.
# Download the compressed storm database only when no local copy exists.
if (!file.exists("StormData.csv.bz2")) {
  fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(fileUrl, destfile = "StormData.csv.bz2", method = "curl")
}
# bunzip2() deletes its input by default (remove = TRUE). Keeping the archive
# lets the file.exists() guard above actually skip the download on re-runs.
R.utils::bunzip2(
  filename = "StormData.csv.bz2", destname = "Storm_Data.csv",
  overwrite = TRUE, remove = FALSE
)
# Load the decompressed CSV and print a per-column overview.
data <- read.csv("Storm_Data.csv", header = TRUE, sep = ",")
summary(data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE
## Min. : 1.0 5/25/2011 0:00:00: 1202 12:00:00 AM: 10163 CST :547493
## 1st Qu.:19.0 4/27/2011 0:00:00: 1193 06:00:00 PM: 7350 EST :245558
## Median :30.0 6/9/2011 0:00:00 : 1030 04:00:00 PM: 7261 MST : 68390
## Mean :31.2 5/30/2004 0:00:00: 1016 05:00:00 PM: 6891 PST : 28302
## 3rd Qu.:45.0 4/4/2011 0:00:00 : 1009 12:00:00 PM: 6703 AST : 6360
## Max. :95.0 4/2/2006 0:00:00 : 981 03:00:00 PM: 6700 HST : 2563
## (Other) :895866 (Other) :857229 (Other): 3631
## COUNTY COUNTYNAME STATE EVTYPE
## Min. : 0 JEFFERSON : 7840 TX : 83728 HAIL :288661
## 1st Qu.: 31 WASHINGTON: 7603 KS : 53440 TSTM WIND :219940
## Median : 75 JACKSON : 6660 OK : 46802 THUNDERSTORM WIND: 82563
## Mean :101 FRANKLIN : 6256 MO : 35648 TORNADO : 60652
## 3rd Qu.:131 LINCOLN : 5937 IA : 31069 FLASH FLOOD : 54277
## Max. :873 MADISON : 5632 NE : 30271 FLOOD : 25326
## (Other) :862369 (Other):621339 (Other) :170878
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## Min. : 0 :547332 :287743 :243411
## 1st Qu.: 0 N : 86752 COUNTYWIDE : 19680 4/27/2011 0:00:00: 1214
## Median : 0 W : 38446 Countywide : 993 5/25/2011 0:00:00: 1196
## Mean : 1 S : 37558 SPRINGFIELD : 843 6/9/2011 0:00:00 : 1021
## 3rd Qu.: 1 E : 33178 SOUTH PORTION: 810 4/4/2011 0:00:00 : 1007
## Max. :3749 NW : 24041 NORTH PORTION: 784 5/30/2004 0:00:00: 998
## (Other):134990 (Other) :591444 (Other) :653450
## END_TIME COUNTY_END COUNTYENDN END_RANGE END_AZI
## :238978 Min. :0 Mode:logical Min. : 0 :724837
## 06:00:00 PM: 9802 1st Qu.:0 NA's:902297 1st Qu.: 0 N : 28082
## 05:00:00 PM: 8314 Median :0 Median : 0 S : 22510
## 04:00:00 PM: 8104 Mean :0 Mean : 1 W : 20119
## 12:00:00 PM: 7483 3rd Qu.:0 3rd Qu.: 0 E : 20047
## 11:59:00 PM: 7184 Max. :0 Max. :925 NE : 14606
## (Other) :622432 (Other): 72096
## END_LOCATI LENGTH WIDTH F
## :499225 Min. : 0.0 Min. : 0 Min. :0
## COUNTYWIDE : 19731 1st Qu.: 0.0 1st Qu.: 0 1st Qu.:0
## SOUTH PORTION : 833 Median : 0.0 Median : 0 Median :1
## NORTH PORTION : 780 Mean : 0.2 Mean : 8 Mean :1
## CENTRAL PORTION: 617 3rd Qu.: 0.0 3rd Qu.: 0 3rd Qu.:1
## SPRINGFIELD : 575 Max. :2315.0 Max. :4400 Max. :5
## (Other) :380536 NA's :843563
## MAG FATALITIES INJURIES PROPDMG PROPDMGEXP
## Min. : 0 Min. : 0 Min. : 0.0 Min. : 0 :465934
## 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0.0 1st Qu.: 0 K :424665
## Median : 50 Median : 0 Median : 0.0 Median : 0 M : 11330
## Mean : 47 Mean : 0 Mean : 0.2 Mean : 12 0 : 216
## 3rd Qu.: 75 3rd Qu.: 0 3rd Qu.: 0.0 3rd Qu.: 0 B : 40
## Max. :22000 Max. :583 Max. :1700.0 Max. :5000 5 : 28
## (Other): 84
## CROPDMG CROPDMGEXP WFO
## Min. : 0.0 :618413 :142069
## 1st Qu.: 0.0 K :281832 OUN : 17393
## Median : 0.0 M : 1994 JAN : 13889
## Mean : 1.5 k : 21 LWX : 13174
## 3rd Qu.: 0.0 0 : 19 PHI : 12551
## Max. :990.0 B : 9 TSA : 12483
## (Other): 9 (Other):690738
## STATEOFFIC
## :248769
## TEXAS, North : 12193
## ARKANSAS, Central and North Central: 11738
## IOWA, Central : 11345
## KANSAS, Southwest : 11212
## GEORGIA, North and Central : 11120
## (Other) :595920
## ZONENAMES
## :594029
## :205988
## GREATER RENO / CARSON CITY / M - GREATER RENO / CARSON CITY / M : 639
## GREATER LAKE TAHOE AREA - GREATER LAKE TAHOE AREA : 592
## JEFFERSON - JEFFERSON : 303
## MADISON - MADISON : 302
## (Other) :100444
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_
## Min. : 0 Min. :-14451 Min. : 0 Min. :-14455
## 1st Qu.:2802 1st Qu.: 7247 1st Qu.: 0 1st Qu.: 0
## Median :3540 Median : 8707 Median : 0 Median : 0
## Mean :2875 Mean : 6940 Mean :1452 Mean : 3509
## 3rd Qu.:4019 3rd Qu.: 9605 3rd Qu.:3549 3rd Qu.: 8735
## Max. :9706 Max. : 17124 Max. :9706 Max. :106220
## NA's :47 NA's :40
## REMARKS REFNUM
## :287433 Min. : 1
## : 24013 1st Qu.:225575
## Trees down.\n : 1110 Median :451149
## Several trees were blown down.\n : 568 Mean :451149
## Trees were downed.\n : 446 3rd Qu.:676723
## Large trees and power lines were blown down.\n: 432 Max. :902297
## (Other) :588295
Convert the damage amounts into numeric dollar values by applying the magnitude codes stored in PROPDMGEXP and CROPDMGEXP as powers of ten.
# Translate damage magnitude codes into powers of ten.
#
# Accepts a factor or character vector, so it works whether or not read.csv()
# converted strings to factors. Letter codes are matched case-insensitively
# (H = 2, K = 3, M = 6, B = 9), digit codes are used as the exponent itself,
# and "", "-", "?", "+" map to 0. Any other code yields NA.
damage_exponent <- function(code) {
  code <- toupper(as.character(code))
  letter_power <- c(H = 2, K = 3, M = 6, B = 9)
  power <- rep(NA_real_, length(code))

  is_letter <- code %in% names(letter_power)
  power[is_letter] <- unname(letter_power[code[is_letter]])

  is_digit <- grepl("^[0-9]+$", code)
  power[is_digit] <- as.numeric(code[is_digit])

  power[code %in% c("", "-", "?", "+")] <- 0
  power
}

# Property damage: numeric exponent, then mantissa * 10^exponent.
data$PROPDMGEXPr <- damage_exponent(data$PROPDMGEXP)
data$PROPDMGs <- data$PROPDMG * 10^data$PROPDMGEXPr

# Crop damage: same conversion.
data$CROPDMGEXPr <- damage_exponent(data$CROPDMGEXP)
data$CROPDMGs <- data$CROPDMG * 10^data$CROPDMGEXPr
# Total fatalities per event type, ranked from highest to lowest.
fatal <- tapply(data$FATALITIES, data$EVTYPE, sum)
fatal_ranked <- fatal[order(fatal, decreasing = TRUE)]
# Guard the slice so fewer than 10 event types does not pad the result with NA.
fatal_10 <- fatal_ranked[seq_len(min(10, length(fatal_ranked)))]
fatal_names <- factor(names(fatal_10), levels = names(fatal_10))

# Total injuries per event type, ranked the same way.
injuries <- tapply(data$INJURIES, data$EVTYPE, sum)
injuries_ranked <- injuries[order(injuries, decreasing = TRUE)]
injuries_10 <- injuries_ranked[seq_len(min(10, length(injuries_ranked)))]
injuries_names <- factor(names(injuries_10), levels = names(injuries_10))

# Two horizontal bar charts side by side: fatalities, then injuries.
par(mfrow = c(1, 2))
barplot(
  fatal_10,
  names.arg = fatal_names, ylab = "", xlab = "# of people",
  cex.names = 0.6, cex.lab = 0.7, cex.axis = 0.7, cex.main = 1.5,
  main = "Top 10 causes of fatalities", horiz = TRUE, las = 1
)
barplot(
  injuries_10,
  names.arg = injuries_names, ylab = "", xlab = "# of people",
  cex.names = 0.6, cex.lab = 0.7, cex.axis = 0.7, cex.main = 1.5,
  main = "Top 10 causes of injuries", horiz = TRUE, las = 1
)
# Total property damage (dollars) per event type, ranked from highest to lowest.
prop <- tapply(data$PROPDMGs, data$EVTYPE, sum)
prop_ranked <- prop[order(prop, decreasing = TRUE)]
# Guard the slice so fewer than 10 event types does not pad the result with NA.
prop_10 <- prop_ranked[seq_len(min(10, length(prop_ranked)))]
prop_names <- factor(names(prop_10), levels = names(prop_10))

# Total crop damage (dollars) per event type, ranked the same way.
crop <- tapply(data$CROPDMGs, data$EVTYPE, sum)
crop_ranked <- crop[order(crop, decreasing = TRUE)]
crop_10 <- crop_ranked[seq_len(min(10, length(crop_ranked)))]
crop_names <- factor(names(crop_10), levels = names(crop_10))

# Two horizontal bar charts side by side: property damage, then crop damage.
par(mfrow = c(1, 2))
barplot(
  prop_10,
  names.arg = prop_names, ylab = "", xlab = "dollars",
  cex.names = 0.6, cex.lab = 0.7, cex.axis = 0.7, cex.main = 1.5,
  main = "Top 10 causes of property damage", horiz = TRUE, las = 1
)
barplot(
  crop_10,
  names.arg = crop_names, ylab = "", xlab = "dollars",
  cex.names = 0.6, cex.lab = 0.7, cex.axis = 0.7, cex.main = 1.5,
  main = "Top 10 causes of crop damage", horiz = TRUE, las = 1
)
# Print the R version, platform, locale and attached/loaded packages used for
# this run, so the analysis environment is recorded alongside the results.
sessionInfo()
## R version 3.1.0 (2014-04-10)
## Platform: x86_64-apple-darwin13.1.0 (64-bit)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] codetools_0.2-8 car_2.0-20 R.utils_1.29.8 R.oo_1.18.0
## [5] R.methodsS3_1.6.1 lattice_0.20-29 ggplot2_0.9.3.1 knitr_1.5
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.2-4 digest_0.6.4 evaluate_0.5.3 formatR_0.10 grid_3.1.0
## [6] gtable_0.1.2 MASS_7.3-32 munsell_0.4.2 nnet_7.3-8 plyr_1.8.1
## [11] proto_0.3-10 Rcpp_0.11.1 reshape2_1.4 scales_0.2.4 stringr_0.6.2
## [16] tools_3.1.0