This is the code to complete Reproducible Research Peer Assessment 2.
The assignment involves exploring the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The data for this assignment can be found at: Storm Data
There is also some documentation of the database available. Here you will find how some of the variables are constructed/defined.
National Weather Service Storm Data Documentation
National Climatic Data Center Storm Events FAQ
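This data analysis will address the following questions:
1. Across the United States, which types of events are most harmful with respect to population health (fatalities and injuries)?
2. Across the United States, which types of events have the greatest economic consequences (property and crop damage)?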
# Read the bzip2-compressed storm CSV and keep only its first 28 columns.
storm_data <- read.csv(file = bzfile("repdata-data-StormData.csv.bz2"))
storm_data <- storm_data[, seq_len(28)]

# Lower-case every column name so later code can use one spelling, then
# print the resulting names.
colnames(storm_data) <- tolower(colnames(storm_data))
colnames(storm_data)
## [1] "state__" "bgn_date" "bgn_time" "time_zone" "county"
## [6] "countyname" "state" "evtype" "bgn_range" "bgn_azi"
## [11] "bgn_locati" "end_date" "end_time" "county_end" "countyendn"
## [16] "end_range" "end_azi" "end_locati" "length" "width"
## [21] "f" "mag" "fatalities" "injuries" "propdmg"
## [26] "propdmgexp" "cropdmg" "cropdmgexp"
# Preview the first five records of the imported data.
head(storm_data, n = 5)
## state__ bgn_date bgn_time time_zone county countyname state
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## evtype bgn_range bgn_azi bgn_locati end_date end_time county_end
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## countyendn end_range end_azi end_locati length width f mag fatalities
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## injuries propdmg propdmgexp cropdmg cropdmgexp
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
# Population-health impact: total fatalities and total injuries per event
# type (evtype), summed over every record in storm_data.
harm_data <- storm_data[, c("evtype", "fatalities", "injuries")]
fatalities_summary <- aggregate(fatalities ~ evtype, data = harm_data, FUN = sum)
injuries_summary <- aggregate(injuries ~ evtype, data = harm_data, FUN = sum)

# Keep the eight event types with the largest totals, largest first.
# head() is used rather than [1:8, ] so that a data set with fewer than
# eight event types does not get padded with all-NA rows.
top_8_evtype_fatalities <- head(
  fatalities_summary[order(-fatalities_summary$fatalities), ],
  8
)
top_8_evtype_injuries <- head(
  injuries_summary[order(-injuries_summary$injuries), ],
  8
)
# Convert damage amounts and their exponent codes into billions of USD.
#
#   amount   - numeric vector of damage figures (propdmg or cropdmg).
#   exponent - vector of magnitude codes (propdmgexp or cropdmgexp).
#
# Returns a numeric vector the same length as `amount`. A code containing
# K/k, M/m or B/b scales the amount as thousands, millions or billions of
# dollars; any other code leaves the amount as plain dollars. The patterns
# are applied in K, M, B order, so if a code matches more than one pattern
# the last match wins.
damage_in_billions <- function(amount, exponent) {
  billions <- amount * 1e-09
  billions_per_unit <- c(K = 1e-06, M = 0.001, B = 1)
  for (code in names(billions_per_unit)) {
    matches <- grepl(code, exponent, ignore.case = TRUE)
    # Assign into the vector directly; this avoids copying whole row subsets
    # of the data frame for every exponent code.
    billions[matches] <- amount[matches] * billions_per_unit[[code]]
  }
  billions
}

# Property and crop damage per record, both expressed in billions of USD.
storm_data$prop_impact <- damage_in_billions(
  storm_data$propdmg,
  storm_data$propdmgexp
)
storm_data$crop_impact <- damage_in_billions(
  storm_data$cropdmg,
  storm_data$cropdmgexp
)

# Economic impact of a record is its property damage plus its crop damage.
storm_data$economic_impact <- storm_data$prop_impact + storm_data$crop_impact

# Total economic impact per event type, then the eight largest totals.
# head() avoids the all-NA padding rows that [1:8, ] would produce when
# fewer than eight event types are present.
economic_impact_summary <- aggregate(
  economic_impact ~ evtype,
  data = storm_data,
  FUN = sum
)
top_8_evtype_economic_impact <- head(
  economic_impact_summary[order(-economic_impact_summary$economic_impact), ],
  8
)
# One pie chart per question, each showing the eight leading event types.
# Every chart gets a title so the three figures can be told apart.
pie(
  top_8_evtype_fatalities$fatalities,
  labels = top_8_evtype_fatalities$evtype,
  main = "Top 8 event types by total fatalities"
)
pie(
  top_8_evtype_injuries$injuries,
  labels = top_8_evtype_injuries$evtype,
  main = "Top 8 event types by total injuries"
)

# Economic labels carry the total next to the event name. The total is
# rounded to two decimals and joined with paste0() so the label has single
# spaces and no long floating-point tail.
economic_labels <- paste0(
  top_8_evtype_economic_impact$evtype,
  " = ",
  sprintf("%.2f", top_8_evtype_economic_impact$economic_impact),
  " billions of USD"
)
pie(
  top_8_evtype_economic_impact$economic_impact,
  labels = economic_labels,
  main = "Top 8 event types by total economic impact"
)