This report analyzes NOAA storm data from 1950 to 2011 to identify which types of weather events are most harmful to population health (fatalities and injuries) and which cause the greatest economic damage (property and crop losses). Tornadoes cause the most fatalities and injuries. Floods cause the most economic damage. The tables and charts below show the results.
# Load libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the data
# Show the contents of the working directory to confirm the data file name
file_list <- list.files(path = ".")
print(file_list)
## [1] "activity.csv" "pa1.html"
## [3] "pa1.rmd" "repdata_data_StormData.csv.bz2"
## [5] "Reproducible Research" "RStudio"
## [7] "storm_report.Rmd" "storm_report1.Rmd"
# Load the raw storm records straight from the compressed CSV
storm_data <- read.csv(file = "repdata_data_StormData.csv.bz2", header = TRUE)
# Preview the first six records
head(storm_data, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Sum fatalities and injuries per event type, then rank the event types by
# their combined count (largest first)
harm <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Fatalities = sum(FATALITIES, na.rm = TRUE),
    Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  # Total_Harm is the sum of the two per-event totals computed above
  mutate(Total_Harm = Fatalities + Injuries) %>%
  arrange(desc(Total_Harm))
## `summarise()` ungrouping output (override with `.groups` argument)
# Keep the ten highest-ranked event types and display them
top_harm <- harm %>% head(n = 10)
top_harm
## # A tibble: 10 x 4
## EVTYPE Fatalities Injuries Total_Harm
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Bar chart of combined fatalities and injuries for the ten event types above
barplot(
  height = top_harm[["Total_Harm"]],
  names.arg = top_harm[["EVTYPE"]],
  main = "Most Harmful Weather Events",
  ylab = "Total People Harmed",
  col = "red",
  las = 2 # axis labels perpendicular to the axis so event names fit
)
# Calculate damage
#
# PROPDMG and CROPDMG are scaled to dollars by the magnitude code stored in
# PROPDMGEXP and CROPDMGEXP respectively, added together per record, and then
# summed per event type in billions of dollars.

# Translate damage magnitude codes into numeric multipliers.
#   exponent: vector of magnitude codes (character or factor).
#   returns:  numeric vector of the same length as `exponent`: 1e3 for "K",
#             1e6 for "M", 1e9 for "B" (matched case-insensitively) and 1 for
#             every other value, including blank and NA.
damage_multiplier <- function(exponent) {
  scale_by_code <- c(K = 1e3, M = 1e6, B = 1e9)
  # as.character() makes a factor column match by label, not by integer code
  multiplier <- unname(scale_by_code[toupper(as.character(exponent))])
  # Codes without an entry in the lookup leave the amount unscaled
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

storm_data$PROPDMG_NUM <- storm_data$PROPDMG *
  damage_multiplier(storm_data$PROPDMGEXP)
# Crop damage must be scaled by its own code column (CROPDMGEXP); scaling it
# by PROPDMGEXP applies the property-damage magnitude to crop figures.
storm_data$CROPDMG_NUM <- storm_data$CROPDMG *
  damage_multiplier(storm_data$CROPDMGEXP)
storm_data$TOTAL_DAMAGE <- storm_data$PROPDMG_NUM + storm_data$CROPDMG_NUM

# Total damage per event type, in billions of dollars, largest first
damage <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(Total_Damage = sum(TOTAL_DAMAGE, na.rm = TRUE) / 1000000000) %>%
  arrange(desc(Total_Damage))
## `summarise()` ungrouping output (override with `.groups` argument)
# Keep the ten costliest event types and display them
top_damage <- damage %>% head(n = 10)
top_damage
## # A tibble: 10 x 2
## EVTYPE Total_Damage
## <chr> <dbl>
## 1 HURRICANE 815.
## 2 HURRICANE/TYPHOON 500.
## 3 FLOOD 180.
## 4 TORNADO 57.1
## 5 STORM SURGE 43.3
## 6 FLASH FLOOD 17.2
## 7 HAIL 16.9
## 8 WILDFIRE 11.4
## 9 RIVER FLOOD 10.1
## 10 HURRICANE OPAL/HIGH WINDS 10.1
# Bar chart of total damage for the ten event types above
barplot(
  height = top_damage[["Total_Damage"]],
  names.arg = top_damage[["EVTYPE"]],
  main = "Most Costly Weather Events",
  ylab = "Damage (Billions of Dollars)",
  col = "blue",
  las = 2 # axis labels perpendicular to the axis so event names fit
)