Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. By exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, this project aims to identify the types of weather events that are most harmful with respect to population health and those that have the greatest economic consequences across the United States.
# Source archive: NOAA storm database as a bzip2-compressed CSV.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Local copy of the archive; read.csv() is given the compressed file directly.
destfile <- "NOAAStromData.csv.bz2"
# Download only when no local copy exists, so re-running the analysis does not
# fetch the archive again.
if (!file.exists(destfile)) {
  download.file(url, destfile, method = "curl")
}
data <- read.csv(destfile)
# Preview the first rows to check the column layout.
head(data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
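To group similar event types together, event names are converted to upper case, slashes are replaced with spaces, and periods, a trailing "S", and a trailing " MIX" are removed. Names with three or more words are then shortened to their first two words.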
# Keep the event type and impact columns, restrict to events with at least one
# fatality or injury, and normalise EVTYPE so that spelling variants of the
# same event fall into one group. The result stays grouped by EVTYPE.
dataC <- data %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG) %>%
  mutate(EVTYPE = toupper(EVTYPE)) %>%
  filter(FATALITIES > 0 | INJURIES > 0) %>%
  # Treat "A/B" as "A B".
  mutate(EVTYPE = gsub("/", " ", EVTYPE)) %>%
  # Drop a trailing "S", a trailing " MIX", and every period.
  mutate(EVTYPE = gsub("S$| MIX$|\\.", "", EVTYPE)) %>%
  # Shorten names of three or more words to the first two words.
  # [A-Z] is used instead of [A-z]: in ASCII the range A-z also covers the
  # punctuation between "Z" and "a" ([ \ ] ^ _ `), and EVTYPE is already
  # upper-cased at this point.
  mutate(EVTYPE = gsub("([A-Z]+) ([A-Z]+) .*", "\\1 \\2", EVTYPE)) %>%
  group_by(EVTYPE)
dataC
## # A tibble: 21,929 x 4
## # Groups: EVTYPE [174]
## EVTYPE FATALITIES INJURIES PROPDMG
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 0 15 25
## 2 TORNADO 0 2 25
## 3 TORNADO 0 2 2.5
## 4 TORNADO 0 2 2.5
## 5 TORNADO 0 6 2.5
## 6 TORNADO 0 1 2.5
## 7 TORNADO 1 14 25
## 8 TORNADO 0 3 2.5
## 9 TORNADO 0 3 2.5
## 10 TORNADO 1 26 250
## # … with 21,919 more rows
# Combined casualty count (fatalities plus injuries) for each group of dataC.
datahealth <- summarise(dataC, total = sum(FATALITIES, INJURIES))
# Rank the event types from the largest combined count to the smallest.
datahealth <- arrange(datahealth, desc(total))
# First row, first column of the ranked table: the top-ranked event type.
maxH <- datahealth[1, 1]
datahealth
## # A tibble: 174 x 2
## EVTYPE total
## <chr> <dbl>
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6047
## 6 HEAT 3037
## 7 FLASH FLOOD 2757
## 8 THUNDERSTORM WIND 2593
## 9 ICE STORM 2064
## 10 HIGH WIND 1722
## # … with 164 more rows
Stacked bar chart of the top 5 weather events by total fatalities and injuries
# Per-event totals, with fatalities and injuries kept as separate columns so
# that they can be stacked in the chart.
datahealthTotal <- dataC %>%
  summarise(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES))
datahealthTotal <- as.data.frame(datahealthTotal)
# Long format: one row per (event type, casualty category) pair.
datahealthMelt <- melt(datahealthTotal, id = c("EVTYPE"))
names(datahealthMelt) <- c("EVTYPE", "category", "total")
# Plot only the five event types ranked highest in datahealth. Subsetting
# before plotting, rather than passing the five labels to xlim(), keeps ggplot
# from discarding every other row with a "Removed ... rows" warning.
top5Health <- head(datahealth$EVTYPE, 5)
datahealthTop <- datahealthMelt[datahealthMelt$EVTYPE %in% top5Health, ]
# Factor levels fix the bar order to the ranking in datahealth.
datahealthTop$EVTYPE <- factor(datahealthTop$EVTYPE, levels = top5Health)
# Stacked bar chart: one bar per event type, split by casualty category.
ggplot(data = datahealthTop, aes(x = EVTYPE, y = total, fill = category)) +
  geom_bar(stat = "identity") +
  labs(x = "Weather event", title = "The top 5 weather events which caused fatalities and injuries") +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 338 rows containing missing values (position_stack).
# Total of the PROPDMG column for each group of dataC, truncated to an integer.
# NOTE(review): dataC only contains rows with FATALITIES > 0 or INJURIES > 0,
# and PROPDMGEXP is not among its columns, so this total is the raw PROPDMG
# figure for casualty-causing events only -- confirm that this is the intended
# measure of economic impact.
dataEconomic <- summarise(dataC, total = as.integer(sum(PROPDMG)))
# Rank the event types from the largest total to the smallest.
dataEconomic <- arrange(dataEconomic, desc(total))
# First row, first column of the ranked table: the top-ranked event type.
maxE <- dataEconomic[1, 1]
dataEconomic
## # A tibble: 174 x 2
## EVTYPE total
## <chr> <int>
## 1 TORNADO 877436
## 2 TSTM WIND 107858
## 3 THUNDERSTORM WIND 70765
## 4 FLASH FLOOD 61665
## 5 HIGH WIND 48131
## 6 FLOOD 29231
## 7 LIGHTNING 20320
## 8 WILDFIRE 19527
## 9 WINTER STORM 15463
## 10 HEAVY SNOW 11036
## # … with 164 more rows
Bar chart of the top 5 weather events which have the greatest economic consequences
# Total of the PROPDMG column for each group of dataC.
dataEconTotal <- dataC %>%
  summarise(total = sum(PROPDMG))
# Plot only the five event types ranked highest in dataEconomic. Subsetting
# before plotting, rather than passing the five labels to xlim(), keeps ggplot
# from discarding every other row with a "Removed ... rows" warning.
top5Econ <- head(dataEconomic$EVTYPE, 5)
dataEconTop <- dataEconTotal[dataEconTotal$EVTYPE %in% top5Econ, ]
# Factor levels fix the bar order to the ranking in dataEconomic.
dataEconTop$EVTYPE <- factor(dataEconTop$EVTYPE, levels = top5Econ)
# Simple bar chart: one bar per event type.
ggplot(data = dataEconTop, aes(x = EVTYPE, y = total)) +
  geom_bar(stat = "identity") +
  labs(x = "Weather event", title = "The top 5 weather events which have the greatest economic consequences") +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 169 rows containing missing values (position_stack).
Across the United States, TORNADO events are the most harmful with respect to population health.
Across the United States, TORNADO events have the greatest economic consequences.