In this study, we investigate the impact of weather events on population health and the economy in the US from 1950 to 2011. The data were obtained from the National Oceanic and Atmospheric Administration’s (NOAA) storm database, which tracks characteristics of major storms and weather events in the United States. From these data, we found that tornadoes/hurricanes cause the greatest total number of fatalities and injuries, far more than any other weather type. In terms of the economy, floods cause the highest total economic damage, followed by tornadoes/hurricanes and storms.
Load packages
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
Read the raw data file (either ~.csv or ~.csv.bz2) into R
# Load the storm data, preferring the already-decompressed CSV when it is
# present and falling back to the bzip2-compressed archive otherwise.
csv_path <- "repdata-data-StormData.csv"
StormData <- if (file.exists(csv_path)) {
  read.csv(csv_path)
} else {
  read.csv(bzfile("repdata-data-StormData.csv.bz2"))
}
View the structure of the data
# Print a compact overview of every column: its type and first few values.
str(StormData)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Report the number of rows and columns in the raw data.
dim(StormData)
## [1] 902297 37
# Preview the first rows of the raw data.
head(StormData)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
Categorize “EVTYPE” into 18 types, which are stored in a new column called “event”. Event types that do not fall into any of the 18 types are labelled “Others”.
# Map each raw EVTYPE label onto one of 18 broad weather categories, stored in
# a new `event` column. Labels that match no rule stay "Others".
#
# Each rule is `category = regular expression`, matched case-insensitively
# against EVTYPE. Rules are applied top to bottom and a later match overwrites
# an earlier one, so the order encodes priority: for example a label containing
# both "WIND" and "STORM" ends up as "Storm".
event_rules <- c(
  "Flood"             = "FLOOD|FLD",
  "Hail"              = "HAIL",
  "Tornado_Hurricane" = "TORNADO|WATERSPOUT|TYPHOON|HURRICANE|TORNDAO",
  "Wind"              = "WIND",
  "Lightning"         = "LIGHTNING|LIGHTING|LIGNTNING",
  "Snow_avalanche"    = "SNOW|AVALANCHE|AVALANCE",
  "Rain"              = "RAIN",
  "Drought"           = "DROUGHT",
  "Storm"             = "STORM",
  "fog"               = "FOG",
  "Cloud"             = "CLOUD",
  # "SEAS" is anchored as a whole word: unanchored it also matched inside
  # "UNSEASONABLY" / "UNSEASONAL" and mislabelled those events as high surf.
  "High surf"         = "\\bSEAS\\b|surf|current",
  "Fire"              = "FIRE",
  "Blizzard"          = "BLIZZARD",
  # "LOW" is anchored at a word start: unanchored it also matched inside
  # "BLOWING", "BELOW" and "BLOW-OUT" and mislabelled those events as cold.
  "Winter_cold"       = "WINTER|COOL|\\bLOW|COLD|Wintry|Freeze|FROST|ICY",
  "Heat"              = "HEAT|Warmth|HOT|WARM",
  "Dry"               = "DRY",
  "Landslide"         = "LANDSLIDE"
)

StormData$event <- "Others"
for (category in names(event_rules)) {
  is_match <- grepl(event_rules[[category]], StormData$EVTYPE,
                    ignore.case = TRUE)
  StormData$event[is_match] <- category
}
Convert property damage into dollars (K=thousand; M=million; B=billion)
# Scale PROPDMG to dollars according to the unit letter found in PROPDMGEXP
# (K = thousand, M = million, B = billion; matched case-insensitively).
# Rows whose PROPDMGEXP contains none of these letters keep the raw PROPDMG.
property_units <- c(K = 1000, M = 1000000, B = 1000000000)
property_dollars <- StormData$PROPDMG
for (unit_letter in names(property_units)) {
  uses_unit <- grepl(unit_letter, StormData$PROPDMGEXP, ignore.case = TRUE)
  property_dollars[uses_unit] <-
    property_dollars[uses_unit] * property_units[[unit_letter]]
}
StormData$property_damage <- property_dollars
Convert crop damage into dollars (K=thousand; M=million; B=billion)
# Scale CROPDMG to dollars according to the unit letter found in CROPDMGEXP
# (K = thousand, M = million, B = billion; matched case-insensitively).
# Rows whose CROPDMGEXP contains none of these letters keep the raw CROPDMG.
# The result is stored in the `corp_damage` column, which later code reads
# under that exact name.
crop_units <- c(K = 1000, M = 1000000, B = 1000000000)
crop_dollars <- StormData$CROPDMG
for (crop_unit in names(crop_units)) {
  has_crop_unit <- grepl(crop_unit, StormData$CROPDMGEXP, ignore.case = TRUE)
  crop_dollars[has_crop_unit] <-
    crop_dollars[has_crop_unit] * crop_units[[crop_unit]]
}
StormData$corp_damage <- crop_dollars
Summarize the total effects of each weather type on fatalities, injuries, and economic damages.
# Total fatalities and injuries per weather category (one row per `event`).
# Missing counts are ignored when summing.
Summary_health <- summarize(
  group_by(StormData, event),
  fatalities = sum(FATALITIES, na.rm = TRUE),
  injuries = sum(INJURIES, na.rm = TRUE)
)
# Total property and crop damage in dollars per weather category (one row per
# `event`). Missing amounts are ignored when summing.
# The crop total is named `crop` (previously misspelled `corp`) because this
# column name becomes the fill label shown in the Figure 2 legend.
Summary_economy <- StormData %>%
  group_by(event) %>%
  summarize(
    property = sum(property_damage, na.rm = TRUE),
    crop = sum(corp_damage, na.rm = TRUE)
  )
Tidy the data using the “gather” function.
# Reshape both summaries to long format: every column except `event` is
# stacked into a key column (the measure name) and a value column.
Summary_health_tide <- Summary_health %>%
  gather(key = "Health_type", value = "Number", -event)
Summary_economy_tide <- Summary_economy %>%
  gather(key = "Economic_type", value = "Cost", -event)
Use ggplot to plot the total effects of each weather type on fatalities, injuries, and economic damages.
# Figure 1: horizontal stacked bars of fatalities and injuries per category.
g1 <- ggplot(
  data = Summary_health_tide,
  mapping = aes(x = event, y = Number, fill = Health_type)
)
g1 +
  geom_bar(stat = "identity") +
  coord_flip()
Figure 1 legend: The total number of fatalities and injuries for each weather type. Fatalities are shown in red, while injuries are shown in green.
# Figure 2: horizontal stacked bars of economic damage per category.
g2 <- ggplot(
  data = Summary_economy_tide,
  mapping = aes(x = event, y = Cost, fill = Economic_type)
)
g2 +
  geom_bar(stat = "identity") +
  coord_flip()
Figure 2 legend: The total economic damage for each weather type. Crop damage is shown in red, while property damage is shown in green.