This study investigates the most harmful weather events in the United States in terms of public health and economic impact. The data are retrieved from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks the characteristics of major storms and weather events in the United States.
The analysis shows that tornadoes have the greatest impact on public health, while floods have the greatest economic impact.
# Read the raw storm records straight from the bzip2-compressed CSV.
file <- read.csv(file = "repdata_data_StormData.csv.bz2")
# Print the column names, types and leading values of the loaded table.
str(object = file)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Keep only the event type, the casualty counts, and the damage amounts with
# their exponent codes.
data <- select(
  file,
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP,
  CROPDMG, CROPDMGEXP
)
Only event type, fatalities, injuries, property damage, property damage exponents, crop damage, and crop damage exponents are selected, since these are all the information needed to answer the two questions.
# List the distinct property-damage exponent codes.
unique(data[["PROPDMGEXP"]])
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
Exponents and their corresponding factors
# Translate each property-damage exponent code into a numeric multiplier with
# a single vectorized lookup. Letter codes are matched case-insensitively, a
# digit code d means 10^d, and blank, "+", "?" and "-" leave the amount as is.
prop_exp_codes <- c("B", "M", "K", "H", "", "+", "?", "-", as.character(0:8))
prop_exp_factors <- c(10^9, 10^6, 10^3, 10^2, 1, 1, 1, 1, 10^(0:8))
data$PROPDMGFAC <- prop_exp_factors[
  match(toupper(data$PROPDMGEXP), prop_exp_codes)
]

# A code missing from the table gets an NA multiplier, and that NA would flow
# into every total computed from it, so report it instead of staying silent.
if (anyNA(data$PROPDMGFAC)) {
  warning(
    "Unrecognised PROPDMGEXP code(s): ",
    paste(unique(data$PROPDMGEXP[is.na(data$PROPDMGFAC)]), collapse = ", "),
    call. = FALSE
  )
}

# Scaled property damage: recorded amount times its multiplier.
data <- data %>%
  mutate(realPropDmg = PROPDMG * PROPDMGFAC)
# List the distinct crop-damage exponent codes.
unique(data[["CROPDMGEXP"]])
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
Exponents and their corresponding factors
# Translate each crop-damage exponent code into a numeric multiplier with a
# single vectorized lookup. The table is the same one used for property damage
# (letters matched case-insensitively, digit d means 10^d, blank/"+"/"?"/"-"
# leave the amount as is), so both damage columns are scaled by one rule.
crop_exp_codes <- c("B", "M", "K", "H", "", "+", "?", "-", as.character(0:8))
crop_exp_factors <- c(10^9, 10^6, 10^3, 10^2, 1, 1, 1, 1, 10^(0:8))
data$CROPDMGFAC <- crop_exp_factors[
  match(toupper(data$CROPDMGEXP), crop_exp_codes)
]

# A code missing from the table gets an NA multiplier, and that NA would flow
# into every total computed from it, so report it instead of staying silent.
if (anyNA(data$CROPDMGFAC)) {
  warning(
    "Unrecognised CROPDMGEXP code(s): ",
    paste(unique(data$CROPDMGEXP[is.na(data$CROPDMGFAC)]), collapse = ", "),
    call. = FALSE
  )
}

# Scaled crop damage: recorded amount times its multiplier.
data <- data %>%
  mutate(realCropDmg = CROPDMG * CROPDMGFAC)
# Per-record totals: crop plus property damage, and fatalities plus injuries.
data <- data %>%
  mutate(
    totalDmg = realCropDmg + realPropDmg,
    healthCost = FATALITIES + INJURIES
  )

# Casualties summed per event type, largest first.
healthData <- data %>%
  group_by(EVTYPE) %>%
  summarise(heaDmg = sum(healthCost)) %>%
  arrange(desc(heaDmg))

# Damage summed per event type, largest first.
econData <- data %>%
  group_by(EVTYPE) %>%
  summarise(econDmg = sum(totalDmg)) %>%
  arrange(desc(econDmg))
# Bar chart of the first ten rows of healthData, bars ordered from the
# largest casualty total to the smallest.
g <- ggplot(
  data = healthData[1:10, ],
  mapping = aes(x = reorder(EVTYPE, -heaDmg), y = heaDmg)
)
g +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(
    x = "Events",
    y = "Total Public Health Damage",
    title = "Public Health Damage by event type"
  )
The above plot shows the top 10 most harmful events in terms of public health.
The three most harmful event types are tornado, excessive heat, and TSTM WIND (thunderstorm wind).
# Bar chart of the first ten rows of econData, bars ordered from the largest
# damage total to the smallest.
g <- ggplot(
  data = econData[1:10, ],
  mapping = aes(x = reorder(EVTYPE, -econDmg), y = econDmg)
)
g +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(
    x = "Events",
    y = "Total Economic Damage",
    title = "Economic Damage by event type"
  )
The above plot shows the top 10 most harmful events in terms of economic losses.
The three most harmful event types are flood, hurricane/typhoon, and tornado.