This document presents a data analysis based on the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The goal of this analysis is to find out which types of events have been most harmful to public health and which have had the greatest economic consequences.
# Load the NOAA storm data into `df`.
# unzip() only understands zip archives, so it cannot extract the bzip2
# download. read.csv() opens a file name through file(), which detects bzip2
# compression when reading, so the .bz2 archive is read directly whenever the
# plain CSV is not already present.
csv_path <- 'repdata_data_StormData.csv'
bz2_path <- 'repdata_data_StormData.csv.bz2'
df <- read.csv(if (file.exists(csv_path)) csv_path else bz2_path)
head(df)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Keep only the event type and the two casualty counts used for the
# population-health analysis. Columns are selected by name rather than by
# position so the subset does not silently change if the column order does.
dfHealth <- df[, c("EVTYPE", "FATALITIES", "INJURIES")]
head(dfHealth)
## EVTYPE FATALITIES INJURIES
## 1 TORNADO 0 15
## 2 TORNADO 0 0
## 3 TORNADO 0 2
## 4 TORNADO 0 2
## 5 TORNADO 0 2
## 6 TORNADO 0 6
# Total fatalities and injuries per event type, sorted from the largest
# combined casualty count to the smallest.
# NOTE(review): group_by/summarise/arrange, melt, ggplot and grid.arrange are
# used without a visible library() call -- confirm the packages are attached
# earlier in the document.
dfHealth1 <- arrange(
  summarise(
    group_by(dfHealth, EVTYPE),
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES)
  ),
  desc(INJURIES + FATALITIES)
)

# Keep the first ten rows of the ranking and freeze the factor levels in that
# order.
dfHealth1 <- dfHealth1[seq_len(10), ]
dfHealth1[["EVTYPE"]] <- factor(
  dfHealth1[["EVTYPE"]],
  levels = dfHealth1[["EVTYPE"]]
)

# Long format: one row per (event type, measure) pair for plotting.
dfHealth2 <- melt(
  dfHealth1[, c("EVTYPE", "FATALITIES", "INJURIES")],
  id.vars = "EVTYPE"
)

# Side-by-side bars: fatalities and injuries shown next to each other.
p1 <- ggplot(dfHealth2, aes(x = EVTYPE, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "Event type", title = "Harmful events w.r.t population health") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

# Stacked bars: the same data with both measures piled into one bar.
p2 <- ggplot(dfHealth2, aes(x = EVTYPE, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(values = c("yellow", "black")) +
  labs(
    x = "Event type",
    title = "Harmful events w.r.t population health(stack)"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

grid.arrange(p1, p2, ncol = 2)
# Keep only the event type and the property/crop damage amounts with their
# exponent codes. Columns are selected by name rather than by position so the
# subset does not silently change if the column order does.
dfEco <- df[, c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]
head(dfEco)
## EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 25.0 K 0
## 2 TORNADO 2.5 K 0
## 3 TORNADO 25.0 K 0
## 4 TORNADO 2.5 K 0
## 5 TORNADO 2.5 K 0
## 6 TORNADO 2.5 K 0
# Frequency of each property-damage exponent code.
table(dfEco[["PROPDMGEXP"]])
##
## - ? + 0 1 2 3 4 5 6
## 465934 1 8 5 216 25 13 4 4 28 4
## 7 8 B h H K m M
## 5 1 40 1 6 424665 7 11330
# Frequency of each crop-damage exponent code.
table(dfEco[["CROPDMGEXP"]])
##
## ? 0 2 B k K m M
## 618413 7 19 1 9 21 281832 1 1994
# Turn the damage exponent codes into numeric multipliers and compute the
# scaled damage values.
# Codes are matched case-insensitively and identically for property and crop
# damage: H -> 100, K -> 1e3, M -> 1e6, B -> 1e9. Any other code (digits, "-",
# "?", "+", blank) or a missing value is bucketed as "O" and leaves the amount
# unscaled (multiplier 1).
exp_multiplier <- c(H = 100, K = 1e3, M = 1e6, B = 1e9, O = 1)

# Normalised exponent code per record; unrecognised codes become NA in
# factor() and are then relabelled "O".
dfEco$propFac <- factor(
  toupper(as.character(dfEco$PROPDMGEXP)),
  levels = names(exp_multiplier)
)
dfEco$propFac[is.na(dfEco$propFac)] <- "O"
dfEco$cropFac <- factor(
  toupper(as.character(dfEco$CROPDMGEXP)),
  levels = names(exp_multiplier)
)
dfEco$cropFac[is.na(dfEco$cropFac)] <- "O"

# Look up the multiplier for every record by its normalised code.
dfEco$property <- unname(exp_multiplier[as.character(dfEco$propFac)])
dfEco$crop <- unname(exp_multiplier[as.character(dfEco$cropFac)])

# Dividing by 1e6 expresses both damage values in millions.
dfEco$propertyVal <- dfEco$PROPDMG * dfEco$property / 1e6
dfEco$cropVal <- dfEco$CROPDMG * dfEco$crop / 1e6
# Sum the per-record property and crop damage values by event type and sort
# from the largest combined damage to the smallest.
dfEco2 <- dfEco %>%
  group_by(EVTYPE) %>%
  summarise(
    propVal = sum(propertyVal, na.rm = TRUE),
    cropVal = sum(cropVal, na.rm = TRUE)
  ) %>%
  arrange(desc(propVal + cropVal))

# Keep the first ten rows of the ranking (the original repeated this
# subsetting twice; once is enough) and freeze the factor levels in that
# order.
dfEco2 <- dfEco2[1:10, ]
dfEco2$EVTYPE <- factor(dfEco2$EVTYPE, levels = dfEco2$EVTYPE)

# Long format: one row per (event type, damage kind) pair for plotting.
dfEco3 <- melt(dfEco2[, c('EVTYPE', 'propVal', 'cropVal')], id.vars = 1)

# Side-by-side bars: property and crop damage shown next to each other.
p3 <- ggplot(dfEco3, aes(EVTYPE, value)) +
  geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event type") +
  ggtitle("Harmful events w.r.t economic consequences") +
  scale_fill_manual(values = c("red","black"))

# Stacked bars: the same data with both damage kinds piled into one bar. The
# title carries a "(stack)" suffix, as the stacked health plot does, so the
# two panels are distinguishable.
p4 <- ggplot(dfEco3, aes(EVTYPE, value)) +
  geom_bar(aes(fill = variable), stat = "identity", position = "stack") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event type") +
  ggtitle("Harmful events w.r.t economic consequences(stack)") +
  scale_fill_manual(values = c("magenta","blue"))

grid.arrange(p3, p4, ncol = 2)