Synopsis
Severe weather has a large impact on public health in densely populated regions and can damage property in both rural and urban environments. The analysis below uses storm data dating back to 1950 and summarizes it to determine which weather events inflict the most harm on human health and on rural and urban property.
Data Processing
First we examine the distribution of weather events over the entire timeframe in which the data was collected. From the histogram we can infer that data collection was sparse until about 1995, so we will use the subset of the data from 1995 to the present. Additionally, the levels of the EVTYPE factor are extraordinarily messy. We will attempt to clean up the levels by revaluing common variants, which will allow us to present more accurate results in our analysis. The plyr package is used here and then detached because of its conflicts with dplyr and the pipes that are used throughout the analysis. Using the dplyr package we will group by the levels of EVTYPE, summarize the results, and arrange the new data frame by Health Concerns (fatalities + injuries). For the property analysis we first need to create a new column in the data frame that multiplies the numeric value of property and crop damage by the multiplier encoded in the corresponding exponent column (PROPDMGEXP and CROPDMGEXP). This is easily done with dplyr's mutate function. We then follow the same procedure, grouping by EVTYPE and summarizing property and crop damage in a new data frame.
library(dplyr)
library(ggplot2)
# Load the raw storm data and parse the event start date.
# NOTE(review): the path below is machine-specific; point it at a local copy
# of repdata_data_StormData.csv before running.
storm <- read.csv("C:/Users/Jeremiah Lowhorn/Desktop/repdata_data_StormData.csv")
storm$BGN_DATE <- as.Date(storm$BGN_DATE, format = "%m/%d/%Y")

# Histogram of event start dates, used to judge how record density changes
# over the collection period.
ggplot(storm, aes(x = BGN_DATE)) +
  geom_histogram()

# Keep only events that began on or after 1995-01-01. The cutoff is an explicit
# Date (not a string that relies on implicit coercion), and which() drops rows
# whose date failed to parse instead of turning them into all-NA rows that
# would make every later sum() return NA.
cutoff_date <- as.Date("1995-01-01")
storm <- storm[which(storm$BGN_DATE >= cutoff_date), ]

# Calendar year of each event, stored as a factor.
storm$Year <- as.factor(format(storm$BGN_DATE, format = "%Y"))
# Collapse common spelling variants of EVTYPE into a single canonical label so
# that per-event totals are not split across near-duplicate names.
#
# plyr::revalue() is called through its namespace instead of attaching plyr:
# attaching plyr after dplyr masks dplyr verbs such as summarise() and
# mutate(), and the earlier detach(..., unload = TRUE) workaround could not
# unload the namespace (it only produced a warning). Each source label appears
# exactly once in the mapping.
evtype_aliases <- c(
  "EXCESSIVE HEAT" = "HEAT",
  "THUNDERSTORM WIND" = "TSTM WIND",
  "MARINE TSTM WIND" = "TSTM WIND",
  "HEAT WAVE" = "HEAT",
  "EXTREME HEAT" = "HEAT",
  "Heat Wave" = "HEAT",
  "RECORD HEAT" = "HEAT",
  "UNSEASONABLY WARM" = "HEAT",
  "RECORD/EXCESSIVE HEAT" = "HEAT",
  "THUNDERSTORM WINDS" = "TSTM WIND",
  "THUNDERSTORM WINDSS" = "TSTM WIND",
  "TSTM WIND (G45)" = "TSTM WIND",
  "TORNADO F3" = "TORNADO",
  "TORNADO F2" = "TORNADO",
  "LIGHTNING INJURY" = "LIGHTNING",
  "LIGHTNING." = "LIGHTNING",
  "THUNDERSTORM WIND (G40)" = "TSTM WIND",
  "THUNDERSTORM WIND G52" = "TSTM WIND",
  "THUNDERSTORM WINDS 13" = "TSTM WIND",
  "THUNDERSTORM WINDS/HAIL" = "TSTM WIND",
  "THUNDERSTORMS WINDS" = "TSTM WIND",
  "THUNDERTORM WINDS" = "TSTM WIND"
)
storm$EVTYPE <- plyr::revalue(storm$EVTYPE, evtype_aliases)
# Per-event-type health impact: total fatalities, total injuries, and their
# sum, ranked from most to least harmful.
health <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    Fatalities = sum(FATALITIES),
    Injuries = sum(INJURIES),
    Health_Concerns = Fatalities + Injuries
  ) %>%
  arrange(desc(Health_Concerns)) %>%
  ungroup()

# The five event types with the largest combined fatalities and injuries.
top5 <- health[1:5, ]
# Translate a damage exponent code (PROPDMGEXP / CROPDMGEXP) into a numeric
# multiplier.
#
# Only "K" (thousands), "M" (millions) and "B" (billions) are scaled, matched
# case-insensitively. Every other code (blank, digits, symbols, NA) leaves the
# recorded amount unscaled. Previously anything that was not exactly "K" or
# "M" was multiplied by one billion, which inflated totals by many orders of
# magnitude.
# NOTE(review): treating unrecognised codes as a multiplier of 1 is a
# conservative assumption; confirm against the NWS storm data documentation.
damage_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Dollar value of property and crop damage for every event.
prop <- storm %>%
  mutate(
    PropDamage = PROPDMG * damage_multiplier(PROPDMGEXP),
    CropDamage = CROPDMG * damage_multiplier(CROPDMGEXP)
  )

# Total property, crop and combined damage per event type, largest first.
damage_summary <- prop %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Property = sum(PropDamage),
    Total_Crop = sum(CropDamage),
    Total_Damage = sum(PropDamage) + sum(CropDamage)
  ) %>%
  arrange(desc(Total_Damage))

# The five event types with the largest combined damage.
# NOTE(review): any previously rendered damage table was produced with the old
# exponent rule and must be regenerated.
top5_damage <- damage_summary[1:5, ]
Results
We can infer from our summary tables that tornadoes raise the most overall health concerns. However, when examining the types of weather individually, we learn that heat causes the most fatalities while tornadoes cause the most injuries. The results for property damage show that thunderstorm wind causes the most overall damage and the most property damage, while hail is the leading cause of crop damage. These results, however, include outliers in the population, which were not analyzed in this document due to the requirements of the class.
# Print the five event types ranked highest by Health_Concerns; the `##` lines
# below are the table as it was rendered when the report was generated.
top5
## Source: local data frame [5 x 4]
##
## EVTYPE Fatalities Injuries Health_Concerns
## 1 TORNADO 1545 21783 23328
## 2 HEAT 3092 9105 12197
## 3 FLOOD 423 6769 7192
## 4 TSTM WIND 422 5525 5947
## 5 LIGHTNING 730 4632 5362
# Print the five event types ranked highest by Total_Damage; the `##` lines
# below are the table as it was rendered when the report was generated.
top5_damage
## Source: local data frame [5 x 4]
##
## EVTYPE Total_Property Total_Crop Total_Damage
## 1 TSTM WIND 6.284302e+12 1028505250 6.285330e+12
## 2 HAIL 3.376408e+11 372613777050 7.102546e+11
## 3 TORNADO 2.226175e+11 160296595610 3.829141e+11
## 4 FLASH FLOOD 3.649659e+11 1343915000 3.663098e+11
## 5 FLOOD 1.510220e+11 5422810400 1.564448e+11
# Bar chart of total injuries for the five highest-ranked event types.
ing <- ggplot(top5, aes(x = EVTYPE, y = Injuries)) +
  geom_bar(stat = "identity") +
  ggtitle(expression(atop("Weather Events ~ Injuries"))) +
  labs(x = "Type", y = "Injuries") +
  theme(
    plot.title = element_text(size = 24, color = "black"),
    axis.title = element_text(size = 12, color = "black"),
    axis.text = element_text(size = 10, color = "black")
  )

# Bar chart of total fatalities for the same five event types.
fat <- ggplot(top5, aes(x = EVTYPE, y = Fatalities)) +
  geom_bar(stat = "identity") +
  ggtitle(expression(atop("Weather Events ~ Fatalities"))) +
  labs(x = "Type", y = "Fatalities") +
  theme(
    plot.title = element_text(size = 24, color = "black"),
    axis.title = element_text(size = 12, color = "black"),
    axis.text = element_text(size = 10, color = "black")
  )
# Draw several plot objects on a single page using a grid layout.
#
# Args:
#   ...:      Plot objects to draw, in order.
#   plotlist: Optional list of further plot objects, appended after `...`.
#   file:     Unused; kept so existing calls that pass it keep working.
#   cols:     Number of columns in the layout when `layout` is not supplied.
#   layout:   Optional matrix whose cell values say which plot goes where
#             (a cell holding i is where plot i is drawn). When supplied,
#             `cols` is ignored.
#
# Returns:
#   Called for its drawing side effect. With no plots at all it draws nothing
#   and returns invisible NULL instead of failing.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: without this guard the 1:0 loop below would try to access
  # plots[[1]] and fail with a subscript error.
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # Default layout: fill a cols-wide matrix column by column with plot indices.
  if (is.null(layout)) {
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (num_plots == 1) {
    print(plots[[1]])
  } else {
    # grid functions are namespace-qualified so that calling this function
    # does not attach a package to the caller's search path.
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )
    for (i in seq_len(num_plots)) {
      # Row/column positions of every layout cell assigned to plot i.
      cell <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = grid::viewport(layout.pos.row = cell$row,
                                            layout.pos.col = cell$col))
    }
  }
}
# Stack the injuries and fatalities charts in a single column.
multiplot(plotlist = list(ing, fat), cols = 1)
# Translate a damage exponent code (PROPDMGEXP / CROPDMGEXP) into a numeric
# multiplier.
#
# Only "K" (thousands), "M" (millions) and "B" (billions) are scaled, matched
# case-insensitively. Every other code (blank, digits, symbols, NA) leaves the
# recorded amount unscaled. Previously anything that was not exactly "K" or
# "M" was multiplied by one billion, which inflated totals by many orders of
# magnitude.
# NOTE(review): treating unrecognised codes as a multiplier of 1 is a
# conservative assumption; confirm against the NWS storm data documentation.
damage_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Recompute the dollar value of property and crop damage for every event so
# the charts drawn from this data do not depend on earlier chunks.
prop <- storm %>%
  mutate(
    PropDamage = PROPDMG * damage_multiplier(PROPDMGEXP),
    CropDamage = CROPDMG * damage_multiplier(CROPDMGEXP)
  )

# Total property, crop and combined damage per event type, largest first.
damage_summary <- prop %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Property = sum(PropDamage),
    Total_Crop = sum(CropDamage),
    Total_Damage = sum(PropDamage) + sum(CropDamage)
  ) %>%
  arrange(desc(Total_Damage))

# The five event types with the largest combined damage.
top5_damage <- damage_summary[1:5, ]
# Bar chart of total property damage for the five costliest event types.
pro <- ggplot(top5_damage, aes(x = EVTYPE, y = Total_Property)) +
  geom_bar(stat = "identity") +
  ggtitle(expression(atop("Weather Events ~ Property Damage"))) +
  labs(x = "Type", y = "Dollar Amount of Damage") +
  theme(
    plot.title = element_text(size = 24, color = "black"),
    axis.title = element_text(size = 12, color = "black"),
    axis.text = element_text(size = 10, color = "black")
  )

# Bar chart of total crop damage for the same five event types.
crop <- ggplot(top5_damage, aes(x = EVTYPE, y = Total_Crop)) +
  geom_bar(stat = "identity") +
  ggtitle(expression(atop("Weather Events ~ Crop Damage"))) +
  labs(x = "Type", y = "Dollar Amount of Damage") +
  theme(
    plot.title = element_text(size = 24, color = "black"),
    axis.title = element_text(size = 12, color = "black"),
    axis.text = element_text(size = 10, color = "black")
  )

# Stack the property and crop damage charts in a single column.
multiplot(plotlist = list(pro, crop), cols = 1)