Synopsis

Severe weather has a large impact on the health of people in densely populated regions and can damage property in both rural and urban environments. The analysis below uses data dating back to 1950 and summarizes it to determine which weather events inflict the most harm on human health and on rural and urban property.

Data Processing

First we examine the distribution of weather events over the entire timeframe in which the data was collected. From the histogram we can infer that data collection was sparse until about 1995, so we will use the subset of the data from 1995 to the present. Additionally, the levels of the EVTYPE factor are extraordinarily messy. We will attempt to clean them up by revaluing common variants to a single level, which allows us to present more accurate results in our analysis. The plyr package is used only for this revaluing step and is not left attached, because it conflicts with dplyr and the pipes that are used throughout the analysis. Using the dplyr package we will group by the levels of EVTYPE, summarize the results, and arrange the new data frame by Health Concerns (fatalities + injuries). For the property analysis we first need to create new columns in the data frame that multiply the numeric values of property and crop damage by the magnitude indicated in the corresponding exponent columns (PROPDMGEXP and CROPDMGEXP). This is easily done with dplyr's mutate function. We then follow the same procedure as before, grouping by the levels of EVTYPE and summarizing property and crop damage in a new data frame.

library(dplyr)
library(ggplot2)
# Read the raw storm records and convert the begin date from m/d/Y text to Date.
storm <- read.csv(
  "C:/Users/Jeremiah Lowhorn/Desktop/repdata_data_StormData.csv"
)
storm[["BGN_DATE"]] <- as.Date(storm[["BGN_DATE"]], format = "%m/%d/%Y")

# Distribution of begin dates over the full collection period (auto-printed).
ggplot(data = storm, mapping = aes(x = BGN_DATE)) +
  geom_histogram()

# Restrict to events on or after 1995-01-01, then record the year as a factor.
storm <- storm[storm[["BGN_DATE"]] >= as.Date("1995-01-01"), ]
storm[["Year"]] <- factor(format(storm[["BGN_DATE"]], "%Y"))

# Collapse common spelling/format variants of EVTYPE into one canonical level.
#
# plyr is deliberately NOT attached: loading it after dplyr masks dplyr verbs
# such as summarise/mutate/arrange, and it cannot be unloaded again while
# ggplot2 and friends import its namespace. Calling the single function we
# need through `plyr::` avoids both problems.
evtype_aliases <- c(
  # Heat-related variants
  "EXCESSIVE HEAT" = "HEAT",
  "HEAT WAVE" = "HEAT",
  "EXTREME HEAT" = "HEAT",
  "Heat Wave" = "HEAT",
  "RECORD HEAT" = "HEAT",
  "UNSEASONABLY WARM" = "HEAT",
  "RECORD/EXCESSIVE HEAT" = "HEAT",
  # Thunderstorm-wind variants (including misspellings present in the data)
  "THUNDERSTORM WIND" = "TSTM WIND",
  "MARINE TSTM WIND" = "TSTM WIND",
  "THUNDERSTORM  WINDS" = "TSTM WIND",
  "THUNDERSTORM WINDSS" = "TSTM WIND",
  "TSTM WIND (G45)" = "TSTM WIND",
  "THUNDERSTORM WIND (G40)" = "TSTM WIND",
  "THUNDERSTORM WIND G52" = "TSTM WIND",
  "THUNDERSTORM WINDS 13" = "TSTM WIND",
  "THUNDERSTORM WINDS/HAIL" = "TSTM WIND",
  "THUNDERSTORMS WINDS" = "TSTM WIND",
  "THUNDERTORM WINDS" = "TSTM WIND",
  "THUNDERSTORM WINDS" = "TSTM WIND",
  # Tornado variants
  "TORNADO F3" = "TORNADO",
  "TORNADO F2" = "TORNADO",
  # Lightning variants
  "LIGHTNING INJURY" = "LIGHTNING",
  "LIGHTNING." = "LIGHTNING"
)
storm$EVTYPE <- plyr::revalue(storm$EVTYPE, evtype_aliases)
# Per event type: total fatalities, total injuries, and their combined count
# (Health_Concerns), ordered from the most harmful type downwards.
health <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    Fatalities = sum(FATALITIES),
    Injuries = sum(INJURIES)
  ) %>%
  mutate(Health_Concerns = Fatalities + Injuries) %>%
  arrange(desc(Health_Concerns)) %>%
  ungroup()

# The five event types with the highest combined health impact.
top5 <- health[seq_len(5), ]


# Translate a damage-exponent code into a dollar multiplier.
#
# Only K (thousands), M (millions) and B (billions) are treated as magnitude
# codes, case-insensitively. Any other code (blank, digits, punctuation, NA)
# gets a multiplier of 1 so the recorded amount is taken at face value.
# The previous logic multiplied every non-K/M record by one billion, which
# inflated the totals by orders of magnitude.
exp_multiplier <- function(exp_code) {
  magnitudes <- c(K = 1e3, M = 1e6, B = 1e9)
  mult <- unname(magnitudes[toupper(as.character(exp_code))])
  mult[is.na(mult)] <- 1
  mult
}

# Dollar value of property and crop damage for every event.
prop <- storm %>%
  mutate(
    PropDamage = PROPDMG * exp_multiplier(PROPDMGEXP),
    CropDamage = CROPDMG * exp_multiplier(CROPDMGEXP)
  )

# Per event type: total property, crop and combined damage, largest first.
damage_summary <- prop %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Property = sum(PropDamage),
    Total_Crop = sum(CropDamage),
    Total_Damage = sum(PropDamage) + sum(CropDamage)
  ) %>%
  arrange(desc(Total_Damage))

# The five most costly event types.
top5_damage <- damage_summary[1:5, ]

Results

We can infer from our summary tables that tornadoes account for the most overall health concerns. However, when examining fatalities and injuries separately, we learn that heat causes the most fatalities while tornadoes cause the most injuries. The results for damage show that thunderstorm wind causes the most property damage and the most overall damage, while hail is the leading cause of crop damage. These results, however, include outliers in the population, which were not analyzed in this document because doing so was outside the requirements of the class.

# Show the five event types with the largest Health_Concerns
# (fatalities + injuries). The `##` lines below appear to be console output
# captured when the document was rendered; re-run the analysis to refresh them.
top5
## Source: local data frame [5 x 4]
## 
##      EVTYPE Fatalities Injuries Health_Concerns
## 1   TORNADO       1545    21783           23328
## 2      HEAT       3092     9105           12197
## 3     FLOOD        423     6769            7192
## 4 TSTM WIND        422     5525            5947
## 5 LIGHTNING        730     4632            5362
# Show the five event types with the largest Total_Damage (property + crop).
# As above, the `##` lines appear to be captured output and are only as
# current as the last render of the damage calculation.
top5_damage
## Source: local data frame [5 x 4]
## 
##        EVTYPE Total_Property   Total_Crop Total_Damage
## 1   TSTM WIND   6.284302e+12   1028505250 6.285330e+12
## 2        HAIL   3.376408e+11 372613777050 7.102546e+11
## 3     TORNADO   2.226175e+11 160296595610 3.829141e+11
## 4 FLASH FLOOD   3.649659e+11   1343915000 3.663098e+11
## 5       FLOOD   1.510220e+11   5422810400 1.564448e+11
# Bar chart of total injuries for the five most harmful event types.
ing <- ggplot(data = top5, mapping = aes(x = EVTYPE, y = Injuries)) +
  geom_bar(stat = "identity") +
  labs(
    title = expression(atop("Weather Events ~ Injuries")),
    x = "Type",
    y = "Injuries"
  ) +
  theme(
    plot.title = element_text(size = 24, color = "black"),
    axis.title = element_text(size = 12, color = "black"),
    axis.text = element_text(size = 10, color = "black")
  )

# Bar chart of total fatalities for the same five event types.
fat <- ggplot(data = top5, mapping = aes(x = EVTYPE, y = Fatalities)) +
  geom_bar(stat = "identity") +
  labs(
    title = expression(atop("Weather Events ~ Fatalities")),
    x = "Type",
    y = "Fatalities"
  ) +
  theme(
    plot.title = element_text(size = 24, color = "black"),
    axis.title = element_text(size = 12, color = "black"),
    axis.text = element_text(size = 10, color = "black")
  )

#' Draw several plot objects on a single page using a grid layout.
#'
#' @param ... Plot objects to draw (anything with a `print` method that
#'   accepts a `vp` viewport argument, e.g. ggplot objects).
#' @param plotlist Optional list of further plot objects, appended after `...`.
#' @param file Unused; kept so existing calls that pass it keep working.
#' @param cols Number of columns in the automatically generated layout.
#' @param layout Optional integer matrix giving the page layout: the cell(s)
#'   holding value `i` are where the i-th plot is drawn. When supplied,
#'   `cols` is ignored.
#' @return Invisibly, the result of printing the plot when exactly one plot is
#'   given; otherwise `NULL`, invisibly. Called for its drawing side effect.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: return quietly instead of looping over 1:0 and failing
  # on a non-existent first plot.
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # A single plot needs no layout; print it on the current device as-is.
  if (num_plots == 1) {
    return(invisible(print(plots[[1]])))
  }

  # Default layout: fill a cols-wide matrix column by column with plot indices.
  if (is.null(layout)) {
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  # grid functions are namespace-qualified so the function does not attach
  # a package to the caller's search path as a side effect.
  grid::grid.newpage()
  grid::pushViewport(
    grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
  )

  for (i in seq_len(num_plots)) {
    # Row/column position(s) of the layout cell(s) assigned to plot i.
    cell <- as.data.frame(which(layout == i, arr.ind = TRUE))
    print(
      plots[[i]],
      vp = grid::viewport(
        layout.pos.row = cell$row,
        layout.pos.col = cell$col
      )
    )
  }

  invisible(NULL)
}


# Draw the injuries chart and the fatalities chart stacked in a single column.
multiplot(plotlist = list(ing, fat), cols = 1)

# Translate a damage-exponent code into a dollar multiplier.
#
# Only K (thousands), M (millions) and B (billions) are treated as magnitude
# codes, case-insensitively. Any other code (blank, digits, punctuation, NA)
# gets a multiplier of 1 so the recorded amount is taken at face value.
# The previous logic multiplied every non-K/M record by one billion, which
# inflated the totals by orders of magnitude.
exp_multiplier <- function(exp_code) {
  magnitudes <- c(K = 1e3, M = 1e6, B = 1e9)
  mult <- unname(magnitudes[toupper(as.character(exp_code))])
  mult[is.na(mult)] <- 1
  mult
}

# Dollar value of property and crop damage for every event, computed here so
# the plotting code that follows has its inputs defined right next to it.
prop <- storm %>%
  mutate(
    PropDamage = PROPDMG * exp_multiplier(PROPDMGEXP),
    CropDamage = CROPDMG * exp_multiplier(CROPDMGEXP)
  )

# Per event type: total property, crop and combined damage, largest first.
damage_summary <- prop %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Property = sum(PropDamage),
    Total_Crop = sum(CropDamage),
    Total_Damage = sum(PropDamage) + sum(CropDamage)
  ) %>%
  arrange(desc(Total_Damage))

# The five most costly event types.
top5_damage <- damage_summary[1:5, ]
# Bar chart of total property damage for the five most costly event types.
pro <- ggplot(
  data = top5_damage,
  mapping = aes(x = EVTYPE, y = Total_Property)
) +
  geom_bar(stat = "identity") +
  labs(
    title = expression(atop("Weather Events ~ Property Damage")),
    x = "Type",
    y = "Dollar Amount of Damage"
  ) +
  theme(
    plot.title = element_text(size = 24, color = "black"),
    axis.title = element_text(size = 12, color = "black"),
    axis.text = element_text(size = 10, color = "black")
  )

# Bar chart of total crop damage for the same five event types.
crop <- ggplot(
  data = top5_damage,
  mapping = aes(x = EVTYPE, y = Total_Crop)
) +
  geom_bar(stat = "identity") +
  labs(
    title = expression(atop("Weather Events ~ Crop Damage")),
    x = "Type",
    y = "Dollar Amount of Damage"
  ) +
  theme(
    plot.title = element_text(size = 24, color = "black"),
    axis.title = element_text(size = 12, color = "black"),
    axis.text = element_text(size = 10, color = "black")
  )

# Draw the property chart and the crop chart stacked in a single column.
multiplot(plotlist = list(pro, crop), cols = 1)