Introduction

  • This document presents a brief visual analysis of severe weather events in the US. It shows which types of weather events were most harmful to public health (deaths and injuries) and which caused the greatest economic damage to property and crops.

  • These are the libraries I used to produce my results and a function I found on the internet:

library(dplyr)
library(ggplot2)
library(scales)
library(RColorBrewer)
library(wordcloud)
library(gridExtra)

# Draw several plots on one page, arranged in a grid.
#
# Arguments:
#   ...      Plot objects to draw; each must have a print() method that
#            accepts a `vp` argument (e.g. ggplot objects).
#   plotlist Optional list of further plot objects, appended after `...`.
#   file     Unused; kept so existing calls that pass it keep working.
#   cols     Number of columns in the automatic layout. Ignored when
#            `layout` is supplied.
#   layout   Optional matrix of plot indices. Plot i is drawn in every cell
#            whose value is i. When NULL, a matrix with `cols` columns is
#            built and filled column by column.
#
# Returns (invisibly) the result of print() when exactly one plot is given,
# otherwise NULL. With no plots at all nothing is drawn.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Attaching grid is kept from the original so that code relying on grid
  # being on the search path after this call is unaffected.
  library(grid)

  # Make a single list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: return before a page is opened. Without this guard the
  # drawing loop would try to access plots[[1]] and fail.
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # A single plot needs no layout; print it directly.
  if (num_plots == 1) {
    return(invisible(print(plots[[1]])))
  }

  # If layout is NULL, use 'cols' to derive the number of rows needed
  if (is.null(layout)) {
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  # Set up the page
  grid.newpage()
  pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

  # Draw each plot in the region(s) of the layout that carry its index
  for (i in seq_len(num_plots)) {
    cell <- as.data.frame(which(layout == i, arr.ind = TRUE))

    print(plots[[i]], vp = viewport(layout.pos.row = cell$row,
                                    layout.pos.col = cell$col))
  }

  invisible(NULL)
}

Read Data

# Load the raw storm events table; the path is relative to the working directory.
storm <- read.csv(file = "repdata%2Fdata%2FStormData.csv")

Data Processing

Justification for the transformation: I took the approach for the following transformations and computations from a comment in the course discussion forum about the week 4 assignment:

# Convert a damage-exponent column into a numeric power of ten.
#
# Letter codes are matched case-insensitively: H -> 2, K -> 3, M -> 6, B -> 9.
# The symbols "-", "+" and "?" become 0. Digits are kept as they are.
# Anything that still cannot be read as a number (including the empty
# string) becomes 0, i.e. a multiplier of 10^0 = 1.
#
# exp_code: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length.
decode_dmg_exp <- function(exp_code) {
  code <- toupper(as.character(exp_code))
  code <- gsub("H", "2", code, fixed = TRUE)
  code <- gsub("K", "3", code, fixed = TRUE)
  code <- gsub("M", "6", code, fixed = TRUE)
  code <- gsub("B", "9", code, fixed = TRUE)
  code <- gsub("[-+?]", "0", code)
  power <- as.numeric(code)
  power[is.na(power)] <- 0
  power
}

storm$PROPDMGEXP <- decode_dmg_exp(storm$PROPDMGEXP)
storm$CROPDMGEXP <- decode_dmg_exp(storm$CROPDMGEXP)

# Damage amounts: the reported value scaled by its power of ten.
storm <- mutate(storm,
                DMG_PROP = PROPDMG * 10^PROPDMGEXP,
                DMG_CROP = CROPDMG * 10^CROPDMGEXP)

Results

Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

  • Wordcloud with the 60 most frequent events. Size is proportional to frequency.
# Fixed seed so the random placement and colours are reproducible.
set.seed(9)
pal <- brewer.pal(9, "Set1")

# Count whole event types and pass the counts explicitly. When `freq` is
# omitted, wordcloud() text-mines its input and sizes individual words
# (e.g. "WIND") instead of complete event names.
evtype_counts <- table(storm$EVTYPE)

wordcloud(words = names(evtype_counts),
          freq = as.vector(evtype_counts),
          min.freq = 10,
          scale = c(6, .5),
          random.color = TRUE,
          max.words = 60,
          colors = pal)
Word cloud of 60 most frequent events. Size is proportional to frequency

Word cloud of 60 most frequent events. Size is proportional to frequency

  • Tornadoes and excessive heat are by far the most harmful to population health, with TSTM wind, flood and lightning also having some negative impact.
# Total fatalities and injuries per event type.
# (Named health_totals rather than `t` to avoid masking base::t.)
health_totals <- storm %>%
  group_by(EVTYPE) %>%
  summarise(fat = sum(FATALITIES),
            inj = sum(INJURIES))

# The ten event types with the highest totals for each measure.
Fat <- tail(health_totals[order(health_totals$fat), ], 10)
Inj <- tail(health_totals[order(health_totals$inj), ], 10)

# Horizontal bar chart of fatalities; the fill legend is hidden because the
# axis labels already name each event type.
p1 <- ggplot(Fat,
             aes(x = factor(EVTYPE),
                 y = fat,
                 fill = factor(EVTYPE))) +
  geom_bar(stat = "identity") +
  coord_flip() +
  guides(fill = "none") +
  theme_dark() +
  xlab("") +
  ggtitle("Fatalities") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ylab("")

# Same chart for injuries, with the event labels on the opposite side so the
# two panels mirror each other.
p2 <- ggplot(Inj,
             aes(x = factor(EVTYPE),
                 y = inj,
                 fill = factor(EVTYPE))) +
  geom_bar(stat = "identity") +
  guides(fill = "none") +
  theme_dark() +
  xlab("") +
  scale_x_discrete(position = "top") +
  coord_flip() +
  ggtitle("Injuries") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ylab("")

multiplot(p1, p2, cols = 2)
Tornadoes and excessive heat are by far the most harmful to population health, with TSTM wind, flood and lightning also having some negative impact

Tornadoes and excessive heat are by far the most harmful to population health, with TSTM wind, flood and lightning also having some negative impact

Across the United States, which types of events have the greatest economic consequences?

  • Floods are the most damaging to property, with hurricanes, tornadoes and storms also accounting for a significant share. Drought is the most damaging to crops, with floods and ice also causing significant economic loss.

# Total property and crop damage per event type.
damage_totals <- storm %>%
  group_by(EVTYPE) %>%
  summarise(prop = sum(DMG_PROP),
            crop = sum(DMG_CROP))

# The ten event types with the highest totals for each measure.
Prop <- tail(damage_totals[order(damage_totals$prop), ], 10)
Crop <- tail(damage_totals[order(damage_totals$crop), ], 10)

# Horizontal bar chart of property damage. Axis labels are produced by a
# formatter so they always correspond to the actual break values; a fixed
# label vector only fits one specific set of breaks.
p1 <- ggplot(Prop,
             aes(x = factor(EVTYPE),
                 y = prop,
                 fill = factor(EVTYPE))) +
  geom_bar(stat = "identity") +
  coord_flip() +
  guides(fill = "none") +
  scale_y_continuous(labels = scales::scientific) +
  theme_dark() +
  xlab("") +
  ggtitle("Property Damages") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ylab("")

# Same chart for crop damage, with the event labels on the opposite side so
# the two panels mirror each other.
p2 <- ggplot(Crop,
             aes(x = factor(EVTYPE),
                 y = crop,
                 fill = factor(EVTYPE))) +
  geom_bar(stat = "identity") +
  guides(fill = "none") +
  theme_dark() +
  xlab("") +
  scale_x_discrete(position = "top") +
  scale_y_continuous(labels = scales::scientific) +
  coord_flip() +
  ggtitle("Crop Damages") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ylab("")

multiplot(p1, p2, cols = 2)
Floods are the most damaging to property and drought is the most damaging to crops

Floods are the most damaging to property and drought is the most damaging to crops