Synopsys

The present analysis uses the National Oceanic and Atmospheric Administration’s (NOAA) storm database. The report has two objectives, the first is to determine the top 10 events that are most harmful in population health. These are measured by the sum of the fatalities and the injuries caused by the weather events. The second in to find the top 10 events that have the greatest economical consequences, in terms of damage cost in properties and crops.

library(dplyr)
library(ggplot2)

Reading raw data and selecting required variables

rawstormo<-read.csv("repdata%2Fdata%2FStormData.csv.bz2", sep=',', header=TRUE, stringsAsFactors=FALSE)
rawstorm<-tbl_df(rawstormo)     
#rawstorm<-sample_n(rawstormdt, 100000)
# selecting the used variables in the analysis
selected.cols<-c("EVTYPE","FATALITIES","INJURIES","PROPDMG","PROPDMGEXP","CROPDMG","CROPDMGEXP")
data<-rawstorm[,selected.cols]
data <- subset(x=data, subset=(EVTYPE != "?" & (INJURIES> 0|FATALITIES>0 | PROPDMG> 0 | CROPDMG>0)))
# add new variables for calculate the total damage (fatalities+injuries) and the economical cost in properties and crop
HARMFUL<-data$FATALITIES+data$INJURIES
PROPDMGUSD<-data$PROPDMG
CROPDMGUSD<-data$CROPDMG
stormdata<-cbind(data,PROPDMGUSD,CROPDMGUSD,HARMFUL)
# variables to use
names(stormdata)
##  [1] "EVTYPE"     "FATALITIES" "INJURIES"   "PROPDMG"    "PROPDMGEXP"
##  [6] "CROPDMG"    "CROPDMGEXP" "PROPDMGUSD" "CROPDMGUSD" "HARMFUL"

Preparing Data

This section calculates the economical cost in properties and crop. The exponent to multiply the base data is transformed following the specification indicated in the data documentation published in https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf

# set the proper exponent for the each possible value indicated in PROPDMGEXP"
propExp <-  c("\"\"" = 10^0,
              "-" = 10^0, 
              "+" = 10^0,
              "0" = 10^0,
              "1" = 10^1,
              "2" = 10^2,
              "3" = 10^3,
              "4" = 10^4,
              "5" = 10^5,
              "6" = 10^6,
              "7" = 10^7,
              "8" = 10^8,
              "9" = 10^9,
              "H" = 10^2,
              "K" = 10^3,
              "M" = 10^6,
              "B" = 10^9)
# set the proper exponent for the each possible value indicated in CROPDMGEXP"
cropExp <-  c("\"\"" = 10^0,
              "?" = 10^0, 
              "0" = 10^0,
              "K" = 10^3,
              "M" = 10^6,
              "B" = 10^9)

# Calculate the total economical cost in property and crop by multiplying the base data by the exponent"
for (i in 1:nrow(stormdata)) {
  rowact <- stormdata[i,]
  if(is.na(rowact$PROPDMGEXP)) {
    stormdata[i,]$PROPDMGEXP<-10^0 
  }
  else {
    stormdata[i,]$PROPDMGEXP<-propExp[as.character(toupper(rowact$PROPDMGEXP))]
  }
  if(rowact$CROPDMGEXP=="" || is.na(rowact$CROPDMGEXP))  {
    stormdata[i,]$CROPDMGEXP<-10^0
  }
  else {
    stormdata[i,]$CROPDMGEXP<-cropExp[as.character(toupper(rowact$CROPDMGEXP))]
  }
  stormdata[i,]$PROPDMGUSD <-as.numeric(stormdata[i,]$PROPDMG)*as.numeric(stormdata[i,]$PROPDMGEXP)
  stormdata[i,]$CROPDMGUSD <-as.numeric(stormdata[i,]$CROPDMG)*as.numeric(stormdata[i,]$CROPDMGEXP)
}

Data processing

This section selects the top events that are most harmful in population health and the top events that have the greatest economical consequences, because their cost in properties and crop damages.

stdt<-tbl_df(stormdata)
# Sumarise and ordering the total harmful(fatalities+injuries) gruped by event and select the top 10"
stdtsocial<-stdt %>% select(EVTYPE, FATALITIES, INJURIES, HARMFUL) %>% group_by(EVTYPE) %>% summarise(totalharmful=sum(HARMFUL), totalfatalities=sum(FATALITIES),totalinjuries=sum(INJURIES)) %>% arrange(desc(totalharmful))
stdtsocialtop <- top_n(stdtsocial,10,totalharmful)

# Sumarise and ordering the total cost (properties and crop) gruped by event and select the top 10"
stdteconomical<-stdt %>% select(EVTYPE, PROPDMGUSD, CROPDMGUSD) %>% group_by(EVTYPE) %>% summarise(totalcost=sum(PROPDMGUSD)+sum(CROPDMGUSD), totalprop=sum(PROPDMGUSD), totalcrop=sum(CROPDMGUSD)) %>% arrange(desc(totalcost))
stdteconomicaltop <- top_n(stdteconomical,10, totalcost)

Results

Across the United States, the most harmful events for population health are TORNADO and EXCESSIVE HEAT.

stdtsocialtop
## # A tibble: 10 × 4
##               EVTYPE totalharmful totalfatalities totalinjuries
##                <chr>        <dbl>           <dbl>         <dbl>
## 1            TORNADO        96979            5633         91346
## 2     EXCESSIVE HEAT         8428            1903          6525
## 3          TSTM WIND         7461             504          6957
## 4              FLOOD         7259             470          6789
## 5          LIGHTNING         6046             816          5230
## 6               HEAT         3037             937          2100
## 7        FLASH FLOOD         2755             978          1777
## 8          ICE STORM         2064              89          1975
## 9  THUNDERSTORM WIND         1621             133          1488
## 10      WINTER STORM         1527             206          1321
ggplot(data=stdtsocialtop, aes(x=reorder(EVTYPE, -totalharmful),y=totalharmful))+geom_bar(stat="identity", fill="steelblue")+ggtitle("Top weather events harmful with respect to population health")+labs(x="Event", y="Total Harmful (fatalities+injuries)")+theme(axis.text.x = element_text(angle=45, hjust=1))

Across the United States, the events with greatest economical consequences are STORM SURGE/TIDE and HURRICANE OPAL

stdteconomicaltop
## # A tibble: 10 × 4
##                        EVTYPE  totalcost  totalprop totalcrop
##                         <chr>      <dbl>      <dbl>     <dbl>
## 1            STORM SURGE/TIDE 4642038000 4641188000    850000
## 2              HURRICANE OPAL 3191846000 3172846000  19000000
## 3   HEAVY RAIN/SEVERE WEATHER 2500000000 2500000000         0
## 4  TORNADOES, TSTM WIND, HAIL 1602500000 1600000000   2500000
## 5                  WILD FIRES  624100000  624100000         0
## 6                     TYPHOON  601055000  600230000    825000
## 7                   HAILSTORM  241000000  241000000         0
## 8                     TSUNAMI  144082000  144062000     20000
## 9              River Flooding  134175000  106155000  28020000
## 10           COASTAL FLOODING  126696500  126640500     56000
ggplot(data=stdteconomicaltop, aes(x=reorder(EVTYPE, -totalcost),y=totalcost))+geom_bar(stat="identity", fill="steelblue")+ggtitle("Top weather events with greatest economic consequences")+labs(x="Events", y="Total cost $ (properties+crop)")+theme(axis.text.x = element_text(angle=45, hjust=1))