SYNOPSIS

In this study practically all of the variables need cleaning. EVTYPE (the type of event) is reduced from more than 900 levels to 28 levels. Population health is measured with the variables FATALITIES and INJURIES, and the economic consequences with the variables PROPDMGEXPFINAL (value of property damage) and CROPDMGEXPFINAL (value of crop damage). Overall, the results show that Tornado, Thunderstorm Wind, Heat, Coastal Flood and Lightning are the event types with the greatest effects.

DATA PROCESSING

The code below cleans the EVTYPE variable, generating an EVTYPE2 variable with only 28 types of events, following the National Weather Service Storm Data Documentation.

I used grep for that.

  library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
  # Load the raw storm data and wrap it in a dplyr table.
  mydata <- read.csv("repdata-data-StormData.csv")
  mydata_tbl <- tbl_df(mydata)

  # EVTYPE2 starts out as a lower-cased character copy of EVTYPE; all later
  # recoding is done on this working column so EVTYPE itself stays untouched.
  mydata_tbl$EVTYPE2 <- tolower(as.character(mydata_tbl$EVTYPE))

  # Drop rows whose event type starts with "summary" or mentions "urban".
  is_summary_row <- grepl("^summary", mydata_tbl$EVTYPE2)
  is_urban_row <- grepl("urban(.*)", mydata_tbl$EVTYPE2)
  mydata_tbl <- mydata_tbl[!(is_summary_row | is_urban_row), ]
  


  # Ordered (pattern, label) rules used to collapse the free-text event types
  # in EVTYPE2 into a small set of labels. Each rule overwrites EVTYPE2 for
  # every row whose *current* EVTYPE2 value matches the pattern.
  #
  # NOTE(review): the rules run in sequence against the already-relabelled
  # column, so a label written by one rule can be rewritten by a later rule
  # whose pattern matches it (for example "Strong Wind" matches the later
  # "(.*)[Ww]ind(.*)" rule), and a broad early rule can hide rows from a
  # narrower later one ("flash flood" contains "flood", so it is relabelled
  # "Coastal Flood" before the "flash flo(.*)" rule runs). The order is kept
  # exactly as written; confirm whether this cascade is intended.
  evtype_rules <- matrix(c(
    "astronomical low tide", "Astronomical Low Tide ",
    "avalance|avalanche", "Avalanche",
    "(.*)blizzard(.*)", "Blizzard",
    "(.*)flood(.*)", "Coastal Flood",
    "(.*)cold(.*)", "Cold/Wind Chill",
    "mud(.*)", "Debris Flow",
    "(.*)fog(.*)", "Dense Fog",
    "smoke", "Dense Smoke",
    "(.*)dry", "Drought",
    "(.*)drought", "Drought",
    "(.*)dust dev", "Dust Devil",
    "(.*)dust(.*)", "Dust Storm",
    "(.*)excessive heat", "Excessive Heat",
    "(extreme)? wind|[Ff]r|icy|hyp(.*)", "Extreme Cold/Wind Chill ",
    "flash flo(.*)", "Flash Flood",
    "freezing fog", "Freezing Fog ",
    "frost(.)|freezing(.)", "Frost/Freeze ",
    "[Ii]ce(.*)", "Frost/Freeze ",
    "funnel(.*)", "Funnel Cloud",
    "hail(.*)", "Hail",
    "(.*)[Hh]eat|warm|cool|driest|hot(.*)", "Heat",
    "(.*)rain|burst|shower|drowning(.*)", "Heavy Rain",
    "heavy snow(.*)", "Heavy Snow",
    "(.*)surf(.*)", "Heavy Surf",
    "(high|bitter) wind(.*)", "Strong Wind",
    "hurricane(.*)", "Hurricane",
    "ice(.)?storm(.*)", "Ice Storm",
    "lake(.*)?effect(.*)", "Lake-Effect Snow",
    "(.*)marine|rip|water|tide(.*)", "Marine Thunderstorm Wind",
    "ro(u)?g|beach|erosion|surge|waves", "Marine Strong Wind",
    "(.*)[Ll]ig(.*)", "Lightning",
    "sleet(.*)", "Sleet",
    "snow(.*)", "Sleet",
    "t(h)?u(.*)", "Thunderstorm Wind",
    "(.*)?tstm(.*)", "Thunderstorm Wind",
    "(.*)[Ww]ind(.*)", "Thunderstorm Wind",
    "(.*)[Ss]torm(.*)", "Thunderstorm Wind",
    "torn(.*)", "Tornado",
    "tropical depression", "Tropical Depresion",
    "tropical storm(.*)", "Tropical Storm",
    "tsunami|typhoon", "Tsunami",
    "(.*)volcanic(.*)", "Volcanic Ash",
    "[Vv]olcanic [Aa]sh(.*)", "Volcanic Ash",
    "(.*)?wa(.)?ter(.)?spout(.*)", "Waterspout",
    "(.*)fire(.*)", "Wildfire",
    "winter storm(.*)", "Winter Storm",
    "(.*)[Ww]int|precip|wet|stream|cloud|temp|severe|mild(.*)", "Winter Weather"
  ), ncol = 2, byrow = TRUE, dimnames = list(NULL, c("pattern", "label")))

  # Apply the rules one at a time, top to bottom.
  for (rule_idx in seq_len(nrow(evtype_rules))) {
    rule_hits <- grep(evtype_rules[rule_idx, "pattern"], mydata_tbl$EVTYPE2)
    mydata_tbl$EVTYPE2[rule_hits] <- evtype_rules[rule_idx, "label"]
  }

  # Discard rows whose EVTYPE2 still matches one of these leftover fragments.
  leftover_pattern <- "(.*)apache|county|dam|excessive|gustnado|and|heavy|high|swells|landspout|none|other|red|remnants|rock|seiche|southeast|vog|\\?|wnd|glaze|record(.*)"
  mydata_tbl <- mydata_tbl[!grepl(leftover_pattern, mydata_tbl$EVTYPE2), ]

  # From here on EVTYPE2 is a factor.
  mydata_tbl$EVTYPE2 <- factor(mydata_tbl$EVTYPE2)

Recoding PROPDMGEXP into PROPDMGEXPFINAL. The latter is a scale of the property damage: Billions, Ten Millions, Million, 100,000, 10,000, 1,000, 100 and 10.

  # Translate the property-damage magnitude code into a labelled scale.
  # Names are the raw PROPDMGEXP codes, values the label stored in
  # PROPDMGEXPFINAL; any code not listed here becomes NA.
  mydata_tbl$PROPDMGEXP <- as.character(mydata_tbl$PROPDMGEXP)
  prop_scale <- c(
    "1" = "10",
    "2" = "100", "h" = "100", "H" = "100",
    "3" = "1,000", "k" = "1,000", "K" = "1,000",
    "4" = "10,000",
    "5" = "100,000",
    "6" = "Million", "m" = "Million", "M" = "Million",
    "7" = "Ten Millions",
    "b" = "Billions", "B" = "Billions"
  )
  prop_label <- unname(prop_scale[match(mydata_tbl$PROPDMGEXP, names(prop_scale))])

  mydata_tbl$PROPDMGEXPFINAL <- factor(prop_label)

Recoding CROPDMGEXP into CROPDMGEXPFINAL. The latter is a scale of the crop damage: Billions, Million, 1,000, 100.

  # Translate the crop-damage magnitude code into a labelled scale stored in
  # CROPDMGEXPFINAL ("100", "1,000", "Million", "Billions"); any other code
  # becomes NA.
  #
  # Codes are upper-cased before the lookup so that letter codes are accepted
  # in either case. Previously "k"/"K" and "m"/"M" were both handled but only
  # upper-case "B" was, so a lower-case "b" silently became NA.
  mydata_tbl$CROPDMGEXP <- as.character(mydata_tbl$CROPDMGEXP)
  crop_codes <- c("2", "K", "M", "B")
  crop_labels <- c("100", "1,000", "Million", "Billions")
  crop_label <- crop_labels[match(toupper(mydata_tbl$CROPDMGEXP), crop_codes)]

  mydata_tbl$CROPDMGEXPFINAL <- factor(crop_label)

Finally, we extract the year from the variable BGN_DATE.

  library(lubridate)
  # Convert BGN_DATE to character, parse it as month/day/year h:m:s into DATE,
  # and keep the calendar year in YEAR.
  bgn_date_chr <- as.character(mydata_tbl$BGN_DATE)
  mydata_tbl$BGN_DATE <- bgn_date_chr
  mydata_tbl$DATE <- mdy_hms(bgn_date_chr)
  mydata_tbl$YEAR <- year(mydata_tbl$DATE)

ANALYSIS

As a first approximation, we look at the most frequent types of events across the U.S. and across all dates:

  library(dplyr)
  # Count the records per event type and print them, most frequent first.
  by_EVTYPE2 <- mydata_tbl %>% group_by(EVTYPE2)
  EVTYPE2_sum <- by_EVTYPE2 %>% summarize(totals = n())
  freq_rank <- order(EVTYPE2_sum$totals, decreasing = TRUE)
  as.data.frame(EVTYPE2_sum[freq_rank, ])
##                   EVTYPE2 totals
## 1       Thunderstorm Wind 388421
## 2                    Hail 289280
## 3           Coastal Flood  82312
## 4                 Tornado  60685
## 5               Lightning  15948
## 6              Heavy Snow  15768
## 7              Heavy Rain  11896
## 8          Winter Weather   8398
## 9            Funnel Cloud   6982
## 10               Wildfire   4239
## 11                   Heat   2980
## 12               Blizzard   2744
## 13          Frost/Freeze    2179
## 14              Dense Fog   1882
## 15             Heavy Surf   1063
## 16                  Sleet    986
## 17       Lake-Effect Snow    659
## 18              Avalanche    388
## 19              Hurricane    287
## 20 Astronomical Low Tide     174
## 21             Dust Devil    151
## 22     Tropical Depresion     60
## 23            Debris Flow     36
## 24                Tsunami     31
## 25           Volcanic Ash     29
## 26            Dense Smoke     21
## 27            Flash Flood      1
## 28             Waterspout      1

Plots of event type versus population health (fatalities and injuries).

  # Total fatalities and injuries per event type, as a plain data frame.
  by_EVTYPE2 <- mydata_tbl %>%
    group_by(EVTYPE2) %>%
    summarise(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES)) %>%
    data.frame()
  by_EVTYPE2
##                   EVTYPE2 FATALITIES INJURIES
## 1  Astronomical Low Tide           0        0
## 2               Avalanche        225      171
## 3                Blizzard        101      805
## 4           Coastal Flood       1524     8604
## 5             Debris Flow          5        2
## 6               Dense Fog         80     1076
## 7             Dense Smoke          0        0
## 8              Dust Devil          2       43
## 9             Flash Flood          0        0
## 10          Frost/Freeze         102     2164
## 11           Funnel Cloud          0        3
## 12                   Hail         15     1371
## 13                   Heat       3143     9228
## 14             Heavy Rain        106      284
## 15             Heavy Snow        127     1021
## 16             Heavy Surf        163      246
## 17              Hurricane        133     1328
## 18       Lake-Effect Snow          0        0
## 19              Lightning        818     5234
## 20                  Sleet         13       83
## 21      Thunderstorm Wind       2684    14757
## 22                Tornado       5633    91364
## 23     Tropical Depresion          0        0
## 24                Tsunami         33      134
## 25           Volcanic Ash          0        0
## 26             Waterspout          0        0
## 27               Wildfire         90     1608
## 28         Winter Weather         64      641
## geom_smooth: Only one unique x value each group.Maybe you want aes(group = 1)?

## geom_smooth: Only one unique x value each group.Maybe you want aes(group = 1)?

Plots of event type versus economic consequences (property damage and crop damage).

  # Regroup the cleaned data by event type for the economic-damage plots.
  by_EVTYPE2 <- mydata_tbl %>% group_by(EVTYPE2)
## geom_smooth: Only one unique x value each group.Maybe you want aes(group = 1)?

## geom_smooth: Only one unique x value each group.Maybe you want aes(group = 1)?

RESULTS

The most frequent types of event are, in order: Thunderstorm Wind, Hail, Coastal Flood and Tornado.

As we can see in the plot of fatalities, the 5 types of events that cause the most fatalities are, in order: Tornado with 5633 fatalities, Heat with 3143, Thunderstorm Wind with 2684, Coastal Flood with 1524 and Lightning with 818.

The plot of injuries shows that the main type of event is Tornado with 91364 injuries; a distant second is Thunderstorm Wind with 14757, the third is Heat with 9228, the fourth is Coastal Flood with 8604, and the fifth is Lightning with 5234.

Overall, we see that the above are the types of events that do the most damage to population health, above all Tornado.

Regarding the economic consequences, the plot of property damage shows that the types of event that produce the most property damage (in the billions) are: Coastal Flood, Hail, Heavy Rain, Hurricane, Thunderstorm Wind, Tornado and Wildfire.

In the plot of crop damage, we see that the types of event that produce the most crop damage (in the billions) are: Coastal Flood, Frost/Freeze, Heat, Hurricane and Thunderstorm Wind.

Overall, the economic consequences come from the same types of event that damage population health, plus Hail and Wildfire.