SYNOPSIS
In this study we need to clean practically all the variables. EVTYPE, the type of event, is reduced from more than 900 levels to 28 levels. Population health is measured with the variables FATALITIES and INJURIES, and the economic consequences with the variables PROPDMGEXPFINAL (value of property damage) and CROPDMGEXPFINAL (value of crop damage). Overall, the results show that the event types Tornado, Thunderstorm Wind, Heat, Coastal Flood and Lightning are the ones that produce the greatest effects.
DATA PROCESSING
The code below cleans the EVTYPE variable, generating an EVTYPE2 variable with only 28 types of events, following the National Weather Service Storm Data Documentation.
I have used grep for that.
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Build EVTYPE2, a cleaned copy of EVTYPE, by running an ORDERED cascade of
# regex reassignments. Order matters: grep() is case-sensitive and is applied
# to the current content of EVTYPE2, so a rule sees both the still-unmapped
# lower-case raw values and the Title Case labels written by earlier rules.
# The "(.*)" wrappers do not change what grep() matches (it is a substring
# search); with "|" they bind only to the first/last alternative.

# Read the raw storm data from the working directory.
mydata<-read.csv("repdata-data-StormData.csv")
# NOTE(review): tbl_df() is deprecated in recent dplyr releases in favour of
# as_tibble() -- TODO confirm against the installed dplyr version.
mydata_tbl<-tbl_df(mydata)
# Work on a lower-cased character copy so the raw EVTYPE column is kept.
mydata_tbl$EVTYPE2<-mydata_tbl$EVTYPE
mydata_tbl$EVTYPE2<-tolower(as.character(mydata_tbl$EVTYPE2))
# Drop rows whose type starts with "summary" and rows containing "urban".
mydata_tbl<-mydata_tbl[!grepl("^summary",mydata_tbl$EVTYPE2),]
mydata_tbl<-mydata_tbl[!grepl("urban(.*)",mydata_tbl$EVTYPE2),]
# NOTE: this label (like a few below) carries a trailing space.
mydata_tbl$EVTYPE2[grep("astronomical low tide",mydata_tbl$EVTYPE2)]<-"Astronomical Low Tide "
mydata_tbl$EVTYPE2[grep("avalance|avalanche",mydata_tbl$EVTYPE2)]<-"Avalanche"
mydata_tbl$EVTYPE2[grep("(.*)blizzard(.*)",mydata_tbl$EVTYPE2)]<-"Blizzard"
# NOTE(review): every value containing "flood" (flash floods included) is
# labelled "Coastal Flood" here, before the "flash flo" rule further down.
mydata_tbl$EVTYPE2[grep("(.*)flood(.*)",mydata_tbl$EVTYPE2)]<-"Coastal Flood"
# NOTE(review): "Cold/Wind Chill" is matched again by the "[Ww]ind" rule
# below and ends up as "Thunderstorm Wind".
mydata_tbl$EVTYPE2[grep("(.*)cold(.*)",mydata_tbl$EVTYPE2)]<-"Cold/Wind Chill"
mydata_tbl$EVTYPE2[grep("mud(.*)",mydata_tbl$EVTYPE2)]<-"Debris Flow"
mydata_tbl$EVTYPE2[grep("(.*)fog(.*)",mydata_tbl$EVTYPE2)]<-"Dense Fog"
mydata_tbl$EVTYPE2[grep("smoke",mydata_tbl$EVTYPE2)]<-"Dense Smoke"
# NOTE(review): the label "Drought" contains "roug", so the "ro(u)?g" rule
# below rewrites it to "Marine Strong Wind", which the "[Ww]ind" rule then
# turns into "Thunderstorm Wind".
mydata_tbl$EVTYPE2[grep("(.*)dry",mydata_tbl$EVTYPE2)]<-"Drought"
mydata_tbl$EVTYPE2[grep("(.*)drought",mydata_tbl$EVTYPE2)]<-"Drought"
# "Dust Devil" is assigned first so the lower-case "dust" rule skips it.
mydata_tbl$EVTYPE2[grep("(.*)dust dev",mydata_tbl$EVTYPE2)]<-"Dust Devil"
# NOTE(review): "Dust Storm" is later absorbed by the "[Ss]torm" rule.
mydata_tbl$EVTYPE2[grep("(.*)dust(.*)",mydata_tbl$EVTYPE2)]<-"Dust Storm"
# NOTE(review): "Excessive Heat" is later absorbed by the "[Hh]eat" rule.
mydata_tbl$EVTYPE2[grep("(.*)excessive heat",mydata_tbl$EVTYPE2)]<-"Excessive Heat"
# "(extreme)?" is optional, so the first alternative matches any " wind"
# (space + wind); "[Ff]r" matches any value containing "fr" or "Fr".
# NOTE(review): this label is later absorbed by the "[Ww]ind" rule.
mydata_tbl$EVTYPE2[grep("(extreme)? wind|[Ff]r|icy|hyp(.*)",mydata_tbl$EVTYPE2)]<-"Extreme Cold/Wind Chill "
# Only reaches values with "flash flo" that do not contain "flood"; the rest
# were already relabelled "Coastal Flood" above.
mydata_tbl$EVTYPE2[grep("flash flo(.*)",mydata_tbl$EVTYPE2)]<-"Flash Flood"
# NOTE(review): cannot match -- every value containing "fog" has already
# been replaced by "Dense Fog".
mydata_tbl$EVTYPE2[grep("freezing fog",mydata_tbl$EVTYPE2)]<-"Freezing Fog "
# "(.)" requires at least one character after "frost" / "freezing".
mydata_tbl$EVTYPE2[grep("frost(.)|freezing(.)",mydata_tbl$EVTYPE2)]<-"Frost/Freeze "
# NOTE(review): also captures "ice storm" values, so the "Ice Storm" rule
# below has nothing left to match.
mydata_tbl$EVTYPE2[grep("[Ii]ce(.*)",mydata_tbl$EVTYPE2)]<-"Frost/Freeze "
mydata_tbl$EVTYPE2[grep("funnel(.*)",mydata_tbl$EVTYPE2)]<-"Funnel Cloud"
mydata_tbl$EVTYPE2[grep("hail(.*)",mydata_tbl$EVTYPE2)]<-"Hail"
# "[Hh]eat" also matches the "Excessive Heat" label assigned above.
mydata_tbl$EVTYPE2[grep("(.*)[Hh]eat|warm|cool|driest|hot(.*)",mydata_tbl$EVTYPE2)]<-"Heat"
mydata_tbl$EVTYPE2[grep("(.*)rain|burst|shower|drowning(.*)",mydata_tbl$EVTYPE2)]<-"Heavy Rain"
mydata_tbl$EVTYPE2[grep("heavy snow(.*)",mydata_tbl$EVTYPE2)]<-"Heavy Snow"
mydata_tbl$EVTYPE2[grep("(.*)surf(.*)",mydata_tbl$EVTYPE2)]<-"Heavy Surf"
# NOTE(review): anything matching this pattern contains " wind" and was
# already relabelled by the "(extreme)? wind" rule above.
mydata_tbl$EVTYPE2[grep("(high|bitter) wind(.*)",mydata_tbl$EVTYPE2)]<-"Strong Wind"
mydata_tbl$EVTYPE2[grep("hurricane(.*)",mydata_tbl$EVTYPE2)]<-"Hurricane"
# NOTE(review): unreachable after the "[Ii]ce" rule above.
mydata_tbl$EVTYPE2[grep("ice(.)?storm(.*)",mydata_tbl$EVTYPE2)]<-"Ice Storm"
mydata_tbl$EVTYPE2[grep("lake(.*)?effect(.*)",mydata_tbl$EVTYPE2)]<-"Lake-Effect Snow"
# NOTE(review): "water" also captures "waterspout" values; both marine
# labels below are later absorbed by the "[Ww]ind" rule.
mydata_tbl$EVTYPE2[grep("(.*)marine|rip|water|tide(.*)",mydata_tbl$EVTYPE2)]<-"Marine Thunderstorm Wind"
mydata_tbl$EVTYPE2[grep("ro(u)?g|beach|erosion|surge|waves",mydata_tbl$EVTYPE2)]<-"Marine Strong Wind"
mydata_tbl$EVTYPE2[grep("(.*)[Ll]ig(.*)",mydata_tbl$EVTYPE2)]<-"Lightning"
# Sleet and all remaining lower-case "snow" values share the "Sleet" label;
# "Heavy Snow" and "Lake-Effect Snow" are spared because of their capital S.
mydata_tbl$EVTYPE2[grep("sleet(.*)",mydata_tbl$EVTYPE2)]<-"Sleet"
mydata_tbl$EVTYPE2[grep("snow(.*)",mydata_tbl$EVTYPE2)]<-"Sleet"
# "t(h)?u" matches any value containing "tu" or "thu".
mydata_tbl$EVTYPE2[grep("t(h)?u(.*)",mydata_tbl$EVTYPE2)]<-"Thunderstorm Wind"
mydata_tbl$EVTYPE2[grep("(.*)?tstm(.*)",mydata_tbl$EVTYPE2)]<-"Thunderstorm Wind"
# The next two rules match raw values AND every earlier label containing
# "Wind"/"wind" or "Storm"/"storm", folding them into "Thunderstorm Wind".
mydata_tbl$EVTYPE2[grep("(.*)[Ww]ind(.*)",mydata_tbl$EVTYPE2)]<-"Thunderstorm Wind"
mydata_tbl$EVTYPE2[grep("(.*)[Ss]torm(.*)",mydata_tbl$EVTYPE2)]<-"Thunderstorm Wind"
mydata_tbl$EVTYPE2[grep("torn(.*)",mydata_tbl$EVTYPE2)]<-"Tornado"
# NOTE(review): the label is spelled "Depresion" (single s); it is a runtime
# string and is left untouched here.
mydata_tbl$EVTYPE2[grep("tropical depression",mydata_tbl$EVTYPE2)]<-"Tropical Depresion"
# NOTE(review): unreachable -- values containing "storm" were already
# replaced by the "[Ss]torm" rule above (same for "winter storm" below).
mydata_tbl$EVTYPE2[grep("tropical storm(.*)",mydata_tbl$EVTYPE2)]<-"Tropical Storm"
# Typhoons are labelled "Tsunami" by this rule.
mydata_tbl$EVTYPE2[grep("tsunami|typhoon",mydata_tbl$EVTYPE2)]<-"Tsunami"
mydata_tbl$EVTYPE2[grep("(.*)volcanic(.*)",mydata_tbl$EVTYPE2)]<-"Volcanic Ash"
# Redundant with the previous rule: it can only re-match the label itself.
mydata_tbl$EVTYPE2[grep("[Vv]olcanic [Aa]sh(.*)",mydata_tbl$EVTYPE2)]<-"Volcanic Ash"
# Only reaches spellings that do not contain "water" (see the marine rule).
mydata_tbl$EVTYPE2[grep("(.*)?wa(.)?ter(.)?spout(.*)",mydata_tbl$EVTYPE2)]<-"Waterspout"
mydata_tbl$EVTYPE2[grep("(.*)fire(.*)",mydata_tbl$EVTYPE2)]<-"Wildfire"
mydata_tbl$EVTYPE2[grep("winter storm(.*)",mydata_tbl$EVTYPE2)]<-"Winter Storm"
mydata_tbl$EVTYPE2[grep("(.*)[Ww]int|precip|wet|stream|cloud|temp|severe|mild(.*)",mydata_tbl$EVTYPE2)]<-"Winter Weather"
# Drop every row whose EVTYPE2 still contains one of these leftover
# fragments (substring match, e.g. "and", "red", "high").
mydata_tbl<-mydata_tbl[!grepl("(.*)apache|county|dam|excessive|gustnado|and|heavy|high|swells|landspout|none|other|red|remnants|rock|seiche|southeast|vog|\\?|wnd|glaze|record(.*)",mydata_tbl$EVTYPE2),]
# Convert the cleaned labels to a factor.
mydata_tbl$EVTYPE2<-factor(mydata_tbl$EVTYPE2)
Recoding PROPDMGEXP into PROPDMGEXPFINAL. The latter is the scale of the property damage: Billions, Ten Millions, Million, 100,000, 10,000, 1,000, 100 and 10.
# Translate the PROPDMGEXP exponent codes into the PROPDMGEXPFINAL scale
# labels through a lookup table. Codes that are not listed in the table
# are left as NA.
mydata_tbl$PROPDMGEXP <- as.character(mydata_tbl$PROPDMGEXP)
mydata_tbl$PROPDMGEXPFINAL <- local({
  scale_label <- c(
    "1" = "10",
    "2" = "100", "h" = "100", "H" = "100",
    "3" = "1,000", "k" = "1,000", "K" = "1,000",
    "4" = "10,000",
    "5" = "100,000",
    "6" = "Million", "m" = "Million", "M" = "Million",
    "7" = "Ten Millions",
    "b" = "Billions", "B" = "Billions"
  )
  # Character indexing matches names exactly; unknown codes yield NA.
  factor(unname(scale_label[mydata_tbl$PROPDMGEXP]))
})
Recoding CROPDMGEXP into CROPDMGEXPFINAL. The latter is the scale of the crop damage: Billions, Million, 1,000 and 100.
# Translate the CROPDMGEXP exponent codes into the CROPDMGEXPFINAL scale
# labels through a lookup table. Codes that are not listed in the table
# (including lower-case "b") are left as NA.
mydata_tbl$CROPDMGEXP <- as.character(mydata_tbl$CROPDMGEXP)
mydata_tbl$CROPDMGEXPFINAL <- local({
  scale_label <- c(
    "2" = "100",
    "k" = "1,000", "K" = "1,000",
    "m" = "Million", "M" = "Million",
    "B" = "Billions"
  )
  # Character indexing matches names exactly; unknown codes yield NA.
  factor(unname(scale_label[mydata_tbl$CROPDMGEXP]))
})
Finally, we extract the year from the variable BGN_DATE.
library(lubridate)
# Store BGN_DATE as text, parse it as a month-day-year hour-minute-second
# timestamp into DATE, and keep the calendar year in YEAR.
bgn_date_text <- as.character(mydata_tbl$BGN_DATE)
mydata_tbl$BGN_DATE <- bgn_date_text
mydata_tbl$DATE <- mdy_hms(bgn_date_text)
mydata_tbl$YEAR <- year(mydata_tbl$DATE)
rm(bgn_date_text)
ANALYSIS
As a first approximation, we look at which types of events are most frequent across the U.S. and across all dates:
library(dplyr)
# Count the rows per cleaned event type and print the counts from the most
# to the least frequent type.
by_EVTYPE2 <- mydata_tbl %>%
  group_by(EVTYPE2)
EVTYPE2_sum <- by_EVTYPE2 %>%
  summarize(totals = n())
EVTYPE2_sum %>%
  arrange(desc(totals)) %>%
  as.data.frame()
## EVTYPE2 totals
## 1 Thunderstorm Wind 388421
## 2 Hail 289280
## 3 Coastal Flood 82312
## 4 Tornado 60685
## 5 Lightning 15948
## 6 Heavy Snow 15768
## 7 Heavy Rain 11896
## 8 Winter Weather 8398
## 9 Funnel Cloud 6982
## 10 Wildfire 4239
## 11 Heat 2980
## 12 Blizzard 2744
## 13 Frost/Freeze 2179
## 14 Dense Fog 1882
## 15 Heavy Surf 1063
## 16 Sleet 986
## 17 Lake-Effect Snow 659
## 18 Avalanche 388
## 19 Hurricane 287
## 20 Astronomical Low Tide 174
## 21 Dust Devil 151
## 22 Tropical Depresion 60
## 23 Debris Flow 36
## 24 Tsunami 31
## 25 Volcanic Ash 29
## 26 Dense Smoke 21
## 27 Flash Flood 1
## 28 Waterspout 1
Plots of type of events and population health (fatalities and injuries).
# Total fatalities and injuries per cleaned event type.
# The sums are written out with summarise() because summarise_each() and
# funs() are deprecated in dplyr; the resulting columns keep the same names
# (EVTYPE2, FATALITIES, INJURIES) and values.
by_EVTYPE2 <- mydata_tbl %>%
  group_by(EVTYPE2) %>%
  summarise(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES))
# Convert to a plain data frame so that every row is printed.
by_EVTYPE2 <- as.data.frame(by_EVTYPE2)
by_EVTYPE2
## EVTYPE2 FATALITIES INJURIES
## 1 Astronomical Low Tide 0 0
## 2 Avalanche 225 171
## 3 Blizzard 101 805
## 4 Coastal Flood 1524 8604
## 5 Debris Flow 5 2
## 6 Dense Fog 80 1076
## 7 Dense Smoke 0 0
## 8 Dust Devil 2 43
## 9 Flash Flood 0 0
## 10 Frost/Freeze 102 2164
## 11 Funnel Cloud 0 3
## 12 Hail 15 1371
## 13 Heat 3143 9228
## 14 Heavy Rain 106 284
## 15 Heavy Snow 127 1021
## 16 Heavy Surf 163 246
## 17 Hurricane 133 1328
## 18 Lake-Effect Snow 0 0
## 19 Lightning 818 5234
## 20 Sleet 13 83
## 21 Thunderstorm Wind 2684 14757
## 22 Tornado 5633 91364
## 23 Tropical Depresion 0 0
## 24 Tsunami 33 134
## 25 Volcanic Ash 0 0
## 26 Waterspout 0 0
## 27 Wildfire 90 1608
## 28 Winter Weather 64 641
## geom_smooth: Only one unique x value each group.Maybe you want aes(group = 1)?
## geom_smooth: Only one unique x value each group.Maybe you want aes(group = 1)?
Plots of type events and economic consequences (property damage and crop damage)
# Regroup the cleaned data by event type for the economic-damage plots.
by_EVTYPE2 <- mydata_tbl %>%
  group_by(EVTYPE2)
## geom_smooth: Only one unique x value each group.Maybe you want aes(group = 1)?
## geom_smooth: Only one unique x value each group.Maybe you want aes(group = 1)?
RESULTS
The most frequent types of event are, in order: Thunderstorm Wind first, Hail second, Coastal Flood third and Tornado fourth.
As we can see in the plot of fatalities, the 5 types of events that cause the most fatalities are, in order: Tornado with 5633 fatalities, Heat with 3143, Thunderstorm Wind with 2684, Coastal Flood with 1524 and Lightning with 818.
The plot of injuries shows that the main type of event is Tornado with 91364 injuries; a distant second is Thunderstorm Wind with 14757, the third is Heat with 9228, the fourth Coastal Flood with 8604, and the fifth Lightning with 5234.
Overall, we see that the above are the types of events that do the most damage to population health, Tornado most of all.
Regarding the economic consequences, the plot of property damage shows that the types of event that produce the most property damage (in the billions) are: Coastal Flood, Hail, Heavy Rain, Hurricane, Thunderstorm Wind, Tornado and Wildfire.
The plot of crop damage shows that the types of event that produce the most crop damage (in the billions) are: Coastal Flood, Frost/Freeze, Heat, Hurricane and Thunderstorm Wind.
Overall, the economic consequences come from the same types of event that damage population health, plus Hail and Wildfire.