In this analysis I show the different types of weather events that have had negative consequences for the US population since 1950. The aim of this analysis is to determine which types of events were most harmful to population health (measured as combined fatalities and injuries) and which had the greatest economic consequences in the country. This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which tracks the major storms and weather events in the United States, including when and where they occurred, as well as estimates of any fatalities, injuries, and property damage.
The NOAA storm database is downloaded from <http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2>.
# Load the NOAA storm database into `Stormdata`.
# The compressed CSV is fetched only when it is not already present in the
# working directory, so the analysis can be reproduced from scratch without
# downloading the archive again on every run.
URL <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destfile <- "./repdata-data-StormData.csv.bz2"
if (!file.exists(destfile)) {
  # mode = "wb" transfers the bzip2 archive as binary; a text-mode download
  # can corrupt it on some platforms.
  download.file(URL, destfile, mode = "wb")
}
# bzfile() lets read.csv() read the compressed file directly.
Stormdata <- read.csv(bzfile(destfile))
Here we calculate the number of people harmed (fatalities plus injuries) for each recorded event, i.e. per row.
# Per-record count of people harmed: injuries plus deaths.
Stormdata[["harmfull_events"]] <-
  Stormdata[["INJURIES"]] + Stormdata[["FATALITIES"]]
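Here we calculate the total economic damage per row: the exponent codes (such as K, M and B) are first converted into numeric multipliers, and then crop damage and property damage are added together.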
# Convert damage-exponent codes into numeric multipliers.
#
# Letter codes are matched case-insensitively:
#   h -> 100, k -> 1,000, m -> 1,000,000, b -> 1,000,000,000.
# Every other code (empty string, "?", "+", "-", digits, NA, ...) is treated
# as "no usable magnitude" and mapped to 0, which is how this analysis has
# always handled those codes.
#
# codes:   character vector or factor of exponent codes.
# returns: numeric vector of multipliers, the same length as `codes`.
exponent_multiplier <- function(codes) {
  lookup <- c(h = 1e2, k = 1e3, m = 1e6, b = 1e9)
  # Indexing a named vector by name yields NA for names that are not present
  # (including ""), so unknown codes are easy to detect afterwards.
  multiplier <- unname(lookup[tolower(as.character(codes))])
  multiplier[is.na(multiplier)] <- 0
  multiplier
}

# Replace the textual exponent columns with their numeric multipliers.
# Columns are addressed by name (the previous code used positions 26 and 28),
# so a different column order cannot silently corrupt another column.
Stormdata$CROPDMGEXP <- exponent_multiplier(Stormdata$CROPDMGEXP)
Stormdata$PROPDMGEXP <- exponent_multiplier(Stormdata$PROPDMGEXP)

# Damage per record = mantissa * multiplier. Plain vectorized arithmetic
# replaces the row-wise apply(), which is far slower on a large table.
Stormdata$CROPDamage <- Stormdata$CROPDMG * Stormdata$CROPDMGEXP
Stormdata$PROPDamage <- Stormdata$PROPDMG * Stormdata$PROPDMGEXP

# Total economic damage per record: crop plus property damage.
Stormdata$Damage <- Stormdata$CROPDamage + Stormdata$PROPDamage
Now we divide the data set into two new data sets: one to answer the first question, which concerns the total number of fatalities and injuries caused by each type of event, and a second one to answer the question about the economic consequences of each type of event.
# Keep only the records that matter for each question:
#   HumanData  - records with at least one fatality or injury
#   DamageData - records with non-zero economic damage
# which() drops rows whose comparison is NA instead of inserting all-NA rows
# into the subset.
HumanData <- Stormdata[which(Stormdata$harmfull_events != 0), ]
DamageData <- Stormdata[which(Stormdata$Damage != 0), ]

# Total fatalities + injuries per event type, largest first.
# order() is given the numeric column itself: ordering a one-column data
# frame (as NumberofEvents[2] is) is rejected by newer R versions with
# "cannot xtfrm data frames".
NumberofEvents <- aggregate(harmfull_events ~ EVTYPE, HumanData, sum)
sortedNumberofEvents <-
  NumberofEvents[order(-NumberofEvents$harmfull_events), ]

# Total economic damage per event type, largest first.
EconomicEvents <- aggregate(Damage ~ EVTYPE, DamageData, sum)
sortedNumberofEconomicEvents <-
  EconomicEvents[order(-EconomicEvents$Damage), ]
In order to answer both questions, I keep only the 7 types of events that contribute the most to each answer.
# Keep the seven event types with the highest combined fatalities and
# injuries (the table is already sorted in decreasing order) and print them.
# head() returns fewer rows when fewer than seven event types exist, whereas
# indexing with 1:7 would pad the table with all-NA rows.
sortedNumberofEvents7 <- head(sortedNumberofEvents, 7)
sortedNumberofEvents7
## EVTYPE harmfull_events
## 184 TORNADO 96979
## 32 EXCESSIVE HEAT 8428
## 191 TSTM WIND 7461
## 47 FLOOD 7259
## 122 LIGHTNING 6046
## 69 HEAT 3037
## 42 FLASH FLOOD 2755
# Keep the seven event types with the highest total economic damage (the
# table is already sorted in decreasing order) and print them.
# head() returns fewer rows when fewer than seven event types exist, whereas
# indexing with 1:7 would pad the table with all-NA rows.
sortedNumberofEconomicEvents7 <- head(sortedNumberofEconomicEvents, 7)
sortedNumberofEconomicEvents7
## EVTYPE Damage
## 67 FLOOD 150319678250
## 191 HURRICANE/TYPHOON 71913712800
## 348 TORNADO 57352113590
## 293 STORM SURGE 43323541000
## 110 HAIL 18758221670
## 55 FLASH FLOOD 17562128610
## 34 DROUGHT 15018672000
# Pie chart of the seven event types with the most fatalities and injuries;
# each slice is labelled with its event type.
pie(
  sortedNumberofEvents7$harmfull_events,
  labels = sortedNumberofEvents7$EVTYPE,
  main = "7 Most harmfull Events for popular health"
)

# Pie chart of the seven event types with the highest economic damage.
# The stray empty argument (",,") of the original call has been removed and
# the labels are passed by name.
pie(
  sortedNumberofEconomicEvents7$Damage,
  labels = sortedNumberofEconomicEvents7$EVTYPE,
  main = "7 Worst type of Events in economic consequences"
)
We can conclude that tornadoes are by far the most harmful type of event for population health in the US since 1950 (in combined fatalities and injuries), whereas floods and hurricanes/typhoons have had the greatest economic consequences for the population.