Synopsis

In this analysis I show the different types of events that have had negative consequences for the US population since 1950. The aim of this analysis is to determine which types of events harmed the most people (fatalities plus injuries) and which had the most negative economic consequences in the country. This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which keeps track of the major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Data Processing

The NOAA storm database is taken from http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2 and is read directly from the compressed file.

  # Fetch the compressed NOAA storm data only when no local copy exists, so the
  # analysis can be reproduced on a machine that does not have the file yet.
  URL <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  destfile <- "./repdata-data-StormData.csv.bz2"
  if (!file.exists(destfile)) {
    # Binary mode: the file is a bzip2 archive, not text.
    download.file(URL, destfile, mode = "wb")
  }

  # read.csv() reads through the bzfile() connection, so the archive does not
  # have to be unpacked by hand.
  Stormdata <- read.csv(bzfile(destfile))

Number of events

Here we calculate the number of people harmed (fatalities plus injuries) recorded in each row.

# Casualties per record: fatalities and injuries added together.
Stormdata[["harmfull_events"]] <- with(Stormdata, FATALITIES + INJURIES)

Processing of Economic Variables

Here we calculate the total economic damage (crop damage plus property damage) for each row.

# Recode the CROPDMGEXP magnitude symbols into numeric multipliers for CROPDMG.
# The column is addressed by name, so the recode cannot land on a different
# column if the layout of Stormdata ever changes.
Stormdata$CROPDMGEXP <- local({
  codes <- c("", "?", "2", "k", "K", "m", "M", "B")
  multipliers <- c("0", "0", "0", "1000", "1000", "1000000", "1000000",
                   "1000000000")

  raw <- as.character(Stormdata$CROPDMGEXP)
  # match() is used rather than a named-vector lookup because the empty string
  # cannot be looked up by name.
  hit <- match(raw, codes)
  known <- !is.na(hit)
  raw[known] <- multipliers[hit[known]]

  # Symbols that are not in the table are converted as they are (e.g. "0" -> 0).
  # NOTE(review): blank, "?" and "2" map to a multiplier of 0, so those rows
  # contribute no crop damage - confirm that this is intended.
  as.numeric(raw)
})


# Recode the PROPDMGEXP magnitude symbols into numeric multipliers for PROPDMG.
# The column is addressed by name, so the recode cannot land on a different
# column if the layout of Stormdata ever changes.
Stormdata$PROPDMGEXP <- local({
  codes <- c("", "-", "+", "?", as.character(0:9),
             "h", "H", "k", "K", "m", "M", "B")
  multipliers <- c(rep("0", 14),
                   "100", "100", "1000", "1000", "1000000", "1000000",
                   "1000000000")

  raw <- as.character(Stormdata$PROPDMGEXP)
  # match() is used rather than a named-vector lookup because the empty string
  # cannot be looked up by name.
  hit <- match(raw, codes)
  known <- !is.na(hit)
  raw[known] <- multipliers[hit[known]]

  # NOTE(review): blank, "-", "+", "?" and the digit symbols all map to a
  # multiplier of 0, so those rows contribute no property damage - confirm
  # that this is intended.
  as.numeric(raw)
})

# Absolute damage = reported amount times its multiplier. Vectorised arithmetic
# works on whole columns at once; a row-wise apply() would first convert the
# data frame to a matrix and then call an R function for every single row.
Stormdata$CROPDamage <- Stormdata$CROPDMG * Stormdata$CROPDMGEXP
Stormdata$PROPDamage <- Stormdata$PROPDMG * Stormdata$PROPDMGEXP

# Total economic damage per row: crop damage plus property damage.
Stormdata$Damage <- Stormdata$CROPDamage + Stormdata$PROPDamage

Now we divide the data set into two new data sets: one to answer the first question, which involves the number of people harmed by each type of event, and a second one to answer the question about the economic consequences of each type of event.

# Keep only the records that report at least one casualty, and separately the
# records that report some economic damage.
HumanData <- Stormdata[Stormdata$harmfull_events != 0, ]
DamageData <- Stormdata[Stormdata$Damage != 0, ]

# Total casualties per event type, largest first. The totals are extracted with
# [[ ]] so that order() receives a plain numeric vector instead of a one-column
# data frame (ordering a data frame triggers a warning in recent R versions).
NumberofEvents <- aggregate(harmfull_events ~ EVTYPE, HumanData, sum)
sortedNumberofEvents <- NumberofEvents[order(-NumberofEvents[[2]]), ]


# Total economic damage per event type, largest first.
EconomicEvents <- aggregate(Damage ~ EVTYPE, DamageData, sum)
sortedNumberofEconomicEvents <- EconomicEvents[order(-EconomicEvents[[2]]), ]

To answer both questions, I keep only the 7 types of events that contribute the most to each result.

# First seven rows of the casualty ranking; the bare name prints the table.
sortedNumberofEvents7 <-
  sortedNumberofEvents[seq_len(7), , drop = FALSE]
sortedNumberofEvents7
##             EVTYPE harmfull_events
## 184        TORNADO           96979
## 32  EXCESSIVE HEAT            8428
## 191      TSTM WIND            7461
## 47           FLOOD            7259
## 122      LIGHTNING            6046
## 69            HEAT            3037
## 42     FLASH FLOOD            2755
# First seven rows of the damage ranking; the bare name prints the table.
sortedNumberofEconomicEvents7 <-
  sortedNumberofEconomicEvents[seq_len(7), , drop = FALSE]
sortedNumberofEconomicEvents7
##                EVTYPE       Damage
## 67              FLOOD 150319678250
## 191 HURRICANE/TYPHOON  71913712800
## 348           TORNADO  57352113590
## 293       STORM SURGE  43323541000
## 110              HAIL  18758221670
## 55        FLASH FLOOD  17562128610
## 34            DROUGHT  15018672000

Results

# Share of the casualty totals among the seven event types kept above.
pie(sortedNumberofEvents7$harmfull_events,
    labels = sortedNumberofEvents7$EVTYPE,
    main = "7 Most harmfull Events for popular health")

# Share of the damage totals among the seven event types kept above. The labels
# are passed by name; no empty positional argument is left in the call.
pie(sortedNumberofEconomicEvents7$Damage,
    labels = sortedNumberofEconomicEvents7$EVTYPE,
    main = "7 Worst type of Events in economic consequences")

We can conclude that tornadoes are by far the most harmful type of event for population health in the US since 1950; nevertheless, floods and hurricanes have the biggest economic consequences for the population.