The present analysis uses the National Oceanic and Atmospheric Administration’s (NOAA) storm database. The report has two objectives. The first is to determine the top 10 weather events that are most harmful to population health, measured by the sum of the fatalities and injuries they caused. The second is to find the top 10 events that have the greatest economic consequences, in terms of the cost of damage to property and crops.
library(dplyr)
library(ggplot2)
# Load the compressed NOAA storm data; text columns are kept as character.
rawstormo <- read.csv(
  "repdata%2Fdata%2FStormData.csv.bz2",
  stringsAsFactors = FALSE
)
rawstorm <- tbl_df(rawstormo)
# Disabled: optional down-sampling for quicker experiments.
# NOTE(review): `rawstormdt` is not defined in the visible code - confirm
# before re-enabling.
# rawstorm <- sample_n(rawstormdt, 100000)

# Keep only the columns the analysis uses.
selected.cols <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
data <- rawstorm[selected.cols]

# Keep rows with a known event type and at least one recorded impact
# (injury, fatality, property damage or crop damage). which() drops rows
# whose condition evaluates to NA, as subset() would.
has_impact <- with(
  data,
  INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0
)
data <- data[which(data$EVTYPE != "?" & has_impact), ]

# Derived columns: the damage amounts start out as copies of the base values,
# and HARMFUL is the combined casualty count (fatalities + injuries).
PROPDMGUSD <- data$PROPDMG
CROPDMGUSD <- data$CROPDMG
HARMFUL <- data$FATALITIES + data$INJURIES
stormdata <- cbind(data, PROPDMGUSD, CROPDMGUSD, HARMFUL)

# Variables available for the rest of the analysis.
names(stormdata)
## [1] "EVTYPE" "FATALITIES" "INJURIES" "PROPDMG" "PROPDMGEXP"
## [6] "CROPDMG" "CROPDMGEXP" "PROPDMGUSD" "CROPDMGUSD" "HARMFUL"
This section calculates the economic cost of property and crop damage. The exponent code that scales each base amount is converted to a multiplier following the specification in the data documentation published at https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf
# Multiplier for each exponent code expected in PROPDMGEXP.
# The symbols "-" and "+" and the digit "0" leave the base amount unchanged,
# a digit n stands for 10^n, and H/K/M/B stand for 10^2, 10^3, 10^6 and 10^9.
# NOTE(review): the first key is the literal two-character string `""` (two
# double-quote characters), not the empty string, so it does not match an
# empty PROPDMGEXP value - confirm whether that was intended.
prop_codes <- c("\"\"", "-", "+", as.character(0:9), "H", "K", "M", "B")
prop_powers <- c(0, 0, 0, 0:9, 2, 3, 6, 9)
propExp <- 10^prop_powers
names(propExp) <- prop_codes

# Multiplier for each exponent code expected in CROPDMGEXP.
# "?" and "0" leave the base amount unchanged; K/M/B stand for 10^3, 10^6
# and 10^9. The first key is again the literal two-character string `""`.
crop_codes <- c("\"\"", "?", "0", "K", "M", "B")
crop_powers <- c(0, 0, 0, 3, 6, 9)
cropExp <- 10^crop_powers
names(cropExp) <- crop_codes
# Calculate the total economic cost of property and crop damage by multiplying
# each base amount by the multiplier that its exponent code maps to.
#
# The conversion works on whole columns rather than row by row, so it also
# copes with an empty data set and avoids one data-frame copy per row.

# Translate a vector of exponent codes into numeric multipliers.
#
# codes:  exponent codes (e.g. "K", "m", "", NA); matched case-insensitively.
# lookup: named numeric vector mapping upper-case codes to multipliers.
# Returns an unnamed numeric vector the same length as `codes`.
#
# A missing or empty code means "no scaling" and gets a multiplier of 1. An
# empty string never matches a name in a lookup vector, so it has to be
# handled explicitly; left as NA it would turn the cost into NA and, with it,
# every per-event sum that includes the row. Codes that are not present in
# `lookup` get a multiplier of 1 for the same reason.
exp_multiplier <- function(codes, lookup) {
  multiplier <- unname(lookup[toupper(as.character(codes))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

prop_multiplier <- exp_multiplier(stormdata$PROPDMGEXP, propExp)
crop_multiplier <- exp_multiplier(stormdata$CROPDMGEXP, cropExp)

# The exponent columns are overwritten with their multipliers, now stored as
# numbers.
stormdata$PROPDMGEXP <- prop_multiplier
stormdata$CROPDMGEXP <- crop_multiplier

stormdata$PROPDMGUSD <- as.numeric(stormdata$PROPDMG) * prop_multiplier
stormdata$CROPDMGUSD <- as.numeric(stormdata$CROPDMG) * crop_multiplier
This section selects the top events that are most harmful to population health and the top events that have the greatest economic consequences, measured by the cost of damage to property and crops.
stdt <- tbl_df(stormdata)

# Health impact per event type: total casualties (fatalities + injuries) plus
# the separate fatality and injury totals, sorted from most to least harmful.
stdtsocial <- stdt %>%
  group_by(EVTYPE) %>%
  summarise(
    totalharmful = sum(HARMFUL),
    totalfatalities = sum(FATALITIES),
    totalinjuries = sum(INJURIES)
  ) %>%
  arrange(desc(totalharmful))
# Keep the ten event types with the highest casualty totals.
stdtsocialtop <- stdtsocial %>%
  top_n(10, totalharmful)

# Economic impact per event type: combined property and crop cost plus each
# component, sorted from most to least costly.
stdteconomical <- stdt %>%
  group_by(EVTYPE) %>%
  summarise(
    totalcost = sum(PROPDMGUSD) + sum(CROPDMGUSD),
    totalprop = sum(PROPDMGUSD),
    totalcrop = sum(CROPDMGUSD)
  ) %>%
  arrange(desc(totalcost))
# Keep the ten event types with the highest total cost.
stdteconomicaltop <- stdteconomical %>%
  top_n(10, totalcost)
Across the United States, the most harmful events for population health are TORNADO and EXCESSIVE HEAT.
# Show the top-10 table of event types ranked by total casualties.
stdtsocialtop
## # A tibble: 10 × 4
## EVTYPE totalharmful totalfatalities totalinjuries
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 96979 5633 91346
## 2 EXCESSIVE HEAT 8428 1903 6525
## 3 TSTM WIND 7461 504 6957
## 4 FLOOD 7259 470 6789
## 5 LIGHTNING 6046 816 5230
## 6 HEAT 3037 937 2100
## 7 FLASH FLOOD 2755 978 1777
## 8 ICE STORM 2064 89 1975
## 9 THUNDERSTORM WIND 1621 133 1488
## 10 WINTER STORM 1527 206 1321
# Bar chart of the top events by total casualties; reorder() on the negated
# total places the largest bar first.
ggplot(
  stdtsocialtop,
  aes(x = reorder(EVTYPE, -totalharmful), y = totalharmful)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(
    title = "Top weather events harmful with respect to population health",
    x = "Event",
    y = "Total Harmful (fatalities+injuries)"
  ) +
  # Slant the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Across the United States, the events with the greatest economic consequences are STORM SURGE/TIDE and HURRICANE OPAL.
# Show the top-10 table of event types ranked by total property and crop cost.
stdteconomicaltop
## # A tibble: 10 × 4
## EVTYPE totalcost totalprop totalcrop
## <chr> <dbl> <dbl> <dbl>
## 1 STORM SURGE/TIDE 4642038000 4641188000 850000
## 2 HURRICANE OPAL 3191846000 3172846000 19000000
## 3 HEAVY RAIN/SEVERE WEATHER 2500000000 2500000000 0
## 4 TORNADOES, TSTM WIND, HAIL 1602500000 1600000000 2500000
## 5 WILD FIRES 624100000 624100000 0
## 6 TYPHOON 601055000 600230000 825000
## 7 HAILSTORM 241000000 241000000 0
## 8 TSUNAMI 144082000 144062000 20000
## 9 River Flooding 134175000 106155000 28020000
## 10 COASTAL FLOODING 126696500 126640500 56000
# Bar chart of the top events by total cost; reorder() on the negated total
# places the largest bar first.
ggplot(
  stdteconomicaltop,
  aes(x = reorder(EVTYPE, -totalcost), y = totalcost)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(
    title = "Top weather events with greatest economic consequences",
    x = "Events",
    y = "Total cost $ (properties+crop)"
  ) +
  # Slant the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))