Storms and other severe weather events cause both public health and economic problems. Severe events result in fatalities, injuries, and property and crop damage. In the following, I have explored the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to determine which types of events are most harmful to population health and which types of events have the greatest economic consequences.
The answers to both questions are given by tables of the top 10 natural disaster events, and by graphics that show the accumulated health and economic impact over the time period from 1950 to November 2011.
# set work directory
# The project directory is machine-specific: only switch to it when it exists,
# so the script does not abort on other machines. All file names used below
# are relative, so they resolve against whichever directory is current.
project_dir <- "/Users/joergheintz/Documents/08_MPHPHI/11_Coursera/Coursera_ReproduciableResearch/RepResProj2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# data download from the source
mypath <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# mode = "wb" requests a binary transfer so the compressed archive is stored
# unchanged regardless of platform.
download.file(mypath, "myfile.bz2", mode = "wb")

# read file, and extract needed variables
needed_cols <- c(
  "STATE", "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP", "BGN_DATE"
)
mydata <- read.csv("myfile.bz2")[, needed_cols]

# Keep only the date part of BGN_DATE.
# as.Date() with the "%m/%d/%Y" format parses the leading date of each value,
# so no separate split of date and time is required (the former space-split
# helper result was never used afterwards).
mydata$BGN_DATE <- as.Date(mydata$BGN_DATE, "%m/%d/%Y")
The split data sets, one for health and the other for economic impact, are stored locally.
# Fatalities & Injuries
# Keep only the events that caused at least one fatality or injury and store
# them, with the columns needed for the health analysis, in "FAT_INJ.csv".
health_cols <- c("STATE", "EVTYPE", "FATALITIES", "INJURIES", "BGN_DATE")
has_casualties <- mydata$FATALITIES != 0 | mydata$INJURIES != 0
df_FAT_INJ <- mydata[has_casualties, health_cols]
write.csv(df_FAT_INJ, "FAT_INJ.csv")

# Property & Crop Damage
# Keep only the events with a non-zero property or crop damage figure and
# store them in "ECO_DAM.csv". Both comparisons are written out explicitly
# instead of relying on numeric-to-logical coercion of CROPDMG.
damage_cols <- c(
  "STATE", "EVTYPE", "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP", "BGN_DATE"
)
has_damage <- mydata$PROPDMG != 0 | mydata$CROPDMG != 0
df_ECO_DAM <- mydata[has_damage, damage_cols]
write.csv(df_ECO_DAM, "ECO_DAM.csv")
The data processing sections accumulate the values incrementally from date to date (running totals). The following steps have been taken:
# read data set
myFAT <- read.csv("FAT_INJ.csv")

# Sums up fatalities by event type and extracts the top 5 and top 10 events
FAT <- as.data.frame(tapply(myFAT$FATALITIES, myFAT$EVTYPE, sum))
FAT$EVENT <- rownames(FAT)
colnames(FAT) <- c("fatalities", "events")
rownames(FAT) <- NULL
FAT <- FAT[order(-FAT$fatalities), ]
# head() returns only rows that exist; indexing with 1:10 would pad the result
# with NA rows if fewer event types were present.
FAT10 <- head(FAT, 10)
FAT5 <- head(FAT, 5)

# Fatalities
# Generates the time series per top 5 event: the records of each event are
# sorted by date and SumFATALITIES holds the running total of fatalities.
fat <- do.call(rbind, lapply(FAT5$events, function(ev) {
  sf <- myFAT[myFAT$EVTYPE == ev, ]
  sf <- sf[order(as.Date(sf$BGN_DATE, format = "%Y-%m-%d")), ]
  # cumsum() gives the running total and, unlike an index loop over
  # 1:(n - 1), is also correct when an event has a single record.
  sf$SumFATALITIES <- cumsum(sf$FATALITIES)
  sf
}))
fat$BGN_DATE <- as.Date(fat$BGN_DATE, format = "%Y-%m-%d")
fat <- fat[order(fat$BGN_DATE), ]

# Fatalities Plots
# One line per event type; the y axis is limited to 0..6000.
g2 <- ggplot(
  fat,
  aes(x = BGN_DATE, y = SumFATALITIES, group = EVTYPE, color = factor(EVTYPE))
)
g2 <- g2 + xlab("Date") + ylab("Fatalities") + labs(color = "Events")
# NOTE(review): no fill aesthetic is mapped above, so this fill guide
# presumably has no visible effect; the legend title comes from labs(color =).
g2 <- g2 + guides(fill = guide_legend(title = "Natural Events"))
g2 <- g2 + ylim(0, 6000)
g2 <- g2 + geom_line()
# read data set
myINJ <- read.csv("FAT_INJ.csv")

# Sums up injuries by event type and extracts the top 5 and top 10 events
INJ <- as.data.frame(tapply(myINJ$INJURIES, myINJ$EVTYPE, sum))
INJ$EVENT <- rownames(INJ)
colnames(INJ) <- c("injuries", "events")
rownames(INJ) <- NULL
INJ <- INJ[order(-INJ$injuries), ]
# head() returns only rows that exist; indexing with 1:10 would pad the result
# with NA rows if fewer event types were present.
INJ10 <- head(INJ, 10)
INJ5 <- head(INJ, 5)

# Injuries
# Generates the time series per top 5 event: the records of each event are
# sorted by date and SumINJURIES holds the running total of injuries.
inj <- do.call(rbind, lapply(INJ5$events, function(ev) {
  sf <- myINJ[myINJ$EVTYPE == ev, ]
  sf <- sf[order(as.Date(sf$BGN_DATE, format = "%Y-%m-%d")), ]
  # cumsum() gives the running total and, unlike an index loop over
  # 1:(n - 1), is also correct when an event has a single record.
  sf$SumINJURIES <- cumsum(sf$INJURIES)
  sf
}))
inj$BGN_DATE <- as.Date(inj$BGN_DATE, format = "%Y-%m-%d")
inj <- inj[order(inj$BGN_DATE), ]

# Injuries Plots
# One line per event type; the y axis is limited to 0..100000.
g1 <- ggplot(inj, aes(x = BGN_DATE, y = SumINJURIES, color = factor(EVTYPE)))
# NOTE(review): no fill aesthetic is mapped above, so this fill guide
# presumably has no visible effect; the legend title comes from labs(color =).
g1 <- g1 + guides(fill = guide_legend(title = "Natural Events")) +
  labs(color = "Events")
g1 <- g1 + xlab("Date") + ylab("Injuries")
g1 <- g1 + ylim(0, 100000)
g1 <- g1 + geom_line()
The exponent codes K, M, and B are substituted by their numeric values (thousand, million, and billion) to calculate the property and crop damages.
# read data set
myDAM <- read.csv("ECO_DAM.csv")

# Property Damage
# Translate the exponent code into a numeric multiplier:
# "K" -> 1e3, "M" -> 1e6, "B" -> 1e9; every other code counts as 0.
# NOTE(review): only upper-case codes are recognised, as before - confirm
# whether lower-case or numeric codes in the data should be honoured too.
prop_multiplier <- c(K = 1000, M = 1000000, B = 1000000000)
prop_factor <- unname(prop_multiplier[as.character(myDAM$PROPDMGEXP)])
prop_factor[is.na(prop_factor)] <- 0
myDAM$PROPDMGEXP <- prop_factor
myDAM$PROPDMG <- as.numeric(myDAM$PROPDMG) * myDAM$PROPDMGEXP

# Sums up property damage by event type and extracts the top 5 events
DamProp <- as.data.frame(tapply(myDAM$PROPDMG, myDAM$EVTYPE, sum))
DamProp$EVENT <- rownames(DamProp)
colnames(DamProp) <- c("damage", "events")
rownames(DamProp) <- NULL
DamProp <- DamProp[order(-DamProp$damage), ]
# head() returns only rows that exist instead of padding with NA rows.
TopPropDam <- head(DamProp, 5)

# Property damage
# Generates the time series per top 5 event: the records of each event are
# sorted by date and SummyDAM holds the running total of property damage.
propDam <- do.call(rbind, lapply(TopPropDam$events, function(ev) {
  sf <- myDAM[myDAM$EVTYPE == ev, ]
  sf <- sf[order(as.Date(sf$BGN_DATE, format = "%Y-%m-%d")), ]
  # cumsum() gives the running total and, unlike an index loop over
  # 1:(n - 1), is also correct when an event has a single record.
  sf$SummyDAM <- cumsum(sf$PROPDMG)
  sf
}))
propDam$BGN_DATE <- as.Date(propDam$BGN_DATE, format = "%Y-%m-%d")
propDam <- propDam[order(propDam$BGN_DATE), ]

# Property Damage Plots
# One line per event type showing the accumulated property damage over time.
g3 <- ggplot(
  propDam,
  aes(x = BGN_DATE, y = SummyDAM, group = EVTYPE, color = factor(EVTYPE))
)
g3 <- g3 + xlab("Date") + ylab("Property Damage in [$]") +
  labs(color = "Events")
g3 <- g3 + geom_line()
# read data set
myDAM <- read.csv("ECO_DAM.csv")

# Crop Damage
# Translate the exponent code into a numeric multiplier:
# "K" -> 1e3, "M" -> 1e6, "B" -> 1e9; every other code counts as 0.
# NOTE(review): only upper-case codes are recognised, as before - confirm
# whether lower-case or numeric codes in the data should be honoured too.
crop_multiplier <- c(K = 1000, M = 1000000, B = 1000000000)
crop_factor <- unname(crop_multiplier[as.character(myDAM$CROPDMGEXP)])
crop_factor[is.na(crop_factor)] <- 0
myDAM$CROPDMGEXP <- crop_factor
myDAM$CROPDMG <- as.numeric(myDAM$CROPDMG) * myDAM$CROPDMGEXP

# Sums up crop damage by event type and extracts the top 5 events
DamCrop <- as.data.frame(tapply(myDAM$CROPDMG, myDAM$EVTYPE, sum))
DamCrop$EVENT <- rownames(DamCrop)
colnames(DamCrop) <- c("damage", "events")
rownames(DamCrop) <- NULL
DamCrop <- DamCrop[order(-DamCrop$damage), ]
# head() returns only rows that exist instead of padding with NA rows.
TopCropDam <- head(DamCrop, 5)

# Crop damage
# Generates the time series per top 5 event: the records of each event are
# sorted by date and SummyCropDAM holds the running total of crop damage.
cropDam <- do.call(rbind, lapply(TopCropDam$events, function(ev) {
  sf <- myDAM[myDAM$EVTYPE == ev, ]
  sf <- sf[order(as.Date(sf$BGN_DATE, format = "%Y-%m-%d")), ]
  # cumsum() gives the running total and, unlike an index loop over
  # 1:(n - 1), is also correct when an event has a single record.
  sf$SummyCropDAM <- cumsum(sf$CROPDMG)
  sf
}))
cropDam$BGN_DATE <- as.Date(cropDam$BGN_DATE, format = "%Y-%m-%d")
cropDam <- cropDam[order(cropDam$BGN_DATE), ]

# Crop Damage Plots
# One line per event type showing the accumulated crop damage over time.
g4 <- ggplot(
  cropDam,
  aes(x = BGN_DATE, y = SummyCropDAM, group = EVTYPE, color = factor(EVTYPE))
)
g4 <- g4 + xlab("Date") + ylab("Crop Damage in [$]") + labs(color = "Events")
g4 <- g4 + geom_line()
# Top 10 fatalities and top 10 injuries side by side, printed as an HTML table
# without the row-name column.
health_top10 <- cbind(FAT10, INJ10)
HIxt <- xtable(health_top10)
print(HIxt, type = "html", include.rownames = FALSE)
| fatalities | events | injuries | events |
|---|---|---|---|
| 5633 | TORNADO | 91346 | TORNADO |
| 1903 | EXCESSIVE HEAT | 6957 | TSTM WIND |
| 978 | FLASH FLOOD | 6789 | FLOOD |
| 937 | HEAT | 6525 | EXCESSIVE HEAT |
| 816 | LIGHTNING | 5230 | LIGHTNING |
| 504 | TSTM WIND | 2100 | HEAT |
| 470 | FLOOD | 1975 | ICE STORM |
| 368 | RIP CURRENT | 1777 | FLASH FLOOD |
| 248 | HIGH WIND | 1488 | THUNDERSTORM WIND |
| 224 | AVALANCHE | 1361 | HAIL |
# Combining fatalities and injuries in one plot
# Both ggplot objects are converted to grobs and arranged in a single column.
# The layout matrix lists grob 2 (injuries) first and grob 1 (fatalities)
# second.
pFat <- ggplotGrob(g2)
pInf <- ggplotGrob(g1)
stacked_layout <- matrix(c(2, 1), ncol = 1)
grid.arrange(pFat, pInf, layout_matrix = stacked_layout)
# Transform the table into a html table
# The first 10 rows of the property and crop damage rankings are placed side
# by side, relabelled, and printed as an HTML table without row names.
top_rows <- seq_len(10)
XTDam <- cbind(DamProp[top_rows, ], DamCrop[top_rows, ])
names(XTDam) <- c("property damage [$]", "event", "crop damage[$]", "event")
ECxt <- xtable(XTDam, align = "cccrc", latex.environments = "center")
print(ECxt, type = "html", include.rownames = FALSE)
| property damage [$] | event | crop damage[$] | event |
|---|---|---|---|
| 144657709800.00 | FLOOD | 13972566000.00 | DROUGHT |
| 69305840000.00 | HURRICANE/TYPHOON | 5661968450.00 | FLOOD |
| 56925660480.00 | TORNADO | 5029459000.00 | RIVER FLOOD |
| 43323536000.00 | STORM SURGE | 5022113500.00 | ICE STORM |
| 16140811510.00 | FLASH FLOOD | 3025537450.00 | HAIL |
| 15727366720.00 | HAIL | 2741910000.00 | HURRICANE |
| 11868319010.00 | HURRICANE | 2607872800.00 | HURRICANE/TYPHOON |
| 7703890550.00 | TROPICAL STORM | 1421317100.00 | FLASH FLOOD |
| 6688497250.00 | WINTER STORM | 1292973000.00 | EXTREME COLD |
| 5270046260.00 | HIGH WIND | 1094086000.00 | FROST/FREEZE |