This report summarizes the human and economic costs of weather events in the United States. The data come from the U.S. National Oceanic and Atmospheric Administration and include events between 1950 and 2011. Detailed information is provided by the National Weather Service and the National Climatic Data Center. The data suggest that different types of weather events differ considerably in their human and economic costs. Tornadoes are the most harmful to people’s health and lives. On the other hand, floods cost the most in terms of property damage, and droughts cause the most crop damage. The analysis below assumes that the file “StormData.csv.bz2” is present in the working directory.
First, the data file is read into R. The column names of the resulting data frame are then printed to verify the import and to facilitate further processing.
## Read Data
# Parse the bzip2-compressed CSV through an explicit bzfile connection.
StormData <- read.csv(file = bzfile(description = "StormData.csv.bz2"))
# Print the column names so that later steps can refer to them.
colnames(StormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Then, the fatalities are summed by event type for easier plotting. Event types with zero total fatalities are marked as NA and removed, and the five deadliest event types are kept.
library(plyr)
# Total number of fatalities for every event type.
fatal <- ddply(StormData, .(EVTYPE), summarise, sum = sum(FATALITIES))
# Flag event types with a zero total as NA so that na.omit() discards them.
fatal$sum[fatal$sum == 0] <- NA
fatal_tidy <- na.omit(fatal)
# Rank from most to least deadly and keep the five leading event types.
fatal_ordered <- arrange(fatal_tidy, -sum)
fatal_top <- head(fatal_ordered, n = 5)
Next, the injuries are summed by event type for easier plotting. Event types with zero total injuries are marked as NA and removed, and the five most injurious event types are kept.
# Total number of injuries for every event type.
injury <- ddply(StormData, .(EVTYPE), summarise, sum = sum(INJURIES))
# Flag event types with a zero total as NA so that na.omit() discards them.
injury$sum[injury$sum == 0] <- NA
injury_tidy <- na.omit(injury)
# Rank from most to least injurious and keep the five leading event types.
injury_ordered <- arrange(injury_tidy, -sum)
injury_top <- head(injury_ordered, n = 5)
Finally, the cost data are processed and extracted. Property and crop damage are each stored in a pair of columns that must be combined to obtain a dollar amount. PROPDMG and PROPDMGEXP need to be combined in order to calculate the USD cost of property damage: the variable PROPDMGEXP specifies whether the PROPDMG values are in thousands (K), millions (M), or billions (B) of dollars. Similarly, CROPDMGEXP specifies whether the crop damage values in CROPDMG are in thousands, millions, or billions of dollars. In the end, the values are converted to dollars and summed by event type, and event types with zero total damage are removed before constructing the plots.
# Property Damage Data Processing
# PROPDMGEXP holds the unit of PROPDMG: K = thousands, M = millions,
# B = billions. Codes are matched case-insensitively so that lower-case
# variants (k, m, b) are scaled as well instead of being silently counted as
# raw dollars. Any other code (blank, digits, symbols, NA) leaves PROPDMG
# unscaled.
PropData <- StormData[, c("EVTYPE", "PROPDMG", "PROPDMGEXP")]
prop_units <- c(K = 1e3, M = 1e6, B = 1e9)
prop_unit_idx <- match(
  toupper(as.character(PropData$PROPDMGEXP)),
  names(prop_units)
)
prop_scaled <- !is.na(prop_unit_idx)
PropData$PROPDMG[prop_scaled] <- PropData$PROPDMG[prop_scaled] *
  unname(prop_units[prop_unit_idx[prop_scaled]])
# Clear the unit code on converted rows: their PROPDMG is now in dollars.
PropData$PROPDMGEXP[prop_scaled] <- ""

# Total property damage in dollars for every event type.
prop <- ddply(PropData, ~EVTYPE, summarise, sum = sum(PROPDMG))
# Mark zero totals as NA so that na.omit() drops event types without damage.
is.na(prop$sum) <- !prop$sum
prop_tidy <- na.omit(prop)
# Rank from most to least costly and keep the five leading event types.
prop_ordered <- arrange(prop_tidy, desc(sum))
prop_top <- head(prop_ordered, 5)
# Crop Damage Data Processing
# CROPDMGEXP holds the unit of CROPDMG: K = thousands, M = millions,
# B = billions. Codes are matched case-insensitively so that lower-case
# variants (k, m, b) are scaled as well instead of being silently counted as
# raw dollars. Any other code (blank, digits, symbols, NA) leaves CROPDMG
# unscaled.
CropData <- StormData[, c("EVTYPE", "CROPDMG", "CROPDMGEXP")]
crop_units <- c(K = 1e3, M = 1e6, B = 1e9)
crop_unit_idx <- match(
  toupper(as.character(CropData$CROPDMGEXP)),
  names(crop_units)
)
crop_scaled <- !is.na(crop_unit_idx)
CropData$CROPDMG[crop_scaled] <- CropData$CROPDMG[crop_scaled] *
  unname(crop_units[crop_unit_idx[crop_scaled]])
# Clear the unit code on converted rows: their CROPDMG is now in dollars.
CropData$CROPDMGEXP[crop_scaled] <- ""

# Total crop damage in dollars for every event type.
crop <- ddply(CropData, ~EVTYPE, summarise, sum = sum(CROPDMG))
# Mark zero totals as NA so that na.omit() drops event types without damage.
is.na(crop$sum) <- !crop$sum
crop_tidy <- na.omit(crop)
# Rank from most to least costly and keep the five leading event types.
crop_ordered <- arrange(crop_tidy, desc(sum))
crop_top <- head(crop_ordered, 5)
Four plots are constructed for the results section: two showing the human cost of weather events (deaths and injuries) and two showing their economic cost (property and crop damage).
# Fatalities Plot
library(ggplot2)
# Bar chart of total deaths for the five deadliest event types.
g <- ggplot(fatal_top, aes(x = EVTYPE, y = sum)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Most Deadly Events in the US (1950-2011)",
    x = "Type of Event",
    y = "Number of People Killed"
  ) +
  # Small, bold, slanted tick labels for the event names.
  theme(axis.text.x = element_text(face = "bold", size = 8, angle = 40))
# Injuries Plot
# Bar chart of total injuries for the five most injurious event types.
g2 <- ggplot(injury_top, aes(x = EVTYPE, y = sum)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Most Harmful Events in the US (1950-2011)",
    x = "Type of Event",
    y = "Number of People Injured"
  ) +
  # Small, bold, slanted tick labels for the event names.
  theme(axis.text.x = element_text(face = "bold", size = 8, angle = 40))
# Property Damage Plot
# Bar chart of total property damage, rescaled from dollars to millions.
g3 <- ggplot(prop_top, aes(x = EVTYPE, y = sum / 1e6)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Most Costly Events in the US (1950-2011)",
    x = "Type of Event",
    y = "Property Damage in Millions of Dollars"
  ) +
  # Small, bold, slanted tick labels for the event names.
  theme(axis.text.x = element_text(face = "bold", size = 8, angle = 40))
# Crop Damage Plot
# Bar chart of total crop damage, rescaled from dollars to millions.
g4 <- ggplot(crop_top, aes(x = EVTYPE, y = sum / 1e6)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Most Costly Events in the US (1950-2011)",
    x = "Type of Event",
    y = "Crop Damage in Millions of Dollars"
  ) +
  # Small, bold, slanted tick labels for the event names.
  theme(axis.text.x = element_text(face = "bold", size = 8, angle = 40))
The results plotted below show that tornadoes were the most harmful weather events in the US between 1950 and 2011. Tornadoes killed close to 6,000 people and injured over 90,000. The second deadliest event type is excessive heat, responsible for close to 2,000 deaths. On the other hand, the highest property damage is caused not by tornadoes but by floods, which account for over 140 billion dollars in damage over the specified period. Hurricanes are the second most costly weather event, responsible for close to 70 billion dollars of property damage. In terms of crop damage, however, floods rank only second, with over 5.5 billion dollars. The leading cause of crop damage is drought, which caused almost 14 billion dollars of damage between 1950 and 2011.
library(gridExtra)
# Human cost: fatalities and injuries side by side.
grid.arrange(grobs = list(g, g2), ncol = 2)
# Economic cost: property and crop damage side by side.
grid.arrange(grobs = list(g3, g4), ncol = 2)