The purpose of this assignment is to examine the NOAA storm database and to study the impact of severe weather events on both the population and the economy. The database covers the period from 1950 to November 2011.
This analysis shows which types of severe weather events are most harmful to the population and the economy, measured in two ways: 1. Health effects: injuries and deaths. 2. Economic consequences: damage to property and crops.
Download the raw data file.
library("data.table")
library("ggplot2")
# Fetch the compressed NOAA storm data and load it into a data.table.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# The local path is defined once so the download target and the file that is
# read can never drift apart.
data_file <- "D:/R projects/datasciencecoursera/course5/project2/repdata_data_StormData.csv.bz2"
# Skip the download when the archive is already on disk; mode = "wb" writes
# the bzip2 archive in binary mode so it is stored byte-for-byte.
if (!file.exists(data_file)) {
  download.file(fileUrl, destfile = data_file, mode = "wb")
}
# read.csv() reads the .bz2 archive directly, as the original script did.
storm_df <- read.csv(data_file)
storm_dt <- as.data.table(storm_df)
# Show the available columns before choosing which ones to keep.
colnames(storm_dt)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Subset the dataset on the parameters of interest. Basically, we remove the columns we don’t need for clarity.
# Every column that is not one of the seven analysis columns is deleted from
# storm_dt by reference.
cols_to_remove <- setdiff(
  colnames(storm_dt),
  c("EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
)
storm_dt[, (cols_to_remove) := NULL]
# Keep only rows with a known event type ("?" is excluded) that report at
# least one injury, one fatality, or a positive property/crop damage figure.
storm_dt <- storm_dt[
  EVTYPE != "?" & (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
]
Making the PROPDMGEXP and CROPDMGEXP columns cleaner so they can be used to calculate property and crop cost.
# Upper-case both exponent-code columns in place so that codes such as "k"
# and "K" are looked up as the same key later on.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
for (exp_col in cols) {
  set(storm_dt, j = exp_col, value = toupper(storm_dt[[exp_col]]))
}
# Lookup tables from exponent code to numeric multiplier.
# Note: the first name in each table is the literal two-character string `""`
# (two double-quote characters), not an empty string.

# Property damage: symbols and "0" mean x1, digits 1-9 mean 10^digit,
# letters H/K/M/B mean hundred/thousand/million/billion.
prop_dmg_key <- c(
  setNames(rep(10^0, 4), c("\"\"", "-", "+", "0")),
  setNames(10^(1:9), as.character(1:9)),
  setNames(10^c(2, 3, 6, 9), c("H", "K", "M", "B"))
)

# Crop damage: a smaller set of codes with the same meaning as above.
crop_dmg_key <- c(
  setNames(rep(10^0, 3), c("\"\"", "?", "0")),
  setNames(10^c(3, 6, 9), c("K", "M", "B"))
)
# Replace each exponent code with its numeric multiplier from the lookup
# tables (the column becomes numeric).
storm_dt[, PROPDMGEXP := prop_dmg_key[as.character(PROPDMGEXP)]]
storm_dt[, CROPDMGEXP := crop_dmg_key[as.character(CROPDMGEXP)]]
# Codes with no entry in the lookup table come back as NA; treat them as x1
# so the damage figure is used as reported.
storm_dt[is.na(PROPDMGEXP), PROPDMGEXP := 1]
storm_dt[is.na(CROPDMGEXP), CROPDMGEXP := 1]
Making Economic Cost Columns
# Add the cost columns: reported damage figure times its numeric multiplier.
# Each cost column is placed directly after the pair it is derived from.
storm_dt <- storm_dt[, list(
  EVTYPE,
  FATALITIES,
  INJURIES,
  PROPDMG,
  PROPDMGEXP,
  propCost = PROPDMG * PROPDMGEXP,
  CROPDMG,
  CROPDMGEXP,
  cropCost = CROPDMG * CROPDMGEXP
)]
Total Property and Crop Cost
# Sum property and crop cost per event type, plus their combined total.
total_cost_dt <- storm_dt[
  ,
  .(
    propCost = sum(propCost),
    cropCost = sum(cropCost),
    Total_Cost = sum(propCost) + sum(cropCost)
  ),
  by = .(EVTYPE)
]
# Keep the 10 costliest event types. head() returns at most 10 rows, whereas
# indexing with 1:10 would pad the result with all-NA rows if fewer than 10
# event types were present.
total_cost_dt <- head(total_cost_dt[order(-Total_Cost)], 10L)
head(total_cost_dt, 5)
## EVTYPE propCost cropCost Total_Cost
## <char> <num> <num> <num>
## 1: FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3: TORNADO 56947380677 414953270 57362333947
## 4: STORM SURGE 43323536000 5000 43323541000
## 5: HAIL 15735267513 3025954473 18761221986
Total Fatalities and Injuries
# Sum fatalities and injuries per event type, plus their combined total.
total_injuries_dt <- storm_dt[
  ,
  .(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES),
    totals = sum(FATALITIES) + sum(INJURIES)
  ),
  by = .(EVTYPE)
]
# Rank by fatalities (not by the combined total) and keep the top 10.
# head() returns at most 10 rows, whereas indexing with 1:10 would pad the
# result with all-NA rows if fewer than 10 event types were present.
total_injuries_dt <- head(total_injuries_dt[order(-FATALITIES)], 10L)
head(total_injuries_dt, 5)
## EVTYPE FATALITIES INJURIES totals
## <char> <num> <num> <num>
## 1: TORNADO 5633 91346 96979
## 2: EXCESSIVE HEAT 1903 6525 8428
## 3: FLASH FLOOD 978 1777 2755
## 4: HEAT 937 2100 3037
## 5: LIGHTNING 816 5230 6046
Melting data.table so that it is easier to put in bar graph format
# Reshape the top-10 health table to long format: one row per event type and
# measure, with the measure name in `bad_thing` and its count in `value`.
bad_stuff <- melt(
  total_injuries_dt,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "totals"),
  variable.name = "bad_thing",
  value.name = "value"
)
head(bad_stuff, 5)
## EVTYPE bad_thing value
## <char> <fctr> <num>
## 1: TORNADO FATALITIES 5633
## 2: EXCESSIVE HEAT FATALITIES 1903
## 3: FLASH FLOOD FATALITIES 978
## 4: HEAT FATALITIES 937
## 5: LIGHTNING FATALITIES 816
Plot
# Grouped bar chart of fatalities, injuries and their total per event type.
# reorder() sorts the x axis by each event type's mean value, largest first.
health_chart <- ggplot(bad_stuff, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_col(aes(fill = bad_thing), position = "dodge") +
  labs(x = "Event type", y = "Frequency (count)", title = "Top 10 killers") +
  theme(
    # Slant the event names so they do not overlap.
    axis.text.x = element_text(angle = 50, hjust = 1),
    plot.title = element_text(hjust = 0.3)
  )
health_chart
# Reshape the top-10 cost table to long format: one row per event type and
# cost measure, with the measure name in `Damage_Type` and its sum in `value`.
econ_consequences <- melt(
  total_cost_dt,
  id.vars = "EVTYPE",
  measure.vars = c("propCost", "cropCost", "Total_Cost"),
  variable.name = "Damage_Type",
  value.name = "value"
)
head(econ_consequences, 5)
## EVTYPE Damage_Type value
## <char> <fctr> <num>
## 1: FLOOD propCost 144657709807
## 2: HURRICANE/TYPHOON propCost 69305840000
## 3: TORNADO propCost 56947380677
## 4: STORM SURGE propCost 43323536000
## 5: HAIL propCost 15735267513
Plot 2
# Grouped bar chart of property, crop and total cost per event type.
# reorder() sorts the x axis by each event type's mean value, largest first.
econ_chart <- ggplot(econ_consequences, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_col(aes(fill = Damage_Type), position = "dodge") +
  labs(
    x = "Event type",
    y = "Cost, dollars",
    title = "Top 10 Storm Events causing Economic Consequences"
  ) +
  theme(
    # Slant the event names so they do not overlap.
    axis.text.x = element_text(angle = 50, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
econ_chart