This project explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which tracks characteristics of major storms and weather events in the United States, to assess which types of weather events cause the most harm with respect to population health and economic consequences.
The data was downloaded from https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2 and extracted to a file named repdata_data_StormData.csv
The documentation for this data can be found on https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf
The necessary packages were installed (if not available) and loaded in
library(knitr)
library(ggplot2)
# Build the path to the extracted storm data CSV instead of calling setwd(),
# so loading the data does not change the session's working directory.
storm_dir <- "~/Coursera/Reproducible Research/Assignments/Code/StormData"
storm_file <- file.path(storm_dir, "repdata_data_StormData.csv")
# Fail early with a clear message if the extracted file is not where expected.
if (!file.exists(storm_file)) {
  stop("Storm data file not found: ", storm_file, call. = FALSE)
}
# Read the full storm events table and preview its first rows.
data <- read.csv(storm_file)
head(data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
The data obtained has a lot of information, so we need to extract the specific data needed to perform our analysis
# Columns required for the analysis: the event type, the casualty counts,
# and the property/crop damage amounts together with their exponent codes.
event_data <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
# Keep only those columns and preview the reduced table.
proj_data <- data[, event_data, drop = FALSE]
head(proj_data)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
Property damage amounts are recorded together with an exponent code. The distinct exponent codes are listed below.
# List the distinct exponent codes recorded for property damage
unique(proj_data[["PROPDMGEXP"]])
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
An appropriate multiplier was assigned to each level of the alphanumeric property damage exponent. Invalid values were assigned a multiplier of zero.
# Translate each PROPDMGEXP code into the numeric multiplier applied to
# PROPDMG. Codes and multipliers are kept in two parallel vectors and joined
# with match(), because the blank code "" cannot be used as a vector name.
prop_exp_codes <- c(
  "+", "-", "?",   # invalid codes
  "B", "8", "7",
  "M", "m", "6",
  "5", "4",
  "K", "3",
  "H", "h", "2",
  "1",
  "0", "", " "     # no scaling
)
prop_exp_values <- c(
  0, 0, 0,
  1e+09, 1e+08, 1e+07,
  1e+06, 1e+06, 1e+06,
  1e+05, 1e+04,
  1000, 1000,
  100, 100, 100,
  10,
  1, 1, 1
)
stopifnot(length(prop_exp_codes) == length(prop_exp_values))
# The blank exponent in the data is the empty string "", not a single space.
# Comparing only against " " left those rows as NA, which the later
# formula-based aggregate() drops silently. Both spellings now map to 1.
# Codes absent from the table still yield NA.
proj_data$PropExp <- prop_exp_values[
  match(proj_data$PROPDMGEXP, prop_exp_codes)
]
Property Damage value was assigned a new column and calculated by multiplying property damage by the assigned exponent values
# Scale each reported property damage amount by its multiplier and store the
# result in a new column, then preview the table
proj_data[["Propdmg_value"]] <-
  proj_data[["PROPDMG"]] * proj_data[["PropExp"]]
head(proj_data)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP PropExp
## 1 TORNADO 0 15 25.0 K 0 1000
## 2 TORNADO 0 0 2.5 K 0 1000
## 3 TORNADO 0 2 25.0 K 0 1000
## 4 TORNADO 0 2 2.5 K 0 1000
## 5 TORNADO 0 2 2.5 K 0 1000
## 6 TORNADO 0 6 2.5 K 0 1000
## Propdmg_value
## 1 25000
## 2 2500
## 3 25000
## 4 2500
## 5 2500
## 6 2500
Crop damage amounts are likewise recorded together with an exponent code. A multiplier needs to be assigned to each exponent so that the total crop damage can be calculated by multiplying the crop damage amount by the multiplier. Invalid values were assigned a multiplier of zero.
# List the distinct exponent codes recorded for crop damage
unique(proj_data[["CROPDMGEXP"]])
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
An appropriate multiplier was assigned to each level of the alphanumeric crop damage exponent. Invalid values were assigned a multiplier of zero.
# Translate each CROPDMGEXP code into the numeric multiplier applied to
# CROPDMG. Codes and multipliers are kept in two parallel vectors and joined
# with match(), because the blank code "" cannot be used as a vector name.
crop_exp_codes <- c(
  "?",             # invalid code
  "B",
  "M", "m",
  "K", "k",
  "2",
  "0", "", " "     # no scaling
)
crop_exp_values <- c(
  0,
  1e+09,
  1e+06, 1e+06,
  1000, 1000,
  100,
  1, 1, 1
)
stopifnot(length(crop_exp_codes) == length(crop_exp_values))
# The blank exponent in the data is the empty string "", not a single space.
# Comparing only against " " left those rows as NA, which the later
# formula-based aggregate() drops silently. Both spellings now map to 1.
# Codes absent from the table still yield NA.
proj_data$CropExp <- crop_exp_values[
  match(proj_data$CROPDMGEXP, crop_exp_codes)
]
# Calculating Crop Damage and entering it into a new column
proj_data$Cropdmg_Value <- proj_data$CROPDMG * proj_data$CropExp
Fatalities and injuries were identified as the most direct measures of harm to population health, so only these two variables were considered.
The totals for both fatalities and injuries were calculated per event type and then listed in descending order.
# Sum fatalities and injuries separately for every event type
fatality_tot <- aggregate(FATALITIES ~ EVTYPE, data = proj_data, FUN = sum)
injury_tot <- aggregate(INJURIES ~ EVTYPE, data = proj_data, FUN = sum)
# Rank the event types by total fatalities, largest first, and keep the
# first 10 rows of that ranking
fatal_10 <- fatality_tot[order(-fatality_tot[["FATALITIES"]])[1:10], ]
fatal_10
## EVTYPE FATALITIES
## 834 TORNADO 5633
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## 856 TSTM WIND 504
## 170 FLOOD 470
## 585 RIP CURRENT 368
## 359 HIGH WIND 248
## 19 AVALANCHE 224
# Rank the event types by total injuries, largest first, and keep the
# first 10 rows of that ranking
injury_10 <- injury_tot[order(-injury_tot[["INJURIES"]])[1:10], ]
injury_10
## EVTYPE INJURIES
## 834 TORNADO 91346
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
## 275 HEAT 2100
## 427 ICE STORM 1975
## 153 FLASH FLOOD 1777
## 760 THUNDERSTORM WIND 1488
## 244 HAIL 1361
Setting the option to display two graphs side by side, followed by plotting both fatality and injury totals side by side in separate bar plots to observe which event type has the highest impact on population health
# One row of two panels; the large bottom margin leaves room for the
# perpendicular (las = 3) event-type labels
par(mfrow = c(1, 2), mar = c(15, 6, 5, 4), mgp = c(3, 1, 0), cex = 0.8)
# First panel: total fatalities for the 10 highest-ranked event types
barplot(
  height = fatal_10$FATALITIES,
  names.arg = fatal_10$EVTYPE,
  las = 3,
  col = "dark blue",
  main = " Severe Events with Highest Fatalities",
  ylab = "Number of fatalities"
)
# Second panel: total injuries for the 10 highest-ranked event types
barplot(
  height = injury_10$INJURIES,
  names.arg = injury_10$EVTYPE,
  las = 3,
  col = "light blue",
  main = " Severe Events with Highest Injuries",
  ylab = "Number of injuries"
)
# Sum the computed property and crop damage values for every event type
totprp_dmg <- aggregate(Propdmg_value ~ EVTYPE, data = proj_data, FUN = sum)
totcrp_dmg <- aggregate(Cropdmg_Value ~ EVTYPE, data = proj_data, FUN = sum)
# Rank the event types by total property damage, largest first, and keep the
# first 10 rows of that ranking
propdmg_10 <- totprp_dmg[order(-totprp_dmg[["Propdmg_value"]])[1:10], ]
propdmg_10
## EVTYPE Propdmg_value
## 63 FLOOD 144657709800
## 181 HURRICANE/TYPHOON 69305840000
## 335 TORNADO 56947380614
## 283 STORM SURGE 43323536000
## 51 FLASH FLOOD 16822673772
## 105 HAIL 15735267456
## 173 HURRICANE 11868319010
## 343 TROPICAL STORM 7703890550
## 402 WINTER STORM 6688497251
## 158 HIGH WIND 5270046260
# Rank the event types by total crop damage, largest first, and keep the
# first 10 rows of that ranking
cropdmg_10 <- totcrp_dmg[order(-totcrp_dmg[["Cropdmg_Value"]])[1:10], ]
cropdmg_10
## EVTYPE Cropdmg_Value
## 16 DROUGHT 13972566000
## 35 FLOOD 5661968450
## 99 RIVER FLOOD 5029459000
## 86 ICE STORM 5022113500
## 53 HAIL 3025954470
## 78 HURRICANE 2741910000
## 83 HURRICANE/TYPHOON 2607872800
## 30 FLASH FLOOD 1421317100
## 26 EXTREME COLD 1292973000
## 47 FROST/FREEZE 1094086000
Setting the option to display both property and crop damage barplots side by side, then plotting the bar plots for the largest 10 crop and property damage value totals to observe which event type has the highest economic impact
# One row of two panels; the large bottom margin leaves room for the
# perpendicular (las = 3) event-type labels
par(mfrow = c(1, 2), mar = c(14, 5, 4, 3), mgp = c(4, 2, 1), cex = 0.8)
# First panel: total property damage, rescaled by 1e+09 to match the axis label
barplot(
  height = propdmg_10$Propdmg_value / 1e+09,
  names.arg = propdmg_10$EVTYPE,
  las = 3,
  col = "dark green",
  main = " Severe Events with Highest Property Damages",
  ylab = "Damage Cost (in Billions USD) "
)
# Second panel: total crop damage, rescaled the same way
barplot(
  height = cropdmg_10$Cropdmg_Value / 1e+09,
  names.arg = cropdmg_10$EVTYPE,
  las = 3,
  col = "light green",
  main = " Severe Events with Highest Crop Damages",
  ylab = "Damage Cost (in Billions USD)"
)
Based on the analysis, it can be concluded that tornadoes had the biggest impact on both fatalities and injuries. The second highest fatality count was caused by excessive heat, while thunderstorm wind (TSTM WIND) caused the second highest injury count.
With regard to the economic consequences, droughts caused the largest amount of crop damage and floods caused the largest amount of property damage. Additionally, the second highest property damage was caused by hurricanes/typhoons, and the second highest crop damage was caused by floods.