The analysis is based on the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which records data on weather events in the United States (number of fatalities, injuries, and property damage). The top three causes of fatalities are tornados, excessive heat and flash floods, responsible for an estimated 5633, 1903 and 978 fatalities respectively. When it comes to the number of injuries, the top three causes are tornados, TSTM winds and floods, responsible for an estimated 91346, 5957 and 6789 injuries respectively. The highest economic burden was generated by floods ($150 billion), hurricanes/typhoons ($72 billion) and tornados ($57 billion).
Loading necessary libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
Importing the original file
# Read the comma-separated data file; its first row supplies the column names
dt <- read.table("data1.csv", sep = ",", header = TRUE)
Exploring the dimensions
# Print the number of rows and columns of the imported data frame
dim(dt)
## [1] 902297 37
Exploring column names
# List the column names of the imported data frame
colnames(dt)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Selecting columns of interest
# Keep only the event type plus the casualty and damage columns used below
dtsel <- dt[, c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
Exploring unique units
# Distinct exponent codes that occur in the property-damage column
unique(dt[["PROPDMGEXP"]])
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Distinct exponent codes that occur in the crop-damage column
unique(dt[["CROPDMGEXP"]])
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M
Recalculating units for PROPDMGEXP
# Translate every PROPDMGEXP code into a numeric multiplier with one
# vectorised table lookup instead of one masked assignment per code.
# The table maps "", "-", "+", "?" and "0" to 1, the digits "1".."9" to the
# matching power of ten, H/h to 1e2, K/k to 1e3, M/m to 1e6 and B to 1e9.
# A code that is not in the table yields NA.
dtsel$PROPDMGEXPNEW <- local({
  codes <- c(
    "", "-", "+", "?", "0",
    "1",
    "2", "H", "h",
    "3", "K", "k",
    "4",
    "5",
    "6", "M", "m",
    "7",
    "8",
    "9", "B"
  )
  multipliers <- c(
    1e0, 1e0, 1e0, 1e0, 1e0,
    1e1,
    1e2, 1e2, 1e2,
    1e3, 1e3, 1e3,
    1e4,
    1e5,
    1e6, 1e6, 1e6,
    1e7,
    1e8,
    1e9, 1e9
  )
  # match() is used rather than a named vector because the empty string
  # cannot be looked up by name; as.character() makes the lookup work
  # whether the column is a factor or a character vector.
  multipliers[match(as.character(dtsel$PROPDMGEXP), codes)]
})
Recalculating units for CROPDMGEXP
# Translate every CROPDMGEXP code into a numeric multiplier with one
# vectorised table lookup instead of one masked assignment per code.
# The table maps "", "?" and "0" to 1, "2" to 1e2, K/k to 1e3, M/m to 1e6
# and B to 1e9. A code that is not in the table yields NA.
dtsel$CROPDMGEXPNEW <- local({
  codes <- c(
    "", "?", "0",
    "2",
    "K", "k",
    "M", "m",
    "B"
  )
  multipliers <- c(
    1e0, 1e0, 1e0,
    1e2,
    1e3, 1e3,
    1e6, 1e6,
    1e9
  )
  # match() is used rather than a named vector because the empty string
  # cannot be looked up by name; as.character() makes the lookup work
  # whether the column is a factor or a character vector.
  multipliers[match(as.character(dtsel$CROPDMGEXP), codes)]
})
Checking the units recalculations
# Cross-tabulate each property-damage code against the multiplier assigned to
# it. exclude = NULL keeps NA in the table, so a code that received no
# multiplier shows up here instead of being silently dropped from the check.
with(dtsel, ftable(PROPDMGEXP, PROPDMGEXPNEW, exclude = NULL))
## PROPDMGEXPNEW 1 10 100 1000 10000 1e+05 1e+06 1e+07 1e+08 1e+09
## PROPDMGEXP
## 465934 0 0 0 0 0 0 0 0 0
## - 1 0 0 0 0 0 0 0 0 0
## ? 8 0 0 0 0 0 0 0 0 0
## + 5 0 0 0 0 0 0 0 0 0
## 0 216 0 0 0 0 0 0 0 0 0
## 1 0 25 0 0 0 0 0 0 0 0
## 2 0 0 13 0 0 0 0 0 0 0
## 3 0 0 0 4 0 0 0 0 0 0
## 4 0 0 0 0 4 0 0 0 0 0
## 5 0 0 0 0 0 28 0 0 0 0
## 6 0 0 0 0 0 0 4 0 0 0
## 7 0 0 0 0 0 0 0 5 0 0
## 8 0 0 0 0 0 0 0 0 1 0
## B 0 0 0 0 0 0 0 0 0 40
## h 0 0 1 0 0 0 0 0 0 0
## H 0 0 6 0 0 0 0 0 0 0
## K 0 0 0 424665 0 0 0 0 0 0
## m 0 0 0 0 0 0 7 0 0 0
## M 0 0 0 0 0 0 11330 0 0 0
# Cross-tabulate each crop-damage code against the multiplier assigned to it.
# exclude = NULL keeps NA in the table, so a code that received no multiplier
# shows up here instead of being silently dropped from the check.
with(dtsel, ftable(CROPDMGEXP, CROPDMGEXPNEW, exclude = NULL))
## CROPDMGEXPNEW 1 100 1000 1e+06 1e+09
## CROPDMGEXP
## 618413 0 0 0 0
## ? 7 0 0 0 0
## 0 19 0 0 0 0
## 2 0 1 0 0 0
## B 0 0 0 0 9
## k 0 0 21 0 0
## K 0 0 281832 0 0
## m 0 0 0 1 0
## M 0 0 0 1994 0
Calculating total cost
# Total damage per record: property damage plus crop damage, each scaled by
# the multiplier derived from its exponent code
dtsel$TOTALCOST <- with(
  dtsel,
  PROPDMG * PROPDMGEXPNEW + CROPDMG * CROPDMGEXPNEW
)
Preparing data for fatalities plot
# Sum fatalities per event type, then sort the event types from the highest
# total to the lowest
dtagg1 <- aggregate(FATALITIES ~ EVTYPE, dtsel, sum)
dtplot1 <- dtagg1[order(dtagg1$FATALITIES, decreasing = TRUE), ]
Preparing data for injuries plot
# Sum injuries per event type, then sort the event types from the highest
# total to the lowest
dtagg2 <- aggregate(INJURIES ~ EVTYPE, dtsel, sum)
dtplot2 <- dtagg2[order(dtagg2$INJURIES, decreasing = TRUE), ]
Preparing data for economic problems plot
# Sum total cost per event type and sort the event types from the highest
# total to the lowest
dtagg3 <- aggregate(TOTALCOST ~ EVTYPE, dtsel, sum)
dtplot3 <- dtagg3[order(dtagg3$TOTALCOST, decreasing = TRUE), ]
# Rescale the totals to whole billions for plotting
dtplot3$TOTALCOST <- round(dtplot3$TOTALCOST / 1e9)
Exploring top 15 causes of fatalities
# Horizontal bar chart of the first 15 rows of dtplot1 (the event types with
# the most fatalities), each bar labelled with its total
ggplot(dtplot1[seq_len(15), ], aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(stat = "identity", colour = "red", fill = "white") +
  geom_text(
    aes(label = FATALITIES),
    colour = "black", size = 3.5, hjust = 0.4
  ) +
  coord_flip() +
  ggtitle("Top 15 causes of fatalities")
Tornados, excessive heat and flash floods are the top 3 of the top 15 causes of fatalities in absolute numbers.
Exploring top 15 causes of injuries
# Horizontal bar chart of the first 15 rows of dtplot2 (the event types with
# the most injuries), each bar labelled with its total.
# The title previously read "inuries"; the spelling is corrected here.
ggplot(data = dtplot2[1:15, ], aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(stat = "identity", color = "red", fill = "white") +
  coord_flip() +
  geom_text(aes(label = INJURIES), hjust = 0.4, color = "black", size = 3.5) +
  ggtitle("Top 15 causes of injuries")
Tornados, TSTM winds and floods are the top 3 of the top 15 causes of injuries in absolute numbers.
Exploring top 15 causes of economic problems
# Horizontal bar chart of the first 15 rows of dtplot3 (the event types with
# the highest total cost), each bar labelled with its value in billions
ggplot(dtplot3[seq_len(15), ], aes(x = EVTYPE, y = TOTALCOST)) +
  geom_bar(stat = "identity", colour = "red", fill = "white") +
  geom_text(
    aes(label = TOTALCOST),
    colour = "black", size = 3.5, hjust = 0.4
  ) +
  coord_flip() +
  ggtitle("Top 15 causes of economic burden in billions")
Floods, hurricanes/typhoons and tornados are the top 3 of the top 15 causes of economic damage, measured in billions of dollars.
Tornados cause the most fatalities and injuries; however, they rank third in total economic burden. Flash floods and floods rank third in the number of fatalities and injuries respectively; however, floods alone are the biggest economic burden.