Synopsis
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.
The database tracks characteristics of major storms and weather events in the United States.
It includes when and where storms occur, as well as estimates of any fatalities, injuries, and property damage.
The events in the database start in the year 1950 and end in November 2011.
The data come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size.
The data are available at: Storm Data
National Weather Service data documentation: click here
Further details on the data are available at this link
Across the United States, which types of events are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?
Data Processing
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.0.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the raw storm data. The source file was renamed to "Strom.csv", so the
# path below intentionally keeps that spelling.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
data1 <- read.csv("Strom.csv", header = TRUE, sep = ",")
# Keep only the event type plus the casualty and damage columns used below.
columns <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
data2 <- data1[, columns]
# Ten most frequent raw event types. head() returns at most ten entries and,
# unlike indexing with [1:10], never pads the result with an <NA> entry when
# fewer than ten distinct values exist.
head(sort(table(data2$EVTYPE), decreasing = TRUE), 10)
##
## HAIL TSTM WIND THUNDERSTORM WIND TORNADO
## 288661 219940 82563 60652
## FLASH FLOOD FLOOD THUNDERSTORM WINDS HIGH WIND
## 54277 25326 20843 20212
## LIGHTNING HEAVY SNOW
## 15754 15708
# Collapse the free-text EVTYPE values into a few keyword-based groups stored
# in EVTYPE1. Anything that matches no keyword stays "OTHER".
# The keywords are applied in this exact order and each assignment overwrites
# the previous one, so an event type containing several keywords ends up with
# the LAST matching keyword (e.g. "THUNDERSTORM WIND" matches WIND and then
# STORM, so it is labelled STORM, while "TSTM WIND" stays WIND).
event_keywords <- c(
  "HAIL", "HEAT", "FLOOD", "WIND", "STORM",
  "SNOW", "TORNADO", "WINTER", "RAIN"
)
data2$EVTYPE1 <- "OTHER"
for (keyword in event_keywords) {
  is_match <- grepl(keyword, data2$EVTYPE, ignore.case = TRUE)
  data2$EVTYPE1[is_match] <- keyword
}
# Show how many records fall into each group, largest group first.
sort(table(data2$EVTYPE1), decreasing = TRUE)
##
## HAIL WIND STORM FLOOD TORNADO OTHER WINTER SNOW RAIN HEAT
## 289270 255362 113156 82686 60700 48970 19604 17660 12241 2648
# Most frequent property-damage exponent codes (at most ten). head() is used
# instead of [1:10] so that no <NA> entry is appended when fewer than ten
# distinct codes exist.
head(sort(table(data2$PROPDMGEXP), decreasing = TRUE), 10)
##
## K M 0 B 5 1 2 ? m
## 465934 424665 11330 216 40 28 25 13 8 7
# Most frequent crop-damage exponent codes (at most ten). Indexing the sorted
# table with [1:10] appends a spurious <NA> entry whenever the column has
# fewer than ten distinct codes; head() returns only the entries that exist.
head(sort(table(data2$CROPDMGEXP), decreasing = TRUE), 10)
##
## K M k 0 B ? 2 m <NA>
## 618413 281832 1994 21 19 9 7 1 1
# Translate a vector of damage-magnitude codes into powers of ten:
#   K/k -> 3, M/m -> 6, B/b -> 9; NA and every other code -> 0, i.e. the
#   damage figure is taken as plain dollars.
# Returns a numeric vector of the same length as `code`.
exponent_from_code <- function(code) {
  code <- as.character(code)
  code[is.na(code)] <- "0" # NA's considered as dollars
  code[!grepl("K|M|B", code, ignore.case = TRUE)] <- "0" # everything except K, M, B is dollars
  # Applied in K, M, B order; an already-converted value ("3", "6") no longer
  # matches a later pattern, so the first matching letter wins.
  code[grepl("K", code, ignore.case = TRUE)] <- "3"
  code[grepl("M", code, ignore.case = TRUE)] <- "6"
  code[grepl("B", code, ignore.case = TRUE)] <- "9"
  as.numeric(code)
}

# Property damage: replace the code column by its exponent, then scale the
# amount to dollars.
data2$PROPDMGEXP <- exponent_from_code(data2$PROPDMGEXP)
data2$PROPDMG <- data2$PROPDMG * 10^data2$PROPDMGEXP

# Crop damage: same conversion.
data2$CROPDMGEXP <- exponent_from_code(data2$CROPDMGEXP)
data2$CROPDMG <- data2$CROPDMG * 10^data2$CROPDMGEXP
# Most common property-damage amounts after scaling (at most ten). head()
# avoids the <NA> padding that [1:10] produces when fewer than ten distinct
# values exist.
head(sort(table(data2$PROPDMG), decreasing = TRUE), 10)
##
## 0 5000 10000 1000 2000 25000 50000 3000 20000 15000
## 663123 31731 21787 17544 17186 17104 13596 10364 9179 8617
# Most common crop-damage amounts after scaling (at most ten). head() avoids
# the <NA> padding that [1:10] produces when fewer than ten distinct values
# exist.
head(sort(table(data2$CROPDMG), decreasing = TRUE), 10)
##
## 0 5000 10000 50000 1e+05 1000 2000 25000 20000 5e+05
## 880198 4097 2349 1984 1233 956 951 830 758 721
##Analysis of data
# Population health: total FATALITIES and INJURIES per grouped event type.
# Reshape to long form (one row per event group and measure) before summing.
da <- data2 %>%
  select(EVTYPE1, FATALITIES, INJURIES) %>%
  melt(id.vars = "EVTYPE1", measure.vars = c("FATALITIES", "INJURIES"))
# na.rm is forwarded to sum(). It is spelled TRUE because T is an ordinary
# variable that can be reassigned.
da <- aggregate(value ~ EVTYPE1 + variable, data = da, FUN = sum, na.rm = TRUE)
# Keep the 12 largest totals and preview the first rows.
da1 <- head(da[order(-da$value), ], 12)
head(da1)
## EVTYPE1 variable value
## 18 TORNADO INJURIES 91407
## 14 OTHER INJURIES 12224
## 13 HEAT INJURIES 9224
## 19 WIND INJURIES 9001
## 11 FLOOD INJURIES 8602
## 8 TORNADO FATALITIES 5661
# Economic impact: total PROPDMG and CROPDMG per grouped event type.
# Reshape to long form (one row per event group and damage kind) before summing.
data3 <- data2 %>%
  select(EVTYPE1, PROPDMG, CROPDMG) %>%
  melt(id.vars = "EVTYPE1", measure.vars = c("PROPDMG", "CROPDMG"))
# na.rm is forwarded to sum(). It is spelled TRUE because T is an ordinary
# variable that can be reassigned.
data3 <- aggregate(
  value ~ EVTYPE1 + variable,
  data = data3, FUN = sum, na.rm = TRUE
)
# Keep the 12 largest totals and preview the first rows.
data4 <- head(data3[order(-data3$value), ], 12)
head(data4)
## EVTYPE1 variable value
## 1 FLOOD PROPDMG 167502193929
## 4 OTHER PROPDMG 97246712337
## 7 STORM PROPDMG 66304415393
## 8 TORNADO PROPDMG 58593098029
## 14 OTHER CROPDMG 23588880870
## 2 HAIL PROPDMG 15733043048
# Result: event groups most harmful to population health.
# Bar chart of the top totals per event group, filled by measure
# (fatalities vs. injuries).
da1[["variable"]] <- as.factor(da1[["variable"]])
g <- ggplot(data = da1, mapping = aes(x = EVTYPE1, y = value, fill = variable))
g + geom_bar(stat = "identity")
# Result: event groups with the greatest economic consequences.
# Bar chart of the top damage totals per event group, filled by damage kind
# (property vs. crop).
data4[["variable"]] <- as.factor(data4[["variable"]])
g <- ggplot(
  data = data4,
  mapping = aes(x = EVTYPE1, y = value, fill = variable)
)
g + geom_bar(stat = "identity")