## Synopsis

This data analysis report examines the health and economic impacts of various hazardous events in the United States from 1950 to November 2011. The raw data is processed to identify the TOP 10 HAZARDS and their impacts, which are presented both as tables and as bar plots. TORNADOES have had by far the largest impact on human health, while FLOODS have had the largest impact on the economy through damage to property and crops. Because data collection was less complete in the earlier years, the dataset is not fully accurate; the final results therefore match the provided data exactly but may differ from what actually happened.
library(knitr)
## Warning: package 'knitr' was built under R version 4.0.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
Q-1: Across the United States, which types of events are most harmful with respect to population health?
# Location of the compressed (bzip2) storm data CSV.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# To fetch the file yourself, uncomment the two lines below and set `dest`
# to the path (directory plus file name) where the download should be saved.
# dest <- "Please enter the directory address in which you want to download the file/name of file"
# download.file(url, dest)

# Load the storm records from the local copy in the working directory.
data <- read.csv("data.csv", header = TRUE, sep = ",")

# Health impact per event type: fatalities and injuries added together,
# then summed over every record of that event type.
impact <- aggregate(FATALITIES + INJURIES ~ EVTYPE, data = data, FUN = sum)
names(impact)[2] <- "IMPACT"

# Keep the ten event types with the largest combined impact, highest first.
topEvents <- head(impact[order(-impact$IMPACT), ], n = 10)
topEvents
## EVTYPE IMPACT
## 834 TORNADO 96979
## 130 EXCESSIVE HEAT 8428
## 856 TSTM WIND 7461
## 170 FLOOD 7259
## 464 LIGHTNING 6046
## 275 HEAT 3037
## 153 FLASH FLOOD 2755
## 427 ICE STORM 2064
## 760 THUNDERSTORM WIND 1621
## 972 WINTER STORM 1527
# Bar chart of the ten most harmful event types.
# - Bars are ordered from the highest to the lowest impact (instead of
#   alphabetically) so the ranking is readable straight from the figure.
# - A single geom_col() layer is used; the original stacked two
#   geom_bar(stat = "identity") layers, the second fully covering the first.
# - The x-axis labels are rotated 90 degrees because the event names are long.
evplot <- ggplot(topEvents, aes(reorder(EVTYPE, -IMPACT), IMPACT)) +
  geom_col(col = "brown", fill = "red") +
  ggtitle("Impact of harmful events on US populations health") +
  xlab("Event") +
  ylab("Impact=Fatalities+Injuries") +
  theme_light() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
evplot
Answer 1: From the topEvents data frame and the bar plot of impact by event type, we can conclude that the most harmful event type has been TORNADO, with a combined total of 96,979 fatalities and injuries across the United States.
Q-2: Across the United States, which types of events have the greatest economic consequences?
# Decode the damage-exponent codes (PROPDMGEXP / CROPDMGEXP) into numeric
# multipliers, convert the damage figures to millions, and rank the event
# types by total (property + crop) damage.

# Keep only the columns needed for the economic analysis, selected by name
# rather than by position so the code does not depend on the column order.
ddata <- data[c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Restrict the exponent codes to the recognised letters; every other code
# (including a blank) becomes NA and is then mapped to "O", meaning
# "no scaling" (multiplier 1).
ddata$property <- factor(ddata$PROPDMGEXP,
                         levels = c("H", "K", "M", "B", "h", "m", "O"))
ddata$property[is.na(ddata$property)] <- "O"
ddata$crops <- factor(ddata$CROPDMGEXP,
                      levels = c("K", "M", "B", "k", "m", "O"))
ddata$crops[is.na(ddata$crops)] <- "O"

# Multiplier for each exponent letter. Codes are upper-cased before the
# lookup so that "h"/"H", "k"/"K" and "m"/"M" share one entry.
multiplier <- c(H = 100, K = 1000, M = 1e6, B = 1e9, O = 1)
ddata$PROPERTY <- unname(multiplier[toupper(as.character(ddata$property))])
ddata$CROPS <- unname(multiplier[toupper(as.character(ddata$crops))])

# Property and crop damage per record, scaled by 1e6 (i.e. in millions).
ddata <- mutate(ddata,
                pdam = PROPDMG * PROPERTY / 1e6,
                cdam = CROPDMG * CROPS / 1e6)

# Sum the damage per event type, add the combined total "dam", and keep
# the ten event types with the largest total damage.
# `.groups = "drop"` returns an ungrouped tibble and silences the
# summarise() regrouping message.
ddata2 <- ddata %>%
  group_by(EVTYPE) %>%
  summarise(pdam = sum(pdam, na.rm = TRUE),
            cdam = sum(cdam, na.rm = TRUE),
            .groups = "drop") %>%
  mutate(dam = pdam + cdam) %>%
  arrange(desc(dam)) %>%
  head(10)
ddata2
## # A tibble: 10 x 4
## EVTYPE pdam cdam dam
## <chr> <dbl> <dbl> <dbl>
## 1 FLOOD 144658. 5662. 150320.
## 2 HURRICANE/TYPHOON 69306. 2608. 71914.
## 3 TORNADO 56937. 415. 57352.
## 4 STORM SURGE 43324. 0.005 43324.
## 5 HAIL 15732. 3026. 18758.
## 6 FLASH FLOOD 16141. 1421. 17562.
## 7 DROUGHT 1046. 13973. 15019.
## 8 HURRICANE 11868. 2742. 14610.
## 9 RIVER FLOOD 5119. 5029. 10148.
## 10 ICE STORM 3945. 5022. 8967.
# Bar chart of total damage ("dam" = property + crop damage) for the ten
# costliest event types. Bars are ordered from the highest to the lowest
# total damage so the ranking is readable straight from the figure, and the
# x-axis labels are rotated 90 degrees because the event names are long.
damplot <- ggplot(ddata2, aes(reorder(EVTYPE, -dam), dam)) +
  geom_col(col = "black", fill = "green") +
  ggtitle("Economic impact of harmful events on crops and property") +
  xlab("Event") +
  ylab("Total Damage(in Million dollars)") +
  theme_light() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
damplot
Answer 2: From the ddata2 data frame and the plot of total damage (in millions of dollars) by event type, it is clear that floods have caused the greatest damage.