Storm events cause various kinds of damage across the United States of America, and their consequences have been recorded for both health and property. The analysed data shows that tornadoes have the greatest health impact as well as the greatest economic impact on people and their lives, by a large margin.
The data comes from the NOAA Storm Database and is stored as a compressed CSV file, which is then read. The EVTYPE column is converted to character type so that further processing can be performed on it. Two new columns are added, TOTAL_AFFECTED and TOTAL_ECONOMICDMG, which combine FATALITIES with INJURIES and PROPDMG with CROPDMG respectively. The data is then grouped by EVTYPE, and the top 5 most harmful event types are recorded and plotted for comparison.
library("ggplot2")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Fetch the NOAA storm data set once; later runs reuse the local copy
## instead of downloading the archive again.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "Storm.csv.bz2"
if (!file.exists(storm_file)) {
  ## mode = "wb" keeps the bzip2 archive byte-exact on every platform
  download.file(storm_url, storm_file, mode = "wb")
}
## read.csv() reads the compressed file directly
data <- read.csv(storm_file)

## Normalise event names to upper-case text and derive the two impact totals:
##   TOTAL_AFFECTED    = FATALITIES + INJURIES
##   TOTAL_ECONOMICDMG = PROPDMG + CROPDMG
## NOTE(review): PROPDMG and CROPDMG are summed as raw numbers. The NOAA data
## set presumably also carries magnitude columns (PROPDMGEXP / CROPDMGEXP);
## confirm whether they must be applied before these totals are compared.
data <- mutate(
  data,
  EVTYPE = toupper(as.character(EVTYPE)),
  TOTAL_AFFECTED = FATALITIES + INJURIES,
  TOTAL_ECONOMICDMG = PROPDMG + CROPDMG
)

## One row per event type with its summed health and economic impact
grouped_by_events <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    TOTAL.AFFECTED = sum(TOTAL_AFFECTED, na.rm = TRUE),
    TOTAL.ECONOMICDMG = sum(TOTAL_ECONOMICDMG, na.rm = TRUE)
  )

## Row indices of the 5 event types with the most people affected, most
## harmful first. The same 5 events are used for both charts so that their
## health and economic impact can be compared side by side.
top5 <- head(order(grouped_by_events$TOTAL.AFFECTED, decreasing = TRUE), 5)
##selecting only top 5 data
grouped_by_events <- grouped_by_events[top5, ]
grouped_by_events$EVTYPE <- as.factor(grouped_by_events$EVTYPE)

## Explicit label and colour vectors (the factor's integer codes index the
## current palette) rather than relying on implicit factor coercion.
event_labels <- as.character(grouped_by_events$EVTYPE)
event_colours <- as.integer(grouped_by_events$EVTYPE)

## Stack the two charts vertically; remember the previous layout so it can be
## restored once both charts are drawn.
old_par <- par(mfrow = c(2, 1))
##plotting 1st plot
barplot(
  height = grouped_by_events$TOTAL.AFFECTED,
  names.arg = event_labels,
  xlab = "Event Type",
  ylab = "Total Affected",
  col = event_colours,
  main = "Health Damage Chart",
  space = 5
)
##plotting 2nd plot
barplot(
  height = grouped_by_events$TOTAL.ECONOMICDMG,
  names.arg = event_labels,
  xlab = "Event Type",
  ylab = "Economic Damage",
  col = event_colours,
  main = "Economic Damage Chart",
  space = 5
)
par(old_par)
6. Complete R-Code
## dplyr supplies mutate(), group_by(), summarise() and the %>% pipe used
## below; loading it here lets this listing run on its own.
library("dplyr")

##downloading the data from URL
## The archive is fetched only once; later runs reuse the local copy.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "Storm.csv.bz2"
if (!file.exists(storm_file)) {
  ## mode = "wb" keeps the bzip2 archive byte-exact on every platform
  download.file(storm_url, storm_file, mode = "wb")
}
##reading the downloaded data
## read.csv() reads the compressed file directly
data <- read.csv(storm_file)

##add new columns
## Event names are normalised to upper-case text, then the totals are derived:
##   TOTAL_AFFECTED    = FATALITIES + INJURIES
##   TOTAL_ECONOMICDMG = PROPDMG + CROPDMG
## NOTE(review): PROPDMG and CROPDMG are summed as raw numbers. The NOAA data
## set presumably also carries magnitude columns (PROPDMGEXP / CROPDMGEXP);
## confirm whether they must be applied before these totals are compared.
data <- mutate(
  data,
  EVTYPE = toupper(as.character(EVTYPE)),
  TOTAL_AFFECTED = FATALITIES + INJURIES,
  TOTAL_ECONOMICDMG = PROPDMG + CROPDMG
)

##grouping the data
## One row per event type with its summed health and economic impact
grouped_by_events <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    TOTAL.AFFECTED = sum(TOTAL_AFFECTED, na.rm = TRUE),
    TOTAL.ECONOMICDMG = sum(TOTAL_ECONOMICDMG, na.rm = TRUE)
  )

## Row indices of the 5 event types with the most people affected, most
## harmful first. The same 5 events are used for both charts so that their
## health and economic impact can be compared side by side.
top5 <- head(order(grouped_by_events$TOTAL.AFFECTED, decreasing = TRUE), 5)
##selecting only top 5 data
grouped_by_events <- grouped_by_events[top5, ]
grouped_by_events$EVTYPE <- as.factor(grouped_by_events$EVTYPE)

## Explicit label and colour vectors (the factor's integer codes index the
## current palette) rather than relying on implicit factor coercion.
event_labels <- as.character(grouped_by_events$EVTYPE)
event_colours <- as.integer(grouped_by_events$EVTYPE)

## Stack the two charts vertically; remember the previous layout so it can be
## restored once both charts are drawn.
old_par <- par(mfrow = c(2, 1))
##plotting 1st plot
barplot(
  height = grouped_by_events$TOTAL.AFFECTED,
  names.arg = event_labels,
  xlab = "Event Type",
  ylab = "Total Affected",
  col = event_colours,
  main = "Health Damage Chart",
  space = 5
)
##plotting 2nd plot
barplot(
  height = grouped_by_events$TOTAL.ECONOMICDMG,
  names.arg = event_labels,
  xlab = "Event Type",
  ylab = "Economic Damage",
  col = event_colours,
  main = "Economic Damage Chart",
  space = 5
)
par(old_par)