Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
Here, we read the data.
# Load the compressed NOAA storm data; read.csv() reads the .bz2 file directly.
data <- read.csv(
  "repdata%2Fdata%2FStormData.csv.bz2",
  header = TRUE,
  sep = ","
)
# Preview the first six records.
head(data, n = 6)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
Getting the column names of the data
# List every column available in the storm data frame.
colnames(data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Loading the required library
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Selecting the required columns: “INJURIES” and “FATALITIES” indicate harm to the health of the population
# Keep only the event type and its injury count.
harmful <- data %>%
  select(EVTYPE, INJURIES)
head(harmful, n = 6)
## EVTYPE INJURIES
## 1 TORNADO 15
## 2 TORNADO 0
## 3 TORNADO 2
## 4 TORNADO 2
## 5 TORNADO 2
## 6 TORNADO 6
# Keep only the event type and its fatality count.
harmful2 <- data %>%
  select(EVTYPE, FATALITIES)
head(harmful2, n = 6)
## EVTYPE FATALITIES
## 1 TORNADO 0
## 2 TORNADO 0
## 3 TORNADO 0
## 4 TORNADO 0
## 5 TORNADO 0
## 6 TORNADO 0
Grouping the data by “EVTYPE” and summarising “INJURIES”
# Group the injury records by event type, then total the injuries per group.
agrharmful <- group_by(harmful, EVTYPE)
sumharm <- summarize(agrharmful, Injuries = sum(INJURIES))
head(sumharm, n = 6)
## # A tibble: 6 x 2
## EVTYPE Injuries
## <fct> <dbl>
## 1 " HIGH SURF ADVISORY" 0.
## 2 " COASTAL FLOOD" 0.
## 3 " FLASH FLOOD" 0.
## 4 " LIGHTNING" 0.
## 5 " TSTM WIND" 0.
## 6 " TSTM WIND (G45)" 0.
Grouping the data by “EVTYPE” and summarising “FATALITIES”
# Group the fatality records by event type, then total the fatalities per group.
agrharmful2 <- group_by(harmful2, EVTYPE)
sumharm2 <- summarize(agrharmful2, FATALITIES = sum(FATALITIES))
head(sumharm2, n = 6)
## # A tibble: 6 x 2
## EVTYPE FATALITIES
## <fct> <dbl>
## 1 " HIGH SURF ADVISORY" 0.
## 2 " COASTAL FLOOD" 0.
## 3 " FLASH FLOOD" 0.
## 4 " LIGHTNING" 0.
## 5 " TSTM WIND" 0.
## 6 " TSTM WIND (G45)" 0.
Sorting the data in descending order of “Injuries” to get the top 10 event types by injuries, and likewise for “FATALITIES”
# Reorder rows so the event types with the most injuries come first.
injury_rank <- order(-sumharm$Injuries)
sumharm <- sumharm[injury_rank, ]
head(sumharm, n = 6)
## # A tibble: 6 x 2
## EVTYPE Injuries
## <fct> <dbl>
## 1 TORNADO 91346.
## 2 TSTM WIND 6957.
## 3 FLOOD 6789.
## 4 EXCESSIVE HEAT 6525.
## 5 LIGHTNING 5230.
## 6 HEAT 2100.
# Reorder rows so the event types with the most fatalities come first.
fatality_rank <- order(-sumharm2$FATALITIES)
sumharm2 <- sumharm2[fatality_rank, ]
head(sumharm2, n = 6)
## # A tibble: 6 x 2
## EVTYPE FATALITIES
## <fct> <dbl>
## 1 TORNADO 5633.
## 2 EXCESSIVE HEAT 1903.
## 3 FLASH FLOOD 978.
## 4 HEAT 937.
## 5 LIGHTNING 816.
## 6 TSTM WIND 504.
# Plot the ten event types with the most injuries and the most fatalities
# side by side. sumharm and sumharm2 are expected to be sorted in descending
# order already.
# par() returns the previous settings so they can be restored afterwards.
old_par <- par(mfrow = c(1, 2), mar = c(12, 6, 3, 3))
# head() keeps at most ten rows and does not pad with NA rows when fewer exist.
sumharm1 <- head(sumharm, 10)
sumharm3 <- head(sumharm2, 10)
# Titles tell the two panels apart, matching the damage plots later on.
barplot(
  sumharm1$Injuries,
  names.arg = as.character(sumharm1$EVTYPE),
  las = 2,
  main = "INJURIES Vs EVENT"
)
barplot(
  sumharm3$FATALITIES,
  names.arg = as.character(sumharm3$EVTYPE),
  las = 2,
  main = "FATALITIES Vs EVENT"
)
# Put the graphics parameters back so later plots are not affected.
par(old_par)
By checking the PROPDMGEXP and CROPDMGEXP columns we find that they are coded in levels, where K means kilo (i.e. 10^3), M means mega (i.e. 10^6), and so on
# Distinct exponent codes used for property damage.
unique(data[["PROPDMGEXP"]])
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Distinct exponent codes used for crop damage.
unique(data[["CROPDMGEXP"]])
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M
So here we convert the coded level values to the actual values for PROPDMG, i.e. property damage
# Scale PROPDMG by the power of ten that its PROPDMGEXP code stands for.
# Codes are matched case-sensitively; each row carries exactly one code.
prop_power <- c(
  "H" = 2, "h" = 2,
  "K" = 3,
  "M" = 6, "m" = 6,
  "B" = 9,
  "0" = 0, "1" = 1, "2" = 2, "3" = 3, "4" = 4,
  "5" = 5, "6" = 6, "7" = 7, "8" = 8
)
prop_code <- as.character(data$PROPDMGEXP)
prop_factor <- unname(10^prop_power[prop_code])
# Codes absent from the table (including the empty code) leave the value as is.
prop_factor[is.na(prop_factor)] <- 1
data$PROPDMG <- data$PROPDMG * prop_factor
# "+", "-" and "?" are treated as unusable: the damage is set to zero.
data$PROPDMG[prop_code %in% c("+", "-", "?")] <- 0
head(data$PROPDMG)
## [1] 25000 2500 25000 2500 2500 2500
Selecting the required columns from the original data for property damage and making a new data frame
# Property-damage view: event type, converted amount, and the original code.
damage <- data %>%
  select(EVTYPE, PROPDMG, PROPDMGEXP)
head(damage, n = 6)
## EVTYPE PROPDMG PROPDMGEXP
## 1 TORNADO 25000 K
## 2 TORNADO 2500 K
## 3 TORNADO 25000 K
## 4 TORNADO 2500 K
## 5 TORNADO 2500 K
## 6 TORNADO 2500 K
So here we convert the coded level values to the actual values for CROPDMG, i.e. crop damage
# Scale CROPDMG by the power of ten that its CROPDMGEXP code stands for.
# Codes are matched case-sensitively; each row carries exactly one code.
crop_power <- c(
  "K" = 3, "k" = 3,
  "M" = 6, "m" = 6,
  "B" = 9,
  "0" = 0, "2" = 2
)
crop_code <- as.character(data$CROPDMGEXP)
crop_factor <- unname(10^crop_power[crop_code])
# Codes absent from the table (including the empty code) leave the value as is.
crop_factor[is.na(crop_factor)] <- 1
data$CROPDMG <- data$CROPDMG * crop_factor
# "?" is treated as unusable: the damage is set to zero.
data$CROPDMG[crop_code %in% "?"] <- 0
Selecting the required columns from the original data for crop damage and making a new data frame
# Crop-damage view: event type, converted amount, and the original code.
damage2 <- data %>%
  select(EVTYPE, CROPDMG, CROPDMGEXP)
head(damage2, n = 6)
## EVTYPE CROPDMG CROPDMGEXP
## 1 TORNADO 0
## 2 TORNADO 0
## 3 TORNADO 0
## 4 TORNADO 0
## 5 TORNADO 0
## 6 TORNADO 0
Aggregating PROPDMG and CROPDMG by EVTYPE to get the sum for each event
# Total damage per event type, using the formula interface (value ~ group).
propdam <- aggregate(
  PROPDMG ~ EVTYPE,
  data = damage,
  FUN = sum,
  na.rm = TRUE
)
cropdam <- aggregate(
  CROPDMG ~ EVTYPE,
  data = damage2,
  FUN = sum,
  na.rm = TRUE
)
head(propdam, n = 6)
## EVTYPE PROPDMG
## 1 HIGH SURF ADVISORY 200000
## 2 COASTAL FLOOD 0
## 3 FLASH FLOOD 50000
## 4 LIGHTNING 0
## 5 TSTM WIND 8100000
## 6 TSTM WIND (G45) 8000
# Preview the per-event crop damage totals.
head(cropdam, n = 6)
## EVTYPE CROPDMG
## 1 HIGH SURF ADVISORY 0
## 2 COASTAL FLOOD 0
## 3 FLASH FLOOD 0
## 4 LIGHTNING 0
## 5 TSTM WIND 0
## 6 TSTM WIND (G45) 0
Sorting the data according to the damages to get the top damages
# Reorder rows so the event types with the largest property damage come first.
prop_rank <- order(-propdam$PROPDMG)
propdam <- propdam[prop_rank, ]
head(propdam, n = 6)
## EVTYPE PROPDMG
## 170 FLOOD 144657709807
## 411 HURRICANE/TYPHOON 69305840000
## 834 TORNADO 56947380617
## 670 STORM SURGE 43323536000
## 153 FLASH FLOOD 16822673979
## 244 HAIL 15735267513
# Reorder rows so the event types with the largest crop damage come first.
crop_rank <- order(-cropdam$CROPDMG)
cropdam <- cropdam[crop_rank, ]
head(cropdam, n = 6)
## EVTYPE CROPDMG
## 95 DROUGHT 13972566000
## 170 FLOOD 5661968450
## 590 RIVER FLOOD 5029459000
## 427 ICE STORM 5022113500
## 244 HAIL 3025954473
## 402 HURRICANE 2741910000
# Keep the ten costliest event types for each damage category. propdam and
# cropdam are expected to be sorted in descending order already.
# head() keeps at most ten rows and does not pad with NA rows when fewer exist.
propdam <- head(propdam, 10)
cropdam <- head(cropdam, 10)
# par() returns the previous settings so they can be restored afterwards.
old_par <- par(mfrow = c(1, 2), mar = c(11, 5, 3, 3))
barplot(
  cropdam$CROPDMG,
  names.arg = as.character(cropdam$EVTYPE),
  las = 2,
  col = "pink",
  main = "CROP DAMAGE Vs EVENT"
)
# Title fixed: the original string was missing the space before "Vs".
barplot(
  propdam$PROPDMG,
  names.arg = as.character(propdam$EVTYPE),
  las = 2,
  col = "pink",
  main = "PROPERTY DAMAGE Vs EVENT"
)
# Put the graphics parameters back so later plots are not affected.
par(old_par)