In the recent past, extreme weather events have caused huge damage to human lives and property across the United States, leading to fatalities, injuries and economic crisis. Minimizing the impact of severe weather events is a key concern for many municipalities and communities. This paper uses the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to find out which weather events cause the most casualties, crop damage and property damage.
The data is a compressed CSV file that is downloaded from the web. This section describes the data processing techniques used.
# Fetch the compressed NOAA storm database and load it into `storm.Data`.
data.dir <- "./NOAA"
archive <- file.path(data.dir, "SD.bz2")
surl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists(data.dir)) {
  dir.create(data.dir)
}
# Only download when the archive is not already on disk, so re-running the
# analysis does not fetch the file again. Delete ./NOAA/SD.bz2 to force a
# fresh download (e.g. after an interrupted transfer).
if (!file.exists(archive)) {
  download.file(surl, destfile = archive, mode = "wb")
}
# The file is read through a bzfile() connection, so no manual unpacking.
storm.Data <- read.csv(bzfile(archive), header = TRUE, sep = ",")
dim(storm.Data)
## [1] 902297 37
names(storm.Data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
There are 902297 rows and 37 columns in the dataset. The dataset is further trimmed to keep only the columns relevant to this assignment, i.e. the year derived from “BGN_DATE”, “EVTYPE”, “FATALITIES”, “INJURIES”, “PROPDMG”, “PROPDMGEXP”, “CROPDMG” and “CROPDMGEXP”, and only the events that caused at least one fatality or injury or some property or crop damage. The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. For the analysis, only the records from 1991 onwards (roughly the last 20 years) were considered.
# Derive the event year (kept as a 4-digit character string) from the begin date.
storm.Data$BGN_DATE <- as.Date(storm.Data$BGN_DATE, format = "%m/%d/%Y %H:%M:%S")
storm.Data$Year <- format(storm.Data$BGN_DATE, "%Y")

# Keep events after 1990 that caused at least one casualty or some damage, and
# only the columns used by the rest of the analysis. Columns are selected by
# name rather than by position so the selection survives a column reorder.
event.year <- as.integer(storm.Data$Year)
harmful <- storm.Data$FATALITIES > 0 | storm.Data$INJURIES > 0 |
  storm.Data$PROPDMG > 0 | storm.Data$CROPDMG > 0
keep.cols <- c("Year", "EVTYPE", "FATALITIES", "INJURIES",
               "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
# which() drops rows whose year or damage figures are NA instead of turning
# them into all-NA rows.
storm.Data <- storm.Data[which(event.year > 1990 & harmful), keep.cols]

# Translate a damage exponent code into a numeric multiplier.
#
# code: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length.
#
# K, M and B (thousands, millions, billions) are matched case-insensitively;
# H is treated as hundreds (assumption - TODO confirm against the NOAA
# documentation). Any other code (blank, digits, "+", "-", "?") is given a
# multiplier of 1, i.e. the damage figure is taken at face value. Using a
# lookup instead of gsub() + as.numeric() means "B" and lower-case codes no
# longer turn into NA ("NAs introduced by coercion"), which silently dropped
# the largest losses.
exponent_multiplier <- function(code) {
  lookup <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  idx <- match(toupper(trimws(as.character(code))), names(lookup))
  multiplier <- unname(lookup[idx])
  multiplier[is.na(idx)] <- 1
  multiplier
}

# As before, the *EXP columns are overwritten with the damage scaled by its
# exponent, i.e. PROPDMGEXP / CROPDMGEXP hold the damage in dollars afterwards.
storm.Data$PROPDMGEXP <- storm.Data$PROPDMG * exponent_multiplier(storm.Data$PROPDMGEXP)
storm.Data$CROPDMGEXP <- storm.Data$CROPDMG * exponent_multiplier(storm.Data$CROPDMGEXP)

# Casualties = fatalities + injuries for each event.
storm.Data$casualities <- storm.Data$FATALITIES + storm.Data$INJURIES
What are the ten worst events causing maximum damage in the last 20 years?
library(dplyr)
# Recomputed here so this section can be run on its own.
storm.Data$casualities <- storm.Data$FATALITIES + storm.Data$INJURIES

# Totals per event type. Damage is summed from PROPDMGEXP / CROPDMGEXP, which
# hold the damage scaled by its exponent after preprocessing; summing the raw
# PROPDMG / CROPDMG figures would add thousands, millions and billions together
# as if they were the same unit.
by_event <- group_by(storm.Data, EVTYPE)
Year_SData <- data.frame(summarise(
  by_event,
  casualities = sum(casualities, na.rm = TRUE),
  Prop.dmg = sum(PROPDMGEXP, na.rm = TRUE),
  Crop.dmg = sum(CROPDMGEXP, na.rm = TRUE)
))
# Events causing maximum Casualities (Injuries + Fatalities)
head(Year_SData$EVTYPE[order(-Year_SData$casualities)], 10)
## [1] TORNADO EXCESSIVE HEAT FLOOD
## [4] LIGHTNING TSTM WIND HEAT
## [7] FLASH FLOOD ICE STORM THUNDERSTORM WIND
## [10] WINTER STORM
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
# Events causing maximum Property Damage
# NOTE: the listing below was recorded while the ranking still summed the raw
# PROPDMG figures; re-knit the document to refresh it.
head(Year_SData$EVTYPE[order(-Year_SData$Prop.dmg)], 10)
## [1] TORNADO FLASH FLOOD TSTM WIND
## [4] FLOOD THUNDERSTORM WIND HAIL
## [7] LIGHTNING THUNDERSTORM WINDS HIGH WIND
## [10] WINTER STORM
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
# Events causing maximum Crop Damage
# NOTE: the listing below was recorded while the ranking still summed the raw
# CROPDMG figures; re-knit the document to refresh it.
head(Year_SData$EVTYPE[order(-Year_SData$Crop.dmg)], 10)
## [1] HAIL FLASH FLOOD FLOOD
## [4] TSTM WIND TORNADO THUNDERSTORM WIND
## [7] DROUGHT THUNDERSTORM WINDS HIGH WIND
## [10] HEAVY RAIN
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
The worst 10 events causing maximum casualties are TORNADO, EXCESSIVE HEAT, FLOOD, LIGHTNING, TSTM WIND, HEAT, FLASH FLOOD, ICE STORM, THUNDERSTORM WIND and WINTER STORM. After a close inspection of the dataset, we still see some discrepancies in the data: for example, TSTM WIND, THUNDERSTORM WIND, THUNDERSTORM WINDS, etc. represent the same event, THUNDERSTORM, but there are multiple entries for it in the dataset. Further cleansing of the dataset is done to remove these discrepancies.
# Collapse the many spellings of each event into ten event groups and total
# casualties and damage per group and year.

# Yearly totals for every record whose EVTYPE matches `pattern`.
#
# pattern: regular expression applied to storm.Data$EVTYPE.
# label:   group name written to the EVTYPE column of the result.
# Returns a data frame with columns Year, Casualities, Property.Dmg, Crop.Dmg
# and EVTYPE (one row per year that has at least one matching record).
#
# Damage is summed from PROPDMGEXP / CROPDMGEXP, which hold the damage scaled
# by its exponent after preprocessing; the raw PROPDMG / CROPDMG figures mix
# thousands, millions and billions and cannot be added meaningfully.
summarise_event_by_year <- function(pattern, label) {
  matched <- storm.Data[grepl(pattern, storm.Data$EVTYPE), ]
  totals <- data.frame(summarise(
    group_by(matched, Year),
    Casualities = sum(casualities, na.rm = TRUE),
    Property.Dmg = sum(PROPDMGEXP, na.rm = TRUE),
    Crop.Dmg = sum(CROPDMGEXP, na.rm = TRUE)
  ))
  totals$EVTYPE <- rep(label, nrow(totals))
  totals
}

# Group label -> EVTYPE pattern, listed in the row order of the final table.
event.groups <- c(
  # 10. HAIL
  HAIL = "^HAIL",
  # 9. TSUNAMI
  TSUNAMI = "^TSUNAMI",
  # 8. HURRICANE/TYPHOON. The pattern used to read "^TYPOON", which is not a
  # spelling of typhoon and so could never match a typhoon record. The label
  # keeps its historical spelling because later code refers to the column
  # HURRICANE_TYPOON.
  HURRICANE_TYPOON = "^HURRICANE|^TYPHOON",
  # 7. ICE STORM and WINTER STORM
  WINTER_STORM = "^ICE STORM|^WINTER STORM",
  # 6. TORNADO
  TORNADO = "^TORNADO",
  # 5. DROUGHT
  DROUGHT = "^DROUGHT",
  # 4. FLASH FLOOD
  FLASH_FLOOD = "^FLASH",
  # 3. FLOOD
  FLOODS = "^FLOOD",
  # 2. HEAT (includes HEAT WAVE) and EXCESSIVE HEAT
  HEAT = "^HEAT|^EXCESSIVE HEAT",
  # 1. THUNDERSTORM and TSTM wind
  THUNDERSTORM = "^THUNDER|^TSTM"
)

# Clean Dataset
F_Storm.Data <- do.call(
  rbind,
  unname(Map(summarise_event_by_year, event.groups, names(event.groups)))
)
rownames(F_Storm.Data) <- NULL
head(F_Storm.Data)
# NOTE: the Property.Dmg and Crop.Dmg columns in the listing below were
# recorded while the totals still summed the raw PROPDMG / CROPDMG figures;
# re-knit the document to refresh them.
## Year Casualities Property.Dmg Crop.Dmg EVTYPE
## 1 1991 8 0.00 0.00 HAIL
## 2 1992 109 0.00 0.00 HAIL
## 3 1993 17 37585.05 18280.35 HAIL
## 4 1994 27 53966.95 46148.00 HAIL
## 5 1995 206 22868.00 16968.96 HAIL
## 6 1996 83 31278.93 25026.00 HAIL
The following chart shows how these events have impacted the human population across the United States.
# Stacked area chart of yearly casualties, one band per event group.
library(reshape2)  # dcast() comes from reshape2 and must be attached before use

# Reshape to one row per year and one column per event group.
Casualities <- F_Storm.Data[, c("Year", "EVTYPE", "Casualities")]
Cas.Event <- dcast(Casualities, Year ~ EVTYPE, value.var = "Casualities")
Cas.Event[is.na(Cas.Event)] <- 0  # a year without records for a group counts as 0
Cas.Event$Year <- as.integer(Cas.Event$Year)

# Bands in stacking order (bottom to top) with their fill colours.
layer.fill <- c(
  TORNADO = "lightgreen", FLASH_FLOOD = "royalblue", HEAT = "plum",
  DROUGHT = "maroon", HAIL = "red", WINTER_STORM = "orange",
  HURRICANE_TYPOON = "purple", THUNDERSTORM = "darkgreen",
  FLOODS = "grey", TSUNAMI = "yellow"
)
layer.label <- c("Tornado", "FlashFlood", "Heat", "Drought", "Hail",
                 "Winter storm", "Hurricane/Typhoon", "Thunderstorm",
                 "Floods", "Tsunami")

# The y axis has to fit the full stack, i.e. the largest yearly total.
ylim <- c(0, max(rowSums(Cas.Event[, names(layer.fill)])))
# Polygon outline: years left-to-right along the lower edge, then back along
# the upper edge.
xx <- c(Cas.Event$Year, rev(Cas.Event$Year))

plot(x = Cas.Event$Year, y = Cas.Event$TORNADO, ylim = ylim, col = "light blue",
     type = "l", ylab = "Casualities", xlab = "Year",
     main = "Worst Casualities By Weather Event\n Across United States (1991-2011)")

# Each band sits on the running total of all bands below it. Accumulating the
# lower edge in a loop keeps every band consistent; the hand-written sums used
# to leave HAIL out of the lower edge of the top band, so it was drawn over
# the band underneath.
lower <- rep(0, nrow(Cas.Event))
for (event in names(layer.fill)) {
  upper <- lower + Cas.Event[[event]]
  polygon(xx, c(lower, rev(upper)), col = layer.fill[[event]])
  lower <- upper
}

legend(2000, 9700, layer.label, fill = unname(layer.fill), cex = 0.8, bty = "n")
From the chart it is clear that tornadoes, heat, thunderstorms and floods are the events most harmful to the human population.
Across the United States, which types of events have the greatest economic consequences? Again we use the dataset “F_Storm.Data”. For this analysis we have calculated the 20-year annual average crop damage (1991-2011), the 5-year annual average crop damage (2006-2011), and the crop damage for the year 2011.
library(dplyr)
library(reshape2)  # melt()
library(ggplot2)   # ggplot() and friends; must be attached before the chart
head(F_Storm.Data)
## Year Casualities Property.Dmg Crop.Dmg EVTYPE
## 1 1991 8 0.00 0.00 HAIL
## 2 1992 109 0.00 0.00 HAIL
## 3 1993 17 37585.05 18280.35 HAIL
## 4 1994 27 53966.95 46148.00 HAIL
## 5 1995 206 22868.00 16968.96 HAIL
## 6 1996 83 31278.93 25026.00 HAIL

# Mean yearly property and crop damage per event group, rounded to 2 decimals.
#
# yearly: rows of F_Storm.Data to average over.
# suffix: period tag appended to the output column names.
# Returns a data frame with columns EVTYPE, Avg_Prop_Dmg_<suffix> and
# Avg_Crop_Dmg_<suffix>.
average_damage <- function(yearly, suffix) {
  averages <- data.frame(summarise(
    group_by(yearly, EVTYPE),
    prop = round(mean(Property.Dmg, na.rm = TRUE), 2),
    crop = round(mean(Crop.Dmg, na.rm = TRUE), 2)
  ))
  names(averages) <- c("EVTYPE",
                       paste0("Avg_Prop_Dmg_", suffix),
                       paste0("Avg_Crop_Dmg_", suffix))
  averages
}

# Year is stored as text; compare it as a number rather than as a string.
damage.year <- as.integer(F_Storm.Data$Year)
Avg_An_20yrs_dmg <- average_damage(F_Storm.Data, "20yrs")
Avg_An_5yrs_dmg <- average_damage(F_Storm.Data[which(damage.year > 2005), ], "5yrs")
Avg_An_yr2011_dmg <- average_damage(F_Storm.Data[which(damage.year == 2011), ], "2011")

# Inner joins: an event group only appears if it has data in all three periods.
Final_Damage <- merge(Avg_An_20yrs_dmg, Avg_An_5yrs_dmg, by = "EVTYPE")
Final_Damage <- merge(Final_Damage, Avg_An_yr2011_dmg, by = "EVTYPE")

# Long format for plotting: one row per event group and averaging period.
CrpDmg <- Final_Damage[, c("EVTYPE", "Avg_Crop_Dmg_20yrs",
                           "Avg_Crop_Dmg_5yrs", "Avg_Crop_Dmg_2011")]
CrpDmg <- melt(CrpDmg, id = c("EVTYPE"))
names(CrpDmg) <- c("EVTYPE", "Annual.Avg", "Crop.Dmg")
ggplot(CrpDmg, aes(EVTYPE, Crop.Dmg)) +
  geom_bar(aes(fill = Annual.Avg), position = "dodge", stat = "identity") +
  xlab("") +
  # The axis shows damage, not casualties.
  ylab("Annual Average Crop Damage") +
  ggtitle("Weather Events Causing Worst Crop Damage \nin United States (1991-2011)") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.1)) +
  theme(plot.title = element_text(lineheight = .8, face = "bold"))
From the chart, the weather events causing the most crop damage are flood, hail, thunderstorm, tornado and flash flood.
For this analysis we have calculated the 20-year annual average property damage (1991-2011), the 5-year annual average property damage (2006-2011), and the property damage for the year 2011. The chart below shows the property damage caused by weather events from 1991 to 2011.
library(reshape2)  # melt()
library(ggplot2)   # ggplot() and friends
# Property-damage averages for the three periods. The columns are picked by
# name: the positional selection c(1, 2, 3, 6) pulled in the 20-year *crop*
# average (column 3) in place of the 5-year property average (column 4).
PropDmg <- Final_Damage[, c("EVTYPE", "Avg_Prop_Dmg_20yrs",
                            "Avg_Prop_Dmg_5yrs", "Avg_Prop_Dmg_2011")]
# Long format for plotting: one row per event group and averaging period.
PropDmg <- melt(PropDmg, id = c("EVTYPE"))
names(PropDmg) <- c("EVTYPE", "Annual.Avg", "Prop.Dmg")
ggplot(PropDmg, aes(EVTYPE, Prop.Dmg)) +
  geom_bar(aes(fill = Annual.Avg), position = "dodge", stat = "identity") +
  xlab("") +
  # The axis shows damage, not casualties.
  ylab("Annual Average Property Damage") +
  ggtitle("Weather Events Causing Worst Property Damage \nin United States (1991-2011)") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.1)) +
  theme(plot.title = element_text(lineheight = .8, face = "bold"))
From the above chart, the weather events causing maximum damage to property across the United States are tornado, thunderstorm, flash flood and flood.

## Results

From the above analysis it is clear that tornadoes, heat, hail, floods and thunderstorms have the most harmful effect on life and property across the United States.