Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events. You must use the database to answer the questions below and show the code for your entire analysis. Your analysis can consist of tables, figures, or other summaries. You may use any R package you want to support your analysis.
Your data analysis must address the following questions: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?
First, download the data file and place it in the working directory.
library(plyr)
library(ggplot2)
# NOTE(review): setwd() ties this script to one machine's absolute path.
# Consider running from the project directory and dropping this call.
project_dir <- "C:/Users/Siddhesha/Desktop/course 5 project 2"
setwd(project_dir)

# Read the storm data straight from the compressed CSV, then show the
# structure of the resulting data frame.
storm_file <- "repdata_data_StormData.csv.bz2"
stormD <- read.csv(storm_file)
str(stormD)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Count the missing (NA) entries in `x`.
missingdata <- function(x) {
  length(which(is.na(x)))
}
# Apply missingdata() to every column of stormD and print the per-column
# NA counts.
count_missing_by_column <- colwise(missingdata)
count_missing_by_column(stormD)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE BGN_RANGE
## 1 0 0 0 0 0 0 0 0 0
## BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN END_RANGE END_AZI
## 1 0 0 0 0 0 902297 0 0
## END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG PROPDMGEXP
## 1 0 0 0 843563 0 0 0 0 0
## CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE LATITUDE_E
## 1 0 0 0 0 0 47 0 40
## LONGITUDE_ REMARKS REFNUM
## 1 0 0 0
# Total casualties (fatalities plus injuries) for each event type.
injured <- ddply(
  stormD,
  "EVTYPE",
  summarise,
  harm = sum(FATALITIES + INJURIES)
)

# Rank event types from most to least harmful and keep the first ten rows.
harm_rank <- order(injured$harm, decreasing = TRUE)
ordinjured <- injured[harm_rank, ]
top10harmed <- ordinjured[seq_len(10), ]
These are the ten most harmful event types, ranked by total casualties (fatalities plus injuries).
# Auto-print the ten event types with the highest combined fatality and
# injury counts.
top10harmed
## EVTYPE harm
## 834 TORNADO 96979
## 130 EXCESSIVE HEAT 8428
## 856 TSTM WIND 7461
## 170 FLOOD 7259
## 464 LIGHTNING 6046
## 275 HEAT 3037
## 153 FLASH FLOOD 2755
## 427 ICE STORM 2064
## 760 THUNDERSTORM WIND 1621
## 972 WINTER STORM 1527
# Bar chart of the ten most harmful event types.
# `fill` colours the bar interiors; the `col` aesthetic only tints the bar
# outline and leaves every bar grey. reorder() sorts the bars from most to
# least harmful instead of alphabetically by event name.
ggplot(
  data = top10harmed,
  aes(x = reorder(EVTYPE, -harm), y = harm, fill = EVTYPE)
) +
  geom_col() +
  labs(x = "Top 10 events", y = "total no of people harmed") +
  ggtitle("Casualties in U.S from 1950- 2011") +
  theme(axis.text.x = element_text(angle = 90))
Next, collect the data on the events that have the greatest economic consequences.
# Convert a damage amount and its exponent code into dollars and total the
# result per event type.
#
# data:       data frame holding EVTYPE plus the two columns named below
# amount_col: name of the numeric damage column (e.g. "PROPDMG")
# exp_col:    name of the exponent-code column (e.g. "PROPDMGEXP")
# total_name: name to give the summed-dollars column in the result
#
# Returns a data frame with one row per EVTYPE and columns EVTYPE and
# `total_name`.
#
# Codes H, K, M and B (case-insensitive) scale the amount by 1e2, 1e3, 1e6
# and 1e9. Any other code, including a blank one, leaves the amount as is.
total_damage_by_event <- function(data, amount_col, exp_col, total_name) {
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(data[[exp_col]])
  scale <- unname(multipliers[match(code, names(multipliers))])
  # Unrecognised codes mean "no scaling"; a missing code stays missing.
  scale[is.na(scale) & !is.na(code)] <- 1

  converted <- data.frame(
    EVTYPE = data[["EVTYPE"]],
    Dollars = data[[amount_col]] * scale,
    stringsAsFactors = FALSE
  )
  totals <- ddply(converted, "EVTYPE", summarise, Total = sum(Dollars))
  setNames(totals, c("EVTYPE", total_name))
}

# Property and crop damage in dollars, per event type.
proptot <- total_damage_by_event(
  stormD, "PROPDMG", "PROPDMGEXP", "TotalPropDamage"
)
croptot <- total_damage_by_event(
  stormD, "CROPDMG", "CROPDMGEXP", "TotalCropDamage"
)

# Combine both damage kinds and rank event types by total economic cost.
damage0 <- merge(proptot, croptot, by = "EVTYPE")
damage1 <- mutate(damage0, TotalDamage = TotalPropDamage + TotalCropDamage)
damage <- damage1[order(damage1$TotalDamage, decreasing = TRUE), ]

# head() keeps at most 15 rows and never pads with NA rows the way
# `damage[1:15, ]` would if fewer event types were present.
Topdam <- head(damage, 15)
Topdam
## EVTYPE TotalPropDamage TotalCropDamage TotalDamage
## 170 FLOOD 144657709807 5661968450 150319678257
## 411 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 834 TORNADO 56937160779 414953270 57352114049
## 670 STORM SURGE 43323536000 5000 43323541000
## 244 HAIL 15732267543 3025954473 18758222016
## 153 FLASH FLOOD 16140812067 1421317100 17562129167
## 95 DROUGHT 1046106000 13972566000 15018672000
## 402 HURRICANE 11868319010 2741910000 14610229010
## 590 RIVER FLOOD 5118945500 5029459000 10148404500
## 427 ICE STORM 3944927860 5022113500 8967041360
## 848 TROPICAL STORM 7703890550 678346000 8382236550
## 972 WINTER STORM 6688497251 26944000 6715441251
## 359 HIGH WIND 5270046295 638571300 5908617595
## 957 WILDFIRE 4765114000 295472800 5060586800
## 856 TSTM WIND 4484928495 554007350 5038935845
# Bar chart of the fifteen costliest event types.
# `fill` colours the bar interiors; the `col` aesthetic only tints the bar
# outline and leaves every bar grey. reorder() sorts the bars from most to
# least damaging instead of alphabetically by event name.
ggplot(
  Topdam,
  aes(x = reorder(EVTYPE, -TotalDamage), y = TotalDamage, fill = EVTYPE)
) +
  geom_col() +
  labs(x = "Top 15 events", y = "economic damage") +
  ggtitle("economic damage in U.S from 1950- 2011") +
  theme(
    axis.text.x = element_text(angle = 90),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )