This analysis explores the NOAA Storm Database. It tries to address the following questions: 1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? 2. Across the United States, which types of events have the greatest economic consequences? I find: 1. Tornadoes are the most harmful events to population health. 2. Tornadoes cause the most property damage, while drought causes the most crop damage.
Load the data.
# Read the raw storm-event table; text columns stay character vectors.
data <- read.csv(
  file = "repdata%2Fdata%2FStormData.csv",
  na.strings = "NA",
  stringsAsFactors = FALSE
)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec =
## dec, : EOF within quoted string
Check the column names. Drop the columns that are not needed, and drop the rows with abnormal values.
# List the column names so the needed variables can be selected by position.
colName <- colnames(data)
print(colName)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Columns kept: 8 (EVTYPE) and 23-28 (FATALITIES, INJURIES, PROPDMG,
# PROPDMGEXP, CROPDMG, CROPDMGEXP).
indicators <- c(8, 23:28)

# Only the first 547363 rows are retained.
# NOTE(review): the cut-off is hard-coded; per the accompanying text the later
# rows hold abnormal values -- confirm against the raw file.
data <- data[seq_len(547363), indicators]
head(data, n = 6L)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0.00 15.00 25.00 K 0.00
## 2 TORNADO 0.00 0.00 2.50 K 0.00
## 3 TORNADO 0.00 2.00 25.00 K 0.00
## 4 TORNADO 0.00 2.00 2.50 K 0.00
## 5 TORNADO 0.00 2.00 2.50 K 0.00
## 6 TORNADO 0.00 6.00 2.50 K 0.00
Calculate each event’s total damage to both property and crops.
Property damage first.
# Distinct exponent codes recorded alongside the property-damage figures.
exp1 <- factor(data[["PROPDMGEXP"]])
levels(exp1)
## [1] "" "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"
# Multiplier for each PROPDMGEXP code, looked up by name. A positional
# `labels =` vector would depend on the sort order of the factor levels, which
# varies with the locale, and would create duplicated factor levels.
# Codes without an entry here ("", "-", "?", "+") become NA.
prop_multiplier <- c(
  "0" = 1, "1" = 10, "2" = 100, "3" = 1000, "4" = 10000, "5" = 100000,
  "6" = 1000000, "7" = 10000000, "8" = 100000000,
  "B" = 1000000000,
  "h" = 100, "H" = 100,
  "K" = 1000,
  "m" = 1000000, "M" = 1000000
)
data[, 5] <- unname(prop_multiplier[as.character(exp1)])
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated
# Preview the first rows after recoding the property-damage exponent.
head(data, n = 6L)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0.00 15.00 25.00 1000 0.00
## 2 TORNADO 0.00 0.00 2.50 1000 0.00
## 3 TORNADO 0.00 2.00 25.00 1000 0.00
## 4 TORNADO 0.00 2.00 2.50 1000 0.00
## 5 TORNADO 0.00 2.00 2.50 1000 0.00
## 6 TORNADO 0.00 6.00 2.50 1000 0.00
# Scale the property-damage figure by its multiplier (column 4 is PROPDMG,
# column 5 is PROPDMGEXP); rows whose multiplier is NA end up NA.
data$PROPDMG <- as.numeric(data$PROPDMG)
data$PROPDMG <- data$PROPDMG * as.numeric(as.character(data$PROPDMGEXP))
head(data, n = 6L)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0.00 15.00 25000 1000 0.00
## 2 TORNADO 0.00 0.00 2500 1000 0.00
## 3 TORNADO 0.00 2.00 25000 1000 0.00
## 4 TORNADO 0.00 2.00 2500 1000 0.00
## 5 TORNADO 0.00 2.00 2500 1000 0.00
## 6 TORNADO 0.00 6.00 2500 1000 0.00
Crop damage next.
# Distinct exponent codes recorded alongside the crop-damage figures.
exp2 <- factor(data[["CROPDMGEXP"]])
levels(exp2)
## [1] "" "?" "0" "2" "B" "k" "K" "m" "M"
# Multiplier for each CROPDMGEXP code, looked up by name. A positional
# `labels =` vector would depend on the sort order of the factor levels, which
# varies with the locale, and would create duplicated factor levels.
# Codes without an entry here ("", "?") become NA.
crop_multiplier <- c(
  "0" = 1, "2" = 100,
  "B" = 1000000000,
  "k" = 1000, "K" = 1000,
  "m" = 1000000, "M" = 1000000
)
data[, 7] <- unname(crop_multiplier[as.character(exp2)])
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated
# Preview the last rows after recoding the crop-damage exponent.
tail(data, n = 6L)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG
## 547358 TSTM WIND 0.00 0.00 4e+05 1000 0.00
## 547359 HAIL 0.00 0.00 NA <NA> 0.00
## 547360 HAIL 0.00 0.00 2e+05 1000 100.00
## 547361 FLASH FLOOD 0.00 0.00 NA <NA> 0.00
## 547362 HAIL 0.00 0.00 NA <NA> 0.00
## 547363 HAIL 0.00 0.00 NA <NA> 0.00
## CROPDMGEXP
## 547358 <NA>
## 547359 <NA>
## 547360 1000
## 547361 <NA>
## 547362 <NA>
## 547363 <NA>
# Scale the crop-damage figure by its multiplier (column 6 is CROPDMG,
# column 7 is CROPDMGEXP); rows whose multiplier is NA end up NA.
data$CROPDMG <- as.numeric(data$CROPDMG)
data$CROPDMG <- data$CROPDMG * as.numeric(as.character(data$CROPDMGEXP))

# The two exponent columns are no longer needed once the amounts are scaled.
data <- data[, c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")]
Group the data by EVTYPE. Sum the damage, fatalities and injuries for each event type. Save the result as “dataFinal”.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# FATALITIES (column 2) and INJURIES (column 3) must be numeric to be summed.
data[, 2] <- as.numeric(data[, 2])
data[, 3] <- as.numeric(data[, 3])

# One row per event type with its total fatalities (FTL), injuries (INJ),
# property damage (PRO) and crop damage (CRO); NA entries are skipped.
EVT <- group_by(data, EVTYPE)
temp <- summarize(
  EVT,
  FTL = sum(FATALITIES, na.rm = TRUE),
  INJ = sum(INJURIES, na.rm = TRUE),
  PRO = sum(PROPDMG, na.rm = TRUE),
  CRO = sum(CROPDMG, na.rm = TRUE)
)

# Keep the totals numeric. Going through as.matrix() would turn every column
# into text (EVTYPE is character), so later rankings with arrange(desc(...))
# would compare the totals as strings instead of numbers, and the formatted
# text would also lose precision.
dataFinal <- as.data.frame(temp)
Now look for the top three events by fatalities and by injuries, respectively. Then plot them.
# Names of the three event types ranked highest by FTL and by INJ.
ranked_by_ftl <- arrange(dataFinal, desc(FTL))
ranked_by_inj <- arrange(dataFinal, desc(INJ))
FTL3 <- as.character(ranked_by_ftl[1:3, 1])
INJ3 <- as.character(ranked_by_inj[1:3, 1])

# Five events to plot: all of FTL3 plus the 2nd and 3rd entries of INJ3.
# NOTE(review): INJ3[1] is skipped, presumably because it repeats an entry of
# FTL3 -- verify against the data.
HH5 <- c(FTL3, INJ3[2:3])

# Row position of an event type in dataFinal (NA when it is absent).
fn <- function(name) {
  match(name, dataFinal[, 1])
}

HH5N <- vapply(HH5, fn, integer(1))
dataHH5 <- dataFinal[HH5N, ]
Total.Fatality <- as.numeric(as.character(dataHH5[["FTL"]]))
Total.Injuries <- as.numeric(as.character(dataHH5[["INJ"]]))
Plot them:
# Scatter plot of total injuries against total fatalities for the five events.
plot(
  Total.Fatality, Total.Injuries,
  col = "blue", pch = 19, cex = 2,
  main = "Top events harmful to population health"
)

# Per-point label offsets, chosen by hand so each name sits clear of its dot.
label_dx <- c(-500, 600, 300, 500, 0)
label_dy <- c(-4000, 0, 0, 0, 12000)
text(Total.Fatality + label_dx, Total.Injuries + label_dy, labels = HH5)
I find that tornadoes are the most harmful to population health.
Now look for the top events causing property damage. Make a barplot.
# Five event types with the largest PRO totals, and their rows in dataFinal.
PRO5 <- as.character(arrange(dataFinal, desc(PRO))[1:5, 1])
# vapply() pins the result to one integer row index per event name.
PRO5N <- vapply(PRO5, fn, integer(1))
dataPRO5 <- dataFinal[PRO5N, ]

# Bar heights are the PRO totals; the axis label spelling is corrected.
barplot(
  height = as.numeric(as.character(dataPRO5$PRO)),
  names.arg = PRO5,
  xlab = "Event",
  ylab = "Property Damage"
)
I find that tornadoes cause the most property damage.
Now look for the top events causing crop damage. Make a barplot.
# Five event types with the largest CRO totals, and their rows in dataFinal.
CRO5 <- as.character(arrange(dataFinal, desc(CRO))[1:5, 1])
# vapply() pins the result to one integer row index per event name.
CRO5N <- vapply(CRO5, fn, integer(1))
dataCRO5 <- dataFinal[CRO5N, ]

# Bar heights are the CRO totals; the axis label spelling is corrected.
barplot(
  height = as.numeric(as.character(dataCRO5$CRO)),
  names.arg = CRO5,
  xlab = "Event",
  ylab = "Crop Damage"
)
I find that drought causes the most crop damage.
To conclude, tornadoes are the most harmful to population health. Tornadoes and drought have the greatest economic consequences: tornadoes cause the most property damage, and drought causes the most crop damage.