Synopsis

This analysis explores the NOAA Storm Database. It tries to address the following questions: 1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? 2. Across the United States, which types of events have the greatest economic consequences? I find: 1. Tornadoes are the most harmful events to population health. 2. Tornadoes cause the most property damage, while drought causes the most crop damage.

Data Processing

Load the data.

# Read the storm CSV, treating the literal "NA" as missing and keeping text
# columns as character vectors instead of factors.
# NOTE(review): the recorded run below warned "EOF within quoted string", so
# the parse may be incomplete or mis-split — confirm the resulting row count.
data <- read.csv(
  "repdata%2Fdata%2FStormData.csv",
  na.strings = "NA",
  stringsAsFactors = FALSE
)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec =
## dec, : EOF within quoted string

Check the column names. Drop the columns that are not needed. Drop the rows with abnormal values.

# List the columns that were read so the ones needed below can be located.
colName <- names(data)
colName
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Resolve the kept columns by name rather than by magic position, so a changed
# column layout fails loudly instead of silently selecting the wrong fields.
# With the layout printed above this yields positions 8 and 23 to 28.
indicators <- match(
  c(
    "EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
  ),
  colName
)
stopifnot(!anyNA(indicators))
# Keep only the first 547363 rows; the report treats the rows beyond this
# point as abnormal.
# NOTE(review): hard-coded cutoff, presumably tied to the read warning above —
# confirm it still fits the input file.
data <- data[seq_len(547363), indicators]
head(data)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO       0.00    15.00   25.00          K    0.00           
## 2 TORNADO       0.00     0.00    2.50          K    0.00           
## 3 TORNADO       0.00     2.00   25.00          K    0.00           
## 4 TORNADO       0.00     2.00    2.50          K    0.00           
## 5 TORNADO       0.00     2.00    2.50          K    0.00           
## 6 TORNADO       0.00     6.00    2.50          K    0.00

Calculate each event’s total damage to both property and crops.
Property damage first.

# Scale PROPDMG (column 4) by the multiplier encoded in PROPDMGEXP (column 5).
exp1 <- data[, 5]
exp1 <- as.factor(exp1)
levels(exp1)
##  [1] ""  "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"
# Look each code up by name instead of relabelling factor levels by position:
# the order of factor levels depends on the locale's collation, so a positional
# label vector can silently attach the wrong multiplier. Codes with no entry
# here ("", "-", "?", "+") become NA, exactly as in the positional mapping.
prop_multiplier <- c(
  "0" = 1, "1" = 10, "2" = 100, "3" = 1000, "4" = 10000,
  "5" = 100000, "6" = 1000000, "7" = 10000000, "8" = 100000000,
  "B" = 1000000000,
  "h" = 100, "H" = 100,
  "K" = 1000,
  "m" = 1000000, "M" = 1000000
)
prop_scale <- unname(prop_multiplier[as.character(exp1)])
# Stored as a factor so the column prints the same way as in the recorded
# output; the numeric vector prop_scale is what is used for the arithmetic.
data[, 5] <- factor(prop_scale)
head(data)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO       0.00    15.00   25.00       1000    0.00           
## 2 TORNADO       0.00     0.00    2.50       1000    0.00           
## 3 TORNADO       0.00     2.00   25.00       1000    0.00           
## 4 TORNADO       0.00     2.00    2.50       1000    0.00           
## 5 TORNADO       0.00     2.00    2.50       1000    0.00           
## 6 TORNADO       0.00     6.00    2.50       1000    0.00
# Convert the damage figures to numbers, then apply the multiplier. Rows whose
# code has no multiplier end up NA.
data[, 4] <- as.numeric(data[, 4])
data[, 4] <- data[, 4] * prop_scale
head(data)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO       0.00    15.00   25000       1000    0.00           
## 2 TORNADO       0.00     0.00    2500       1000    0.00           
## 3 TORNADO       0.00     2.00   25000       1000    0.00           
## 4 TORNADO       0.00     2.00    2500       1000    0.00           
## 5 TORNADO       0.00     2.00    2500       1000    0.00           
## 6 TORNADO       0.00     6.00    2500       1000    0.00

Crop damage next.

# Scale CROPDMG (column 6) by the multiplier encoded in CROPDMGEXP (column 7).
exp2 <- data[, 7]
exp2 <- as.factor(exp2)
levels(exp2)
## [1] ""  "?" "0" "2" "B" "k" "K" "m" "M"
# Look each code up by name instead of relabelling factor levels by position:
# the order of factor levels depends on the locale's collation, so a positional
# label vector can silently attach the wrong multiplier. Codes with no entry
# here ("" and "?") become NA, exactly as in the positional mapping.
crop_multiplier <- c(
  "0" = 1, "2" = 100,
  "B" = 1000000000,
  "k" = 1000, "K" = 1000,
  "m" = 1000000, "M" = 1000000
)
crop_scale <- unname(crop_multiplier[as.character(exp2)])
# Stored as a factor so the column prints the same way as in the recorded
# output; the numeric vector crop_scale is what is used for the arithmetic.
data[, 7] <- factor(crop_scale)
tail(data)
##             EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG
## 547358   TSTM WIND       0.00     0.00   4e+05       1000    0.00
## 547359        HAIL       0.00     0.00      NA       <NA>    0.00
## 547360        HAIL       0.00     0.00   2e+05       1000  100.00
## 547361 FLASH FLOOD       0.00     0.00      NA       <NA>    0.00
## 547362        HAIL       0.00     0.00      NA       <NA>    0.00
## 547363        HAIL       0.00     0.00      NA       <NA>    0.00
##        CROPDMGEXP
## 547358       <NA>
## 547359       <NA>
## 547360       1000
## 547361       <NA>
## 547362       <NA>
## 547363       <NA>
# Convert the damage figures to numbers, then apply the multiplier. Rows whose
# code has no multiplier end up NA.
data[, 6] <- as.numeric(data[, 6])
data[, 6] <- data[, 6] * crop_scale
# Drop the two exponent columns (5 and 7); they are no longer needed.
data <- data[, c(1, 2, 3, 4, 6)]

Group the data by EVTYPE. Sum the damage, fatalities and injuries for each event type. Save the result as “dataFinal”.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Make sure the casualty counts (columns 2 and 3) are numeric before summing.
data[, 2] <- as.numeric(data[, 2])
data[, 3] <- as.numeric(data[, 3])
# Total fatalities, injuries, property damage and crop damage per event type.
# Missing values are skipped in each sum.
EVT <- group_by(data, EVTYPE)
temp <- summarize(
  EVT,
  FTL = sum(FATALITIES, na.rm = TRUE),
  INJ = sum(INJURIES, na.rm = TRUE),
  PRO = sum(PROPDMG, na.rm = TRUE),
  CRO = sum(CROPDMG, na.rm = TRUE)
)
# Convert to a plain data frame while keeping the totals numeric. Going through
# as.matrix() would turn every column into formatted text because EVTYPE is
# character, and sorting on the totals would then rank strings, not numbers.
dataFinal <- as.data.frame(temp)

Results

Now look for the top three events by fatalities and by injuries, respectively. Then plot them.

# Names of the three event types with the largest FTL and INJ totals.
by_fatalities <- arrange(dataFinal, desc(FTL))
by_injuries <- arrange(dataFinal, desc(INJ))
FTL3 <- as.character(by_fatalities[1:3, 1])
INJ3 <- as.character(by_injuries[1:3, 1])
# Combine the fatality top three with the 2nd and 3rd injury entries.
# NOTE(review): the first injury entry is skipped, presumably because it is
# the same event as the first fatality entry — confirm.
HH5 <- c(FTL3, INJ3[2:3])

# Row position of an event name in the first column of dataFinal.
fn <- function(name) {
  match(name, dataFinal[, 1])
}

# Pull the summary rows for the selected events and extract numeric totals.
HH5N <- vapply(HH5, fn, integer(1))
dataHH5 <- dataFinal[HH5N, ]
Total.Fatality <- as.numeric(as.character(dataHH5[, 2]))
Total.Injuries <- as.numeric(as.character(dataHH5[, 3]))

Plot them:

# Scatter plot of total injuries against total fatalities for the selected
# events.
plot(
  Total.Fatality, Total.Injuries,
  col = "blue", pch = 19, cex = 2,
  main = "Top events harmful to population health"
)
# Label each point with its event name, nudged by a per-point offset so the
# text does not sit on top of the marker.
label_dx <- c(-500, 600, 300, 500, 0)
label_dy <- c(-4000, 0, 0, 0, 12000)
text(
  Total.Fatality[1:5] + label_dx,
  Total.Injuries[1:5] + label_dy,
  labels = HH5[1:5]
)

I find that tornadoes are the most harmful to population health.
Now look for the top events causing property damage. Make a barplot.

# Names of the five event types with the largest PRO totals, and their rows
# in dataFinal (located through the fn() lookup defined earlier).
by_property <- arrange(dataFinal, desc(PRO))
PRO5 <- as.character(by_property[1:5, 1])
PRO5N <- vapply(PRO5, fn, integer(1))
dataPRO5 <- dataFinal[PRO5N, ]

# Bar chart of the PRO totals for those five events.
property_totals <- as.numeric(as.character(dataPRO5[["PRO"]]))
barplot(
  property_totals,
  xlab = "Event",
  ylab = "Property Demage",
  names.arg = PRO5
)

I find that tornadoes cause the most property damage.

Now look for the top events causing crop damage. Make a barplot.

# Names of the five event types with the largest CRO totals, and their rows
# in dataFinal (located through the fn() lookup defined earlier).
by_crop <- arrange(dataFinal, desc(CRO))
CRO5 <- as.character(by_crop[1:5, 1])
CRO5N <- vapply(CRO5, fn, integer(1))
dataCRO5 <- dataFinal[CRO5N, ]

# Bar chart of the CRO totals for those five events.
crop_totals <- as.numeric(as.character(dataCRO5[["CRO"]]))
barplot(
  crop_totals,
  xlab = "Event",
  ylab = "Crop Property Demage",
  names.arg = CRO5
)

I find that drought causes the most crop damage.

To conclude, tornadoes are the most harmful to population health. Tornadoes and drought have the greatest economic consequences: tornadoes cause the most property damage, and drought causes the most crop damage.