In this analysis, the Storm Data set from the NOAA website is analyzed to investigate the damage caused by severe weather in the US. In Q1, we used the fatalities and injuries variables provided in the dataset to evaluate the harm to population health, using the mean value per event type as the indicator. We found that TSUNAMI caused the greatest harm to population health in the US. In Q2, we analyzed the property damage and the crop damage, using the total per event type, to identify the type of weather that caused the greatest economic loss. We found that WND is the type that caused the greatest economic loss.
Data source: NOAA website - Storm Data. Please see the readme file for further details.
## load data
## Read the raw storm data CSV from the working directory.
## stringsAsFactors is set explicitly so that text columns such as EVTYPE are
## character vectors on every R version (the read.csv default changed in
## R 4.0.0); with factors, unused levels would show up as NA groups in the
## per-event-type summaries computed later.
data <- read.csv(file = "repdata-data-StormData.csv", header = TRUE, sep = ",",
                 stringsAsFactors = FALSE)
## list the available columns
names(data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
## Subset the data: a record with FATALITIES = 0 and INJURIES = 0 is
## discarded, since it is not useful for the population health analysis.
## harm.count is the number of the two indicators (0, 1 or 2) that are
## non-zero for each record; records where it is 0 are dropped.
harm.count <- (data$FATALITIES != 0) + (data$INJURIES != 0)
data1 <- data[harm.count != 0, ]
## Mean number of fatalities and of injuries per event type, computed on the
## records kept in data1.
fatalities.mean <- tapply(data1$FATALITIES, INDEX = data1$EVTYPE, FUN = mean)
injuries.mean <- tapply(data1$INJURIES, INDEX = data1$EVTYPE, FUN = mean)
## sort the means in descending order (sort() also drops NA entries)
fatalities.mean <- sort(fatalities.mean, decreasing = TRUE)
injuries.mean <- sort(injuries.mean, decreasing = TRUE)
## Rank the mean values (rank 1 = smallest mean) and build a combined index:
## the sum of the fatalities rank and the injuries rank of each event type.
rank1 <- rank(fatalities.mean)
rank2 <- rank(injuries.mean)
rank3.names <- names(rank1)
## The two rank vectors are ordered differently, so the injuries rank is
## looked up by event type name rather than by position. This replaces the
## element-by-element loop that grew the result with c().
rank3.num <- rank1 + rank2[rank3.names]
rank3 <- data.frame(rank3.names, rank3.num)
## event type(s) with the highest combined rank; ties are all kept
answer <- which(rank3[, 2] == max(rank3[, 2]))
answer.1 <- rank3[answer, 1]
## Plot the mean number for the (up to) 20 event types with the highest means.
## The default x axis is suppressed (xaxt = "n") and the event type labels are
## drawn with text() instead, rotated by 90 degrees below the bars.
## names() is used for the labels because it works whether the sorted means
## are a plain named vector or a 1-d array; rownames() only works for the
## latter. seq_len(min(20, ...)) avoids NA bars when there are fewer than 20
## event types.
top.fatalities <- seq_len(min(20, length(fatalities.mean)))
x <- barplot(fatalities.mean[top.fatalities], xaxt = "n",
             main = "Mean of the fatalities by type of weather",
             ylab = "Number of fatalities")
labs <- names(fatalities.mean)[top.fatalities]
text(cex = 0.7, x = x - .1, y = -5, labs, xpd = TRUE, srt = 90)

top.injuries <- seq_len(min(20, length(injuries.mean)))
x <- barplot(injuries.mean[top.injuries], xaxt = "n",
             main = "Mean of the injuries by type of weather",
             ylab = "Number of injuries")
labs <- names(injuries.mean)[top.injuries]
text(cex = 0.7, x = x - .1, y = -5, labs, xpd = TRUE, srt = 90)
### drop the data before 1996
## Only the leading month/day/year part of BGN_DATE is parsed. Working with
## Date values avoids comparing a date-time object against a bare string and
## any dependence on the local time zone.
bgn.date <- as.Date(as.character(data$BGN_DATE), format = "%m/%d/%Y")
## Records whose date cannot be parsed are dropped; a logical NA used as a row
## index would otherwise insert all-NA rows into data2.
is.recent <- !is.na(bgn.date) & bgn.date >= as.Date("1996-01-01")
data2 <- data[is.recent, ]
### drop the data if PROPDMG = 0 and CROPDMG = 0
## which() keeps only the rows where the condition is TRUE, so NA damage
## values cannot create NA rows either.
has.damage <- data2$PROPDMG != 0 | data2$CROPDMG != 0
data2 <- data2[which(has.damage), ]
### get the exp from PROPDMGEXP and CROPDMGEXP
## Multiplier applied for each recognised exponent code. Codes are compared
## after toupper(), so lowercase "k", "m" and "b" are scaled as well; the
## original exact match on "K"/"M"/"B" left them unscaled.
exp.multiplier <- c(K = 10^3, M = 10^6, B = 10^9)
prop.mult <- unname(exp.multiplier[toupper(as.character(data2$PROPDMGEXP))])
crop.mult <- unname(exp.multiplier[toupper(as.character(data2$CROPDMGEXP))])
## Any other code (including the empty string) leaves the amount unscaled,
## which matches the previous behaviour for those records.
prop.mult[is.na(prop.mult)] <- 1
crop.mult[is.na(crop.mult)] <- 1
####
## scaled damage amounts, one value per record of data2
prop <- data2$PROPDMG * prop.mult
crop <- data2$CROPDMG * crop.mult
## Store the scaled amounts as columns "prop" and "crop" of data2. Assigning
## by name (instead of cbind) does not duplicate the columns if this chunk is
## run again.
data2$prop <- prop
data2$crop <- crop
###
## Total property damage and total crop damage per event type.
## NOTE: despite the ".mean" suffix, these hold sums (FUN = sum); the names
## are kept unchanged.
prop.mean <- tapply(data2$prop, INDEX = data2$EVTYPE, FUN = sum)
crop.mean <- tapply(data2$crop, INDEX = data2$EVTYPE, FUN = sum)
###
## Rank the totals (rank 1 = smallest total) and build a combined index: the
## sum of the property rank and the crop rank of each event type.
## rank() keeps NA entries as NA by default; tapply() yields NA for groups
## without records, e.g. unused factor levels of EVTYPE.
rank1 <- rank(prop.mean)
rank2 <- rank(crop.mean)
rank3.names <- names(rank1)
## Look the crop rank up by event type name; this replaces the
## element-by-element loop that grew the result with c().
rank3.num <- rank1 + rank2[rank3.names]
rank3 <- data.frame(rank3.names, rank3.num)
## Event type(s) with the highest combined rank; ties are all kept.
## na.rm = TRUE ignores event types without a rank; without it a single NA
## makes max() return NA and the answer comes out empty.
answer <- which(rank3[, 2] == max(rank3[, 2], na.rm = TRUE))
answer.2 <- rank3[answer, 1]
Q1. The type of weather that is most harmful to public health is TSUNAMI.
Q2. The type of weather that has caused the greatest economic loss is WND.