# Switch to the folder that holds the course data, read the raw storm CSV
# into `stormData`, and preview the first rows.
# NOTE(review): the working directory is an absolute, machine-specific path;
# the script will only run as-is on a machine that has this folder.
setwd("/Users/cindyliu/Desktop/Coursera")
stormData <- read.csv(file = "repdata_data_StormData.csv")
head(x = stormData)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
library(ggplot2)
Check column names.
# List every column of the raw data set before choosing which ones to keep.
names(stormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Extract usable columns.
# Keep only the event type plus the casualty and damage columns.
# The logical mask preserves the column order of `stormData`.
storm <- stormData[
  ,
  names(stormData) %in% c(
    "EVTYPE",
    "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP"
  )
]
head(storm)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
Create a data frame about fatalities and injuries.
# Events with at least one fatality or injury, reduced to the event type and
# the two casualty columns. `which()` drops rows where the test is NA.
health <- storm[
  with(storm, which(FATALITIES > 0 | INJURIES > 0)),
  c("EVTYPE", "FATALITIES", "INJURIES")
]
head(health)
## EVTYPE FATALITIES INJURIES
## 1 TORNADO 0 15
## 3 TORNADO 0 2
## 4 TORNADO 0 2
## 5 TORNADO 0 2
## 6 TORNADO 0 6
## 7 TORNADO 0 1
Create a data frame that contains the columns Type, Fatalities, Injuries, and Total (fatalities plus injuries) for plotting convenience.
# Sum fatalities and injuries per event type in a single pass, add their
# combined total, and keep only the event types whose total exceeds 3000.
healthType <- aggregate(
  health[, c("FATALITIES", "INJURIES")],
  by = list(health$EVTYPE),
  FUN = sum
)
names(healthType) <- c("Type", "Fatalities", "Injuries")
healthType$Total <- rowSums(cbind(healthType$Fatalities, healthType$Injuries))
healthType <- healthType[which(healthType$Total > 3000), ]
head(healthType)
## Type Fatalities Injuries Total
## 32 EXCESSIVE HEAT 1903 6525 8428
## 47 FLOOD 470 6789 7259
## 69 HEAT 937 2100 3037
## 123 LIGHTNING 816 5230 6046
## 184 TORNADO 5633 91346 96979
## 191 TSTM WIND 504 6957 7461
Interpreting and Transforming the Units:
# Events that caused property or crop damage, reduced to the event type, the
# damage amounts, and their exponent columns.
econ <- storm[
  with(storm, which(PROPDMG > 0 | CROPDMG > 0)),
  c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
]
# Inspect the distinct exponent symbols that have to be translated.
unique(econ[["CROPDMGEXP"]])
## [1] "" "M" "K" "m" "B" "?" "0" "k"
unique(econ[["PROPDMGEXP"]])
## [1] "K" "M" "B" "m" "" "+" "0" "5" "6" "4" "h" "2" "7" "3" "H" "-"
library(plyr)
# Translate the damage "exponent" symbols into numeric multipliers and compute
# the crop and property damage totals (amount * multiplier).
#
# One lookup table serves both columns. It contains every symbol either column
# uses, including "+", which the earlier property mapping left out: those rows
# turned into NA on numeric conversion ("NAs introduced by coercion") and the
# NA then spread into every per-event sum built from PROPDMGTOTAL.
# "+" gets a multiplier of 1, the same treatment already given to the other
# non-alphanumeric symbols ("-", "?") and to the blank entry.
expSymbols <- c(
  "", "+", "-", "?",
  "0", "2", "3", "4", "5", "6", "7",
  "h", "H", "k", "K", "m", "M", "B"
)
expMultipliers <- c(
  1, 1, 1, 1,
  1, 10^2, 10^3, 10^4, 10^5, 10^6, 10^7,
  10^2, 10^2, 10^3, 10^3, 10^6, 10^6, 10^9
)

# Convert a vector of exponent symbols to numeric multipliers.
#   symbols: character vector of exponent symbols (NA entries stay NA).
# Returns a numeric vector of the same length.
# Stops if a non-missing symbol is absent from the table, so an unmapped
# symbol can no longer become NA without anyone noticing.
expToMultiplier <- function(symbols) {
  idx <- match(symbols, expSymbols)
  unknown <- unique(symbols[is.na(idx) & !is.na(symbols)])
  if (length(unknown) > 0) {
    stop(
      "Unmapped damage exponent symbol(s): ",
      paste0("\"", unknown, "\"", collapse = ", "),
      call. = FALSE
    )
  }
  expMultipliers[idx]
}

econ$CROPDMGEXP <- expToMultiplier(econ$CROPDMGEXP)
econ$CROPDMGTOTAL <- econ$CROPDMG * econ$CROPDMGEXP
econ$PROPDMGEXP <- expToMultiplier(econ$PROPDMGEXP)
econ$PROPDMGTOTAL <- econ$PROPDMG * econ$PROPDMGEXP
head(econ)
## EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP CROPDMGTOTAL PROPDMGTOTAL
## 1 TORNADO 25.0 1000 0 1 0 25000
## 2 TORNADO 2.5 1000 0 1 0 2500
## 3 TORNADO 25.0 1000 0 1 0 25000
## 4 TORNADO 2.5 1000 0 1 0 2500
## 5 TORNADO 2.5 1000 0 1 0 2500
## 6 TORNADO 2.5 1000 0 1 0 2500
# Sum crop and property damage per event type in a single pass, add their
# combined total, and keep only the event types whose total exceeds 10^10.
econType <- aggregate(
  econ[, c("CROPDMGTOTAL", "PROPDMGTOTAL")],
  by = list(econ$EVTYPE),
  FUN = sum
)
names(econType) <- c("Type", "Crop", "Property")
econType$Total <- rowSums(cbind(econType$Crop, econType$Property))
econType <- econType[which(econType$Total > 10^10), ]
head(econType)
## Type Crop Property Total
## 39 DROUGHT 13972566000 1046106000 15018672000
## 59 FLASH FLOOD 1421317100 16822673978 18243991078
## 72 FLOOD 5661968450 144657709807 150319678257
## 116 HAIL 3025954473 15735267513 18761221986
## 189 HURRICANE 2741910000 11868319010 14610229010
## 197 HURRICANE/TYPHOON 2607872800 69305840000 71913712800
# Bar chart of combined fatalities and injuries for each retained event type.
ggplot(data = healthType, mapping = aes(x = Type, y = Total)) +
  geom_col(position = "dodge") +
  labs(x = "Type", y = "Frequency Count")
“Tornado” is the type of event that is most harmful to population health.
# Bar chart of combined crop and property damage for each retained event type.
# The y axis shows a damage amount (Crop + Property from `econType`), not a
# count, so it is labelled as such instead of "Frequency Count".
# NOTE(review): the unit is taken to be US dollars, as in the NOAA storm
# database documentation - confirm.
ggplot(econType, aes(x = Type, y = Total)) +
  geom_bar(stat = "identity", position = "dodge") +
  ylab("Total crop and property damage (USD)") +
  xlab("Type")
“Flood” is the type of event that has the greatest economic consequences.