Load the data

# Work from the course folder so the CSV can be referenced by file name alone.
# NOTE(review): this absolute path is specific to one machine.
setwd("/Users/cindyliu/Desktop/Coursera")
# Read the raw storm CSV into a data frame.
stormData <- read.csv(file = "repdata_data_StormData.csv")
# Preview the first six rows.
head(stormData, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6
library(ggplot2)

Handling the Columns

Check column names.

# List every column name of the raw data frame.
names(stormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Extract usable columns.

# Keep only the event type plus the casualty and damage fields. A logical
# mask over the column names preserves their original left-to-right order.
storm <- stormData[, colnames(stormData) %in% c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
head(storm, n = 6L)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0

Calculating Fatalities and Injuries

Create a data frame about fatalities and injuries.

# Rows with at least one fatality or injury, reduced to the event type and
# the two casualty columns. subset() discards rows whose condition is NA,
# just as which() does.
health <- subset(
  storm,
  FATALITIES > 0 | INJURIES > 0,
  select = c("EVTYPE", "FATALITIES", "INJURIES")
)
head(health, n = 6L)
##    EVTYPE FATALITIES INJURIES
## 1 TORNADO          0       15
## 3 TORNADO          0        2
## 4 TORNADO          0        2
## 5 TORNADO          0        2
## 6 TORNADO          0        6
## 7 TORNADO          0        1

Create a data frame containing the event type, fatalities, injuries, and their total, for plotting convenience.

# Sum fatalities per event type; this frame becomes the summary table.
healthType <- aggregate(x = health$FATALITIES, by = list(health$EVTYPE), FUN = sum)
# Sum injuries over the same grouping so the rows line up with the table above.
injsum <- aggregate(x = health$INJURIES, by = list(health$EVTYPE), FUN = sum)
healthType[["Injuries"]] <- injsum[["x"]]
# Combined casualties = fatalities + injuries for each event type.
healthType[["Total"]] <- rowSums(cbind(healthType[["x"]], healthType[["Injuries"]]))
names(healthType) <- c("Type", "Fatalities", "Injuries", "Total")
# Keep only event types with more than 3000 combined casualties.
healthType <- subset(healthType, Total > 3000)
head(healthType, n = 6L)
##               Type Fatalities Injuries Total
## 32  EXCESSIVE HEAT       1903     6525  8428
## 47           FLOOD        470     6789  7259
## 69            HEAT        937     2100  3037
## 123      LIGHTNING        816     5230  6046
## 184        TORNADO       5633    91346 96979
## 191      TSTM WIND        504     6957  7461

Calculating Property and Crop Damage (Economic Loss)

Interpreting and Transforming the Units:

# Rows that report any property or crop damage, keeping the event type and
# the amount/exponent pair for each damage category.
econ <- subset(
  storm,
  PROPDMG > 0 | CROPDMG > 0,
  select = c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
)
# List the distinct exponent codes that need translating.
unique(econ[["CROPDMGEXP"]])
## [1] ""  "M" "K" "m" "B" "?" "0" "k"
unique(econ[["PROPDMGEXP"]])
##  [1] "K" "M" "B" "m" ""  "+" "0" "5" "6" "4" "h" "2" "7" "3" "H" "-"
library(plyr)

# Translate damage-exponent codes into numeric multipliers.
#
# Args:
#   code: vector of exponent codes (character or factor), e.g. "K", "M", "B".
# Returns:
#   Numeric vector, the same length as `code`, with one multiplier per element.
#   Missing (NA) codes stay NA. A code that is not in the table falls back to
#   a multiplier of 1 and raises a warning naming the code, so a single
#   unexpected symbol cannot turn an entire event type's sum into NA.
damageMultiplier <- function(code) {
  # One table shared by the property and crop columns. "+" was previously
  # unmapped for property damage, which produced NA after numeric coercion.
  known <- c(
    "K", "k", "M", "m", "B", "H", "h",
    "0", "2", "3", "4", "5", "6", "7",
    "", "?", "-", "+"
  )
  factorFor <- c(
    10^3, 10^3, 10^6, 10^6, 10^9, 10^2, 10^2,
    1, 10^2, 10^3, 10^4, 10^5, 10^6, 10^7,
    1, 1, 1, 1
  )

  # as.character() makes the lookup work for factor columns as well.
  code <- as.character(code)
  multiplier <- factorFor[match(code, known)]

  unknown <- is.na(multiplier) & !is.na(code)
  if (any(unknown)) {
    warning(
      "Unrecognised damage exponent code(s) treated as multiplier 1: ",
      paste0("'", unique(code[unknown]), "'", collapse = ", "),
      call. = FALSE
    )
    multiplier[unknown] <- 1
  }
  multiplier
}

# Replace each exponent code with its numeric multiplier.
econ$PROPDMGEXP <- damageMultiplier(econ$PROPDMGEXP)
econ$CROPDMGEXP <- damageMultiplier(econ$CROPDMGEXP)

# Scale the recorded amounts to absolute values (crop first, then property,
# which keeps the resulting column order unchanged).
econ$CROPDMGTOTAL <- econ$CROPDMG * econ$CROPDMGEXP
econ$PROPDMGTOTAL <- econ$PROPDMG * econ$PROPDMGEXP

head(econ)
##    EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP CROPDMGTOTAL PROPDMGTOTAL
## 1 TORNADO    25.0       1000       0          1            0        25000
## 2 TORNADO     2.5       1000       0          1            0         2500
## 3 TORNADO    25.0       1000       0          1            0        25000
## 4 TORNADO     2.5       1000       0          1            0         2500
## 5 TORNADO     2.5       1000       0          1            0         2500
## 6 TORNADO     2.5       1000       0          1            0         2500
# Total crop damage per event type; this frame becomes the summary table.
econType <- aggregate(x = econ$CROPDMGTOTAL, by = list(econ$EVTYPE), FUN = sum)
# Total property damage per event type, using the same grouping.
propsum <- aggregate(x = econ$PROPDMGTOTAL, by = list(econ$EVTYPE), FUN = sum)
econType[["Property"]] <- propsum[["x"]]
# Combined damage = crop + property for each event type.
econType[["Total"]] <- rowSums(cbind(econType[["x"]], econType[["Property"]]))
names(econType) <- c("Type", "Crop", "Property", "Total")
# Keep only event types whose combined damage exceeds 10^10.
econType <- subset(econType, Total > 10^10)
head(econType, n = 6L)
##                  Type        Crop     Property        Total
## 39            DROUGHT 13972566000   1046106000  15018672000
## 59        FLASH FLOOD  1421317100  16822673978  18243991078
## 72              FLOOD  5661968450 144657709807 150319678257
## 116              HAIL  3025954473  15735267513  18761221986
## 189         HURRICANE  2741910000  11868319010  14610229010
## 197 HURRICANE/TYPHOON  2607872800  69305840000  71913712800

Results

Determining the Most Harmful Events to Public Health

# Bar chart of combined fatalities and injuries for each retained event type.
ggplot(data = healthType, mapping = aes(x = Type, y = Total)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "Type", y = "Frequency Count")

“Tornado” is the type of event most harmful to population health.

Determining the Most Harmful Events to Economy

# Bar chart of combined crop and property damage for each retained event type.
# The y axis shows a summed damage amount, not a count of events, so it is
# labelled as such rather than reusing the health plot's label.
ggplot(econType, aes(x = Type, y = Total)) +
  geom_bar(stat = "identity", position = "dodge") +
  ylab("Total Damage (Crop + Property)") +
  xlab("Type")

“Flood” is the type of event that has the greatest economic consequences.