1 Synopsis

This report identifies the weather events that are most harmful to public health and to property.
The investigation shows that the most harmful events are tornadoes (public health) and floods (property damage).
In addition to property damage, the data also record crop damage.  
However, the crop damage amounts are small, so they are not included here.

2 Data Processing

2.1 Read csv file and make the data frame

# Session setup: switch every locale category to "English", attach ggplot2
# and load the raw storm records into DF.
# NOTE(review): "English" looks like a Windows-style locale name (the recorded
# output below is English_United States.1252) -- confirm it is accepted on
# other platforms.
Sys.setlocale(category = "LC_ALL", locale = "English")
## [1] "LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252"
# ggplot2 provides the plotting functions used in the Results section.
library(ggplot2)
# Read the CSV relative to the working directory; text columns stay character
# vectors instead of being converted to factors.
DF <- read.csv("./data/repdata-data-StormData.csv", stringsAsFactors = FALSE)

2.2 Check the column and data

# List the column names of the raw data frame.
colnames(DF)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

2.3 Make dataset with mandatory fields

# Keep only the columns this analysis reads: event date and type, the two
# health counts, and the damage amounts with their exponent codes.
DF <- DF[c("BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
           "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Show the first ten records of the reduced data frame.
head(DF, n = 10L)
##              BGN_DATE  EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP
## 1   4/18/1950 0:00:00 TORNADO          0       15    25.0          K
## 2   4/18/1950 0:00:00 TORNADO          0        0     2.5          K
## 3   2/20/1951 0:00:00 TORNADO          0        2    25.0          K
## 4    6/8/1951 0:00:00 TORNADO          0        2     2.5          K
## 5  11/15/1951 0:00:00 TORNADO          0        2     2.5          K
## 6  11/15/1951 0:00:00 TORNADO          0        6     2.5          K
## 7  11/16/1951 0:00:00 TORNADO          0        1     2.5          K
## 8   1/22/1952 0:00:00 TORNADO          0        0     2.5          K
## 9   2/13/1952 0:00:00 TORNADO          1       14    25.0          K
## 10  2/13/1952 0:00:00 TORNADO          0        0    25.0          K
##    CROPDMG CROPDMGEXP
## 1        0           
## 2        0           
## 3        0           
## 4        0           
## 5        0           
## 6        0           
## 7        0           
## 8        0           
## 9        0           
## 10       0
# Show the last ten records of the reduced data frame.
tail(DF, n = 10L)
##                  BGN_DATE         EVTYPE FATALITIES INJURIES PROPDMG
## 902288  11/5/2011 0:00:00 WINTER WEATHER          0        0       0
## 902289 11/28/2011 0:00:00   FROST/FREEZE          0        0       0
## 902290 11/12/2011 0:00:00      HIGH WIND          0        0       0
## 902291 11/28/2011 0:00:00 WINTER WEATHER          0        0       0
## 902292 11/28/2011 0:00:00 WINTER WEATHER          0        0       0
## 902293 11/30/2011 0:00:00      HIGH WIND          0        0       0
## 902294 11/10/2011 0:00:00      HIGH WIND          0        0       0
## 902295  11/8/2011 0:00:00      HIGH WIND          0        0       0
## 902296  11/9/2011 0:00:00       BLIZZARD          0        0       0
## 902297 11/28/2011 0:00:00     HEAVY SNOW          0        0       0
##        PROPDMGEXP CROPDMG CROPDMGEXP
## 902288          K       0          K
## 902289          K       0          K
## 902290          K       0          K
## 902291          K       0          K
## 902292          K       0          K
## 902293          K       0          K
## 902294          K       0          K
## 902295          K       0          K
## 902296          K       0          K
## 902297          K       0          K

2.4 Find invalid data row

# Abort if any BGN_DATE cannot be parsed as month/day/year.
# as.Date() is vectorised, so the whole column is checked in one call instead
# of one call per row; this also behaves sensibly for a zero-row data frame,
# where `1:nrow(DF)` would have iterated over c(1, 0).
if (anyNA(as.Date(DF$BGN_DATE, "%m/%d/%Y"))) {
    stop("Invalid data found ")
}
# The original row loop left its counter `i` equal to nrow(DF) on success;
# keep that value so any later code that reads `i` sees the same thing.
i <- nrow(DF)

Make the “YEAR” field. It is not needed for this investigation, but it will be needed in the future.

# Parse the month/day/year part of BGN_DATE, then store the four-digit year
# of each event as an integer column.
dates <- as.Date(DF[["BGN_DATE"]], format = "%m/%d/%Y")
DF[["YEAR"]] <- as.integer(format(dates, format = "%Y"))

2.5 Make the “PROPDMGEXP_POW” field to calculate the damage amounts.

# Every row passed the BGN_DATE validation, so the full data set is kept.
# The former `DF <- DF[1:i-1,]` parsed as `DF[(1:i) - 1, ]` because `:` binds
# tighter than `-`; index 0 is ignored, so it silently dropped the last record.
unique(DF$PROPDMGEXP)
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-"
## [18] "1" "8"
# Translate each PROPDMGEXP code into a power of ten, stored as character like
# the source column: B -> 9, M -> 6, K -> 3, H -> 2 (case-insensitive);
# "+", "-", "?" and the empty string -> 0. Digit codes are left unchanged.
# Logical masks are used instead of `DF[grep(...), ]$col <- value`, which
# raises an error whenever a pattern matches no rows.
DF$PROPDMGEXP_POW <- DF$PROPDMGEXP
DF$PROPDMGEXP_POW[grepl("B", DF$PROPDMGEXP, ignore.case = TRUE)] <- "9"
DF$PROPDMGEXP_POW[grepl("M", DF$PROPDMGEXP, ignore.case = TRUE)] <- "6"
DF$PROPDMGEXP_POW[grepl("K", DF$PROPDMGEXP, ignore.case = TRUE)] <- "3"
DF$PROPDMGEXP_POW[grepl("H", DF$PROPDMGEXP, ignore.case = TRUE)] <- "2"
DF$PROPDMGEXP_POW[grepl("\\+", DF$PROPDMGEXP, ignore.case = TRUE)] <- "0"
DF$PROPDMGEXP_POW[grepl("\\-", DF$PROPDMGEXP, ignore.case = TRUE)] <- "0"
DF$PROPDMGEXP_POW[grepl("\\?", DF$PROPDMGEXP, ignore.case = TRUE)] <- "0"
DF$PROPDMGEXP_POW[!nzchar(DF$PROPDMGEXP)] <- "0"

2.6 Review the values of PROPDMGEXP_POW

# List the distinct exponent values that remain after the mapping.
unique(DF[["PROPDMGEXP_POW"]])
##  [1] "3" "6" "0" "9" "5" "4" "2" "7" "1" "8"

2.8 Sort and extract top 10 disaster on injuries

# Keep the ten rows of DF.injuries with the largest `x`, largest first.
# DF.injuries is built in a part of the report not shown here; `x` is
# presumably the injury total per event type -- confirm against that step.
DF.injuries <- DF.injuries[order(DF.injuries$x, decreasing = TRUE)[1:10], ]

2.10 Sort and extract top 10 disaster on fatalities

# Keep the ten rows of DF.fatalities with the largest `x`, largest first.
# DF.fatalities is built in a part of the report not shown here; `x` is
# presumably the fatality total per event type -- confirm against that step.
DF.fatalities <- DF.fatalities[order(DF.fatalities$x, decreasing = TRUE)[1:10], ]

2.12 Sort and extract top 10 disaster on property

# Keep the ten rows of DF.property with the largest `x`, largest first.
# DF.property is built in a part of the report not shown here; `x` is
# presumably the property-damage total per event type -- confirm against
# that step.
DF.property <- DF.property[order(DF.property$x, decreasing = TRUE)[1:10], ]

3 Results

3.1 Question 1

Q1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
Answer: The following plots show the 10 most dangerous disasters for public health.
The investigation confirmed that a **tornado is the most dangerous hazard**.
# Plot injuries: one bar per event type in DF.injuries, with bar height `x`
# and bars ordered by `x` via reorder().
# `binwidth` was dropped: it is not a parameter of geom_bar() with
# stat = "identity", and ggplot2 warns about it (some releases stop with an
# error).
ggplot(DF.injuries, aes(reorder(EVENT_TYPE, x), x, fill = EVENT_TYPE)) +
    geom_bar(position = "dodge", stat = "identity") +
    ggtitle("Top 10 disasters of injuries") +
    labs(x = "", y = "Number of people injured") +
    # Tilt the event-type labels so long names do not overlap.
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Plot fatalities: one bar per event type in DF.fatalities, with bar height
# `x` and bars ordered by `x` via reorder().
# `binwidth` was dropped: it is not a parameter of geom_bar() with
# stat = "identity", and ggplot2 warns about it (some releases stop with an
# error).
ggplot(DF.fatalities, aes(reorder(EVENT_TYPE, x), x, fill = EVENT_TYPE)) +
    geom_bar(position = "dodge", stat = "identity") +
    ggtitle("Top 10 disasters of fatalities") +
    labs(x = "", y = "Number of people fatalities") +
    # Tilt the event-type labels so long names do not overlap.
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

3.2 Question 2

Q2. Across the United States, which types of events have the greatest economic consequences?
Answer: The following plot shows the 10 most dangerous disasters for property.
The investigation confirmed that a **flood is the most dangerous hazard**.
# Plot property damage: one bar per event type in DF.property, with bar
# height `x` and bars ordered by `x` via reorder().
# `binwidth` was dropped: it is not a parameter of geom_bar() with
# stat = "identity", and ggplot2 warns about it (some releases stop with an
# error).
ggplot(DF.property, aes(reorder(EVENT_TYPE, x), x, fill = EVENT_TYPE)) +
    geom_bar(position = "dodge", stat = "identity") +
    ggtitle("The 10 most harmful events to property") +
    labs(x = "", y = "Amounts of property damage") +
    # Tilt the event-type labels so long names do not overlap.
    theme(axis.text.x = element_text(angle = 45, hjust = 1))