##Course Project:

#The goal of the assignment is to analyse the fatallities and injuries from the NOAA Storm Database from 1950 to 2011. There were less comprehensive recorded events in the earlier years; however, the most recent records have been more completed.

##Synopsis:

#During the data analysis, the top 10 most deadly fatalities: Tornado, Excessive Heat, Flash Flood, Heat, Lighting, TSTM Wind, Flood, Rip Current, Hight Wind, and Avalanche from 1950 to 2011.

knitr::opts_chunk$set(echo = TRUE)

##Data Processing:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
#Load csv file to the report
setwd("~/Desktop/specdata")
report <- read.csv('repdata_data_StormData.csv')

##Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

#Sum each type of fatalities and rearrange the order descending in the top 10

data1 <- aggregate(FATALITIES~EVTYPE,report,sum)
data1 <- arrange(data1,desc(data1[,2]))[1:10,]

#Sum each type of injuries and rearrange the order descending in the top 10
data2 <- aggregate(INJURIES~EVTYPE,report,sum)
data2 <- arrange(data2,desc(data2[,2]))[1:10,]

#Plot the total number of the top 10 fatalities 
g<- ggplot(data1,aes(EVTYPE,FATALITIES,fill=FATALITIES))
g <-g+  geom_bar(stat='identity')+ coord_flip() + ylab("The top 10 fatalities") +
    xlab("Type of Event")

#Plot the total number of the top 10 injuries 
h<- ggplot(data2,aes(EVTYPE,INJURIES,fill=INJURIES))
h <-h+geom_bar(stat='identity')+ coord_flip() + ylab("The top 10 injuries") +
    xlab("Type of Event")
grid.arrange(g,h, ncol=1,nrow=2)

#There are 91346 Injuries and 5633 Fatalities from Tornado between 1950 to 2011 in US.

##Across the United States, which types of events have the greatest economic consequences? #Collect the data from the property and crop damage.

#Substitute character with the numeric data
#Replace special character with 1
report$PROPDMGEXP[report$PROPDMGEXP==''] = 1
report$PROPDMGEXP <- gsub('[[:punct:]]',1,report$PROPDMGEXP)
report$CROPDMGEXP[report$CROPDMGEXP==''] = 1
report$CROPDMGEXP <- gsub('[[:punct:]]',1,report$CROPDMGEXP)

#Replace H or h character with 100
report$PROPDMGEXP <- gsub('[:hH:]',100,report$PROPDMGEXP)
report$CROPDMGEXP <- gsub('[:hH:]',100,report$CROPDMGEXP)
#Replace k or K character with 1000
report$PROPDMGEXP <- gsub('[:Kk:]',1000,report$PROPDMGEXP)
report$CROPDMGEXP <- gsub('[:Kk:]',100,report$CROPDMGEXP)
#Replace b or B character with 1000000
report$PROPDMGEXP <- gsub('[:mM:]',1000000,report$PROPDMGEXP)
report$CROPDMGEXP <- gsub('[:mM:]',1000000,report$CROPDMGEXP)

#Replace b or B character with 1000000000
report$PROPDMGEXP <- gsub('[:bB:]',1000000000,report$PROPDMGEXP)
report$CROPDMGEXP <- gsub('[:bB:]',1000000000,report$CROPDMGEXP)
#Convert characters to be numeric
report$PROPDMGEXP <- as.numeric(report$PROPDMGEXP)
report$CROPDMGEXP <- as.numeric(report$CROPDMGEXP)

#Calculate the total cost of the property and crop damage

#Check for special character
unique(report$PROPDMGEXP)
##  [1] 1e+03 1e+06 1e+00 1e+09 0e+00 5e+00 6e+00 4e+00 2e+00 3e+00 1e+02 7e+00
## [13] 8e+00
unique(report$CROPDMGEXP)
## [1] 1e+00 1e+06 1e+02 1e+09 0e+00 2e+00
#Calculation of the total cost of weather disaster on property
report$PROPDMGCost <- report$PROPDMG*report$PROPDMGEXP
report$CROPDMGCost <- report$CROPDMG*report$CROPDMGEXP

#Sum each type of property damage and rearrange the order descending in the top 10
data11 <- aggregate(PROPDMGCost~EVTYPE,report,sum)
data11 <- arrange(data11,desc(data11[,2]))[1:10,]
data11
##               EVTYPE  PROPDMGCost
## 1              FLOOD 144657709807
## 2  HURRICANE/TYPHOON  69305840000
## 3            TORNADO  56937161054
## 4        STORM SURGE  43323536000
## 5        FLASH FLOOD  16140812294
## 6               HAIL  15732267427
## 7          HURRICANE  11868319010
## 8     TROPICAL STORM   7703890550
## 9       WINTER STORM   6688497250
## 10         HIGH WIND   5270046295
#Sum each type of crop damage and rearrange the order descending in the top 10
data22 <- aggregate(CROPDMGCost~EVTYPE,report,sum)
data22 <- arrange(data22,desc(data22[,2]))[1:10,]
data22
##               EVTYPE CROPDMGCost
## 1            DROUGHT 13953264600
## 2              FLOOD  5515683845
## 3        RIVER FLOOD  5026345900
## 4          ICE STORM  5020616350
## 5          HURRICANE  2739570000
## 6  HURRICANE/TYPHOON  2604540280
## 7               HAIL  2506542448
## 8       EXTREME COLD  1288623300
## 9        FLASH FLOOD  1261155710
## 10      FROST/FREEZE  1088734600

#Create a visulization comprarison for the top 10 disasters 1950-2011

#Plot the total number of the top 10 property damage 
g1 <- ggplot(data11,aes(EVTYPE,PROPDMGCost,fill=PROPDMGCost))
g1 <- g1+  geom_bar(stat='identity')+ coord_flip() + ylab("The top 10 property damage in US Dollar") +
  xlab("Type of Disaster")
#Plot the total number of the top 10 crop damage 
h1 <- ggplot(data22,aes(EVTYPE,CROPDMGCost,fill=CROPDMGCost))
h1 <-h1+geom_bar(stat='identity')+ coord_flip() + ylab("The top 10 crop damage in US Dollar") +
  xlab("Type of Disaster")
grid.arrange(g1,h1, ncol=1,nrow=2)

##Based on the data, the flood disaster is the worst property damage while the drought disaster impacts the crop yield havest between 1950 to 2011