Synopsis

This report downloads and analyses storm data from the National Weather Service from two perspectives. The first is the health impact, measured in fatalities and injuries. The second is the economic impact, measured in property damage and crop damage.

Data Processing

require(R.utils)
require(dplyr)
require(ggplot2)
# Data Processing
# File names for the downloaded bzip2 archive and the CSV extracted from it.
destzipFileName <- "stormData.bz2"
destCSV <- "data.csv"

# Download and decompress only when the extracted CSV is not already on disk,
# so repeated runs reuse the local copy.
if (!file.exists(destCSV)) {
    url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
    # mode = "wb": the archive is binary, and a text-mode transfer can corrupt
    # it on Windows.
    download.file(url, destzipFileName, method = "libcurl", mode = "wb")
    bunzip2(destzipFileName, destCSV)
}

# Load the full storm data set; every later step works from `ds`.
ds <- read.csv(destCSV, header = TRUE, sep = ",")

Using the dplyr package, we group the data by EVTYPE and sum the fatalities and injuries to measure health damage, and the property and crop damages to measure economic impact. For health damage, we simply add the fatalities and injuries together into a single total.

# Top 10 event types ranked by combined injuries and fatalities.
# `ds` is the storm data frame already loaded in the Data Processing step, so
# the CSV is not read from disk a second time here.
health_ds <- ds %>%
    group_by(EVTYPE) %>%
    summarise(INJURIES = sum(INJURIES), FATALITIES = sum(FATALITIES)) %>%
    # Compute the combined total once, then rank on it.
    mutate(health_damage = FATALITIES + INJURIES) %>%
    arrange(desc(health_damage)) %>%
    head(10)

For the economic damage, we need to multiply the property damage by the unit given in PROPDMGEXP, which uses the codes K (1,000), M (1 million), and B (1 billion). Crop damage is scaled the same way using CROPDMGEXP. There are some other codes, such as 0, 1, 2, …, 8 or ?; since those have limited impact on the data, we do not multiply by them.

# Translate damage exponent codes into numeric multipliers.
#   exp_code: vector of unit codes (character or factor), e.g. PROPDMGEXP.
# Returns a numeric vector of the same length: 1e3 for K, 1e6 for M, 1e9 for B
# (matched case-insensitively), and 1 for every other code, blank or NA.
damage_multiplier <- function(exp_code) {
    units <- c(K = 1e3, M = 1e6, B = 1e9)
    code <- toupper(as.character(exp_code))
    multiplier <- unname(units[match(code, names(units))])
    # Unrecognised codes leave the recorded amount unscaled.
    multiplier[is.na(multiplier)] <- 1
    multiplier
}

# Top 10 event types ranked by combined property and crop damage.
# PDMG and CDMG are in dollars; ECONOMY_DAMAGE is their sum in millions of
# dollars, matching the unit shown on the results plot.
economy_ds <- ds %>%
    select(EVTYPE, PROPDMG, CROPDMG, PROPDMGEXP, CROPDMGEXP) %>%
    mutate(PDMG = PROPDMG * damage_multiplier(PROPDMGEXP),
           CDMG = CROPDMG * damage_multiplier(CROPDMGEXP)) %>%
    group_by(EVTYPE) %>%
    summarise(PDMG = sum(PDMG), CDMG = sum(CDMG)) %>%
    mutate(ECONOMY_DAMAGE = (PDMG + CDMG) / 1e6) %>%
    arrange(desc(ECONOMY_DAMAGE)) %>%
    head(10)

Results

require(ggplot2)
# Horizontal bar chart of the event types in health_ds, ordered by their
# combined injuries and fatalities (largest at the top after coord_flip).
# EVTYPE is mapped to colour and fill only; it is not mapped to alpha because
# ggplot2 advises against using alpha for a discrete variable.
ggplot(health_ds, aes(x = reorder(EVTYPE, health_damage), y = health_damage)) +
    geom_bar(stat = "identity", aes(col = EVTYPE, fill = EVTYPE)) +
    coord_flip() +
    ggtitle("Health Damage of Weather Disasters") +
    labs(y = "Sum of injuries and fatalities", x = "Disaster Type")

# Horizontal bar chart of the event types in economy_ds, ordered by
# ECONOMY_DAMAGE (largest at the top after coord_flip).
# EVTYPE is mapped to colour and fill only; it is not mapped to alpha because
# ggplot2 advises against using alpha for a discrete variable.
ggplot(economy_ds, aes(x = reorder(EVTYPE, ECONOMY_DAMAGE), y = ECONOMY_DAMAGE)) +
    geom_bar(stat = "identity", aes(col = EVTYPE, fill = EVTYPE)) +
    coord_flip() +
    ggtitle("Economic Damage of Weather Disasters") +
    labs(y = "Sum of crop and property damage (millions)", x = "Disaster Type")