Analysis of harmful weather events in the US

This project involves exploring the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

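The main goal is to answer the following two questions:

1. Across the United States, which types of events are most harmful with respect to population health?
2. Across the United States, which types of events have the greatest economic consequences?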

Data Processing

Load required library

library(ggplot2)

Processing data

# Download the compressed storm data once; later runs reuse the local copy.
# The URL must be one unbroken string: the earlier version had whitespace and
# a newline embedded before ".bz2", which made the address invalid.
if (!file.exists("Storm_Data.csv.bz2")) {
    download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
        "Storm_Data.csv.bz2", method = "curl")
}
# read.csv() is given the .bz2 file directly, with no separate decompression step.
# The name `table` is kept because the processing code below reads it.
table <- read.csv("Storm_Data.csv.bz2")

# Keep only the event type and the three impact measures, then sum each
# measure within every event type.
columns <- c("EVTYPE", "INJURIES", "FATALITIES", "PROPDMG")
filter_table <- table[columns]
result <- aggregate(
    cbind(INJURIES, FATALITIES, PROPDMG) ~ EVTYPE,
    data = filter_table,
    FUN = sum
)
# Store event types as plain character strings.
result[["EVTYPE"]] <- as.character(result[["EVTYPE"]])

Get the total number of different event types:

# Count the distinct event types present in the aggregated result.
length(unique(result[["EVTYPE"]]))
## [1] 977

Get the top 10 most harmful weather events across all types:

# Rank event types by each impact measure (largest total first) and keep the
# ten highest. head() is used rather than [1:10, ] so that a result with fewer
# than ten event types is returned as-is instead of being padded with NA rows.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
injuries_top10 <- head(
    result[order(result$INJURIES, decreasing = TRUE), c("EVTYPE", "INJURIES")],
    10
)
fatalities_top10 <- head(
    result[order(result$FATALITIES, decreasing = TRUE), c("EVTYPE", "FATALITIES")],
    10
)
dmg_top10 <- head(
    result[order(result$PROPDMG, decreasing = TRUE), c("EVTYPE", "PROPDMG")],
    10
)

Result

The most harmful events with respect to population health

Top 10 events causing injuries:

# Bar chart of the ten event types with the most injuries; reorder() sorts the
# x-axis categories by injury total. Columns are referenced by bare name inside
# aes() so the mapping is evaluated against the data frame given to ggplot()
# rather than against a separately captured vector.
plot1 <- ggplot(injuries_top10, aes(x = reorder(EVTYPE, INJURIES), y = INJURIES)) +
    geom_bar(stat = "identity", fill = "steelblue") +
    theme(axis.text.x = element_text(angle = -90)) +
    labs(x = "Events", title = "Top 10 events cause injuries")
print(plot1)

plot of chunk unnamed-chunk-5

# Label each row with its event type, then display the table.
row.names(injuries_top10) <- injuries_top10[["EVTYPE"]]
injuries_top10
##                              EVTYPE INJURIES
## TORNADO                     TORNADO    80690
## TSTM WIND                 TSTM WIND     6683
## FLOOD                         FLOOD     6606
## EXCESSIVE HEAT       EXCESSIVE HEAT     4652
## LIGHTNING                 LIGHTNING     4022
## ICE STORM                 ICE STORM     1963
## FLASH FLOOD             FLASH FLOOD     1442
## HURRICANE/TYPHOON HURRICANE/TYPHOON     1272
## WINTER STORM           WINTER STORM     1255
## HAIL                           HAIL     1162

Top 10 events causing fatalities:

# Bar chart of the ten event types with the most fatalities; reorder() sorts
# the x-axis categories by fatality total. Columns are referenced by bare name
# inside aes() so the mapping is evaluated against the data frame given to
# ggplot() rather than against a separately captured vector.
plot2 <- ggplot(fatalities_top10, aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)) +
    geom_bar(stat = "identity", fill = "steelblue") +
    theme(axis.text.x = element_text(angle = -90)) +
    labs(x = "Events", title = "Top 10 events cause fatalities")
print(plot2)

plot of chunk unnamed-chunk-6

# Reset the row labels to the default numbering, then display the table.
row.names(fatalities_top10) <- NULL
fatalities_top10
##            EVTYPE FATALITIES
## 1         TORNADO       4700
## 2  EXCESSIVE HEAT       1527
## 3            HEAT        708
## 4     FLASH FLOOD        622
## 5       LIGHTNING        608
## 6       TSTM WIND        488
## 7           FLOOD        287
## 8    RIP CURRENTS        204
## 9       HIGH WIND        199
## 10   WINTER STORM        182

The events with the greatest economic consequences

Top 10 events causing property damage:

# Bar chart of the ten event types with the largest summed PROPDMG values;
# reorder() sorts the x-axis categories by that total. Columns are referenced
# by bare name inside aes() so the mapping is evaluated against the data frame
# given to ggplot() rather than against a separately captured vector.
plot3 <- ggplot(dmg_top10, aes(x = reorder(EVTYPE, PROPDMG), y = PROPDMG)) +
    geom_bar(stat = "identity", fill = "steelblue") +
    theme(axis.text.x = element_text(angle = -90)) +
    labs(x = "Events", title = "Top 10 events cause lossing properties")
print(plot3)

plot of chunk unnamed-chunk-7

# Reset the row labels to the default numbering, then display the table.
row.names(dmg_top10) <- NULL
dmg_top10
##                EVTYPE PROPDMG
## 1             TORNADO 2653286
## 2           TSTM WIND 1195630
## 3         FLASH FLOOD  845838
## 4  THUNDERSTORM WINDS  446293
## 5                HAIL  441428
## 6               FLOOD  409389
## 7           LIGHTNING  376062
## 8           HIGH WIND  186278
## 9          HEAVY SNOW   99542
## 10       WINTER STORM   71162