Synopsis

Using the U.S. National Oceanic and Atmospheric Administration (NOAA) storm database, we analyze:

  1. The most harmful types of events across the United States with respect to population health.
  2. The types of events across the United States that have the greatest economic consequences.

Based on fatality and injury counts, tornadoes were the most harmful events with respect to population health. Based on property and crop damage estimates, floods caused the greatest property damage, while droughts caused the greatest crop damage.

Settings and library loading

Data loading

# Obtain the storm data: fetch the bzip2 archive and expand it with
# bunzip2() from the "R.utils" package, unless the CSV is already on disk.
csv_file <- "repdata_data_StormData.csv"
bz2_file <- "repdata_data_StormData.csv.bz2"

if (!file.exists(csv_file)) {
  URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(url = URL, destfile = bz2_file)
  # remove = FALSE keeps the downloaded archive next to the extracted CSV
  bunzip2(bz2_file, csv_file, remove = FALSE)
}

# Load the extracted CSV into a data frame
data <- read.csv(file = csv_file)

Explore the dataset

# Print the column names of the loaded storm data
names(data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Print the dimensions plus each column's type and first few values
str(data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Print per-column summaries (quartiles and NA counts for numeric columns)
summary(data)
##     STATE__       BGN_DATE           BGN_TIME          TIME_ZONE        
##  Min.   : 1.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.:19.0   Class :character   Class :character   Class :character  
##  Median :30.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :31.2                                                           
##  3rd Qu.:45.0                                                           
##  Max.   :95.0                                                           
##                                                                         
##      COUNTY       COUNTYNAME           STATE              EVTYPE         
##  Min.   :  0.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.: 31.0   Class :character   Class :character   Class :character  
##  Median : 75.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100.6                                                           
##  3rd Qu.:131.0                                                           
##  Max.   :873.0                                                           
##                                                                          
##    BGN_RANGE          BGN_AZI           BGN_LOCATI          END_DATE        
##  Min.   :   0.000   Length:902297      Length:902297      Length:902297     
##  1st Qu.:   0.000   Class :character   Class :character   Class :character  
##  Median :   0.000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :   1.484                                                           
##  3rd Qu.:   1.000                                                           
##  Max.   :3749.000                                                           
##                                                                             
##    END_TIME           COUNTY_END COUNTYENDN       END_RANGE       
##  Length:902297      Min.   :0    Mode:logical   Min.   :  0.0000  
##  Class :character   1st Qu.:0    NA's:902297    1st Qu.:  0.0000  
##  Mode  :character   Median :0                   Median :  0.0000  
##                     Mean   :0                   Mean   :  0.9862  
##                     3rd Qu.:0                   3rd Qu.:  0.0000  
##                     Max.   :0                   Max.   :925.0000  
##                                                                   
##    END_AZI           END_LOCATI            LENGTH              WIDTH         
##  Length:902297      Length:902297      Min.   :   0.0000   Min.   :   0.000  
##  Class :character   Class :character   1st Qu.:   0.0000   1st Qu.:   0.000  
##  Mode  :character   Mode  :character   Median :   0.0000   Median :   0.000  
##                                        Mean   :   0.2301   Mean   :   7.503  
##                                        3rd Qu.:   0.0000   3rd Qu.:   0.000  
##                                        Max.   :2315.0000   Max.   :4400.000  
##                                                                              
##        F               MAG            FATALITIES          INJURIES        
##  Min.   :0.0      Min.   :    0.0   Min.   :  0.0000   Min.   :   0.0000  
##  1st Qu.:0.0      1st Qu.:    0.0   1st Qu.:  0.0000   1st Qu.:   0.0000  
##  Median :1.0      Median :   50.0   Median :  0.0000   Median :   0.0000  
##  Mean   :0.9      Mean   :   46.9   Mean   :  0.0168   Mean   :   0.1557  
##  3rd Qu.:1.0      3rd Qu.:   75.0   3rd Qu.:  0.0000   3rd Qu.:   0.0000  
##  Max.   :5.0      Max.   :22000.0   Max.   :583.0000   Max.   :1700.0000  
##  NA's   :843563                                                           
##     PROPDMG         PROPDMGEXP           CROPDMG         CROPDMGEXP       
##  Min.   :   0.00   Length:902297      Min.   :  0.000   Length:902297     
##  1st Qu.:   0.00   Class :character   1st Qu.:  0.000   Class :character  
##  Median :   0.00   Mode  :character   Median :  0.000   Mode  :character  
##  Mean   :  12.06                      Mean   :  1.527                     
##  3rd Qu.:   0.50                      3rd Qu.:  0.000                     
##  Max.   :5000.00                      Max.   :990.000                     
##                                                                           
##      WFO             STATEOFFIC         ZONENAMES            LATITUDE   
##  Length:902297      Length:902297      Length:902297      Min.   :   0  
##  Class :character   Class :character   Class :character   1st Qu.:2802  
##  Mode  :character   Mode  :character   Mode  :character   Median :3540  
##                                                           Mean   :2875  
##                                                           3rd Qu.:4019  
##                                                           Max.   :9706  
##                                                           NA's   :47    
##    LONGITUDE        LATITUDE_E     LONGITUDE_       REMARKS         
##  Min.   :-14451   Min.   :   0   Min.   :-14455   Length:902297     
##  1st Qu.:  7247   1st Qu.:   0   1st Qu.:     0   Class :character  
##  Median :  8707   Median :   0   Median :     0   Mode  :character  
##  Mean   :  6940   Mean   :1452   Mean   :  3509                     
##  3rd Qu.:  9605   3rd Qu.:3549   3rd Qu.:  8735                     
##  Max.   : 17124   Max.   :9706   Max.   :106220                     
##                   NA's   :40                                        
##      REFNUM      
##  Min.   :     1  
##  1st Qu.:225575  
##  Median :451149  
##  Mean   :451149  
##  3rd Qu.:676723  
##  Max.   :902297  
## 

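Columns needed for the analysis
- EVTYPE: Event type
- FATALITIES: Number of fatalities
- INJURIES: Number of injuries
- PROPDMG: Property damage (base amount)
- PROPDMGEXP: Property damage exponent (magnitude code for PROPDMG)
- CROPDMG: Crop damage (base amount)
- CROPDMGEXP: Crop damage exponent (magnitude code for CROPDMG)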

Data cleansing

# Translate a damage "exponent" code into a numeric multiplier.
#
# exp_code: character vector of PROPDMGEXP / CROPDMGEXP values.
# Returns a numeric vector of the same length as exp_code:
#   "H"/"h" -> 10^2, "K"/"k" -> 10^3, "M"/"m" -> 10^6, "B"/"b" -> 10^9,
#   an all-digit code d -> 10^d,
#   anything else ("", "+", "-", "?", NA, ...) -> 1.
exp_to_multiplier <- function(exp_code) {
  letter_scale <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
  code <- toupper(as.character(exp_code))

  # Default: leave the base amount unscaled
  multiplier <- rep(1, length(code))

  is_letter <- code %in% names(letter_scale)
  multiplier[is_letter] <- unname(letter_scale[code[is_letter]])

  # Convert only the all-digit codes. Calling as.numeric() on the whole
  # column would emit "NAs introduced by coercion" for every "K", "M", "".
  is_digit <- grepl("^[0-9]+$", code)
  multiplier[is_digit] <- 10^as.numeric(code[is_digit])

  multiplier
}

# Keep only events that caused any harm or damage, then turn the
# (amount, exponent code) pairs into total damage amounts.
data.2 <- data %>%
  filter(FATALITIES != 0 | INJURIES != 0 | PROPDMG != 0 | CROPDMG != 0) %>%
  mutate(
    # One helper for both columns keeps the two mappings consistent
    PROPDMGEXP2 = exp_to_multiplier(PROPDMGEXP),
    CROPDMGEXP2 = exp_to_multiplier(CROPDMGEXP),
    # Multiply
    PROPDMG_TOTAL = PROPDMG * PROPDMGEXP2,
    CROPDMG_TOTAL = CROPDMG * CROPDMGEXP2
  )

Question 1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Total fatalities and injuries per event type
data.health <- data.2 %>%
  group_by(EVTYPE) %>%
  summarize(
    Fatality_total = sum(FATALITIES, na.rm = TRUE),
    Injury_total = sum(INJURIES, na.rm = TRUE),
    .groups = "drop"
  )

# Pick top 10 fatality
data.fatality10 <- data.health %>%
  arrange(desc(Fatality_total)) %>%
  slice_head(n = 10)

# Pick top 10 injury
data.injury10 <- data.health %>%
  arrange(desc(Injury_total)) %>%
  slice_head(n = 10)

# Theme shared by both panels, defined once instead of once per figure
health_bar_theme <- theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.title.position = "plot",
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 10, face = "bold", angle = 45, hjust = 1),
    axis.title.y = element_text(size = 12, face = "bold"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.margin = unit(c(0.5, 0.2, 0.5, 0.5), "cm")
  )

# Panel A: fatalities, bars ordered from largest to smallest total
Figure_1A <- ggplot(
  data.fatality10,
  aes(x = fct_reorder(EVTYPE, desc(Fatality_total)), y = Fatality_total)
) +
  geom_col(fill = "#E76F51") +
  # Add the y scale only once.
  # NOTE: the limits are fixed; ggplot2 drops bars whose value falls outside
  # them, so raise the upper limit if the totals ever exceed it.
  scale_y_continuous(limits = c(0, 6000), breaks = seq(0, 6000, by = 2000)) +
  labs(
    title = "(A) Fatality by the types of events",
    x = "",
    y = "Fatality"
  ) +
  health_bar_theme

# Panel B: injuries in thousands, same layout as panel A
Figure_1B <- ggplot(
  data.injury10,
  aes(x = fct_reorder(EVTYPE, desc(Injury_total)), y = Injury_total / 1000)
) +
  geom_col(fill = "steelblue") +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 20)) +
  labs(
    title = "(B) Injury by the types of events",
    x = "",
    y = "Injury (thousands)"
  ) +
  health_bar_theme

# Stack the two panels vertically
grid.arrange(Figure_1A, Figure_1B, nrow = 2)

Results

  • With respect to population health, tornadoes were the most harmful type of event, causing both the most fatalities and the most injuries.

Question 2. Across the United States, which types of events have the greatest economic consequences?

# Total property and crop damage per event type
data.economy <- data.2 %>%
  group_by(EVTYPE) %>%
  summarize(
    PROP_total = sum(PROPDMG_TOTAL, na.rm = TRUE),
    CROP_total = sum(CROPDMG_TOTAL, na.rm = TRUE),
    .groups = "drop"
  )

# Pick top 10 property damage
data.PROP10 <- data.economy %>%
  arrange(desc(PROP_total)) %>%
  slice_head(n = 10)

# Pick top 10 crop damage
data.CROP10 <- data.economy %>%
  arrange(desc(CROP_total)) %>%
  slice_head(n = 10)

# Theme shared by both panels, defined once instead of once per figure
economy_bar_theme <- theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.title.position = "plot",
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 10, face = "bold", angle = 45, hjust = 1),
    axis.title.y = element_text(size = 12, face = "bold"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.margin = unit(c(0.5, 0.2, 0.5, 0.5), "cm")
  )

# Panel A: property damage in billions, bars ordered largest to smallest
Figure_2A <- ggplot(
  data.PROP10,
  aes(x = fct_reorder(EVTYPE, desc(PROP_total)), y = PROP_total / 10^9)
) +
  geom_col(fill = "#E76F51") +
  # NOTE: the limits are fixed; ggplot2 drops bars whose value falls outside
  # them, so raise the upper limit if the totals ever exceed it.
  scale_y_continuous(limits = c(0, 160), breaks = seq(0, 160, by = 20)) +
  labs(
    title = "(A) Property damage by the types of events",
    x = "",
    y = "Property damage (billion)"
  ) +
  economy_bar_theme

# Panel B: crop damage in billions, same layout as panel A
Figure_2B <- ggplot(
  data.CROP10,
  aes(x = fct_reorder(EVTYPE, desc(CROP_total)), y = CROP_total / 10^9)
) +
  geom_col(fill = "steelblue") +
  scale_y_continuous(limits = c(0, 20), breaks = seq(0, 20, by = 5)) +
  labs(
    title = "(B) Crop damage by the types of events",
    x = "",
    y = "Crop damage (billion)"
  ) +
  economy_bar_theme

# Stack the two panels vertically
grid.arrange(Figure_2A, Figure_2B, nrow = 2)

Results

  • In economic terms, floods caused the greatest property damage, while droughts caused the greatest crop damage.