Synopsis

This analysis aims to identify the types of weather events that have the most significant impact on public health and the economy in the United States. By examining the NOAA storm database, we assess the overall impact of various weather events in terms of fatalities, injuries, and economic damages.

Data

The data for this assignment come as a comma-separated-value (CSV) file compressed with the bzip2 algorithm. The file can be downloaded from the course web site.

Load Data

library(dplyr)
library(ggplot2)
# Load the raw NOAA storm data. read.csv() reads the bzip2-compressed file
# directly, so no separate decompression step is needed.
data <- read.csv(file = "repdata_data_StormData.csv.bz2")

Data Overview

# Preview the first six records to see which columns are available.
head(data, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6

Data Processing

In this section, we process the data to prepare it for analysis. This includes cleaning the data, handling missing values, and transforming variables as necessary.

# Removing columns that are not used anywhere in the analysis below
data_clean <- data %>%
  select(-c(
    BGN_RANGE, BGN_AZI, BGN_LOCATI,
    END_DATE, END_TIME, COUNTY_END, COUNTYENDN,
    END_RANGE, END_AZI, END_LOCATI,
    WFO, STATEOFFIC, ZONENAMES,
    LATITUDE_E, LONGITUDE_, REMARKS
  ))

# Converting data types. BGN_DATE is read as "m/d/Y H:M:S" text; as.Date()
# keeps only the calendar date. The four measure columns are forced to numeric
# so that the sums and products computed later are well defined.
data_clean$BGN_DATE <- as.Date(
  data_clean$BGN_DATE,
  format = "%m/%d/%Y %H:%M:%S"
)
data_clean$FATALITIES <- as.numeric(data_clean$FATALITIES)
data_clean$INJURIES <- as.numeric(data_clean$INJURIES)
data_clean$PROPDMG <- as.numeric(data_clean$PROPDMG)
data_clean$CROPDMG <- as.numeric(data_clean$CROPDMG)

# Handling missing values: require completeness only in the columns the
# analysis relies on. A blanket na.omit() would also discard every record
# with an NA in an unrelated retained column (for example F, which presumably
# applies only to tornado records -- TODO confirm), silently removing whole
# categories of events.
data_clean <- data_clean[
  complete.cases(
    data_clean[, c(
      "EVTYPE", "BGN_DATE", "FATALITIES", "INJURIES", "PROPDMG", "CROPDMG"
    )]
  ),
]

Analysis

Question 1: Which types of events are most harmful with respect to population health?

We analyze the data to determine which weather events have the most significant impact on public health, measured by the number of fatalities and injuries.

# Keep records whose casualty counts are both known and add the combined
# health impact (fatalities plus injuries) per record. Note that this starts
# again from the raw `data` frame.
data_clean <- data %>%
  filter(!(is.na(FATALITIES) | is.na(INJURIES))) %>%
  mutate(TotalHealthImpact = FATALITIES + INJURIES)

# Sum the health impact within each event type, largest total first
health_impact_per_event <- data_clean %>%
  group_by(EVTYPE) %>%
  summarise(TotalImpact = sum(TotalHealthImpact, na.rm = TRUE)) %>%
  arrange(desc(TotalImpact))

# The ten event types with the highest combined casualties
top_events <- head(health_impact_per_event, n = 10)

Question 2: Which types of events have the greatest economic consequences?

This part of the analysis focuses on assessing the economic impact of different weather events, considering property and crop damages.

# Convert damage-magnitude codes (as found in PROPDMGEXP / CROPDMGEXP) to
# base-10 exponents.
#
# Args:
#   exp: vector of magnitude codes (character, factor or numeric).
#
# Returns:
#   A numeric vector, the same length as `exp`, holding the exponent for each
#   code:
#     "H" -> 2, "K" -> 3, "M" -> 6, "B" -> 9 (matched case-insensitively),
#     a string of digits -> that number,
#     "" or any unrecognised symbol -> 0 (i.e. a multiplier of 10^0),
#     NA -> NA.
#
# Unrecognised symbols used to be coerced to NA (with a coercion warning),
# which turned the whole damage figure into NA and caused it to be dropped
# from sums computed with na.rm = TRUE. Mapping them to 0 keeps the recorded
# amount at face value instead.
convert_exp <- function(exp) {
  symbol <- as.character(exp)
  is_missing <- is.na(symbol)
  symbol <- toupper(trimws(symbol))

  letter_exponents <- c(H = 2, K = 3, M = 6, B = 9)

  # Default every entry to 10^0, then overwrite the ones we can interpret.
  exponent <- rep(0, length(symbol))

  is_letter <- symbol %in% names(letter_exponents)
  exponent[is_letter] <- unname(letter_exponents[symbol[is_letter]])

  # Only purely numeric codes are passed to as.numeric(), so no coercion
  # warning can be raised.
  is_digit <- grepl("^[0-9]+$", symbol)
  exponent[is_digit] <- as.numeric(symbol[is_digit])

  exponent[is_missing] <- NA_real_
  exponent
}

# Convert PROPDMGEXP column to numeric values.
# This overwrites the column in `data` itself, so later steps see exponents
# rather than the original magnitude codes.
data$PROPDMGEXP <- convert_exp(data$PROPDMGEXP)
## Warning in convert_exp(data$PROPDMGEXP): 強制変換により NA が生成されました
# Apply the same conversion to the CROPDMGEXP column. The column is also
# overwritten in place; no check is made that it exists.
data$CROPDMGEXP <- convert_exp(data$CROPDMGEXP)
## Warning in convert_exp(data$CROPDMGEXP): 強制変換により NA が生成されました
# Economic loss per record: property damage plus crop damage, each scaled by
# ten raised to its converted exponent column.
data_clean <- data %>%
  mutate(
    TotalEconomicImpact = PROPDMG * 10^PROPDMGEXP + CROPDMG * 10^CROPDMGEXP
  )

# Sum the loss within each event type, largest total first. Records whose
# loss is NA are ignored by the sum.
economic_impact_per_event <- data_clean %>%
  group_by(EVTYPE) %>%
  summarise(TotalEconomicImpact = sum(TotalEconomicImpact, na.rm = TRUE)) %>%
  arrange(desc(TotalEconomicImpact))

# The ten event types with the largest total loss
top_economic_events <- head(economic_impact_per_event, n = 10)

Results

This section presents the main results of the analysis, including figures and tables that summarize our findings.

Question 1: Which types of events are most harmful with respect to population health?

# Horizontal bar chart of the ten event types with the highest combined
# fatalities and injuries.
ggplot(
  data = top_events,
  mapping = aes(x = reorder(EVTYPE, -TotalImpact), y = TotalImpact)
) +
  geom_col(fill = "steelblue") +
  coord_flip() + # bars run horizontally
  labs(
    title = "Top 10 Most Harmful Events for Population Health",
    x = "Event Type",
    y = "Total Health Impact (Fatalities + Injuries)"
  ) +
  theme_minimal()

Tornadoes are the most detrimental with respect to population health.

Question 2: Which types of events have the greatest economic consequences?

# Horizontal bar chart of the ten event types with the largest total
# economic loss.
ggplot(
  data = top_economic_events,
  mapping = aes(
    x = reorder(EVTYPE, -TotalEconomicImpact),
    y = TotalEconomicImpact
  )
) +
  geom_col(fill = "steelblue") +
  coord_flip() + # bars run horizontally
  labs(
    title = "Top 10 Events with Greatest Economic Impact",
    x = "Event Type",
    y = "Total Economic Impact"
  ) +
  theme_minimal()

Floods have the greatest economic impact.