Using the U.S. National Oceanic and Atmospheric Administration (NOAA) storm database, we analyze
which types of weather events are most harmful to population health and which
have the greatest economic consequences.
Based on fatality and injury data, tornadoes were the
most harmful events with respect to population health. Based on property
and crop damage data, floods had the greatest impact on property
damage, while droughts had the greatest impact on crop
damage.
# Fetch the compressed NOAA storm data when the extracted CSV is not already
# present, decompress it with the "R.utils" package, then load the CSV.
if (!file.exists("repdata_data_StormData.csv")) {
  URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # Request a binary transfer explicitly instead of relying on
  # download.file()'s implicit choice of mode for the .bz2 archive.
  download.file(
    URL,
    destfile = "repdata_data_StormData.csv.bz2",
    mode = "wb"
  )
  # Namespace-qualified so the call works even if R.utils is not attached;
  # remove = FALSE keeps the downloaded archive next to the extracted CSV.
  R.utils::bunzip2(
    "repdata_data_StormData.csv.bz2",
    "repdata_data_StormData.csv",
    remove = FALSE
  )
}
data <- read.csv("repdata_data_StormData.csv")
# List the column names of the raw storm data
names(data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Show the structure of the raw data: column types and leading values
str(data)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Print per-column summary statistics of the raw data
summary(data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE
## Min. : 1.0 Length:902297 Length:902297 Length:902297
## 1st Qu.:19.0 Class :character Class :character Class :character
## Median :30.0 Mode :character Mode :character Mode :character
## Mean :31.2
## 3rd Qu.:45.0
## Max. :95.0
##
## COUNTY COUNTYNAME STATE EVTYPE
## Min. : 0.0 Length:902297 Length:902297 Length:902297
## 1st Qu.: 31.0 Class :character Class :character Class :character
## Median : 75.0 Mode :character Mode :character Mode :character
## Mean :100.6
## 3rd Qu.:131.0
## Max. :873.0
##
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## Min. : 0.000 Length:902297 Length:902297 Length:902297
## 1st Qu.: 0.000 Class :character Class :character Class :character
## Median : 0.000 Mode :character Mode :character Mode :character
## Mean : 1.484
## 3rd Qu.: 1.000
## Max. :3749.000
##
## END_TIME COUNTY_END COUNTYENDN END_RANGE
## Length:902297 Min. :0 Mode:logical Min. : 0.0000
## Class :character 1st Qu.:0 NA's:902297 1st Qu.: 0.0000
## Mode :character Median :0 Median : 0.0000
## Mean :0 Mean : 0.9862
## 3rd Qu.:0 3rd Qu.: 0.0000
## Max. :0 Max. :925.0000
##
## END_AZI END_LOCATI LENGTH WIDTH
## Length:902297 Length:902297 Min. : 0.0000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.0000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.0000 Median : 0.000
## Mean : 0.2301 Mean : 7.503
## 3rd Qu.: 0.0000 3rd Qu.: 0.000
## Max. :2315.0000 Max. :4400.000
##
## F MAG FATALITIES INJURIES
## Min. :0.0 Min. : 0.0 Min. : 0.0000 Min. : 0.0000
## 1st Qu.:0.0 1st Qu.: 0.0 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median :1.0 Median : 50.0 Median : 0.0000 Median : 0.0000
## Mean :0.9 Mean : 46.9 Mean : 0.0168 Mean : 0.1557
## 3rd Qu.:1.0 3rd Qu.: 75.0 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :5.0 Max. :22000.0 Max. :583.0000 Max. :1700.0000
## NA's :843563
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## Min. : 0.00 Length:902297 Min. : 0.000 Length:902297
## 1st Qu.: 0.00 Class :character 1st Qu.: 0.000 Class :character
## Median : 0.00 Mode :character Median : 0.000 Mode :character
## Mean : 12.06 Mean : 1.527
## 3rd Qu.: 0.50 3rd Qu.: 0.000
## Max. :5000.00 Max. :990.000
##
## WFO STATEOFFIC ZONENAMES LATITUDE
## Length:902297 Length:902297 Length:902297 Min. : 0
## Class :character Class :character Class :character 1st Qu.:2802
## Mode :character Mode :character Mode :character Median :3540
## Mean :2875
## 3rd Qu.:4019
## Max. :9706
## NA's :47
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS
## Min. :-14451 Min. : 0 Min. :-14455 Length:902297
## 1st Qu.: 7247 1st Qu.: 0 1st Qu.: 0 Class :character
## Median : 8707 Median : 0 Median : 0 Mode :character
## Mean : 6940 Mean :1452 Mean : 3509
## 3rd Qu.: 9605 3rd Qu.:3549 3rd Qu.: 8735
## Max. : 17124 Max. :9706 Max. :106220
## NA's :40
## REFNUM
## Min. : 1
## 1st Qu.:225575
## Median :451149
## Mean :451149
## 3rd Qu.:676723
## Max. :902297
##
Identify the columns needed for the analysis:
- EVTYPE: Event type
- FATALITIES: Number of fatalities
- INJURIES: Number of injuries
- PROPDMG: Property damage
- PROPDMGEXP: Property damage exponent
- CROPDMG: Crop damage
- CROPDMGEXP: Crop damage exponent
# Translate a "-EXP" magnitude code into a numeric multiplier.
#
# exp_code: vector of exponent codes (e.g. "K", "m", "5", "", "+").
# Returns a numeric vector of the same length:
#   "H"/"h" -> 10^2, "K"/"k" -> 10^3, "M"/"m" -> 10^6, "B"/"b" -> 10^9,
#   all-digit codes d -> 10^d, anything else ("", "+", "-", "?", ...) -> 1.
#
# Only the all-digit codes are passed to as.numeric(), so no
# "NAs introduced by coercion" warnings are raised for letter codes
# (case_when() would evaluate the conversion on every row).
exp_to_multiplier <- function(exp_code) {
  # Guard against factor input, where as.numeric() would return level codes.
  exp_code <- as.character(exp_code)
  multiplier <- rep(1, length(exp_code))
  multiplier[exp_code %in% c("H", "h")] <- 10^2
  multiplier[exp_code %in% c("K", "k")] <- 10^3
  multiplier[exp_code %in% c("M", "m")] <- 10^6
  multiplier[exp_code %in% c("B", "b")] <- 10^9
  is_digit <- grepl("^[0-9]+$", exp_code)
  multiplier[is_digit] <- 10^as.numeric(exp_code[is_digit])
  multiplier
}

# Keep only events that caused at least one fatality, injury, or some
# property/crop damage, then expand the damage figures to absolute amounts.
# NOTE: "H"/"h" is now also recognised for CROPDMGEXP (previously it fell
# through to a multiplier of 1), so both columns follow the same rules.
data.2 <- data %>%
  filter(FATALITIES != 0 | INJURIES != 0 | PROPDMG != 0 | CROPDMG != 0) %>%
  mutate(
    PROPDMGEXP2 = exp_to_multiplier(PROPDMGEXP),
    CROPDMGEXP2 = exp_to_multiplier(CROPDMGEXP),
    # Damage amount = reported value * multiplier
    PROPDMG_TOTAL = PROPDMG * PROPDMGEXP2,
    CROPDMG_TOTAL = CROPDMG * CROPDMGEXP2
  )
Which types of events (as indicated in the EVTYPE variable) are most harmful with
respect to population health?

# Total fatalities and injuries for each event type (EVTYPE).
# The assignment is placed on its own line; it was previously fused onto the
# end of the preceding sentence.
data.health <- data.2 %>%
  group_by(EVTYPE) %>%
  summarize(
    Fatality_total = sum(FATALITIES, na.rm = TRUE),
    Injury_total = sum(INJURIES, na.rm = TRUE),
    .groups = "drop"
  )
# Ten event types with the highest fatality totals, largest first
data.fatality10 <- head(
  arrange(data.health, desc(Fatality_total)),
  n = 10
)
# Ten event types with the highest injury totals, largest first
data.injury10 <- head(
  arrange(data.health, desc(Injury_total)),
  n = 10
)
# Panel (A): bar chart of total fatalities for the top-10 event types,
# with bars ordered from the largest total to the smallest.
Figure_1A <- ggplot(
  data.fatality10,
  aes(x = fct_reorder(EVTYPE, desc(Fatality_total)), y = Fatality_total)
) +
  geom_col(fill = "#E76F51") +
  labs(
    title = "(A) Fatality by the types of events",
    x = "",
    y = "Fatality"
  ) +
  # The y scale is added only once: fixed 0-6000 range, ticks every 2000
  scale_y_continuous(
    limits = c(0, 6000),
    breaks = seq(0, 6000, by = 2000)
  ) +
  theme_minimal() +
  theme(
    # Title
    plot.title = element_text(size = 14, face = "bold"),
    plot.title.position = "plot",
    # Axes: no x title, slanted bold x labels, bold y title
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(
      size = 10, face = "bold", angle = 45, hjust = 1
    ),
    # No vertical grid lines
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.margin = unit(c(0.5, 0.2, 0.5, 0.5), "cm")
  )
# Panel (B): bar chart of total injuries (in thousands) for the top-10 event
# types, with bars ordered from the largest total to the smallest.
Figure_1B <- ggplot(
  data.injury10,
  aes(x = fct_reorder(EVTYPE, desc(Injury_total)), y = Injury_total / 1000)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "(B) Injury by the types of events",
    x = "",
    y = "Injury (thousands)"
  ) +
  # Fixed 0-100 (thousand) range with a tick every 20
  scale_y_continuous(
    limits = c(0, 100),
    breaks = seq(0, 100, by = 20)
  ) +
  theme_minimal() +
  theme(
    # Title
    plot.title = element_text(size = 14, face = "bold"),
    plot.title.position = "plot",
    # Axes: no x title, slanted bold x labels, bold y title
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(
      size = 10, face = "bold", angle = 45, hjust = 1
    ),
    # No vertical grid lines
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.margin = unit(c(0.5, 0.2, 0.5, 0.5), "cm")
  )
# Draw the fatality and injury panels together, stacked in two rows
grid.arrange(Figure_1A, Figure_1B, nrow = 2)
Tornadoes were the most
harmful type of events in both fatalities and injuries

# Total property and crop damage for each event type (EVTYPE).
# The assignment is placed on its own line; it was previously fused onto the
# end of the preceding sentence.
data.economy <- data.2 %>%
  group_by(EVTYPE) %>%
  summarize(
    PROP_total = sum(PROPDMG_TOTAL, na.rm = TRUE),
    CROP_total = sum(CROPDMG_TOTAL, na.rm = TRUE),
    .groups = "drop"
  )
# Ten event types with the highest property damage totals, largest first
data.PROP10 <- head(
  arrange(data.economy, desc(PROP_total)),
  n = 10
)
# Ten event types with the highest crop damage totals, largest first
data.CROP10 <- head(
  arrange(data.economy, desc(CROP_total)),
  n = 10
)
# Panel (A): bar chart of total property damage (in billions) for the top-10
# event types, with bars ordered from the largest total to the smallest.
Figure_2A <- ggplot(
  data.PROP10,
  aes(x = fct_reorder(EVTYPE, desc(PROP_total)), y = PROP_total / 10^9)
) +
  geom_col(fill = "#E76F51") +
  labs(
    title = "(A) Property damage by the types of events",
    x = "",
    y = "Property damage (billion)"
  ) +
  # Fixed 0-160 (billion) range with a tick every 20
  scale_y_continuous(
    limits = c(0, 160),
    breaks = seq(0, 160, by = 20)
  ) +
  theme_minimal() +
  theme(
    # Title
    plot.title = element_text(size = 14, face = "bold"),
    plot.title.position = "plot",
    # Axes: no x title, slanted bold x labels, bold y title
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(
      size = 10, face = "bold", angle = 45, hjust = 1
    ),
    # No vertical grid lines
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.margin = unit(c(0.5, 0.2, 0.5, 0.5), "cm")
  )
# Panel (B): bar chart of total crop damage (in billions) for the top-10
# event types, with bars ordered from the largest total to the smallest.
Figure_2B <- ggplot(
  data.CROP10,
  aes(x = fct_reorder(EVTYPE, desc(CROP_total)), y = CROP_total / 10^9)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "(B) Crop damage by the types of events",
    x = "",
    y = "Crop damage (billion)"
  ) +
  # Fixed 0-20 (billion) range with a tick every 5
  scale_y_continuous(
    limits = c(0, 20),
    breaks = seq(0, 20, by = 5)
  ) +
  theme_minimal() +
  theme(
    # Title
    plot.title = element_text(size = 14, face = "bold"),
    plot.title.position = "plot",
    # Axes: no x title, slanted bold x labels, bold y title
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(
      size = 10, face = "bold", angle = 45, hjust = 1
    ),
    # No vertical grid lines
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.margin = unit(c(0.5, 0.2, 0.5, 0.5), "cm")
  )
# Draw the property and crop damage panels together, stacked in two rows
grid.arrange(Figure_2A, Figure_2B, nrow = 2)
Floods had the greatest impact
on property damage, while droughts had the greatest impact
on crop damage.