# Load required packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the storm events file; the compressed CSV is passed straight to read.csv()
storm_data <- read.csv(file = "repdata_data_StormData.csv.bz2")
# Preview the first six records to inspect the column layout
head(storm_data, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Population health impact ----
# One row per EVTYPE with summed FATALITIES and INJURIES, ordered so the
# event types with the most fatalities come first (injuries break ties).
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(desc(total_fatalities), desc(total_injuries))
# Show the ten highest-ranked event types
head(health_impact, n = 10)
## # A tibble: 10 × 3
## EVTYPE total_fatalities total_injuries
## <chr> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
## 7 FLOOD 470 6789
## 8 RIP CURRENT 368 232
## 9 HIGH WIND 248 1137
## 10 AVALANCHE 224 170
# Economic impact ----
# PROPDMG / CROPDMG hold only the mantissa of each damage estimate; the
# companion columns PROPDMGEXP / CROPDMGEXP hold a magnitude code. Summing the
# raw mantissas mixes thousands with millions and billions, so every amount is
# scaled by its multiplier before it is aggregated.
# NOTE: tables rendered from the earlier raw sums no longer match this
# computation; re-run the document to refresh them.

# Translate damage magnitude codes into numeric multipliers.
#
# exp_code: vector of magnitude codes (e.g. "K", "M", "B"); case-insensitive.
# Returns a numeric vector of the same length as `exp_code`.
# Codes other than H/K/M/B (blank, digits, punctuation, NA) are left unscaled
# (multiplier 1) -- NOTE(review): confirm this is the desired treatment.
damage_multiplier <- function(exp_code) {
  known_multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(trimws(as.character(exp_code)))
  # match() yields NA for unknown codes, which then fall back to 1
  multiplier <- unname(known_multipliers[match(code, names(known_multipliers))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Total scaled property and crop damage per event type, largest property
# damage first (crop damage breaks ties). Amounts are presumably US dollars --
# verify against the data set documentation.
economic_impact <- storm_data %>%
  mutate(
    property_damage_scaled = PROPDMG * damage_multiplier(PROPDMGEXP),
    crop_damage_scaled = CROPDMG * damage_multiplier(CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarize(
    total_property_damage = sum(property_damage_scaled, na.rm = TRUE),
    total_crop_damage = sum(crop_damage_scaled, na.rm = TRUE)
  ) %>%
  arrange(desc(total_property_damage), desc(total_crop_damage))
# Display the top 10 events with the highest economic impact
head(economic_impact, 10)
## # A tibble: 10 × 3
## EVTYPE total_property_damage total_crop_damage
## <chr> <dbl> <dbl>
## 1 TORNADO 3212258. 100019.
## 2 FLASH FLOOD 1420125. 179200.
## 3 TSTM WIND 1335966. 109203.
## 4 FLOOD 899938. 168038.
## 5 THUNDERSTORM WIND 876844. 66791.
## 6 HAIL 688693. 579596.
## 7 LIGHTNING 603352. 3581.
## 8 THUNDERSTORM WINDS 446293. 18685.
## 9 HIGH WIND 324732. 17283.
## 10 WINTER STORM 132721. 1979.
The bar chart below shows the top 10 event types ranked by total fatalities.
# Bar chart of total fatalities for the first ten rows of health_impact
top_health_impact <- head(health_impact, n = 10)
ggplot(
  data = top_health_impact,
  mapping = aes(
    # Negating the total orders the bars from highest to lowest
    x = reorder(EVTYPE, -total_fatalities),
    y = total_fatalities
  )
) +
  geom_col(fill = "red") +
  labs(
    title = "Top 10 Events with Highest Fatalities",
    x = "Event Type",
    y = "Total Fatalities"
  ) +
  # Tilt the event-type labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
The bar chart below shows the top 10 event types ranked by total property damage.
# Bar chart of total property damage for the first ten rows of economic_impact
top_economic_impact <- head(economic_impact, n = 10)
ggplot(
  data = top_economic_impact,
  mapping = aes(
    # Negating the total orders the bars from highest to lowest
    x = reorder(EVTYPE, -total_property_damage),
    y = total_property_damage
  )
) +
  geom_col(fill = "blue") +
  labs(
    title = "Top 10 Events with Highest Property Damage",
    x = "Event Type",
    y = "Total Property Damage"
  ) +
  # Tilt the event-type labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))