Loading Data

Load the libraries needed for data manipulation and visualization. Define the file URL and the destination file name, then download the dataset if it doesn’t already exist in the working directory. Check that the file downloaded successfully, and finally read the dataset into R.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)

# Folder that receives the saved figures; no warning if it already exists.
dir.create("graphs", showWarnings = FALSE)

# Location of the bzip2-compressed storm data archive.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Download only when the file is not already in the working directory, so
# re-running the analysis does not fetch the archive again.
# The destination name is kept unchanged because the later read step uses it.
# mode = "wb" writes the compressed archive as binary on every platform.
if (!file.exists("repdata_data_StormData.csv")) {
  download.file(fileUrl, destfile = "repdata_data_StormData.csv", mode = "wb")
}

# Show the working directory that the relative file paths resolve against.
getwd()
## [1] "C:/Users/Danie/Desktop/John Hopkins University Data Science Certification/Project-8"
# List the directory contents to see which files are present.
list.files()
## [1] "graphs"                         "Project-8"                     
## [3] "Project-8.Rproj"                "repdata_data_StormData.csv"    
## [5] "repdata_data_StormData.csv.bz2" "StormData.html"                
## [7] "StormData.Rmd"
# Explicit check that the downloaded file exists before it is read.
file.exists("repdata_data_StormData.csv")
## [1] TRUE
# Read the storm data into `storm_Data`. The file carries a .csv name although
# the download URL points to a .bz2 archive; the recorded output below shows
# that read_csv() parsed it successfully.
storm_Data <- read_csv("repdata_data_StormData.csv")
## Rows: 902297 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): BGN_DATE, BGN_TIME, TIME_ZONE, COUNTYNAME, STATE, EVTYPE, BGN_AZI,...
## dbl (18): STATE__, COUNTY, BGN_RANGE, COUNTY_END, END_RANGE, LENGTH, WIDTH, ...
## lgl  (1): COUNTYENDN
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Cleaning Data

Filter the original dataset to keep only rows where fatalities, injuries, property damage, or crop damage are greater than 0, so that the analysis focuses on impactful weather events. Then create a summary table of total harm to the population (fatalities + injuries), grouped by event type and sorted from most to least harmful.

# Keep only events with at least one fatality, injury, or some property or
# crop damage.
storm_filtered <- storm_Data %>%
  filter(FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0)

# Per event type: total fatalities, total injuries, and their sum, ordered
# from the most to the least harmful event type.
storm_summary <- storm_filtered %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES, na.rm = TRUE),
    INJURIES = sum(INJURIES, na.rm = TRUE),
    # FATALITIES and INJURIES here are the per-group totals computed above.
    TOTAL_HARM = FATALITIES + INJURIES
  ) %>%
  arrange(desc(TOTAL_HARM))

Plot a horizontal bar chart showing the top 10 weather events by Total Harm

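Event type (EVTYPE) is mapped to the x aesthetic and total harm to the y aesthetic; because of coord_flip(), event types appear on the vertical axis and total harm on the horizontal axis.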

Save the plot to the graphs directory

# First ten rows of the harm summary (the ten most harmful event types when
# the summary is sorted by TOTAL_HARM in descending order).
top10 <- head(storm_summary, 10)

# reorder() ranks the bars by total harm instead of alphabetically by event
# name, and fill = colours the bars themselves (color = would only tint the
# bar outline). After coord_flip() the most harmful event is at the top.
harm_plot <- ggplot(
  data = top10,
  aes(x = reorder(EVTYPE, TOTAL_HARM), y = TOTAL_HARM)
) +
  geom_col(fill = "turquoise") +
  coord_flip() +
  labs(
    x = "Ev Types",
    y = "Total amount of Harm",
    title = "Top 10 Weather Events by Total Harm to Population"
  )

# Display the chart, then save exactly this plot object rather than relying
# on whichever plot happened to be drawn last.
print(harm_plot)

ggsave("graphs/Total_Harm_plot.png", plot = harm_plot, width = 10, height = 6)

Cleaning Data for Economic Consequences

Select the damage-related columns from the filtered data. Convert the character exponent codes (K, M, B) into numeric multipliers. Calculate the total cost as property damage plus crop damage.

# Inspect the column names and types of the filtered data before selecting
# the damage columns used for the cost calculation.
glimpse(storm_filtered)
## Rows: 254,633
## Columns: 37
## $ STATE__    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ BGN_DATE   <chr> "4/18/1950 0:00:00", "4/18/1950 0:00:00", "2/20/1951 0:00:0…
## $ BGN_TIME   <chr> "0130", "0145", "1600", "0900", "1500", "2000", "0100", "09…
## $ TIME_ZONE  <chr> "CST", "CST", "CST", "CST", "CST", "CST", "CST", "CST", "CS…
## $ COUNTY     <dbl> 97, 3, 57, 89, 43, 77, 9, 123, 125, 57, 43, 9, 73, 49, 107,…
## $ COUNTYNAME <chr> "MOBILE", "BALDWIN", "FAYETTE", "MADISON", "CULLMAN", "LAUD…
## $ STATE      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",…
## $ EVTYPE     <chr> "TORNADO", "TORNADO", "TORNADO", "TORNADO", "TORNADO", "TOR…
## $ BGN_RANGE  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ BGN_AZI    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ BGN_LOCATI <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ END_DATE   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ END_TIME   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ COUNTY_END <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ COUNTYENDN <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ END_RANGE  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ END_AZI    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ END_LOCATI <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ LENGTH     <dbl> 14.0, 2.0, 0.1, 0.0, 0.0, 1.5, 1.5, 0.0, 3.3, 2.3, 1.3, 4.7…
## $ WIDTH      <dbl> 100, 150, 123, 100, 150, 177, 33, 33, 100, 100, 400, 400, 2…
## $ F          <dbl> 3, 2, 2, 2, 2, 2, 2, 1, 3, 3, 1, 1, 3, 3, 3, 4, 1, 1, 1, 1,…
## $ MAG        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ FATALITIES <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0,…
## $ INJURIES   <dbl> 15, 0, 2, 2, 2, 6, 1, 0, 14, 0, 3, 3, 26, 12, 6, 50, 2, 0, …
## $ PROPDMG    <dbl> 25.0, 2.5, 25.0, 2.5, 2.5, 2.5, 2.5, 2.5, 25.0, 25.0, 2.5, …
## $ PROPDMGEXP <chr> "K", "K", "K", "K", "K", "K", "K", "K", "K", "K", "M", "M",…
## $ CROPDMG    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ CROPDMGEXP <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ WFO        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ STATEOFFIC <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ZONENAMES  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ LATITUDE   <dbl> 3040, 3042, 3340, 3458, 3412, 3450, 3405, 3255, 3334, 3336,…
## $ LONGITUDE  <dbl> 8812, 8755, 8742, 8626, 8642, 8748, 8631, 8558, 8740, 8738,…
## $ LATITUDE_E <dbl> 3051, 0, 0, 0, 0, 0, 0, 0, 3336, 3337, 3402, 3404, 0, 3432,…
## $ LONGITUDE_ <dbl> 8806, 0, 0, 0, 0, 0, 0, 0, 8738, 8737, 8644, 8640, 0, 8540,…
## $ REMARKS    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ REFNUM     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
# Translate damage-exponent codes into numeric multipliers.
#
# exp_code: vector of exponent codes (e.g. PROPDMGEXP or CROPDMGEXP).
# Returns a numeric vector of the same length: 1e3 for "K", 1e6 for "M",
# 1e9 for "B". Matching ignores case, so "k", "m" and "b" are scaled like
# their upper-case forms instead of silently falling back to 1.
# Any other or missing code keeps a multiplier of 1.
damage_multiplier <- function(exp_code) {
  lookup <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(lookup[toupper(exp_code)])
  # Codes without an entry in the lookup table come back as NA.
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Keep only the columns needed for the cost calculation and attach the
# property-damage multiplier. Column names keep their original spelling so
# that any code relying on them continues to work.
economic_data <- storm_filtered %>%
  select(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP) %>%
  mutate(prop_multipler = damage_multiplier(PROPDMGEXP))

# Same conversion for the crop-damage exponent.
economic_data2 <- economic_data %>%
  mutate(crop_multipler = damage_multiplier(CROPDMGEXP))

# Dollar cost per event; rowSums(..., na.rm = TRUE) treats a missing
# component as zero when adding property and crop cost.
economic_calculations <- economic_data2 %>%
  mutate(
    Prop_cost = PROPDMG * prop_multipler,
    crop_cost = CROPDMG * crop_multipler,
    Total_cost = rowSums(cbind(Prop_cost, crop_cost), na.rm = TRUE)
  )

# Total cost per event type, most expensive first.
economic_summary <- economic_calculations %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_COST = sum(Total_cost, na.rm = TRUE)) %>%
  arrange(desc(TOTAL_COST))

# Ten costliest event types.
Total_summary10 <- head(economic_summary, 10)

Plot a horizontal bar chart showing the top 10 weather events by Total Cost

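Event type (EVTYPE) is mapped to the x aesthetic and total economic cost to the y aesthetic; because of coord_flip(), event types appear on the vertical axis and total cost on the horizontal axis.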

Save the plot to the graphs directory

# reorder() ranks the bars by total cost instead of alphabetically by event
# name, and fill = colours the bars themselves (color = would only tint the
# bar outline). After coord_flip() the costliest event is at the top.
cost_plot <- ggplot(
  data = Total_summary10,
  aes(x = reorder(EVTYPE, TOTAL_COST), y = TOTAL_COST)
) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(
    x = "Ev Types",
    y = "Total amount of cost",
    title = "Top 10 Costliest Weather Events in the United States"
  )

# Display the chart, then save exactly this plot object rather than relying
# on whichever plot happened to be drawn last.
print(cost_plot)

ggsave("graphs/economic_costs_plot.png", plot = cost_plot, width = 10, height = 6)

Results

For the first question, the data from the NOAA Storm Database show that tornadoes are by far the most harmful weather event to population health in the United States. They account for 5,633 fatalities and 91,346 injuries, for a total of 96,979 people harmed. For the second question, the top 10 weather events causing the highest total economic damage in the U.S. are led by floods, followed by hurricanes/typhoons and tornadoes. Floods alone account for over 150 billion dollars in damages, making them the most financially devastating event type. These findings highlight the significant economic impact of severe weather events and emphasize the need for preparedness and mitigation strategies in vulnerable regions.

Conclusion

The NOAA storm database reveals that tornadoes are the most harmful to population health, while floods lead to the greatest economic consequences. This analysis provides insight into which weather events have had the most significant human and financial impact in the United States.