#Synopsis This analysis examines the impact of severe weather events on health and the economy using NOAA data. The dataset was processed by removing records with a missing event type and creating key variables such as health_impact (sum of fatalities and injuries) and total_damage (sum of property and crop damages). The most harmful events for human health and those with the highest economic consequences were identified. Results are presented in bar charts for easier interpretation. Certain phenomena, such as tornadoes and floods, have a significant economic impact, while tornadoes and excessive heat pose a serious threat to human lives. This study highlights the importance of prevention and disaster response policies. Data visualization helps to reveal critical patterns for decision-making. Finally, the analysis provides a foundation for future research on the effects of climate change on extreme weather events.
#Data Processing ##Loading Raw Data in R The dataset used in this analysis comes from the NOAA storm database and is stored in a compressed CSV (.bz2) format. To ensure reproducibility, the data is loaded directly into R without any external preprocessing. ## Load the raw data file directly in R
## Load the raw NOAA storm data.
## The archive is looked up relative to the working directory instead of a
## user-specific absolute path, so the analysis can run on any machine. If the
## archive is not present it is downloaded once from the course mirror.
storm_data_path <- "repdata_data_StormData.csv.bz2"
storm_data_url <- paste0(
  "https://d396qusza40orc.cloudfront.net/",
  "repdata%2Fdata%2FStormData.csv.bz2"
)
if (!file.exists(storm_data_path)) {
  # mode = "wb" keeps the bzip2 archive byte-exact on Windows.
  download.file(storm_data_url, destfile = storm_data_path, mode = "wb")
}
# read.csv() reads the .bz2 archive directly; no manual decompression step.
file <- read.csv(storm_data_path)
##To verify successful loading, we check the first few rows, the structure, and the summary of the dataset:
head(file) # Display first rows
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Compact overview of the loaded data frame: dimensions, then each column's
# name, storage type, and first few values.
str(file) # Check dataset structure
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Per-column summary: quartiles/mean for numeric columns, length and class for
# character columns, and NA counts where present.
summary(file) # View statistical summary of the variables
## STATE__ BGN_DATE BGN_TIME TIME_ZONE
## Min. : 1.0 Length:902297 Length:902297 Length:902297
## 1st Qu.:19.0 Class :character Class :character Class :character
## Median :30.0 Mode :character Mode :character Mode :character
## Mean :31.2
## 3rd Qu.:45.0
## Max. :95.0
##
## COUNTY COUNTYNAME STATE EVTYPE
## Min. : 0.0 Length:902297 Length:902297 Length:902297
## 1st Qu.: 31.0 Class :character Class :character Class :character
## Median : 75.0 Mode :character Mode :character Mode :character
## Mean :100.6
## 3rd Qu.:131.0
## Max. :873.0
##
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## Min. : 0.000 Length:902297 Length:902297 Length:902297
## 1st Qu.: 0.000 Class :character Class :character Class :character
## Median : 0.000 Mode :character Mode :character Mode :character
## Mean : 1.484
## 3rd Qu.: 1.000
## Max. :3749.000
##
## END_TIME COUNTY_END COUNTYENDN END_RANGE
## Length:902297 Min. :0 Mode:logical Min. : 0.0000
## Class :character 1st Qu.:0 NA's:902297 1st Qu.: 0.0000
## Mode :character Median :0 Median : 0.0000
## Mean :0 Mean : 0.9862
## 3rd Qu.:0 3rd Qu.: 0.0000
## Max. :0 Max. :925.0000
##
## END_AZI END_LOCATI LENGTH WIDTH
## Length:902297 Length:902297 Min. : 0.0000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.0000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.0000 Median : 0.000
## Mean : 0.2301 Mean : 7.503
## 3rd Qu.: 0.0000 3rd Qu.: 0.000
## Max. :2315.0000 Max. :4400.000
##
## F MAG FATALITIES INJURIES
## Min. :0.0 Min. : 0.0 Min. : 0.0000 Min. : 0.0000
## 1st Qu.:0.0 1st Qu.: 0.0 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median :1.0 Median : 50.0 Median : 0.0000 Median : 0.0000
## Mean :0.9 Mean : 46.9 Mean : 0.0168 Mean : 0.1557
## 3rd Qu.:1.0 3rd Qu.: 75.0 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :5.0 Max. :22000.0 Max. :583.0000 Max. :1700.0000
## NA's :843563
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## Min. : 0.00 Length:902297 Min. : 0.000 Length:902297
## 1st Qu.: 0.00 Class :character 1st Qu.: 0.000 Class :character
## Median : 0.00 Mode :character Median : 0.000 Mode :character
## Mean : 12.06 Mean : 1.527
## 3rd Qu.: 0.50 3rd Qu.: 0.000
## Max. :5000.00 Max. :990.000
##
## WFO STATEOFFIC ZONENAMES LATITUDE
## Length:902297 Length:902297 Length:902297 Min. : 0
## Class :character Class :character Class :character 1st Qu.:2802
## Mode :character Mode :character Mode :character Median :3540
## Mean :2875
## 3rd Qu.:4019
## Max. :9706
## NA's :47
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS
## Min. :-14451 Min. : 0 Min. :-14455 Length:902297
## 1st Qu.: 7247 1st Qu.: 0 1st Qu.: 0 Class :character
## Median : 8707 Median : 0 Median : 0 Mode :character
## Mean : 6940 Mean :1452 Mean : 3509
## 3rd Qu.: 9605 3rd Qu.:3549 3rd Qu.: 8735
## Max. : 17124 Max. :9706 Max. :106220
## NA's :40
## REFNUM
## Min. : 1
## 1st Qu.:225575
## Median :451149
## Mean :451149
## 3rd Qu.:676723
## Max. :902297
##
#Data Cleaning and Transformation ##Once the raw data is loaded, we apply necessary transformations: ##Remove missing values (NA) in the event type (EVTYPE) column. ##Create a new variable health_impact, combining fatalities (FATALITIES) and injuries (INJURIES) to assess health risks. ##Group and summarize events based on their impact on health and economy.
##Data filtering and transformation
# dplyr provides %>%, filter(), mutate(), group_by(), summarise(), arrange()
# and desc(), all of which this script uses from here on. It is attached at
# first use because no earlier library() call is visible in this file; without
# it the pipe operator is undefined and the script stops here.
library(dplyr)

# Drop records whose event type is missing, then add a per-record casualty
# count (fatalities plus injuries) as `health_impact`.
file <- file %>%
  filter(!is.na(EVTYPE)) %>%
  mutate(health_impact = FATALITIES + INJURIES)
##Aggregate health impact by event type
# Total casualties per event type, largest first. `health_impact` is the
# per-record FATALITIES + INJURIES column added during data cleaning, so
# summing it gives the same totals as summing the two columns directly.
health_impact_analysis <- arrange(
  summarise(
    group_by(file, EVTYPE),
    total_impact = sum(health_impact, na.rm = TRUE)
  ),
  desc(total_impact)
)
##Aggregate economic impact by event type
# PROPDMG and CROPDMG hold only the leading digits of each damage estimate;
# the magnitude is stored separately in PROPDMGEXP / CROPDMGEXP ("K", "M",
# "B", ...). Summing the raw figures mixes thousands with billions, so each
# figure is scaled to US dollars before aggregation.
#
# NOTE: any tables echoed further down in this file were produced from the
# unscaled sum and must be regenerated after this change.

# Translate a vector of damage magnitude codes into numeric multipliers.
#   exp_code: character (or factor) vector of codes, case-insensitive.
#   Returns a numeric vector of the same length.
# K, M and B are thousands, millions and billions. H is treated as hundreds
# (NOTE(review): H is not in the official code list - confirm). Every other
# value (blank, digits, "+", "-", "?") has no documented meaning, so the
# figure is left unscaled (multiplier 1) rather than guessed at.
damage_multiplier <- function(exp_code) {
  known <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  scale <- unname(known[toupper(trimws(as.character(exp_code)))])
  scale[is.na(scale)] <- 1
  scale
}

economic_impact_analysis <- file %>%
  mutate(
    damage_usd = PROPDMG * damage_multiplier(PROPDMGEXP) +
      CROPDMG * damage_multiplier(CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarise(total_damage = sum(damage_usd, na.rm = TRUE)) %>%
  arrange(desc(total_damage))
##Display the top harmful events
# The table is already sorted by total_impact in descending order, so the
# first ten rows are the ten event types with the most casualties.
head(health_impact_analysis, 10)
## # A tibble: 10 × 2
## EVTYPE total_impact
## <chr> <dbl>
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
## 6 HEAT 3037
## 7 FLASH FLOOD 2755
## 8 ICE STORM 2064
## 9 THUNDERSTORM WIND 1621
## 10 WINTER STORM 1527
# The table is already sorted by total_damage in descending order, so the
# first ten rows are the ten event types with the largest damage totals.
head(economic_impact_analysis, 10)
## # A tibble: 10 × 2
## EVTYPE total_damage
## <chr> <dbl>
## 1 TORNADO 3312277.
## 2 FLASH FLOOD 1599325.
## 3 TSTM WIND 1445168.
## 4 HAIL 1268290.
## 5 FLOOD 1067976.
## 6 THUNDERSTORM WIND 943636.
## 7 LIGHTNING 606932.
## 8 THUNDERSTORM WINDS 464978.
## 9 HIGH WIND 342015.
## 10 WINTER STORM 134700.
# cache = TRUE
# If the data processing takes too long, the
# cache = TRUE chunk option prevents repeated
# execution of time-consuming operations when knitting the document,
# improving efficiency.
# This approach ensures that the analysis starts from the
# original CSV file without any external preprocessing. The
# transformations applied enable meaningful insights into the health and
# economic impacts of severe weather events, with
# cache = TRUE optimizing execution speed.
#Results ##Most Harmful Weather Events for Health ##The analysis indicates that certain weather phenomena have a significant impact ##on public health. The following table presents the events with the highest ##number of victims (fatalities + injuries):
# Ten event types with the highest combined fatalities and injuries
# (rows are pre-sorted in descending order of total_impact).
head(health_impact_analysis, 10)
## # A tibble: 10 × 2
## EVTYPE total_impact
## <chr> <dbl>
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
## 6 HEAT 3037
## 7 FLASH FLOOD 2755
## 8 ICE STORM 2064
## 9 THUNDERSTORM WIND 1621
## 10 WINTER STORM 1527
#The graph below visually represents these events:
# ggplot2 is attached at first use because no earlier library() call is
# visible in this file; without it ggplot() is undefined.
library(ggplot2)

# Bar chart of the ten event types with the most casualties. reorder() with
# the negated total orders the bars from tallest to shortest, and geom_col()
# draws bar heights directly from total_impact.
ggplot(
  head(health_impact_analysis, 10),
  aes(x = reorder(EVTYPE, -total_impact), y = total_impact)
) +
  geom_col(fill = "red") +
  theme_minimal() +
  # Rotate the long event names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Health Impact by Event Type",
    x = "Event Type",
    y = "Total Fatalities and Injuries"
  )
##Weather Events with the Greatest Economic Impact The following table highlights the events that have caused the most economic damage to property and crops:
# Ten event types with the highest combined property and crop damage totals
# (rows are pre-sorted in descending order of total_damage).
head(economic_impact_analysis, 10)
## # A tibble: 10 × 2
## EVTYPE total_damage
## <chr> <dbl>
## 1 TORNADO 3312277.
## 2 FLASH FLOOD 1599325.
## 3 TSTM WIND 1445168.
## 4 HAIL 1268290.
## 5 FLOOD 1067976.
## 6 THUNDERSTORM WIND 943636.
## 7 LIGHTNING 606932.
## 8 THUNDERSTORM WINDS 464978.
## 9 HIGH WIND 342015.
## 10 WINTER STORM 134700.
#The graph below presents these events:
# Bar chart of the ten event types with the largest damage totals, ordered
# from the tallest bar to the shortest.
ggplot(
  data = head(economic_impact_analysis, 10),
  mapping = aes(x = reorder(EVTYPE, -total_damage), y = total_damage)
) +
  geom_col(fill = "blue") +
  theme_minimal() +
  # Rotate the long event names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("Economic Impact by Event Type") +
  xlab("Event Type") +
  ylab("Total Property and Crop Damage")
# Second rendering of the top-ten casualty chart, drawn in green; the data
# and labels are the same as in the health chart.
ggplot(
  data = head(health_impact_analysis, 10),
  mapping = aes(x = reorder(EVTYPE, -total_impact), y = total_impact)
) +
  geom_col(fill = "green") +
  theme_minimal() +
  # Rotate the long event names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("Health Impact by Event Type") +
  xlab("Event Type") +
  ylab("Total Fatalities and Injuries")
##Interpretation of the Results The analysis reveals that tornadoes have by far the greatest impact on health, followed by excessive heat, thunderstorm winds, and floods. Economically, floods and tropical storms cause the most significant financial losses in property and crops. These findings emphasize the need for mitigation strategies and proactive disaster response policies.