This analysis used the U.S. National Oceanic and Atmospheric Administration (NOAA) Storm Database to examine the effects of severe weather events on population health and the economy. The impact on population health was measured as the combined total of injuries and fatalities across all recorded storm events. The analysis shows that tornadoes are the most harmful event type to population health, causing the highest combined number of injuries and fatalities. A histogram and line plot of the EVTYPE variable reveal that storm events are unevenly distributed, with a small number of event types occurring very frequently. Economic consequences were measured using the combined property and crop damage values. Floods were found to have the greatest economic impact, resulting in the highest total economic losses among all event types. At the county level, Cook County suffered the highest combined number of injuries and fatalities, indicating significant vulnerability to severe weather events.
# Make sure the local data directory is present before downloading into it.
if (!dir.exists("rpubassigndata")) {
  dir.create("rpubassigndata")
}

# Remote location of the compressed storm data and its local destination.
url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destfile <- "rpubassigndata/StormData.csv.bz2"

# Fetch the archive only when no local copy exists; "wb" writes it in binary
# mode.
if (!file.exists(destfile)) {
  download.file(url, destfile, mode = "wb")
}

# Load the compressed CSV from the download location and preview the first rows.
stormdata <- read.csv(destfile)
head(stormdata)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Show the type and first few values of every column.
str(stormdata)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Per-column summaries: quantiles and NA counts for numeric columns, length
# and class for character columns.
summary(stormdata)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE
## Min. : 1.0 Length:902297 Length:902297 Length:902297
## 1st Qu.:19.0 Class :character Class :character Class :character
## Median :30.0 Mode :character Mode :character Mode :character
## Mean :31.2
## 3rd Qu.:45.0
## Max. :95.0
##
## COUNTY COUNTYNAME STATE EVTYPE
## Min. : 0.0 Length:902297 Length:902297 Length:902297
## 1st Qu.: 31.0 Class :character Class :character Class :character
## Median : 75.0 Mode :character Mode :character Mode :character
## Mean :100.6
## 3rd Qu.:131.0
## Max. :873.0
##
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## Min. : 0.000 Length:902297 Length:902297 Length:902297
## 1st Qu.: 0.000 Class :character Class :character Class :character
## Median : 0.000 Mode :character Mode :character Mode :character
## Mean : 1.484
## 3rd Qu.: 1.000
## Max. :3749.000
##
## END_TIME COUNTY_END COUNTYENDN END_RANGE
## Length:902297 Min. :0 Mode:logical Min. : 0.0000
## Class :character 1st Qu.:0 NA's:902297 1st Qu.: 0.0000
## Mode :character Median :0 Median : 0.0000
## Mean :0 Mean : 0.9862
## 3rd Qu.:0 3rd Qu.: 0.0000
## Max. :0 Max. :925.0000
##
## END_AZI END_LOCATI LENGTH WIDTH
## Length:902297 Length:902297 Min. : 0.0000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.0000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.0000 Median : 0.000
## Mean : 0.2301 Mean : 7.503
## 3rd Qu.: 0.0000 3rd Qu.: 0.000
## Max. :2315.0000 Max. :4400.000
##
## F MAG FATALITIES INJURIES
## Min. :0.00 Min. : 0.0 Min. : 0.00000 Min. : 0.0000
## 1st Qu.:0.00 1st Qu.: 0.0 1st Qu.: 0.00000 1st Qu.: 0.0000
## Median :1.00 Median : 50.0 Median : 0.00000 Median : 0.0000
## Mean :0.91 Mean : 46.9 Mean : 0.01678 Mean : 0.1557
## 3rd Qu.:1.00 3rd Qu.: 75.0 3rd Qu.: 0.00000 3rd Qu.: 0.0000
## Max. :5.00 Max. :22000.0 Max. :583.00000 Max. :1700.0000
## NA's :843563
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## Min. : 0.00 Length:902297 Min. : 0.000 Length:902297
## 1st Qu.: 0.00 Class :character 1st Qu.: 0.000 Class :character
## Median : 0.00 Mode :character Median : 0.000 Mode :character
## Mean : 12.06 Mean : 1.527
## 3rd Qu.: 0.50 3rd Qu.: 0.000
## Max. :5000.00 Max. :990.000
##
## WFO STATEOFFIC ZONENAMES LATITUDE
## Length:902297 Length:902297 Length:902297 Min. : 0
## Class :character Class :character Class :character 1st Qu.:2802
## Mode :character Mode :character Mode :character Median :3540
## Mean :2875
## 3rd Qu.:4019
## Max. :9706
## NA's :47
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS
## Min. :-14451 Min. : 0 Min. :-14455 Length:902297
## 1st Qu.: 7247 1st Qu.: 0 1st Qu.: 0 Class :character
## Median : 8707 Median : 0 Median : 0 Mode :character
## Mean : 6940 Mean :1452 Mean : 3509
## 3rd Qu.: 9605 3rd Qu.:3549 3rd Qu.: 8735
## Max. : 17124 Max. :9706 Max. :106220
## NA's :40
## REFNUM
## Min. : 1
## 1st Qu.:225575
## Median :451149
## Mean :451149
## 3rd Qu.:676723
## Max. :902297
##
# Total number of recorded events.
nrow(stormdata)
## [1] 902297
# Rows containing at least one NA. Every row is counted because COUNTYENDN is
# NA throughout, so complete.cases() is FALSE for the whole data set.
sum(!complete.cases(stormdata))
## [1] 902297
# Total number of NA cells across all columns.
sum(is.na(stormdata))
## [1] 1745947
# Check whether any row is an exact duplicate of an earlier row.
any(duplicated(stormdata))
## [1] FALSE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Tally how often each event type occurs, most frequent first.
evtype_freq <- count(stormdata, EVTYPE, sort = TRUE)

# Horizontal bar chart of the ten most frequent event types, with bars
# ordered by frequency.
ggplot(
  data = evtype_freq[1:10, ],
  mapping = aes(x = reorder(EVTYPE, n), y = n)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Most Frequent Storm Event Types",
    x = "Event Type",
    y = "Frequency"
  ) +
  theme_minimal()
# Keep only the columns needed for the health and economic analyses. The
# exponent columns are retained because PROPDMG / CROPDMG hold only the
# leading figures of a damage estimate; PROPDMGEXP / CROPDMGEXP carry its
# magnitude (e.g. 25 with "K" means 25,000 dollars).
storm <- stormdata[, c("EVTYPE", "FATALITIES", "INJURIES",
                       "PROPDMG", "PROPDMGEXP",
                       "CROPDMG", "CROPDMGEXP", "COUNTYNAME")]

# Translate damage magnitude codes into numeric multipliers.
#
# exp_code: character vector of magnitude codes.
# Returns a numeric vector of the same length: 1e3 for "K", 1e6 for "M" and
# 1e9 for "B" (case-insensitive, surrounding whitespace ignored). Any other
# code, including blank and NA, maps to 1 so the recorded amount is kept
# unscaled rather than guessed at.
damage_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[toupper(trimws(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Total health impact: fatalities and injuries combined, per event.
storm$HEALTH_IMPACT <- storm$FATALITIES + storm$INJURIES

# Total economic impact in US dollars: property plus crop damage, each scaled
# by its own magnitude code before summing.
storm$ECONOMIC_DAMAGE <-
  storm$PROPDMG * damage_multiplier(storm$PROPDMGEXP) +
  storm$CROPDMG * damage_multiplier(storm$CROPDMGEXP)
# Sum the combined fatalities and injuries for every event type, then rank the
# event types from most to least harmful and show the top ten.
health_by_event <- aggregate(
  HEALTH_IMPACT ~ EVTYPE,
  data = storm,
  FUN = sum
)
health_rank <- order(-health_by_event[["HEALTH_IMPACT"]])
health_by_event <- health_by_event[health_rank, ]
head(health_by_event, n = 10)
## EVTYPE HEALTH_IMPACT
## 834 TORNADO 96979
## 130 EXCESSIVE HEAT 8428
## 856 TSTM WIND 7461
## 170 FLOOD 7259
## 464 LIGHTNING 6046
## 275 HEAT 3037
## 153 FLASH FLOOD 2755
## 427 ICE STORM 2064
## 760 THUNDERSTORM WIND 1621
## 972 WINTER STORM 1527
# Histogram over the integer codes obtained by converting EVTYPE to a factor;
# each code identifies one event type, so tall bins mark ranges of codes that
# contain frequently recorded event types.
hist(
  x = as.numeric(factor(storm[["EVTYPE"]])),
  breaks = 50,
  col = "lightblue",
  xlab = "Event Type Index",
  main = "Histogram of Storm Event Types (EVTYPE)"
)
# Sum ECONOMIC_DAMAGE for every event type, then rank the event types from
# most to least costly and show the top ten.
economic_by_event <- aggregate(
  ECONOMIC_DAMAGE ~ EVTYPE,
  data = storm,
  FUN = sum
)
damage_rank <- order(-economic_by_event[["ECONOMIC_DAMAGE"]])
economic_by_event <- economic_by_event[damage_rank, ]
head(economic_by_event, n = 10)
## EVTYPE ECONOMIC_DAMAGE
## 834 TORNADO 3312276.7
## 153 FLASH FLOOD 1599325.1
## 856 TSTM WIND 1445168.2
## 244 HAIL 1268289.7
## 170 FLOOD 1067976.4
## 760 THUNDERSTORM WIND 943635.6
## 464 LIGHTNING 606932.4
## 786 THUNDERSTORM WINDS 464978.1
## 359 HIGH WIND 342014.8
## 972 WINTER STORM 134699.6
# Total casualties (fatalities + injuries) per county, ranked from highest to
# lowest.
#
# County names are not unique across states, so events are grouped by STATE
# and COUNTYNAME together; grouping by name alone pools unrelated counties
# that happen to share a name. STATE is read from stormdata because the storm
# subset does not carry it.
# NOTE(review): COUNTYNAME also holds forecast-zone codes such as "OHZ42>088";
# these are not filtered out here.
county_impact <- aggregate(
  HEALTH_IMPACT ~ STATE + COUNTYNAME,
  data = data.frame(
    STATE = stormdata$STATE,
    COUNTYNAME = stormdata$COUNTYNAME,
    HEALTH_IMPACT = stormdata$FATALITIES + stormdata$INJURIES
  ),
  FUN = sum
)
county_impact <- county_impact[order(-county_impact$HEALTH_IMPACT), ]
head(county_impact, 10)
## COUNTYNAME HEALTH_IMPACT
## 8485 JEFFERSON 2728
## 5653 GREENE 1952
## 27694 WICHITA 1907
## 10592 MADISON 1771
## 20322 OHZ42>088 1569
## 8480 JASPER 1524
## 28529 WORCESTER 1386
## 24418 TUSCALOOSA 1163
## 10614 MARION 1138
## 22148 POLK 1058