##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
In this brief analysis we analyze the consequences of weather events, specifically the consequences for population health (fatalities and injuries) and the economic consequences (property damage and crop damage). The analysis is based on the data collected by the National Oceanic and Atmospheric Administration (NOAA).
For the consequences to population health we consider two types of consequences, fatalities and injuries. The analysis reports two indicators: the total number of fatalities/injuries for each type of weather event, and the maximum number of fatalities/injuries recorded in a single event of each type. The second part of the analysis considers the same indicators for the economic consequences.
We support our study with some graphics and tables.
Storm Data is an official publication of the National Oceanic and Atmospheric Administration (NOAA). The National Weather Service receives its information from a variety of sources, which include but are not limited to:
Storm Data provides the following documents:
The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.
First, we set our working directory and, if the data has not already been downloaded, the code below downloads it.
# Work inside the author's project directory when it exists on this machine;
# on any other machine keep the current working directory instead of aborting
# on a path that is not there.
project_dir <- "~/Desktop/repos/datasciencecoursera/Reproducible Reaserch/PeerAssessment2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Fetch the compressed Storm Data file only when no local copy is present.
if (!file.exists("data.bz2")) {
  url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # Binary mode keeps the bzip2 archive byte-exact on every platform.
  download.file(url, "data.bz2", mode = "wb")
}
Read the data into the workspace and convert the column names and the event types to lowercase.
# Load the bzip2-compressed CSV straight from the archive.
data <- read.csv(bzfile("data.bz2"))
# Normalise to lowercase: first the column names, then the event-type values.
colnames(data) <- tolower(colnames(data))
data[["evtype"]] <- tolower(data[["evtype"]])
This is how our data looks. The data contains 37 variables, but we only work with these 6.
# Preview the first rows of the six columns the analysis relies on.
data %>%
  select(state, evtype, fatalities, injuries, propdmg, cropdmg) %>%
  head()
## state evtype fatalities injuries propdmg cropdmg
## 1 AL tornado 0 15 25.0 0
## 2 AL tornado 0 0 2.5 0
## 3 AL tornado 0 2 25.0 0
## 4 AL tornado 0 2 2.5 0
## 5 AL tornado 0 2 2.5 0
## 6 AL tornado 0 6 2.5 0
The entire data structure is shown below
# Compact overview of the data frame: one line per column with its type and
# first few values.
str(data)
## 'data.frame': 902297 obs. of 37 variables:
## $ state__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ bgn_date : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ bgn_time : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ time_zone : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ county : num 97 3 57 89 43 77 9 123 125 57 ...
## $ countyname: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
## $ state : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ evtype : chr "tornado" "tornado" "tornado" "tornado" ...
## $ bgn_range : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bgn_azi : Factor w/ 35 levels ""," N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ bgn_locati: Factor w/ 54429 levels ""," Christiansburg",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ end_date : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ end_time : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ county_end: num 0 0 0 0 0 0 0 0 0 0 ...
## $ countyendn: logi NA NA NA NA NA NA ...
## $ end_range : num 0 0 0 0 0 0 0 0 0 0 ...
## $ end_azi : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ end_locati: Factor w/ 34506 levels ""," CANTON"," TULIA",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ length : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ width : num 100 150 123 100 150 177 33 33 100 100 ...
## $ f : int 3 2 2 2 2 2 2 1 3 3 ...
## $ mag : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fatalities: num 0 0 0 0 0 0 0 0 1 0 ...
## $ injuries : num 15 0 2 2 2 6 1 0 14 0 ...
## $ propdmg : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ propdmgexp: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ cropdmg : num 0 0 0 0 0 0 0 0 0 0 ...
## $ cropdmgexp: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ wfo : Factor w/ 542 levels ""," CI","%SD",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ stateoffic: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ zonenames : Factor w/ 25112 levels ""," "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
## $ latitude : num 3040 3042 3340 3458 3412 ...
## $ longitude : num 8812 8755 8742 8626 8642 ...
## $ latitude_e: num 3051 0 0 0 0 ...
## $ longitude_: num 8806 0 0 0 0 ...
## $ remarks : Factor w/ 436781 levels "","\t","\t\t",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ refnum : num 1 2 3 4 5 6 7 8 9 10 ...
The first thing we do is filter the data, keeping only the records that report at least one fatality or injury.
# Health subset: keep the five relevant columns and only the records in which
# fatalities or injuries (or both) are non-zero.
harmful_data <- filter(
  select(data, bgn_date, state, evtype, fatalities, injuries),
  fatalities != 0 | injuries != 0
)
We need to classify the event types in the data according to the events permitted in Storm Data. So we extract a pattern (the first four characters) from the name of each official event and group the recorded events accordingly.
# One search pattern per official event: the first four characters of its name.
pattern <- substring(official_events, 1, 4)
# Walk the official events in order; every recorded event type that matches
# the current pattern is overwritten with the official name, so a later
# pattern can re-label rows already renamed by an earlier one.
for (i in seq_along(official_events)) {
  harmful_data$evtype[grepl(pattern[i], harmful_data$evtype)] <-
    official_events[i]
}
We are interested only in the data collected for the United States, so we filter the data by the states of the country and group it by type of event. We then sum the total fatalities/injuries and record the maximum fatalities/injuries of a single event of each type.
# Per-event-type health summary, restricted to records whose state is one of
# the abbreviations in state.abb and whose event type is an official name.
# For each event type: total and single-record maximum of fatalities/injuries.
by_evtype_harmful <- harmful_data %>%
  filter(state %in% state.abb, evtype %in% official_events) %>%
  group_by(evtype) %>%
  summarise(
    total_fatal = sum(fatalities),
    max_fatal = max(fatalities),
    total_injur = sum(injuries),
    max_injur = max(injuries)
  )
We consider the events with the maximum total of fatalities and of injuries. The events are:
# Event types with the largest total fatalities (slot 1) and the largest total
# injuries (slot 2); the wrapping parentheses print each assigned value.
harmful_event <- c()
top_total_fatal <- which.max(by_evtype_harmful$total_fatal)
(harmful_event[1] <- by_evtype_harmful$evtype[top_total_fatal])
## [1] "tornado"
top_total_injur <- which.max(by_evtype_harmful$total_injur)
(harmful_event[2] <- by_evtype_harmful$evtype[top_total_injur])
## [1] "tornado"
We consider the events with the maximum number of fatalities and of injuries in one single event. The events are:
# Event types holding the single-record maximum of fatalities (slot 3) and of
# injuries (slot 4); the wrapping parentheses print each assigned value.
top_max_fatal <- which.max(by_evtype_harmful$max_fatal)
(harmful_event[3] <- by_evtype_harmful$evtype[top_max_fatal])
## [1] "heat"
top_max_injur <- which.max(by_evtype_harmful$max_injur)
(harmful_event[4] <- by_evtype_harmful$evtype[top_max_injur])
## [1] "tornado"
The figure below shows the relation between total fatalities and total injuries per event type, and highlights the most harmful events with respect to the total of fatalities or injuries
# Scatter plot of total fatalities against total injuries, one point per event
# type. Event types with more than 3000 total fatalities are redrawn in red
# and annotated with their name to the left of the point.
fatal_totals <- by_evtype_harmful$total_fatal
injur_totals <- by_evtype_harmful$total_injur
over_3000_fatal <- fatal_totals > 3000
labels <- by_evtype_harmful$evtype[over_3000_fatal]

plot(fatal_totals, injur_totals,
     xlim = c(1, max(fatal_totals) + 200),
     ylim = c(1, max(injur_totals) + 2000),
     main = "Most harmful event for population health",
     xlab = "Total of fatalities", ylab = "Total of injuries")
points(fatal_totals[over_3000_fatal], injur_totals[over_3000_fatal],
       col = "red", pch = 20, lwd = 2)
text(fatal_totals[over_3000_fatal], injur_totals[over_3000_fatal],
     labels = labels, pos = 2, cex = 0.8)
The figure below shows the relation between the maximum fatalities and the maximum injuries per event type, and highlights the most harmful events with respect to the maximum of fatalities or injuries
# Scatter plot of the single-record maxima: fatalities on x, injuries on y.
# Two groups are redrawn in red and annotated: event types whose maximum
# injuries exceed 1000 (label to the right) and event types whose maximum
# fatalities exceed 500 (label below).
fatal_max <- by_evtype_harmful$max_fatal
injur_max <- by_evtype_harmful$max_injur
over_1000_injur <- injur_max > 1000
over_500_fatal <- fatal_max > 500
labels2 <- by_evtype_harmful$evtype[over_1000_injur]
labels3 <- by_evtype_harmful$evtype[over_500_fatal]

plot(fatal_max, injur_max,
     xlim = c(1, max(fatal_max) + 100),
     ylim = c(1, max(injur_max) + 200),
     main = "Most harmful event for population health",
     xlab = "Maximun of fatalities", ylab = "Maximun of injuries")
points(fatal_max[over_1000_injur], injur_max[over_1000_injur],
       col = "red", pch = 20, lwd = 2)
points(fatal_max[over_500_fatal], injur_max[over_500_fatal],
       col = "red", pch = 20, lwd = 2)
text(fatal_max[over_1000_injur], injur_max[over_1000_injur],
     labels = labels2, pos = 4, cex = 0.8)
text(fatal_max[over_500_fatal], injur_max[over_500_fatal],
     labels = labels3, pos = 1, cex = 0.8)
We consider only the following variables, and keep the records that report at least some property or crop damage.
# Economic subset: keep the damage-related columns and only the records in
# which property damage or crop damage (or both) is non-zero.
economic_data <- filter(
  select(data, bgn_date, state, evtype,
         propdmg, propdmgexp, cropdmg, cropdmgexp),
  propdmg != 0 | cropdmg != 0
)
As before, we map the recorded event types onto the official NOAA event names
# Re-label the economic records with the official event names, one pattern at
# a time; rows matching pattern[i] receive official_events[i].
for (i in seq_along(official_events)) {
  economic_data$evtype[grepl(pattern[i], economic_data$evtype)] <-
    official_events[i]
}
Again we consider two indicators: the total damage and the maximum damage recorded in a single event
# Per-event-type economic summary, restricted to records whose state is one of
# the abbreviations in state.abb and whose event type is an official name.
# For each event type: total and single-record maximum of propdmg / cropdmg.
# NOTE(review): propdmg and cropdmg are aggregated as stored; the propdmgexp
# and cropdmgexp columns are not applied here -- confirm this is intended.
by_evtype_economic <- economic_data %>%
  filter(state %in% state.abb, evtype %in% official_events) %>%
  group_by(evtype) %>%
  summarise(
    total_prop = sum(propdmg),
    max_prop = max(propdmg),
    total_crop = sum(cropdmg),
    max_crop = max(cropdmg)
  )
We consider the event with the maximum total property damage and the event with the maximum property damage in one single event. The events are:
# Event types with the largest total property damage (slot 1) and the largest
# property damage in a single record (slot 2); parentheses print each value.
economic_event <- c()
top_total_prop <- which.max(by_evtype_economic$total_prop)
(economic_event[1] <- by_evtype_economic$evtype[top_total_prop])
## [1] "tornado"
top_max_prop <- which.max(by_evtype_economic$max_prop)
(economic_event[2] <- by_evtype_economic$evtype[top_max_prop])
## [1] "flood"
We consider the event with the maximum total crop damage and the event with the maximum crop damage in one single event. The events are:
# Event types with the largest total crop damage (slot 3) and the largest crop
# damage in a single record (slot 4); parentheses print each value.
top_total_crop <- which.max(by_evtype_economic$total_crop)
(economic_event[3] <- by_evtype_economic$evtype[top_total_crop])
## [1] "hail"
top_max_crop <- which.max(by_evtype_economic$max_crop)
(economic_event[4] <- by_evtype_economic$evtype[top_max_crop])
## [1] "drought"
The next plot shows the events with the greatest economic consequences:
# Bar chart of total damage per event type: property damage in red with crop
# damage overlaid in blue on the same axes.
# barplot() returns the x midpoint of every bar. Those midpoints, not the bar
# indices, are the coordinates that place a label at the bar it refers to.
bar_mids <- barplot(by_evtype_economic$total_prop, col = "red",
                    ylim = c(0, max(by_evtype_economic$total_prop) + 500000))
barplot(by_evtype_economic$total_crop, col = "blue", add = TRUE, axes = FALSE,
        main = "Most economic damage events", xlab = "Events",
        ylab = "Damage in US Dollars")
legend("topleft", c("Propery Damage", "Crop Damage"), pch = c(15, 15),
       col = c("red", "blue"), cex = 0.8)

# Name the leading event type for each of the four indicators. The vertical
# offsets are unchanged; only the horizontal position now follows the bars.
top_total_prop <- which.max(by_evtype_economic$total_prop)
text(x = bar_mids[top_total_prop],
     y = max(by_evtype_economic$total_prop) + 300000,
     labels = by_evtype_economic$evtype[top_total_prop], cex = 0.7, pos = 4)

top_total_crop <- which.max(by_evtype_economic$total_crop)
text(x = bar_mids[top_total_crop],
     y = max(by_evtype_economic$total_crop) + 300000,
     labels = by_evtype_economic$evtype[top_total_crop], cex = 0.7, pos = 4)

top_max_prop <- which.max(by_evtype_economic$max_prop)
text(x = bar_mids[top_max_prop],
     y = max(by_evtype_economic$max_prop) + 300000,
     labels = by_evtype_economic$evtype[top_max_prop], cex = 0.7, pos = 4)

top_max_crop <- which.max(by_evtype_economic$max_crop)
text(x = bar_mids[top_max_crop],
     y = max(by_evtype_economic$max_crop) + 500000,
     labels = by_evtype_economic$evtype[top_max_crop], cex = 0.7, pos = 4)
The previous analysis clearly shows the type of weather event that is most dangerous for population health, both in total fatalities and in total injuries: * tornado: With 5636 fatalities recorded * tornado: With 9.1407 × 10^4 injuries recorded
In addition, we must consider another type of event: * heat: With a record of 583 fatalities in only one weather event * tornado: With a record of 1700 injuries in only one weather event
In the case of economic damage, we consider the following events, which can be expensive:
* tornado: With 3.2153 × 10^6 total property damage recorded
* flood: With a record of 5000 property damage in only one weather event
In addition, we must consider another type of event:
* hail: With 5.8595 × 10^5 total crop damage recorded
* drought: With a record of 990 crop damage in only one weather event