The data in this report comes from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database documents the occurrence of storms and other significant weather phenomena having sufficient intensity to cause loss of life, injuries, significant property damage, and/or disruption to commerce. In addition, it is a partial record of other significant meteorological events, such as record maximum or minimum temperatures or precipitation that occurs in connection with another event.
The data for this assignment come in the form of a comma-separated-value (CSV) file compressed via the bzip2 algorithm to reduce its size. It is available here:
Documentation of the database is available at the following sources:
The purpose of this report is to answer two questions from the NOAA database:
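The first two figures show the top ten weather event types for each of the two measures behind each question (fatalities and injuries for population health; property damage and crop damage for economic consequences). The last figure shows the top ten event types for the combined total of each pair of measures.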
library(ggplot2)
library(dplyr)
library(grid)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.2.4
# Read the bzip2-compressed CSV through a bzfile connection, then print the
# structure of the resulting data frame.
storm <- read.csv(
  file = bzfile(description = "repdata-data-StormData.csv.bz2")
)
str(object = storm)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ BGN_TIME : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
## $ STATE : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : Factor w/ 35 levels ""," N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_LOCATI: Factor w/ 54429 levels ""," Christiansburg",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_DATE : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_TIME : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_LOCATI: Factor w/ 34506 levels ""," CANTON"," TULIA",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ WFO : Factor w/ 542 levels ""," CI","%SD",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ZONENAMES : Factor w/ 25112 levels ""," "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : Factor w/ 436781 levels "","\t","\t\t",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Sum FATALITIES and INJURIES over all records of each event type (EVTYPE).
health.full <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = storm,
  FUN = sum
)
health.fatalities: EVTYPE by most fatalities
# Ten event types with the highest fatality totals, largest first; the
# INJURIES column is dropped so only EVTYPE and FATALITIES remain.
health.fatalities <- health.full %>%
  arrange(desc(FATALITIES)) %>%
  head(10) %>%
  select(-INJURIES)
health.injuries: EVTYPE by most injuries
# Ten event types with the highest injury totals, largest first; the
# FATALITIES column is dropped so only EVTYPE and INJURIES remain.
health.injuries <- health.full %>%
  arrange(desc(INJURIES)) %>%
  head(10) %>%
  select(-FATALITIES)
health.harmful: summation of fatalities and injuries
# Ten event types with the highest combined fatalities + injuries, largest
# first. HARMFUL holds the combined total; the two source columns are removed.
health.harmful <- health.full %>%
  mutate(HARMFUL = FATALITIES + INJURIES) %>%
  arrange(desc(HARMFUL)) %>%
  head(10) %>%
  select(-FATALITIES, -INJURIES)
Keep only the columns needed to assess economic consequences: the event type, the property and crop damage amounts, and their exponent codes.
# Working copy holding only the event type and the damage amount/exponent
# columns.
damage_columns <- c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
storm2 <- storm[, damage_columns]
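Apply the EXP multipliers to the DMG values: H, K, M and B (in either case) scale the amount by 100, 1,000, 1,000,000 and 1,000,000,000 respectively. Any other entry in PROPDMGEXP or CROPDMGEXP is treated as invalid and ignored, leaving the damage value unscaled.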
# Scale a damage amount by its exponent code. A code matching H, K, M or B
# (either case) multiplies the amount by 100, 1000, 1000000 or 1000000000
# respectively; a code matching none of them leaves the amount unchanged.
# The patterns are applied one after another in the order H, K, M, B.
scale_by_exp <- function(amount, exp_code) {
  multipliers <- c(
    "[Hh]" = 100,
    "[Kk]" = 1000,
    "[Mm]" = 1000000,
    "[Bb]" = 1000000000
  )
  for (pattern in names(multipliers)) {
    hit <- grepl(pattern, exp_code)
    amount <- ifelse(hit, amount * multipliers[[pattern]], amount)
  }
  amount
}

storm2$PROPDMG <- scale_by_exp(storm2$PROPDMG, storm2$PROPDMGEXP)
storm2$CROPDMG <- scale_by_exp(storm2$CROPDMG, storm2$CROPDMGEXP)
# Sum the scaled PROPDMG and CROPDMG over all records of each event type.
money.full <- aggregate(
  cbind(PROPDMG, CROPDMG) ~ EVTYPE,
  data = storm2,
  FUN = sum
)
money.prop: EVTYPE by highest property damage
# Ten event types with the highest property damage totals, largest first;
# the CROPDMG column is dropped so only EVTYPE and PROPDMG remain.
money.prop <- money.full %>%
  arrange(desc(PROPDMG)) %>%
  head(10) %>%
  select(-CROPDMG)
money.crop: EVTYPE by highest crop damage
# Ten event types with the highest crop damage totals, largest first; the
# PROPDMG column is dropped so only EVTYPE and CROPDMG remain.
money.crop <- money.full %>%
  arrange(desc(CROPDMG)) %>%
  head(10) %>%
  select(-PROPDMG)
money.harmful: summation of PROPDMG and CROPDMG
# Ten event types with the highest combined property + crop damage, largest
# first. HARMFUL holds the combined total; the two source columns are removed.
money.harmful <- money.full %>%
  mutate(HARMFUL = PROPDMG + CROPDMG) %>%
  arrange(desc(HARMFUL)) %>%
  head(10) %>%
  select(-PROPDMG, -CROPDMG)
# Bar chart of the ten event types with the most fatalities.

# Express the totals in thousands, matching the y-axis label.
health.fatalities$FATALITIES <- health.fatalities$FATALITIES / 1000

# Fix the bar order: factor levels run from the largest to the smallest total.
# order(-x) sorts by value in descending order. The previous order(!x)
# logically negated the numbers (all FALSE for non-zero values) and therefore
# only reproduced whatever row order the data frame already had.
health.fatalities$EVTYPE <- factor(
  health.fatalities$EVTYPE,
  levels = health.fatalities$EVTYPE[order(-health.fatalities$FATALITIES)]
)

fatalities <- ggplot(health.fatalities, aes(EVTYPE, FATALITIES)) +
  labs(
    title = "Fatalities",
    x = "Event Type",
    y = "Number of Fatalities \n (in thousands)"
  )

# The y-axis range is hard-coded for this data set.
fatalities.plot <- fatalities +
  geom_bar(stat = "identity", color = "black", fill = "midnightblue") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.grid.major = element_line(colour = "white"),
    panel.background = element_rect(fill = "antiquewhite")
  ) +
  coord_cartesian(ylim = c(0, 6))
# Bar chart of the ten event types with the most injuries, then the
# side-by-side figure of fatalities and injuries.

# Express the totals in thousands, matching the y-axis label.
health.injuries$INJURIES <- health.injuries$INJURIES / 1000

# Fix the bar order: factor levels run from the largest to the smallest total.
# order(-x) sorts by value in descending order. The previous order(!x)
# logically negated the numbers (all FALSE for non-zero values) and therefore
# only reproduced whatever row order the data frame already had.
health.injuries$EVTYPE <- factor(
  health.injuries$EVTYPE,
  levels = health.injuries$EVTYPE[order(-health.injuries$INJURIES)]
)

injuries <- ggplot(health.injuries, aes(EVTYPE, INJURIES)) +
  labs(
    title = "Injuries",
    x = "Event Type",
    y = "Number of Injuries \n (in thousands)"
  )

# The y-axis range is hard-coded for this data set.
injuries.plot <- injuries +
  geom_bar(stat = "identity", color = "black", fill = "midnightblue") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.grid.major = element_line(colour = "white"),
    panel.background = element_rect(fill = "antiquewhite")
  ) +
  coord_cartesian(ylim = c(0, 96))

# Draw the fatalities and injuries charts next to each other.
grid.arrange(fatalities.plot, injuries.plot, ncol = 2, nrow = 1)
# Bar chart of the ten event types with the highest property damage.

# Express the totals in billions, matching the y-axis label.
money.prop$PROPDMG <- money.prop$PROPDMG / 1000000000

# Fix the bar order: factor levels run from the largest to the smallest total.
# order(-x) sorts by value in descending order. The previous order(!x)
# logically negated the numbers (all FALSE for non-zero values) and therefore
# only reproduced whatever row order the data frame already had.
money.prop$EVTYPE <- factor(
  money.prop$EVTYPE,
  levels = money.prop$EVTYPE[order(-money.prop$PROPDMG)]
)

prop <- ggplot(money.prop, aes(EVTYPE, PROPDMG)) +
  labs(
    title = "Property Damage",
    x = "Event Type",
    y = "Property Damage \n (in billions)"
  )

# The y-axis range is hard-coded for this data set.
prop.plot <- prop +
  geom_bar(stat = "identity", color = "black", fill = "coral2") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.grid.major = element_line(colour = "gray80"),
    panel.grid.minor = element_line(colour = "gray80"),
    panel.background = element_rect(fill = "aliceblue")
  ) +
  coord_cartesian(ylim = c(0, 150))
# Bar chart of the ten event types with the highest crop damage, then the
# side-by-side figure of property and crop damage.

# Express the totals in billions, matching the y-axis label.
money.crop$CROPDMG <- money.crop$CROPDMG / 1000000000

# Fix the bar order: factor levels run from the largest to the smallest total.
# order(-x) sorts by value in descending order. The previous order(!x)
# logically negated the numbers (all FALSE for non-zero values) and therefore
# only reproduced whatever row order the data frame already had.
money.crop$EVTYPE <- factor(
  money.crop$EVTYPE,
  levels = money.crop$EVTYPE[order(-money.crop$CROPDMG)]
)

crop <- ggplot(money.crop, aes(EVTYPE, CROPDMG)) +
  labs(
    title = "Crop Damage",
    x = "Event Type",
    y = "Crop Damage \n (in billions)"
  )

# The y-axis range is hard-coded for this data set.
crop.plot <- crop +
  geom_bar(stat = "identity", color = "black", fill = "coral2") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.grid.major = element_line(colour = "gray80"),
    panel.grid.minor = element_line(colour = "gray80"),
    panel.background = element_rect(fill = "aliceblue")
  ) +
  coord_cartesian(ylim = c(0, 15))

# Draw the property and crop damage charts next to each other.
grid.arrange(prop.plot, crop.plot, ncol = 2, nrow = 1)
# Bar chart of the ten event types with the highest combined fatalities and
# injuries.

# Express the totals in thousands, matching the y-axis label.
health.harmful$HARMFUL <- health.harmful$HARMFUL / 1000

# Fix the bar order: factor levels run from the largest to the smallest total.
# order(-x) sorts by value in descending order. The previous order(!x)
# logically negated the numbers (all FALSE for non-zero values) and therefore
# only reproduced whatever row order the data frame already had.
health.harmful$EVTYPE <- factor(
  health.harmful$EVTYPE,
  levels = health.harmful$EVTYPE[order(-health.harmful$HARMFUL)]
)

health <- ggplot(health.harmful, aes(EVTYPE, HARMFUL)) +
  labs(
    title = "Health",
    x = "Event Type",
    y = "Damage to Population Health \n (in thousands)"
  )

# The y-axis range is hard-coded for this data set.
health.plot <- health +
  geom_bar(stat = "identity", color = "black", fill = "midnightblue") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.grid.major = element_line(colour = "white"),
    panel.background = element_rect(fill = "antiquewhite")
  ) +
  coord_cartesian(ylim = c(0, 100))
# Bar chart of the ten event types with the highest combined property and
# crop damage, then the side-by-side figure of the two combined charts.

# Express the totals in billions, matching the y-axis label.
money.harmful$HARMFUL <- money.harmful$HARMFUL / 1000000000

# Fix the bar order: factor levels run from the largest to the smallest total.
# order(-x) sorts by value in descending order. The previous order(!x)
# logically negated the numbers (all FALSE for non-zero values) and therefore
# only reproduced whatever row order the data frame already had.
money.harmful$EVTYPE <- factor(
  money.harmful$EVTYPE,
  levels = money.harmful$EVTYPE[order(-money.harmful$HARMFUL)]
)

money <- ggplot(money.harmful, aes(EVTYPE, HARMFUL)) +
  labs(
    title = "Economy",
    x = "Event Type",
    y = "Damage to Economy \n (in billions)"
  )

# The y-axis range is hard-coded for this data set.
money.plot <- money +
  geom_bar(stat = "identity", color = "black", fill = "coral2") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.grid.major = element_line(colour = "gray80"),
    panel.grid.minor = element_line(colour = "gray80"),
    panel.background = element_rect(fill = "aliceblue")
  ) +
  coord_cartesian(ylim = c(0, 155))

# Draw the combined health and economy charts next to each other.
grid.arrange(health.plot, money.plot, ncol = 2, nrow = 1)