The NOAA storm dataset records 985 unique weather event types from 1/3/1950 to 11/30/2011 across 72 states and US territories. Because there are so many event types, similar events were rolled up into logical categories to better understand which types of weather events are most damaging across the United States in terms of injuries, fatalities and economic cost. This analysis finds that tornado events cause the greatest number of fatalities and injuries. From an economic perspective, cold weather events result in the greatest damage to crops, followed by flooding and storm events. Property damage from weather events is rather negligible compared to the damage to crops.
library(ggplot2)
library(scales)
# getwd()
# Work from the project directory so that the relative "./data" and
# "./figure" paths used later in the script resolve.
# NOTE(review): this path is machine-specific; the script stops here on any
# machine where ~/scripts/r/storm does not exist.
setwd("~/scripts/r/storm")
# Record the software environment (R version, platform, loaded packages) for
# reproducibility
sessionInfo()
## R version 3.0.3 (2014-03-06)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] scales_0.2.3 ggplot2_0.9.3.1 knitr_1.5
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.2-4 dichromat_2.0-0 digest_0.6.4
## [4] evaluate_0.5.3 formatR_0.10 grid_3.0.3
## [7] gtable_0.1.2 labeling_0.2 MASS_7.3-31
## [10] munsell_0.4.2 plyr_1.8.1 proto_0.3-10
## [13] RColorBrewer_1.0-5 Rcpp_0.11.1 reshape2_1.2.2
## [16] stringr_0.6.2 tools_3.0.3
dataFile <- "./data/stormdata.csv.bz2"
# Download the compressed storm data file, but only when no local copy exists
if (!file.exists(dataFile)) {
    # download.file() does not create missing directories
    dir.create(dirname(dataFile), showWarnings = FALSE, recursive = TRUE)
    url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
    # write to dataFile itself so the path is defined in exactly one place
    download.file(url, destfile = dataFile, method = "curl")
}
# Fail with a clear message instead of leaving `raw` undefined
if (!file.exists(dataFile)) {
    stop("Storm data file not found: ", dataFile)
}
# Read the file. The connection is opened explicitly, so read.table() will not
# close it for us; close it ourselves even if parsing fails.
bzCon <- bzfile(description = dataFile, open = "r", encoding = getOption("encoding"),
    compression = 9)
raw <- tryCatch(
    read.table(bzCon, header = TRUE, stringsAsFactors = FALSE, sep = ","),
    finally = close(bzCon)
)
# Preview the first rows of the raw data
head(raw)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
# Per-column summary of the raw data (quantiles for numeric columns, length
# and class for character columns, NA counts where present)
summary(raw)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE
## Min. : 1.0 Length:902297 Length:902297 Length:902297
## 1st Qu.:19.0 Class :character Class :character Class :character
## Median :30.0 Mode :character Mode :character Mode :character
## Mean :31.2
## 3rd Qu.:45.0
## Max. :95.0
##
## COUNTY COUNTYNAME STATE EVTYPE
## Min. : 0 Length:902297 Length:902297 Length:902297
## 1st Qu.: 31 Class :character Class :character Class :character
## Median : 75 Mode :character Mode :character Mode :character
## Mean :101
## 3rd Qu.:131
## Max. :873
##
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## Min. : 0 Length:902297 Length:902297 Length:902297
## 1st Qu.: 0 Class :character Class :character Class :character
## Median : 0 Mode :character Mode :character Mode :character
## Mean : 1
## 3rd Qu.: 1
## Max. :3749
##
## END_TIME COUNTY_END COUNTYENDN END_RANGE
## Length:902297 Min. :0 Mode:logical Min. : 0
## Class :character 1st Qu.:0 NA's:902297 1st Qu.: 0
## Mode :character Median :0 Median : 0
## Mean :0 Mean : 1
## 3rd Qu.:0 3rd Qu.: 0
## Max. :0 Max. :925
##
## END_AZI END_LOCATI LENGTH WIDTH
## Length:902297 Length:902297 Min. : 0.0 Min. : 0
## Class :character Class :character 1st Qu.: 0.0 1st Qu.: 0
## Mode :character Mode :character Median : 0.0 Median : 0
## Mean : 0.2 Mean : 8
## 3rd Qu.: 0.0 3rd Qu.: 0
## Max. :2315.0 Max. :4400
##
## F MAG FATALITIES INJURIES
## Min. :0 Min. : 0 Min. : 0 Min. : 0.0
## 1st Qu.:0 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0.0
## Median :1 Median : 50 Median : 0 Median : 0.0
## Mean :1 Mean : 47 Mean : 0 Mean : 0.2
## 3rd Qu.:1 3rd Qu.: 75 3rd Qu.: 0 3rd Qu.: 0.0
## Max. :5 Max. :22000 Max. :583 Max. :1700.0
## NA's :843563
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## Min. : 0 Length:902297 Min. : 0.0 Length:902297
## 1st Qu.: 0 Class :character 1st Qu.: 0.0 Class :character
## Median : 0 Mode :character Median : 0.0 Mode :character
## Mean : 12 Mean : 1.5
## 3rd Qu.: 0 3rd Qu.: 0.0
## Max. :5000 Max. :990.0
##
## WFO STATEOFFIC ZONENAMES LATITUDE
## Length:902297 Length:902297 Length:902297 Min. : 0
## Class :character Class :character Class :character 1st Qu.:2802
## Mode :character Mode :character Mode :character Median :3540
## Mean :2875
## 3rd Qu.:4019
## Max. :9706
## NA's :47
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS
## Min. :-14451 Min. : 0 Min. :-14455 Length:902297
## 1st Qu.: 7247 1st Qu.: 0 1st Qu.: 0 Class :character
## Median : 8707 Median : 0 Median : 0 Mode :character
## Mean : 6940 Mean :1452 Mean : 3509
## 3rd Qu.: 9605 3rd Qu.:3549 3rd Qu.: 8735
## Max. : 17124 Max. :9706 Max. :106220
## NA's :40
## REFNUM
## Min. : 1
## 1st Qu.:225575
## Median :451149
## Mean :451149
## 3rd Qu.:676723
## Max. :902297
##
# Column types and leading values of the raw data
str(raw)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Count missing values. is.na() on a data frame flags individual cells, so
# this is a count of missing cells, not of records (it can exceed nrow(raw)).
sum(is.na(raw))
## [1] 1745947
nas <- format(sum(is.na(raw)), big.mark = ",", scientific = FALSE)
records <- format(nrow(raw), big.mark = ",", scientific = FALSE)
paste("Across", records, "records, there are", nas, "missing values!")
## [1] "Across 902,297 records, there are 1,745,947 missing values!"
# Create a new data.frame for processing that keeps only the fields needed for
# the analysis; the check below confirms these columns contain no NAs
temp <- raw[, c("BGN_DATE", "STATE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP", "EVTYPE")]
# Check NAs again
sum(is.na(temp))
## [1] 0
nas <- format(sum(is.na(temp)), big.mark = ",", scientific = FALSE)
records <- format(nrow(temp), big.mark = ",", scientific = FALSE)
paste("Across", records, "records, there are", nas, "missing values.")
## [1] "Across 902,297 records, there are 0 missing values."
# Roll up event types into logical categories using regular expression
# matches on the event name. Some misspelled keywords are included so that as
# much data as possible is captured.
#
# Args:
#   event: event type name(s); a character vector or anything as.character()
#          can coerce (e.g. a factor). May have any length, including zero.
#
# Returns:
#   A character vector the same length as `event`. Each element is the name of
#   the first category (in the order listed below) that has a keyword
#   occurring anywhere in the upper-cased event name, or the upper-cased event
#   name itself when no keyword matches.
categorizeEvent <- function(event) {
    event <- toupper(as.character(event))
    # group 985 unique event types into 14 categories. Order matters: the
    # first category with a matching keyword wins.
    # NOTE(review): keywords match as substrings and precedence is fixed by
    # this order, so e.g. "HIGH WIND" lands in WARM (via "HIGH"),
    # "EXCESSIVE HEAT" in RAIN (via "EXCESSIVE") and "WATERSPOUT" in FLOOD
    # (via "WATER"). Confirm these roll-ups are intended.
    groups <- list(
        SUMMARY = c("SUMMARY"),
        COLD = c("AVALAN", "BLIZZARD", "COLD", "COOL", "HAIL", "ICY", "EXPOSURE",
            "FREEZE", "FREEZING", "FROST", "GLAZE", "HYPOTHERMIA", "ICE", "LOW",
            "SLEET", "SNOW", "WINTER", "WINTRY"),
        FIRE = c("FIRE", "SMOKE"),
        EROSION = c("EROSION", "LAND", "SLIDE"),
        FLOOD = c("DAM", "FLASH", "FLOOD", "STREAM", "STRM", "WATER", "WET"),
        HURRICANE = c("HURRICANE", "TROPICAL", "TYPHOON"),
        MARINE = c("BEACH", "COASTAL", "DROWNING", "FLAG", "MARINE", "RIP", "SEAS",
            "SEICHE", "SURF", "SWELL", "TIDE", "WAVE"),
        RAIN = c("CLOUD", "BURST", "EXCESSIVE", "HEAVY", "FOG", "PRECIP",
            "PRECIPITATION", "RAIN", "SHOWER"),
        STORM = c("LIGHT", "LIGNTNING", "LIGHTNING", "STORM", "THUNDERSTORM", "TSTM"),
        TORNADO = c("FUNNEL", "TORN", "TORNADO", "SPOUT"),
        TSUNAMI = c("TSUNAMI"),
        WARM = c("DRIE", "DROUGHT", "DRY", "HEAT", "HIGH", "HOT", "TEMPERATURE",
            "WARM"),
        WIND = c("DUST", "GUST", "TURBULENCE", "WIND", "WND"),
        VOLCANIC = c("VOG", "VOLCANIC")
    )
    # if no match, keep the original (upper-cased) event name
    category <- event
    pending <- rep(TRUE, length(event))
    for (name in names(groups)) {
        # keywords are plain letters, so joining them with "|" matches an
        # event exactly when at least one keyword occurs in it
        pattern <- paste(groups[[name]], collapse = "|")
        hit <- pending & grepl(pattern, event)
        category[hit] <- name
        # an event keeps the first category that claims it
        pending[hit] <- FALSE
    }
    category
}
checkMultipleEvents <- function(event, group, category) {
    # Test `event` against each keyword in `group` (used as grepl() patterns),
    # stopping at the first keyword that matches.
    # Returns `category` when a keyword matched and "NONE" otherwise.
    firstHit <- Position(function(keyword) grepl(keyword, event), group)
    if (is.na(firstHit)) "NONE" else category
}
# Damage in dollars is the product of a damage amount and the multiplier
# encoded by its exponent field: "K" denotes thousands, "M" denotes millions
# and "B" denotes billions.
#
# Args:
#   num: numeric damage amount(s).
#   exp: exponent code(s); either length 1 or the same length as `num`.
#        "K"/"M"/"B" are matched case-insensitively; a positive number (or a
#        string holding one, e.g. "5") is treated as a power of ten.
#
# Returns:
#   A numeric vector the same length as `num`; 0 wherever num <= 0.
#   NOTE(review): any other exponent code (blank, "?", "+", ...) leaves the
#   amount unscaled; confirm that is the desired treatment for this dataset.
calcDamage <- function(num, exp) {
    # Refuse silent recycling: a whole exponent column passed alongside a
    # single amount would otherwise scale it by an unrelated row's exponent.
    if (length(exp) != length(num) && length(exp) != 1L) {
        stop("`exp` must have length 1 or the same length as `num`", call. = FALSE)
    }
    code <- toupper(as.character(exp))
    multiplier <- rep(1, length(code))
    # numeric exponents: non-numeric codes become NA here, which is expected
    power <- suppressWarnings(as.numeric(code))
    isPower <- !is.na(power) & power > 0
    multiplier[isPower] <- 10^power[isPower]
    multiplier[code %in% "K"] <- 1e+03
    multiplier[code %in% "M"] <- 1e+06
    multiplier[code %in% "B"] <- 1e+09
    damage <- num * multiplier
    # amounts that are not positive contribute no damage
    damage[!is.na(num) & num <= 0] <- 0
    damage
}
# Note: knitr has been unable to complete this chunk after adding damage
# calculations, so the calculated fields are cached as csv files that are read
# back when readFromFile is TRUE in subsequent knitr runs.
# NOTE(review): csv files written by earlier versions of this chunk scaled
# every amount by the exponent of the first row; regenerate them with
# readFromFile <- FALSE before relying on the damage totals.
readFromFile <- TRUE
if (readFromFile) {
    # load the cached results of a previous run
    crop <- read.csv(file = "./data/crop_damage.csv", header = TRUE)
    prop <- read.csv(file = "./data/property_damage.csv", header = TRUE)
    data <- read.csv(file = "./data/fatalities_injuries.csv", header = TRUE)
} else {
    # original raw data format: 4/18/1950 0:00:00
    # Only the date part is parsed. Store a Date rather than the POSIXlt that
    # strptime() returns, which is a list and misbehaves as a data frame column.
    temp$begin_date <- as.Date(temp$BGN_DATE, format = "%m/%d/%Y")
    temp$year <- format(temp$begin_date, "%Y")
    temp$event_type <- as.factor(temp$EVTYPE)
    # roll up the event type to a category: categorise each distinct event
    # type once, then map the result back to the rows through the factor codes
    level_category <- vapply(levels(temp$event_type), categorizeEvent, character(1),
        USE.NAMES = FALSE)
    temp$event_category <- as.factor(level_category[as.integer(temp$event_type)])
    # keep these separate to limit processing time for running calcDamage
    prop <- subset(temp, temp$PROPDMG > 0)
    prop <- prop[, c("begin_date", "event_category", "PROPDMG", "PROPDMGEXP")]
    # mapply() pairs each amount with the exponent of the same row
    property_damage <- mapply(calcDamage, prop$PROPDMG, prop$PROPDMGEXP,
        USE.NAMES = FALSE)
    # calcDamage must yield exactly one number per row
    stopifnot(is.numeric(property_damage), length(property_damage) == nrow(prop))
    prop <- cbind(prop, property_damage)
    # results inside a braced block are not auto-printed, so print explicitly
    print(summary(prop))
    str(prop)
    crop <- subset(temp, temp$CROPDMG > 0)
    crop <- crop[, c("begin_date", "event_category", "CROPDMG", "CROPDMGEXP")]
    crop_damage <- mapply(calcDamage, crop$CROPDMG, crop$CROPDMGEXP,
        USE.NAMES = FALSE)
    stopifnot(is.numeric(crop_damage), length(crop_damage) == nrow(crop))
    crop <- cbind(crop, crop_damage)
    print(summary(crop))
    str(crop)
    # Look at numbers by event type
    events <- unique(temp$event_type)
    print(length(events))
    categories <- unique(temp$event_category)
    print(length(categories))
    print(categories)
    # tidy dataset for injuries and fatalities
    data <- temp[, c("begin_date", "year", "event_type", "event_category", "STATE",
        "FATALITIES", "INJURIES")]
    print(summary(data))
    str(data)
    # output to csv for faster loading in subsequent knitr runs
    dir.create("./data", showWarnings = FALSE)
    write.csv(crop, file = "./data/crop_damage.csv", row.names = FALSE)
    write.csv(prop, file = "./data/property_damage.csv", row.names = FALSE)
    write.csv(data, file = "./data/fatalities_injuries.csv", row.names = FALSE)
}
# Outcome 1 - 'FATALITIES': xtabs sums the fatality counts within each event
# category; the table is then flattened to a data frame (category, Freq)
xtFatalities <- xtabs(formula = FATALITIES ~ event_category, data = data)
dfFatalities <- as.data.frame(xtFatalities)
print(dfFatalities)
## event_category Freq
## 1 ? 0
## 2 APACHE COUNTY 0
## 3 COLD 1394
## 4 EROSION 44
## 5 FIRE 90
## 6 FLOOD 1563
## 7 HURRICANE 201
## 8 MARINE 1042
## 9 MILD PATTERN 0
## 10 NO SEVERE WEATHER 0
## 11 NONE 0
## 12 OTHER 0
## 13 RAIN 2108
## 14 REMNANTS OF FLOYD 0
## 15 SOUTHEAST 0
## 16 STORM 1558
## 17 SUMMARY 0
## 18 TORNADO 5633
## 19 TSUNAMI 33
## 20 URBAN AND SMALL 0
## 21 URBAN SMALL 0
## 22 URBAN/SMALL 0
## 23 VOLCANIC 0
## 24 WARM 1318
## 25 WIND 161
# Outcome 2 - 'INJURIES': xtabs sums the injury counts within each event
# category; the table is then flattened to a data frame (category, Freq)
xtInjuries <- xtabs(formula = INJURIES ~ event_category, data = data)
dfInjuries <- as.data.frame(xtInjuries)
print(dfInjuries)
## event_category Freq
## 1 ? 0
## 2 APACHE COUNTY 0
## 3 COLD 8338
## 4 EROSION 60
## 5 FIRE 1608
## 6 FLOOD 8755
## 7 HURRICANE 1716
## 8 MARINE 1292
## 9 MILD PATTERN 0
## 10 NO SEVERE WEATHER 0
## 11 NONE 0
## 12 OTHER 4
## 13 RAIN 7939
## 14 REMNANTS OF FLOYD 0
## 15 SOUTHEAST 0
## 16 STORM 15123
## 17 SUMMARY 0
## 18 TORNADO 91364
## 19 TSUNAMI 129
## 20 URBAN AND SMALL 0
## 21 URBAN SMALL 0
## 22 URBAN/SMALL 0
## 23 VOLCANIC 0
## 24 WARM 3752
## 25 WIND 448
# Put the fatality and injury totals side by side, one row per category
# (both tables were built from the same event_category column)
df <- data.frame(dfFatalities[, 1:2], dfInjuries[, 2], check.names = FALSE)
names(df) <- c("category", "fatalities", "injuries")
df[c("fatalities", "injuries")] <- lapply(df[c("fatalities", "injuries")], as.numeric)
# most harmful categories first
df <- df[with(df, order(fatalities, injuries, decreasing = TRUE)), ]
# keep categories that have both fatalities and injuries, dropping SUMMARY
df <- subset(df, category != "SUMMARY" & fatalities > 0 & injuries > 0)
print(summary(df))
## category fatalities injuries
## COLD :1 Min. : 33 Min. : 60
## EROSION :1 1st Qu.: 143 1st Qu.: 1081
## FIRE :1 Median :1180 Median : 2734
## FLOOD :1 Mean :1262 Mean :11710
## HURRICANE:1 3rd Qu.:1559 3rd Qu.: 8442
## MARINE :1 Max. :5633 Max. :91364
## (Other) :6
# Financial damages: xtabs sums the amounts within each event category
# Property damage
xtPropDmg <- xtabs(formula = property_damage ~ event_category, data = prop)
dfPropDmg <- as.data.frame(xtPropDmg)
print(dfPropDmg)
## event_category Freq
## 1 ? 5.000e+03
## 2 APACHE COUNTY 5.000e+03
## 3 COLD 1.125e+09
## 4 EROSION 2.174e+07
## 5 FIRE 1.253e+08
## 6 FLOOD 2.473e+09
## 7 HURRICANE 7.586e+07
## 8 MARINE 2.170e+07
## 9 OTHER 5.550e+04
## 10 RAIN 7.655e+07
## 11 STORM 3.293e+09
## 12 TORNADO 3.215e+09
## 13 TSUNAMI 9.053e+05
## 14 URBAN AND SMALL 5.000e+03
## 15 URBAN SMALL 5.000e+01
## 16 VOLCANIC 5.000e+05
## 17 WARM 3.848e+08
## 18 WIND 7.101e+07
# keep only categories with a positive property damage total
dfPropDmg <- subset(dfPropDmg, Freq > 0)
# drop event names too unclear to categorise ('?','APACHE COUNTY','URBAN AND
# SMALL','URBAN SMALL','OTHER')
dfPropDmg <- subset(dfPropDmg, !(event_category %in% c("?", "APACHE COUNTY",
    "URBAN AND SMALL", "URBAN SMALL", "OTHER")))
# Crop damage, summed per event category
xtCropDmg <- xtabs(formula = crop_damage ~ event_category, data = crop)
dfCropDmg <- as.data.frame(xtCropDmg)
print(dfCropDmg)
## event_category Freq
## 1 COLD 6.111e+11
## 2 EROSION 3.700e+07
## 3 FIRE 9.566e+09
## 4 FLOOD 3.675e+11
## 5 HURRICANE 1.810e+10
## 6 MARINE 1.227e+09
## 7 OTHER 1.034e+09
## 8 RAIN 1.296e+10
## 9 STORM 2.005e+11
## 10 TORNADO 1.000e+11
## 11 TSUNAMI 2.000e+07
## 12 WARM 5.361e+10
## 13 WIND 2.156e+09
# keep only categories with a positive crop damage total
dfCropDmg <- subset(dfCropDmg, Freq > 0)
# drop event names too unclear to categorise ('?','APACHE COUNTY','URBAN AND
# SMALL','URBAN SMALL','OTHER')
dfCropDmg <- subset(dfCropDmg, !(event_category %in% c("?", "APACHE COUNTY",
    "URBAN AND SMALL", "URBAN SMALL", "OTHER")))
# label each table with its damage type, then stack them into one long table
dfPropDmg$type <- "Property Damage"
dfCropDmg$type <- "Crop Damage"
dfDmg <- rbind(dfPropDmg, dfCropDmg)
names(dfDmg) <- c("event_category", "amount", "type")
print(dfDmg)
## event_category amount type
## 3 COLD 1.125e+09 Property Damage
## 4 EROSION 2.174e+07 Property Damage
## 5 FIRE 1.253e+08 Property Damage
## 6 FLOOD 2.473e+09 Property Damage
## 7 HURRICANE 7.586e+07 Property Damage
## 8 MARINE 2.170e+07 Property Damage
## 10 RAIN 7.655e+07 Property Damage
## 11 STORM 3.293e+09 Property Damage
## 12 TORNADO 3.215e+09 Property Damage
## 13 TSUNAMI 9.053e+05 Property Damage
## 16 VOLCANIC 5.000e+05 Property Damage
## 17 WARM 3.848e+08 Property Damage
## 18 WIND 7.101e+07 Property Damage
## 1 COLD 6.111e+11 Crop Damage
## 2 EROSION 3.700e+07 Crop Damage
## 31 FIRE 9.566e+09 Crop Damage
## 41 FLOOD 3.675e+11 Crop Damage
## 51 HURRICANE 1.810e+10 Crop Damage
## 61 MARINE 1.227e+09 Crop Damage
## 81 RAIN 1.296e+10 Crop Damage
## 9 STORM 2.005e+11 Crop Damage
## 101 TORNADO 1.000e+11 Crop Damage
## 111 TSUNAMI 2.000e+07 Crop Damage
## 121 WARM 5.361e+10 Crop Damage
## 131 WIND 2.156e+09 Crop Damage
# 1. Fatalities & Injuries
# dev.copy() cannot create the target directory, so make sure it exists
dir.create("./figure", showWarnings = FALSE)
# Plot Fatalities: one bar per event category
f <- ggplot(df, aes(x = category, y = fatalities))
f + geom_bar(stat = "identity") +
    labs(title = "Fatality Count", x = "Event Type", y = "Number of Fatalities") +
    scale_y_continuous(labels = comma) +
    theme_bw(base_size = 10) +
    theme(axis.text.x = element_text(angle = 90))
# copy the plot from the active device into a 480x480 png, then close the png
dev.copy(png, file = "./figure/fatalities_plot.png", width = 480, height = 480)
## quartz_off_screen
## 3
dev.off()
## pdf
## 2
# Plot Injuries: one bar per event category
i <- ggplot(df, aes(x = category, y = injuries))
i + geom_bar(stat = "identity") +
    labs(title = "Injuries Count", x = "Event Type", y = "Number of Injuries") +
    scale_y_continuous(labels = comma) +
    theme_bw(base_size = 10) +
    theme(axis.text.x = element_text(angle = 90))
dev.copy(png, file = "./figure/injuries_plot.png", width = 480, height = 480)
## quartz_off_screen
## 3
dev.off()
## pdf
## 2
# 2. Property & Crop Damage: one line per damage type across event categories
g <- ggplot(dfDmg, aes(x = event_category, y = amount, group = type, color = type)) +
    geom_point() +
    geom_line() +
    labs(title = "Total Property & Crop Damage by Event Type", x = "Event Type",
        y = "Damage ($)") +
    scale_y_continuous(labels = comma) +
    theme_bw(base_size = 10) +
    theme(axis.text.x = element_text(angle = 90))
g
dev.copy(png, file = "./figure/damages_plot.png", width = 480, height = 480)
## quartz_off_screen
## 3
dev.off()
## pdf
## 2