This report explores and analyses the NOAA Storm Data to characterise the major weather events that caused substantial public-health and economic damage between 1950 and 2011. The raw data is first cleaned by standardising EVTYPE (reducing the initial 985 event types to the 48 standard types) according to the documentation provided with the question. Harm to population health is then measured in terms of injuries and fatalities, while economic damage is taken to be the sum of property and crop damage. The results show that tornadoes caused the most fatalities and injuries, and that floods caused the most economic (property + crop) damage.
Load the necessary libraries and read the data using read.csv() function.
library(ggplot2)
# Folder holding the compressed storm data file. The file is addressed through
# an explicit path instead of setwd(), so reading it does not change the
# session's working directory.
dataDir <- "~/Personal/Coursera/Course_7_Reproducible_Research"
stormFile <- file.path(dataDir, "repdata-data-StormData.csv.bz2")
# Fail early with a clear message if the file is not where it is expected.
if (!file.exists(stormFile)) {
  stop("Storm data file not found: ", stormFile, call. = FALSE)
}
# read.csv() reads the .bz2 file directly; text columns are kept as character.
rawdata <- read.csv(stormFile, header = TRUE, sep = ",", stringsAsFactors = FALSE, quote = "\"'", dec = ".", na.strings = "NA", comment.char = "")
Check the field names and dimensions of the dataset.
# Number of rows and columns in the raw table.
dim(rawdata)
## [1] 902297 37
# Field names of the raw table.
names(rawdata)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
The storm data has 37 variables; only the following 7 are used in this analysis.
Drop the irrelevant fields to obtain the working dataset, relevantData. The relevant fields are EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP:
# Keep only the event type, the casualty counts, and the two damage amounts
# together with their magnitude codes.
relevantData <- rawdata[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
# Show the column types of the working dataset.
str(relevantData)
## 'data.frame': 902297 obs. of 7 variables:
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
Use the following conversion to compute exponent of 10 for property and crop damage:
library(knitr)
# Reference table shown in the report: each magnitude code that can appear in
# PROPDMGEXP / CROPDMGEXP and the power of ten it is converted to below.
exponentVal <- data.frame(
  Notation = c(
    "'B' or 'b'",
    "'M' or 'm'",
    "'K' or 'k'",
    "'H' or 'h'",
    "'?' or '+' or '-' or Blank"
  ),
  Exponent_Value = c("9", "6", "3", "2", "0")
)
# Render the table as markdown.
kable(exponentVal)
| Notation | Exponent_Value |
|---|---|
| ‘B’ or ‘b’ | 9 |
| ‘M’ or ‘m’ | 6 |
| ‘K’ or ‘k’ | 3 |
| ‘H’ or ‘h’ | 2 |
| ‘?’ or ‘+’ or ‘-’ or Blank | 0 |
rm(exponentVal)

# Convert a character vector of magnitude codes into numeric powers of ten.
#   ""  "+"  "-"  "?"        -> 0
#   H / K / M / B (any case) -> 2 / 3 / 6 / 9
# Any other value is passed to as.numeric() as it is.
magnitudeToPower <- function(codes) {
  codes[codes == ""] <- "0"
  codes[codes %in% c("+", "-", "?")] <- "0"
  codes <- toupper(codes)
  letterPower <- c(H = "2", K = "3", M = "6", B = "9")
  isLetter <- codes %in% names(letterPower)
  codes[isLetter] <- unname(letterPower[codes[isLetter]])
  as.numeric(codes)
}

#For property damage, exponent of 10:
relevantData$PROPDMGEXP <- magnitudeToPower(relevantData$PROPDMGEXP)
#For crop damage, exponent of 10:
relevantData$CROPDMGEXP <- magnitudeToPower(relevantData$CROPDMGEXP)

#For total damage: scale each amount by its power of ten, then add property and crop damage
relevantData$TOTALDMG <- with(
  relevantData,
  (PROPDMG * 10^PROPDMGEXP) + (CROPDMG * 10^CROPDMGEXP)
)
Count the distinct event types present in the initial dataset.
# Count the distinct raw EVTYPE labels before any clean-up and report it.
eventLabelCount <- length(unique(relevantData$EVTYPE))
print(paste("Initially unique number of event =", eventLabelCount))
## [1] "Initially unique number of event = 985"
However, according to the Storm Data Documentation there are only 48 types of events accepted as standard.
# Drop rows whose total damage, fatalities and injuries add up to zero.
impactTotal <- with(relevantData, TOTALDMG + FATALITIES + INJURIES)
relevantData <- relevantData[impactTotal != 0, ]
rm(impactTotal)
# Rows left after the filter
nrow(relevantData)
## [1] 254633
# ---- Standardise EVTYPE ----------------------------------------------------
# Upper-case the raw labels and strip leading/trailing white space.
relevantData$EVTYPE <- trimws(toupper(relevantData$EVTYPE))

# Collapse to one row per distinct label, summing total damage, injuries and
# fatalities, so the rewrite rules below touch each label only once.
# aggregate() returns its groups ordered by EVTYPE, so no separate sort is done.
relevantData <- aggregate(cbind(TOTALDMG, INJURIES, FATALITIES) ~ EVTYPE, data = relevantData, sum)

# Ordered rewrite rules: column 1 is a regular expression, column 2 its
# replacement. Each rule is applied with gsub() to the output of the previous
# one, so the ORDER MATTERS. Several rules temporarily rename a label
# (e.g. "HVY SNOW", "HI WIND", "HEAVY RN") to shield it from a broader pattern
# and rename it back afterwards.
# A standard event type is assigned by first searching the documentation of the
# 48 weather types, then according to the Remarks. If not found in either, the
# label is categorised as OTHER.
eventRules <- matrix(c(
  "WILD.*", "WILD FIRE",
  "TROPICAL STORM.*", "TROPICAL STORM",
  "TSTM.*", "THUNDERSTORM",
  "THUNDERSTORM.*", "THUNDERSTORM WIND",
  "WIND.*", "WIND",
  "WINTER STORM.*", "WINTER STORM",
  "WINTER WEATH.*", "WINTER WEATHER",
  ".*TORNADO.*", "TORNADO",
  "WHIRLWIND.*", "THUNDERSTORM WIND",
  "WET MICRO.*", "THUNDERSTORM WIND",
  "WATERSPOUT.*", "WATERSPOUT",
  "URBAN.*", "HEAVY RAIN",
  "TYPHOON.*", "HURRICANE",
  "HURRICANE.*", "HURRICANE",
  "HVY.*", "HEAVY",
  "THU.*", "THUNDERSTORM WIND",
  "TUNDER.*", "THUNDERSTORM WIND",

  "STORM FORCE.*", "TROPICAL STORM",
  "STORM SUR.*", "STORM TIDE",
  "HAIL.*", "HAIL",
  "SMALL HAIL", "HAIL",
  "EXTENDED COL.*", "EXTREME COLD",
  "EXTREME COL.*", "EXTREME COLD/WIND CHILL",
  "EXTREME WIND.*", "EXTREME COLD/WIND CHILL",
  "FLASH FLO.*", "FLASH FLOOD",

  "EXTREME HEAT.*", "EXCESSIVE HEAT",
  "HEAT WAVE.*", "EXCESSIVE HEAT",
  "RECORD/EXCESSIVE HEAT.*", "EXCESSIVE HEAT",
  "RECORD HEAT.*", "EXCESSIVE HEAT",
  "FLOOD.*", "FLOOD",
  "HEAVY SNO.*", "HEAVY SNOW",
  ".*FREEZING RAIN.*", "WINTER WEATHER",
  ".*FREEZING SPRAY.*", "WINTER WEATHER",
  ".*FREEZING DRIZZLE.*", "WINTER WEATHER",
  ".*FREEZE", "FROST/FREEZE",

  "GLAZE.*", "FREEZING FOG",
  ".*MICROBURST.*", "THUNDERSTORM WIND",
  "HYPOTHERM.*", "EXTREME COLD/WIND CHILL",
  "HYPERTHERM.*", "EXCESSIVE HEAT",
  "HEAVY RAI.*", "HEAVY RAIN",
  ".*ICE.*", "ICE STORM",

  "LIGHT SNOW.*", "WINTER STORM",
  "LIGHTING.*", "LIGHTNING",
  "LIGHTNI.*", "LIGHTNING",
  # Shield EXTREME COLD/WIND CHILL from the broad COLD rule, then restore it
  "EXTREME COLD/WIND CHILL", "EXTREME CLD/WIND CHILL",
  ".*COLD.*", "COLD/WIND CHILL",
  "EXTREME CLD/WIND CHILL", "EXTREME COLD/WIND CHILL",
  "GUSTNADO", "THUNDERSTORM WIND",
  "GUSTY WI.*", "STRONG WIND",
  "SEVERE THUN.*", "THUNDERSTORM WIND",
  "RIP CURR.*", "RIP CURRENT",
  "MUD.*", "DEBRIS FLOW",
  "LANDSPOUT", "TORNADO",
  "LAND.*", "DEBRIS FLOW",

  "LIGNT.*", "LIGHTNING",
  "COASTAL.*STORM.*", "TROPICAL STORM",
  "CSTL", "COASTAL",
  "COASTAL.*", "COASTAL FLOOD",
  "EXCESSIVE RAINFALL", "HEAVY RAIN",
  "EXCESSIVE SNOW", "HEAVY SNOW",
  "RECORD RAINFALL", "HEAVY RAIN",
  "RECORD SNOW", "HEAVY SNOW",
  "SMALL STREAM FLOOD", "HEAVY RAIN",
  "BLOWING SNOW", "WINTER STORM",
  "SNOW/SLEET", "WINTER STORM",
  "SNOWMELT F.*", "FLOOD",
  "SNOW SQUALL.*", "WINTER WEATHER",
  "HEAVY LAKE.*", "LAKE-EFFECT SNOW",
  "LAKE EFFECT.*", "LAKE-EFFECT SNOW",
  "LAKE F.*", "LAKESHORE FLOOD",
  # Shield LAKE-EFFECT SNOW from the broad SNOW rule, then restore it
  "LAKE-EFFECT.*", "LAKE-EFFECT SNW",
  ".*SNOW.*", "HEAVY SNOW",
  "LAKE-EFFECT.*", "LAKE-EFFECT SNOW",

  # Only the label that is exactly "FOG" becomes DENSE FOG
  "^FOG$", "DENSE FOG",
  ".*EROSION.*", "COASTAL FLOOD",
  ".*BLIZZARD.*", "BLIZZARD",
  "AVALAN.*", "AVALANCHE",
  ".*FIRE.*", "WILDFIRE",
  "BREAKUP FLOOD", "FLOOD",
  ".*FROST.*", "FROST/FREEZE",
  "DUST ST.*", "DUST STORM",
  "DUST DEV.*", "DUST DEVIL",
  "DROUG.*", "DROUGHT",
  "DOWNBURS.*", "THUNDERSTORM WIND",

  ".*SURF.*", "HIGH SURF",
  # Shield HEAVY SNOW from the broad HEAVY rule, then restore it
  "HEAVY SNOW", "HVY SNOW",
  "HEAVY.*", "HEAVY RAIN",
  "HVY SNOW", "HEAVY SNOW",
  "ASTRONOMICAL HIGH TIDE", "STORM SURGE/TIDE",
  ".*TURB.*", "FLOOD",
  "MAJOR FLOOD", "FLOOD",
  "MINOR FLOOD", "FLASH FLOOD",
  ".*RIVER.*FLOOD", "FLOOD",
  ".*RAL FLOOD", "FLOOD",
  ".*DAL FLOOD", "FLOOD",

  "DAM BREAK", "FLASH FLOOD",
  "BLOWING DUST", "DUST STORM",
  "EXCESSIVE WETNESS", "THUNDERSTORM WIND",
  "COOL AND WET", "THUNDERSTORM WIND",
  "DRY MIRCOBURST", "THUNDERSTORM",
  "DROWNING", "MARINE THUNDERSTORM WIND",
  "GRADIENT WIND", "TROPICAL DEPRESSION",
  "HIGH WAVES", "HURRICANE",
  # Convert HIGH to HI to save the name from changing to HIGH SURF
  "HIGH WIND", "HI WIND",
  "MARINE HIGH WIND", "MARINE HI WIND",
  "HIGH.*", "HIGH SURF",
  # Now convert back HI to HIGH
  "MARINE HI WIND", "MARINE HIGH WIND",
  "HI WIND", "HIGH WIND",

  "ICY ROA.*", "ICE STORM",
  # Cold in remarks
  "LOW TEMPERATURE", "COLD/WIND CHILL",
  # Freezing rain in remarks
  "MIXED PREC.*", "WINTER WEATHER",
  # HIGH WIND in remarks
  "NON-SEVERE WIND", "HIGH WIND",
  # Steady wind speed mentioned in Remarks is as per STRONG WIND range
  "NON.* WIND", "STRONG WIND",
  # HURRICANE mentioned in Remarks
  "UNSEASONAL RAIN", "HURRICANE",
  # Not much mentioned in Remarks so putting in OTHER category
  "TORRENTIA.*", "OTHER",
  # Mudslides mentioned in Remarks so comes under DEBRIS FLOW
  "RAINSTORM", "DEBRIS FLOW",
  # 1 death by falling tree
  "RAIN/WIND", "OTHER",
  # Shield HEAVY RAIN from the broad RAIN rule
  "HEAVY RAIN", "HEAVY RN",
  # Majority of the damage caused due to moderate rain so putting under OTHER category
  "RAIN.*", "OTHER",
  # Restore HEAVY RAIN
  "HEAVY RN", "HEAVY RAIN",

  # HEAVY RAIN mentioned in Remarks
  "RAPIDL.*", "HEAVY RAIN",
  "ROCK SLIDE", "HEAVY RAIN",
  "ROGUE WAVE", "HIGH SURF",
  "ROUGH SEAS", "STRONG WIND",
  "STORM TIDE", "STORM SURGE/TIDE",
  "TORN.*", "TORNADO",

  ".*WARM.*", "HEAT",
  "WINTRY MIX", "WINTER STORM",
  # Only the label that is exactly "WIND" becomes HIGH WIND
  "^WIND$", "HIGH WIND",

  # The ones which could not fit in anywhere go to the "OTHER" category
  "^\\?$", "OTHER",
  "APACHE COUNTY", "OTHER",
  "MARINE ACCIDENT", "OTHER",
  "MARINE MISHAP", "OTHER"
), ncol = 2, byrow = TRUE, dimnames = list(NULL, c("pattern", "replacement")))

# Apply the rules one after another, in the order listed above.
for (ruleIdx in seq_len(nrow(eventRules))) {
  relevantData$EVTYPE <- gsub(
    eventRules[ruleIdx, "pattern"],
    eventRules[ruleIdx, "replacement"],
    relevantData$EVTYPE
  )
}
rm(eventRules, ruleIdx)

# Labels that now share a standard name are merged into a single row by summing
# their damage, injuries and fatalities. One pass at the end gives the same
# grouping as aggregating after every batch of rules (damage totals may differ
# only by floating-point rounding).
relevantData <- aggregate(cbind(TOTALDMG, INJURIES, FATALITIES) ~ EVTYPE, data = relevantData, sum)

#CLEAN Dataset structure:
str(relevantData)
## 'data.frame': 49 obs. of 4 variables:
## $ EVTYPE : chr "ASTRONOMICAL LOW TIDE" "AVALANCHE" "BLIZZARD" "COASTAL FLOOD" ...
## $ TOTALDMG : num 3.20e+05 3.72e+06 7.72e+08 4.45e+08 1.56e+08 ...
## $ INJURIES : num 0 170 805 7 61 ...
## $ FATALITIES: num 0 225 101 6 170 44 80 0 2 2 ...
NOTE
Apart from the 48 standard event types, there is one additional category named OTHER. It contains the event records that could not be placed under any standard event type, either because too little information was available or because they did not satisfy the criteria of any standard event.
#Change the field EVTYPE to a factor variable and plot the data.
relevantData$EVTYPE <- factor(relevantData$EVTYPE)

#FATALITIES, INJURIES AND TOTAL ECONOMIC DAMAGE PLOTS considering top 10 damaging events
# Sort on the column vectors themselves (largest first) rather than passing a
# one-column data frame to order().
fatalData <- relevantData[order(relevantData$FATALITIES, decreasing = TRUE), ]
injuryData <- relevantData[order(relevantData$INJURIES, decreasing = TRUE), ]
damageData <- relevantData[order(relevantData$TOTALDMG, decreasing = TRUE), ]

# Express damage in billions of USD: divide by 1e9.
# (The literal 10e+9 equals 1e10 and would understate every bar tenfold.)
damageData$TOTALDMG <- damageData$TOTALDMG / 1e9

# Axis-text styling shared by the three bar charts: blue tick labels, with the
# event names on the x axis rotated by 90 degrees.
axisTheme <- theme(
  axis.text.x = element_text(angle = 90, hjust = 1, colour = "BLUE"),
  axis.text.y = element_text(colour = "BLUE")
)

# head(..., 10) keeps the ten largest rows of each sorted table.
f <- ggplot(data = head(fatalData, 10), aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(stat = "identity", fill = "RED") +
  axisTheme +
  xlab("Event Type") +
  ylab("Number of Fatalities") +
  ggtitle("Fatality Counts vs Event Type, 1950-2011")
f

i <- ggplot(data = head(injuryData, 10), aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(stat = "identity", fill = "RED") +
  axisTheme +
  xlab("Event Type") +
  ylab("Number of Injuries") +
  ggtitle("Injury Counts vs Event Type, 1950-2011")
i

d <- ggplot(data = head(damageData, 10), aes(x = EVTYPE, y = TOTALDMG)) +
  geom_bar(stat = "identity", fill = "RED") +
  axisTheme +
  xlab("Event Type") +
  ylab("Total Economic Damage (in Billion USD)") +
  ggtitle("Economic Damage vs Event Type, 1950-2011")
d

# Remove objects to free memory
rm(fatalData, injuryData, damageData, rawdata, relevantData, axisTheme)
It is clear from the above figures that: