This study assesses severe weather data gathered by NOAA to determine which weather events inflict the greatest harm to public health and the greatest economic damage. This is determined by developing the following types of data plots:

-Pareto plots to determine the extent to which impact is concentrated among a relatively small number of event types.

-Bar plots to specifically identify the highest impact types of weather events, in terms of health and economic impact, respectively.

-Radial plots showing the relative prevalence of key types of weather events throughout the year.

The Pareto plots confirmed that, for each category of impact assessed (deaths, injuries, crop loss, and property loss), roughly a half dozen types of weather events were responsible for 75 to 80% of overall impact. The bar charts revealed that tornados are overwhelmingly the greatest contributor to fatalities and injuries, while property and crop damage are dominated by a number of types of weather events, including floods, hurricanes/typhoons, tornados, and storm surges. The radial plots showed the high degree of seasonality for some types of weather events: for example, tornados are most often encountered in the spring, thunderstorms and excessive heat activity become more prevalent during the summer, and (not unexpectedly) ice storms are most likely in the winter.

Data Processing

Read in data file
## Load the compressed storm data CSV into a data frame.
## read.table() by default treats both ' and " as quote characters and "#" as the
## start of a comment. For a comma-separated file that may contain free text,
## only the double quote is kept as a quoting character and comment parsing is
## switched off, so apostrophes and "#" inside fields are read literally.
storm_data <- read.table(
  file = "repdata_data_StormData.csv.bz2",
  sep = ",",
  header = TRUE,
  quote = "\"",
  comment.char = ""
)
Establish multipliers for PROPDMG and CROPDMG based on PROPDMGEXP and CROPDMGEXP values
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Translate damage exponent codes into numeric multipliers.
##
## The multiplier is looked up from the code's VALUE, not from its position in
## the list of unique codes, so the result does not depend on the order in which
## codes happen to appear in the data or on how many distinct codes exist.
##   - letters (case-insensitive): H = 1e2, K = 1e3, M = 1e6, B = 1e9
##   - a single digit d:           10^d
##   - anything else ("", "+", "-", "?", NA): 1
## These rules reproduce every property-damage multiplier that was previously
## listed by hand.
## NOTE(review): the previous hand-written crop list contained a multiplier of
## 10, which under these rules only a "1" code would receive -- confirm that no
## other crop code was meant to be scaled by 10.
exponent_multiplier <- function(codes) {
  codes <- toupper(as.character(codes))
  letter_mult <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  mult <- unname(letter_mult[codes])   # NA for every non-letter code
  is_digit <- grepl("^[0-9]$", codes)
  mult[is_digit] <- 10^as.numeric(codes[is_digit])
  mult[is.na(mult)] <- 1               # blank / symbol codes leave the value unscaled
  mult
}

## extract unique values for PROPDMGEXP (one-column data frame)
PROPDMGEXP <- storm_data %>% select(PROPDMGEXP) %>% unique
## multipliers corresponding, row by row, to the codes in PROPDMGEXP
prop_mult <- exponent_multiplier(PROPDMGEXP$PROPDMGEXP)
## combine the codes and their multipliers in a lookup data frame
prop_factors <- data.frame(PROPDMGEXP, prop_mult)

## extract unique values for CROPDMGEXP (one-column data frame)
CROPDMGEXP <- storm_data %>% select(CROPDMGEXP) %>% unique
## multipliers corresponding, row by row, to the codes in CROPDMGEXP
crop_mult <- exponent_multiplier(CROPDMGEXP$CROPDMGEXP)
## combine the codes and their multipliers in a lookup data frame
crop_factors <- data.frame(CROPDMGEXP, crop_mult)

## merge prop_factors into the main data set (adds column prop_mult)
combo_stage_1 <- merge(storm_data, prop_factors, by = "PROPDMGEXP")
## merge crop_factors into the main data set (adds column crop_mult)
combo_stage_2 <- merge(combo_stage_1, crop_factors, by = "CROPDMGEXP")
Clean up BGN_DATE field
library(lubridate)
## Drop the time-of-day part of BGN_DATE by cutting at the first space rather
## than trimming a fixed 8 characters: a fixed-width trim would damage any value
## whose time suffix is missing or has a different length.
bgn_date_text <- sub(" .*$", "", as.character(combo_stage_2$BGN_DATE))
## convert BGN_DATE to a Date (values are written as month/day/4-digit year)
combo_stage_2$BGN_DATE <- as.Date(bgn_date_text, format = "%m/%d/%Y")
Calculate values for Property Damage and Crop Damage
## Scale the raw damage figures by the multiplier merged in for each record.
combo_stage_2[["property_damage"]] <- with(combo_stage_2, PROPDMG * prop_mult)
combo_stage_2[["crop_damage"]] <- with(combo_stage_2, CROPDMG * crop_mult)
Extract desired fields and records from original data set.
## Keep only the columns used in the rest of the analysis.
core_fields <- select(
  combo_stage_2,
  BGN_DATE, EVTYPE, FATALITIES, INJURIES, property_damage, crop_damage
)

## Keep records with at least one non-zero impact measure.
selected_records <- core_fields %>%
  filter(FATALITIES > 0 | INJURIES > 0 | property_damage > 0 | crop_damage > 0)

## Combined health impact (fatalities + injuries) and combined economic
## impact (property damage + crop damage) per record.
selected_records[["fatalities_injuries"]] <-
  with(selected_records, FATALITIES + INJURIES)
selected_records[["property_crops"]] <-
  with(selected_records, property_damage + crop_damage)
Group by Event Type and summarize by parameters of interest (i.e. health and economic factors)
## One group per event type
together_by_EVTYPE <- selected_records %>% group_by(EVTYPE)

## Per-event-type totals, one summary table per measure.
## Health measures
fatalities_by_EVTYPE <- together_by_EVTYPE %>%
  summarise(total_fatalities = sum(FATALITIES, na.rm = TRUE))
injuries_by_EVTYPE <- together_by_EVTYPE %>%
  summarise(total_injuries = sum(INJURIES, na.rm = TRUE))
## Economic measures
propdam_by_EVTYPE <- together_by_EVTYPE %>%
  summarise(total_propdam = sum(property_damage, na.rm = TRUE))
cropdam_by_EVTYPE <- together_by_EVTYPE %>%
  summarise(total_cropdam = sum(crop_damage, na.rm = TRUE))
## Combined measures
fat_inj_by_EVTYPE <- together_by_EVTYPE %>%
  summarise(total_fat_inj = sum(fatalities_injuries, na.rm = TRUE))
prop_crop_by_EVTYPE <- together_by_EVTYPE %>%
  summarise(total_propcrop = sum(property_crops, na.rm = TRUE))
Prepare data needed to generate Pareto Charts
# Turn each per-event-type summary into a numeric vector named by event type,
# the input form used for the Pareto charts.
v_fatalities <- setNames(as.vector(fatalities_by_EVTYPE$total_fatalities),
                         fatalities_by_EVTYPE$EVTYPE)
v_injuries <- setNames(as.vector(injuries_by_EVTYPE$total_injuries),
                       injuries_by_EVTYPE$EVTYPE)
v_propdam <- setNames(as.vector(propdam_by_EVTYPE$total_propdam),
                      propdam_by_EVTYPE$EVTYPE)
v_cropdam <- setNames(as.vector(cropdam_by_EVTYPE$total_cropdam),
                      cropdam_by_EVTYPE$EVTYPE)

# Keep only event types with a strictly positive total.
v_fatalities_pareto <- v_fatalities[v_fatalities > 0]
v_injuries_pareto <- v_injuries[v_injuries > 0]
v_propdam_pareto <- v_propdam[v_propdam > 0]
v_cropdam_pareto <- v_cropdam[v_cropdam > 0]

Results

## In above, fig.width and fig.height ensure sufficient plot size

## Stack the four Pareto charts vertically and shrink the plot margins to avoid
## the "figure margins too large" error.
## Alternative with smaller text, kept for reference:
## par(mfcol=c(4,1),mar=c(1,1,1,1),cex.axis=0.5,cex.lab=0.5,cex.main=0.8)
par(mfcol = c(4, 1), mar = c(1, 1, 1, 1))

## library() stops immediately if qcc is not installed; require() would only
## warn and let the script fail later at the first pareto.chart() call.
library(qcc)
## Package 'qcc', version 2.6
## Type 'citation("qcc")' for citing this R package in publications.

## One Pareto chart per impact measure; each call draws the chart and returns
## its frequency table, which is kept for later inspection.
p_fatalities <- pareto.chart(v_fatalities_pareto, ylab = "Cumulative Fatalities",
                             xlab = "Event Type",
                             main = "Pareto Chart for Fatalities", cex.names = 0.2)
p_injuries <- pareto.chart(v_injuries_pareto, ylab = "Cumulative Injuries",
                           xlab = "Event Type",
                           main = "Pareto Chart for Injuries", cex.names = 0.2)
p_propdam <- pareto.chart(v_propdam_pareto, ylab = "Cumulative Cost",
                          xlab = "Event Type",
                          main = "Pareto Chart for Property Damage", cex.names = 0.1)
p_cropdam <- pareto.chart(v_cropdam_pareto, ylab = "Cumulative Cost",
                          xlab = "Event Type",
                          main = "Pareto Chart for Crop Damage", cex.names = 0.2)

## Create bar charts showing top 10 contributors to FATALITIES & INJURIES (combined)
##      and property damage & crop damage (combined)
## Arrange fat_inj_by_EVTYPE and prop_crop_by_EVTYPE in descending order by
##      total_fat_inj and total_propcrop, respectively.
## The totals are passed to order() as plain numeric vectors (selected by column
## name); the summaries come from dplyr, where a `[, 2]` slice stays a one-column
## table instead of becoming a vector.
fat_inj_descending <-
  fat_inj_by_EVTYPE[order(-fat_inj_by_EVTYPE$total_fat_inj), ]
propcrop_descending <-
  prop_crop_by_EVTYPE[order(-prop_crop_by_EVTYPE$total_propcrop), ]
## select top 10 event types; head() returns fewer rows (rather than NA-filled
## rows) if a summary has fewer than 10 event types
fat_inj_top_10 <- head(fat_inj_descending, 10)
propcrop_top_10 <- head(propcrop_descending, 10)
## Plot bar charts for fat_inj_top_10 and propcrop_top_10
## Arrange for plots to be arranged 2 high and 1 wide
par(mfcol = c(2, 1))

barplot(fat_inj_top_10$total_fat_inj, names.arg = fat_inj_top_10$EVTYPE,
        cex.names = 0.5, las = 2, cex.axis = 0.6, cex.lab = 0.6,
        xlab = "Event Type", ylab = "Combined Fatalities and Injuries")
mtext(side = 3, "Top 10 Event Types resulting in Fatalities and Injuries",
      line = 2, cex = 1.0)
barplot(propcrop_top_10$total_propcrop, names.arg = propcrop_top_10$EVTYPE,
        cex.names = 0.5, las = 2, cex.axis = 0.6, cex.lab = 0.6,
        xlab = "Event Type", ylab = "Total Cost ($)")
mtext(side = 3, "Top 10 Event Types resulting in Property & Crop Damage",
      line = 2, cex = 1.0)

# Seasonal activity data for the polar plots.
# Each record's start date is expressed as a day of the year and then as an
# angle, scaled so that 365 days span 360 degrees.
# NOTE(review): day 366 of a leap year maps to slightly more than 360 degrees.
core_fields[["day_of_year"]] <- yday(core_fields[["BGN_DATE"]])
core_fields[["day_of_year_degrees"]] <- core_fields[["day_of_year"]] * 360 / 365

# For each major event type: take its records, group them by angle, and count
# the number of records at each angle.

# Tornado
tornado_data <- core_fields %>% filter(EVTYPE == "TORNADO")
tornado_by_day <- tornado_data %>% group_by(day_of_year_degrees)
tornado_count <- tornado_by_day %>% summarise(count = n())

# Excessive heat
excessive_heat_data <- core_fields %>% filter(EVTYPE == "EXCESSIVE HEAT")
excessive_heat_by_day <- excessive_heat_data %>% group_by(day_of_year_degrees)
excessive_heat_count <- excessive_heat_by_day %>% summarise(count = n())

# Lightning
lightning_data <- core_fields %>% filter(EVTYPE == "LIGHTNING")
lightning_by_day <- lightning_data %>% group_by(day_of_year_degrees)
lightning_count <- lightning_by_day %>% summarise(count = n())

# Thunderstorm wind
thunderstorm_wind_data <- core_fields %>% filter(EVTYPE == "TSTM WIND")
thunderstorm_wind_by_day <- thunderstorm_wind_data %>% group_by(day_of_year_degrees)
thunderstorm_wind_count <- thunderstorm_wind_by_day %>% summarise(count = n())

# Flood
flood_data <- core_fields %>% filter(EVTYPE == "FLOOD")
flood_by_day <- flood_data %>% group_by(day_of_year_degrees)
flood_count <- flood_by_day %>% summarise(count = n())

# Hurricane / typhoon
hurricane_typhoon_data <- core_fields %>% filter(EVTYPE == "HURRICANE/TYPHOON")
hurricane_typhoon_by_day <- hurricane_typhoon_data %>% group_by(day_of_year_degrees)
hurricane_typhoon_count <- hurricane_typhoon_by_day %>% summarise(count = n())

# Storm surge
storm_surge_data <- core_fields %>% filter(EVTYPE == "STORM SURGE")
storm_surge_by_day <- storm_surge_data %>% group_by(day_of_year_degrees)
storm_surge_count <- storm_surge_by_day %>% summarise(count = n())

# Drought
drought_data <- core_fields %>% filter(EVTYPE == "DROUGHT")
drought_by_day <- drought_data %>% group_by(day_of_year_degrees)
drought_count <- drought_by_day %>% summarise(count = n())

# Ice storm
ice_storm_data <- core_fields %>% filter(EVTYPE == "ICE STORM")
ice_storm_by_day <- ice_storm_data %>% group_by(day_of_year_degrees)
ice_storm_count <- ice_storm_by_day %>% summarise(count = n())

# plot polar graphs
## Arrange for plots to be arranged 3 high and 3 wide
par(mfcol = c(3, 3), cex.lab = 0.5)
# install.packages("plotrix")
# library() stops immediately if plotrix is not installed; require() would only
# warn and let the script fail later at the first polar.plot() call.
library(plotrix)

# Month labels and the angles (in degrees) at which they are drawn; shared by
# every panel so the nine plots cannot drift apart.
month_labels <- c("Jan 1", "Feb 1", "Mar 1", "Apr 1", "May 1", "Jun 1",
                  "Jul 1", "Aug 1", "Sep 1", "Oct 1", "Nov 1", "Dec 1")
month_label_pos <- c(0, 32, 59, 90, 119, 150, 180, 210, 241, 270, 301, 330)

# Per-angle counts for each event type, keyed by the panel title. The list
# order is the drawing order (panels fill column by column).
activity_counts <- list(
  "Tornado Activity" = tornado_count,
  "Excessive Heat Activity" = excessive_heat_count,
  "Lightning Activity" = lightning_count,
  "Thunderstorm Wind Activity" = thunderstorm_wind_count,
  "Flood Activity" = flood_count,
  "Hurricane-Typhoon Activity" = hurricane_typhoon_count,
  "Storm Surge Activity" = storm_surge_count,
  "Drought Activity" = drought_count,
  "Ice Storm Activity" = ice_storm_count
)

# One polar polygon per event type: radius = number of records, angle = day of
# year in degrees, starting at the top (start = 90) and running clockwise.
for (plot_title in names(activity_counts)) {
  counts <- activity_counts[[plot_title]]
  polar.plot(counts$count, counts$day_of_year_degrees, rp.type = "p", lwd = 2,
             line.col = "red", xaxt = "n", ylim = c(0, max(counts$count)),
             labels = month_labels,
             label.pos = month_label_pos,
             start = 90, clockwise = TRUE, show.grid.labels = FALSE,
             main = plot_title)
}