PAII_template.Rmd Prepared by Marcel Merchat October 2, 2015
The purpose of this project is to provide a reproducible data analysis
for government officials who need to prepare for severe weather events.
The raw data was processed to prepare a gross analysis of the risk for
fatalities and the risk for property damage.
The raw data consists of 902,297 records of weather events since 1950
with 37 variables recorded for each event. The important fields for this
analysis are the "EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", and
"PROPDMGEXP" columns of data.
The data contains five fields that were used in this analysis:
"EVTYPE" The type of weather event such as a "TORNADO"
"FATALITIES" The number of deaths attributed to each event
"INJURIES" The number of reported injuries for each event
"PROPDMG" The significant figures for the damages in thousands
"PROPDMGEXP" A code for the multiplier for the "PROPDMG" field
The damage in millions of dollars is the product of the
"PROPDMG" field and the multiplier that corresponds to the
"PROPDMGEXP" field.
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:plyr':
##
## here
## The following object is masked from 'package:base':
##
## date
## Load the raw storm data and keep only the events that caused harm.
## The CSV file is read from the current working directory.
file_name <- "repdata-data-StormData.csv"
storms <- read.csv(file_name)

## Copy the columns of interest under shorter names.
## as.character() / as.numeric(as.character()) return the printed values
## whether read.csv() delivered the column as text, numbers, or a factor.
stormdata <- data.frame(
  DATE = as.character(storms[["BGN_DATE"]]),
  TIME = as.character(storms[["BGN_TIME"]]),
  COUNTY = storms[["COUNTY"]],
  STATE = storms[["STATE"]],
  EVTYPE = storms[["EVTYPE"]],
  LENGTH = storms[["LENGTH"]],
  WIDTH = storms[["WIDTH"]],
  MAG = storms[["MAG"]],
  FATALITIES = as.numeric(as.character(storms[["FATALITIES"]])),
  INJURIES = storms[["INJURIES"]],
  PROPDMG = as.numeric(as.character(storms[["PROPDMG"]])),
  PROPDMGEXP = storms[["PROPDMGEXP"]]
)

## Drop the columns that the analysis does not use.
deletedcolumns <- c("TIME", "LENGTH", "WIDTH")
df1 <- stormdata[, !(names(stormdata) %in% deletedcolumns)]

## Keep events with at least one fatality, one injury, or some property
## damage. which() discards rows whose comparison evaluates to NA; a bare
## logical index would turn each of them into a row filled with NA.
has_impact <- df1[["FATALITIES"]] > 0 |
  df1[["INJURIES"]] > 0 |
  df1[["PROPDMG"]] > 0
severe <- df1[which(has_impact), ]

## Only the leading month/day/year part of the text is parsed into a Date.
severe[["DATE"]] <- as.Date(severe[["DATE"]], format = "%m/%d/%Y")
## Sort the observed records by their number of fatalities, largest first.
severe <- arrange(severe, -FATALITIES)

## Find the total fatalities for each type of event since 1950.
## Columns are addressed by name so the totals stay correct even if the
## column layout of `severe` changes.
risks <- tapply(severe[["FATALITIES"]], severe[["EVTYPE"]], sum)
## na.last = NA removes event types whose total is NA.
risks <- sort(risks, decreasing = TRUE, na.last = NA)

## Plot the five deadliest event types (fewer when fewer are available,
## instead of padding the plot with NA bars).
top_risks <- risks[seq_len(min(5, length(risks)))]
harm_plot <- barplot(
  height = top_risks, beside = FALSE, width = 5,
  axes = FALSE, axisnames = FALSE,
  ylab = "Fatalities",
  main = "Harmful Weather Events Since 1950"
)
axis(2)
## barplot() returns the bar midpoints; use them to place the labels.
axis(1, at = harm_plot, labels = names(top_risks), cex.axis = 0.6)
## Table of the individual events at the top of `severe`.
## `severe` was sorted by FATALITIES (largest first) above, so its first
## 50 rows are the deadliest records; the first 15 of those are displayed.
top_records <- severe[1:50, ]
df_harmful <- data.frame(
  top_records[, "STATE"],
  top_records[, "EVTYPE"],
  top_records[, "FATALITIES"],
  top_records[, "INJURIES"]
)
names(df_harmful) <- c("STATE", "EVENT", "FATALITIES", "INJURIES")
grid.table(head(df_harmful, 15))
## Convert the property damage of every record to millions of dollars.
## "PROPDMG" holds the significant figures and "PROPDMGEXP" holds a code for
## the multiplier. We use the multipliers described by Tobias Rosenberger on
## the RPubs website (July 26, 2015):
##   PROPDMGEXP code    multiplier (dollars)   multiplier (millions)
##   ""                 1                      1e-06
##   "-", "?", "+"      0                      0
##   "0" ... "8"        1, 10, ..., 1e+08      1e-06, 1e-05, ..., 100
##   "H", "h"           100                    1e-04
##   "K", "k"           1000                   1e-03
##   "M", "m"           1e+06                  1
##   "B", "b"           1e+09                  1000
exp_codes <- c(
  "", "-", "?", "+",
  "0", "1", "2", "3", "4", "5", "6", "7", "8",
  "H", "h", "K", "k", "M", "m", "B", "b"
)
millions_per_unit <- c(
  0.000001, 0, 0, 0,
  0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100,
  0.0001, 0.0001, 0.001, 0.001, 1, 1, 1000, 1000
)

## Look up the multiplier of each record. match() is used rather than a
## named vector because the empty string "" can never be matched as a name.
## Codes that are not in the table yield NA.
code_row <- match(as.character(severe[["PROPDMGEXP"]]), exp_codes)
record_scale <- millions_per_unit[code_row]

## The new column is appended after the existing columns of `severe`.
severe[["dmg_millions"]] <- record_scale * severe[["PROPDMG"]]
## A zero multiplier means zero damage whatever "PROPDMG" contains.
severe[which(record_scale == 0), "dmg_millions"] <- 0
## Sort the records so that the damages are listed in descending order.
severe <- arrange(severe, -dmg_millions)

## Find the total damages for each type of event since 1950.
## Columns are addressed by name so the totals stay correct even if the
## column layout of `severe` changes. Dividing by 1000 converts millions
## of dollars to billions of dollars.
damages <- tapply(severe[["dmg_millions"]], severe[["EVTYPE"]], sum) / 1000
## na.last = NA removes event types whose total is NA.
damages <- sort(damages, decreasing = TRUE, na.last = NA)

## Plot the four costliest event types (fewer when fewer are available,
## instead of padding the plot with NA bars).
top_damages <- damages[seq_len(min(4, length(damages)))]
damage_plot <- barplot(
  height = top_damages, beside = FALSE, width = 5,
  axes = FALSE, axisnames = FALSE,
  ylab = "Damages in Billions of Dollars",
  main = "Severe Economic Weather Events"
)
axis(2)
## barplot() returns the bar midpoints; use them to place the labels.
axis(1, at = damage_plot, labels = names(top_damages), cex.axis = 0.55)
## Build and display the table of the most damaging events.
## `severe` was sorted by dmg_millions (largest first) above, so the first
## 15 rows are the costliest records.
## NOTE(review): `deletedcolumns` is reassigned here but is not read again
## in the visible part of the script -- confirm whether it is still needed.
deletedcolumns <- c("DATE", "PROPDMG", "PROPDMGEXP")
severe_table1 <- setNames(
  data.frame(
    severe[["STATE"]],
    severe[["EVTYPE"]],
    severe[["FATALITIES"]],
    severe[["dmg_millions"]]
  ),
  c("STATE", "EVENT", "FATALITIES", "Dollar_Loss_in_Millions")
)
dftable <- severe_table1[seq_len(15), ]
grid.table(dftable)