The data can be found at:
The documentation can be found at:
## Work from the local clone of the assignment repository.
## NOTE(review): this absolute path is machine-specific; the script only runs
## as-is where that directory exists.
setwd("C:\\Users\\frase\\Documents\\GitHub\\RepData_PeerAssessment2")
## The CSV was unzipped beforehand. Every column is read as plain text
## (no factors, no type guessing); only the literal string "NA" becomes NA.
StormData <- read.csv(
  file = "repdata-data-StormData.csv",
  colClasses = "character",
  na.strings = "NA",
  stringsAsFactors = FALSE
)
## Structural overview of the raw table: row/column counts, then column types.
c(nrow(StormData), ncol(StormData))
## [1] 902297 37
str(StormData)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : chr "1.00" "1.00" "1.00" "1.00" ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : chr "97.00" "3.00" "57.00" "89.00" ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : chr "0.00" "0.00" "0.00" "0.00" ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: chr "0.00" "0.00" "0.00" "0.00" ...
## $ COUNTYENDN: chr "" "" "" "" ...
## $ END_RANGE : chr "0.00" "0.00" "0.00" "0.00" ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : chr "14.00" "2.00" "0.10" "0.00" ...
## $ WIDTH : chr "100.00" "150.00" "123.00" "100.00" ...
## $ F : chr "3" "2" "2" "2" ...
## $ MAG : chr "0.00" "0.00" "0.00" "0.00" ...
## $ FATALITIES: chr "0.00" "0.00" "0.00" "0.00" ...
## $ INJURIES : chr "15.00" "0.00" "2.00" "2.00" ...
## $ PROPDMG : chr "25.00" "2.50" "25.00" "2.50" ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : chr "0.00" "0.00" "0.00" "0.00" ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : chr "3040.00" "3042.00" "3340.00" "3458.00" ...
## $ LONGITUDE : chr "8812.00" "8755.00" "8742.00" "8626.00" ...
## $ LATITUDE_E: chr "3051.00" "0.00" "0.00" "0.00" ...
## $ LONGITUDE_: chr "8806.00" "0.00" "0.00" "0.00" ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : chr "1.00" "2.00" "3.00" "4.00" ...
## Show the first six records.
head(StormData, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1.00 4/18/1950 0:00:00 0130 CST 97.00 MOBILE AL
## 2 1.00 4/18/1950 0:00:00 0145 CST 3.00 BALDWIN AL
## 3 1.00 2/20/1951 0:00:00 1600 CST 57.00 FAYETTE AL
## 4 1.00 6/8/1951 0:00:00 0900 CST 89.00 MADISON AL
## 5 1.00 11/15/1951 0:00:00 1500 CST 43.00 CULLMAN AL
## 6 1.00 11/15/1951 0:00:00 2000 CST 77.00 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0.00 0.00
## 2 TORNADO 0.00 0.00
## 3 TORNADO 0.00 0.00
## 4 TORNADO 0.00 0.00
## 5 TORNADO 0.00 0.00
## 6 TORNADO 0.00 0.00
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 0.00 14.00 100.00 3 0.00 0.00
## 2 0.00 2.00 150.00 2 0.00 0.00
## 3 0.00 0.10 123.00 2 0.00 0.00
## 4 0.00 0.00 100.00 2 0.00 0.00
## 5 0.00 0.00 150.00 2 0.00 0.00
## 6 0.00 1.50 177.00 2 0.00 0.00
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15.00 25.00 K 0.00
## 2 0.00 2.50 K 0.00
## 3 2.00 25.00 K 0.00
## 4 2.00 2.50 K 0.00
## 5 2.00 2.50 K 0.00
## 6 6.00 2.50 K 0.00
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040.00 8812.00 3051.00 8806.00 1.00
## 2 3042.00 8755.00 0.00 0.00 2.00
## 3 3340.00 8742.00 0.00 0.00 3.00
## 4 3458.00 8626.00 0.00 0.00 4.00
## 5 3412.00 8642.00 0.00 0.00 5.00
## 6 3450.00 8748.00 0.00 0.00 6.00
## List the column names.
colnames(StormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
## Are there any missing values?
## Every column was read as character, so blank fields come through as ""
## rather than NA (see the str() output above); is.na() alone cannot see them.
sum(is.na(StormData))
## [1] 0
## Count blank strings per column so the empty fields are visible as well.
vapply(
  StormData,
  function(column) sum(column == "", na.rm = TRUE),
  integer(1)
)
library(dplyr)
## Total fatalities per event type (rows with at least one fatality only).
StormFatal <- subset(StormData, as.numeric(StormData$FATALITIES) > 0)
## Convert the counts to numeric BEFORE they go into data.frame(): on R < 4.0
## data.frame() turns character columns into factors by default, and
## as.numeric() on a factor yields level codes instead of the recorded counts.
StormFatal <- data.frame(
  StormFatal.EVTYPE = StormFatal$EVTYPE,
  StormFatal.FATALITIES = as.numeric(StormFatal$FATALITIES)
)
StormFatalAgg <- aggregate(
  StormFatal.FATALITIES ~ StormFatal.EVTYPE,
  data = StormFatal,
  FUN = sum
)
colnames(StormFatalAgg) <- c("Event", "Fatalities")
## Sort by Fatalities descending
StormFatalSort <- arrange(StormFatalAgg, desc(Fatalities))
head(StormFatalSort)
## NOTE(review): the output previously pasted here (e.g. TORNADO 19335) was
## recorded before the numeric conversion above and appears to be sums of
## factor codes rather than fatality counts; re-run to regenerate it.
## Total injuries per event type (rows with at least one injury only).
StormInj <- subset(StormData, as.numeric(StormData$INJURIES) > 0)
## Convert the counts to numeric BEFORE they go into data.frame(): on R < 4.0
## data.frame() turns character columns into factors by default, and
## as.numeric() on a factor yields level codes instead of the recorded counts.
StormInj <- data.frame(
  StormInj.EVTYPE = StormInj$EVTYPE,
  StormInj.INJURIES = as.numeric(StormInj$INJURIES)
)
StormInjAgg <- aggregate(
  StormInj.INJURIES ~ StormInj.EVTYPE,
  data = StormInj,
  FUN = sum
)
colnames(StormInjAgg) <- c("Event", "Injuries")
## Sort by Injuries descending
StormInjSort <- arrange(StormInjAgg, desc(Injuries))
head(StormInjSort)
## NOTE(review): the output previously pasted here (e.g. TORNADO 525450) was
## recorded before the numeric conversion above and appears to be sums of
## factor codes rather than injury counts; re-run to regenerate it.
## Total property damage per event type.
## Each amount is PROPDMG multiplied by a value chosen from that row's
## PROPDMGEXP code.
StormPropDmg <- subset(StormData, as.numeric(StormData$PROPDMG) != 0)
## find the values in PROPDMGEXP to be used as multipliers
table(StormPropDmg$PROPDMGEXP)
##
## - + 0 2 3 4 5 6 7
## 76 1 5 209 1 1 4 18 3 2
## B h H K m M
## 40 1 6 227481 7 11319
## Multiplier per code (letters are matched case-insensitively). Codes not
## listed here -- including "" and "0" -- fall back to a multiplier of 1.
PropDmgMultipliers <- c(
  B = 1e9, M = 1e6, K = 1e3, H = 100,
  "-" = 0, "+" = 0,
  "2" = 100, "3" = 1000, "4" = 1e4, "5" = 1e5, "6" = 1e6, "7" = 1e7
)
## Look the multiplier up row by row. `if` only examines a single condition
## value, so a nested if/else over the whole column would pick one multiplier
## and apply it to every row (and is an error on R >= 4.2).
PropDmgFactor <- unname(PropDmgMultipliers[
  match(toupper(StormPropDmg$PROPDMGEXP), names(PropDmgMultipliers))
])
PropDmgFactor[is.na(PropDmgFactor)] <- 1
StormPropDmg$PropertyDamage <- as.numeric(StormPropDmg$PROPDMG) * PropDmgFactor
StormPropDmg <- data.frame(StormPropDmg$EVTYPE, StormPropDmg$PropertyDamage)
StormPropDmgAgg <- aggregate(
  StormPropDmg.PropertyDamage ~ StormPropDmg.EVTYPE,
  data = StormPropDmg,
  FUN = sum
)
colnames(StormPropDmgAgg) <- c("Event", "PropertyDamage")
## Sort by Property Damage amount descending
StormPropDmgSort <- arrange(StormPropDmgAgg, desc(PropertyDamage))
head(StormPropDmgSort)
## NOTE(review): the output previously pasted here (e.g. TORNADO 3212258160)
## was produced with a single multiplier applied to every row; re-run to
## regenerate it with the per-row multipliers.
## Total crop damage per event type.
## Each amount is CROPDMG multiplied by a value chosen from that row's
## CROPDMGEXP code.
StormCropDmg <- subset(StormData, as.numeric(StormData$CROPDMG) != 0)
## find the values in CROPDMGEXP to be used as multipliers
table(StormCropDmg$CROPDMGEXP)
##
## 0 B k K m M
## 3 12 7 21 20137 1 1918
## Multiplier per code (letters are matched case-insensitively). Codes not
## listed here -- including "" and "0" -- fall back to a multiplier of 1.
CropDmgMultipliers <- c(B = 1e9, M = 1e6, K = 1e3, H = 100)
## Look the multiplier up row by row. `if` only examines a single condition
## value, so a nested if/else over the whole column would pick one multiplier
## and apply it to every row (and is an error on R >= 4.2).
CropDmgFactor <- unname(CropDmgMultipliers[
  match(toupper(StormCropDmg$CROPDMGEXP), names(CropDmgMultipliers))
])
CropDmgFactor[is.na(CropDmgFactor)] <- 1
StormCropDmg$CropDamage <- as.numeric(StormCropDmg$CROPDMG) * CropDmgFactor
StormCropDmg <- data.frame(StormCropDmg$EVTYPE, StormCropDmg$CropDamage)
StormCropDmgAgg <- aggregate(
  StormCropDmg.CropDamage ~ StormCropDmg.EVTYPE,
  data = StormCropDmg,
  FUN = sum
)
colnames(StormCropDmgAgg) <- c("Event", "CropDamage")
## Sort by Crop Damage amount descending
StormCropDmgSort <- arrange(StormCropDmgAgg, desc(CropDamage))
head(StormCropDmgSort)
## NOTE(review): the output previously pasted here (e.g. HAIL 579596280000)
## was produced with a single multiplier applied to every row; re-run to
## regenerate it with the per-row multipliers.
library(lattice)
## Keep the 20 leading rows of each casualty ranking for plotting.
StormFatalTop20 <- head(StormFatalSort, 20L)
StormInjTop20 <- head(StormInjSort, 20L)
## Bar chart of fatalities, events ordered by their totals.
## The chart object is stored in `Fatal`; it is not drawn by this assignment.
Fatal <- barchart(
  reorder(Event, Fatalities) ~ Fatalities,
  main = "Top Twenty Weather-related Causes of Fatalities",
  cex = 0.6,
  data = StormFatalTop20
)
## Same layout for injuries, stored in `Inj`.
Inj <- barchart(
  reorder(Event, Injuries) ~ Injuries,
  main = "Top Twenty Weather-related Causes of Injuries",
  cex = 0.6,
  data = StormInjTop20
)
## Keep the 20 leading rows of each damage ranking for plotting.
StormPropDmgTop20 <- head(StormPropDmgSort, 20L)
StormCropDmgTop20 <- head(StormCropDmgSort, 20L)
## Bar chart of property damage, events ordered by their totals.
## The chart object is stored in `PD`; it is not drawn by this assignment.
PD <- barchart(
  reorder(Event, PropertyDamage) ~ PropertyDamage,
  main = "Top Twenty Weather-related Causes of Property Damage",
  cex = 0.6,
  data = StormPropDmgTop20
)
## Same layout for crop damage, stored in `CD`.
CD <- barchart(
  reorder(Event, CropDamage) ~ CropDamage,
  main = "Top Twenty Weather-related Causes of Crop Damage",
  cex = 0.6,
  data = StormCropDmgTop20
)