We will explore the NOAA Storm Database and answer some basic questions about severe weather events. We will examine Fatalities, Injuries, Property Damage, Crop Damage and Total Damage. We will present tables and bar plots of the top 15 weather events that cause each of these 5 circumstances. Then we will note the top 3 events for each of the 5 circumstances.
# Fetch the compressed storm data archive only when it is not already present
# in the working directory, then load it as a data frame with text columns
# kept as character vectors.
if (!file.exists("StormData.csv.bz2")) {
  fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(url = fileURL, destfile = "StormData.csv.bz2", method = "curl")
}
Storm_Data <- read.csv(
  file = bzfile("StormData.csv.bz2"),
  header = TRUE,
  stringsAsFactors = FALSE
)
After reading in the Storm Data, we check its dimensions and the first few rows (there are 902,297 rows in this dataset).
# Report the number of rows and columns that were read.
dim(Storm_Data)
## [1] 902297 37
# Preview the first records to see what each column holds.
head(Storm_Data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
## And check the structure of the data (column names, types and sample values)
str(Storm_Data)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
We are interested first in Fatalities and Injuries, and second in Property Damage and Crop Damage. We will check these four columns for missing data and NAs.
# Count missing values in the four measures analysed below. Each column is
# checked twice (is.na and complete.cases) and addressed by name; these are
# columns 23, 24, 25 and 27 in the str() listing.
sum(is.na(Storm_Data$FATALITIES))
## [1] 0
sum(!complete.cases(Storm_Data$FATALITIES)) ## Fatalities
## [1] 0
sum(is.na(Storm_Data$INJURIES))
## [1] 0
sum(!complete.cases(Storm_Data$INJURIES)) ## Injuries
## [1] 0
sum(is.na(Storm_Data$PROPDMG))
## [1] 0
sum(!complete.cases(Storm_Data$PROPDMG)) ## Property Damage
## [1] 0
sum(is.na(Storm_Data$CROPDMG))
## [1] 0
sum(!complete.cases(Storm_Data$CROPDMG)) ## Crop Damage
## [1] 0
We will use column #8, EVTYPE (Event Type), to group the records, and sum the Fatalities and Injuries for each event type.
# Total fatalities and total injuries per event type. Each result is a
# two-column data frame: the event type and the summed count.
Fatalities <- setNames(
  aggregate(Storm_Data["FATALITIES"], by = list(Storm_Data$EVTYPE), FUN = sum),
  c("Event", "Fatalities")
)
Injuries <- setNames(
  aggregate(Storm_Data["INJURIES"], by = list(Storm_Data$EVTYPE), FUN = sum),
  c("Event", "Injuries")
)
Then we order each of these from largest to smallest, keep the top 15 causes of Fatalities and of Injuries, and view the resulting datasets.
# Keep the 15 event types with the most fatalities, largest first, and show them.
Fatalities_Ordered <- Fatalities[order(-Fatalities$Fatalities)[1:15], ]
Fatalities_Ordered
## Event Fatalities
## 834 TORNADO 5633
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## 856 TSTM WIND 504
## 170 FLOOD 470
## 585 RIP CURRENT 368
## 359 HIGH WIND 248
## 19 AVALANCHE 224
## 972 WINTER STORM 206
## 586 RIP CURRENTS 204
## 278 HEAT WAVE 172
## 140 EXTREME COLD 160
## 760 THUNDERSTORM WIND 133
# Keep the 15 event types with the most injuries, largest first, and show them.
Injuries_Ordered <- Injuries[order(-Injuries$Injuries)[1:15], ]
Injuries_Ordered
## Event Injuries
## 834 TORNADO 91346
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
## 275 HEAT 2100
## 427 ICE STORM 1975
## 153 FLASH FLOOD 1777
## 760 THUNDERSTORM WIND 1488
## 244 HAIL 1361
## 972 WINTER STORM 1321
## 411 HURRICANE/TYPHOON 1275
## 359 HIGH WIND 1137
## 310 HEAVY SNOW 1021
## 957 WILDFIRE 911
Next, we plot the two datasets of the ordered data for the top 15 causes of Fatalities/Injuries.
# Two bar charts side by side: top 15 event types by fatalities (left) and by
# injuries (right). The large bottom margin and las = 2 leave room for the
# long event names drawn perpendicular to the x axis.
par(mfrow = c(1, 2), mar = c(14, 6, 2, 2), las = 2, cex = 0.7, cex.main = 1.4)

# Left panel: fatalities.
barplot(
  height = Fatalities_Ordered$Fatalities,
  names.arg = Fatalities_Ordered$Event,
  col = "navyblue",
  xlab = "",
  ylab = "Number of Fatalities",
  main = "Number of Fatalities by Event Type",
  col.lab = "red",
  col.main = "red"
)
# Reset label orientation before adding the x-axis title in the margin.
par(las = 0)
mtext("Type of Event", side = 1, line = 12, col = "red")

# Right panel: injuries. Both axis titles are added in the margin afterwards.
par(las = 2)
barplot(
  height = Injuries_Ordered$Injuries,
  names.arg = Injuries_Ordered$Event,
  col = "green3",
  xlab = "",
  ylab = "",
  main = "Number of Injuries by Event Type",
  col.lab = "red",
  col.main = "red"
)
par(las = 0)
mtext("Type of Event", side = 1, line = 12, col = "red")
mtext("Number of Injuries", side = 2, line = 5, col = "red")
# Sum the recorded property damage, crop damage, and their combined total for
# every event type. Each result is a two-column data frame: event type and sum.
# NOTE(review): PROPDMG and CROPDMG are summed exactly as stored. The
# PROPDMGEXP / CROPDMGEXP columns (e.g. "K" in the data preview) are not
# applied here; if they are magnitude multipliers, these sums mix units and
# are not dollar amounts -- confirm before interpreting the rankings.
Property_Damage <- setNames(
  aggregate(Storm_Data["PROPDMG"], by = list(Storm_Data$EVTYPE), FUN = sum),
  c("Event", "Property_Damage")
)
Crop_Damage <- setNames(
  aggregate(Storm_Data["CROPDMG"], by = list(Storm_Data$EVTYPE), FUN = sum),
  c("Event", "Crop_Damage")
)
Total_Damage <- setNames(
  with(Storm_Data, aggregate(PROPDMG + CROPDMG, by = list(EVTYPE), FUN = sum)),
  c("Event", "Total_Damage")
)
Then we order each of these from largest to smallest, keep the top 15 causes of Property Damage, Crop Damage and Total Damage, and view the resulting datasets.
# Keep the 15 event types with the largest summed property damage, largest
# first, and show them.
Property_Damage_Ordered <-
  Property_Damage[order(-Property_Damage$Property_Damage)[1:15], ]
Property_Damage_Ordered
## Event Property_Damage
## 834 TORNADO 3212258.16
## 153 FLASH FLOOD 1420124.59
## 856 TSTM WIND 1335965.61
## 170 FLOOD 899938.48
## 760 THUNDERSTORM WIND 876844.17
## 244 HAIL 688693.38
## 464 LIGHTNING 603351.78
## 786 THUNDERSTORM WINDS 446293.18
## 359 HIGH WIND 324731.56
## 972 WINTER STORM 132720.59
## 310 HEAVY SNOW 122251.99
## 957 WILDFIRE 84459.34
## 427 ICE STORM 66000.67
## 676 STRONG WIND 62993.81
## 376 HIGH WINDS 55625.00
# Keep the 15 event types with the largest summed crop damage, largest first,
# and show them.
Crop_Damage_Ordered <- Crop_Damage[order(-Crop_Damage$Crop_Damage)[1:15], ]
Crop_Damage_Ordered
## Event Crop_Damage
## 244 HAIL 579596.28
## 153 FLASH FLOOD 179200.46
## 170 FLOOD 168037.88
## 856 TSTM WIND 109202.60
## 834 TORNADO 100018.52
## 760 THUNDERSTORM WIND 66791.45
## 95 DROUGHT 33898.62
## 786 THUNDERSTORM WINDS 18684.93
## 359 HIGH WIND 17283.21
## 290 HEAVY RAIN 11122.80
## 212 FROST/FREEZE 7034.14
## 140 EXTREME COLD 6121.14
## 848 TROPICAL STORM 5899.12
## 402 HURRICANE 5339.31
## 164 FLASH FLOODING 5126.05
# Keep the 15 event types with the largest combined (property + crop) damage,
# largest first, and show them.
Total_Damage_Ordered <- Total_Damage[order(-Total_Damage$Total_Damage)[1:15], ]
Total_Damage_Ordered
## Event Total_Damage
## 834 TORNADO 3312276.68
## 153 FLASH FLOOD 1599325.05
## 856 TSTM WIND 1445168.21
## 244 HAIL 1268289.66
## 170 FLOOD 1067976.36
## 760 THUNDERSTORM WIND 943635.62
## 464 LIGHTNING 606932.39
## 786 THUNDERSTORM WINDS 464978.11
## 359 HIGH WIND 342014.77
## 972 WINTER STORM 134699.58
## 310 HEAVY SNOW 124417.71
## 957 WILDFIRE 88823.54
## 427 ICE STORM 67689.62
## 676 STRONG WIND 64610.71
## 290 HEAVY RAIN 61964.94
Next, we plot the three datasets of the ordered data for the top 15 causes of Property Damage/Crop Damage/ Total Damage.
# Two bar charts side by side: top 15 event types by summed property damage
# (left) and by summed crop damage (right). Axis titles are added in the
# margins after each chart is drawn.
par(mfrow = c(1, 2), mar = c(14, 6, 2, 2), las = 2, cex = 0.7, cex.main = 1.4)

## Property Damage
barplot(
  height = Property_Damage_Ordered$Property_Damage,
  names.arg = Property_Damage_Ordered$Event,
  col = "navyblue",
  xlab = "",
  ylab = "",
  main = "Amount of Property Damage \nby Event Type",
  col.lab = "red",
  col.main = "red"
)
# Reset label orientation before adding the margin titles.
par(las = 0)
mtext("Type of Event", side = 1, line = 12, col = "red")
mtext("Amount of Damage", side = 2, line = 5, col = "red")

## Crop Damage
par(las = 2)
barplot(
  height = Crop_Damage_Ordered$Crop_Damage,
  names.arg = Crop_Damage_Ordered$Event,
  col = "green3",
  xlab = "",
  ylab = "",
  # Fixed y range, set explicitly rather than taken from the crop data.
  ylim = c(0, 3000000),
  main = "Amount of Crop Damage \nby Event Type",
  col.lab = "red",
  col.main = "red"
)
par(las = 0)
mtext("Type of Event", side = 1, line = 12, col = "red")
mtext("Amount of Damage", side = 2, line = 5, col = "red")
## Total Damage
# Single full-width bar chart of the top 15 event types by combined damage.
par(mfrow = c(1, 1), mar = c(14, 6, 2, 2), las = 2, cex = 0.7, cex.main = 1.4)
barplot(
  height = Total_Damage_Ordered$Total_Damage,
  names.arg = Total_Damage_Ordered$Event,
  col = "magenta3",
  xlab = "",
  ylab = "",
  main = "Amount of Total Damage \nby Event Type",
  col.lab = "red",
  col.main = "red"
)
# Reset label orientation before adding the margin titles.
par(las = 0)
mtext("Type of Event", side = 1, line = 12, col = "red")
mtext("Amount of Damage", side = 2, line = 5, col = "red")