The Storm Data was provided by Coursera. If you would like to get this data and are not presently enrolled in that course, it is available at:
(https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2)
Warning: this is a large file and will take a significant amount of time to load.
# Load the storm data set. The compressed archive is read directly by
# read.csv(); it is downloaded first only when no local copy exists, so the
# analysis can be reproduced from a clean working directory.
storm_file <- "repdata-data-StormData.csv.bz2"
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the binary archive byte-exact on Windows.
  download.file(storm_url, destfile = storm_file, mode = "wb")
}
data <- read.csv(storm_file)
# Upper-case the event types so spellings that differ only in case are
# treated as the same event.
data$EVTYPE <- toupper(data$EVTYPE)
# Report the number of rows and columns that were read.
dim(data)
## [1] 902297 37
# Preview the first six records to check the column layout.
head(data, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
There are multiple names for the same type of weather event. These have to be consolidated for the analysis to be sound.
# Collapse alternative spellings of the same event onto one canonical name so
# that their totals are aggregated together. Names are the spellings found in
# the data; values are the canonical event type they are replaced with.
evtype_synonyms <- c(
  "TSTM WIND" = "THUNDERSTORM WIND",
  "THUNDERSTORM WINDS" = "THUNDERSTORM WIND",
  "RIVER FLOOD" = "FLOOD",
  "HURRICANE/TYPHOON" = "HURRICANE-TYPHOON",
  "HURRICANE" = "HURRICANE-TYPHOON"
)
# One vectorised lookup instead of one row-subset assignment per synonym:
# this avoids copying the whole data frame five times and does not fail when
# a synonym happens to be absent from the data.
synonym_hit <- match(data$EVTYPE, names(evtype_synonyms))
is_synonym <- !is.na(synonym_hit)
data$EVTYPE[is_synonym] <- unname(evtype_synonyms[synonym_hit[is_synonym]])
We need to put a numerical value on Fatalities.
# Total fatalities per event type.
fatal <- aggregate(FATALITIES ~ EVTYPE, data = data, FUN = sum)
# Keep only the event types that caused at least one death.
fatal1 <- fatal[fatal[["FATALITIES"]] > 0, ]
# Rank from most to fewest fatalities and show the leaders.
fatalorder <- fatal1[with(fatal1, order(FATALITIES, decreasing = TRUE)), ]
head(fatalorder, n = 6L)
## EVTYPE FATALITIES
## 755 TORNADO 5633
## 116 EXCESSIVE HEAT 1903
## 138 FLASH FLOOD 978
## 243 HEAT 937
## 417 LIGHTNING 816
## 683 THUNDERSTORM WIND 701
We need to put a numerical value on Injuries
# Total injuries per event type.
injury <- aggregate(INJURIES ~ EVTYPE, data = data, FUN = sum)
# Keep only the event types that injured at least one person.
injury1 <- injury[injury[["INJURIES"]] > 0, ]
# Rank from most to fewest injuries and show the leaders.
injuryorder <- injury1[with(injury1, order(INJURIES, decreasing = TRUE)), ]
head(injuryorder, n = 6L)
## EVTYPE INJURIES
## 755 TORNADO 91346
## 683 THUNDERSTORM WIND 9353
## 154 FLOOD 6791
## 116 EXCESSIVE HEAT 6525
## 417 LIGHTNING 5230
## 243 HEAT 2100
I had difficulty here. My plots don’t have enough room. If you could provide me with some advice I would be more than appreciative.
# Top ten event types by fatalities and by injuries, stacked in one figure.
# The event names are drawn as axis labels next to horizontal bars rather than
# in a legend, so they no longer cover the plot; the left margin is widened to
# fit the longest names. head() is used instead of [1:10, ] so that no NA rows
# appear if fewer than ten event types are available.
top_fatal <- head(fatalorder, 10)
top_injury <- head(injuryorder, 10)
# Keep the previous graphics settings so they can be restored afterwards.
old_par <- par(mfrow = c(2, 1), mar = c(4, 10, 2, 1))
# rev(): with horiz = TRUE the first bar is drawn at the bottom, so the order
# is reversed to put the largest value at the top.
barplot(rev(top_fatal$FATALITIES), names.arg = rev(top_fatal$EVTYPE),
        horiz = TRUE, las = 1, cex.names = 0.7,
        col = rev(rainbow(nrow(top_fatal))),
        xlab = "Fatalities", main = "10 Natural Events -- Fatality")
barplot(rev(top_injury$INJURIES), names.arg = rev(top_injury$EVTYPE),
        horiz = TRUE, las = 1, cex.names = 0.7,
        col = rev(rainbow(nrow(top_injury))),
        xlab = "Injuries", main = "10 Natural Events -- Injuries")
par(old_par)
Defining property and crop damage, to allow for ease of analysis.
# List the distinct exponent codes recorded alongside property damage.
unique(data[["PROPDMGEXP"]])
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# List the distinct exponent codes recorded alongside crop damage.
unique(data[["CROPDMGEXP"]])
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M
In this data, letters are used as magnitude codes for the Crop and Property Damage values (K = thousands, M = millions, B = billions). We need to convert those letters into numeric multipliers.
# Convert the recorded damage figures to absolute amounts. PROPDMGEXP and
# CROPDMGEXP carry a letter code giving the magnitude of the matching
# PROPDMG / CROPDMG value: K = thousands, M = millions, B = billions.
# Codes are matched case-insensitively, so property and crop damage are
# treated the same way; for the codes present in this data set the result is
# the same as scaling "K", "M", "m", "B" (property) and "K", "k", "M", "m",
# "B" (crop) one by one.
# NOTE(review): any other code (blank, "H"/"h", digits, "+", "-", "?") leaves
# the value unscaled, as before -- confirm whether "H" should mean hundreds.
# This step multiplies the columns in place, so it must be run only once.
damage_multiplier <- c(K = 1e3, M = 1e6, B = 1e9)

prop_code <- toupper(as.character(data$PROPDMGEXP))
prop_scale <- unname(damage_multiplier[match(prop_code, names(damage_multiplier))])
prop_scale[is.na(prop_scale)] <- 1  # unrecognised code: keep value as recorded
data$PROPDMG <- data$PROPDMG * prop_scale

crop_code <- toupper(as.character(data$CROPDMGEXP))
crop_scale <- unname(damage_multiplier[match(crop_code, names(damage_multiplier))])
crop_scale[is.na(crop_scale)] <- 1  # unrecognised code: keep value as recorded
data$CROPDMG <- data$CROPDMG * crop_scale
We need a numeric value on Property Damage.
# Total property damage per event type.
damage <- aggregate(PROPDMG ~ EVTYPE, data = data, FUN = sum)
# Keep only the event types with a non-zero property damage total.
damage1 <- damage[damage[["PROPDMG"]] > 0, ]
# Rank from most to least damage and show the leaders.
damageorder <- damage1[with(damage1, order(PROPDMG, decreasing = TRUE)), ]
head(damageorder, n = 6L)
## EVTYPE PROPDMG
## 154 FLOOD 149776655307
## 364 HURRICANE-TYPHOON 81174159010
## 755 TORNADO 56937160779
## 597 STORM SURGE 43323536000
## 138 FLASH FLOOD 16140812067
## 212 HAIL 15732267048
We need a numeric value on Crop Damage
# Total crop damage per event type.
cropdmg <- aggregate(CROPDMG ~ EVTYPE, data = data, FUN = sum)
# Keep only the event types with a non-zero crop damage total.
cropdmg1 <- cropdmg[cropdmg[["CROPDMG"]] > 0, ]
# Rank from most to least damage and show the leaders.
cropdmgorder <- cropdmg1[with(cropdmg1, order(CROPDMG, decreasing = TRUE)), ]
head(cropdmgorder, n = 6L)
## EVTYPE CROPDMG
## 84 DROUGHT 13972566000
## 154 FLOOD 10691427450
## 364 HURRICANE-TYPHOON 5349782800
## 386 ICE STORM 5022113500
## 212 HAIL 3025954473
## 138 FLASH FLOOD 1421317100
Again, I had difficulty here. My plots don’t have enough room. If you could provide me with some advice I would be more than appreciative.
# Top ten event types by property damage and by crop damage, stacked in one
# figure. The event names are drawn as axis labels next to horizontal bars
# rather than in a legend, so they no longer cover the plot; the left margin
# is widened to fit the longest names. head() is used instead of [1:10, ] so
# that no NA rows appear if fewer than ten event types are available.
top_damage <- head(damageorder, 10)
top_cropdmg <- head(cropdmgorder, 10)
# Keep the previous graphics settings so they can be restored afterwards.
old_par <- par(mfrow = c(2, 1), mar = c(4, 10, 2, 1))
# rev(): with horiz = TRUE the first bar is drawn at the bottom, so the order
# is reversed to put the largest value at the top.
barplot(rev(top_damage$PROPDMG), names.arg = rev(top_damage$EVTYPE),
        horiz = TRUE, las = 1, cex.names = 0.7,
        col = rev(rainbow(nrow(top_damage))),
        xlab = "Property Damage",
        main = "10 Natural Events -- Property Damage")
barplot(rev(top_cropdmg$CROPDMG), names.arg = rev(top_cropdmg$EVTYPE),
        horiz = TRUE, las = 1, cex.names = 0.7,
        col = rev(rainbow(nrow(top_cropdmg))),
        xlab = "Crop Damage",
        main = "10 Natural Events -- Crop Damage")
par(old_par)
This will show us which events cause the most economic damage. We will be focusing on the top five.
# Combine property and crop damage into one total per event type.
# all = TRUE keeps event types that appear in only one of the two tables (for
# example property damage but no crop damage); the default inner join would
# silently drop them from the ranking.
totaldmg <- merge(damageorder, cropdmgorder, by = "EVTYPE", all = TRUE)
# An event type missing from one table has no damage of that kind.
totaldmg$PROPDMG[is.na(totaldmg$PROPDMG)] <- 0
totaldmg$CROPDMG[is.na(totaldmg$CROPDMG)] <- 0
totaldmg$total <- totaldmg$PROPDMG + totaldmg$CROPDMG
# Rank by combined damage and show the five costliest event types.
totaldmgorder <- totaldmg[order(totaldmg$total, decreasing = TRUE), ]
head(totaldmgorder, n = 5L)
## EVTYPE PROPDMG CROPDMG total
## 19 FLOOD 149776655307 10691427450 160468082757
## 48 HURRICANE-TYPHOON 81174159010 5349782800 86523941810
## 82 TORNADO 56937160779 414953270 57352114049
## 66 STORM SURGE 43323536000 5000 43323541000
## 31 HAIL 15732267048 3025954473 18758221521
Cheers.