# Bias number formatting toward fixed (non-scientific) notation.
options(scipen = 1)

# Load the bzip2-compressed storm data set; the first line holds column names.
MyData <- read.csv(file = "repdata-data-StormData.csv.bz2", header = TRUE)

# Dimensions of the raw table (rows, columns)
dim(MyData)
## [1] 902297 37

# Keep only the columns used below: event type, casualty counts, and the
# damage amounts together with their magnitude codes.
q <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
sum_data <- MyData[, q]

# Preview the first two rows
head(sum_data, n = 2)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
# Distinct magnitude codes recorded for property damage
unique(sum_data$PROPDMGEXP)
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M

# Translate a vector of damage magnitude codes into numeric multipliers.
#
# One lookup table serves both the property and the crop column so the two
# are always decoded the same way:
#   * letters are matched case-insensitively:
#     H = 100, K = 1000, M = 1e+06, B = 1e+09
#   * a digit d ("0" to "8") stands for 10^d
#   * a blank code leaves the amount unscaled (multiplier 1)
#   * "+", "-" and "?" zero the amount out (multiplier 0)
# Any other non-missing code yields NA and is reported through a warning,
# because rows with an NA damage value are dropped by the formula interface
# of aggregate() further down.
#
# codes:   character vector or factor of magnitude codes
# returns: numeric vector of multipliers, the same length as `codes`
damage_multiplier <- function(codes) {
  known_codes <- c(
    "", as.character(0:8),
    "H", "K", "M", "B",
    "+", "-", "?"
  )
  multipliers <- c(
    1, 10^(0:8),
    100, 1000, 1e+06, 1e+09,
    0, 0, 0
  )

  codes <- as.character(codes)
  is_missing <- is.na(codes)
  # match() is used instead of indexing by name because an empty string
  # never matches a name in `[`.
  position <- match(toupper(codes), known_codes)

  unknown <- unique(codes[is.na(position) & !is_missing])
  if (length(unknown) > 0) {
    warning(
      "Unrecognised damage magnitude code(s): ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  multipliers[position]
}

sum_data$PROPEXP <- damage_multiplier(sum_data$PROPDMGEXP)
# Property damage in dollars = recorded amount * multiplier
sum_data$PROPDMGVAL <- sum_data$PROPDMG * sum_data$PROPEXP

# Distinct magnitude codes recorded for crop damage
unique(sum_data$CROPDMGEXP)
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M

sum_data$CROPEXP <- damage_multiplier(sum_data$CROPDMGEXP)
# Crop damage in dollars = recorded amount * multiplier
sum_data$CROPDMGVAL <- sum_data$CROPDMG * sum_data$CROPEXP
# Sum one outcome per event type. `outcome` is a two-sided formula such as
# FATALITIES ~ EVTYPE; the result has one row per event type.
total_by_event <- function(outcome, data) {
  aggregate(outcome, data = data, FUN = sum)
}

# Health impact totals
fatal <- total_by_event(FATALITIES ~ EVTYPE, sum_data)
injury <- total_by_event(INJURIES ~ EVTYPE, sum_data)

# Economic impact totals (dollars)
propdmg <- total_by_event(PROPDMGVAL ~ EVTYPE, sum_data)
cropdmg <- total_by_event(CROPDMGVAL ~ EVTYPE, sum_data)
# Ten event types with the most fatalities, highest first. head() returns
# fewer rows when fewer than ten event types exist, whereas indexing with
# [1:10, ] would pad the result with all-NA rows.
fatal10 <- head(fatal[order(-fatal$FATALITIES), ], n = 10)
# Ten event types with the most injuries, highest first
injury10 <- head(injury[order(-injury$INJURIES), ], n = 10)

# Two charts side by side; the large bottom margin leaves room for the
# event-type labels, which las = 3 draws vertically.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(
  fatal10$FATALITIES,
  las = 3,
  names.arg = fatal10$EVTYPE,
  main = "Weather Events With The 10 Highest Fatalities",
  ylab = "number of fatalities",
  col = "blue"
)
barplot(
  injury10$INJURIES,
  las = 3,
  names.arg = injury10$EVTYPE,
  main = "Weather Events With the 10 Highest Injuries",
  ylab = "number of injuries",
  col = "blue"
)
# Ten event types with the greatest property damage, highest first. head()
# returns fewer rows when fewer than ten event types exist, whereas indexing
# with [1:10, ] would pad the result with all-NA rows.
propdmg10 <- head(propdmg[order(-propdmg$PROPDMGVAL), ], n = 10)
# Ten event types with the greatest crop damage, highest first
cropdmg10 <- head(cropdmg[order(-cropdmg$CROPDMGVAL), ], n = 10)

# Two charts side by side; the large bottom margin leaves room for the
# event-type labels, which las = 3 draws vertically. Dollar totals are
# divided by 10^9 so the y axis reads in billions.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(
  propdmg10$PROPDMGVAL / (10^9),
  las = 3,
  names.arg = propdmg10$EVTYPE,
  main = "10 Events with Greatest Property Damages",
  ylab = "Cost of damages ($ billions)",
  col = "blue"
)
barplot(
  cropdmg10$CROPDMGVAL / (10^9),
  las = 3,
  names.arg = cropdmg10$EVTYPE,
  main = "10 Events With Greatest Crop Damages",
  ylab = "Cost of damages ($ billions)",
  col = "blue"
)