library(ggplot2)
library(plyr)
library(dplyr)
# Fetch the NOAA storm database once (cached on disk) and load only the
# columns used by the analysis below.
storm_file <- "./coursera/repdata_data_StormData.csv.bz2"
if (!file.exists(storm_file)) {
  # download.file() does not create missing directories.
  dir.create(dirname(storm_file), recursive = TRUE, showWarnings = FALSE)
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    storm_file,
    mode = "wb" # the archive is binary
  )
}
# bzfile()'s second argument is the open mode, not the name of a file inside
# the archive. Pass only the path so read.csv() opens the connection itself
# and closes it again when it is done.
Datos_Raw <- read.csv(bzfile(storm_file)) %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  )
# Keep only the events that caused at least one fatality, injury, or some
# property/crop damage, ordered by event type.
Datos <- Datos_Raw %>%
  filter(FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0) %>%
  arrange(EVTYPE)

# Normalise the free-text event labels: upper-case, no surrounding blanks.
Datos$EVTYPE <- trimws(toupper(Datos$EVTYPE))
# Collapse every run of whitespace into a single space. The previous
# gsub(" ", " ", ...) calls replaced one space with one space and therefore
# changed nothing.
Datos$EVTYPE <- gsub("[[:space:]]+", " ", Datos$EVTYPE)
# Per-event-type totals of the four impact columns, taken before the manual
# label merges that follow; dim() reports how many distinct labels exist.
impact_cols <- c("FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")
valuEv <- summarise_at(group_by(Datos, EVTYPE), impact_cols, sum)
dim(valuEv)
## [1] 440 5
# Merge misspelled, abbreviated and over-specific EVTYPE labels into a smaller
# set of canonical labels. Step 1 applies exact one-to-one renames; step 2
# applies prefix (regex) rules, in order, to whatever labels remain.

# Step 1: exact renames (old label = new label).
exact_renames <- c(
  "AVALANCE" = "AVALANCHE",
  # NOTE(review): a HIGH tide label is folded into LOW tide here; this is
  # kept as-is but looks suspicious -- confirm the intended category.
  "ASTRONOMICAL HIGH TIDE" = "ASTRONOMICAL LOW TIDE",
  "BLIZZARD/WINTER STORM" = "BLIZZARD",
  "COASTAL FLOODING" = "COASTAL FLOOD",
  "COASTALSTORM" = "COASTAL STORM",
  "COLD" = "COLD/WIND CHILL",
  "COLD/WINDS" = "COLD/WIND CHILL",
  "DUST STORM/HIGH WINDS" = "DUST STORM",
  "EXTREME HEAT" = "EXCESSIVE HEAT",
  "EXTREME COLD" = "EXTREME COLD/WIND CHILL",
  "EXTREME WIND CHILL" = "EXTREME COLD/WIND CHILL",
  "EXTREME WINDCHILL" = "EXTREME COLD/WIND CHILL"
)
renamed <- Datos$EVTYPE %in% names(exact_renames)
Datos$EVTYPE[renamed] <- unname(exact_renames[Datos$EVTYPE[renamed]])

# Step 2: prefix rules (canonical label = regex). A trailing "." in a pattern
# means "the prefix followed by at least one more character", i.e. only
# longer variants are rewritten. Rules run top to bottom; order matters
# (e.g. FLASH FLOOD variants are settled before the generic FLOOD rule).
prefix_rules <- c(
  "DROUGHT" = "^DROUGHT.",
  "FLASH FLOOD" = "^FLASH FLOOD.",
  "FLOOD" = "^FLOOD.",
  "DENSE FOG" = "^FOG",
  "FROST/FREEZE" = "^FREEZ.|^FROST.|^FROST",
  "HAIL" = "^HAIL.",
  "HEAT" = "^HEAT.",
  "HEAVY RAIN" = "^HEAVY RAIN.|^HVY RAIN",
  "HEAVY SNOW" = "^HEAVY SNOW.",
  "HIGH SURF" = "^HIGH SURF.",
  "HIGH WIND" = "^HIGH WIND.",
  "HURRICANE (TYPHOON)" = "^HURRICANE.|^HURRICANE|^TYPHOON",
  "LAKE-EFFECT SNOW" = "^LAKE EFFECT",
  "LAKESHORE FLOOD" = "^LAKE FLOOD.|^LAKE FLOOD",
  "LIGHTNING" = "^LIGHTNING.|^LIGNTNING|^LIGHTING",
  "MUD SLIDE" = "^MUD SLIDE.|^MUDSLIDE.|^MUDSLIDE",
  "MARINE THUNDERSTORM WIND" = "^MARINE TSTM.",
  "MIXED PRECIPITATION" = "^MIXED PRECI.",
  "RIP CURRENT" = "^RIP CURRENT.",
  "SLEET" = "^SLEET.",
  # No trailing "." here: the bare label "STORM SURGE" must be merged too.
  # With "^STORM SURGE." it was left behind as its own category.
  "STORM SURGE/TIDE" = "^STORM SURGE",
  "STRONG WIND" = "^STRONG WIND.",
  "SEVERE THUNDERSTORM" = "^SEVERE THUNDERSTORM.",
  "THUNDERSTORM WINDS" = paste(
    c(
      "^THUNDERSTORM WIN.", "^THUNDERSTORMS WIN.", "^THUNDERSTORMW",
      "^THUNDERTORM WIN.", "^TSTM WIN.", "^TSTMW", "^TUNDERSTORM WIN.",
      "^THUNDERSTORMS", "^THUNDERSTROM WIND", "^THUNERSTORM WINDS",
      "^THUNDEERSTORM WINDS", "^THUDERSTORM WINDS", "^THUNDERESTORM WINDS"
    ),
    collapse = "|"
  ),
  "TORNADO" = "^TORNADO.|^TORNDAO",
  "TROPICAL STORM" = "^TROPICAL STORM.",
  "WATERSPOUT" = "^WATERSPOUT.",
  "WILDFIRE" = "^WILD.|^FOREST FIRE.|^BRUSH FIRE|^GRASS FIRE.",
  "WINTER STORM" = "^WINTER STORM.",
  "WINTER WEATHER" = "^WINTER WEATHER."
)
for (label in names(prefix_rules)) {
  # "FREEZING FOG" is a category of its own and must not be swallowed by the
  # "^FREEZ." rule, so it is excluded from every prefix match.
  hit <- grepl(prefix_rules[[label]], Datos$EVTYPE) &
    Datos$EVTYPE != "FREEZING FOG"
  Datos$EVTYPE[hit] <- label
}
# Same per-event-type totals as before, recomputed after the label merges;
# dim() shows how many distinct labels are left.
valuEvClean <- Datos %>%
  group_by(EVTYPE) %>%
  summarise_at(vars(FATALITIES, INJURIES, PROPDMG, CROPDMG), sum)
dim(valuEvClean)
## [1] 218 5
# Turn the damage-exponent codes (PROPDMGEXP / CROPDMGEXP) into numeric
# multipliers so that damage = PROPDMG * PROPDMGEXP (likewise for crops).
#
# Mapping used by this script:
#   H/h -> 1e2, K/k -> 1e3, M/m -> 1e6, B/b -> 1e9,
#   "+" -> 1, digits "0".."8" -> 10, "-", "?", "" and " " -> 0.
#
# The lookup is done in one pass against the ORIGINAL codes. The earlier
# step-by-step replacement wrote "+" as 1 (stored as the string "1") and then
# the digit rule matched that "1" and turned it into 10.
exp_codes <- c(
  "H", "h", "K", "k", "M", "m", "B", "b",
  "+",
  "0", "1", "2", "3", "4", "5", "6", "7", "8",
  "-", "?", "", " "
)
exp_values <- c(
  1e2, 1e2, 1e3, 1e3, 1e6, 1e6, 1e9, 1e9,
  1,
  rep(10, 9),
  rep(0, 4)
)

# Map a vector of exponent codes to multipliers; codes outside the table
# become NA (with a warning) instead of being guessed.
exp_to_multiplier <- function(codes) {
  codes <- as.character(codes)
  multiplier <- exp_values[match(codes, exp_codes)]
  unknown <- is.na(multiplier) & !is.na(codes)
  if (any(unknown)) {
    warning(
      "Unrecognised damage exponent code(s): ",
      paste(unique(codes[unknown]), collapse = ", "),
      call. = FALSE
    )
  }
  multiplier
}

Datos$PROPDMGEXP <- exp_to_multiplier(Datos$PROPDMGEXP)
Datos$CROPDMGEXP <- exp_to_multiplier(Datos$CROPDMGEXP)
# For each impact measure: total it per event type, drop event types whose
# total is zero, rank from largest to smallest, and finally keep only the
# event types whose total lies above the mean of the remaining totals.

evFatalities <- Datos %>%
  group_by(EVTYPE) %>%
  summarise(TFatalities = sum(FATALITIES)) %>%
  filter(TFatalities > 0) %>%
  arrange(desc(TFatalities)) %>%
  filter(TFatalities > mean(TFatalities))

evInjuries <- Datos %>%
  group_by(EVTYPE) %>%
  summarise(TInjuries = sum(INJURIES)) %>%
  filter(TInjuries > 0) %>%
  arrange(desc(TInjuries)) %>%
  filter(TInjuries > mean(TInjuries))

# Damage figures are the recorded amount times its exponent multiplier.
evPropdmg <- Datos %>%
  group_by(EVTYPE) %>%
  summarise(TPropdmg = sum(PROPDMG * PROPDMGEXP)) %>%
  filter(TPropdmg > 0) %>%
  arrange(desc(TPropdmg)) %>%
  filter(TPropdmg > mean(TPropdmg))

evCropdmg <- Datos %>%
  group_by(EVTYPE) %>%
  summarise(TCropdmg = sum(CROPDMG * CROPDMGEXP)) %>%
  filter(TCropdmg > 0) %>%
  arrange(desc(TCropdmg)) %>%
  filter(TCropdmg > mean(TCropdmg))

# Combined economic impact: crop damage plus property damage.
evTotaldmg <- Datos %>%
  group_by(EVTYPE) %>%
  summarise(
    TTotaldmg = sum((CROPDMG * CROPDMGEXP) + (PROPDMG * PROPDMGEXP))
  ) %>%
  filter(TTotaldmg > 0) %>%
  arrange(desc(TTotaldmg)) %>%
  filter(TTotaldmg > mean(TTotaldmg))
# Print the event types with above-mean fatality totals, then chart them.
evFatalities
## # A tibble: 12 x 2
## EVTYPE TFatalities
## <chr> <dbl>
## 1 TORNADO 5658
## 2 EXCESSIVE HEAT 1999
## 3 HEAT 1118
## 4 FLASH FLOOD 1018
## 5 LIGHTNING 817
## 6 THUNDERSTORM WINDS 710
## 7 RIP CURRENT 577
## 8 FLOOD 495
## 9 EXTREME COLD/WIND CHILL 304
## 10 HIGH WIND 293
## 11 AVALANCHE 225
## 12 WINTER STORM 217
# EVTYPE is a character column, which ggplot would lay out alphabetically;
# reorder() makes the bars follow the ranking (largest total first).
ggplot(
  evFatalities,
  aes(x = reorder(EVTYPE, -TFatalities), y = TFatalities)
) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ggtitle("Events with Highest Total Fatalities") +
  labs(x = "Event Type", y = "Total Fatalities")
# Print the event types with above-mean injury totals, then chart them.
evInjuries
## # A tibble: 9 x 2
## EVTYPE TInjuries
## <chr> <dbl>
## 1 TORNADO 91364
## 2 THUNDERSTORM WINDS 9496
## 3 FLOOD 6806
## 4 EXCESSIVE HEAT 6680
## 5 LIGHTNING 5232
## 6 HEAT 2494
## 7 ICE STORM 1975
## 8 FLASH FLOOD 1785
## 9 WILDFIRE 1608
# EVTYPE is a character column, which ggplot would lay out alphabetically;
# reorder() makes the bars follow the ranking (largest total first).
ggplot(
  evInjuries,
  aes(x = reorder(EVTYPE, -TInjuries), y = TInjuries)
) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ggtitle("Events with Highest Total Injuries") +
  labs(x = "Event Type", y = "Total Injuries")
# Print the above-mean event types for property damage, crop damage and the
# combined total, then chart the combined total.
evPropdmg
## # A tibble: 15 x 2
## EVTYPE TPropdmg
## <chr> <dbl>
## 1 FLOOD 144957523972
## 2 HURRICANE (TYPHOON) 85356410010
## 3 TORNADO 58541935137
## 4 STORM SURGE 43323536000
## 5 FLASH FLOOD 16732872111
## 6 HAIL 15974472377
## 7 THUNDERSTORM WINDS 9762051272
## 8 WILDFIRE 8496628500
## 9 TROPICAL STORM 7714390550
## 10 WINTER STORM 6748997260
## 11 HIGH WIND 6003357060
## 12 RIVER FLOOD 5118945500
## 13 STORM SURGE/TIDE 4641188000
## 14 ICE STORM 3944928310
## 15 HEAVY RAIN 3230998140
evCropdmg
## # A tibble: 11 x 2
## EVTYPE TCropdmg
## <chr> <dbl>
## 1 DROUGHT 13972571780
## 2 FLOOD 5878707950
## 3 HURRICANE (TYPHOON) 5516117800
## 4 RIVER FLOOD 5029459000
## 5 ICE STORM 5022113500
## 6 HAIL 3026094800
## 7 FROST/FREEZE 1616911000
## 8 FLASH FLOOD 1437163150
## 9 EXTREME COLD/WIND CHILL 1330023000
## 10 THUNDERSTORM WINDS 1224414700
## 11 HEAVY RAIN 795755800
evTotaldmg
## # A tibble: 16 x 2
## EVTYPE TTotaldmg
## <chr> <dbl>
## 1 FLOOD 150836231922
## 2 HURRICANE (TYPHOON) 90872527810
## 3 TORNADO 58959398047
## 4 STORM SURGE 43323541000
## 5 HAIL 19000567177
## 6 FLASH FLOOD 18170035261
## 7 DROUGHT 15018677780
## 8 THUNDERSTORM WINDS 10986465972
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041810
## 11 WILDFIRE 8899910130
## 12 TROPICAL STORM 8409286550
## 13 WINTER STORM 6781441260
## 14 HIGH WIND 6689658960
## 15 STORM SURGE/TIDE 4642038000
## 16 HEAVY RAIN 4026753940
# EVTYPE is a character column, which ggplot would lay out alphabetically;
# reorder() makes the bars follow the ranking (largest total first).
ggplot(
  evTotaldmg,
  aes(x = reorder(EVTYPE, -TTotaldmg), y = TTotaldmg)
) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ggtitle("Events with Highest Economic Impact") +
  labs(x = "Event Type", y = "Total $")