Load Libraries

Data loading

file_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Local cache location for the compressed dataset. The same path is used for
# the existence check, the download and the read, so the three steps can never
# disagree about where the file lives.
data_dir <- file.path(getwd(), "data")
data_file <- file.path(data_dir, "repdata%2Fdata%2FStormData.csv.bz2")

# Create the data folder if it does not exist yet.
if (!dir.exists(data_dir)) {
  dir.create(data_dir)
}

# Download the compressed file only when it is not already cached.
if (!file.exists(data_file)) {
  download.file(url = file_url,
                destfile = data_file,
                method = "curl")
}

# Load the data only once per session; read.csv() reads the .bz2 file directly.
if (!exists("storm")) {
  storm <- read.csv(data_file)
}

Data Processing

  1. Keep only the needed columns.

  2. Keep only the exponent (exp) labels defined in the documentation (page 12). These values are:
    • H hundred (x100)
    • K thousand (x1,000)
    • M million (x1,000,000)
    • B billion (x1,000,000,000)

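Since there are two exp columns, filtering both at once with an “OR” condition on the defined values would return essentially the original dataset (nothing removed). The dataset is therefore split: the health analysis uses the full data, while the economic analysis filters the property and crop records separately.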

  1. Convert the exp labels to numbers.
  2. Multiply by the exp value.
# Keep only the columns used by the analysis, then collapse the free-text
# event types into a small set of broad categories. The patterns are tried in
# order and the first one that matches (case-insensitively) decides the
# category; anything unmatched becomes "other".
# NOTE(review): columns are picked by position; the code below expects EVTYPE,
# FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP - confirm
# that positions 8 and 23:28 are those columns.
storm2 <- storm[, c(8, 23:28)] %>%
  mutate(
    EVTYPE = case_when(
      grepl(
        "frost|glaze|hail|snow|winter|wintry|blizzard|sleet|cold|ice|freeze|avalanche|icy",
        EVTYPE, ignore.case = TRUE
      ) ~ "winter",
      grepl("light", EVTYPE, ignore.case = TRUE) ~ "lightning",
      grepl("rain|flood|wet|fld", EVTYPE, ignore.case = TRUE) ~ "rain",
      grepl(
        "thunder|tstm|tornado|wind|hurricane|funnel|tropical|storm|SPOUT",
        EVTYPE, ignore.case = TRUE
      ) ~ "wind",
      grepl("fog|visibility|dark|dust", EVTYPE, ignore.case = TRUE) ~
        "low visibility",
      grepl("surf|surge|tide|tsunami|current", EVTYPE, ignore.case = TRUE) ~
        "ocean surge",
      grepl("heat|high +temp|record +temp|warm|dry", EVTYPE, ignore.case = TRUE) ~
        "heat",
      grepl("fire|smoke", EVTYPE, ignore.case = TRUE) ~ "fire",
      grepl("volcan", EVTYPE, ignore.case = TRUE) ~ "volcan",
      TRUE ~ "other"
    )
  )


# Numeric multipliers for the documented damage exponent labels:
# H = hundred, K = thousand, M = million, B = billion.
exp_multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

# Labels that are kept: the four documented ones plus missing/blank entries.
valid_exp <- c(names(exp_multiplier), NA, "")

prop <- storm2 %>% filter(PROPDMGEXP %in% valid_exp)

crop <- storm2 %>% filter(CROPDMGEXP %in% valid_exp)

# Translate each label into its multiplier with an exact-match lookup.
# Blank or missing labels have no entry in the lookup and become NA.
# as.character() guards against the column having been read as a factor, in
# which case indexing would otherwise use the integer codes.
prop$PROPDMGEXP <- unname(exp_multiplier[as.character(prop$PROPDMGEXP)])

crop$CROPDMGEXP <- unname(exp_multiplier[as.character(crop$CROPDMGEXP)])

Results

Health Impact

# Fatalities, injuries and their combined total per event category, ordered
# from the most to the fewest fatalities.
health_t <- storm2 %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES),
    Total_Injuries = sum(INJURIES),
    Total = Total_Fatalities + Total_Injuries
  ) %>%
  arrange(desc(Total_Fatalities))

# Render the summary as a table in the report.
knitr::kable(health_t)
EVTYPE Total_Fatalities Total_Injuries Total
wind 7082 104978 112060
heat 3181 9271 12452
rain 1661 8984 10645
winter 1367 8300 9667
lightning 817 5232 6049
ocean surge 768 904 1672
other 97 132 229
fire 90 1608 1698
low visibility 82 1119 1201
volcan 0 0 0
# Injuries are drawn against the primary axis and fatalities against a
# secondary one. Fatalities are stretched by `fatality_scale` so the largest
# fatality bar is as long as the largest injury bar; the secondary axis divides
# by the very same factor, so its tick labels read as real fatality counts.
fatality_scale <- max(health_t$Total_Injuries) / max(health_t$Total_Fatalities)

ggplot(health_t, aes(EVTYPE)) +
  geom_col(fill = "#62929a", aes(y = Total_Fatalities * fatality_scale)) +
  geom_col(fill = "#5c5757", width = 0.5, aes(y = Total_Injuries)) +
  labs(title = "Health Impact",
       x = NULL, y = "Total Injuries") +
  scale_y_continuous(
    labels = scales::comma,
    sec.axis = sec_axis(~ . / fatality_scale, name = "Total Fatalities")
  ) +
  theme_hc() +
  coord_flip() +
  # Colour each axis like the bars it measures.
  theme(axis.title.x.top = element_text(color = "#62929a"),
        axis.title.x.bottom = element_text(color = "#5c5757"),
        axis.text.x.bottom = element_text(color = "#5c5757"),
        axis.text.x.top = element_text(color = "#62929a"))

Economic Impact

# Property damage per event category. na.omit() drops every row containing an
# NA, which removes the records whose exponent has no numeric multiplier.
prop_t <- na.omit(prop) %>%
  mutate(damage = PROPDMG * PROPDMGEXP) %>%
  group_by(EVTYPE) %>%
  summarise(prop_total = sum(damage))

# Crop damage per event category, computed the same way.
crop_t <- na.omit(crop) %>%
  mutate(damage = CROPDMG * CROPDMGEXP) %>%
  group_by(EVTYPE) %>%
  summarise(crop_total = sum(damage))

# Combine the two summaries by event category rather than by row position, so
# a category present in only one of them neither breaks the combination nor
# shifts totals onto the wrong category. A missing total counts as zero.
Economic <- merge(prop_t, crop_t, by = "EVTYPE", all = TRUE)
Economic$prop_total[is.na(Economic$prop_total)] <- 0
Economic$crop_total[is.na(Economic$crop_total)] <- 0
Economic$total <- Economic$prop_total + Economic$crop_total

# Horizontal bar chart of the total economic losses per event category, with
# comma-separated tick labels on the loss axis.
ggplot(data = Economic, mapping = aes(x = EVTYPE, y = total)) +
  geom_col() +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  labs(title = "Economic Impact", y = "Economic Losses") +
  theme_hc()

2018-08-20