Load Libraries
Data loading
file_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Local cache: the archive is stored under ./data and read from there.
# A single path is used for the existence check, the download target and the
# read, so the three steps can never disagree about where the file lives.
data_dir <- "data"
data_file <- file.path(data_dir, "repdata%2Fdata%2FStormData.csv.bz2")

# Check the folder exists; if not, create it.
if (!dir.exists(data_dir)) {
  dir.create(data_dir)
}

# Check the compressed file exists; if not, download it into the data folder.
if (!file.exists(data_file)) {
  download.file(url = file_url, destfile = data_file, method = "curl")
}

# Check the data is already loaded in this session; if not, load it.
# read.csv() can read the bz2-compressed file directly.
if (!exists("storm")) {
  storm <- read.csv(data_file)
}

Data Processing
Keep only the needed columns.
- Keep only the exponent (exp) codes defined in the documentation (page 12). These values are:
- H hundred (x100)
- K thousand (x1,000)
- M million (x1,000,000)
- B billion (x1,000,000,000)
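Since there are two exp columns, filtering both at once with an “OR” condition to keep the defined values would return the original dataset with nothing removed. The data set is therefore split: the health analysis uses all rows, while the economic analysis uses two separately filtered subsets, one for property damage and one for crop damage.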
- Convert the exp labels to numbers.
- Multiply by the exp value.
# Keep only the columns used below: column 8 is referenced as EVTYPE, and
# columns 23:28 supply FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and
# CROPDMGEXP.
storm2 <- storm[, c(8, 23:28)]

# Collapse the free-text event types into a few broad categories.
# case_when() uses the first matching pattern, so the order of the rules
# matters (e.g. anything containing "snow" is "winter" before "light" is tried).
storm2$EVTYPE <-
  case_when(
    grepl("frost|glaze|hail|snow|winter|wintry|blizzard|sleet|cold|ice|freeze|avalanche|icy",
          storm2$EVTYPE, ignore.case = TRUE) ~ "winter",
    grepl("light",
          storm2$EVTYPE, ignore.case = TRUE) ~ "lightning",
    grepl("rain|flood|wet|fld",
          storm2$EVTYPE, ignore.case = TRUE) ~ "rain",
    grepl("thunder|tstm|tornado|wind|hurricane|funnel|tropical|storm|SPOUT",
          storm2$EVTYPE, ignore.case = TRUE) ~ "wind",
    grepl("fog|visibility|dark|dust",
          storm2$EVTYPE, ignore.case = TRUE) ~ "low visibility",
    grepl("surf|surge|tide|tsunami|current",
          storm2$EVTYPE, ignore.case = TRUE) ~ "ocean surge",
    grepl("heat|high +temp|record +temp|warm|dry",
          storm2$EVTYPE, ignore.case = TRUE) ~ "heat",
    grepl("fire|smoke",
          storm2$EVTYPE, ignore.case = TRUE) ~ "fire",
    grepl("volcan",
          storm2$EVTYPE, ignore.case = TRUE) ~ "volcan",
    TRUE ~ "other")

# Exponent codes that are kept, and the multiplier each one stands for.
# "B" is a billion (1e9); the earlier value of 10000000 under-counted
# billion-dollar damages by a factor of 100.
valid_exp <- c("B", "H", "M", "K", NA, "")
exp_multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

# Property and crop damage are filtered separately, each on its own exp column.
prop <- storm2 %>% filter(PROPDMGEXP %in% valid_exp)
crop <- storm2 %>% filter(CROPDMGEXP %in% valid_exp)

# Convert the exp labels to numbers by lookup. Blank and NA labels have no
# entry in the lookup and become NA, exactly as as.numeric("") did before;
# those rows are dropped later by na.omit().
prop$PROPDMGEXP <- unname(exp_multiplier[as.character(prop$PROPDMGEXP)])
crop$CROPDMGEXP <- unname(exp_multiplier[as.character(crop$CROPDMGEXP)])

Results
Health Impact
# Total fatalities and injuries per event category, plus their sum,
# listed from the deadliest category downwards.
health_t <- summarise(
  group_by(storm2, EVTYPE),
  Total_Fatalities = sum(FATALITIES),
  Total_Injuries = sum(INJURIES)
)
health_t$Total <- health_t$Total_Fatalities + health_t$Total_Injuries
health_t <- arrange(health_t, desc(Total_Fatalities))

# Render the summary as a table in the report.
knitr::kable(health_t)

| EVTYPE | Total_Fatalities | Total_Injuries | Total |
|---|---|---|---|
| wind | 7082 | 104978 | 112060 |
| heat | 3181 | 9271 | 12452 |
| rain | 1661 | 8984 | 10645 |
| winter | 1367 | 8300 | 9667 |
| lightning | 817 | 5232 | 6049 |
| ocean surge | 768 | 904 | 1672 |
| other | 97 | 132 | 229 |
| fire | 90 | 1608 | 1698 |
| low visibility | 82 | 1119 | 1201 |
| volcan | 0 | 0 | 0 |
# Injuries and fatalities share one plot with two axes. Fatalities are
# stretched by `fatality_scale` so that the largest fatality bar is as long as
# the largest injury bar, and the secondary axis divides by the same factor so
# the bars read correctly against it. The factor is computed from the data:
# the previous hard-coded pair (4.82321 for the bars, 0.06746175 = 1/14.82321
# for the axis) did not agree with each other.
fatality_scale <- max(health_t$Total_Injuries) / max(health_t$Total_Fatalities)
if (!is.finite(fatality_scale) || fatality_scale <= 0) {
  # Fall back to a 1:1 axis when either total is zero or missing.
  fatality_scale <- 1
}

ggplot(health_t, aes(EVTYPE)) +
  # Wide bars: fatalities, rescaled onto the injuries axis.
  geom_col(fill = "#62929a", aes(y = Total_Fatalities * fatality_scale)) +
  # Narrow bars drawn on top: injuries on the primary axis.
  geom_col(fill = "#5c5757", width = 0.5, aes(y = Total_Injuries)) +
  labs(title = "Health Impact",
       x = NULL, y = "Total Injuries") +
  scale_y_continuous(
    labels = scales::comma,
    sec.axis = sec_axis(~ . / fatality_scale, name = "Total Fatalities")
  ) +
  theme_hc() +
  coord_flip() +
  # Colour each axis like the bars it belongs to.
  theme(axis.title.x.top = element_text(color = "#62929a"),
        axis.title.x.bottom = element_text(color = "#5c5757"),
        axis.text.x.bottom = element_text(color = "#5c5757"),
        axis.text.x.top = element_text(color = "#62929a"))

Economic Impact
# Property damage per event category: amount times its exponent multiplier.
# na.omit() drops the rows whose exponent could not be converted to a number.
prop_t <- na.omit(prop) %>%
  mutate(damage = PROPDMG * PROPDMGEXP) %>%
  group_by(EVTYPE) %>%
  summarise(prop_total = sum(damage))

# Crop damage per event category, computed the same way.
crop_t <- na.omit(crop) %>%
  mutate(damage = CROPDMG * CROPDMGEXP) %>%
  group_by(EVTYPE) %>%
  summarise(crop_total = sum(damage))

# Combine the two summaries by event category rather than by row position, so
# a category missing from one of them can neither misalign the totals nor make
# the combination fail. A missing side counts as zero damage.
Economic <- merge(prop_t, crop_t, by = "EVTYPE", all = TRUE)
Economic$prop_total[is.na(Economic$prop_total)] <- 0
Economic$crop_total[is.na(Economic$crop_total)] <- 0
Economic$total <- Economic$prop_total + Economic$crop_total
# Horizontal bar chart of the combined property and crop losses per category.
economic_plot <- ggplot(Economic, aes(x = EVTYPE, y = total)) +
  geom_col() +
  coord_flip()
economic_plot +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Economic Impact") +
  ylab("Economic Losses") +
  theme_hc()