## Data Processing
## Read the storm data file into `stormData`. The call is namespace-qualified
## because the library() calls appear further down in this script, so
## read_csv() would not yet be attached at this point.
stormData <- readr::read_csv("repdata_data_StormData.csv.bz2")

## Initial Exploration
## Print per-column summary statistics for the raw data.
summary(stormData)
## Load Relevant Libraries
## One library() call per line; `ddplyr` was a typo for `dplyr`.
library(dplyr)
library(tidyverse)
library(ggplot2)
library(knitr)
## Question 1 Processing Steps
## - select just the event types, fatalities and injuries
## - group the data by event type
## - summarize the sum of fatalities and injuries by event type
## - transform the total number of health impacts by event type by
##   calculating log base 10
## The total number of each health impact was transformed to log base 10 due
## to the vast difference between the highest count level and the lowest.
## NOTE(review): the figure built from this data is titled "Top 20", but 30
## event types are kept here -- confirm which number is intended.
healthImpactData <- stormData %>%
  select(`EVENT TYPE` = EVTYPE, FATALITIES, INJURIES) %>%
  group_by(`EVENT TYPE`) %>%
  summarise(
    Fatalities = log10(sum(FATALITIES)),
    Injuries = log10(sum(INJURIES))
  ) %>%
  ## rank by fatalities first, injuries as tie-breaker
  arrange(desc(Fatalities), desc(Injuries)) %>%
  head(30) %>%
  ## one row per (event type, health impact) pair for plotting
  pivot_longer(
    !`EVENT TYPE`,
    names_to = "Health Impact",
    values_to = "Total"
  )
## Question 2 Processing
## - select just the event types, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
## - convert costs to straight dollar amounts
## - create total economic cost by combining cost from property and crop damage
## - create a summary of the sums of the total economic impact by event type
## The log of the total dollars was taken due to the large variation between
## the highest dollar amount and the rest.
EconomcImpactData <- stormData %>%
  select(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

ConvertedImpactData <- EconomcImpactData %>%
  ## Keep rows where at least one exponent code contains K, M or B.
  ## The property test must look at PROPDMGEXP (the exponent code), not
  ## PROPDMG (the numeric amount), otherwise property-only rows are dropped.
  filter(
    str_detect(toupper(CROPDMGEXP), "[KMB]") |
      str_detect(toupper(PROPDMGEXP), "[KMB]")
  ) %>%
  ## Scale each amount by its exponent code; any other code yields NA.
  mutate(
    `Crop Economic Impact` = case_when(
      toupper(CROPDMGEXP) == "K" ~ CROPDMG * 1e3,
      toupper(CROPDMGEXP) == "M" ~ CROPDMG * 1e6,
      toupper(CROPDMGEXP) == "B" ~ CROPDMG * 1e9
    ),
    `Property Economic Impact` = case_when(
      toupper(PROPDMGEXP) == "K" ~ PROPDMG * 1e3,
      toupper(PROPDMGEXP) == "M" ~ PROPDMG * 1e6,
      toupper(PROPDMGEXP) == "B" ~ PROPDMG * 1e9
    )
  ) %>%
  ## Total is the sum when both parts are present, otherwise whichever
  ## part is available.
  mutate(
    `Total Economic Impact` = case_when(
      !is.na(`Crop Economic Impact`) & !is.na(`Property Economic Impact`) ~
        `Crop Economic Impact` + `Property Economic Impact`,
      !is.na(`Crop Economic Impact`) ~ `Crop Economic Impact`,
      !is.na(`Property Economic Impact`) ~ `Property Economic Impact`
    )
  ) %>%
  rename(`Event Type` = EVTYPE)

EconomcImpactSummary <- ConvertedImpactData %>%
  group_by(`Event Type`) %>%
  summarise(
    `Economic Impact By Event Type` = log10(sum(`Total Economic Impact`))
  ) %>%
  arrange(desc(`Economic Impact By Event Type`))
## Results Question 1 figure 1
## Dodged bar chart of log10 fatalities and injuries per event type, with
## event types ordered from highest to lowest total.
healthImpactData %>%
  ggplot(
    aes(
      x = reorder(`EVENT TYPE`, -Total),
      y = Total,
      fill = `Health Impact`
    )
  ) +
  xlab("Event Type") +
  ylab("log10 Total Number") +
  ggtitle("Top 20 Storm Event Types’ Impact on Health in USA") +
  ## geom_col() draws bars from the values already in the data
  geom_col(position = "dodge") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = c(.9, .9)
  )
## Results Question 2 Figure 2
## Bar chart of the log10 total economic impact for the first 20 rows of the
## summary, with event types ordered from highest to lowest impact.
EconomcImpactSummary %>%
  head(20) %>%
  ggplot(
    aes(
      x = reorder(`Event Type`, -`Economic Impact By Event Type`),
      y = `Economic Impact By Event Type`,
      fill = `Event Type`
    )
  ) +
  ## geom_col() draws bars from the values already in the data
  geom_col() +
  xlab("Event Type") +
  ylab("Log10 of Dollar Amount of Impact") +
  ggtitle("Top 20 Storm Event Types’ Economic Impact in USA") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )
## R Session Information
## Print the R version and attached packages used to produce this report.
sessionInfo()
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.