##Data Processing

# Read the raw storm data set straight from the bz2-compressed CSV.
#
# The free-text / code columns listed below are declared as character
# explicitly. When left to type guessing they were parsed as logical, which
# produced millions of parsing failures and turned almost every CROPDMGEXP
# unit code into NA, so crop damage could not be converted to dollars later.
# Presumably the rows sampled for guessing are empty for these columns.
# NOTE: the parser log recorded below was produced by the original call that
# did not pass col_types.
stormData <- readr::read_csv(
  "repdata_data_StormData.csv.bz2",
  col_types = readr::cols(
    .default = readr::col_guess(),
    BGN_AZI = readr::col_character(),
    BGN_LOCATI = readr::col_character(),
    END_DATE = readr::col_character(),
    END_TIME = readr::col_character(),
    COUNTYENDN = readr::col_character(),
    END_AZI = readr::col_character(),
    END_LOCATI = readr::col_character(),
    PROPDMGEXP = readr::col_character(),
    CROPDMGEXP = readr::col_character(),
    WFO = readr::col_character(),
    STATEOFFIC = readr::col_character(),
    ZONENAMES = readr::col_character(),
    REMARKS = readr::col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   BGN_DATE = col_character(),
##   BGN_TIME = col_character(),
##   TIME_ZONE = col_character(),
##   COUNTYNAME = col_character(),
##   STATE = col_character(),
##   EVTYPE = col_character(),
##   BGN_AZI = col_logical(),
##   BGN_LOCATI = col_logical(),
##   END_DATE = col_logical(),
##   END_TIME = col_logical(),
##   COUNTYENDN = col_logical(),
##   END_AZI = col_logical(),
##   END_LOCATI = col_logical(),
##   PROPDMGEXP = col_character(),
##   CROPDMGEXP = col_logical(),
##   WFO = col_logical(),
##   STATEOFFIC = col_logical(),
##   ZONENAMES = col_logical(),
##   REMARKS = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 5255570 parsing failures.
##  row col           expected actual                             file
## 1671 WFO 1/0/T/F/TRUE/FALSE     NG 'repdata_data_StormData.csv.bz2'
## 1673 WFO 1/0/T/F/TRUE/FALSE     NG 'repdata_data_StormData.csv.bz2'
## 1674 WFO 1/0/T/F/TRUE/FALSE     NG 'repdata_data_StormData.csv.bz2'
## 1675 WFO 1/0/T/F/TRUE/FALSE     NG 'repdata_data_StormData.csv.bz2'
## 1678 WFO 1/0/T/F/TRUE/FALSE     NG 'repdata_data_StormData.csv.bz2'
## .... ... .................. ...... ................................
## See problems(...) for more details.

##Initial Exploration

# Print a per-column summary of the loaded data to check the parsed column
# types and the number of missing values in each column.
summary(stormData)
##     STATE__       BGN_DATE           BGN_TIME          TIME_ZONE        
##  Min.   : 1.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.:19.0   Class :character   Class :character   Class :character  
##  Median :30.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :31.2                                                           
##  3rd Qu.:45.0                                                           
##  Max.   :95.0                                                           
##                                                                         
##      COUNTY       COUNTYNAME           STATE              EVTYPE         
##  Min.   :  0.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.: 31.0   Class :character   Class :character   Class :character  
##  Median : 75.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100.6                                                           
##  3rd Qu.:131.0                                                           
##  Max.   :873.0                                                           
##                                                                          
##    BGN_RANGE        BGN_AZI        BGN_LOCATI     END_DATE       END_TIME      
##  Min.   :   0.000   Mode:logical   Mode:logical   Mode:logical   Mode:logical  
##  1st Qu.:   0.000   NA's:902297    TRUE:1         NA's:902297    NA's:902297   
##  Median :   0.000                  NA's:902296                                 
##  Mean   :   1.484                                                              
##  3rd Qu.:   1.000                                                              
##  Max.   :3749.000                                                              
##                                                                                
##    COUNTY_END COUNTYENDN       END_RANGE        END_AZI        END_LOCATI    
##  Min.   :0    Mode:logical   Min.   :  0.0000   Mode:logical   Mode:logical  
##  1st Qu.:0    NA's:902297    1st Qu.:  0.0000   NA's:902297    NA's:902297   
##  Median :0                   Median :  0.0000                                
##  Mean   :0                   Mean   :  0.9862                                
##  3rd Qu.:0                   3rd Qu.:  0.0000                                
##  Max.   :0                   Max.   :925.0000                                
##                                                                              
##      LENGTH              WIDTH                F               MAG         
##  Min.   :   0.0000   Min.   :   0.000   Min.   :0.0      Min.   :    0.0  
##  1st Qu.:   0.0000   1st Qu.:   0.000   1st Qu.:0.0      1st Qu.:    0.0  
##  Median :   0.0000   Median :   0.000   Median :1.0      Median :   50.0  
##  Mean   :   0.2301   Mean   :   7.503   Mean   :0.9      Mean   :   46.9  
##  3rd Qu.:   0.0000   3rd Qu.:   0.000   3rd Qu.:1.0      3rd Qu.:   75.0  
##  Max.   :2315.0000   Max.   :4400.000   Max.   :5.0      Max.   :22000.0  
##                                         NA's   :843563                    
##    FATALITIES          INJURIES            PROPDMG         PROPDMGEXP       
##  Min.   :  0.0000   Min.   :   0.0000   Min.   :   0.00   Length:902297     
##  1st Qu.:  0.0000   1st Qu.:   0.0000   1st Qu.:   0.00   Class :character  
##  Median :  0.0000   Median :   0.0000   Median :   0.00   Mode  :character  
##  Mean   :  0.0168   Mean   :   0.1557   Mean   :  12.06                     
##  3rd Qu.:  0.0000   3rd Qu.:   0.0000   3rd Qu.:   0.50                     
##  Max.   :583.0000   Max.   :1700.0000   Max.   :5000.00                     
##                                                                             
##     CROPDMG        CROPDMGEXP        WFO          STATEOFFIC     ZONENAMES     
##  Min.   :  0.000   Mode :logical   Mode:logical   Mode:logical   Mode:logical  
##  1st Qu.:  0.000   FALSE:19        TRUE:7166      NA's:902297    NA's:902297   
##  Median :  0.000   NA's :902278    NA's:895131                                 
##  Mean   :  1.527                                                               
##  3rd Qu.:  0.000                                                               
##  Max.   :990.000                                                               
##                                                                                
##     LATITUDE      LONGITUDE        LATITUDE_E     LONGITUDE_     REMARKS       
##  Min.   :   0   Min.   :-14451   Min.   :   0   Min.   :-14455   Mode:logical  
##  1st Qu.:2802   1st Qu.:  7247   1st Qu.:   0   1st Qu.:     0   NA's:902297   
##  Median :3540   Median :  8707   Median :   0   Median :     0                 
##  Mean   :2875   Mean   :  6940   Mean   :1452   Mean   :  3509                 
##  3rd Qu.:4019   3rd Qu.:  9605   3rd Qu.:3549   3rd Qu.:  8735                 
##  Max.   :9706   Max.   : 17124   Max.   :9706   Max.   :106220                 
##  NA's   :47                      NA's   :40                                    
##      REFNUM      
##  Min.   :     1  
##  1st Qu.:225575  
##  Median :451149  
##  Mean   :451149  
##  3rd Qu.:676723  
##  Max.   :902297  
## 

##Load Relevant Libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.1     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ stringr 1.4.0
## ✓ tidyr   1.1.0     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)

## Question 1 Processing Steps
## - Select just the event types, fatalities and injuries.
## - Group the data by event type.
## - Summarise the sum of fatalities and injuries by event type.
## - Transform the total number of health impacts by event type to log base 10.
## The totals were transformed to log base 10 because of the vast difference
## between the highest and the lowest counts.

# Health impact per event type, in long format for plotting.
# Fatalities and injuries are summed per event type and put on a log10 scale,
# the event types are ranked by fatalities (ties broken by injuries), and the
# result is reshaped to one row per (event type, health impact) pair.
# NOTE(review): an event type with a zero total yields log10(0) = -Inf.
healthImpactData <- stormData %>%
  select(`EVENT TYPE` = EVTYPE, FATALITIES, INJURIES) %>%
  group_by(`EVENT TYPE`) %>%
  summarise(
    "Fatalities" = log10(sum(FATALITIES)),
    "Injuries" = log10(sum(INJURIES))
  ) %>%
  arrange(desc(`Fatalities`), desc(`Injuries`)) %>%
  # Keep the 20 highest-ranked event types so the data matches the "Top 20"
  # figure built from it and the top-20 cut used for the economic summary.
  head(20) %>%
  pivot_longer(
    !`EVENT TYPE`,
    names_to = "Health Impact",
    values_to = "Total"
  )
## `summarise()` ungrouping output (override with `.groups` argument)

## Question 2 Processing Steps
## - Select just the event types, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP.
## - Convert costs to straight dollar amounts.
## - Create the total economic cost by combining property and crop damage.
## - Summarise the total economic impact by event type.
## The log of the total dollars was taken because of the large variation
## between the highest and the lowest dollar amounts.

# Keep only the event type plus the property/crop damage amounts and their
# unit-code columns; everything else is irrelevant for the economic question.
EconomcImpactData <- select(
  stormData,
  EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)
# Convert the recorded damage figures to plain dollar amounts.
#
# PROPDMGEXP / CROPDMGEXP hold a unit code for the matching amount column:
# K = thousands, M = millions, B = billions (case-insensitive). Rows are kept
# when at least one of the two codes is K/M/B. An amount whose own code is not
# K/M/B converts to NA and counts as 0 in the total.
#
# The row filter checks PROPDMGEXP (the unit code); checking PROPDMG, the
# numeric amount, can never match K/M/B and dropped property-only rows.
# `%in%` is used because it never returns NA, so a missing code on one side
# cannot hide a valid code on the other.
ConvertedImpactData <- EconomcImpactData %>%
  filter(
    toupper(CROPDMGEXP) %in% c("K", "M", "B") |
      toupper(PROPDMGEXP) %in% c("K", "M", "B")
  ) %>%
  mutate(
    "Crop Economic Impact" = case_when(
      toupper(CROPDMGEXP) == "K" ~ CROPDMG * 1e3,
      toupper(CROPDMGEXP) == "M" ~ CROPDMG * 1e6,
      toupper(CROPDMGEXP) == "B" ~ CROPDMG * 1e9
    ),
    "Property Economic Impact" = case_when(
      toupper(PROPDMGEXP) == "K" ~ PROPDMG * 1e3,
      toupper(PROPDMGEXP) == "M" ~ PROPDMG * 1e6,
      toupper(PROPDMGEXP) == "B" ~ PROPDMG * 1e9
    )
  ) %>%
  mutate(
    # After the filter at least one side is non-NA; a missing side adds 0.
    "Total Economic Impact" =
      coalesce(`Crop Economic Impact`, 0) +
      coalesce(`Property Economic Impact`, 0)
  ) %>%
  rename("Event Type" = EVTYPE)
# Total economic impact per event type on a log10 dollar scale,
# sorted from the most to the least costly event type.
EconomcImpactSummary <-
  ConvertedImpactData %>%
  group_by(`Event Type`) %>%
  summarise(
    `Economic Impact By Event Type` = log10(sum(`Total Economic Impact`))
  ) %>%
  arrange(-`Economic Impact By Event Type`)
## `summarise()` ungrouping output (override with `.groups` argument)

## Results Question 1
## See Figure 1

# Figure 1: dodged bars of log10 fatalities and injuries per event type,
# with event types ordered by decreasing total along the x axis.
ggplot(
  healthImpactData,
  aes(
    x = reorder(`EVENT TYPE`, -Total),
    y = Total,
    fill = `Health Impact`
  )
) +
  geom_col(position = "dodge") +
  labs(
    x = "Event Type",
    y = "log10 Total Number",
    title = "Top 20 Storm Event Types' Impact on Health in USA "
  ) +
  theme_minimal() +
  theme(
    # Tilt the long event-type labels so they do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Place the legend inside the panel, near the top-right corner.
    legend.position = c(.9, .9)
  )

## Results Question 2
## See Figure 2

# Figure 2: log10 dollar impact of the 20 costliest event types,
# ordered by decreasing impact along the x axis.
ggplot(
  head(EconomcImpactSummary, 20),
  aes(
    x = reorder(`Event Type`, -`Economic Impact By Event Type`),
    y = `Economic Impact By Event Type`,
    fill = `Event Type`
  )
) +
  geom_col() +
  labs(
    x = "Event Type",
    y = "Log10 of Dollar Amount of Impact",
    title = "Top 20 Storm Event Types' Economic Impact in USA "
  ) +
  theme_minimal() +
  theme(
    # Tilt the long event-type labels so they do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1),
    # The fill colour only repeats the x axis, so the legend is hidden.
    legend.position = "none"
  )

## R Session Information

sessionInfo()