Synopsis The data were downloaded from the course website URL and read into R with no issues. Minimal adjustment and modification were needed to prepare the data for analysis. The primary questions to answer were: What are the top 10 weather events in terms of the number of injuries and fatalities? Analysis Result: Tornadoes were by far the most destructive weather event in terms of injuries and fatalities, with 91346 injuries and 5633 fatalities. What are the top 10 weather events in terms of economic impact? Analysis Result: Tornadoes were by far the most destructive weather event in terms of economic impact, with over $300 million in property and crop damage.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.0.3
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(knitr)
library(R.utils)
## Warning: package 'R.utils' was built under R version 4.0.3
## Loading required package: R.oo
## Warning: package 'R.oo' was built under R version 4.0.3
## Loading required package: R.methodsS3
## Warning: package 'R.methodsS3' was built under R version 4.0.3
## R.methodsS3 v1.8.1 (2020-08-26 16:20:06 UTC) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.24.0 (2020-08-26 16:11:58 UTC) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following object is masked from 'package:R.methodsS3':
## 
##     throw
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, load, save
## R.utils v2.10.1 (2020-08-26 22:50:31 UTC) successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:tidyr':
## 
##     extract
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, nullfile, parse,
##     warnings
# Download the compressed storm data only when no local copy exists, so that
# repeated runs reuse the file already on disk.
if (!file.exists("StormData.csv.bz2")) {
  url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(url, destfile = "StormData.csv.bz2", method = "curl")
}

# Read the .bz2 file directly, keeping text columns as character vectors
# rather than factors.
StormData <- read.csv("StormData.csv.bz2", stringsAsFactors = FALSE)

# List the column names of the loaded data frame.
names(StormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Report the number of rows and columns in the loaded data frame.
dim(StormData)
## [1] 902297     37
# Show the type of every column together with its first few values.
str(StormData)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Set knitr's default chunk option so that chunk source code is echoed in the
# rendered report.
knitr::opts_chunk$set(echo = TRUE)

Which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

### Group the data by storm type, then total the fatalities and injuries for each type ###
        
# Total fatalities and injuries per event type, keep the ten event types with
# the largest combined count, and reshape to long form (one row per event
# type and casualty type) so both measures can be drawn as stacked bars.
#
# Result columns: EVTYPE, CType ("FATALITIES" or "INJURIES"), Value.
Fatalities_by_Storm_Type <- StormData %>%
  group_by(EVTYPE) %>%
  # na.rm = TRUE keeps one missing count from turning a whole total into NA.
  summarise(
    FATALITIES = sum(FATALITIES, na.rm = TRUE),
    INJURIES = sum(INJURIES, na.rm = TRUE)
  ) %>%
  # Rank by the combined number of people harmed.
  arrange(desc(FATALITIES + INJURIES)) %>%
  slice(1:10) %>%
  # pivot_longer() is the current replacement for the superseded gather().
  pivot_longer(
    cols = c(FATALITIES, INJURIES),
    names_to = "CType",
    values_to = "Value"
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# Stacked bar chart of the top event types. Bars are placed along the x axis
# by reorder(EVTYPE, -Value), i.e. in descending order of each event type's
# mean Value, and are split by casualty type through the fill colour.
Casualties <- ggplot(
  Fatalities_by_Storm_Type,
  aes(x = reorder(EVTYPE, -Value), y = Value, fill = CType)
) +
  # geom_col() draws bar heights straight from the y values.
  geom_col(colour = "blue") +
  labs(
    title = "Top 10 Events By Casualties",
    x = "Type of event",
    y = "Counts",
    fill = "Type"
  ) +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

print(Casualties)

Economic Impact

### The Storm Data documentation indicates: “Alphabetical characters used to signify magnitude include ‘K’ for thousands, ‘M’ for millions, and ‘B’ for billions.” To determine economic impact, the analysis considered the events labeled “K”, “M”, or “B”. ###

### Create property and crop damage values for use in the analysis ###
# Map damage exponent codes to numeric multipliers.
#
# exponent: vector of exponent codes as stored in PROPDMGEXP / CROPDMGEXP.
# Returns a numeric vector of the same length as `exponent`:
#   "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9 (letters matched
#   case-insensitively), an all-digit code n -> 10^n, and 1 for anything
#   else (blank or unrecognised symbols).
damage_multiplier <- function(exponent) {
  code <- toupper(exponent)
  multiplier <- rep(1, length(code))
  multiplier[code %in% "H"] <- 1e2
  multiplier[code %in% "K"] <- 1e3
  multiplier[code %in% "M"] <- 1e6
  multiplier[code %in% "B"] <- 1e9
  # Convert only the purely numeric codes, so no coercion warnings are raised
  # for letters, blanks, or symbols.
  is_digit <- grepl("^[0-9]+$", code)
  multiplier[is_digit] <- 10^as.numeric(code[is_digit])
  multiplier
}

# Raw damage figures and their exponent codes, one element per storm record.
# pd and cd are unscaled copies taken before StormData is overwritten below;
# nothing in this section reads them.
pd <- StormData$PROPDMG
cd <- StormData$CROPDMG
Property_Damage_Exponent <- StormData$PROPDMGEXP
Crop_Damage_Exponent <- StormData$CROPDMGEXP

# Damage scaled by its exponent code. Property and crop damage share one
# mapping so both are treated identically.
Property_Damage <- pd * damage_multiplier(Property_Damage_Exponent)
Crop_Damage <- cd * damage_multiplier(Crop_Damage_Exponent)

# NOTE(review): the totals were previously summed from the unscaled PROPDMG /
# CROPDMG columns, so any figures quoted elsewhere in the report should be
# re-checked against the output of this version.
StormData <- StormData %>%
  # Replace the raw figures with the scaled amounts before aggregating.
  mutate(PROPDMG = Property_Damage, CROPDMG = Crop_Damage) %>%
  group_by(EVTYPE) %>%
  # Total damage per event type, expressed in millions.
  summarise(
    PROPDMG = sum(PROPDMG, na.rm = TRUE) / 1e6,
    CROPDMG = sum(CROPDMG, na.rm = TRUE) / 1e6
  ) %>%
  # Rank by combined property and crop damage, largest first.
  arrange(desc(PROPDMG + CROPDMG)) %>%
  slice(1:10) %>%
  # Long form (one row per event type and damage type) for plotting;
  # pivot_longer() is the current replacement for the superseded gather().
  pivot_longer(
    cols = c(CROPDMG, PROPDMG),
    names_to = "TYPE",
    values_to = "VALUE"
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# Stacked bar chart of the summarised damage table. Bars are placed along the
# x axis by reorder(EVTYPE, -VALUE), i.e. in descending order of each event
# type's mean VALUE, and are split by damage type through the fill colour.
Economic_Plot <- ggplot(
  StormData,
  aes(x = reorder(EVTYPE, -VALUE), y = VALUE, fill = TYPE)
) +
  # geom_col() draws bar heights straight from the y values.
  geom_col(colour = "green") +
  labs(
    title = "Top 10 Types of Events By Economic Consequences",
    x = "Type of event",
    y = "Value (in Millions)",
    fill = "Type"
  ) +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0))

print(Economic_Plot)