Load all the necessary libraries.

library(data.table)
## Warning: package 'data.table' was built under R version 4.4.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(R.utils)
## Warning: package 'R.utils' was built under R version 4.4.3
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.27.0 (2024-11-01 18:00:02 UTC) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following object is masked from 'package:R.methodsS3':
## 
##     throw
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, load, save
## R.utils v2.13.0 (2025-02-24 21:20:02 UTC) successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, isOpen, nullfile, parse, use, warnings
library(knitr)
## Warning: package 'knitr' was built under R version 4.4.3
# Set the working directory to the folder that holds the storm data, so that the
# relative file name passed to read.csv() further down resolves.
# NOTE(review): this is an absolute, machine-specific path; presumably its last
# component "repdata_data_StormData.csv" is a folder (e.g. the unzipped archive)
# rather than the CSV file itself -- confirm before running on another machine.
setwd("C:/Users/sue6/OneDrive - Queensland University of Technology/Documents/R course related docs/repdata_data_StormData.csv")

“Data Processing”

Load data into R

A subset of the data is created by keeping only the columns required for the analysis: BGN_DATE, EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP.

Parse BGN_DATE as a date and create a new Year column from it.

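Normalize property damage data. Property damage amounts (PROPDMG) are recorded in different dollar units, given by a magnitude code in PROPDMGEXP (H = hundreds, K = thousands, M = millions, B = billions; a digit is used directly as a power of ten). Each amount is converted to plain dollars; unrecognised codes are treated as a power of zero, i.e. the amount is left unchanged.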

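Normalize crop damage data. Crop damage amounts (CROPDMG) are recorded in different dollar units, given by a magnitude code in CROPDMGEXP (H = hundreds, K = thousands, M = millions, B = billions; a digit is used directly as a power of ten). Each amount is converted to plain dollars; unrecognised codes are treated as a power of zero, i.e. the amount is left unchanged.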

# Read the raw storm data; the file name is resolved against the current
# working directory.
StormData <- read.csv(file = "repdata_data_StormData.csv")

# Columns kept for the analysis: event date and type, casualty counts, and the
# damage amounts together with their magnitude codes.
keepCols <- c(
  "BGN_DATE", "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)

StormData_Used <- StormData[, keepCols]

# Parse the begin date (month/day/year plus a time part) and store its
# four-digit year as a number.
StormData_Used[["Year"]] <- as.numeric(
  format(
    as.Date(StormData_Used[["BGN_DATE"]], format = "%m/%d/%Y %H:%M:%S"),
    "%Y"
  )
)

# Show the distinct raw magnitude codes before recoding.
unique(StormData_Used$PROPDMGEXP)
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"

# Letter codes and the power of ten each one is recoded to.
prop_letter_power <- c(H = "2", K = "3", M = "6", B = "9")

# Look the codes up case-insensitively; only entries that are one of the
# letters above are rewritten, every other entry keeps its original text.
prop_code <- as.character(StormData_Used$PROPDMGEXP)
prop_hit <- match(toupper(prop_code), names(prop_letter_power))
prop_found <- !is.na(prop_hit)
prop_code[prop_found] <- unname(prop_letter_power[prop_hit[prop_found]])

# Digit strings convert to their value; anything that is not a number becomes
# NA (this is where the coercion warning comes from) and is then set to 0.
prop_power <- as.numeric(prop_code)
## Warning: NAs introduced by coercion
prop_power[is.na(prop_power)] <- 0

# Store the numeric exponent and the damage scaled by 10^exponent.
StormData_Used$PROPDMGEXP <- prop_power
StormData_Used$TOTALPROPDMG <- StormData_Used$PROPDMG * 10^prop_power

# Show the distinct raw magnitude codes before recoding.
unique(StormData_Used$CROPDMGEXP)
## [1] ""  "M" "K" "m" "B" "?" "0" "k" "2"

# Letter codes and the power of ten each one is recoded to.
crop_letter_power <- c(H = "2", K = "3", M = "6", B = "9")

# Look the codes up case-insensitively; only entries that are one of the
# letters above are rewritten, every other entry keeps its original text.
crop_code <- as.character(StormData_Used$CROPDMGEXP)
crop_hit <- match(toupper(crop_code), names(crop_letter_power))
crop_found <- !is.na(crop_hit)
crop_code[crop_found] <- unname(crop_letter_power[crop_hit[crop_found]])

# Digit strings convert to their value; anything that is not a number becomes
# NA (this is where the coercion warning comes from) and is then set to 0.
crop_power <- as.numeric(crop_code)
## Warning: NAs introduced by coercion
crop_power[is.na(crop_power)] <- 0

# Store the numeric exponent and the damage scaled by 10^exponent.
StormData_Used$CROPDMGEXP <- crop_power
StormData_Used$TOTALCROPDMG <- StormData_Used$CROPDMG * 10^crop_power

Results

Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

Identify the ten event types with the most fatalities in the dataset

Identify the ten event types with the most injuries in the dataset

Merge the datasets

Visualization for Population consequences

Conclusion: Tornadoes are by far the most harmful events for population health in the US, causing both the most fatalities and the most injuries

##              Event Fatalities
## 834        TORNADO       5633
## 130 EXCESSIVE HEAT       1903
## 153    FLASH FLOOD        978
## 275           HEAT        937
## 464      LIGHTNING        816
## 856      TSTM WIND        504
## 170          FLOOD        470
## 585    RIP CURRENT        368
## 359      HIGH WIND        248
## 19       AVALANCHE        224
##                 Event Injuries
## 834           TORNADO    91346
## 856         TSTM WIND     6957
## 170             FLOOD     6789
## 130    EXCESSIVE HEAT     6525
## 464         LIGHTNING     5230
## 275              HEAT     2100
## 427         ICE STORM     1975
## 153       FLASH FLOOD     1777
## 760 THUNDERSTORM WIND     1488
## 244              HAIL     1361
## Warning in melt.default(TotHealthDamage, id.vars = "Event"): The melt generic
## in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no
## longer actively developed, and this redirection is now deprecated. To continue
## using melt methods from reshape2 while both libraries are attached, e.g.
## melt.list, you can prepend the namespace, i.e. reshape2::melt(TotHealthDamage).
## In the next version, this warning will become an error.
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_bar()`).

## Across the United States, which types of events have the greatest economic consequences?

Compute top ten events for property damage

Compute top ten events for Crop damage

Merge the datasets

Visualization for Economic consequences

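Conclusion: Floods cause the greatest property damage, while drought causes the greatest crop damage; with property and crop losses combined, floods have the largest overall economic impact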

##                 Event    Prop_Cost
## 170             FLOOD 144657709807
## 411 HURRICANE/TYPHOON  69305840000
## 834           TORNADO  56947380677
## 670       STORM SURGE  43323536000
## 153       FLASH FLOOD  16822673979
## 244              HAIL  15735267513
## 402         HURRICANE  11868319010
## 848    TROPICAL STORM   7703890550
## 972      WINTER STORM   6688497251
## 359         HIGH WIND   5270046295
##                 Event   Crop_Cost
## 95            DROUGHT 13972566000
## 170             FLOOD  5661968450
## 590       RIVER FLOOD  5029459000
## 427         ICE STORM  5022113500
## 244              HAIL  3025954473
## 402         HURRICANE  2741910000
## 411 HURRICANE/TYPHOON  2607872800
## 153       FLASH FLOOD  1421317100
## 140      EXTREME COLD  1292973000
## 212      FROST/FREEZE  1094086000
## Warning in melt.default(TotEcoDamage, id.vars = "Event"): The melt generic in
## data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no
## longer actively developed, and this redirection is now deprecated. To continue
## using melt methods from reshape2 while both libraries are attached, e.g.
## melt.list, you can prepend the namespace, i.e. reshape2::melt(TotEcoDamage). In
## the next version, this warning will become an error.
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_bar()`).