Load the data: The data for this assignment come in the form of a comma-separated-value (CSV) file compressed with the bzip2 algorithm to reduce its size. You can download the file from the course web site:
Storm Data [47Mb] https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyr)
Load the data
# Load the data
# The raw file is the bzip2-compressed CSV published on the course web site.
# It is fetched once into the working directory rather than read from an
# absolute, user-specific path, so the analysis can be reproduced on any
# machine. read.csv() decompresses .bz2 files transparently.
storm_data_url <- paste0(
  "https://d396qusza40orc.cloudfront.net/",
  "repdata%2Fdata%2FStormData.csv.bz2"
)
storm_data_file <- "repdata_data_StormData.csv.bz2"
if (!file.exists(storm_data_file)) {
  # mode = "wb" keeps the binary archive intact on Windows.
  download.file(storm_data_url, destfile = storm_data_file, mode = "wb")
}
# Blank fields, "?" and the usual textual markers are all read as NA.
storm_data <- read.csv(
  storm_data_file,
  na.strings = c("NA", "N/A", "NULL", " ", "", "?"),
  stringsAsFactors = FALSE
)
# Check the first few rows of the data
head(storm_data)
# Check the dimensions of the data
dim(storm_data)
## [1] 902297 37
# Check the column names
colnames(storm_data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Create a new data frame with only the relevant columns
# Build the analysis data set: keep rows that have an event type and at least
# one positive fatality, injury, property-damage or crop-damage figure, keep
# only the columns used later, and parse both date columns from their
# month/day/year text form.
stormdata_tidy <- storm_data %>%
  subset(
    !is.na(EVTYPE) &
      (FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0)
  ) %>%
  select(
    BGN_DATE, END_DATE, EVTYPE, STATE,
    FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
  ) %>%
  mutate(
    across(c(BGN_DATE, END_DATE), ~ as.Date(.x, format = "%m/%d/%Y"))
  )
# Inspect column types and the first values of the reduced data frame
str(stormdata_tidy)
## 'data.frame': 254632 obs. of 10 variables:
## $ BGN_DATE : Date, format: "1950-04-18" "1950-04-18" ...
## $ END_DATE : Date, format: NA NA ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr NA NA NA NA ...
# Check the dimensions of the new data frame (number of rows, then columns)
dim(stormdata_tidy)
## [1] 254632 10
# Check the summary of the new data frame (per-column summary statistics,
# including the NA count of END_DATE)
summary(stormdata_tidy)
## BGN_DATE END_DATE EVTYPE
## Min. :1950-01-03 Min. :1993-01-01 Length:254632
## 1st Qu.:1997-01-23 1st Qu.:2000-05-24 Class :character
## Median :2002-08-02 Median :2005-03-07 Mode :character
## Mean :2000-06-13 Mean :2004-08-14
## 3rd Qu.:2008-05-07 3rd Qu.:2009-01-07
## Max. :2011-11-30 Max. :2011-11-30
## NA's :50928
## STATE FATALITIES INJURIES PROPDMG
## Length:254632 Min. : 0.00000 Min. : 0.0000 Min. : 0.00
## Class :character 1st Qu.: 0.00000 1st Qu.: 0.0000 1st Qu.: 2.00
## Mode :character Median : 0.00000 Median : 0.0000 Median : 5.00
## Mean : 0.05948 Mean : 0.5519 Mean : 42.75
## 3rd Qu.: 0.00000 3rd Qu.: 0.0000 3rd Qu.: 25.00
## Max. :583.00000 Max. :1700.0000 Max. :5000.00
##
## PROPDMGEXP CROPDMG CROPDMGEXP
## Length:254632 Min. : 0.000 Length:254632
## Class :character 1st Qu.: 0.000 Class :character
## Mode :character Median : 0.000 Mode :character
## Mean : 5.411
## 3rd Qu.: 0.000
## Max. :990.000
##
Data Exploration: Clean and tidy the data, including converting the EVTYPE column to uppercase and checking its unique values.
# Convert to uppercase and normalise whitespace.
# The raw labels include entries with a leading blank (e.g. " TSTM WIND",
# " FLASH FLOOD") and entries that differ only in spacing; without trimming
# and collapsing blanks they are counted as separate event types.
# NOTE: the listing recorded below was produced before whitespace was
# normalised; re-knit the report to refresh it.
stormdata_tidy$EVTYPE <- gsub(
  "[[:space:]]+", " ",
  trimws(toupper(stormdata_tidy$EVTYPE))
)
# Check the unique values in the EVTYPE column
unique(stormdata_tidy$EVTYPE)
## [1] "TORNADO" "TSTM WIND"
## [3] "HAIL" "ICE STORM/FLASH FLOOD"
## [5] "WINTER STORM" "HURRICANE OPAL/HIGH WINDS"
## [7] "THUNDERSTORM WINDS" "HURRICANE ERIN"
## [9] "HURRICANE OPAL" "HEAVY RAIN"
## [11] "LIGHTNING" "THUNDERSTORM WIND"
## [13] "DENSE FOG" "RIP CURRENT"
## [15] "THUNDERSTORM WINS" "FLASH FLOODING"
## [17] "FLASH FLOOD" "TORNADO F0"
## [19] "THUNDERSTORM WINDS LIGHTNING" "THUNDERSTORM WINDS/HAIL"
## [21] "HEAT" "HIGH WINDS"
## [23] "WIND" "HEAVY RAINS"
## [25] "LIGHTNING AND HEAVY RAIN" "THUNDERSTORM WINDS HAIL"
## [27] "COLD" "HEAVY RAIN/LIGHTNING"
## [29] "FLASH FLOODING/THUNDERSTORM WI" "FLOODING"
## [31] "WATERSPOUT" "EXTREME COLD"
## [33] "LIGHTNING/HEAVY RAIN" "BREAKUP FLOODING"
## [35] "HIGH WIND" "FREEZE"
## [37] "RIVER FLOOD" "HIGH WINDS HEAVY RAINS"
## [39] "AVALANCHE" "MARINE MISHAP"
## [41] "HIGH TIDES" "HIGH WIND/SEAS"
## [43] "HIGH WINDS/HEAVY RAIN" "HIGH SEAS"
## [45] "COASTAL FLOOD" "SEVERE TURBULENCE"
## [47] "RECORD RAINFALL" "HEAVY SNOW"
## [49] "HEAVY SNOW/WIND" "DUST STORM"
## [51] "FLOOD" "APACHE COUNTY"
## [53] "SLEET" "DUST DEVIL"
## [55] "ICE STORM" "EXCESSIVE HEAT"
## [57] "THUNDERSTORM WINDS/FUNNEL CLOU" "GUSTY WINDS"
## [59] "FLOODING/HEAVY RAIN" "HEAVY SURF COASTAL FLOODING"
## [61] "HIGH SURF" "WILD FIRES"
## [63] "HIGH" "WINTER STORM HIGH WINDS"
## [65] "WINTER STORMS" "MUDSLIDES"
## [67] "RAINSTORM" "SEVERE THUNDERSTORM"
## [69] "SEVERE THUNDERSTORMS" "SEVERE THUNDERSTORM WINDS"
## [71] "THUNDERSTORMS WINDS" "FLOOD/FLASH FLOOD"
## [73] "FLOOD/RAIN/WINDS" "THUNDERSTORMS"
## [75] "FLASH FLOOD WINDS" "WINDS"
## [77] "FUNNEL CLOUD" "HIGH WIND DAMAGE"
## [79] "STRONG WIND" "HEAVY SNOWPACK"
## [81] "FLASH FLOOD/" "HEAVY SURF"
## [83] "DRY MIRCOBURST WINDS" "DRY MICROBURST"
## [85] "URBAN FLOOD" "THUNDERSTORM WINDSS"
## [87] "MICROBURST WINDS" "HEAT WAVE"
## [89] "UNSEASONABLY WARM" "COASTAL FLOODING"
## [91] "STRONG WINDS" "BLIZZARD"
## [93] "WATERSPOUT/TORNADO" "WATERSPOUT TORNADO"
## [95] "STORM SURGE" "URBAN/SMALL STREAM FLOOD"
## [97] "WATERSPOUT-" "TORNADOES, TSTM WIND, HAIL"
## [99] "TROPICAL STORM ALBERTO" "TROPICAL STORM"
## [101] "TROPICAL STORM GORDON" "TROPICAL STORM JERRY"
## [103] "LIGHTNING THUNDERSTORM WINDS" "URBAN FLOODING"
## [105] "MINOR FLOODING" "WATERSPOUT-TORNADO"
## [107] "LIGHTNING INJURY" "LIGHTNING AND THUNDERSTORM WIN"
## [109] "FLASH FLOODS" "THUNDERSTORM WINDS53"
## [111] "WILDFIRE" "DAMAGING FREEZE"
## [113] "THUNDERSTORM WINDS 13" "HURRICANE"
## [115] "SNOW" "LIGNTNING"
## [117] "FROST" "FREEZING RAIN/SNOW"
## [119] "HIGH WINDS/" "THUNDERSNOW"
## [121] "FLOODS" "COOL AND WET"
## [123] "HEAVY RAIN/SNOW" "GLAZE ICE"
## [125] "MUD SLIDE" "HIGH WINDS"
## [127] "RURAL FLOOD" "MUD SLIDES"
## [129] "EXTREME HEAT" "DROUGHT"
## [131] "COLD AND WET CONDITIONS" "EXCESSIVE WETNESS"
## [133] "SLEET/ICE STORM" "GUSTNADO"
## [135] "FREEZING RAIN" "SNOW AND HEAVY SNOW"
## [137] "GROUND BLIZZARD" "EXTREME WIND CHILL"
## [139] "MAJOR FLOOD" "SNOW/HEAVY SNOW"
## [141] "FREEZING RAIN/SLEET" "ICE JAM FLOODING"
## [143] "COLD AIR TORNADO" "WIND DAMAGE"
## [145] "FOG" "TSTM WIND 55"
## [147] "SMALL STREAM FLOOD" "THUNDERTORM WINDS"
## [149] "HAIL/WINDS" "SNOW AND ICE"
## [151] "WIND STORM" "GRASS FIRES"
## [153] "LAKE FLOOD" "HAIL/WIND"
## [155] "WIND/HAIL" "ICE"
## [157] "SNOW AND ICE STORM" "THUNDERSTORM WINDS"
## [159] "WINTER WEATHER" "DROUGHT/EXCESSIVE HEAT"
## [161] "THUNDERSTORMS WIND" "TUNDERSTORM WIND"
## [163] "URBAN AND SMALL STREAM FLOODIN" "THUNDERSTORM WIND/LIGHTNING"
## [165] "HEAVY RAIN/SEVERE WEATHER" "THUNDERSTORM"
## [167] "WATERSPOUT/ TORNADO" "LIGHTNING."
## [169] "HURRICANE-GENERATED SWELLS" "RIVER AND STREAM FLOOD"
## [171] "HIGH WINDS/COASTAL FLOOD" "RAIN"
## [173] "RIVER FLOODING" "ICE FLOES"
## [175] "THUNDERSTORM WIND G50" "LIGHTNING FIRE"
## [177] "HEAVY LAKE SNOW" "RECORD COLD"
## [179] "HEAVY SNOW/FREEZING RAIN" "COLD WAVE"
## [181] "DUST DEVIL WATERSPOUT" "TORNADO F3"
## [183] "TORNDAO" "FLOOD/RIVER FLOOD"
## [185] "MUD SLIDES URBAN FLOODING" "TORNADO F1"
## [187] "GLAZE/ICE STORM" "GLAZE"
## [189] "HEAVY SNOW/WINTER STORM" "MICROBURST"
## [191] "AVALANCE" "BLIZZARD/WINTER STORM"
## [193] "DUST STORM/HIGH WINDS" "ICE JAM"
## [195] "FOREST FIRES" "FROST\\FREEZE"
## [197] "THUNDERSTORM WINDS." "HVY RAIN"
## [199] "HAIL 150" "HAIL 075"
## [201] "HAIL 100" "THUNDERSTORM WIND G55"
## [203] "HAIL 125" "THUNDERSTORM WIND G60"
## [205] "THUNDERSTORM WINDS G60" "HARD FREEZE"
## [207] "HAIL 200" "HEAVY SNOW AND HIGH WINDS"
## [209] "HEAVY SNOW/HIGH WINDS & FLOOD" "HEAVY RAIN AND FLOOD"
## [211] "RIP CURRENTS/HEAVY SURF" "URBAN AND SMALL"
## [213] "WILDFIRES" "FOG AND COLD TEMPERATURES"
## [215] "SNOW/COLD" "FLASH FLOOD FROM ICE JAMS"
## [217] "TSTM WIND G58" "MUDSLIDE"
## [219] "HEAVY SNOW SQUALLS" "SNOW SQUALL"
## [221] "SNOW/ICE STORM" "HEAVY SNOW/SQUALLS"
## [223] "HEAVY SNOW-SQUALLS" "ICY ROADS"
## [225] "HEAVY MIX" "SNOW FREEZING RAIN"
## [227] "SNOW/SLEET" "SNOW/FREEZING RAIN"
## [229] "SNOW SQUALLS" "SNOW/SLEET/FREEZING RAIN"
## [231] "RECORD SNOW" "HAIL 0.75"
## [233] "RECORD HEAT" "THUNDERSTORM WIND 65MPH"
## [235] "THUNDERSTORM WIND/ TREES" "THUNDERSTORM WIND/AWNING"
## [237] "THUNDERSTORM WIND 98 MPH" "THUNDERSTORM WIND TREES"
## [239] "TORNADO F2" "RIP CURRENTS"
## [241] "HURRICANE EMILY" "COASTAL SURGE"
## [243] "HURRICANE GORDON" "HURRICANE FELIX"
## [245] "THUNDERSTORM WIND 60 MPH" "THUNDERSTORM WINDS 63 MPH"
## [247] "THUNDERSTORM WIND/ TREE" "THUNDERSTORM DAMAGE TO"
## [249] "THUNDERSTORM WIND 65 MPH" "FLASH FLOOD - HEAVY RAIN"
## [251] "THUNDERSTORM WIND." "FLASH FLOOD/ STREET"
## [253] "BLOWING SNOW" "HEAVY SNOW/BLIZZARD"
## [255] "THUNDERSTORM HAIL" "THUNDERSTORM WINDSHAIL"
## [257] "LIGHTNING WAUSEON" "THUDERSTORM WINDS"
## [259] "ICE AND SNOW" "STORM FORCE WINDS"
## [261] "HEAVY SNOW/ICE" "LIGHTING"
## [263] "HIGH WIND/HEAVY SNOW" "THUNDERSTORM WINDS AND"
## [265] "HEAVY PRECIPITATION" "HIGH WIND/BLIZZARD"
## [267] "TSTM WIND DAMAGE" "FLOOD FLASH"
## [269] "RAIN/WIND" "SNOW/ICE"
## [271] "HAIL 75" "HEAT WAVE DROUGHT"
## [273] "HEAVY SNOW/BLIZZARD/AVALANCHE" "HEAT WAVES"
## [275] "UNSEASONABLY WARM AND DRY" "UNSEASONABLY COLD"
## [277] "RECORD/EXCESSIVE HEAT" "THUNDERSTORM WIND G52"
## [279] "HIGH WAVES" "FLASH FLOOD/FLOOD"
## [281] "FLOOD/FLASH" "LOW TEMPERATURE"
## [283] "HEAVY RAINS/FLOODING" "THUNDERESTORM WINDS"
## [285] "THUNDERSTORM WINDS/FLOODING" "HYPOTHERMIA"
## [287] "THUNDEERSTORM WINDS" "THUNERSTORM WINDS"
## [289] "HIGH WINDS/COLD" "COLD/WINDS"
## [291] "SNOW/ BITTER COLD" "COLD WEATHER"
## [293] "RAPIDLY RISING WATER" "WILD/FOREST FIRE"
## [295] "ICE/STRONG WINDS" "SNOW/HIGH WINDS"
## [297] "HIGH WINDS/SNOW" "SNOWMELT FLOODING"
## [299] "HEAVY SNOW AND STRONG WINDS" "SNOW ACCUMULATION"
## [301] "SNOW/ ICE" "SNOW/BLOWING SNOW"
## [303] "TORNADOES" "THUNDERSTORM WIND/HAIL"
## [305] "FREEZING DRIZZLE" "HAIL 175"
## [307] "FLASH FLOODING/FLOOD" "HAIL 275"
## [309] "HAIL 450" "EXCESSIVE RAINFALL"
## [311] "THUNDERSTORMW" "HAILSTORM"
## [313] "TSTM WINDS" "TSTMW"
## [315] "TSTM WIND 65)" "TROPICAL STORM DEAN"
## [317] "THUNDERSTORM WINDS/ FLOOD" "LANDSLIDE"
## [319] "HIGH WIND AND SEAS" "THUNDERSTORMWINDS"
## [321] "WILD/FOREST FIRES" "HEAVY SEAS"
## [323] "HAIL DAMAGE" "FLOOD & HEAVY RAIN"
## [325] "THUNDERSTROM WIND" "FLOOD/FLASHFLOOD"
## [327] "HIGH WATER" "HIGH WIND 48"
## [329] "LANDSLIDES" "URBAN/SMALL STREAM"
## [331] "BRUSH FIRE" "HEAVY SHOWER"
## [333] "HEAVY SWELLS" "URBAN SMALL"
## [335] "URBAN FLOODS" "FLASH FLOOD/LANDSLIDE"
## [337] "HEAVY RAIN/SMALL STREAM URBAN" "FLASH FLOOD LANDSLIDES"
## [339] "TSTM WIND/HAIL" "OTHER"
## [341] "ICE JAM FLOOD (MINOR" "URBAN/SML STREAM FLD"
## [343] "ROUGH SURF" "MARINE ACCIDENT"
## [345] "COASTAL STORM" "EROSION/CSTL FLOOD"
## [347] "BEACH EROSION" "HEAVY RAIN/HIGH SURF"
## [349] "UNSEASONABLE COLD" "EARLY FROST"
## [351] "WINTRY MIX" "TORRENTIAL RAINFALL"
## [353] "LANDSLUMP" "HURRICANE EDOUARD"
## [355] "TIDAL FLOODING" "EXTREME WINDCHILL"
## [357] "EXTENDED COLD" "WHIRLWIND"
## [359] "HEAVY SNOW SHOWER" "LIGHT SNOW"
## [361] "MIXED PRECIP" "FREEZING SPRAY"
## [363] "DOWNBURST" "LIGHT SNOWFALL"
## [365] "GUSTY WIND/RAIN" "GUSTY WIND/HVY RAIN"
## [367] "COLD TEMPERATURE" "COLD AND SNOW"
## [369] "RAIN/SNOW" "TSTM WIND (G45)"
## [371] "GUSTY WIND" "TSTM WIND 40"
## [373] "TSTM WIND 45" "TSTM WIND (41)"
## [375] "TSTM WIND (G40)" "FROST/FREEZE"
## [377] "AGRICULTURAL FREEZE" "HYPOTHERMIA/EXPOSURE"
## [379] "LAKE EFFECT SNOW" "MIXED PRECIPITATION"
## [381] "BLACK ICE" "COASTALSTORM"
## [383] "DAM BREAK" "GRADIENT WIND"
## [385] "TSTM WIND AND LIGHTNING" "WET MICROBURST"
## [387] "HEAVY SURF AND WIND" "TYPHOON"
## [389] "HIGH SWELLS" "SMALL HAIL"
## [391] "UNSEASONAL RAIN" "COASTAL FLOODING/EROSION"
## [393] " TSTM WIND (G45)" "TSTM WIND (G45)"
## [395] "HIGH WIND (G40)" "TSTM WIND (G35)"
## [397] "COASTAL EROSION" "SEICHE"
## [399] "COASTAL FLOODING/EROSION" "HYPERTHERMIA/EXPOSURE"
## [401] "ROCK SLIDE" "GUSTY WIND/HAIL"
## [403] " TSTM WIND" "LANDSPOUT"
## [405] "EXCESSIVE SNOW" "FLOOD/FLASH/FLOOD"
## [407] "WIND AND WAVE" "LIGHT FREEZING RAIN"
## [409] "ICE ROADS" "ROUGH SEAS"
## [411] "TSTM WIND G45" "NON-SEVERE WIND DAMAGE"
## [413] "WARM WEATHER" "THUNDERSTORM WIND (G40)"
## [415] " FLASH FLOOD" "LATE SEASON SNOW"
## [417] "WINTER WEATHER MIX" "ROGUE WAVE"
## [419] "FALLING SNOW/ICE" "NON-TSTM WIND"
## [421] "NON TSTM WIND" "BLOWING DUST"
## [423] "VOLCANIC ASH" " HIGH SURF ADVISORY"
## [425] "HAZARDOUS SURF" "ICE ON ROAD"
## [427] "DROWNING" "EXTREME COLD/WIND CHILL"
## [429] "MARINE TSTM WIND" "HURRICANE/TYPHOON"
## [431] "WINTER WEATHER/MIX" "ASTRONOMICAL HIGH TIDE"
## [433] "HEAVY SURF/HIGH SURF" "TROPICAL DEPRESSION"
## [435] "LAKE-EFFECT SNOW" "MARINE HIGH WIND"
## [437] "TSUNAMI" "STORM SURGE/TIDE"
## [439] "COLD/WIND CHILL" "LAKESHORE FLOOD"
## [441] "MARINE THUNDERSTORM WIND" "MARINE STRONG WIND"
## [443] "ASTRONOMICAL LOW TIDE" "DENSE SMOKE"
## [445] "MARINE HAIL" "FREEZING FOG"
# Count the distinct EVTYPE labels present before any grouping is applied
length(unique(stormdata_tidy$EVTYPE))
## [1] 446
Data processing. 1. Clean the EVTYPE column. The original EVTYPE column contains 446 unique values, so the labels are grouped into a more consistent format.
The EVTYPE values are grouped according to the Storm Data Event Table below:
Event Name Designator Event Name Designator
Astronomical Low Tide Z Avalanche Z Blizzard Z Coastal Flood Z Cold/Wind Chill Z Debris Flow C Dense Fog Z Dense Smoke Z Drought Z Dust Devil C Dust Storm Z Excessive Heat Z Extreme Cold/Wind Chill Z Flash Flood C Flood C Frost/Freeze Z Funnel Cloud C Freezing Fog Z Hail C Heat Z Heavy Rain C Heavy Snow Z High Surf Z High Wind Z Hurricane (Typhoon) Z Ice Storm Z Lake-Effect Snow Z Lakeshore Flood Z Lightning C Marine Hail M Marine High Wind M Marine Strong Wind M Marine Thunderstorm Wind M Rip Current Z Seiche Z Sleet Z Storm Surge/Tide Z Strong Wind Z Thunderstorm Wind C Tornado C Tropical Depression Z Tropical Storm Z Tsunami Z Volcanic Ash Z Waterspout M Wildfire Z Winter Storm Z Winter Weather Z
# Clean the EVTYPE column
#
# Collapse the free-text EVTYPE labels into the Storm Data Event Table names,
# each suffixed with its designator. Every row of `evtype_rules` pairs a
# regular expression with the label that replaces any EVTYPE it matches.
# Rules run top to bottom and later rules see the output of earlier ones, so
# the order of the rows is significant.
#
# NOTE: the unique() listing and count recorded below this chunk were produced
# by an earlier version of these rules; re-knit the report to refresh them.
evtype_rules <- matrix(
  c(
    ".*HIGH TIDE.*", "ASTRONOMICAL HIGH TIDE (Z)",
    ".*ASTRONOMICAL LOW TIDE.*", "ASTRONOMICAL LOW TIDE (Z)",
    "^AVALANCH?E.*", "AVALANCHE (Z)",
    ".*BLIZZARD.*", "BLIZZARD (Z)",
    # "[/s*]" is a literal character class (slash, "s" or "*"), so it never
    # matched the blank in "COASTAL FLOOD"; use a real separator class.
    ".*COASTAL[/[:space:]]*FLOOD.*", "COASTAL FLOOD (Z)",
    ".*EROSION.*", "COASTAL EROSION",
    # The EXTREME rules must run before the generic COLD rules, otherwise
    # "EXTREME COLD" is swallowed by ".*COLD$" and loses its category.
    ".*EXTREME COLD.*", "EXTREME COLD/WIND CHILL (Z)",
    ".*EXTREME WIND.*", "EXTREME COLD/WIND CHILL (Z)",
    "^COLD.*", "COLD/WIND CHILL (Z)",
    "^(HYPOTHERMIA|LOW TEMPERATURE).*", "COLD/WIND CHILL (Z)",
    ".*COLD$", "COLD/WIND CHILL (Z)",
    ".*DENSE FOG.*", "DENSE FOG (Z)",
    ".*DENSE SMOKE.*", "DENSE SMOKE (Z)",
    ".*DROUGHT.*", "DROUGHT (Z)",
    ".*DUST DEVIL.*", "DUST DEVIL (C)",
    ".*DUST STORM.*", "DUST STORM (Z)",
    ".*DUST$", "DUST STORM (Z)",
    ".*(EXCESSIVE|EXTREME) HEAT.*", "EXCESSIVE HEAT (Z)",
    ".*HYPERTHERMIA.*", "EXCESSIVE HEAT (Z)",
    ".*(FLASH FLOOD|STREAM).*", "FLASH FLOOD (C)",
    ".*RAPIDLY RISING WATER.*", "FLASH FLOOD (C)",
    ".*URBAN.*SMALL", "FLASH FLOOD (C)",
    "^LAKE.*FLOOD$", "LAKESHORE FLOOD (Z)",
    "^(FLOOD|HIGH WATER).*", "FLOOD (C)",
    ".*FLOOD(S|ING)?$", "FLOOD (C)",
    ".*FREEZING FOG.*", "FREEZING FOG (Z)",
    ".*GLAZE.*", "FREEZING FOG (Z)",
    # "^FOG*" meant "FO" plus any number of "G" and replaced only that
    # prefix, leaving "FREEZING FOG (Z) AND COLD TEMPERATURES" behind.
    "^FOG.*", "FREEZING FOG (Z)",
    ".*FROST.*", "FROST/FREEZE (Z)",
    ".*FREEZE$", "FROST/FREEZE (Z)",
    ".*FUNNEL CLOUD.*", "FUNNEL CLOUD (C)",
    "^HAIL.*", "HAIL (C)",
    "SMALL HAIL.*", "HAIL (C)",
    "^HEAT.*", "HEAT (Z)",
    ".*WARM.*", "HEAT (Z)",
    "RECORD HEAT.*", "HEAT (Z)",
    # UNSEASONAL is listed so "UNSEASONAL RAIN" is classified as rain.
    paste0(
      "^(HEAVY|HVY|EXCESSIVE|TORRENTIAL|RECORD|UNSEASONAL)? ?",
      "(RAIN(FALL)?|SHOWER|PRECIPITATION).*"
    ), "HEAVY RAIN (C)",
    ".*(DAM BREAK|RAINSTORM).*", "HEAVY RAIN (C)",
    ".*(HEAVY|EXCESSIVE) SNOW.*", "HEAVY SNOW (Z)",
    ".*(SURF|SWELLS).*", "HIGH SURF (Z)",
    ".*ROGUE WAVE.*", "HIGH SURF (Z)",
    "^HIGH.*WIND.*", "HIGH WIND (Z)",
    "HIGH$", "HIGH WIND (Z)",
    ".*(HURRICANE|TYPHOON|HIGH WAVE).*", "HURRICANE/TYPHOON (Z)",
    # "[E|Y]" also admitted a literal "|"; only E or Y is intended.
    ".*IC[EY].*", "ICE STORM (Z)",
    "^(HEAVY )?LAKE.*SNOW$", "LAKE-EFFECT SNOW (Z)",
    "^(LIGNTNING|LIGHTN?ING).*", "LIGHTNING (C)",
    ".*MARINE HAIL.*", "MARINE HAIL (M)",
    ".*MARINE HIGH WIND.*", "MARINE HIGH WIND (M)",
    ".*MARINE STRONG WIND.*", "MARINE STRONG WIND (M)",
    # TSTM is accepted here so "MARINE TSTM WIND" is not later captured by
    # the land thunderstorm rule.
    ".*MARINE (THUNDERSTORM|TSTM).*", "MARINE THUNDERSTORM WIND (M)",
    # Debris Flow carries designator C in the event table quoted above.
    ".*(SLIDE|LANDSLUMP).*", "DEBRIS FLOW (C)",
    ".*RIP CURRENT.*", "RIP CURRENT (Z)",
    ".*SEICHE.*", "SEICHE (Z)",
    "MARINE (MISHAP|ACCIDENT)", "SEICHE (Z)",
    # Match SEA/SEAS as a whole word only; a bare "SEA" substring also hit
    # "LATE SEASON SNOW".
    "(^|.*[^A-Z])SEAS?($|[^A-Z].*)", "SEICHE (Z)",
    ".*SLEET.*", "SLEET (Z)",
    ".*MIX.*", "SLEET (Z)",
    ".*RAIN.*SNOW.*", "SLEET (Z)",
    ".*SNOW.*RAIN.*", "SLEET (Z)",
    ".*STORM SURGE.*", "STORM SURGE/TIDE (Z)",
    ".*COASTAL ?(STORM|SURGE).*", "STORM SURGE/TIDE (Z)",
    "^(STRONG|GUSTY)? ?WINDS?.*", "STRONG WIND (Z)",
    "^(SEVERE )?TH?UN?D?E.*", "THUNDERSTORM WIND (C)",
    # Explicitly non-thunderstorm / non-severe wind reports must be handled
    # before the TSTM rule, which would otherwise label them thunderstorms.
    "^NON[- ](TSTM|SEVERE) WIND.*", "STRONG WIND (Z)",
    ".*(TSTM|APACHE COUNTY).*", "THUNDERSTORM WIND (C)",
    ".*(BURST|WHIRLWIND|GUSTNADO).*", "THUNDERSTORM WIND (C)",
    "^TORN.*", "TORNADO (C)",
    "LANDSPOUT.*", "TORNADO (C)",
    ".*(TROPICAL DEPRESSION|GRADIENT WIND).*", "TROPICAL DEPRESSION (Z)",
    ".*(TROPICAL STORM|STORM FORCE WINDS).*", "TROPICAL STORM (Z)",
    ".*TSUNAMI.*", "TSUNAMI (Z)",
    ".*VOLCANIC.*", "VOLCANIC ASH (Z)",
    "^WATERSPOUT.*", "WATERSPOUT (M)",
    ".*FIRE.*", "WILDFIRE (Z)",
    ".*WINTER WEATHER.*", "WINTER WEATHER (Z)",
    # LATE SEASON is listed so "LATE SEASON SNOW" is classified as snow.
    "^(LIGHT|BLOWING|RECORD|LATE SEASON)? ?SNOW(FALL)?$", "WINTER WEATHER (Z)",
    "(LIGHT )?FREEZING (RAIN|DRIZZLE|SPRAY)$", "WINTER WEATHER (Z)",
    "^SNOW.*", "WINTER WEATHER (Z)",
    ".*WINTER.*STORM.*", "WINTER STORM (Z)",
    ".*WET.*", "WET"
  ),
  ncol = 2,
  byrow = TRUE,
  dimnames = list(NULL, c("pattern", "label"))
)
# Apply the rules sequentially to the whole column.
for (rule in seq_len(nrow(evtype_rules))) {
  stormdata_tidy$EVTYPE <- gsub(
    evtype_rules[rule, "pattern"],
    evtype_rules[rule, "label"],
    stormdata_tidy$EVTYPE
  )
}
# List the labels that remain after grouping
unique(stormdata_tidy$EVTYPE)
## [1] "TORNADO (C)"
## [2] "THUNDERSTORM WIND (C)"
## [3] "HAIL (C)"
## [4] "FLASH FLOOD (C)"
## [5] "WINTER STORM (Z)"
## [6] "HURRICANE/TYPHOON (Z)"
## [7] "HEAVY RAIN (C)"
## [8] "LIGHTNING (C)"
## [9] "DENSE FOG (Z)"
## [10] "RIP CURRENT (Z)"
## [11] "HEAT (Z)"
## [12] "HIGH WIND (Z)"
## [13] "STRONG WIND (Z)"
## [14] "COLD/WIND CHILL (Z)"
## [15] "FLOOD (C)"
## [16] "WATERSPOUT (M)"
## [17] "FROST/FREEZE (Z)"
## [18] "AVALANCHE (Z)"
## [19] "SEICHE (Z)"
## [20] "ASTRONOMICAL HIGH TIDE (Z)"
## [21] "SEVERE TURBULENCE"
## [22] "HEAVY SNOW (Z)"
## [23] "DUST STORM (Z)"
## [24] "SLEET (Z)"
## [25] "DUST DEVIL (C)"
## [26] "ICE STORM (Z)"
## [27] "EXCESSIVE HEAT (Z)"
## [28] "HIGH SURF (Z)"
## [29] "WILDFIRE (Z)"
## [30] "DEBRIS FLOW (Z)"
## [31] "FUNNEL CLOUD (C)"
## [32] "BLIZZARD (Z)"
## [33] "STORM SURGE/TIDE (Z)"
## [34] "TROPICAL STORM (Z)"
## [35] "WINTER WEATHER (Z)"
## [36] "WET"
## [37] "FREEZING FOG (Z)"
## [38] "DROUGHT (Z)"
## [39] "EXTREME COLD/WIND CHILL (Z)"
## [40] "LAKESHORE FLOOD (Z)"
## [41] "LAKE-EFFECT SNOW (Z)"
## [42] "FREEZING FOG (Z) AND COLD TEMPERATURES"
## [43] "OTHER"
## [44] "COASTAL EROSION"
## [45] "TROPICAL DEPRESSION (Z)"
## [46] "NON-SEVERE WIND DAMAGE"
## [47] "VOLCANIC ASH (Z)"
## [48] "DROWNING"
## [49] "MARINE HIGH WIND (M)"
## [50] "TSUNAMI (Z)"
## [51] "MARINE THUNDERSTORM WIND (M)"
## [52] "MARINE STRONG WIND (M)"
## [53] "ASTRONOMICAL LOW TIDE (Z)"
## [54] "DENSE SMOKE (Z)"
## [55] "MARINE HAIL (M)"
# Count the distinct EVTYPE labels left after the grouping rules were applied
length(unique(stormdata_tidy$EVTYPE))
## [1] 55
# Sum fatalities and injuries per event type and keep the ten event types
# with the largest combined total, largest first.
top10_health_data <- stormdata_tidy %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(desc(Total_Fatalities + Total_Injuries)) %>%
  head(n = 10)
Check the top 10 health data
# Print the per-event fatality and injury totals for the ten selected events
top10_health_data
Create a bar plot for the top 10 weather events by health impact, fatalities and injuries with different colors in the same plot
# Reshape to long form: one row per event type and casualty measure, with the
# measure name in `Type` and its value in `Count`, so both measures can be
# drawn in one plot and distinguished by fill colour.
top10_health_data_long <- pivot_longer(
  top10_health_data,
  cols = c(Total_Fatalities, Total_Injuries),
  names_to = "Type",
  values_to = "Count"
)
Create a bar plot for the top 10 weather events by health impact, fatalities and injuries with different colors in the same plot
# Stacked horizontal bars: one bar per event type, split into fatalities
# (red) and injuries (blue). Bars are ordered by Count and the axes are
# flipped so the event labels read horizontally.
ggplot(
  top10_health_data_long,
  aes(x = reorder(EVTYPE, Count), y = Count, fill = Type)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events by Health Impact (Fatalities + Injuries)",
    x = "Weather Event Type",
    y = "Count"
  ) +
  scale_fill_manual(
    values = c("Total_Fatalities" = "red", "Total_Injuries" = "blue")
  ) +
  theme_minimal()
Result: The plot shows the top 10 weather events by health impact, with
fatalities in red and injuries in blue. Because the axes are flipped, the
vertical axis shows the weather event type and the horizontal axis shows
the count of fatalities and injuries, which makes the event types easier
to read. The top 10 weather events are displayed in descending order of
total fatalities and injuries. The plot shows that “TORNADO (C)” has
the highest total fatalities and injuries, followed by “EXCESSIVE HEAT
(Z)” and “FLOOD (C)”. The plot also shows that “WINTER WEATHER (Z)” and
“WINTER STORM (Z)” have a significant number of injuries, while “HAIL
(C)” has a high number of fatalities.
Show separate plots for fatalities and for injuries: one bar plot of the top 10 weather events for each measure.
# Create a bar plot for the top 10 weather events by fatalities.
# The ranking is computed on fatalities alone: top10_health_data holds the
# ten events with the largest fatalities + injuries total, which need not be
# the ten events with the most fatalities that the plot title promises.
top10_fatalities <- stormdata_tidy %>%
  group_by(EVTYPE) %>%
  summarise(Total_Fatalities = sum(FATALITIES, na.rm = TRUE)) %>%
  arrange(desc(Total_Fatalities)) %>%
  slice(1:10)
ggplot(
  top10_fatalities,
  aes(x = reorder(EVTYPE, Total_Fatalities), y = Total_Fatalities)
) +
  geom_bar(stat = "identity", fill = "red") +
  # Flip the axes so the event labels read horizontally.
  coord_flip() +
  labs(
    title = "Top 10 Weather Events by Fatalities",
    x = "Weather Event Type",
    y = "Total Fatalities"
  ) +
  theme_minimal()
# Create a bar plot for the top 10 weather events by injuries.
# The ranking is computed on injuries alone: top10_health_data holds the ten
# events with the largest fatalities + injuries total, which need not be the
# ten events with the most injuries that the plot title promises.
top10_injuries <- stormdata_tidy %>%
  group_by(EVTYPE) %>%
  summarise(Total_Injuries = sum(INJURIES, na.rm = TRUE)) %>%
  arrange(desc(Total_Injuries)) %>%
  slice(1:10)
ggplot(
  top10_injuries,
  aes(x = reorder(EVTYPE, Total_Injuries), y = Total_Injuries)
) +
  geom_bar(stat = "identity", fill = "blue") +
  # Flip the axes so the event labels read horizontally.
  coord_flip() +
  labs(
    title = "Top 10 Weather Events by Injuries",
    x = "Weather Event Type",
    y = "Total Injuries"
  ) +
  theme_minimal()
Data processing for economic impact. 2.1. Convert the PROPDMGEXP and CROPDMGEXP exponent codes to numeric multipliers.
Check the summary and structure of stormdata_tidy
# Per-column summary statistics of stormdata_tidy before the damage columns
# are converted
summary(stormdata_tidy)
## BGN_DATE END_DATE EVTYPE
## Min. :1950-01-03 Min. :1993-01-01 Length:254632
## 1st Qu.:1997-01-23 1st Qu.:2000-05-24 Class :character
## Median :2002-08-02 Median :2005-03-07 Mode :character
## Mean :2000-06-13 Mean :2004-08-14
## 3rd Qu.:2008-05-07 3rd Qu.:2009-01-07
## Max. :2011-11-30 Max. :2011-11-30
## NA's :50928
## STATE FATALITIES INJURIES PROPDMG
## Length:254632 Min. : 0.00000 Min. : 0.0000 Min. : 0.00
## Class :character 1st Qu.: 0.00000 1st Qu.: 0.0000 1st Qu.: 2.00
## Mode :character Median : 0.00000 Median : 0.0000 Median : 5.00
## Mean : 0.05948 Mean : 0.5519 Mean : 42.75
## 3rd Qu.: 0.00000 3rd Qu.: 0.0000 3rd Qu.: 25.00
## Max. :583.00000 Max. :1700.0000 Max. :5000.00
##
## PROPDMGEXP CROPDMG CROPDMGEXP
## Length:254632 Min. : 0.000 Length:254632
## Class :character 1st Qu.: 0.000 Class :character
## Mode :character Median : 0.000 Mode :character
## Mean : 5.411
## 3rd Qu.: 0.000
## Max. :990.000
##
# Column types and the first few values of stormdata_tidy
str(stormdata_tidy)
## 'data.frame': 254632 obs. of 10 variables:
## $ BGN_DATE : Date, format: "1950-04-18" "1950-04-18" ...
## $ END_DATE : Date, format: NA NA ...
## $ EVTYPE : chr "TORNADO (C)" "TORNADO (C)" "TORNADO (C)" "TORNADO (C)" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr NA NA NA NA ...
Fill the missing values in the PROPDMGEXP and CROPDMGEXP column with “0”
# Record a missing exponent code as "0" in both exponent columns so that every
# row carries an explicit code.
stormdata_tidy$CROPDMGEXP <- replace(
  stormdata_tidy$CROPDMGEXP,
  is.na(stormdata_tidy$CROPDMGEXP),
  "0"
)
stormdata_tidy$PROPDMGEXP <- replace(
  stormdata_tidy$PROPDMGEXP,
  is.na(stormdata_tidy$PROPDMGEXP),
  "0"
)
Check the unique values in the CROPDMGEXP and PROPDMGEXP column for the conversion
# List the distinct crop-damage exponent codes (mixed case at this point)
unique(stormdata_tidy$CROPDMGEXP)
## [1] "0" "M" "K" "m" "B" "k"
# List the distinct property-damage exponent codes
unique(stormdata_tidy$PROPDMGEXP)
## [1] "K" "M" "0" "B" "m" "+" "5" "6" "4" "h" "2" "7" "3" "H" "-"
Create a new column for crop and property damage
# Create a new column for crop damage.
# Upper-case the exponent codes so "k"/"K" and "m"/"M" are treated alike.
stormdata_tidy$CROPDMGEXP <- toupper(stormdata_tidy$CROPDMGEXP)
unique(stormdata_tidy$CROPDMGEXP)
## [1] "0" "M" "K" "B"
# Scale CROPDMG by the multiplier its exponent code stands for; "0" and any
# code not listed here leave the reported figure unchanged.
stormdata_tidy$CROPDMGVAL <- with(
  stormdata_tidy,
  CROPDMG * case_when(
    CROPDMGEXP == "H" ~ 100,
    CROPDMGEXP == "K" ~ 1000,
    CROPDMGEXP == "M" ~ 1000000,
    CROPDMGEXP == "B" ~ 1000000000,
    TRUE ~ 1
  )
)
# Create a new column for property damage.
# Upper-case the exponent codes so "h"/"H" and "m"/"M" are treated alike.
stormdata_tidy$PROPDMGEXP <- toupper(stormdata_tidy$PROPDMGEXP)
unique(stormdata_tidy$PROPDMGEXP)
## [1] "K" "M" "0" "B" "+" "5" "6" "4" "H" "2" "7" "3" "-"
# Scale PROPDMG by the multiplier its exponent code stands for:
#   letters H / K / M / B  -> 1e2 / 1e3 / 1e6 / 1e9
#   digits "2" .. "7"      -> 10^digit
#   "0", "+", "-" and any other code -> 1 (reported figure left unscaled)
# "?" needs no case of its own: it is read in as NA and then replaced by "0".
stormdata_tidy$PROPDMGVAL <- with(
  stormdata_tidy,
  PROPDMG * case_when(
    PROPDMGEXP %in% c("H", "2") ~ 1e2,
    PROPDMGEXP %in% c("K", "3") ~ 1e3,
    PROPDMGEXP == "4" ~ 1e4,
    PROPDMGEXP == "5" ~ 1e5,
    PROPDMGEXP %in% c("M", "6") ~ 1e6,
    PROPDMGEXP == "7" ~ 1e7,
    PROPDMGEXP == "B" ~ 1e9,
    TRUE ~ 1
  )
)
Create a new column for total damage and check the summary of the new data frame
# Combined damage per record: scaled property damage plus scaled crop damage.
stormdata_tidy$TOTALDMGVAL <- with(stormdata_tidy, PROPDMGVAL + CROPDMGVAL)
# Check the structure of the new data frame: the three derived columns
# (CROPDMGVAL, PROPDMGVAL, TOTALDMGVAL) should now be present.
str(stormdata_tidy)
## 'data.frame': 254632 obs. of 13 variables:
## $ BGN_DATE : Date, format: "1950-04-18" "1950-04-18" ...
## $ END_DATE : Date, format: NA NA ...
## $ EVTYPE : chr "TORNADO (C)" "TORNADO (C)" "TORNADO (C)" "TORNADO (C)" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ FATALITIES : num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP : chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP : chr "0" "0" "0" "0" ...
## $ CROPDMGVAL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PROPDMGVAL : num 25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
## $ TOTALDMGVAL: num 25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
# Check the summary of the new data frame, including the value ranges of the
# derived damage columns.
summary(stormdata_tidy)
## BGN_DATE END_DATE EVTYPE
## Min. :1950-01-03 Min. :1993-01-01 Length:254632
## 1st Qu.:1997-01-23 1st Qu.:2000-05-24 Class :character
## Median :2002-08-02 Median :2005-03-07 Mode :character
## Mean :2000-06-13 Mean :2004-08-14
## 3rd Qu.:2008-05-07 3rd Qu.:2009-01-07
## Max. :2011-11-30 Max. :2011-11-30
## NA's :50928
## STATE FATALITIES INJURIES PROPDMG
## Length:254632 Min. : 0.00000 Min. : 0.0000 Min. : 0.00
## Class :character 1st Qu.: 0.00000 1st Qu.: 0.0000 1st Qu.: 2.00
## Mode :character Median : 0.00000 Median : 0.0000 Median : 5.00
## Mean : 0.05948 Mean : 0.5519 Mean : 42.75
## 3rd Qu.: 0.00000 3rd Qu.: 0.0000 3rd Qu.: 25.00
## Max. :583.00000 Max. :1700.0000 Max. :5000.00
##
## PROPDMGEXP CROPDMG CROPDMGEXP CROPDMGVAL
## Length:254632 Min. : 0.000 Length:254632 Min. :0.000e+00
## Class :character 1st Qu.: 0.000 Class :character 1st Qu.:0.000e+00
## Mode :character Median : 0.000 Mode :character Median :0.000e+00
## Mean : 5.411 Mean :1.928e+05
## 3rd Qu.: 0.000 3rd Qu.:0.000e+00
## Max. :990.000 Max. :5.000e+09
##
## PROPDMGVAL TOTALDMGVAL
## Min. :0.000e+00 Min. :0.000e+00
## 1st Qu.:2.000e+03 1st Qu.:2.500e+03
## Median :1.000e+04 Median :1.000e+04
## Mean :1.682e+06 Mean :1.875e+06
## 3rd Qu.:3.500e+04 3rd Qu.:5.000e+04
## Max. :1.150e+11 Max. :1.150e+11
##
# Check the first few rows of the new data frame as a quick sanity check on
# the derived damage values.
head(stormdata_tidy)
Check the top 10 weather events by economic impact
# Sum each damage measure per event type, then keep the ten event types with
# the largest combined (property + crop) damage, largest first.
top10_economic_data <- stormdata_tidy %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Property_Damage = sum(PROPDMGVAL, na.rm = TRUE),
    Total_Crop_Damage = sum(CROPDMGVAL, na.rm = TRUE),
    Total_Damage = sum(TOTALDMGVAL, na.rm = TRUE)
  ) %>%
  arrange(desc(Total_Damage)) %>%
  slice_head(n = 10)
top10_economic_data
Create a bar plot for the top 10 weather events by economic impact, property damage and crop damage with different colors in the same plot
# Reshape to one row per (event type, damage type) so both damage types can
# be stacked in a single bar per event type.
top10_economic_data_long <- top10_economic_data %>%
  pivot_longer(
    cols = c(Total_Property_Damage, Total_Crop_Damage),
    names_to = "Type",
    values_to = "Count"
  )
# Stacked horizontal bars: property damage in red, crop damage in blue.
# Bars are ordered by Total_Damage, which pivot_longer() carries through
# unchanged. The "Count" column holds summed damage amounts rather than event
# counts, so the value axis is labelled as a damage total.
ggplot(
  top10_economic_data_long,
  aes(x = reorder(EVTYPE, Total_Damage), y = Count, fill = Type)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events by Economic Impact (Property Damage + Crop Damage)",
    x = "Weather Event Type",
    y = "Total Damage (USD)"
  ) +
  scale_fill_manual(
    values = c("Total_Property_Damage" = "red", "Total_Crop_Damage" = "blue")
  ) +
  theme_minimal()
Result: The plot shows the top 10 weather events by economic impact, with
property damage in red and crop damage in blue. The weather event types
are mapped to the x-axis and the total property and crop damage amounts
(not a count of events) to the y-axis; the plot is then flipped so that
the event types are easier to read. The top 10 weather events are
displayed in descending order of total property and crop damage. The
plot shows that “FLOOD (C)” has the highest total property and crop
damage, followed by “HURRICANE/TYPHOON (Z)” and “TORNADO (C)”. The plot
also shows that “HAIL (C)” has a significant amount of property damage,
while “DROUGHT (Z)” has a high amount of crop damage.
# Property damage for the ten event types in top10_economic_data. Those event
# types were selected by total (property + crop) damage, so the title states
# that ranking; the bars themselves are ordered by property damage.
ggplot(
  top10_economic_data,
  aes(x = reorder(EVTYPE, Total_Property_Damage), y = Total_Property_Damage)
) +
  geom_col(fill = "red") +
  coord_flip() +
  labs(
    title = "Property Damage of the Top 10 Weather Events by Economic Impact",
    x = "Weather Event Type",
    y = "Total Property Damage (USD)"
  ) +
  theme_minimal()
# Create a bar plot of crop damage for the top 10 weather events by economic
# impact. The ten event types in top10_economic_data were selected by total
# (property + crop) damage, so the title states that ranking; the bars
# themselves are ordered by crop damage.
ggplot(
  top10_economic_data,
  aes(x = reorder(EVTYPE, Total_Crop_Damage), y = Total_Crop_Damage)
) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(
    title = "Crop Damage of the Top 10 Weather Events by Economic Impact",
    x = "Weather Event Type",
    y = "Total Crop Damage (USD)"
  ) +
  theme_minimal()
Summary: The analysis shows that tornadoes have the highest impact on
health, with the highest number of fatalities and injuries. Floods have
the highest economic impact, with the highest property and crop damage;
they also have a significant impact on health, with a high number of
injuries. Hurricanes/typhoons and excessive heat have a significant
impact on both health and the economy. Winter weather events have a
significant impact on health, with a high number of injuries, while hail
has a high number of fatalities. Drought has a significant impact on
crop damage, with a high amount of crop damage.