Explore the NOAA Storm Database and answer some basic questions about severe weather events
The data for this assignment come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. The file can be downloaded from the course web site:
The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.
Store the original dataset into NOAA
# Move into the project directory that holds the extracted storm data CSV.
# (Plain ASCII quotes are required; typographic quotes are a parse error in R.)
setwd("./GitHub/RepData_PeerAssessment2")
# Reuse NOAA if it already exists in the session so the 902,297-row file is
# not re-read every time the script is re-run.
if (!exists("NOAA")) {
  # stringsAsFactors = TRUE keeps text columns such as EVTYPE, PROPDMGEXP and
  # CROPDMGEXP as factors on R >= 4.0 (where the default became FALSE), so
  # summary() on them reports per-level counts as in the output shown below.
  NOAA <- read.csv("repdata-data-StormData.csv", stringsAsFactors = TRUE)
}
# Dimensions of the dataset (number of rows and columns)
dim(NOAA)
## [1] 902297 37
# List the column names of the dataset
names(NOAA)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Summaries of the columns of interest, starting with the event type
summary(NOAA$EVTYPE)
## HAIL TSTM WIND THUNDERSTORM WIND
## 288661 219940 82563
## TORNADO FLASH FLOOD FLOOD
## 60652 54277 25326
## THUNDERSTORM WINDS HIGH WIND LIGHTNING
## 20843 20212 15754
## HEAVY SNOW HEAVY RAIN WINTER STORM
## 15708 11723 11433
## WINTER WEATHER FUNNEL CLOUD MARINE TSTM WIND
## 7026 6839 6175
## MARINE THUNDERSTORM WIND WATERSPOUT STRONG WIND
## 5812 3796 3566
## URBAN/SML STREAM FLD WILDFIRE BLIZZARD
## 3392 2761 2719
## DROUGHT ICE STORM EXCESSIVE HEAT
## 2488 2006 1678
## HIGH WINDS WILD/FOREST FIRE FROST/FREEZE
## 1533 1457 1342
## DENSE FOG WINTER WEATHER/MIX TSTM WIND/HAIL
## 1293 1104 1028
## EXTREME COLD/WIND CHILL HEAT HIGH SURF
## 1002 767 725
## TROPICAL STORM FLASH FLOODING EXTREME COLD
## 690 682 655
## COASTAL FLOOD LAKE-EFFECT SNOW FLOOD/FLASH FLOOD
## 650 636 624
## LANDSLIDE SNOW COLD/WIND CHILL
## 600 587 539
## FOG RIP CURRENT MARINE HAIL
## 538 470 442
## DUST STORM AVALANCHE WIND
## 427 386 340
## RIP CURRENTS STORM SURGE FREEZING RAIN
## 304 261 250
## URBAN FLOOD HEAVY SURF/HIGH SURF EXTREME WINDCHILL
## 249 228 204
## STRONG WINDS DRY MICROBURST ASTRONOMICAL LOW TIDE
## 196 186 174
## HURRICANE RIVER FLOOD LIGHT SNOW
## 174 173 154
## STORM SURGE/TIDE RECORD WARMTH COASTAL FLOODING
## 148 146 143
## DUST DEVIL MARINE HIGH WIND UNSEASONABLY WARM
## 141 135 126
## FLOODING ASTRONOMICAL HIGH TIDE MODERATE SNOWFALL
## 120 103 101
## URBAN FLOODING WINTRY MIX HURRICANE/TYPHOON
## 98 90 88
## FUNNEL CLOUDS HEAVY SURF RECORD HEAT
## 87 84 81
## FREEZE HEAT WAVE COLD
## 74 74 72
## RECORD COLD ICE THUNDERSTORM WINDS HAIL
## 64 61 61
## TROPICAL DEPRESSION SLEET UNSEASONABLY DRY
## 60 59 56
## FROST GUSTY WINDS THUNDERSTORM WINDSS
## 53 53 51
## MARINE STRONG WIND OTHER SMALL HAIL
## 48 48 47
## FUNNEL FREEZING FOG THUNDERSTORM
## 46 45 45
## Temperature record TSTM WIND (G45) Coastal Flooding
## 43 39 38
## WATERSPOUTS MONTHLY PRECIPITATION WINDS
## 37 36 36
## (Other)
## 2940
# Numeric summary of the FATALITIES column
summary(NOAA$FATALITIES)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.0168 0.0000 583.0000
# Numeric summary of the INJURIES column
summary(NOAA$INJURIES)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.1557 0.0000 1700.0000
# Numeric summary of the PROPDMG column
summary(NOAA$PROPDMG)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 0.00 12.06 0.50 5000.00
# Counts per PROPDMGEXP code; note the mix of symbols, digits and
# upper/lower-case letters (e.g. h/H, m/M) among the codes
summary(NOAA$PROPDMGEXP)
## - ? + 0 1 2 3 4 5
## 465934 1 8 5 216 25 13 4 4 28
## 6 7 8 B h H K m M
## 4 5 1 40 1 6 424665 7 11330
# Numeric summary of the CROPDMG column
summary(NOAA$CROPDMG)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 1.527 0.000 990.000
# Counts per CROPDMGEXP code; again both cases appear (k/K, m/M)
summary(NOAA$CROPDMGEXP)
## ? 0 2 B k K m M
## 618413 7 19 1 9 21 281832 1 1994
# Check whether the columns of interest contain any missing (NA) values;
# each call counts the NA entries in one column
sum(is.na(NOAA$EVTYPE))
## [1] 0
sum(is.na(NOAA$FATALITIES))
## [1] 0
sum(is.na(NOAA$INJURIES))
## [1] 0
sum(is.na(NOAA$PROPDMG))
## [1] 0
sum(is.na(NOAA$PROPDMGEXP))
## [1] 0
sum(is.na(NOAA$CROPDMG))
## [1] 0
sum(is.na(NOAA$CROPDMGEXP))
## [1] 0
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
# Keep only the records that caused at least one fatality or injury, and only
# the three columns needed for the population-health analysis.
health <- NOAA[
  with(NOAA, FATALITIES > 0 | INJURIES > 0),
  c("EVTYPE", "FATALITIES", "INJURIES")
]
# Fatalities graph
# Total fatalities per event type, keeping the top 8 types by total.
topFatlities <- health %>%
  group_by("Storm Type" = EVTYPE) %>%
  summarise(Fatalities = sum(FATALITIES)) %>%
  top_n(8, Fatalities) %>%
  arrange(Fatalities)
# Columns are referenced by bare name inside aes() rather than through
# topFatlities$..., so ggplot2 looks them up in the `data` argument.
# reorder() sorts the bars by their total: arrange() on the data alone does
# not change the order of a discrete axis.
ggplot(
  topFatlities,
  aes(
    x = reorder(`Storm Type`, Fatalities),
    y = Fatalities,
    fill = `Storm Type`
  )
) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "Set2") +
  # This chart shows fatalities only, so the title no longer mentions injuries.
  ggtitle("Top Fatalities Across the U.S. from 1950 to 2011") +
  xlab("Storm Type") +
  ylab("Fatalities") +
  # Rotate the long event-type labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
# Injury Graph
# Total injuries per event type, keeping the top 8 types by total.
topInjuries <- health %>%
  group_by("Storm Type" = EVTYPE) %>%
  summarise(Injuries = sum(INJURIES)) %>%
  top_n(8, Injuries) %>%
  arrange(Injuries)
# Columns are referenced by bare name inside aes() rather than through
# topInjuries$..., so ggplot2 looks them up in the `data` argument.
# reorder() sorts the bars by their total: arrange() on the data alone does
# not change the order of a discrete axis.
ggplot(
  topInjuries,
  aes(
    x = reorder(`Storm Type`, Injuries),
    y = Injuries,
    fill = `Storm Type`
  )
) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "Set2") +
  # Title and y-axis label describe injuries, matching the plotted variable
  # (the y label previously read "Fatalities").
  ggtitle("Top Injuries Across the U.S. from 1950 to 2011") +
  xlab("Storm Type") +
  ylab("Injuries") +
  # Rotate the long event-type labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 60, hjust = 1))