The aim of this report is to analyze storms and severe weather conditions in the U.S. to assess their impact on human health and the economy. Using the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database (years 1950 - 2011), this report attempts to find the weather conditions that are most harmful with respect to population health and to assess their impact on the economy. The analysis finds that tornadoes are the most harmful event type, having caused the greatest economic and human losses.
The data for this analysis comes from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The events in the database start in the year 1950 and end in November 2011.
Load the data from the CSV file. The file is compressed with the bz2 algorithm, so it is read through a `bzfile` connection, which decompresses it while reading; no separate unzip step is needed.
# Read the storm records straight from the bz2 archive: bzfile() supplies a
# decompressing connection that read.csv() consumes.
storm <- read.csv(
  file = bzfile("repdata_data_StormData.csv.bz2"),
  na.strings = "NA"
)
# Preview the first rows for columns 1-3, 6-8 and 23-28 only.
previewCols <- c(1:3, 6:8, 23:28)
head(storm[, previewCols])
## STATE__ BGN_DATE BGN_TIME COUNTYNAME STATE EVTYPE FATALITIES
## 1 1 4/18/1950 0:00:00 0130 MOBILE AL TORNADO 0
## 2 1 4/18/1950 0:00:00 0145 BALDWIN AL TORNADO 0
## 3 1 2/20/1951 0:00:00 1600 FAYETTE AL TORNADO 0
## 4 1 6/8/1951 0:00:00 0900 MADISON AL TORNADO 0
## 5 1 11/15/1951 0:00:00 1500 CULLMAN AL TORNADO 0
## 6 1 11/15/1951 0:00:00 2000 LAUDERDALE AL TORNADO 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
Here, we analyse the data to find the events most harmful to human health.
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Total fatalities and injuries per event type, ranked from most to least
# harmful (fatalities first, injuries as the tie-breaker).
# Columns are selected by name rather than by position so the selection does
# not silently change if the column layout of `storm` changes.
summaryData <- storm[, c("EVTYPE", "FATALITIES", "INJURIES")]
summaryData <- summarise(
  group_by(summaryData, EVTYPE),
  FATALITIES = sum(FATALITIES),
  INJURIES = sum(INJURIES)
)
summaryData <- arrange(summaryData, desc(FATALITIES), desc(INJURIES))
# Add a rank ID for each event type (1 = top of the ranking).
# seq_len() is used instead of 1:nrow(), which would yield c(1, 0) for an
# empty table.
summaryData <- mutate(summaryData, ID = seq_len(nrow(summaryData)))
# Headline figures for the ranking.
totalEvents <- nrow(summaryData)
maxFatalities <- max(summaryData$FATALITIES)
maxInjuries <- max(summaryData$INJURIES)
# Event type in the first row of the ranking.
event <- summaryData$EVTYPE[1]
summary(summaryData)
## EVTYPE FATALITIES INJURIES
## HIGH SURF ADVISORY: 1 Min. : 0.00 Min. : 0.0
## COASTAL FLOOD : 1 1st Qu.: 0.00 1st Qu.: 0.0
## FLASH FLOOD : 1 Median : 0.00 Median : 0.0
## LIGHTNING : 1 Mean : 15.38 Mean : 142.7
## TSTM WIND : 1 3rd Qu.: 0.00 3rd Qu.: 0.0
## TSTM WIND (G45) : 1 Max. :5633.00 Max. :91346.0
## (Other) :979
## ID
## Min. : 1
## 1st Qu.:247
## Median :493
## Mean :493
## 3rd Qu.:739
## Max. :985
##
# show top-20 events
# summaryData is already sorted and already carries its rank ID, so the top 20
# are simply its first 20 rows. head() is used instead of [1:20, ] because the
# latter pads the result with NA rows when fewer than 20 event types exist.
summaryData <- head(summaryData, 20)
head(summaryData[, c("EVTYPE", "FATALITIES", "INJURIES")], 20)
## Source: local data frame [20 x 3]
##
## EVTYPE FATALITIES INJURIES
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
## 7 FLOOD 470 6789
## 8 RIP CURRENT 368 232
## 9 HIGH WIND 248 1137
## 10 AVALANCHE 224 170
## 11 WINTER STORM 206 1321
## 12 RIP CURRENTS 204 297
## 13 HEAT WAVE 172 309
## 14 EXTREME COLD 160 231
## 15 THUNDERSTORM WIND 133 1488
## 16 HEAVY SNOW 127 1021
## 17 EXTREME COLD/WIND CHILL 125 24
## 18 STRONG WIND 103 280
## 19 BLIZZARD 101 805
## 20 HIGH SURF 101 152
# plot events vs fatalities
# ggplot() replaces the deprecated qplot(), which also drew an implicit point
# layer underneath the bars. Bars keep the rank order given by ID but are
# labelled with the event name instead of the bare rank number.
ggplot(
  summaryData,
  aes(x = reorder(as.character(EVTYPE), ID), y = FATALITIES, fill = ID)
) +
  geom_bar(stat = "identity") +
  xlab("Event") + ylab("No of Fatalities") +
  # Event names are long; rotate them so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# plot events vs injuries
# Same layout as above; bars stay in the same (ID) order so the two charts can
# be compared side by side.
ggplot(
  summaryData,
  aes(x = reorder(as.character(EVTYPE), ID), y = INJURIES, fill = ID)
) +
  geom_bar(stat = "identity") +
  xlab("Event") + ylab("No of Injuries") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Translate a damage-exponent code (PROPDMGEXP / CROPDMGEXP) into a numeric
# multiplier. H = hundreds, K = thousands, M = millions, B = billions; codes
# are matched case-insensitively.
# NOTE(review): any other code (blank, digits, symbols) is treated as a
# multiplier of 1 -- confirm against the NOAA Storm Data documentation.
damageMultiplier <- function(expCode) {
  code <- toupper(as.character(expCode))
  mult <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)[code]
  mult[is.na(mult)] <- 1
  unname(mult)
}

# select event, property damage, crop damage (with their exponent codes)
summaryData <- storm[, c("EVTYPE", "PROPDMG", "PROPDMGEXP",
                         "CROPDMG", "CROPDMGEXP")]
# Scale every record by its own exponent before aggregating: summing the raw
# PROPDMG / CROPDMG figures would add thousands, millions and billions as if
# they were the same unit. Amounts are kept in thousands of USD, so the
# report's later `LOSS / 1000` conversion still yields millions of USD.
summaryData$PROPDMG <-
  summaryData$PROPDMG * damageMultiplier(summaryData$PROPDMGEXP) / 1000
summaryData$CROPDMG <-
  summaryData$CROPDMG * damageMultiplier(summaryData$CROPDMGEXP) / 1000
# summarise them based on event
summaryData <- summarise(
  group_by(summaryData, EVTYPE),
  PROPDMG = sum(PROPDMG),
  CROPDMG = sum(CROPDMG)
)
# calculate total losses
summaryData <- mutate(summaryData, LOSS = PROPDMG + CROPDMG)
# rank event types by total loss (1 = largest loss); seq_len() stays valid
# for an empty table, unlike 1:nrow()
summaryData <- arrange(summaryData, desc(LOSS))
summaryData <- mutate(summaryData, ID = seq_len(nrow(summaryData)))
# Headline figures for the ranking.
maxPropLoss <- max(summaryData$PROPDMG)
maxCropLoss <- max(summaryData$CROPDMG)
maxLoss <- max(summaryData$LOSS)
# Event type in the first row of the ranking.
event <- summaryData$EVTYPE[1]
summary(summaryData)
## EVTYPE PROPDMG CROPDMG
## HIGH SURF ADVISORY: 1 Min. : 0 Min. : 0
## COASTAL FLOOD : 1 1st Qu.: 0 1st Qu.: 0
## FLASH FLOOD : 1 Median : 0 Median : 0
## LIGHTNING : 1 Mean : 11050 Mean : 1399
## TSTM WIND : 1 3rd Qu.: 35 3rd Qu.: 0
## TSTM WIND (G45) : 1 Max. :3212258 Max. :579596
## (Other) :979
## LOSS ID
## Min. : 0 Min. : 1
## 1st Qu.: 0 1st Qu.:247
## Median : 0 Median :493
## Mean : 12449 Mean :493
## 3rd Qu.: 50 3rd Qu.:739
## Max. :3312277 Max. :985
##
# show top-20 events
# summaryData is already sorted by LOSS and already carries its rank ID, so
# the top 20 are simply its first 20 rows. head() is used instead of [1:20, ]
# because the latter pads the result with NA rows when fewer than 20 event
# types exist.
summaryData <- head(summaryData, 20)
head(summaryData[, c("EVTYPE", "PROPDMG", "CROPDMG", "LOSS")], 20)
## Source: local data frame [20 x 4]
##
## EVTYPE PROPDMG CROPDMG LOSS
## 1 TORNADO 3212258.16 100018.52 3312276.68
## 2 FLASH FLOOD 1420124.59 179200.46 1599325.05
## 3 TSTM WIND 1335965.61 109202.60 1445168.21
## 4 HAIL 688693.38 579596.28 1268289.66
## 5 FLOOD 899938.48 168037.88 1067976.36
## 6 THUNDERSTORM WIND 876844.17 66791.45 943635.62
## 7 LIGHTNING 603351.78 3580.61 606932.39
## 8 THUNDERSTORM WINDS 446293.18 18684.93 464978.11
## 9 HIGH WIND 324731.56 17283.21 342014.77
## 10 WINTER STORM 132720.59 1978.99 134699.58
## 11 HEAVY SNOW 122251.99 2165.72 124417.71
## 12 WILDFIRE 84459.34 4364.20 88823.54
## 13 ICE STORM 66000.67 1688.95 67689.62
## 14 STRONG WIND 62993.81 1616.90 64610.71
## 15 HEAVY RAIN 50842.14 11122.80 61964.94
## 16 HIGH WINDS 55625.00 1759.60 57384.60
## 17 TROPICAL STORM 48423.68 5899.12 54322.80
## 18 WILD/FOREST FIRE 39344.95 4189.54 43534.49
## 19 DROUGHT 4099.05 33898.62 37997.67
## 20 FLASH FLOODING 28497.15 5126.05 33623.20
# plot events vs total economic loss
# ggplot() replaces the deprecated qplot(), which also drew an implicit point
# layer underneath the bars. Bars keep the rank order given by ID but are
# labelled with the event name instead of the bare rank number.
ggplot(
  summaryData,
  aes(x = reorder(as.character(EVTYPE), ID), y = LOSS / 1000, fill = ID)
) +
  geom_bar(stat = "identity") +
  xlab("Event") + ylab("Total Loss (Million USD)") +
  # Event names are long; rotate them so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))