NOAA Storm Database Analytics

Import the data into RStudio

# Load the raw storm records from the CSV file in the working directory.
data <- read.csv(file = "stormdata.csv")

# Preview the first rows to confirm the columns were read as expected.
head(x = data)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6

Checking data

library(ggplot2)
library(plyr)
# Count how many entries of `x` are missing (NA).
nmissing <- function(x) {
  missing_flags <- is.na(x)
  sum(missing_flags)
}
# Report the number of missing values in every column of the data set.
colwise(.fun = nmissing)(data)
##   STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE BGN_RANGE
## 1       0        0        0         0      0          0     0      0         0
##   BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN END_RANGE END_AZI
## 1       0          0        0        0          0     902297         0       0
##   END_LOCATI LENGTH WIDTH      F MAG FATALITIES INJURIES PROPDMG PROPDMGEXP
## 1          0      0     0 843563   0          0        0       0          0
##   CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE LATITUDE_E
## 1       0          0   0          0         0       47         0         40
##   LONGITUDE_ REMARKS REFNUM
## 1          0       0      0

Compute the total harm for each EVTYPE as the sum of FATALITIES and INJURIES, then keep the ten event types with the highest totals

# Total harm per event type: fatalities plus injuries, summed over every
# record that shares the same EVTYPE.
injuryDataFrame <- ddply(data, .(EVTYPE),
                         summarize,
                         TotalHarm = sum(FATALITIES + INJURIES))

# Rank event types from most to least harmful. `TRUE` is spelled out because
# `T` is an ordinary variable that can be reassigned.
injuryDataFrame <- injuryDataFrame[order(injuryDataFrame$TotalHarm,
                                         decreasing = TRUE), ]

# Keep the ten most harmful event types. head() returns fewer rows when fewer
# than ten event types exist, whereas indexing with 1:10 would pad the result
# with all-NA rows.
TopHarm <- head(injuryDataFrame, 10)

Property Damage: Sum of PROPDMG by EVTYPE and PROPDMGEXP

# Add up the raw PROPDMG figures for each combination of event type and
# exponent code; the exponent is applied in a later step.
prop <- ddply(.data = data,
              .variables = .(EVTYPE, PROPDMGEXP),
              .fun = summarize,
              PROPDMG = sum(PROPDMG))

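Property Damage: Convert PROPDMG to its full value using the PROPDMGEXP multiplier (H = 100, K = 1,000, M = 1,000,000, B = 1,000,000,000; any other code leaves the value unchanged)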

# Scale PROPDMG by the multiplier its exponent code stands for. Codes are
# matched case-insensitively.
prop$PropertyDamage <- with(prop, {
  code <- toupper(PROPDMGEXP)
  unit_value <- c(H = 100, K = 1000, M = 1000000, B = 1000000000)
  multiplier <- unname(unit_value[match(code, names(unit_value))])
  # Any other non-missing code leaves the amount unchanged; a missing code
  # keeps the result missing.
  multiplier[is.na(multiplier) & !is.na(code)] <- 1
  PROPDMG * multiplier
})

Property Damage: Total property damage by event type

# Only the event type and the scaled value are needed from here on.
prop <- prop[, c("EVTYPE", "PropertyDamage"), drop = FALSE]

# Collapse the per-exponent rows into one property-damage total per event type.
prop.total <- ddply(.data = prop,
                    .variables = .(EVTYPE),
                    .fun = summarize,
                    TotalPropDamage = sum(PropertyDamage))

Crop Damage: Sum of CROPDMG by EVTYPE and CROPDMGEXP.

# Add up the raw CROPDMG figures for each combination of event type and
# exponent code; the exponent is applied in a later step.
crop <- ddply(.data = data,
              .variables = .(EVTYPE, CROPDMGEXP),
              .fun = summarize,
              CROPDMG = sum(CROPDMG))

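Crop Damage: Convert CROPDMG to its full value using the CROPDMGEXP multiplier (H = 100, K = 1,000, M = 1,000,000, B = 1,000,000,000; any other code leaves the value unchanged).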

# Scale CROPDMG by the multiplier its exponent code stands for. Codes are
# matched case-insensitively.
crop$CropDamage <- with(crop, {
  code <- toupper(CROPDMGEXP)
  unit_value <- c(H = 100, K = 1000, M = 1000000, B = 1000000000)
  multiplier <- unname(unit_value[match(code, names(unit_value))])
  # Any other non-missing code leaves the amount unchanged; a missing code
  # keeps the result missing.
  multiplier[is.na(multiplier) & !is.na(code)] <- 1
  CROPDMG * multiplier
})

Sum of Crop Damage by Event Type

# Only the event type and the scaled value are needed from here on.
crop <- crop[, c("EVTYPE", "CropDamage"), drop = FALSE]

# Collapse the per-exponent rows into one crop-damage total per event type.
crop.total <- ddply(.data = crop,
                    .variables = .(EVTYPE),
                    .fun = summarize,
                    TotalCropDamage = sum(CropDamage))

Total Damage : Merge Property and Crop Damage - Top Ten Damages

# Join the property and crop totals on event type so each row carries both.
damageDataFrame <- merge(prop.total, crop.total, by = "EVTYPE")

# Total economic damage is the sum of the property and crop totals.
damageDataFrame <- mutate(damageDataFrame,
                          TotalDamage = TotalPropDamage + TotalCropDamage)

# Rank event types from most to least damaging. `TRUE` is spelled out because
# `T` is an ordinary variable that can be reassigned.
damageDataFrame <- damageDataFrame[order(damageDataFrame$TotalDamage,
                                         decreasing = TRUE), ]

# Keep the ten most damaging event types. head() returns fewer rows when fewer
# than ten event types exist, whereas indexing with 1:10 would pad the result
# with all-NA rows.
TopDamage <- head(damageDataFrame, 10)

Results of the Analysis

1. Population Health Casualties

The ten most harmful event types, ranked by the sum of fatalities and injuries, are:

# Display the ten event types with the highest combined fatalities and injuries.
TopHarm
##                EVTYPE TotalHarm
## 834           TORNADO     96979
## 130    EXCESSIVE HEAT      8428
## 856         TSTM WIND      7461
## 170             FLOOD      7259
## 464         LIGHTNING      6046
## 275              HEAT      3037
## 153       FLASH FLOOD      2755
## 427         ICE STORM      2064
## 760 THUNDERSTORM WIND      1621
## 972      WINTER STORM      1527

Plot of the ten most harmful event types by total harm (fatalities plus injuries)

# Bar chart of the ten most harmful event types.
# reorder() sorts the bars from highest to lowest TotalHarm so the ranking is
# visible; without it the x-axis is laid out alphabetically.
# The labels name both fatalities and injuries because TotalHarm is their sum.
plot1 <- ggplot(TopHarm,
                aes(reorder(EVTYPE, -TotalHarm), TotalHarm, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  xlab("Top 10 events") +
  ylab("Total harm (fatalities + injuries)") +
  ggtitle("Fatalities and injuries due to severe weather events in the U.S from 1950-2011") +
  # Tilt the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

plot1

Tornadoes cause by far the most combined fatalities and injuries.

2. Economic Casualties

The ten event types that caused the greatest total economic damage (property plus crop) are as follows:

# Display the ten event types with the highest combined property and crop damage.
TopDamage
##                EVTYPE TotalPropDamage TotalCropDamage  TotalDamage
## 170             FLOOD    144657709807      5661968450 150319678257
## 411 HURRICANE/TYPHOON     69305840000      2607872800  71913712800
## 834           TORNADO     56937160779       414953270  57352114049
## 670       STORM SURGE     43323536000            5000  43323541000
## 244              HAIL     15732267543      3025954473  18758222016
## 153       FLASH FLOOD     16140812067      1421317100  17562129167
## 95            DROUGHT      1046106000     13972566000  15018672000
## 402         HURRICANE     11868319010      2741910000  14610229010
## 590       RIVER FLOOD      5118945500      5029459000  10148404500
## 427         ICE STORM      3944927860      5022113500   8967041360

Plot of total damage (the sum of TotalPropDamage and TotalCropDamage) for these ten event types

# Bar chart of total (property + crop) damage for the ten costliest event types.
plot2 <- ggplot(data = TopDamage,
                mapping = aes(x = EVTYPE, y = TotalDamage, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  # Tilt the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Top 10 events",
       y = "Total Economic damage",
       title = "Total Economic damage due to severe weather events in the U.S from 1950-2011")

plot2

Floods cause the most total economic damage.

This plot shows the crop-damage component for the same ten event types (those ranked highest by total damage):

# Bar chart of the crop-damage component for the event types held in TopDamage.
plot3 <- ggplot(data = TopDamage,
                mapping = aes(x = EVTYPE, y = TotalCropDamage, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  # Tilt the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Top 10 events",
       y = "Total Crop Economic damage",
       title = "Total Economic Crop damage due to severe weather events in the U.S from 1950-2011")

plot3