NOAA Storm Data Documentation

Synopsis
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.
The database tracks characteristics of major storms and weather events in the United States.
It includes when and where storms occur, as well as estimates of any fatalities, injuries, and property damage.
The events in the database start in the year 1950 and end in November 2011.
The data come in the form of a comma-separated-value (CSV) file compressed via the bzip2 algorithm to reduce its size.
Data available at Storm Data
National Weather Service data: click here
Details on the data are available at the link
Across the United States, which types of events are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?

Data Processing

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.2

library(reshape2)

## Warning: package 'reshape2' was built under R version 4.0.2

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load the raw NOAA storm data. The downloaded file was renamed to
# "Strom.csv" before being read here.
data1 <- read.csv("Strom.csv", header = TRUE, sep = ",")

# Keep only the columns used below: event type, casualties, and the
# damage amounts together with their exponent codes.
columns <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
data2 <- data1[, columns]

# Ten most frequent raw event-type labels.
sort(table(data2$EVTYPE), decreasing = TRUE)[1:10]

## 
##               HAIL          TSTM WIND  THUNDERSTORM WIND            TORNADO 
##             288661             219940              82563              60652 
##        FLASH FLOOD              FLOOD THUNDERSTORM WINDS          HIGH WIND 
##              54277              25326              20843              20212 
##          LIGHTNING         HEAVY SNOW 
##              15754              15708

# Collapse the free-text EVTYPE labels into a few keyword-based groups.
# Keywords are applied in order and a later match overwrites an earlier
# one (a label containing both "WIND" and "STORM" ends up as "STORM").
# NOTE(review): "TSTM WIND" therefore lands in "WIND" while
# "THUNDERSTORM WIND" lands in "STORM" -- confirm this split is intended.
event_keywords <- c(
  "HAIL", "HEAT", "FLOOD", "WIND", "STORM",
  "SNOW", "TORNADO", "WINTER", "RAIN"
)
data2$EVTYPE1 <- "OTHER"
for (keyword in event_keywords) {
  is_match <- grepl(keyword, data2$EVTYPE, ignore.case = TRUE)
  data2$EVTYPE1[is_match] <- keyword
}
# Frequency of each derived group, most common first.
sort(table(data2$EVTYPE1), decreasing = TRUE)

## 
##    HAIL    WIND   STORM   FLOOD TORNADO   OTHER  WINTER    SNOW    RAIN    HEAT 
##  289270  255362  113156   82686   60700   48970   19604   17660   12241    2648

# Ten most frequent property-damage exponent codes, before any recoding.
sort(table(data2[["PROPDMGEXP"]]), decreasing = TRUE)[seq_len(10)]

## 
##             K      M      0      B      5      1      2      ?      m 
## 465934 424665  11330    216     40     28     25     13      8      7

# Ten most frequent crop-damage exponent codes, before any recoding.
sort(table(data2[["CROPDMGEXP"]]), decreasing = TRUE)[seq_len(10)]

## 
##             K      M      k      0      B      ?      2      m   <NA> 
## 618413 281832   1994     21     19      9      7      1      1

# Translate damage-exponent codes into powers of ten.
#
# code: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length: 3, 6 or 9 for codes
# containing K, M or B (any case), and 0 for everything else, including
# NA, so those amounts are treated as plain dollars.
damage_power <- function(code) {
  code <- as.character(code)
  power <- numeric(length(code))
  # Assigned from lowest to highest priority, so a code containing several
  # of the letters resolves as K first, then M, then B.
  power[grepl("B", code, ignore.case = TRUE)] <- 9
  power[grepl("M", code, ignore.case = TRUE)] <- 6
  power[grepl("K", code, ignore.case = TRUE)] <- 3
  power
}

# Replace each exponent code by its power of ten and scale the damage
# amounts to dollars.
data2$PROPDMGEXP <- damage_power(data2$PROPDMGEXP)
data2$PROPDMG <- data2$PROPDMG * 10^data2$PROPDMGEXP

data2$CROPDMGEXP <- damage_power(data2$CROPDMGEXP)
data2$CROPDMG <- data2$CROPDMG * 10^data2$CROPDMGEXP

# Ten most frequent property-damage amounts after scaling.
sort(table(data2$PROPDMG), decreasing = TRUE)[1:10]

## 
##      0   5000  10000   1000   2000  25000  50000   3000  20000  15000 
## 663123  31731  21787  17544  17186  17104  13596  10364   9179   8617

# Ten most frequent crop-damage amounts after scaling.
sort(table(data2[["CROPDMG"]]), decreasing = TRUE)[seq_len(10)]

## 
##      0   5000  10000  50000  1e+05   1000   2000  25000  20000  5e+05 
## 880198   4097   2349   1984   1233    956    951    830    758    721

##Analysis of data

# Total fatalities and injuries per event group.
# The data are reshaped to long form (one row per group and measure),
# summed, and the 12 largest totals are kept in da1 for plotting.
da <- data2 %>%
  select(EVTYPE1, FATALITIES, INJURIES) %>%
  melt(id.vars = "EVTYPE1", measure.vars = c("FATALITIES", "INJURIES"))
da <- aggregate(
  value ~ EVTYPE1 + variable,
  data = da, FUN = sum, na.rm = TRUE
)
da1 <- head(da[order(-da$value), ], 12)
head(da1)

##    EVTYPE1   variable value
## 18 TORNADO   INJURIES 91407
## 14   OTHER   INJURIES 12224
## 13    HEAT   INJURIES  9224
## 19    WIND   INJURIES  9001
## 11   FLOOD   INJURIES  8602
## 8  TORNADO FATALITIES  5661

# Total property and crop damage per event group.
# The data are reshaped to long form (one row per group and measure),
# summed, and the 12 largest totals are kept in data4 for plotting.
data3 <- data2 %>%
  select(EVTYPE1, PROPDMG, CROPDMG) %>%
  melt(id.vars = "EVTYPE1", measure.vars = c("PROPDMG", "CROPDMG"))
data3 <- aggregate(
  value ~ EVTYPE1 + variable,
  data = data3, FUN = sum, na.rm = TRUE
)
data4 <- head(data3[order(-data3$value), ], 12)
head(data4)

##    EVTYPE1 variable        value
## 1    FLOOD  PROPDMG 167502193929
## 4    OTHER  PROPDMG  97246712337
## 7    STORM  PROPDMG  66304415393
## 8  TORNADO  PROPDMG  58593098029
## 14   OTHER  CROPDMG  23588880870
## 2     HAIL  PROPDMG  15733043048

Results

# Bar chart of the largest fatality and injury totals by event group,
# with bars coloured by measure.
da1[["variable"]] <- as.factor(da1[["variable"]])
g <- ggplot(
  data = da1,
  mapping = aes(x = EVTYPE1, y = value, fill = variable)
)
g + geom_bar(stat = "identity")

# Bar chart of the largest property and crop damage totals by event
# group, with bars coloured by measure.
data4[["variable"]] <- as.factor(data4[["variable"]])
g <- ggplot(
  data = data4,
  mapping = aes(x = EVTYPE1, y = value, fill = variable)
)
g + geom_bar(stat = "identity")

NOAA Storm Data Documentation

datasciencegit30

7/28/2020

Results