Synopsis

Extreme weather such as storms can damage crops and property and cause substantial human injury and fatality. Proper tracking and analysis of historical weather event data may provide insight into how to avoid or reduce these detrimental effects. This report analyzes the weather event data collected from 1950 to 2011 across the United States. Specifically, it focuses on the weather events most harmful to public health and to the economy. To evaluate the public health effect, fatalities and injuries were chosen. This report found that Tornado is the most harmful weather event type for both fatalities and injuries. To evaluate the economic effect, property damage and crop damage were selected. This report found that Tornado is also the most harmful event type for property. Among the others, Hail was the most damaging event type for crops.

Data Processing

Load packages:

library(knitr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lattice)

Read the first rows of the weather event data, which is in CSV format with a header row

# Read only the first 100 rows of the CSV so the column layout can be
# inspected without loading the whole file. `header` is written out in full:
# the original `head=` only worked through partial argument matching.
stormdata <- read.table("repdata-data-StormData.csv", header = TRUE,
                        sep = ",", nrows = 100)

Briefly check what the data looks like

# Show the first two records of the preview sample
utils::head(x = stormdata, n = 2L)
##   STATE__          BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1 4/18/1950 0:00:00      130       CST     97     MOBILE    AL
## 2       1 4/18/1950 0:00:00      145       CST      3    BALDWIN    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0      NA         NA       NA       NA          0
## 2 TORNADO         0      NA         NA       NA       NA          0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0      NA         NA     14   100 3   0          0
## 2         NA         0      NA         NA      2   150 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0         NA  NA         NA        NA
## 2        0     2.5          K       0         NA  NA         NA        NA
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806      NA      1
## 2     3042      8755          0          0      NA      2

Read the columns of interest from the full data set and preprocess them. All missing values are labeled “NA”

# Load the full data set and keep only the columns used in this analysis.
# `header` is written out in full (the original `head=` relied on partial
# argument matching); the string "NA" marks missing values.
# NOTE(review): PROPDMG and CROPDMG are kept without the PROPDMGEXP /
# CROPDMGEXP columns that appear next to them in the preview output --
# confirm that comparing the unscaled magnitudes is intended.
alldata <- subset(
  read.table("repdata-data-StormData.csv", header = TRUE,
             na.strings = "NA", sep = ","),
  select = c("BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
             "PROPDMG", "CROPDMG")
)
# Create a new variable "YEAR" from the month/day/year part of BGN_DATE
tempdate <- as.Date(strptime(alldata$BGN_DATE, "%m/%d/%Y"))
tempyear <- format(tempdate, "%Y")
alldata$YEAR <- as.factor(tempyear)
# Drop BGN_DATE now that YEAR has been derived from it
alldata <- alldata[, c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG",
                       "CROPDMG", "YEAR")]
dim(alldata)
## [1] 902297      6
summary(alldata)
##                EVTYPE         FATALITIES          INJURIES        
##  HAIL             :288661   Min.   :  0.0000   Min.   :   0.0000  
##  TSTM WIND        :219940   1st Qu.:  0.0000   1st Qu.:   0.0000  
##  THUNDERSTORM WIND: 82563   Median :  0.0000   Median :   0.0000  
##  TORNADO          : 60652   Mean   :  0.0168   Mean   :   0.1557  
##  FLASH FLOOD      : 54277   3rd Qu.:  0.0000   3rd Qu.:   0.0000  
##  FLOOD            : 25326   Max.   :583.0000   Max.   :1700.0000  
##  (Other)          :170878                                         
##     PROPDMG           CROPDMG             YEAR       
##  Min.   :   0.00   Min.   :  0.000   2011   : 62174  
##  1st Qu.:   0.00   1st Qu.:  0.000   2008   : 55663  
##  Median :   0.00   Median :  0.000   2010   : 48161  
##  Mean   :  12.06   Mean   :  1.527   2009   : 45817  
##  3rd Qu.:   0.50   3rd Qu.:  0.000   2006   : 44034  
##  Max.   :5000.00   Max.   :990.000   2007   : 43289  
##                                      (Other):603159
# Count missing values in each column
colSums(is.na(alldata))
##     EVTYPE FATALITIES   INJURIES    PROPDMG    CROPDMG       YEAR 
##          0          0          0          0          0          0

Results

The most harmful weather event for the public health

Events that caused fatalities or injuries

In this report, two variables, fatalities and injuries, were chosen to evaluate the public health effects of weather. To work with meaningful data, the report keeps only those events whose recorded fatalities and injuries are not negative.

# Keep only events with no negative counts. Both columns must be
# non-negative, so the conditions are joined with `&`; the original `|`
# would have kept a row that is negative in just one of the two columns.
EventHealth <- subset(alldata, FATALITIES >= 0 & INJURIES >= 0,
                      select = c("YEAR", "EVTYPE", "FATALITIES", "INJURIES"))
dim(EventHealth)
## [1] 902297      4

Event which caused maximal one-time injuries or fatalities

# Event(s) with the highest injury count recorded for a single occurrence
EventHealth[which(EventHealth$INJURIES == max(EventHealth$INJURIES)), ]
##        YEAR  EVTYPE FATALITIES INJURIES
## 157885 1979 TORNADO         42     1700
# Event(s) with the highest fatality count recorded for a single occurrence
EventHealth[which(EventHealth$FATALITIES == max(EventHealth$FATALITIES)), ]
##        YEAR EVTYPE FATALITIES INJURIES
## 198704 1995   HEAT        583        0

Event type which caused most injuries or fatalities in total

# Event type with the largest total number of injuries
gyinj <- group_by(EventHealth, EVTYPE)
gyinjutotal <- summarize(gyinj, INJURIES = sum(INJURIES))
gyinjutotalmax <- subset(gyinjutotal, INJURIES == max(gyinjutotal$INJURIES))
gyinjutotalmax
## Source: local data frame [1 x 2]
## 
##    EVTYPE INJURIES
## 1 TORNADO    91346
# Event type with the largest total number of fatalities.
# The summary column holds a sum, so it is named FATALITIES; the original
# name `avgfata` wrongly suggested an average.
gyfatatotal <- summarize(gyinj, FATALITIES = sum(FATALITIES))
gyfatatotalmax <- subset(gyfatatotal,
                         FATALITIES == max(gyfatatotal$FATALITIES))
gyfatatotalmax
## Source: local data frame [1 x 2]
## 
##    EVTYPE FATALITIES
## 1 TORNADO       5633

The injuries caused by Tornado and other event type from 1950 to 2011

The summary of injuries caused by Tornado and Other Weather Types

# Create a two-level variable: the top injury-causing event type vs. "Other".
# `%in%` replaces `==` so the comparison still works if the summary holds
# more than one tied event type, or if the two EVTYPE factors carry
# different level sets (where `==` on factors would fail).
wt <- factor(EventHealth$EVTYPE %in% gyinjutotalmax$EVTYPE,
             levels = c(FALSE, TRUE), labels = c("Other", "Tornado"))
EventHealth$WEATHERTYPE <- wt

# Average injuries per event for each weather type
grp <- group_by(EventHealth, WEATHERTYPE)
im <- summarize(grp, INJURIES = mean(INJURIES))
barchart(INJURIES ~ WEATHERTYPE, data = im, col = "blue",
         main = "Average Injuries", xlab = "Weather Type",
         ylab = "Average Injuries")

# Total injuries for each weather type
it <- summarize(grp, INJURIES = sum(INJURIES))
barchart(INJURIES ~ WEATHERTYPE, data = it, col = "blue",
         main = "Total Injuries", xlab = "Weather Type",
         ylab = "Total Injuries")

The detailed injuries caused by Tornado and Other Weather Types

# Total injuries per year, split by weather type, drawn as one panel per year.
# group_by() with bare column names replaces the deprecated
# group_by_(.dots = ...) form, so the helper symbol list is no longer needed.
injusub <- EventHealth[, c("YEAR", "WEATHERTYPE", "INJURIES")]
sumdata <- injusub %>%
  group_by(YEAR, WEATHERTYPE) %>%
  summarize(INJURIES = sum(INJURIES))
barchart(INJURIES ~ WEATHERTYPE | YEAR, data = sumdata, strip = TRUE,
         pch = 20, main = "Injuries caused by Tornado and Other Event Types",
         xlab = "Weather Type", ylab = "Total Injuries")

The fatalities caused by Tornado and other event type from 1950 to 2011

Summary of fatalities caused by Tornado and Other Weather Types

# Create a two-level variable: the top fatality-causing event type vs. "Other".
# `%in%` replaces `==` so the comparison still works if the summary holds
# more than one tied event type, or if the two EVTYPE factors carry
# different level sets (where `==` on factors would fail).
wt <- factor(EventHealth$EVTYPE %in% gyfatatotalmax$EVTYPE,
             levels = c(FALSE, TRUE), labels = c("Other", "Tornado"))
EventHealth$WEATHERTYPE <- wt
# Average fatalities per event for each weather type
grp <- group_by(EventHealth, WEATHERTYPE)
fm <- summarize(grp, FATALITIES = mean(FATALITIES))
barchart(FATALITIES ~ WEATHERTYPE, data = fm, col = "blue",
         main = "Average Fatalities", xlab = "Weather Type",
         ylab = "Average Fatalities")

# Total fatalities for each weather type
ft <- summarize(grp, FATALITIES = sum(FATALITIES))
barchart(FATALITIES ~ WEATHERTYPE, data = ft, col = "blue",
         main = "Total Fatalities", xlab = "Weather Type",
         ylab = "Total Fatalities")

The detailed fatalities caused by Tornado and Other Weather Types

# Total fatalities per year, split by weather type, one panel per year.
# group_by() with bare column names replaces the deprecated
# group_by_(.dots = ...) form, so the helper symbol list is no longer needed.
injusub <- EventHealth[, c("YEAR", "WEATHERTYPE", "FATALITIES")]
sumdata <- injusub %>%
  group_by(YEAR, WEATHERTYPE) %>%
  summarize(FATALITIES = sum(FATALITIES))
barchart(FATALITIES ~ WEATHERTYPE | YEAR, data = sumdata, strip = TRUE,
         pch = 20, main = "Fatalities caused by Tornado and Other Event Types",
         xlab = "Weather Type", ylab = "Total Fatalities")

The most harmful weather event for the economy

Events that caused property damage or crop damage

In this report, two variables, property damage and crop damage, were chosen to evaluate the economic effects of weather. To work with meaningful data, the report keeps only those events whose recorded property damage and crop damage are not negative.

# Keep only events with no negative damage figures. Both columns must be
# non-negative, so the conditions are joined with `&`; the original `|`
# would have kept a row that is negative in just one of the two columns.
EventEconomy <- subset(alldata, PROPDMG >= 0 & CROPDMG >= 0,
                       select = c("YEAR", "EVTYPE", "PROPDMG", "CROPDMG"))
dim(EventEconomy)
## [1] 902297      4

Event type which caused maximal one-time property or crop damages

# Event(s) with the highest property damage value recorded for one occurrence
EventEconomy[which(EventEconomy$PROPDMG == max(EventEconomy$PROPDMG)), ]
##        YEAR            EVTYPE PROPDMG CROPDMG
## 778568 2009 THUNDERSTORM WIND    5000       0
## 808182 2010       FLASH FLOOD    5000       0
## 808183 2010       FLASH FLOOD    5000       0
## 900685 2011        WATERSPOUT    5000       0
# Event(s) with the highest crop damage value recorded for one occurrence
EventEconomy[which(EventEconomy$CROPDMG == max(EventEconomy$CROPDMG)), ]
##        YEAR  EVTYPE PROPDMG CROPDMG
## 544253 2004 DROUGHT       0     990

Event type which caused most properties damage or crops damage in total

# Sum property damage per event type and keep the type with the largest total
gyinj <- group_by(EventEconomy, EVTYPE)
gyproptotal <- gyinj %>% summarize(PROPDMG = sum(PROPDMG))
gyproptotalmax <- gyproptotal[
  which(gyproptotal$PROPDMG == max(gyproptotal$PROPDMG)),
]
gyproptotalmax
## Source: local data frame [1 x 2]
## 
##    EVTYPE PROPDMG
## 1 TORNADO 3212258
# Sum crop damage per event type and keep the type with the largest total
gycroptotal <- gyinj %>% summarize(CROPDMG = sum(CROPDMG))
gycroptotalmax <- gycroptotal[
  which(gycroptotal$CROPDMG == max(gycroptotal$CROPDMG)),
]
gycroptotalmax
## Source: local data frame [1 x 2]
## 
##   EVTYPE  CROPDMG
## 1   HAIL 579596.3

The property damage caused by Tornado and other event type from 1950 to 2011

# Create a two-level variable: the top property-damage event type vs. "Other".
# `%in%` replaces `==` so the comparison still works if the summary holds
# more than one tied event type, or if the two EVTYPE factors carry
# different level sets (where `==` on factors would fail).
wt <- factor(EventEconomy$EVTYPE %in% gyproptotalmax$EVTYPE,
             levels = c(FALSE, TRUE), labels = c("Other", "Tornado"))
EventEconomy$WEATHERTYPE <- wt
# Average property damage per event for each weather type
grp <- group_by(EventEconomy, WEATHERTYPE)
pm <- summarize(grp, PROPDMG = mean(PROPDMG))
barchart(PROPDMG ~ WEATHERTYPE, data = pm, col = "blue",
         main = "Average Property Damage", xlab = "Weather Type",
         ylab = "Average Property Damage")

# Total property damage for each weather type
pt <- summarize(grp, PROPDMG = sum(PROPDMG))
barchart(PROPDMG ~ WEATHERTYPE, data = pt, col = "blue",
         main = "Total Property Damage", xlab = "Weather Type",
         ylab = "Total Property Damage")

The detailed property damage caused by Tornado and Other Event Types

# Total property damage per year, split by weather type, one panel per year.
# The y-axis label now names property damage (it previously read
# "Total Injuries"). group_by() with bare column names replaces the
# deprecated group_by_(.dots = ...) form.
injusub <- EventEconomy[, c("YEAR", "WEATHERTYPE", "PROPDMG")]
sumdata <- injusub %>%
  group_by(YEAR, WEATHERTYPE) %>%
  summarize(PROPDMG = sum(PROPDMG))
barchart(PROPDMG ~ WEATHERTYPE | YEAR, data = sumdata, strip = TRUE,
         pch = 20,
         main = "Property Damage Caused by Tornado and Other Event Types",
         xlab = "Weather Type", ylab = "Total Property Damage")

The crop damage caused by Hail and other event type from 1950 to 2011

The average crop damage caused by Hail and Other Event Types

# Create a two-level variable: the top crop-damage event type vs. "Other".
# `%in%` replaces `==` so the comparison still works if the summary holds
# more than one tied event type, or if the two EVTYPE factors carry
# different level sets (where `==` on factors would fail).
wt <- factor(EventEconomy$EVTYPE %in% gycroptotalmax$EVTYPE,
             levels = c(FALSE, TRUE), labels = c("Other", "Hail"))
EventEconomy$WEATHERTYPE <- wt
# Average crop damage per event for each weather type
grp <- group_by(EventEconomy, WEATHERTYPE)
cm <- summarize(grp, CROPDMG = mean(CROPDMG))
barchart(CROPDMG ~ WEATHERTYPE, data = cm, col = "blue",
         main = "Average Crop Damage", xlab = "Weather Type",
         ylab = "Average Crop Damage")

# Total crop damage for each weather type
ct <- summarize(grp, CROPDMG = sum(CROPDMG))
barchart(CROPDMG ~ WEATHERTYPE, data = ct, col = "blue",
         main = "Total Crop Damage", xlab = "Weather Type",
         ylab = "Total Crop Damage")

The detailed crop damage caused by Hail and Other Event Types

# Total crop damage per year, split by weather type, one panel per year.
# group_by() with bare column names replaces the deprecated
# group_by_(.dots = ...) form, so the helper symbol list is no longer needed.
injusub <- EventEconomy[, c("YEAR", "WEATHERTYPE", "CROPDMG")]
sumdata <- injusub %>%
  group_by(YEAR, WEATHERTYPE) %>%
  summarize(CROPDMG = sum(CROPDMG))
barchart(CROPDMG ~ WEATHERTYPE | YEAR, data = sumdata, strip = TRUE,
         pch = 20, main = "Crop Damage Caused by Hail and Other Event Types",
         xlab = "Weather Type", ylab = "Total Crop Damage")