# Show the source code of every chunk in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
Storm Data is an official publication of the National Oceanic and Atmospheric Administration (NOAA) which documents the occurrence of different weather events having sufficient intensity to cause public health and economic consequences. The data were collected from April 1950 until November 2011.
The effect of weather events on public health was measured by analyzing the number of fatalities and injuries caused, while the values of property and crop damage were studied for economic consequences.
# Load the raw storm records from the bzip2-compressed CSV and inspect the
# structure of the resulting data frame.
StormData <- read.csv(file = "repdata_data_StormData.csv.bz2")
str(object = StormData)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Preview the first rows of the raw data.
head(StormData)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Per-column summary of the raw data.
summary(StormData)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE
## Min. : 1.0 Length:902297 Length:902297 Length:902297
## 1st Qu.:19.0 Class :character Class :character Class :character
## Median :30.0 Mode :character Mode :character Mode :character
## Mean :31.2
## 3rd Qu.:45.0
## Max. :95.0
##
## COUNTY COUNTYNAME STATE EVTYPE
## Min. : 0.0 Length:902297 Length:902297 Length:902297
## 1st Qu.: 31.0 Class :character Class :character Class :character
## Median : 75.0 Mode :character Mode :character Mode :character
## Mean :100.6
## 3rd Qu.:131.0
## Max. :873.0
##
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## Min. : 0.000 Length:902297 Length:902297 Length:902297
## 1st Qu.: 0.000 Class :character Class :character Class :character
## Median : 0.000 Mode :character Mode :character Mode :character
## Mean : 1.484
## 3rd Qu.: 1.000
## Max. :3749.000
##
## END_TIME COUNTY_END COUNTYENDN END_RANGE
## Length:902297 Min. :0 Mode:logical Min. : 0.0000
## Class :character 1st Qu.:0 NA's:902297 1st Qu.: 0.0000
## Mode :character Median :0 Median : 0.0000
## Mean :0 Mean : 0.9862
## 3rd Qu.:0 3rd Qu.: 0.0000
## Max. :0 Max. :925.0000
##
## END_AZI END_LOCATI LENGTH WIDTH
## Length:902297 Length:902297 Min. : 0.0000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.0000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.0000 Median : 0.000
## Mean : 0.2301 Mean : 7.503
## 3rd Qu.: 0.0000 3rd Qu.: 0.000
## Max. :2315.0000 Max. :4400.000
##
## F MAG FATALITIES INJURIES
## Min. :0.0 Min. : 0.0 Min. : 0.0000 Min. : 0.0000
## 1st Qu.:0.0 1st Qu.: 0.0 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median :1.0 Median : 50.0 Median : 0.0000 Median : 0.0000
## Mean :0.9 Mean : 46.9 Mean : 0.0168 Mean : 0.1557
## 3rd Qu.:1.0 3rd Qu.: 75.0 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :5.0 Max. :22000.0 Max. :583.0000 Max. :1700.0000
## NA's :843563
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## Min. : 0.00 Length:902297 Min. : 0.000 Length:902297
## 1st Qu.: 0.00 Class :character 1st Qu.: 0.000 Class :character
## Median : 0.00 Mode :character Median : 0.000 Mode :character
## Mean : 12.06 Mean : 1.527
## 3rd Qu.: 0.50 3rd Qu.: 0.000
## Max. :5000.00 Max. :990.000
##
## WFO STATEOFFIC ZONENAMES LATITUDE
## Length:902297 Length:902297 Length:902297 Min. : 0
## Class :character Class :character Class :character 1st Qu.:2802
## Mode :character Mode :character Mode :character Median :3540
## Mean :2875
## 3rd Qu.:4019
## Max. :9706
## NA's :47
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS
## Min. :-14451 Min. : 0 Min. :-14455 Length:902297
## 1st Qu.: 7247 1st Qu.: 0 1st Qu.: 0 Class :character
## Median : 8707 Median : 0 Median : 0 Mode :character
## Mean : 6940 Mean :1452 Mean : 3509
## 3rd Qu.: 9605 3rd Qu.:3549 3rd Qu.: 8735
## Max. : 17124 Max. :9706 Max. :106220
## NA's :40
## REFNUM
## Min. : 1
## 1st Qu.:225575
## Median :451149
## Mean :451149
## 3rd Qu.:676723
## Max. :902297
##
# Q1: Which event types are most harmful with respect to population health?
# Data processing: pull out the event type, fatalities and injuries columns
# and total the fatalities and the injuries for each event type.
Fatality <- setNames(
  aggregate(StormData$FATALITIES, by = list(StormData$EVTYPE), FUN = sum),
  c("Event_Type", "Fatalities")
)
Injury <- setNames(
  aggregate(StormData$INJURIES, by = list(StormData$EVTYPE), FUN = sum),
  c("Event_Type", "Injuries")
)
The top 5 weather event types that are most harmful to population health:
# Five event types with the most fatalities, highest first.
TopFatality <- Fatality[order(-Fatality$Fatalities)[1:5], ]
print(TopFatality)
## Event_Type Fatalities
## 834 TORNADO 5633
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
# Five event types with the most injuries, highest first.
TopInjury <- Injury[order(-Injury$Injuries)[1:5], ]
print(TopInjury)
## Event_Type Injuries
## 834 TORNADO 91346
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
Plot bar charts of the top 5 weather event types by fatalities and by injuries.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
# Set the base-graphics margins (bottom, left, top, right) to two lines each.
# The margins must be given through the named `mar` parameter; a ggplot2
# margin() object passed as an unnamed argument sets nothing and only makes
# par() print NULL.
par(mar = c(2, 2, 2, 2))
# Bar chart of the five deadliest event types, bars ordered by fatality count.
f <- ggplot(
  TopFatality,
  aes(x = reorder(Event_Type, Fatalities), y = Fatalities, fill = Event_Type)
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 5 Weather Event On Fatality",
    x = "Weather Event Type",
    y = "Total Number of Fatalities"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
# Bar chart of the five most injurious event types, bars ordered by injury count.
i <- ggplot(
  TopInjury,
  aes(x = reorder(Event_Type, Injuries), y = Injuries, fill = Event_Type)
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 5 Weather Event On Injury",
    x = "Weather Event Types",
    y = "Total Number of Injuries"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.0.3
# Draw the fatality and injury charts next to each other in two columns.
grid.arrange(f, i, ncol=2)
From the analysis above, tornadoes cause the highest number of both fatalities and injuries.
Pull out the EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP columns and preprocess the data to calculate the total value of property and crop damage.
## Economic impact: keep only the event type and the damage columns.
Economic <- StormData[, c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Translate a vector of magnitude codes into numeric multipliers:
# "K" -> 10^3, "M" -> 10^6, "B" -> 10^9 (case-insensitive). Every other code,
# including NA and the empty string, maps to 0.
exp_to_multiplier <- function(code) {
  multiplier <- numeric(length(code))
  # Assigned from largest to smallest so that K takes precedence over M, and
  # M over B, should a code ever contain more than one of these letters.
  multiplier[grepl("B", code, ignore.case = TRUE)] <- 10^9
  multiplier[grepl("M", code, ignore.case = TRUE)] <- 10^6
  multiplier[grepl("K", code, ignore.case = TRUE)] <- 10^3
  multiplier
}

Economic$PROPDMGEXP <- exp_to_multiplier(Economic$PROPDMGEXP)
Economic$CROPDMGEXP <- exp_to_multiplier(Economic$CROPDMGEXP)

## Multiply each damage figure by its multiplier to get the damage value.
Economic$PROP <- Economic$PROPDMG * Economic$PROPDMGEXP
Economic$CROP <- Economic$CROPDMG * Economic$CROPDMGEXP
# Per-record total of property and crop damage. A plain sum() over both
# columns would collapse the whole data set into a single grand total that
# gets recycled onto every row, so the rows are added individually instead.
Economic$Total <- rowSums(Economic[, c("PROP", "CROP")], na.rm = TRUE)
Calculate the total damage value by event type for both property and crops.
# Total property damage and total crop damage for each event type.
PropDamage <- setNames(
  aggregate(Economic$PROP, by = list(Economic$EVTYPE), FUN = sum),
  c("Event_Type", "Property_Damage")
)
CropDamage <- setNames(
  aggregate(Economic$CROP, by = list(Economic$EVTYPE), FUN = sum),
  c("Event_Type", "Crop_Damage")
)
## Put the two tables side by side and add the combined damage per event type.
## Each table brings its own Event_Type column, so that name appears twice.
TotalDamage <- cbind(PropDamage, CropDamage)
TotalDamage$Sum <- TotalDamage[["Property_Damage"]] + TotalDamage[["Crop_Damage"]]
The top 5 event types that cause the greatest economic consequences are:
# Five event types with the largest combined damage, highest first.
TopDamage <- TotalDamage[order(-TotalDamage$Sum)[1:5], ]
print(TopDamage)
## Event_Type Property_Damage Event_Type Crop_Damage
## 170 FLOOD 144657709800 FLOOD 5661968450
## 411 HURRICANE/TYPHOON 69305840000 HURRICANE/TYPHOON 2607872800
## 834 TORNADO 56937160480 TORNADO 414953110
## 670 STORM SURGE 43323536000 STORM SURGE 5000
## 244 HAIL 15732266720 HAIL 3025954450
## Sum
## 170 150319678250
## 411 71913712800
## 834 57352113590
## 670 43323541000
## 244 18758221170
Plot the top 5 event types by total damage value.
# Set the base-graphics margins to one line on every side.
par(mar = c(1, 1, 1, 1))
# TopDamage holds two columns named Event_Type; make the names unique so the
# plot can refer to Event_Type unambiguously.
names(TopDamage) <- make.unique(names(TopDamage))
# Horizontal bars: total damage on the x axis, event types ordered by damage.
p <- ggplot(
  TopDamage,
  aes(x = Sum, y = reorder(Event_Type, Sum), fill = Event_Type)
)
p +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 5 Weather Events Causing Economic Consequences",
    x = "Total Damage Value ($)",
    y = "Event Type"
  )
Tornadoes have the greatest impact on population health, while floods have caused the most economic damage.