This report identifies the weather events that are most harmful to public health and property.
According to the investigation, the most harmful events are tornadoes and floods.
In addition to property damage, the data also records crop damage.
However, because the crop damage amounts are comparatively small, crop damage is not included here.
# Request the "English" locale for all locale categories (LC_ALL). The call is
# left at top level so its return value (the resulting locale string) is echoed.
Sys.setlocale("LC_ALL", "English")
## [1] "LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252"
library(ggplot2)
# Read the raw storm data, keeping text columns as character vectors rather
# than factors, then list the available column names.
storm_csv <- "./data/repdata-data-StormData.csv"
DF <- read.csv(storm_csv, stringsAsFactors = FALSE)
colnames(DF)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Restrict the data to the columns used in this report: event date and type,
# the two health counts, and the damage amounts with their magnitude codes.
keep_cols <- c(
  "BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
DF <- DF[keep_cols]
# Show the first ten rows.
head(DF, n = 10)
## BGN_DATE EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP
## 1 4/18/1950 0:00:00 TORNADO 0 15 25.0 K
## 2 4/18/1950 0:00:00 TORNADO 0 0 2.5 K
## 3 2/20/1951 0:00:00 TORNADO 0 2 25.0 K
## 4 6/8/1951 0:00:00 TORNADO 0 2 2.5 K
## 5 11/15/1951 0:00:00 TORNADO 0 2 2.5 K
## 6 11/15/1951 0:00:00 TORNADO 0 6 2.5 K
## 7 11/16/1951 0:00:00 TORNADO 0 1 2.5 K
## 8 1/22/1952 0:00:00 TORNADO 0 0 2.5 K
## 9 2/13/1952 0:00:00 TORNADO 1 14 25.0 K
## 10 2/13/1952 0:00:00 TORNADO 0 0 25.0 K
## CROPDMG CROPDMGEXP
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 0
## 10 0
# Show the last ten rows.
tail(DF, n = 10)
## BGN_DATE EVTYPE FATALITIES INJURIES PROPDMG
## 902288 11/5/2011 0:00:00 WINTER WEATHER 0 0 0
## 902289 11/28/2011 0:00:00 FROST/FREEZE 0 0 0
## 902290 11/12/2011 0:00:00 HIGH WIND 0 0 0
## 902291 11/28/2011 0:00:00 WINTER WEATHER 0 0 0
## 902292 11/28/2011 0:00:00 WINTER WEATHER 0 0 0
## 902293 11/30/2011 0:00:00 HIGH WIND 0 0 0
## 902294 11/10/2011 0:00:00 HIGH WIND 0 0 0
## 902295 11/8/2011 0:00:00 HIGH WIND 0 0 0
## 902296 11/9/2011 0:00:00 BLIZZARD 0 0 0
## 902297 11/28/2011 0:00:00 HEAVY SNOW 0 0 0
## PROPDMGEXP CROPDMG CROPDMGEXP
## 902288 K 0 K
## 902289 K 0 K
## 902290 K 0 K
## 902291 K 0 K
## 902292 K 0 K
## 902293 K 0 K
## 902294 K 0 K
## 902295 K 0 K
## 902296 K 0 K
## 902297 K 0 K
# Abort if any BGN_DATE value cannot be parsed as a month/day/year date.
# as.Date() is vectorized, so the whole column is checked in a single call
# instead of one as.Date() call per row.
if (anyNA(as.Date(DF$BGN_DATE, "%m/%d/%Y"))) {
  stop("Invalid data found ")
}
# The row-by-row loop this replaces finished with its index `i` equal to the
# number of rows; keep `i` defined with that value in case later code reads it.
i <- nrow(DF)
Create a “Year” field. It is not needed in this investigation, but it will be needed in the future.
# Parse the event start date and store its calendar year as an integer column.
dates <- as.Date(DF$BGN_DATE, "%m/%d/%Y")
DF$YEAR <- as.integer(format(dates, "%Y"))
# The former `DF <- DF[1:i-1,]` was removed. `1:i-1` parses as `(1:i) - 1`,
# i.e. 0:(i-1), and the validation loop always ends with i == nrow(DF), so the
# statement silently discarded the last row. The validation step aborts on any
# unparseable date, so every row that reaches this point is valid and is kept.

# List the distinct magnitude codes recorded for property damage.
unique(DF$PROPDMGEXP)
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-"
## [18] "1" "8"
# Build PROPDMGEXP_POW: the power-of-ten exponent that corresponds to each
# PROPDMGEXP code. Codes that are already digits are carried over unchanged.
#
# Logical masks from grepl() are used instead of `DF[grep(...), ]$col <- value`:
# that form raises an error when a pattern matches no rows, and it copies the
# matched data-frame rows for every assignment. With a mask, a pattern without
# matches is simply a no-op.
#
# The column starts as a copy of the character codes, so the numbers assigned
# below are stored as strings (for example "9"), exactly as before.
exp_code <- DF$PROPDMGEXP
DF$PROPDMGEXP_POW <- exp_code

# Letter codes, matched case-insensitively.
DF$PROPDMGEXP_POW[grepl("B", exp_code, ignore.case = TRUE)] <- 9
DF$PROPDMGEXP_POW[grepl("M", exp_code, ignore.case = TRUE)] <- 6
DF$PROPDMGEXP_POW[grepl("K", exp_code, ignore.case = TRUE)] <- 3
DF$PROPDMGEXP_POW[grepl("H", exp_code, ignore.case = TRUE)] <- 2

# Symbols and missing codes get exponent 0.
DF$PROPDMGEXP_POW[grepl("\\+", exp_code, ignore.case = TRUE)] <- 0
DF$PROPDMGEXP_POW[grepl("\\-", exp_code, ignore.case = TRUE)] <- 0
DF$PROPDMGEXP_POW[grepl("\\?", exp_code, ignore.case = TRUE)] <- 0
DF$PROPDMGEXP_POW[nchar(exp_code) == 0] <- 0

# Confirm that only digit strings remain.
unique(DF$PROPDMGEXP_POW)
## [1] "3" "6" "0" "9" "5" "4" "2" "7" "1" "8"
# Reduce each summary table to its ten largest totals (column `x`), sorted from
# largest to smallest. The three tables are expected to exist already; their
# construction is not shown in this part of the report.
top_ten_by_total <- function(totals) {
  ranked <- totals[order(totals$x, decreasing = TRUE), ]
  ranked[1:10, ]
}

DF.injuries <- top_ten_by_total(DF.injuries)
DF.fatalities <- top_ten_by_total(DF.fatalities)
DF.property <- top_ten_by_total(DF.property)
Q1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
Answer: The following plots show the 10 most dangerous disasters for public health.
The investigation confirmed that the **tornado is the most dangerous hazard**.
# Bar chart of the injury totals in DF.injuries, one bar per event type.
# reorder() arranges the bars by ascending total (column `x`).
# The former `binwidth = 1` argument was dropped: geom_bar() with
# stat = "identity" draws the supplied values as-is and has no such parameter,
# so it only produced an "unknown parameters" warning in current ggplot2.
ggplot(DF.injuries, aes(reorder(EVENT_TYPE, x), x, fill = EVENT_TYPE)) +
  geom_bar(position = "dodge", stat = "identity") +
  ggtitle("Top 10 disasters of injuries") +
  labs(x = "", y = "Number of people injured") +
  # Tilt the event-type labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Bar chart of the fatality totals in DF.fatalities, one bar per event type.
# reorder() arranges the bars by ascending total (column `x`).
# The former `binwidth = 1` argument was dropped: geom_bar() with
# stat = "identity" draws the supplied values as-is and has no such parameter,
# so it only produced an "unknown parameters" warning in current ggplot2.
ggplot(DF.fatalities, aes(reorder(EVENT_TYPE, x), x, fill = EVENT_TYPE)) +
  geom_bar(position = "dodge", stat = "identity") +
  ggtitle("Top 10 disasters of fatalities") +
  labs(x = "", y = "Number of people fatalities") +
  # Tilt the event-type labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Q2. Across the United States, which types of events have the greatest economic consequences?
Answer: The following plot shows the 10 most damaging disasters for property.
The investigation confirmed that the **flood is the most dangerous hazard**.
# Bar chart of the property-damage totals in DF.property, one bar per event type.
# reorder() arranges the bars by ascending total (column `x`).
# The former `binwidth = 1` argument was dropped: geom_bar() with
# stat = "identity" draws the supplied values as-is and has no such parameter,
# so it only produced an "unknown parameters" warning in current ggplot2.
ggplot(DF.property, aes(reorder(EVENT_TYPE, x), x, fill = EVENT_TYPE)) +
  geom_bar(position = "dodge", stat = "identity") +
  ggtitle("The 10 most harmful events to property") +
  labs(x = "", y = "Amounts of property damage") +
  # Tilt the event-type labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))