Many natural events occur across the United States throughout the year. Data on these events are captured by various agencies, organizations, departments, and individuals across the nation and are ultimately reported to the National Climatic Data Center (NCDC). This report covers (1) downloading the data from the NCDC website, (2) cleaning it, (3) analysing the cleaned data, (4) identifying the most harmful events for both public health and the economy, and (5) reporting the results.
The section below shows, step by step, how the data is acquired, processed, analysed, and reported.
ASSUMPTION: The file “repdata-data-StormData.csv.bz2” has been downloaded from the course site and is available at the path passed to read.csv below. read.csv reads the bz2-compressed file directly, so it does not need to be unzipped first.
# Read the bz2-compressed storm CSV; the first row holds the column names.
stormdata <- read.csv(file = "c:/repdata-data-StormData.csv.bz2", header = TRUE)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# List the column names of the raw data set.
colnames(stormdata)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Add FATALITIES and INJURIES to arrive at the number of people affected by each natural event.
q1data <- mutate(q1data, health_damage = FATALITIES+INJURIES)
# Sum health_damage per event type and sort so the largest total comes first.
result1 <- q1data %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_DAMAGE = sum(health_damage)) %>%
  arrange(desc(TOTAL_DAMAGE))
The most harmful event is TORNADO
# Show the five event types with the largest totals.
result1[seq_len(5), ]
## Source: local data frame [5 x 2]
##
## EVTYPE TOTAL_DAMAGE
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
# Bar chart of the five event types with the largest TOTAL_DAMAGE in result1,
# with each bar labelled by its total.
firstfive <- result1$TOTAL_DAMAGE[1:5]
bp1 <- barplot(firstfive,
               axis.lty = 1,
               axisnames = TRUE,
               names.arg = as.character(result1$EVTYPE[1:5]),
               col = "lightblue",
               width = 0.86,
               # Headroom so the label above the tallest bar is not clipped.
               ylim = c(0, max(firstfive) * 1.1),
               ylab = "Total Human Impact",
               xlab = "Natural Event Type",
               main = "The Most Harmful Natural Events for Public Health")
# barplot() returns only the bar midpoints (x positions). Pass the bar heights
# as y so each label sits on top of its bar; when y is omitted, text() uses
# the midpoints as y values plotted against x = 1..5.
text(x = bp1, y = firstfive, labels = as.character(firstfive),
     pos = 3, col = "black")
# Keep only rows that report some property or crop damage and whose property
# damage exponent is not one of the codes "+", "-", " " or "?".
q2data <- subset(
  q2data,
  (PROPDMG != 0 | CROPDMG != 0) &
    PROPDMGEXP != "+" &
    PROPDMGEXP != "-" &
    PROPDMGEXP != " " &
    PROPDMGEXP != "?"
)
# Distinct property damage exponent codes remaining after filtering.
unique(q2data[["PROPDMGEXP"]])
## [1] K M B m 0 5 6 4 h 2 7 3 H
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Distinct crop damage exponent codes remaining after filtering.
unique(q2data[["CROPDMGEXP"]])
## [1] M K m B ? 0 k
## Levels: ? 0 2 B k K m M
# Add the three derived damage columns, all starting at zero.
q2data[c("NETPROPDMG", "NETCROPDMG", "TOTALDMG")] <- 0
# Preview the first five rows with the new columns.
head(q2data, n = 5)
## EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP NETPROPDMG NETCROPDMG
## 1 TORNADO 25.0 K 0 0 0
## 2 TORNADO 2.5 K 0 0 0
## 3 TORNADO 25.0 K 0 0 0
## 4 TORNADO 2.5 K 0 0 0
## 5 TORNADO 2.5 K 0 0 0
## TOTALDMG
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
# Scale reported damage amounts by their exponent code:
#   digits 1-9 -> x10, H -> x100, K -> x1,000, M -> x1,000,000,
#   B -> x1,000,000,000 (letters are matched case-insensitively).
# Rows with any other code keep the value already stored in `current`.
#
# amount:   numeric damage figures (PROPDMG / CROPDMG)
# exponent: exponent codes (PROPDMGEXP / CROPDMGEXP)
# current:  existing net damage values to update
# Returns `current` with the recognised rows replaced by amount * multiplier.
scale_damage <- function(amount, exponent, current) {
  letter_scale <- c(H = 100, K = 1000, M = 1000000, B = 1000000000)
  # Unrecognised codes index to NA and are skipped below.
  multiplier <- unname(letter_scale[toupper(exponent)])
  multiplier[exponent %in% (1:9)] <- 10
  known <- !is.na(multiplier)
  current[known] <- amount[known] * multiplier[known]
  current
}

q2data$NETPROPDMG <- scale_damage(
  q2data$PROPDMG, q2data$PROPDMGEXP, q2data$NETPROPDMG
)
q2data$NETCROPDMG <- scale_damage(
  q2data$CROPDMG, q2data$CROPDMGEXP, q2data$NETCROPDMG
)
Add NETPROPDMG and NETCROPDMG to arrive at the total damage caused by each natural event.
q2data<- mutate(q2data, TOTDMG = NETPROPDMG+NETCROPDMG)
# Sum TOTDMG per event type, sort so the largest total comes first, then
# express the totals in billions.
result2 <- q2data %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_DAMAGE = sum(TOTDMG)) %>%
  arrange(desc(TOTAL_DAMAGE))
result2[["TOTAL_DAMAGE"]] <- result2[["TOTAL_DAMAGE"]] / 1e9
The most harmful event is FLOOD
# Show the five event types with the largest totals.
result2[seq_len(5), ]
## Source: local data frame [5 x 2]
##
## EVTYPE TOTAL_DAMAGE
## 1 FLOOD 150.31968
## 2 HURRICANE/TYPHOON 71.91371
## 3 TORNADO 57.35211
## 4 STORM SURGE 43.32354
## 5 HAIL 18.75822
# Bar chart of the five event types with the largest TOTAL_DAMAGE in result2
# (rounded to two decimals), with each bar labelled by its total.
firstfivecrop <- round(result2$TOTAL_DAMAGE[1:5], digits = 2)
bp2 <- barplot(firstfivecrop,
               axis.lty = 1,
               axisnames = TRUE,
               names.arg = as.character(result2$EVTYPE[1:5]),
               col = "lightblue",
               width = 0.86,
               # Headroom so the label above the tallest bar is not clipped.
               ylim = c(0, max(firstfivecrop) * 1.1),
               # The plotted totals combine property and crop damage, so the
               # axis is labelled as total damage rather than crop damage.
               ylab = "Total Damage (in Billions USD)",
               xlab = "Natural Event Type",
               main = "The Most Harmful Natural Events for Economy")
# barplot() returns only the bar midpoints (x positions). Pass the bar heights
# as y so each label sits on top of its bar; pos must be one of 1-4
# (3 = above the point).
text(x = bp2, y = firstfivecrop, labels = as.character(firstfivecrop),
     pos = 3, col = "black")
The natural events that are most harmful to “Public Health” are different from those that are most harmful to the “Economy”. In the above analysis, “TORNADO” is the most harmful natural event as far as Public Health is concerned, whereas “FLOOD” affects the Economy the most.