Course Project 2

1. Title: The tornado is the most damaging weather event in the US

2. Synopsis

Severe weather events can cause serious public health and economic burdens for communities. In the U.S. National Oceanic and Atmospheric Administration (NOAA) storm database, weather conditions are categorized into 48 event types. Among them, the tornado is the biggest problem in terms of both health and the economy: tornadoes caused a total of 84,742 casualties (4,658 fatalities and 80,084 injuries) and about 41 billion dollars in damage. (Most of the damage caused by tornadoes was property damage; the largest crop damage was caused by drought.)

3. Data Processing

# NOTE(review): this working directory is specific to one Windows machine;
# point it at a local folder before running the script elsewhere.
workingpath <- "C:\\Users\\MED1\\ReprodRes"
setwd(workingpath)

##### read & preprocess data #####
# Fetch the compressed storm data once and load it into `storm`.
# The archive is requested with mode = "wb" so it is written byte-for-byte
# (a text-mode transfer on Windows rewrites line endings and corrupts
# compressed files).  An existing local copy is reused; delete
# "stormdata.csv.bz2" to force a fresh download.
storm.url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm.file <- "stormdata.csv.bz2"
if (!file.exists(storm.file)) {
    download.file(storm.url, storm.file, mode = "wb")
}
# read.csv() decompresses the .bz2 file transparently.
storm <- read.csv(storm.file, header = TRUE)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## EOF within quoted string
# types of events #
# The 48 event-type labels (upper case) that EVTYPE is matched against when
# the records are filtered.
event <- c(
    "ASTRONOMICAL LOW TIDE", "AVALANCHE", "BLIZZARD",
    "COASTAL FLOOD", "COLD/WIND CHILL", "DEBRIS FLOW",
    "DENSE FOG", "DENSE SMOKE", "DROUGHT",
    "DUST DEVIL", "DUST STORM", "EXCESSIVE HEAT",
    "EXTREME COLD/WIND CHILL", "FLASH FLOOD", "FLOOD",
    "FROST/FREEZE", "FUNNEL CLOUD", "FREEZING FOG",
    "HAIL", "HEAT", "HEAVY RAIN",
    "HEAVY SNOW", "HIGH SURF", "HIGH WIND",
    "HURRICANE", "ICE STORM", "LAKE-EFFECT SNOW",
    "LAKESHORE FLOOD", "LIGHTNING", "MARINE HAIL",
    "MARINE HIGH WIND", "MARINE STRONG WIND", "MARINE THUNDERSTORM WIND",
    "RIP CURRENT", "SEICHE", "SLEET",
    "STORM SURGE/TIDE", "STRONG WIND", "THUNDERSTORM WIND",
    "TORNADO", "TROPICAL DEPRESSION", "TROPICAL STORM",
    "TSUNAMI", "VOLCANIC ASH", "WATERSPOUT",
    "WILDFIRE", "WINTER STORM", "WINTER WEATHER"
)

# Keep only the event type, casualty, and damage columns, then drop the
# full table to free memory.
name.select <- c("EVTYPE", "FATALITIES", "INJURIES",
                 "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
storm.sub <- storm[, name.select, drop = FALSE]
rm(storm)

# select cases according to events #
# Row positions are collected one event label at a time, so the filtered
# rows end up grouped in the order of `event`.
n.event <- length(event)
event.id <- unlist(lapply(event, function(ev) which(storm.sub$EVTYPE == ev)))
storm.sub <- storm.sub[event.id, ]

# change types of variables #
# Convert a factor whose levels are numeric strings into the numbers those
# levels spell out (not into the factor's internal integer codes).
#
# x: a factor; plain numeric or character vectors are also accepted and are
#    converted with as.numeric().
# Returns a numeric vector of the same length as x.  Values that cannot be
# parsed as numbers become NA, with the usual coercion warning.
as.numeric.factor <- function (x) {
    if (!is.factor(x)) {
        # levels() is NULL for a non-factor, so indexing the (empty) converted
        # level vector would yield only NAs or even change the length.
        return(as.numeric(x))
    }
    as.numeric(levels(x))[x]
}
# Convert the casualty counts and damage figures to numeric with
# as.numeric.factor(); the "## Warning" lines are the console output recorded
# when this report was generated.
# NOTE(review): each conversion reported NAs introduced by coercion, i.e. some
# values in these columns were not numeric — presumably a symptom of the
# malformed read reported by the scan() warning after read.csv(); confirm.
storm.sub$FATALITIES <- as.numeric.factor(storm.sub$FATALITIES)
## Warning in as.numeric.factor(storm.sub$FATALITIES): NAs introduced by coercion
storm.sub$INJURIES <- as.numeric.factor(storm.sub$INJURIES)
## Warning in as.numeric.factor(storm.sub$INJURIES): NAs introduced by coercion
storm.sub$PROPDMG <- as.numeric.factor(storm.sub$PROPDMG)
## Warning in as.numeric.factor(storm.sub$PROPDMG): NAs introduced by coercion
storm.sub$CROPDMG <- as.numeric.factor(storm.sub$CROPDMG)
## Warning in as.numeric.factor(storm.sub$CROPDMG): NAs introduced by coercion
# The event type and the two damage-magnitude code columns become plain
# character vectors so they can be compared against string literals.
storm.sub$EVTYPE <- as.character(storm.sub$EVTYPE)
storm.sub$PROPDMGEXP <- as.character(storm.sub$PROPDMGEXP)
storm.sub$CROPDMGEXP <- as.character(storm.sub$CROPDMGEXP)

# calculate the amounts of damage #
# PROPDMGEXP / CROPDMGEXP carry a magnitude code for the matching damage
# figure: K = thousands, M = millions, B = billions.  Codes are matched
# case-insensitively, so "k"/"m"/"b" are scaled like "K"/"M"/"B"; any other
# code (including blank or NA) leaves the recorded figure unchanged.
# NOTE(review): if the data contain lower-case codes, the totals recorded
# further down in this report will change when it is re-run.
#
# exp.code: vector of magnitude codes.
# Returns a numeric vector of multipliers, one per code.
dmg.multiplier <- function(exp.code) {
    scale.by <- c(K = 10^3, M = 10^6, B = 10^9)[toupper(exp.code)]
    # Codes that are not K/M/B look up as NA: keep those figures as they are.
    scale.by[is.na(scale.by)] <- 1
    unname(scale.by)
}
storm.sub$PROPDMG <- storm.sub$PROPDMG * dmg.multiplier(storm.sub$PROPDMGEXP)
storm.sub$CROPDMG <- storm.sub$CROPDMG * dmg.multiplier(storm.sub$CROPDMGEXP)

# total amount of health problems & economic problems #
# Casualties = fatalities + injuries; total damage = property + crop damage.
# A row with NA in either component gets an NA total.
storm.sub$HEALTHPOP <- storm.sub$FATALITIES + storm.sub$INJURIES
storm.sub$DMGTOTAL <- storm.sub$PROPDMG + storm.sub$CROPDMG

4. Results

##### Data analysis #####
# total amount of damages - population health & economic consequences #
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Group the records by event type once; the grouped table is the input for
# every per-event total in this section.
storm.evtype <- storm.sub %>% group_by(EVTYPE)
# Per-event casualty total and damage total, ignoring NA rows.
storm.evtype.healthpop.sum <- storm.evtype %>%
    summarise(sum.health = sum(HEALTHPOP, na.rm = TRUE))
storm.evtype.dmgtotal.sum <- storm.evtype %>%
    summarise(sum.dmg = sum(DMGTOTAL, na.rm = TRUE))

# Event type(s) with the largest casualty total (ties are all returned).
with(storm.evtype.healthpop.sum, EVTYPE[which(sum.health == max(sum.health))])
## [1] "TORNADO"
# Event type(s) with the largest damage total.
with(storm.evtype.dmgtotal.sum, EVTYPE[which(sum.dmg == max(sum.dmg))])
## [1] "TORNADO"
# Side-by-side bar charts of the casualty total and the damage total per
# event type.  Each bar is labelled with its event type: barplot() returns
# the bar midpoints, and axis() writes the labels there, vertically and in a
# small font.  The bottom margin is widened for the labels and put back once
# both charts are drawn; the two-panel layout itself is left in place.
old.par <- par(mfrow = c(1, 2), mar = c(10, 4, 4, 1))
bar.at <- barplot(storm.evtype.healthpop.sum$sum.health,
                  main = "Health Problems(casuality)", ylab = "persons")
axis(1, at = bar.at, labels = storm.evtype.healthpop.sum$EVTYPE,
     las = 2, cex.axis = 0.5, tick = FALSE)
bar.at <- barplot(storm.evtype.dmgtotal.sum$sum.dmg,
                  main = "Economic Damage", ylab = "Dollars")
axis(1, at = bar.at, labels = storm.evtype.dmgtotal.sum$EVTYPE,
     las = 2, cex.axis = 0.5, tick = FALSE)
par(mar = old.par$mar)
total health & economic problems

total health & economic problems

# Return to a single-panel plot layout.
par(mfrow = c(1, 1))
# Event type(s) holding the maximum casualty total, then the maximum damage
# total.
with(storm.evtype.healthpop.sum, EVTYPE[which(sum.health == max(sum.health))])
## [1] "TORNADO"
with(storm.evtype.dmgtotal.sum, EVTYPE[which(sum.dmg == max(sum.dmg))])
## [1] "TORNADO"
# Five event types with the highest casualty totals.
health.rank <- order(storm.evtype.healthpop.sum$sum.health, decreasing = TRUE)
head(storm.evtype.healthpop.sum[health.rank, ], n = 5)
## # A tibble: 5 x 2
##   EVTYPE         sum.health
##   <chr>               <dbl>
## 1 TORNADO             84742
## 2 FLOOD                6757
## 3 EXCESSIVE HEAT       5770
## 4 LIGHTNING            4190
## 5 ICE STORM            2035
# Five event types with the highest damage totals.
dmg.rank <- order(storm.evtype.dmgtotal.sum$sum.dmg, decreasing = TRUE)
head(storm.evtype.dmgtotal.sum[dmg.rank, ], n = 5)
## # A tibble: 5 x 2
##   EVTYPE         sum.dmg
##   <chr>            <dbl>
## 1 TORNADO   41182220970.
## 2 FLOOD     14672056027 
## 3 HURRICANE 11962119010 
## 4 DROUGHT   10705543000 
## 5 HAIL       9562935593.
# individual effects of fatality & injury / individual effects of property & crop #
# Per-event totals for each measure on its own, ignoring NA rows.
storm.evtype.fatality.sum <- storm.evtype %>%
    summarise(sum.fatality = sum(FATALITIES, na.rm = TRUE))
storm.evtype.injuries.sum <- storm.evtype %>%
    summarise(sum.injuries = sum(INJURIES, na.rm = TRUE))
storm.evtype.propdmg.sum <- storm.evtype %>%
    summarise(sum.propdmg = sum(PROPDMG, na.rm = TRUE))
storm.evtype.cropdmg.sum <- storm.evtype %>%
    summarise(sum.cropdmg = sum(CROPDMG, na.rm = TRUE))
# Side-by-side bar charts of fatalities and injuries per event type.
# Each bar is labelled with its event type: barplot() returns the bar
# midpoints, and axis() writes the labels there, vertically and in a small
# font.  Both charts carry the same y-axis label.  The bottom margin is
# widened for the labels and put back once both charts are drawn; the
# two-panel layout itself is left in place.
old.par <- par(mfrow = c(1, 2), mar = c(10, 4, 4, 1))
bar.at <- barplot(storm.evtype.fatality.sum$sum.fatality,
                  main = "FATALITIES", ylab = "persons")
axis(1, at = bar.at, labels = storm.evtype.fatality.sum$EVTYPE,
     las = 2, cex.axis = 0.5, tick = FALSE)
bar.at <- barplot(storm.evtype.injuries.sum$sum.injuries,
                  main = "INJURIES", ylab = "persons")
axis(1, at = bar.at, labels = storm.evtype.injuries.sum$EVTYPE,
     las = 2, cex.axis = 0.5, tick = FALSE)
par(mar = old.par$mar)
individual effects of fatality & injury

individual effects of fatality & injury

# Return to a single-panel plot layout.
par(mfrow = c(1, 1))
# Event type(s) with the most fatalities, then with the most injuries.
with(storm.evtype.fatality.sum, EVTYPE[which(sum.fatality == max(sum.fatality))])
## [1] "TORNADO"
with(storm.evtype.injuries.sum, EVTYPE[which(sum.injuries == max(sum.injuries))])
## [1] "TORNADO"
# Five event types with the most fatalities.
fatality.rank <- order(storm.evtype.fatality.sum$sum.fatality, decreasing = TRUE)
head(storm.evtype.fatality.sum[fatality.rank, ], n = 5)
## # A tibble: 5 x 2
##   EVTYPE         sum.fatality
##   <chr>                 <dbl>
## 1 TORNADO                4658
## 2 EXCESSIVE HEAT         1416
## 3 HEAT                    708
## 4 LIGHTNING               562
## 5 FLASH FLOOD             559
# Five event types with the most injuries.
injuries.rank <- order(storm.evtype.injuries.sum$sum.injuries, decreasing = TRUE)
head(storm.evtype.injuries.sum[injuries.rank, ], n = 5)
## # A tibble: 5 x 2
##   EVTYPE         sum.injuries
##   <chr>                 <dbl>
## 1 TORNADO               80084
## 2 FLOOD                  6499
## 3 EXCESSIVE HEAT         4354
## 4 LIGHTNING              3628
## 5 ICE STORM              1959
# Side-by-side bar charts of property damage and crop damage per event type.
# Each bar is labelled with its event type: barplot() returns the bar
# midpoints, and axis() writes the labels there, vertically and in a small
# font.  The first chart is titled "PROPERTY" because it plots the PROPDMG
# totals, and both charts carry the same y-axis label.  The bottom margin is
# widened for the labels and put back once both charts are drawn; the
# two-panel layout itself is left in place.
old.par <- par(mfrow = c(1, 2), mar = c(10, 4, 4, 1))
bar.at <- barplot(storm.evtype.propdmg.sum$sum.propdmg,
                  main = "PROPERTY", ylab = "Dollars")
axis(1, at = bar.at, labels = storm.evtype.propdmg.sum$EVTYPE,
     las = 2, cex.axis = 0.5, tick = FALSE)
bar.at <- barplot(storm.evtype.cropdmg.sum$sum.cropdmg,
                  main = "CROP", ylab = "Dollars")
axis(1, at = bar.at, labels = storm.evtype.cropdmg.sum$EVTYPE,
     las = 2, cex.axis = 0.5, tick = FALSE)
par(mar = old.par$mar)
individual effects of product & crop

individual effects of product & crop

# Return to a single-panel plot layout.
par(mfrow = c(1, 1))
# Event type(s) with the largest property damage, then the largest crop
# damage.
with(storm.evtype.propdmg.sum, EVTYPE[which(sum.propdmg == max(sum.propdmg))])
## [1] "TORNADO"
with(storm.evtype.cropdmg.sum, EVTYPE[which(sum.cropdmg == max(sum.cropdmg))])
## [1] "DROUGHT"
# Five event types with the largest property damage.
propdmg.rank <- order(storm.evtype.propdmg.sum$sum.propdmg, decreasing = TRUE)
head(storm.evtype.propdmg.sum[propdmg.rank, ], n = 5)
## # A tibble: 5 x 2
##   EVTYPE       sum.propdmg
##   <chr>              <dbl>
## 1 TORNADO     40966206600.
## 2 FLOOD       12338106477 
## 3 HURRICANE    9400719010 
## 4 FLASH FLOOD  8100968357.
## 5 HAIL         7617562203.
# Five event types with the largest crop damage.
cropdmg.rank <- order(storm.evtype.cropdmg.sum$sum.cropdmg, decreasing = TRUE)
head(storm.evtype.cropdmg.sum[cropdmg.rank, ], n = 5)
## # A tibble: 5 x 2
##   EVTYPE    sum.cropdmg
##   <chr>           <dbl>
## 1 DROUGHT    9860245000
## 2 ICE STORM  5021998500
## 3 HURRICANE  2561400000
## 4 FLOOD      2333949550
## 5 HAIL       1945373390