Summary
This analysis used the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. It has records from 1950 to 2018 of the characteristics of major storms and weather events, together with estimates of any fatalities, injuries, and property damage they caused.
The objective of the analysis was to determine which types of natural events are most harmful with respect to population health and which types have the greatest economic consequences.
The data was loaded using the read.csv() function.
# Load the raw NOAA storm data; read.csv() reads the bz2-compressed file
# directly. The location is built with file.path() instead of calling setwd(),
# so the session's working directory is left untouched.
# NOTE(review): data_dir is still a machine-specific absolute path; point it at
# the folder that holds the data file when running elsewhere.
data_dir <- "C:/Users/beatr/Desktop/CourseraR/5_Reproducible research/Project 2"
storm <- read.csv(file.path(data_dir, "repdata_data_StormData.csv.bz2"))
head(storm)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
# Show the structure of the raw data: each column's name, type and first values.
str(storm)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ BGN_TIME : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
## $ STATE : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : Factor w/ 35 levels ""," N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_LOCATI: Factor w/ 54429 levels "","- 1 N Albion",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_DATE : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_TIME : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_LOCATI: Factor w/ 34506 levels "","- .5 NNW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ WFO : Factor w/ 542 levels ""," CI","$AC",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ZONENAMES : Factor w/ 25112 levels ""," "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : Factor w/ 436781 levels "","-2 at Deer Park\n",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
The variables of interest were selected: EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP.
The total human harm is the sum of total fatalities and total injuries, grouped by type of event.
The total damage is the sum of total property damage and total crop damage.
The sum was grouped by type of event.
library(plyr)
# Keep only the columns this analysis uses: the event type, the casualty
# counts, and the property/crop damage amounts with their exponent codes.
storm1 <- storm[c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]
str(storm1)
## 'data.frame': 902297 obs. of 7 variables:
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
# Add `harm`, the per-record casualty count (injuries plus fatalities).
storm2 <- storm1
storm2$harm <- storm2$INJURIES + storm2$FATALITIES
head(storm2)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP harm
## 1 TORNADO 0 15 25.0 K 0 15
## 2 TORNADO 0 0 2.5 K 0 0
## 3 TORNADO 0 2 25.0 K 0 2
## 4 TORNADO 0 2 2.5 K 0 2
## 5 TORNADO 0 2 2.5 K 0 2
## 6 TORNADO 0 6 2.5 K 0 6
# Build the multipliers needed for
# damage = PROPDMG * 10^(property exponent) + CROPDMG * 10^(crop exponent).
unique(storm2$PROPDMGEXP)
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M

# Translate damage-exponent codes into powers of ten.
#
# code: factor or character vector of exponent codes (PROPDMGEXP / CROPDMGEXP).
# Returns a numeric vector of the same length as `code`:
#   a single digit "0".."9"             -> that digit
#   "h"/"H", "k"/"K", "m"/"M", "b"/"B"  -> 2, 3, 6, 9 (case-insensitive)
#   "" (no code)                        -> 0, i.e. the amount is used as given
#   anything else ("-", "?", "+", NA)   -> NA
#
# One lookup replaces the earlier two chained revalue() calls, which left ""
# and "k" as NA, turned "0" into a multiplier of 0, and scaled "5" and "8" by
# the wrong power of ten.
# NOTE(review): treating a blank code as "no scaling" is an assumption; confirm
# against the NOAA storm data documentation.
exp_code_to_power <- function(code) {
  code <- toupper(trimws(as.character(code)))
  letter_power <- c(H = 2, K = 3, M = 6, B = 9)

  power <- rep(NA_real_, length(code))
  power[!is.na(code) & code == ""] <- 0

  is_digit <- grepl("^[0-9]$", code)
  power[is_digit] <- as.numeric(code[is_digit])

  is_letter <- code %in% names(letter_power)
  power[is_letter] <- letter_power[code[is_letter]]

  power
}

# storm3: exponent codes expressed as powers of ten.
storm3 <- mutate(
  storm2,
  PEXP = exp_code_to_power(PROPDMGEXP),
  CEXP = exp_code_to_power(CROPDMGEXP)
)
head(storm3)

# storm4: powers of ten turned into numeric multipliers (10, 100, 1000, ...).
# NOTE(review): the corrected multipliers change the damage totals, so the
# printed outputs and the hard-coded plot limits later in this report predate
# this fix; re-knit and re-check them.
storm4 <- mutate(storm3, PEXP = 10^PEXP, CEXP = 10^CEXP)
head(storm4)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP harm
## 1 TORNADO 0 15 25.0 K 0 15
## 2 TORNADO 0 0 2.5 K 0 0
## 3 TORNADO 0 2 25.0 K 0 2
## 4 TORNADO 0 2 2.5 K 0 2
## 5 TORNADO 0 2 2.5 K 0 2
## 6 TORNADO 0 6 2.5 K 0 6
## PEXP CEXP
## 1 1000 NA
## 2 1000 NA
## 3 1000 NA
## 4 1000 NA
## 5 1000 NA
## 6 1000 NA
# Check the column types, in particular that PEXP and CEXP are numeric.
str(storm4)
## 'data.frame': 902297 obs. of 10 variables:
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ harm : num 15 0 2 2 2 6 1 0 15 0 ...
## $ PEXP : num 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 ...
## $ CEXP : num NA NA NA NA NA NA NA NA NA NA ...
# Scale the recorded damage figures by their multipliers to get absolute
# amounts (PRODMG1 = property damage, CROPDMG1 = crop damage).
# The stray unnamed `na.omit()` argument was removed: plyr::mutate() silently
# drops unnamed arguments, so it never filtered anything.
storm5 <- mutate(storm4, PRODMG1 = PROPDMG * PEXP, CROPDMG1 = CROPDMG * CEXP)
head(storm5)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP harm
## 1 TORNADO 0 15 25.0 K 0 15
## 2 TORNADO 0 0 2.5 K 0 0
## 3 TORNADO 0 2 25.0 K 0 2
## 4 TORNADO 0 2 2.5 K 0 2
## 5 TORNADO 0 2 2.5 K 0 2
## 6 TORNADO 0 6 2.5 K 0 6
## PEXP CEXP PRODMG1 CROPDMG1
## 1 1000 NA 25000 NA
## 2 1000 NA 2500 NA
## 3 1000 NA 25000 NA
## 4 1000 NA 2500 NA
## 5 1000 NA 2500 NA
## 6 1000 NA 2500 NA
# Total economic damage per record = property damage + crop damage.
# A missing component (NA) counts as zero, so a known property figure is not
# discarded just because the crop figure is missing. With a plain `+`, every
# such row became NA and was later dropped from the per-event totals.
# NOTE(review): totals are larger than in the outputs printed in this report;
# re-check the hard-coded y-axis limit of the damage plot.
storm6 <- mutate(
  storm5,
  demage = rowSums(cbind(PRODMG1, CROPDMG1), na.rm = TRUE)
)
head(storm6)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP harm
## 1 TORNADO 0 15 25.0 K 0 15
## 2 TORNADO 0 0 2.5 K 0 0
## 3 TORNADO 0 2 25.0 K 0 2
## 4 TORNADO 0 2 2.5 K 0 2
## 5 TORNADO 0 2 2.5 K 0 2
## 6 TORNADO 0 6 2.5 K 0 6
## PEXP CEXP PRODMG1 CROPDMG1 demage
## 1 1000 NA 25000 NA NA
## 2 1000 NA 2500 NA NA
## 3 1000 NA 25000 NA NA
## 4 1000 NA 2500 NA NA
## 5 1000 NA 2500 NA NA
## 6 1000 NA 2500 NA NA
# Reduce the table to the event type plus the casualty and damage measures
# that are summed per event type further down.
storm7 <- storm6[c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PRODMG1", "CROPDMG1", "harm", "demage"
)]
head(storm7)
## EVTYPE FATALITIES INJURIES PRODMG1 CROPDMG1 harm demage
## 1 TORNADO 0 15 25000 NA 15 NA
## 2 TORNADO 0 0 2500 NA 0 NA
## 3 TORNADO 0 2 25000 NA 2 NA
## 4 TORNADO 0 2 2500 NA 2 NA
## 5 TORNADO 0 2 2500 NA 2 NA
## 6 TORNADO 0 6 2500 NA 6 NA
# Show the column names and types of the reduced table.
str(storm7)
## 'data.frame': 902297 obs. of 7 variables:
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PRODMG1 : num 25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
## $ CROPDMG1 : num NA NA NA NA NA NA NA NA NA NA ...
## $ harm : num 15 0 2 2 2 6 1 0 15 0 ...
## $ demage : num NA NA NA NA NA NA NA NA NA NA ...
# Which event types cause the greatest human harm?
# Sum fatalities per event type, then keep the five largest totals in
# decreasing order.
fatal <- aggregate(FATALITIES ~ EVTYPE, data = storm7, FUN = sum)
fatal5 <- fatal[order(-fatal[["FATALITIES"]])[1:5], ]
head(fatal5)
## EVTYPE FATALITIES
## 834 TORNADO 5633
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
# Sum injuries per event type and keep the five largest totals.
injury <- aggregate(INJURIES ~ EVTYPE, data = storm7, FUN = sum)
injury5 <- injury[order(-injury[["INJURIES"]])[1:5], ]
head(injury5)
## EVTYPE INJURIES
## 834 TORNADO 91346
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
# Sum combined casualties (fatalities + injuries) per event type and keep the
# five largest totals; harm5 feeds the first bar chart.
harm <- aggregate(harm ~ EVTYPE, data = storm7, FUN = sum)
harm5 <- harm[order(-harm[["harm"]])[1:5], ]
head(harm5)
## EVTYPE harm
## 834 TORNADO 96979
## 130 EXCESSIVE HEAT 8428
## 856 TSTM WIND 7461
## 170 FLOOD 7259
## 464 LIGHTNING 6046
# Which event types have the greatest economic consequences?
# Sum property damage per event type (the formula interface skips rows whose
# PRODMG1 is NA) and keep the five largest totals.
propdm <- aggregate(PRODMG1 ~ EVTYPE, data = storm7, FUN = sum)
propdm5 <- propdm[order(-propdm[["PRODMG1"]])[1:5], ]
head(propdm5)
## EVTYPE PRODMG1
## 62 FLOOD 144657709800
## 179 HURRICANE/TYPHOON 69305840000
## 333 TORNADO 57039360480
## 281 STORM SURGE 43323536000
## 50 FLASH FLOOD 16839233510
# Sum crop damage per event type (rows whose CROPDMG1 is NA are skipped by the
# formula interface) and keep the five largest totals.
cropdm <- aggregate(CROPDMG1 ~ EVTYPE, data = storm7, FUN = sum)
cropdm5 <- cropdm[order(-cropdm[["CROPDMG1"]])[1:5], ]
head(cropdm5)
## EVTYPE CROPDMG1
## 16 DROUGHT 13972566000
## 34 FLOOD 5661968450
## 98 RIVER FLOOD 5029459000
## 85 ICE STORM 5022113500
## 52 HAIL 3025537450
# Sum total damage (property + crop) per event type and keep the five largest
# totals; rows whose `demage` is NA are skipped by the formula interface.
demage <- aggregate(demage ~ EVTYPE, data = storm7, FUN = sum)
demage5 <- demage[order(-demage[["demage"]])[1:5], ]
head(demage5)
## EVTYPE demage
## 23 FLOOD 138007444500
## 62 HURRICANE/TYPHOON 29348167800
## 99 TORNADO 16570326150
## 57 HURRICANE 12405268000
## 75 RIVER FLOOD 10108369000
# Add demageB, the total damage expressed in billions, for plotting.
# The unnamed `na.omit()` argument was dropped: plyr::mutate() ignores unnamed
# arguments, so it had no effect.
demage5 <- mutate(demage5, demageB = demage / 1e9)
head(demage5)
## EVTYPE demage demageB
## 23 FLOOD 138007444500 138.00744
## 62 HURRICANE/TYPHOON 29348167800 29.34817
## 99 TORNADO 16570326150 16.57033
## 57 HURRICANE 12405268000 12.40527
## 75 RIVER FLOOD 10108369000 10.10837
library(ggplot2)
# Bar chart of the five event types with the most casualties
# (fatalities + injuries); each total is printed just above its bar.
# The upper y limit is derived from the data (10% headroom for the labels)
# rather than hard-coded, because ylim() silently drops any bar above it.
ggplot(harm5, aes(x = EVTYPE, y = harm)) +
  geom_col() +
  geom_text(aes(label = harm), vjust = -0.25, size = 3) +
  ylim(0, 1.1 * max(harm5$harm, na.rm = TRUE)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event type",
    y = "Fatalities and injuries",
    title = "U.S. top 5 event type - Fatalities and injuries"
  )
# Bar chart of the five event types with the highest total damage, in billions
# (demageB); each total is printed just above its bar, rounded to one decimal.
# The upper y limit is derived from the data (10% headroom for the labels):
# the previous fixed limit of 150 would silently drop any bar above 150 billion.
# Spelling in the axis label and title was corrected ("Damage", "billions").
ggplot(demage5, aes(x = EVTYPE, y = demageB)) +
  geom_col() +
  geom_text(aes(label = round(demageB, 1)), vjust = -0.25, size = 3) +
  ylim(0, 1.1 * max(demage5$demageB, na.rm = TRUE)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event type",
    y = "Damage",
    title = "U.S. top 5 event type - Damage in US$ billions"
  )
Floods were responsible for the highest damage. Tornadoes were the main cause of human harm.