Data over a 60-year period show that tornadoes, heat, floods, and winds cause the most harm to people in terms of deaths and injuries. Floods, hurricanes, tornadoes, and storms cause the most damage to property and crops.
By default, display all R code chunks, using the knitr::opts_chunk option.
I am using the following libraries.
knitr::opts_chunk$set(echo=TRUE)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the raw storm data straight from the bz2-compressed CSV, keeping text
# columns as character vectors, then show the structure of the result.
storm <- read.csv(
  file = "repdata%2Fdata%2FStormData.csv.bz2",
  stringsAsFactors = FALSE
)
str(storm)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Preview the first six rows of the raw data.
head(storm, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
The variables needed for this analysis are EVTYPE, FATALITIES, INJURIES,
PROPDMG, PROPDMGEXP, CROPDMG, and CROPDMGEXP.
# Names of the columns this analysis uses: the event type, the two human-harm
# counts, and the damage amounts with their magnitude codes.
myvars <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
# Working copy restricted to those columns.
storm2 <- storm[, myvars, drop = FALSE]
str(storm2)
## 'data.frame': 902297 obs. of 7 variables:
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
# Preview the first six rows of the reduced data set.
head(storm2, n = 6L)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
Convert EVTYPE, PROPDMGEXP, and CROPDMGEXP to uppercase so that values differing only in letter case are treated as the same value.
# Upper-case the event type and both magnitude-code columns in one pass so
# that values differing only in letter case compare equal later on.
storm2[c("EVTYPE", "PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  storm2[c("EVTYPE", "PROPDMGEXP", "CROPDMGEXP")],
  toupper
)
Examine the distribution of EVTYPE.
# Count the records for each EVTYPE value and list them, most frequent first.
# all() with no arguments is just TRUE, which is not a row-count setting for
# tibble printing, so the original call still showed only the first 10 rows.
# n = Inf prints every row of the frequency table.
temp <- storm2 %>%
  group_by(EVTYPE) %>%
  summarize(records = n()) %>%
  arrange(desc(records)) %>%
  print(n = Inf)
## # A tibble: 898 x 2
## EVTYPE records
## <chr> <int>
## 1 HAIL 288661
## 2 TSTM WIND 219942
## 3 THUNDERSTORM WIND 82564
## 4 TORNADO 60652
## 5 FLASH FLOOD 54277
## 6 FLOOD 25327
## 7 THUNDERSTORM WINDS 20843
## 8 HIGH WIND 20214
## 9 LIGHTNING 15754
## 10 HEAVY SNOW 15708
## # ... with 888 more rows
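Group some values of EVTYPE: every event type whose name contains WIND, FLOOD, or HEAT is collapsed into the single category WIND, FLOOD, or HEAT, respectively. The grouped value is stored in a new variable, Event.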
# Start Event as a copy of EVTYPE, then collapse every label containing a
# keyword into that keyword. The replacements run in sequence on the updated
# column, so a label matched by an earlier keyword is not re-matched later
# unless the keyword itself contains the later pattern.
storm2$Event <- storm2$EVTYPE
storm2$Event <- replace(storm2$Event, grepl("WIND", storm2$Event), "WIND")
storm2$Event <- replace(storm2$Event, grepl("FLOOD", storm2$Event), "FLOOD")
storm2$Event <- replace(storm2$Event, grepl("HEAT", storm2$Event), "HEAT")
# Spot-check the first 50 grouped labels.
storm2[["Event"]][seq_len(50)]
## [1] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [8] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [15] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [22] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [29] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [36] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [43] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [50] "TORNADO"
# The first 50 original labels, for comparison with the grouped Event values.
storm2[["EVTYPE"]][seq_len(50)]
## [1] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [8] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [15] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [22] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [29] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [36] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [43] "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO" "TORNADO"
## [50] "TORNADO"
What are the unique values of PROPDMGEXP and CROPDMGEXP?
# Distinct magnitude codes recorded for property damage.
unique(storm2[["PROPDMGEXP"]])
## [1] "K" "M" "" "B" "+" "0" "5" "6" "?" "4" "2" "3" "H" "7" "-" "1" "8"
# Distinct magnitude codes recorded for crop damage.
unique(storm2[["CROPDMGEXP"]])
## [1] "" "M" "K" "B" "?" "0" "2"
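Create PropUnit and CropUnit, the dollar multipliers encoded by PROPDMGEXP and CROPDMGEXP (H = 100, K = 1,000, M = 1,000,000, B = 1,000,000,000), so that damages (Prop and Crop) can be calculated in US dollars. All other codes are left as missing.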
# Translate each magnitude code into its dollar multiplier by table lookup:
# H = hundreds, K = thousands, M = millions, B = billions. A code that is not
# one of these four has no match, so its multiplier is NA.
storm2$PropUnit <- c(100, 1000, 1000000, 1000000000)[
  match(storm2$PROPDMGEXP, c("H", "K", "M", "B"))
]
storm2$CropUnit <- c(100, 1000, 1000000, 1000000000)[
  match(storm2$CROPDMGEXP, c("H", "K", "M", "B"))
]
# Dollar amounts: reported figure times its multiplier (NA where the
# multiplier is NA).
storm2$DmgCrop <- storm2$CROPDMG * storm2$CropUnit
storm2$DmgProp <- storm2$PROPDMG * storm2$PropUnit
head(storm2, n = 6L)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
## Event PropUnit CropUnit DmgCrop DmgProp
## 1 TORNADO 1000 NA NA 25000
## 2 TORNADO 1000 NA NA 2500
## 3 TORNADO 1000 NA NA 25000
## 4 TORNADO 1000 NA NA 2500
## 5 TORNADO 1000 NA NA 2500
## 6 TORNADO 1000 NA NA 2500
Replace missing values with 0.
Next, sum DmgCrop and DmgProp to create DMG, the total damage in millions of US dollars.
# Overwrite every NA cell in the data frame with 0 so the damage columns can
# be added and summed.
storm2[is.na(storm2)] <- 0
# Combined crop and property damage, rescaled from dollars to millions.
storm2$DMG <- with(storm2, (DmgCrop + DmgProp) / 1000000)
summary(storm2)
## EVTYPE FATALITIES INJURIES
## Length:902297 Min. : 0.0000 Min. : 0.0000
## Class :character 1st Qu.: 0.0000 1st Qu.: 0.0000
## Mode :character Median : 0.0000 Median : 0.0000
## Mean : 0.0168 Mean : 0.1557
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :583.0000 Max. :1700.0000
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## Min. : 0.00 Length:902297 Min. : 0.000 Length:902297
## 1st Qu.: 0.00 Class :character 1st Qu.: 0.000 Class :character
## Median : 0.00 Mode :character Median : 0.000 Mode :character
## Mean : 12.06 Mean : 1.527
## 3rd Qu.: 0.50 3rd Qu.: 0.000
## Max. :5000.00 Max. :990.000
## Event PropUnit CropUnit
## Length:902297 Min. :0.000e+00 Min. :0.00e+00
## Class :character 1st Qu.:0.000e+00 1st Qu.:0.00e+00
## Mode :character Median :0.000e+00 Median :0.00e+00
## Mean :5.737e+04 Mean :1.25e+04
## 3rd Qu.:1.000e+03 3rd Qu.:1.00e+03
## Max. :1.000e+09 Max. :1.00e+09
## DmgCrop DmgProp DMG
## Min. :0.000e+00 Min. :0.000e+00 Min. :0.00e+00
## 1st Qu.:0.000e+00 1st Qu.:0.000e+00 1st Qu.:0.00e+00
## Median :0.000e+00 Median :0.000e+00 Median :0.00e+00
## Mean :5.442e+04 Mean :4.736e+05 Mean :5.30e-01
## 3rd Qu.:0.000e+00 3rd Qu.:5.000e+02 3rd Qu.:0.00e+00
## Max. :5.000e+09 Max. :1.150e+11 Max. :1.15e+05
The storm2 dataset needs to be aggregated by event so that we can analyze which events cause the most harm to people and the most damage to property.
# One summary table per outcome: total per Event, largest total first.

# Total fatalities by event.
deaths <- arrange(
  summarise(group_by(storm2, Event), deaths = sum(FATALITIES)),
  desc(deaths)
)

# Total injuries by event.
injuries <- arrange(
  summarise(group_by(storm2, Event), injuries = sum(INJURIES)),
  desc(injuries)
)

# Total combined damage (DMG) by event.
damages <- arrange(
  summarise(group_by(storm2, Event), damages = sum(DMG)),
  desc(damages)
)
Take the first 10 rows of deaths, injuries, and damages datasets.
# Keep the ten highest-ranked events from each summary table.
# head() returns at most 10 rows, whereas indexing with 1:10 would pad the
# result with all-NA rows if a table ever had fewer than 10 events.
deaths2 <- head(deaths, 10)
injuries2 <- head(injuries, 10)
damages2 <- head(damages, 10)
The following two bar graphs show weather-related deaths and injuries.
Tornadoes, heat, floods, and wind are the primary causes of harm to people.
# Bar chart of total deaths for the events kept in deaths2; bars are labelled
# on the axis and repeated in the legend.
barplot(
  height = deaths2[["deaths"]],
  names.arg = deaths2[["Event"]],
  legend.text = deaths2[["Event"]],
  col = heat.colors(10),
  cex.names = 0.5,
  main = "Deaths by Weather Events",
  ylab = "Deaths"
)
# Same layout for total injuries, using the events kept in injuries2.
barplot(
  height = injuries2[["injuries"]],
  names.arg = injuries2[["Event"]],
  legend.text = injuries2[["Event"]],
  col = heat.colors(10),
  cex.names = 0.5,
  main = "Injuries by Weather Events",
  ylab = "Injuries"
)
The following bar graph shows weather-related damage to property and crops. As suspected, floods, hurricanes, and tornadoes cause the most damage to property and crops.
# Bar chart of combined property and crop damage for the events kept in
# damages2; bars are labelled on the axis and repeated in the legend.
barplot(
  height = damages2[["damages"]],
  names.arg = damages2[["Event"]],
  legend.text = damages2[["Event"]],
  col = heat.colors(10),
  cex.names = 0.5,
  main = "Property and Crop Damages by Weather Events",
  ylab = "Damages in Millions of US Dollars"
)