Let’s download the data and take a quick look at it.
# Fetch the compressed storm data only when no local copy exists, so that
# re-running the analysis does not download the archive again.
if (!file.exists("StormData.csv.bz2")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "StormData.csv.bz2"
  )
}

# The archive is passed to read.csv as-is; no separate decompression step.
datas <- read.csv(file = "StormData.csv.bz2")

# Preview the first rows of the loaded table.
head(datas)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
# Report the size of the loaded table (number of rows, then columns).
dim(datas)
## [1] 902297 37
# Population health impact ----
# POPHEALTHIMPACT is, for every recorded event, fatalities plus injuries.
datas$POPHEALTHIMPACT <- datas$FATALITIES + datas$INJURIES

# Build a dedicated table holding the total impact per event type.
populationHealthImpactDatas <- aggregate(
  datas$POPHEALTHIMPACT,
  by = list(datas$EVTYPE),
  FUN = sum
)
names(populationHealthImpactDatas) <- c("EVTYPE", "SUMFATINJ")

# Rank the event types from most to least harmful and keep the first ten.
populationHealthImpactDatas <- populationHealthImpactDatas[
  order(-populationHealthImpactDatas$SUMFATINJ)[seq_len(10)],
]
# Economic consequences ----
# PROPDMG and CROPDMG hold only the leading digits of each damage estimate;
# PROPDMGEXP and CROPDMGEXP hold the magnitude code that scales them (for
# example 25.0 with "K" is 25,000). Adding the raw columns would mix
# thousands, millions and billions, so every amount is scaled first.

# Translate damage magnitude codes into numeric multipliers.
#   exponentCode: vector of magnitude codes (character or factor).
#   Returns a numeric vector with one multiplier per input element.
# "K", "M" and "B" stand for thousands, millions and billions. Lower-case
# variants are assumed to mean the same thing. Any other code (blank,
# digits, symbols) is left unscaled.
# NOTE(review): confirm how rows with such codes should be treated.
damageMultiplier <- function(exponentCode) {
  knownMultipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  normalisedCode <- toupper(as.character(exponentCode))
  multiplier <- unname(knownMultipliers[normalisedCode])
  # Codes that are not K/M/B look up as NA; treat them as "no scaling".
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# TOTALDMG is the scaled property damage plus the scaled crop damage.
datas$TOTALDMG <- datas$PROPDMG * damageMultiplier(datas$PROPDMGEXP) +
  datas$CROPDMG * damageMultiplier(datas$CROPDMGEXP)

# Build a dedicated table holding the total damage per event type.
economicConsequencesImpactDatas <- aggregate(
  datas$TOTALDMG,
  by = list(datas$EVTYPE),
  FUN = sum
)
colnames(economicConsequencesImpactDatas) <- c("EVTYPE", "SUMECODMG")

# Sort by total damage, largest first, and keep the top ten event types.
# head() returns fewer rows instead of NA padding when there are under ten.
economicConsequencesImpactDatas <- economicConsequencesImpactDatas[
  order(-economicConsequencesImpactDatas$SUMECODMG),
]
economicConsequencesImpactDatas <- head(economicConsequencesImpactDatas, 10L)
# Bar chart of the ten event types with the highest combined count of
# injuries and fatalities. The wide bottom margin leaves room for the
# perpendicular (las = 2) event-type labels.
par(mgp = c(5, 1, 0), mar = c(12, 10, 3, 7))
barplot(
  populationHealthImpactDatas$SUMFATINJ,
  names.arg = populationHealthImpactDatas$EVTYPE,
  main = "10 types of events most harmful with respect to population health",
  ylab = "Sum of injuries and fatalities",
  col = "red",
  las = 2
)

# Bar chart of the ten event types with the largest summed damage; same
# layout as above with a slightly narrower left margin.
par(mgp = c(5, 1, 0), mar = c(12, 8, 3, 7))
barplot(
  economicConsequencesImpactDatas$SUMECODMG,
  names.arg = economicConsequencesImpactDatas$EVTYPE,
  main = "10 types of events with the greatest economic consequences",
  ylab = "Sum of properties and crops consequences",
  col = "blue",
  las = 2
)