### Working directory
setwd('D:/users/beukenmjm/Onedrive - ZuydHogeschool/Documents/Data science cursus coursera/course5week4')
### Loading data and remove quotationmarks
data <- read.csv('./repdata%2Fdata%2FStormData.csv', quote = "\"")
head(data, 5)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
### installing some packages
library(knitr)
## Warning: package 'knitr' was built under R version 3.4.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.1
library(plotly)
## Warning: package 'plotly' was built under R version 3.4.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
subdata <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
subsetdata <- data[subdata]
names(subsetdata)
## [1] "EVTYPE" "FATALITIES" "INJURIES" "PROPDMG" "PROPDMGEXP"
## [6] "CROPDMG" "CROPDMGEXP"
head(subsetdata)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
### Looking at population health, on one hand to fatalities and on the other hand looking at injuries.
fatal <- aggregate(FATALITIES ~ EVTYPE, data = data, FUN = sum)
injury <- aggregate(INJURIES ~ EVTYPE, data = data, FUN = sum)
### creating data frames of the relevant data.
D1 <- data.frame(fatal$FATALITIES, row.names = fatal$EVTYPE)
D2 <- data.frame(injury$INJURIES, row.names = injury$EVTYPE)
### merge two data frames by row name
merged.data <- merge(D1,D2,by=0,all=TRUE)
merged.data[is.na(merged.data)] <- 0
head(merged.data)
## Row.names fatal.FATALITIES injury.INJURIES
## 1 HIGH SURF ADVISORY 0 0
## 2 COASTAL FLOOD 0 0
## 3 FLASH FLOOD 0 0
## 4 LIGHTNING 0 0
## 5 TSTM WIND 0 0
## 6 TSTM WIND (G45) 0 0
### 1 calculate sum rows merged data
merged.data <- transform(merged.data, sum=rowSums(merged.data[,2:3]))
### Order data frame in descending order
sorted.data <- merged.data[order(merged.data$sum, decreasing=TRUE), ][1:10, ]
print(sorted.data)
## Row.names fatal.FATALITIES injury.INJURIES sum
## 834 TORNADO 5633 91346 96979
## 130 EXCESSIVE HEAT 1903 6525 8428
## 856 TSTM WIND 504 6957 7461
## 170 FLOOD 470 6789 7259
## 464 LIGHTNING 816 5230 6046
## 275 HEAT 937 2100 3037
## 153 FLASH FLOOD 978 1777 2755
## 427 ICE STORM 89 1975 2064
## 760 THUNDERSTORM WIND 133 1488 1621
## 972 WINTER STORM 206 1321 1527
### Drop sum column
sorted.data$sum <- NULL
print(sorted.data)
## Row.names fatal.FATALITIES injury.INJURIES
## 834 TORNADO 5633 91346
## 130 EXCESSIVE HEAT 1903 6525
## 856 TSTM WIND 504 6957
## 170 FLOOD 470 6789
## 464 LIGHTNING 816 5230
## 275 HEAT 937 2100
## 153 FLASH FLOOD 978 1777
## 427 ICE STORM 89 1975
## 760 THUNDERSTORM WIND 133 1488
## 972 WINTER STORM 206 1321
### Create som new columnnames
colnames(sorted.data) <- c("Event", "Fatal", "Injury")
### first prepare property damage
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="K"] <- 1000
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="M"] <- 10^6
subsetdata$PROPEXP[subsetdata$PROPDMGEXP ==""] <- 1
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="B"] <- 10^9
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="m"] <- 10^6
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="+"] <- 0
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="0"] <- 1
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="5"] <- 10^5
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="6"] <- 10^6
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="?"] <- 0
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="4"] <- 10000
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="2"] <- 100
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="3"] <- 1000
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="h"] <- 100
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="7"] <- 10^7
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="H"] <- 100
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="-"] <- 0
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="1"] <- 10
subsetdata$PROPEXP[subsetdata$PROPDMGEXP =="8"] <- 10^8
subsetdata$PROPDMGVAL <- subsetdata$PROPDMG * subsetdata$PROPEXP
### second prop damage
subsetdata$CROPEXP[subsetdata$CROPDMGEXP == ""] <- 1
subsetdata$CROPEXP[subsetdata$CROPDMGEXP =="M"] <- 10^6
subsetdata$CROPEXP[subsetdata$CROPDMGEXP =="K"] <- 1000
subsetdata$CROPEXP[subsetdata$CROPDMGEXP =="m"] <- 10^9
subsetdata$CROPEXP[subsetdata$CROPDMGEXP =="B"] <- 10^6
subsetdata$CROPEXP[subsetdata$CROPDMGEXP =="?"] <- 0
subsetdata$CROPEXP[subsetdata$CROPDMGEXP =="0"] <- 1
subsetdata$CROPEXP[subsetdata$CROPDMGEXP =="k"] <- 1000
subsetdata$CROPEXP[subsetdata$CROPDMGEXP =="2"] <- 100
subsetdata$CROPDMGVAL <- subsetdata$CROPDMG * subsetdata$CROPEXP
### Looking on one hand to damage to crop and on the other hand looking to damage at properties.
prop.damage <- aggregate(subsetdata$PROPDMGVAL ~ EVTYPE, data = data, FUN = sum)
crop.damage <- aggregate(subsetdata$CROPDMGVAL ~ EVTYPE, data = data, FUN = sum)
colnames(prop.damage)[2] <- c("Value prop damage")
colnames(crop.damage)[2] <- c("Value crop damage")
head(prop.damage)
## EVTYPE Value prop damage
## 1 HIGH SURF ADVISORY 200000
## 2 COASTAL FLOOD 0
## 3 FLASH FLOOD 50000
## 4 LIGHTNING 0
## 5 TSTM WIND 8100000
## 6 TSTM WIND (G45) 8000
### creating data frames of the relevant data.
Data1 <- data.frame(prop.damage$`Value prop damage`, row.names = prop.damage$EVTYPE)
Data2 <- data.frame(crop.damage$`Value crop damage`, row.names = crop.damage$EVTYPE)
head(Data1)
## prop.damage..Value.prop.damage.
## HIGH SURF ADVISORY 200000
## COASTAL FLOOD 0
## FLASH FLOOD 50000
## LIGHTNING 0
## TSTM WIND 8100000
## TSTM WIND (G45) 8000
### merge two data frames by row name
merged.data.economic <- merge(Data1,Data2,by=0,all=TRUE)
head(merged.data.economic)
## Row.names prop.damage..Value.prop.damage.
## 1 HIGH SURF ADVISORY 200000
## 2 COASTAL FLOOD 0
## 3 FLASH FLOOD 50000
## 4 LIGHTNING 0
## 5 TSTM WIND 8100000
## 6 TSTM WIND (G45) 8000
## crop.damage..Value.crop.damage.
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
colnames(merged.data.economic) <- c("Events", "Value prop damage", "Value crop damage")
head(merged.data.economic)
## Events Value prop damage Value crop damage
## 1 HIGH SURF ADVISORY 200000 0
## 2 COASTAL FLOOD 0 0
## 3 FLASH FLOOD 50000 0
## 4 LIGHTNING 0 0
## 5 TSTM WIND 8100000 0
## 6 TSTM WIND (G45) 8000 0
### 1 calculate sum rows merged data
merged.data.economic <- transform(merged.data.economic, sum=rowSums(merged.data.economic[,2:3]))
### Order data frame in descending order
sorted.data.economic <- merged.data.economic[order(merged.data.economic$sum, decreasing=TRUE), ][1:10, ]
print(sorted.data.economic)
## Events Value.prop.damage Value.crop.damage sum
## 170 FLOOD 144657709807 5661968450 150319678257
## 411 HURRICANE/TYPHOON 69305840000 1099382800 70405222800
## 834 TORNADO 56947380617 414953270 57362333887
## 670 STORM SURGE 43323536000 5000 43323541000
## 244 HAIL 15735267513 3025954473 18761221986
## 153 FLASH FLOOD 16822673979 1421317100 18243991079
## 402 HURRICANE 11868319010 2741910000 14610229010
## 95 DROUGHT 1046106000 12474066000 13520172000
## 409 HURRICANE OPAL 3172846000 10009000000 13181846000
## 848 TROPICAL STORM 7703890550 678346000 8382236550
### Drop sum column
sorted.data.economic$sum <- NULL
print(sorted.data.economic)
## Events Value.prop.damage Value.crop.damage
## 170 FLOOD 144657709807 5661968450
## 411 HURRICANE/TYPHOON 69305840000 1099382800
## 834 TORNADO 56947380617 414953270
## 670 STORM SURGE 43323536000 5000
## 244 HAIL 15735267513 3025954473
## 153 FLASH FLOOD 16822673979 1421317100
## 402 HURRICANE 11868319010 2741910000
## 95 DROUGHT 1046106000 12474066000
## 409 HURRICANE OPAL 3172846000 10009000000
## 848 TROPICAL STORM 7703890550 678346000
### create a stacked bar chart
p <- plot_ly(sorted.data, x= ~Event, y= ~Fatal, type = 'bar', name = 'Fatal', width = 800, height = 500) %>%
add_trace(y= ~Injury, name='Injury') %>%
layout(title = "Top 10 events most hurtful to public health", yaxis = list(title = 'Number'), xaxis = list(title = 'Eventtype', tickangle = -45), barmode = 'stack', margin = list(b = 150))
print(p)
###The plot above shows that tornado has caused the highest number of fatalities and injuries.
webshot::install_phantomjs()
## phantomjs has been installed to C:\Users\beukenmjm\AppData\Roaming\PhantomJS
export(p, file = "public health.png")
### create a stacked bar chart
q <- plot_ly(sorted.data.economic, x= ~Events, y= ~Value.prop.damage, type = 'bar', name = 'Value property damage', width = 800, height = 500) %>%
add_trace(y= ~Value.crop.damage, name='Value crop damage') %>%
layout(title = "Top 10 events highest economic damage", yaxis = list(title = 'Value of damage'), xaxis = list(title = 'Eventtype', tickangle = -45), barmode = 'stack', margin = list(b = 150))
print(q)
### The plot above shows that flood has caused the most severe economic consequences.
webshot::install_phantomjs()
## phantomjs has been installed to C:\Users\beukenmjm\AppData\Roaming\PhantomJS
export(q, file = "economicdamage.png")