###Synopsis In this report we study different kinds of natural events: which ones have the biggest impact on health (the number of injuries and deaths for each event type), and which ones have the biggest economic impact. We will see that tornadoes have a big impact on both.
###Data processing get the data
# Load the raw storm data set from the local download folder.
data <- read.csv(
  file = "C:\\Users\\rnoireau\\Downloads\\repdata_data_StormData.csv.bz2"
)
I will use this library in my code
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
###Which types of events are most harmful with respect to population health?
Mean number of fatalities and injuries per event type
# Average fatalities and injuries per event type, ordered by mean fatalities
# and then by mean injuries (both descending). Rows with a missing count in
# either column are dropped first.
healt <- data %>%
  filter(!is.na(FATALITIES), !is.na(INJURIES)) %>%
  group_by(EVTYPE) %>%
  summarise(
    mean_Fatalities = mean(FATALITIES),
    mean_injuries = mean(INJURIES)
  ) %>%
  arrange(desc(mean_Fatalities), desc(mean_injuries))
I order by fatalities first because they are more important than injuries
Top 20 worst events for health
# Show the first 20 rows of the ordered summary.
head(healt, n = 20)
## # A tibble: 20 x 3
## EVTYPE mean_Fatalities mean_injuries
## <fct> <dbl> <dbl>
## 1 TORNADOES, TSTM WIND, HAIL 25 0
## 2 COLD AND SNOW 14 0
## 3 TROPICAL STORM GORDON 8 43
## 4 RECORD/EXCESSIVE HEAT 5.67 0
## 5 EXTREME HEAT 4.36 7.05
## 6 HEAT WAVE DROUGHT 4 15
## 7 HIGH WIND/SEAS 4 0
## 8 MARINE MISHAP 3.5 2.5
## 9 WINTER STORMS 3.33 5.67
## 10 HIGH WIND AND SEAS 3 20
## 11 Heavy surf and wind 3 0
## 12 ROUGH SEAS 2.67 1.67
## 13 HEAT WAVES 2.5 0
## 14 RIP CURRENTS/HEAVY SURF 2.5 0
## 15 HEAT WAVE 2.32 4.18
## 16 UNSEASONABLY WARM AND DRY 2.23 0
## 17 HURRICANE OPAL/HIGH WINDS 2 0
## 18 TSUNAMI 1.65 6.45
## 19 HEAVY SEAS 1.5 0
## 20 Hypothermia/Exposure 1.33 0
Only keep the first 10, because a lot of events have no big impact
# Keep the ten event types with the highest mean fatalities as a plain data
# frame with columns X1 (event name), X2 (mean fatalities), X3 (mean injuries).
#
# The columns are built directly instead of through cbind(): binding a
# character vector to numeric vectors produces an all-character matrix, so the
# means would be stored as strings (factors on R < 4.0), and a later
# as.numeric() on a factor returns level codes rather than the values.
healt <- with(
  head(healt, 10),
  data.frame(
    X1 = as.character(EVTYPE),
    X2 = mean_Fatalities,
    X3 = mean_injuries,
    stringsAsFactors = FALSE
  )
)
Reorganising data to be able to build the next plot
# Stack the X2 values on top of the X3 values in a single column named
# "healt.X2" (both pieces need the same column name for rbind()).
X2 <- data.frame(healt$X2)
X3 <- setNames(data.frame(healt$X3), "healt.X2")
healtplotvict <- rbind(X2, X3)
Creating data frame for the plot
# Victim-kind label for each stacked row: ten "fatal" rows, then ten "injure".
healtplotlabel <- rep(c("fatal", "injure"), each = 10)
# Event names repeated twice so they line up with the stacked values.
healtplotevent <- local({
  event_names <- data.frame(healt$X1)
  doubled <- rbind(event_names, event_names)
  as.data.frame(lapply(doubled, unlist))
})
New data frame
# Long-format plotting table: event name, victim kind, victim value.
healt <- cbind(healtplotevent, healtplotlabel, healtplotvict) %>%
  data.frame()
Creating the plot of mean fatalities and injuries per event type
library(dplyr)
library(ggplot2)
# Horizontal stacked bars: one bar per event type, filled by victim kind.
p <- ggplot(
  data = healt,
  mapping = aes(
    x = as.factor(healt.X1),
    y = as.numeric(unlist(healt.X2)),
    fill = as.factor(healtplotlabel)
  )
)
g <- p +
  geom_bar(stat = "identity") +
  labs(x = "event type", y = "victims") +
  coord_flip()
print(g)
On this plot we can see that tornado is the worst for health
Compute the total number of deaths and injuries for each event type
# Total fatalities and injuries per event type. Rows with a missing count and
# rows whose event type is "?" are excluded.
healtvictim <- data %>%
  filter(!is.na(FATALITIES), !is.na(INJURIES), EVTYPE != "?") %>%
  group_by(EVTYPE) %>%
  summarise(
    sum_Fatalities = sum(FATALITIES),
    sum_injuries = sum(INJURIES)
  )
Sum both deaths and injuries
# Character matrix with one row per event type and four columns: event name,
# total victims (deaths + injuries), deaths, injuries.
#
# The summarised columns are named sum_Fatalities / sum_injuries; the raw
# FATALITIES / INJURIES columns no longer exist after summarise(), so reading
# them returned NULL (with "Unknown or uninitialised column" warnings) and left
# only the name column. Rows are ordered by total victims, largest first, so
# that head() on the result really shows the worst events.
healt_victim <- local({
  total_victims <- healtvictim$sum_Fatalities + healtvictim$sum_injuries
  by_total <- order(total_victims, decreasing = TRUE)
  cbind(
    as.character(healtvictim$EVTYPE),
    total_victims,
    healtvictim$sum_Fatalities,
    healtvictim$sum_injuries,
    deparse.level = 0
  )[by_total, , drop = FALSE]
})
## Warning: Unknown or uninitialised column: 'FATALITIES'.
## Warning: Unknown or uninitialised column: 'INJURIES'.
## Warning: Unknown or uninitialised column: 'FATALITIES'.
## Warning: Unknown or uninitialised column: 'INJURIES'.
# NOTE(review): the result of this call is never assigned, and it prints an
# empty data frame (0 columns, 0 rows) -- `healt` was rebuilt for plotting and
# apparently has no `X2` column at this point. Looks like leftover code with
# no effect on the analysis; confirm and remove.
rbind(data.frame(healt$X2),data.frame(healt$X2))
## data frame with 0 columns and 0 rows
See the 20 worst events for health
# Show the first 20 rows of the victim matrix.
head(healt_victim, n = 20)
## [,1]
## [1,] " HIGH SURF ADVISORY"
## [2,] " COASTAL FLOOD"
## [3,] " FLASH FLOOD"
## [4,] " LIGHTNING"
## [5,] " TSTM WIND"
## [6,] " TSTM WIND (G45)"
## [7,] " WATERSPOUT"
## [8,] " WIND"
## [9,] "ABNORMAL WARMTH"
## [10,] "ABNORMALLY DRY"
## [11,] "ABNORMALLY WET"
## [12,] "ACCUMULATED SNOWFALL"
## [13,] "AGRICULTURAL FREEZE"
## [14,] "APACHE COUNTY"
## [15,] "ASTRONOMICAL HIGH TIDE"
## [16,] "ASTRONOMICAL LOW TIDE"
## [17,] "AVALANCE"
## [18,] "AVALANCHE"
## [19,] "BEACH EROSIN"
## [20,] "Beach Erosion"
With the previous data we can conclude that tornado is the worst event for health
##Across the United States, which types of events have the greatest economic consequences?
Lookup table to convert the damage exponent codes into the right multipliers
# Multipliers for the damage magnitude codes, looked up by name:
# h/H = hundreds, k/K = thousands, m/M = millions, b/B = billions.
#
# "m" is a million here, not 10^-3 (milli): the codes denote magnitudes of
# dollar amounts. "B" was missing, and both letter cases are listed because the
# lookup by name is case-sensitive.
# There is no entry for the empty code: `[` never matches a zero-length name,
# so callers must treat a missing (NA) lookup result as a multiplier of 1.
values <- c(
  h = 1e2, H = 1e2,
  k = 1e3, K = 1e3,
  m = 1e6, M = 1e6,
  b = 1e9, B = 1e9
)
get only the useful data
# Convert damage magnitude codes (PROPDMGEXP / CROPDMGEXP) to numeric
# multipliers. The codes are turned into upper-case character strings before
# the lookup: indexing a named vector with a factor uses the factor's integer
# level codes, not its labels, which silently picks the wrong multiplier.
# Codes outside H/K/M/B (empty, digits, symbols) fall back to 1 so they do not
# turn the group means into NA -- NOTE(review): confirm this is the wanted
# treatment for digit codes.
damage_multiplier <- function(exp_code) {
  magnitude <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(magnitude[toupper(trimws(as.character(exp_code)))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Mean property and crop damage per event type, most costly first. Rows with a
# missing damage amount and rows whose event type is "?" are excluded.
cost <- data %>%
  filter(!is.na(PROPDMG)) %>%
  filter(!is.na(CROPDMG)) %>%
  filter(EVTYPE != "?") %>%
  group_by(EVTYPE) %>%
  summarise(
    mean_propdmg = mean(PROPDMG * damage_multiplier(PROPDMGEXP)),
    mean_cropdmg = mean(CROPDMG * damage_multiplier(CROPDMGEXP))
  ) %>%
  arrange(desc(mean_propdmg), desc(mean_cropdmg))
get only the top 10
# Keep the ten most costly event types as a plain data frame with columns
# X1 (event name), X2 (mean property damage), X3 (mean crop damage).
#
# EVTYPE is converted with as.character(): when it is a factor, cbind() with
# numeric columns stores its integer level codes instead of the event names,
# so the plot axis would show numbers. Building the columns directly also
# keeps the damage means numeric.
cost_exp <- with(
  head(cost, 10),
  data.frame(
    X1 = as.character(EVTYPE),
    X2 = mean_propdmg,
    X3 = mean_cropdmg,
    stringsAsFactors = FALSE
  )
)
# One-column pieces sharing the name "cost_exp.X2" so they can be stacked.
X2 <- data.frame(cost_exp$X2)
X3 <- data.frame(cost_exp$X3)
names(X3) <- c("cost_exp.X2")
create data to make the plot
# Stack the two one-column pieces (X2 first, then X3) into one column.
cost_expplotcost <- rbind(X2, X3)
# Damage-kind label per stacked row: ten "propdmg" rows, then ten "cropdmg".
cost_expplotlabel <- rep(c("propdmg", "cropdmg"), each = 10)
# Event identifiers repeated twice so they line up with the stacked values.
cost_expplotevent <- local({
  event_names <- data.frame(cost_exp$X1)
  doubled <- rbind(event_names, event_names)
  as.data.frame(lapply(doubled, unlist))
})
This plot represents the cost of the 10 worst events for the economy
# Long-format plotting table: event, damage kind, damage value.
cost_exp <- cbind(cost_expplotevent, cost_expplotlabel, cost_expplotcost) %>%
  data.frame()
Create the plot
# Stacked bars: one bar per event type, filled by damage kind.
p <- ggplot(
  data = cost_exp,
  mapping = aes(
    x = as.factor(cost_exp.X1),
    y = as.numeric(unlist(cost_exp.X2)),
    fill = as.factor(cost_expplotlabel)
  )
)
g <- p +
  geom_bar(stat = "identity") +
  labs(x = "event type", y = "cost")
print(g)
On this plot we can see that tornado is the worst for the economy
# Convert damage magnitude codes (PROPDMGEXP / CROPDMGEXP) to numeric
# multipliers. Codes are compared as upper-case strings (never as factor level
# codes); anything outside H/K/M/B falls back to 1 -- NOTE(review): confirm
# this is the wanted treatment for digit and symbol codes.
damage_multiplier <- function(exp_code) {
  magnitude <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(magnitude[toupper(trimws(as.character(exp_code)))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Total property and crop damage per event type, largest combined total first.
# The raw PROPDMG / CROPDMG amounts are scaled by their magnitude code before
# summing; adding the unscaled amounts would mix hundreds, thousands, millions
# and billions. The "?" event type is excluded, as in the mean-cost summary.
costtotal <- data %>%
  filter(!is.na(PROPDMG), !is.na(CROPDMG), EVTYPE != "?") %>%
  group_by(EVTYPE) %>%
  summarise(
    sum_propdmg = sum(PROPDMG * damage_multiplier(PROPDMGEXP)),
    sum_cropdmg = sum(CROPDMG * damage_multiplier(CROPDMGEXP))
  ) %>%
  arrange(desc(sum_propdmg + sum_cropdmg))

# Character matrix: event name, total damage, property damage, crop damage.
# Rows keep the ordering above, so head() shows the costliest events.
cost_total <- cbind(
  as.character(costtotal$EVTYPE),
  costtotal$sum_propdmg + costtotal$sum_cropdmg,
  costtotal$sum_propdmg,
  costtotal$sum_cropdmg
)
Top 10 highest-cost events
# Show the first 10 rows of the cost matrix.
head(cost_total, n = 10)
## [,1] [,2] [,3] [,4]
## [1,] " HIGH SURF ADVISORY" "200" "200" "0"
## [2,] " COASTAL FLOOD" "0" "0" "0"
## [3,] " FLASH FLOOD" "50" "50" "0"
## [4,] " LIGHTNING" "0" "0" "0"
## [5,] " TSTM WIND" "108" "108" "0"
## [6,] " TSTM WIND (G45)" "8" "8" "0"
## [7,] " WATERSPOUT" "0" "0" "0"
## [8,] " WIND" "0" "0" "0"
## [9,] "?" "5" "5" "0"
## [10,] "ABNORMAL WARMTH" "0" "0" "0"
With the previous data we can conclude that tornado is the worst event for the economy
##Results The answer to both questions is tornadoes; we can see it on the two graphs above