#Reproducible Research - Course Project 2 : NOAA Storm Database analysis / Population health and economic impact of weather events across the United States.
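##Synopsis: The aim of this project is to analyze the NOAA Storm Database. The data cover the period from 1950 to November 2011. Two questions are addressed in this analysis: which event types are most harmful to population health, and which event types cause the greatest economic damage.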
# Load the raw storm-events CSV into a data frame and list its column names.
# The location is an absolute, machine-specific path; adjust it when running
# the script elsewhere.
dataset <- read.csv(
  file = "c:/datascience/05/project2/repdata_data_StormData.csv"
)
colnames(dataset)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
# Treat the event type as a categorical variable for grouping and plotting.
dataset[["EVTYPE"]] <- as.factor(dataset[["EVTYPE"]])

# Ten event types with the highest number of people affected, where
# "affected" is the total of fatalities and injuries for that event type.
df_top10_fatalities <- dataset %>%
  group_by(EVTYPE) %>%
  summarise(PeopleAffected = sum(FATALITIES) + sum(INJURIES)) %>%
  arrange(desc(PeopleAffected)) %>%
  head(n = 10)
df_top10_fatalities
## # A tibble: 10 x 2
## EVTYPE PeopleAffected
## <fct> <dbl>
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
## 6 HEAT 3037
## 7 FLASH FLOOD 2755
## 8 ICE STORM 2064
## 9 THUNDERSTORM WIND 1621
## 10 WINTER STORM 1527
# Bar chart of the ten event types with the most fatalities + injuries,
# ordered from the most to the least harmful.
ggplot(
  data = df_top10_fatalities,
  aes(x = reorder(EVTYPE, -PeopleAffected), y = PeopleAffected)
) +
  # The bar colour is a constant, so it is set on the layer rather than
  # mapped inside aes(): mapping the string "red" creates a one-level
  # legend and draws the bars in the default palette colour, not in red.
  geom_bar(stat = "identity", fill = "red") +
  ggtitle("Event type most harmful w.r.t population health") +
  # Without explicit labels the x axis is titled with the reorder() call.
  labs(x = "Event type", y = "Fatalities + injuries") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Translate damage-exponent codes into numeric multipliers.
#
# exp_code: vector of magnitude codes (character or factor).
# Returns a numeric vector of the same length: 1e3 for "K", 1e6 for "M",
# 1e9 for "B" (case-insensitive) and 1 for every other value, so amounts
# carrying an unrecognised or blank code are left unscaled.
# NOTE(review): only the K / M / B letters are scaled here; confirm how
# any other codes present in the data should be interpreted.
damage_multiplier <- function(exp_code) {
  magnitude <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(magnitude[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Ten event types with the largest combined property + crop damage.
# PROPDMG / CROPDMG are scaled by their PROPDMGEXP / CROPDMGEXP magnitude
# code before summing; adding the raw columns would mix thousands,
# millions and billions as if they were the same unit.
df_top10_damage <- dataset %>%
  mutate(
    damage_total = PROPDMG * damage_multiplier(PROPDMGEXP) +
      CROPDMG * damage_multiplier(CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarize(EconomicDamage = sum(damage_total)) %>%
  arrange(desc(EconomicDamage)) %>%
  head(10)
df_top10_damage
## (Printed table not reproduced here: the listing recorded earlier came from
## summing the unscaled PROPDMG / CROPDMG columns and no longer applies.
## Re-run the script to regenerate it.)
# Bar chart of the ten event types with the largest economic damage,
# ordered from the most to the least costly.
ggplot(
  data = df_top10_damage,
  aes(x = reorder(EVTYPE, -EconomicDamage), y = EconomicDamage)
) +
  # The bar colour is a constant, so it is set on the layer rather than
  # mapped inside aes(): mapping the number 3 puts it on a continuous fill
  # scale and adds a meaningless colourbar legend.
  # NOTE(review): "green3" presumes the original `fill=3` meant colour 3 of
  # the classic base palette -- confirm the intended colour.
  geom_bar(stat = "identity", fill = "green3") +
  ggtitle("Event type most harmful w.r.t economic damage") +
  # Without explicit labels the x axis is titled with the reorder() call.
  labs(x = "Event type", y = "Property + crop damage") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))