# Set the default knitr chunk option so each chunk's source code is echoed
# in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
This document explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm data. The following questions are addressed in the analysis: Which types of events are most harmful to population health? Which types of events have the greatest economic consequences?
The dataset, a raw CSV file, is quite large and takes some time to load, but it is manageable. If loading is too slow, one can add cache = TRUE to the chunk options.
library(readr)
library(rmarkdown)
library(knitr)
# Load the raw storm data.
# readr guesses column types from the first rows only; several text columns
# are empty there and were guessed as logical, which caused millions of
# parsing failures (e.g. WFO values such as "NG") and replaced the real
# values with NA. Declaring those columns as character avoids the failures;
# every other column is still guessed as before.
df <- read_csv(
  "repdata_data_StormData.csv/repdata_data_StormData.csv",
  col_types = cols(
    BGN_AZI = col_character(),
    BGN_LOCATI = col_character(),
    END_DATE = col_character(),
    END_TIME = col_character(),
    COUNTYENDN = col_character(),
    END_AZI = col_character(),
    END_LOCATI = col_character(),
    CROPDMGEXP = col_character(),
    WFO = col_character(),
    STATEOFFIC = col_character(),
    ZONENAMES = col_character(),
    REMARKS = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## BGN_DATE = col_character(),
## BGN_TIME = col_character(),
## TIME_ZONE = col_character(),
## COUNTYNAME = col_character(),
## STATE = col_character(),
## EVTYPE = col_character(),
## BGN_AZI = col_logical(),
## BGN_LOCATI = col_logical(),
## END_DATE = col_logical(),
## END_TIME = col_logical(),
## COUNTYENDN = col_logical(),
## END_AZI = col_logical(),
## END_LOCATI = col_logical(),
## PROPDMGEXP = col_character(),
## CROPDMGEXP = col_logical(),
## WFO = col_logical(),
## STATEOFFIC = col_logical(),
## ZONENAMES = col_logical(),
## REMARKS = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 5255570 parsing failures.
## row col expected actual file
## 1671 WFO 1/0/T/F/TRUE/FALSE NG 'repdata_data_StormData.csv/repdata_data_StormData.csv'
## 1673 WFO 1/0/T/F/TRUE/FALSE NG 'repdata_data_StormData.csv/repdata_data_StormData.csv'
## 1674 WFO 1/0/T/F/TRUE/FALSE NG 'repdata_data_StormData.csv/repdata_data_StormData.csv'
## 1675 WFO 1/0/T/F/TRUE/FALSE NG 'repdata_data_StormData.csv/repdata_data_StormData.csv'
## 1678 WFO 1/0/T/F/TRUE/FALSE NG 'repdata_data_StormData.csv/repdata_data_StormData.csv'
## .... ... .................. ...... .......................................................
## See problems(...) for more details.
# Print the first research question ahead of the fatalities and injuries plots.
print("Across the United States, which types of events are most harmful with respect to population health?")
## [1] "Across the United States, which types of events are most harmful with respect to population health?"
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
print("Fatalities by atmospheric events")
## [1] "Fatalities by atmospheric events"
# Total fatalities per event type, ordered from deadliest to least deadly.
df2 <- df %>%
  group_by(EVTYPE) %>%
  summarize(FATALITIES = sum(FATALITIES)) %>%
  arrange(desc(FATALITIES))
# Keep the five event types with the most fatalities.
df2 <- df2[1:5, ]
# One bar per event type, labelled with the event name in a reduced font.
barplot(
  height = df2$FATALITIES,
  names.arg = df2$EVTYPE,
  axisnames = TRUE,
  cex.names = 0.6
)
print("Injuries by atmospheric events")
## [1] "Injuries by atmospheric events"
# Total injuries per event type, ordered from most to fewest injuries.
df3 <- summarize(group_by(df, EVTYPE), INJURIES = sum(INJURIES))
df3 <- arrange(df3, desc(INJURIES))
# Keep the five event types with the most injuries.
df3 <- df3[1:5, ]
# The bar labels must come from df3 itself so that each name matches the
# injury total plotted above it.
barplot(df3$INJURIES, axisnames = TRUE, names.arg = df3$EVTYPE, cex.names = 0.6)
print("Across the United States, which types of events have the greatest economic consequences?")
## [1] "Across the United States, which types of events have the greatest economic consequences?"
# Combined crop and property damage per event type.
# The totals are computed within each EVTYPE group (not over the whole
# dataset), and missing values are skipped with na.rm = TRUE in each column.
# NOTE(review): PROPDMGEXP / CROPDMGEXP are not applied here; if they encode
# magnitude multipliers for the damage figures, Costs mixes units - confirm.
df4 <- summarize(
  group_by(df, EVTYPE),
  Costs = sum(CROPDMG, na.rm = TRUE) + sum(PROPDMG, na.rm = TRUE)
)
df4 <- arrange(df4, desc(Costs))
# Keep the five costliest event types.
df4 <- df4[1:5, ]
barplot(df4$Costs, axisnames = TRUE, names.arg = df4$EVTYPE, cex.names = 0.6)