Synopsis

The purpose of this study is to explore the NOAA Storm Database and to analyze the impact of severe weather events.

Objectives:

  1. Identify events that are harmful to population health.

  2. Identify events that have the greatest economic consequences.

Loading required Packages

library(dplyr)
library(ggplot2)

Data Processing

Read the original files and display column names.

# Location of the compressed NOAA storm data set and the local file name used
# for it. The same name is used for both the download and the read so the two
# steps cannot drift apart (file names are case-sensitive on most systems).
data_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
data_file <- "StormData.csv.bz2"

# Download only when there is no local copy yet
if (!file.exists(data_file)) {
  download.file(data_url, destfile = data_file, method = "curl")
}

# Loading data
StormData <- read.csv(bzfile(data_file), sep = ",", header = TRUE)

colnames(StormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

StormData contains 902297 rows and 37 columns

# Keep only the columns used below: the event type, the casualty counts and
# the property / crop damage amounts together with their exponent codes

StormData1 <- StormData[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]


# Damage amounts are stored as a number (PROPDMG / CROPDMG) plus an exponent
# code (PROPDMGEXP / CROPDMGEXP). Multiplier applied for each code:
#
#   "", "-", "?", "+"   1          (blank / symbol: amount used as-is)
#   "0" .. "8"          10^digit   ("0" -> 1, "1" -> 10, ..., "8" -> 1e8)
#   "h", "H"            1e2
#   "k", "K"            1e3
#   "m", "M"            1e6
#   "b", "B"            1e9

# Translate a vector of exponent codes into numeric multipliers.
#
# The lookup is done by code value rather than by factor-level position, so
# the result does not depend on the locale's sort order or on which codes
# happen to be present in the data, and both damage columns share one table.
#
# exp_code: character or factor vector of exponent codes.
# Returns a numeric vector the same length as exp_code. NA codes stay NA; a
# code missing from the table gets multiplier 1 and triggers a warning.
exponent_multiplier <- function(exp_code) {
  exp_code <- as.character(exp_code)

  known_codes <- c(
    "", "-", "?", "+",
    as.character(0:8),
    "h", "H", "k", "K", "m", "M", "b", "B"
  )
  known_values <- c(
    1, 1, 1, 1,
    10^(0:8),
    1e2, 1e2, 1e3, 1e3, 1e6, 1e6, 1e9, 1e9
  )

  multiplier <- known_values[match(exp_code, known_codes)]

  # Codes outside the table are treated like the other unrecognised symbols
  unknown <- is.na(multiplier) & !is.na(exp_code)
  if (any(unknown)) {
    warning(
      "Unrecognised damage exponent code(s) treated as 1: ",
      paste(unique(exp_code[unknown]), collapse = ", "),
      call. = FALSE
    )
    multiplier[unknown] <- 1
  }

  multiplier
}

# Convert the codes in PROPDMGEXP to numeric multipliers for property damage

StormData1$PROPDMGEXP <- as.factor(StormData1$PROPDMGEXP)
StormData1$PROPDMGNUM <- exponent_multiplier(StormData1$PROPDMGEXP)

# Convert the codes in CROPDMGEXP to numeric multipliers for crop damage

StormData1$CROPDMGEXP <- as.factor(StormData1$CROPDMGEXP)
StormData1$CROPDMGNUM <- exponent_multiplier(StormData1$CROPDMGEXP)

Results

Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Plot the ten event types with the highest total number of fatalities

# Sum fatalities per event type and keep the ten largest totals. head() is
# used instead of [1:10, ] so that data with fewer than ten event types does
# not get padded with all-NA rows.
tb_fatalities <- StormData1 %>%
  group_by(EVTYPE) %>%
  summarise(fatalities = sum(FATALITIES, na.rm = TRUE)) %>%
  arrange(desc(fatalities)) %>%
  head(10)

# Set the factor levels in ranking order so the bars are drawn from the
# highest total to the lowest rather than alphabetically
tb_fatalities$EVTYPE <- factor(
  tb_fatalities$EVTYPE,
  levels = tb_fatalities$EVTYPE
)

ggplot(tb_fatalities, aes(EVTYPE, fatalities)) +
  geom_bar(stat = "identity", fill = "orange") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Fatalities") +
  ggtitle("Top 10 Weather Events with number of fatalities higher")

# Plot the ten event types with the highest total number of injuries

# Sum injuries per event type and keep the ten largest totals. head() is
# used instead of [1:10, ] so that data with fewer than ten event types does
# not get padded with all-NA rows.
tb_injuries <- StormData1 %>%
  group_by(EVTYPE) %>%
  summarise(injuries = sum(INJURIES, na.rm = TRUE)) %>%
  arrange(desc(injuries)) %>%
  head(10)

# Set the factor levels in ranking order so the bars are drawn from the
# highest total to the lowest rather than alphabetically
tb_injuries$EVTYPE <- factor(
  tb_injuries$EVTYPE,
  levels = tb_injuries$EVTYPE
)

ggplot(tb_injuries, aes(EVTYPE, injuries)) +
  geom_bar(stat = "identity", fill = "orange") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Injuries") +
  ggtitle("Top 10 Weather Events with number of injuries higher")

Question 2: Across the United States, which types of events have the greatest economic consequences?

# Plot the ten event types with the highest combined property and crop damage

# Per-record damage: each amount scaled by its multiplier, then added together
StormData1$TOTAL_DOMMAGE <- StormData1$PROPDMGNUM * StormData1$PROPDMG +
  StormData1$CROPDMGNUM * StormData1$CROPDMG

# Sum the damage per event type and keep the ten largest totals. head() is
# used instead of [1:10, ] so that data with fewer than ten event types does
# not get padded with all-NA rows.
tb_damages <- StormData1 %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_DOMMAGE = sum(TOTAL_DOMMAGE, na.rm = TRUE)) %>%
  arrange(desc(TOTAL_DOMMAGE)) %>%
  head(10)

# Set the factor levels in ranking order so the bars are drawn from the
# highest total to the lowest rather than alphabetically
tb_damages$EVTYPE <- factor(tb_damages$EVTYPE, levels = tb_damages$EVTYPE)

ggplot(tb_damages, aes(x = EVTYPE, y = TOTAL_DOMMAGE)) +
  geom_bar(stat = "identity", fill = "orange") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Damages ($)") +
  ggtitle("Property & Crop Damages by top 10 Weather Events")

In summary,

TORNADO is the most harmful event type with respect to population health, and

FLOOD is the event type that has the greatest economic consequences.