The purpose of this study is to explore the NOAA Storm Database and to analyze severe weather events.
Objectives:
Identify events that are harmful to population health.
Identify events that have the greatest economic consequences.
library(dplyr)
library(ggplot2)
Read the original files and display column names.
# Download the compressed NOAA storm data set once; skip the download when a
# local copy already exists in the working directory.
if (!file.exists("StormData.csv.bz2")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    # download.file() requires `destfile`; any other name is swallowed by `...`
    # and the call fails with a missing-argument error.
    destfile = "StormData.csv.bz2",
    method = "curl"
  )
}
# Load the data: bzfile() opens the .bz2 archive and read.csv() parses it.
# The file name must match the downloaded file exactly, because file names are
# case-sensitive on most non-Windows file systems.
StormData <- read.csv(bzfile("StormData.csv.bz2"), sep = ",", header = TRUE)
# Display the column names of the raw data set.
colnames(StormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
StormData contains 902,297 rows and 37 columns.
# Select useful data: event type, casualty counts, and the damage amounts
# together with their exponent codes.
StormData1 <- StormData[, c("EVTYPE", "FATALITIES", "INJURIES",
                            "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Multiplier for each damage exponent code. Digits mean 10^digit; letters are
# H(undred), K (thousand), M(illion) and B(illion). Codes are looked up by
# name rather than by factor-level position, because the sort order of factor
# levels depends on the locale and on which codes happen to be present.
dmg_exp_multiplier <- c(
  "0" = 1, "1" = 10, "2" = 1e2, "3" = 1e3, "4" = 1e4,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8,
  "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9
)

# Translate a vector of exponent codes into numeric multipliers.
# Lookup is case-insensitive ("k" and "K" are both 1e3); any code that is not
# in the table ("", "-", "?", "+", NA) falls back to a multiplier of 1.
dmg_exp_to_number <- function(exp_code) {
  multiplier <- unname(dmg_exp_multiplier[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Convert the codes in PROPDMGEXP into numeric units to calculate property damage.
StormData1$PROPDMGEXP <- as.factor(StormData1$PROPDMGEXP)
StormData1$PROPDMGNUM <- dmg_exp_to_number(StormData1$PROPDMGEXP)

# Convert the codes in CROPDMGEXP into numeric units to calculate crop damage.
StormData1$CROPDMGEXP <- as.factor(StormData1$CROPDMGEXP)
StormData1$CROPDMGNUM <- dmg_exp_to_number(StormData1$CROPDMGEXP)
# Total fatalities per event type, sorted from the deadliest down. EVTYPE is
# turned into a factor whose levels follow that ranking so the bars are drawn
# in descending order instead of alphabetically.
tb_fatalities <- StormData1 %>%
  group_by(EVTYPE) %>%
  summarise(fatalities = sum(FATALITIES, na.rm = TRUE)) %>%
  arrange(desc(fatalities)) %>%
  mutate(EVTYPE = factor(EVTYPE, levels = EVTYPE))

# Keep the ten deadliest event types.
tb_fatalities <- tb_fatalities[seq_len(10), ]

# Bar chart of the fatalities for those ten event types.
ggplot(tb_fatalities, aes(x = EVTYPE, y = fatalities)) +
  geom_col(fill = "orange") +
  labs(
    x = "Event Type",
    y = "Fatalities",
    title = "Top 10 Weather Events with number of fatalities higher"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Total injuries per event type, sorted from the most injurious down. EVTYPE
# is turned into a factor whose levels follow that ranking so the bars are
# drawn in descending order instead of alphabetically.
tb_injuries <- StormData1 %>%
  group_by(EVTYPE) %>%
  summarise(injuries = sum(INJURIES, na.rm = TRUE)) %>%
  arrange(desc(injuries)) %>%
  mutate(EVTYPE = factor(EVTYPE, levels = EVTYPE))

# Keep the ten event types with the most injuries.
tb_injuries <- tb_injuries[seq_len(10), ]

# Bar chart of the injuries for those ten event types.
ggplot(tb_injuries, aes(x = EVTYPE, y = injuries)) +
  geom_col(fill = "orange") +
  labs(
    x = "Event Type",
    y = "Injuries",
    title = "Top 10 Weather Events with number of injuries higher"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Total damage per record: property damage plus crop damage, each amount
# scaled by its numeric multiplier column.
StormData1$TOTAL_DOMMAGE <- with(
  StormData1,
  PROPDMGNUM * PROPDMG + CROPDMGNUM * CROPDMG
)

# Total damage per event type, sorted from the costliest down. EVTYPE is
# turned into a factor whose levels follow that ranking so the bars are drawn
# in descending order instead of alphabetically.
tb_damages <- StormData1 %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_DOMMAGE = sum(TOTAL_DOMMAGE, na.rm = TRUE)) %>%
  arrange(desc(TOTAL_DOMMAGE)) %>%
  mutate(EVTYPE = factor(EVTYPE, levels = EVTYPE))

# Keep the ten costliest event types.
tb_damages <- tb_damages[seq_len(10), ]

# Bar chart of the combined property and crop damage for those ten event types.
ggplot(tb_damages, aes(x = EVTYPE, y = TOTAL_DOMMAGE)) +
  geom_col(fill = "orange") +
  labs(
    x = "Event Type",
    y = "Damages ($)",
    title = "Property & Crop Damages by top 10 Weather Events"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
In summary,
TORNADO is the most harmful event with respect to population health, and
FLOOD is the event which has the greatest economic consequences.