library(ggplot2)

Synopsis:

Data processing:

# Load the raw storm data.
# Fields equal to "", "+", "?" or "-" are read as NA (see na.strings).
storm <- read.csv(
  "repdata_data_stormData.csv",
  sep = ",",
  header = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  na.strings = c("", "+", "?", "-")
)

Exploratory data analysis

# Columns available in the raw data set
names(storm)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Keep only the event type plus the health and damage columns used below
v_storm <- storm[c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]

# Data structure:
str(v_storm)
## 'data.frame':    902297 obs. of  7 variables:
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  NA NA NA NA ...
# Missing values: sum(is.na()) counts NA cells across all columns,
# not the number of variables that contain NAs.
na <- sum(is.na(v_storm))

paste("There are", na, "missing values in my data set")
## [1] "There are 1084369 missing values in my data set"
# Convert the event type and the two damage-exponent columns to factors:
v_storm$EVTYPE <- as.factor(v_storm$EVTYPE)
v_storm$PROPDMGEXP <- as.factor(v_storm$PROPDMGEXP)
v_storm$CROPDMGEXP <- as.factor(v_storm$CROPDMGEXP)

Health perspective:

# Total fatalities per event type
fata <- aggregate(FATALITIES ~ EVTYPE, v_storm, sum)

# Keep the 20 event types with the most fatalities.
# head() is used instead of `[1:20, ]` so that no all-NA padding rows are
# produced if the data ever contain fewer than 20 event types.
fata <- head(fata[order(-fata$FATALITIES), ], 20)

head(fata)
##             EVTYPE FATALITIES
## 833        TORNADO       5633
## 129 EXCESSIVE HEAT       1903
## 152    FLASH FLOOD        978
## 274           HEAT        937
## 463      LIGHTNING        816
## 855      TSTM WIND        504
# Bar chart of fatalities, bars ordered by fatality count
p1 <- ggplot(
  data = fata,
  aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity", fill = "tomato")

# Tilt the x-axis labels so the long event names stay readable
p2 <- p1 + theme(axis.text.x = element_text(angle = 60, hjust = 1))

p3 <- p2 +
  xlab("Event Type") +
  ylab("Fatalities") +
  ggtitle("Number of fatalities by top 20 Weather Events")

p3

# Total injuries per event type
injur <- aggregate(INJURIES ~ EVTYPE, v_storm, sum)

# Keep the 20 event types with the most injuries.
# head() is used instead of `[1:20, ]` so that no all-NA padding rows are
# produced if the data ever contain fewer than 20 event types.
injur <- head(injur[order(-injur$INJURIES), ], 20)

head(injur)
##             EVTYPE INJURIES
## 833        TORNADO    91346
## 855      TSTM WIND     6957
## 169          FLOOD     6789
## 129 EXCESSIVE HEAT     6525
## 463      LIGHTNING     5230
## 274           HEAT     2100
# Bar chart of injuries, bars ordered by injury count
g1 <- ggplot(
  data = injur,
  aes(x = reorder(EVTYPE, INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity", fill = "red")

# Tilt the x-axis labels so the long event names stay readable
g2 <- g1 + theme(axis.text.x = element_text(angle = 60, hjust = 1))

g3 <- g2 +
  xlab("Event Type") +
  ylab("Injuries") +
  ggtitle("Number of injuries by top 20 Weather Events")

g3

Economic perspective:

# Convert a damage-exponent code (PROPDMGEXP / CROPDMGEXP) into a numeric
# multiplier so that the damage amounts can be expressed in dollars.
#   H = hundreds, K = thousands, M = millions, B = billions (case-insensitive).
#   A single digit d is treated as 10^d -- NOTE(review): digit codes are not
#   described in the official documentation; confirm this interpretation.
#   NA or any other code leaves the amount unscaled (multiplier 1).
exp_multiplier <- function(code) {
  code <- toupper(as.character(code))
  mult <- rep(1, length(code))

  letter_scale <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  is_letter <- code %in% names(letter_scale)
  mult[is_letter] <- unname(letter_scale[code[is_letter]])

  is_digit <- !is.na(code) & grepl("^[0-9]$", code)
  mult[is_digit] <- 10^as.numeric(code[is_digit])

  mult
}

# Scale the raw amounts by their exponent codes. Summing PROPDMG and CROPDMG
# directly would mix values recorded in thousands, millions and billions.
damage_usd <- data.frame(
  EVTYPE = v_storm$EVTYPE,
  PROPDMG = v_storm$PROPDMG * exp_multiplier(v_storm$PROPDMGEXP),
  CROPDMG = v_storm$CROPDMG * exp_multiplier(v_storm$CROPDMGEXP)
)

# Total (crop + property) damage in dollars per event type
eco <- aggregate(CROPDMG + PROPDMG ~ EVTYPE, damage_usd, sum)

# Keep the 20 costliest event types; head() avoids all-NA padding rows if
# fewer than 20 event types are present.
eco <- head(eco[order(-eco$`CROPDMG + PROPDMG`), ], 20)

# NOTE(review): re-run the report to regenerate this table, and re-check the
# economic conclusion in the Results section, since scaling by the exponent
# codes can change the ranking of event types.
head(eco)
# Bar chart of total damage, bars ordered by damage amount
e1 <- ggplot(
  data = eco,
  aes(x = reorder(EVTYPE, `CROPDMG + PROPDMG`), y = `CROPDMG + PROPDMG`)
) +
  geom_bar(stat = "identity", fill = "green")

# Tilt the x-axis labels so the long event names stay readable
e2 <- e1 + theme(axis.text.x = element_text(angle = 60, hjust = 1))

e3 <- e2 +
  xlab("Event Type") +
  ylab("Damage($)") +
  ggtitle("Property & Crop Damages by top 20 Weather Events")

e3

Results:

  • The data I have provide strong evidence that tornadoes have the worst consequences, not only for population health (fatalities and injuries) but also economically.

  • So preparing for severe weather events must take into consideration the need to prioritize resources for many different types of events, but most importantly for tornadoes.