library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.3.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(grid)
# Remote location of the compressed storm data archive and the local file
# name it is stored under.
file_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
file_name <- "repdata_data_StormData.csv.bz2"

# Fetch the archive only when it is not already on disk, so re-running the
# script does not download it again. The default download method is used
# rather than forcing "curl" (which requires an external curl binary), and
# mode = "wb" keeps the compressed file byte-exact on Windows.
if (!file.exists(file_name)) {
  download.file(file_url, file_name, mode = "wb")
}

# read.csv() is handed the .bz2 file directly, without a separate
# decompression step.
stormdata <- read.csv(file_name, header = TRUE, sep = ",")
str(stormdata)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ BGN_TIME : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
## $ STATE : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : Factor w/ 35 levels ""," N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_LOCATI: Factor w/ 54429 levels "","- 1 N Albion",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_DATE : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_TIME : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_LOCATI: Factor w/ 34506 levels "","- .5 NNW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ WFO : Factor w/ 542 levels ""," CI","$AC",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ZONENAMES : Factor w/ 25112 levels ""," "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : Factor w/ 436781 levels "","-2 at Deer Park\n",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Preview the first three records of the raw data
head(stormdata, n = 3)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
# Working copy of the raw data, extended with a per-record casualty count
# (fatalities plus injuries).
mydata <- stormdata %>%
  mutate(fatalities_injuries = FATALITIES + INJURIES)

# Per event type: total fatalities, total injuries and their combined total.
Harmfulevents <- mydata %>%
  group_by(EVTYPE) %>%
  summarize(
    Total_fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_injuries = sum(INJURIES, na.rm = TRUE),
    Total_fatalities_injuries = sum(fatalities_injuries, na.rm = TRUE)
  ) %>%
  rename(EventType = EVTYPE)

# Rank event types from most to fewest combined casualties and show the
# ten highest.
sorted_Harmfulevents <- Harmfulevents %>%
  arrange(desc(Total_fatalities_injuries))
head(sorted_Harmfulevents, 10)
## # A tibble: 10 × 4
## EventType Total_fatalities Total_injuries
## <fctr> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 TSTM WIND 504 6957
## 4 FLOOD 470 6789
## 5 LIGHTNING 816 5230
## 6 HEAT 937 2100
## 7 FLASH FLOOD 978 1777
## 8 ICE STORM 89 1975
## 9 THUNDERSTORM WIND 133 1488
## 10 WINTER STORM 206 1321
## # ... with 1 more variables: Total_fatalities_injuries <dbl>
# PROPDMG and CROPDMG hold only the leading figures of each damage estimate;
# the accompanying PROPDMGEXP / CROPDMGEXP column carries the magnitude
# (e.g. 25 with "K" is 25,000 dollars). Summing the raw figures would count
# 25 "K" and 25 "M" as equal, so both columns are scaled to dollars first.

# Translate magnitude codes into numeric multipliers.
#   exp_code: vector of magnitude codes (factor or character).
#   returns:  numeric vector of the same length; "K" -> 1e3, "M" -> 1e6,
#             "B" -> 1e9 (case-insensitive), anything else -> 1.
# NOTE(review): only K/M/B are described in the Storm Data documentation.
# The remaining codes in the data (blank, digits, "h", "+", "-", "?") have
# no documented meaning, so those amounts are taken at face value.
damage_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

mydata$PROPDMG_USD <- mydata$PROPDMG * damage_multiplier(mydata$PROPDMGEXP)
mydata$CROPDMG_USD <- mydata$CROPDMG * damage_multiplier(mydata$CROPDMGEXP)
mydata$TOTALDMG <- mydata$PROPDMG_USD + mydata$CROPDMG_USD

# Per event type: property, crop and combined damage, in dollars.
Damage_events <- mydata %>%
  group_by(EVTYPE) %>%
  summarize(
    Total_PropertyDamage = sum(PROPDMG_USD, na.rm = TRUE),
    Total_CropDamage = sum(CROPDMG_USD, na.rm = TRUE),
    Total_Damage = sum(TOTALDMG, na.rm = TRUE)
  ) %>%
  rename(EventType = EVTYPE)

# Rank event types from highest to lowest combined damage and show the
# ten highest.
sorted_Damage_events <- arrange(Damage_events, desc(Total_Damage))
head(sorted_Damage_events, 10)
## (printed table not reproduced here; re-run the script to display the
## ranking, whose values are expressed in dollars)
# Three stacked bar charts of the most harmful event types: combined
# casualties, fatalities only, and injuries only.
#
# Each panel ranks the event types by the measure it displays.
# sorted_Harmfulevents is ordered by the combined total, so taking its first
# five rows for every panel would show the wrong events in the fatalities
# and injuries panels whenever those rankings differ from the combined one.
top5_casualties <- head(
  arrange(sorted_Harmfulevents, desc(Total_fatalities_injuries)), 5
)
top5_fatalities <- head(
  arrange(sorted_Harmfulevents, desc(Total_fatalities)), 5
)
top5_injuries <- head(
  arrange(sorted_Harmfulevents, desc(Total_injuries)), 5
)

# Text styling shared by the three panels.
casualty_theme <- theme(
  plot.title = element_text(
    size = 10, face = "bold", margin = margin(10, 0, 10, 0)
  ),
  axis.text.y = element_text(size = 6),
  axis.text.x = element_text(size = 6)
)

# reorder() sorts the bars by value; coord_flip() draws them horizontally.
x <- ggplot(
  top5_casualties,
  aes(
    x = reorder(EventType, Total_fatalities_injuries),
    y = Total_fatalities_injuries
  )
)
x1 <- x +
  geom_bar(stat = "identity", color = "white", fill = "navyblue") +
  xlab("Event Types") +
  ylab("Total Fatalities & Injuries") +
  ggtitle("Top 5 Combined Fatalities & Injuries by Event Type") +
  casualty_theme +
  coord_flip()

y <- ggplot(
  top5_fatalities,
  aes(x = reorder(EventType, Total_fatalities), y = Total_fatalities)
)
y1 <- y +
  geom_bar(stat = "identity", color = "white", fill = "dodgerblue2") +
  xlab("Event Types") +
  ylab("Total Fatalities") +
  ggtitle("Top 5 Fatalities by Event Type") +
  casualty_theme +
  coord_flip()

z <- ggplot(
  top5_injuries,
  aes(x = reorder(EventType, Total_injuries), y = Total_injuries)
)
z1 <- z +
  geom_bar(stat = "identity", color = "white", fill = "cadetblue") +
  xlab("Event Types") +
  ylab("Total Injuries") +
  ggtitle("Top 5 Injuries by Event Type") +
  casualty_theme +
  coord_flip()

# Stack the three panels in a single figure.
grid.arrange(x1, y1, z1)
# Three stacked bar charts of the costliest event types: combined damage,
# property damage only, and crop damage only.
#
# Each panel ranks the event types by the measure it displays.
# sorted_Damage_events is ordered by combined damage, so taking its first
# five rows for every panel would show the wrong events in the property and
# crop panels whenever those rankings differ from the combined one.
top5_damage <- head(
  arrange(sorted_Damage_events, desc(Total_Damage)), 5
)
top5_property_damage <- head(
  arrange(sorted_Damage_events, desc(Total_PropertyDamage)), 5
)
top5_crop_damage <- head(
  arrange(sorted_Damage_events, desc(Total_CropDamage)), 5
)

# Text styling shared by the three panels.
damage_theme <- theme(
  plot.title = element_text(
    size = 10, face = "bold", margin = margin(10, 0, 10, 0)
  ),
  axis.text.y = element_text(size = 6),
  axis.text.x = element_text(size = 6)
)

# reorder() sorts the bars by value; coord_flip() draws them horizontally.
x <- ggplot(
  top5_damage,
  aes(x = reorder(EventType, Total_Damage), y = Total_Damage)
)
x2 <- x +
  geom_bar(stat = "identity", color = "white", fill = "darkorange2") +
  xlab("Event Types") +
  ylab("Total Property & Crop Damage") +
  ggtitle("Top 5 Event Types by Property & Crop Damage Costs") +
  damage_theme +
  coord_flip()

y <- ggplot(
  top5_property_damage,
  aes(x = reorder(EventType, Total_PropertyDamage), y = Total_PropertyDamage)
)
y2 <- y +
  geom_bar(stat = "identity", color = "white", fill = "goldenrod3") +
  xlab("Event Types") +
  ylab("Total Property Damage") +
  ggtitle("Top 5 Event Types by Property Damage Cost") +
  damage_theme +
  coord_flip()

z <- ggplot(
  top5_crop_damage,
  aes(x = reorder(EventType, Total_CropDamage), y = Total_CropDamage)
)
z2 <- z +
  geom_bar(stat = "identity", color = "white", fill = "khaki4") +
  xlab("Event Types") +
  ylab("Total Crop Damage") +
  ggtitle("Top 5 Event Types by Crop Damage Cost") +
  damage_theme +
  coord_flip()

# Stack the three panels in a single figure.
grid.arrange(x2, y2, z2)