##Synopsis
This work is an assessment of the various types of extreme weather conditions which cause fatalities and injuries to people in the United States as well as those extreme weather conditions which cause economic damage in the form of property damage or crop damage. The data analyzed here begins in 1950 and ends in November 2011. The data was prepared by the National Weather Service and its documentation can be found here ( https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf).
It was determined in this analysis that those weather conditions associated with Extreme Wind conditions cause the maximum fatalities or injuries and they also cause the maximum economic damage in the form of property and crop damage. After extreme wind conditions the other cause of fatalities or injuries and economic damage in the form of property and crop damage are weather conditions associated with snow or ice. These include snowfall, snow showers, hail, sleet, blizzards, black ice, freezing rain, and glaze. The specific insight that was gained through this analysis is that as per this data there is no extreme weather condition other than these two broad weather conditions (‘Extreme Wind’ and ‘Snow or Ice’) which cause injuries or fatalities or economic damage in the form of property and crop damage in the USA.
##Data Processing
rm(list = ls())
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
dat <- read.csv(("repdata-data-StormData.csv.bz2"), header = TRUE, stringsAsFactors = FALSE)
str(dat)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
##Data Cleaning to reduce the number of different extreme weather conditions to some broader categories to make it easier to comprehend what is going on.
dat$EVTYPE <- as.factor(dat$EVTYPE)
#levels(dat$EVTYPE)
nlevels(dat$EVTYPE)
## [1] 985
#cleaning for Hot or Dry
levels(dat$EVTYPE) <- sub(".*[Hh][Ee][Aa][Tt].*|.*[Ww][Aa][Rr][Mm].*|.*[Hh][Oo][Tt].*",
"Hot or Dry", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Dd][Rr][IiYy].*|DROUGHT|HIGH TEMPERATURE RECORD",
"Hot or Dry", levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 916
#levels(dat$EVTYPE)
levels(dat$EVTYPE) <- sub(".*RECORD\\s+HIGH\\s+TEMPERATURE(S?)", "Hot or Dry", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Hh][Yy][Pp][Ee][Rr][Tt][Hh][Ee][Rr][Mm][Ii][Aa].*", "Hot or Dry", levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 913
#levels(dat$EVTYPE)
#cleaning for Winter or Cold
levels(dat$EVTYPE) <- sub(".*[Ww][Ii][Nn][Tt][Ee][Rr].*|[Ww][Ii][Nn][Tt](E?)[Rr][Yy]", "Winter or Cold", dat$EVTYPE)
levels(dat$EVTYPE) <- sub(".*[Cc][Oo][Ll][Dd].*|.*[Cc][Oo][Oo][Ll].*", "Winter or Cold", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Hh][Yy][Pp][Oo][Tt][Hh][Ee][Rr][Mm][Ii][Aa].*", "Winter or Cold", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Ll][Oo][Ww]\\s+[Tt][Ee][Mm][Pp].*", "Winter or Cold", levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 845
#levels(dat$EVTYPE)
#cleaning for Extreme Wind
levels(dat$EVTYPE) <- sub(".*[Ww][Ii][Nn][Dd].*", "EXTREME WIND", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*TORNADO(S?).*", "EXTREME WIND", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Tt][Yy][Pp][Hh][Oo][Oo][Nn].*", "EXTREME WIND", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Hh][Uu][Rr][Rr][Ii][Cc][Aa][Nn][Ee].*", "EXTREME WIND", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*TORNDAO.*", "EXTREME WIND", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Ww][Nn][Dd].*", "EXTREME WIND", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Ss][Tt][Oo][Rr][Mm].*", "EXTREME WIND", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Ff][Uu][Nn][Nn][Ee][Ll].*", "EXTREME WIND", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub("DOWNBURST|MICROBURST|WALL CLOUD|TROPICAL DEPRESSION",
"EXTREME WIND", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*EXTREME WIND.*|TSTM", "EXTREME WIND", levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 538
#levels(dat$EVTYPE)
#cleaning for Snow or Ice
levels(dat$EVTYPE) <- sub(".*[Ss][Nn][Oo][Ww].*|.*[Ii][Cc][EeYy].*", "SNOW or ICE", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Hh][Aa][Ii][Ll].*", "SNOW or ICE", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Ff][Rr][Ee][Ee][Zz][Ii][Nn][Gg].*", "SNOW or ICE", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Ff][Rr][Ee][Ee][Zz][Ee].*", "SNOW or ICE", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Ss][Ll][Ee][Ee][Tt]","SNOW or ICE", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Ff][Rr][Oo][Ss][Tt].*", "SNOW or ICE", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Bb][Ll][Ii][Zz][Zz][Aa][Rr][Dd].*", "SNOW or ICE", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Gg][Ll][Aa][Zz][Ee].*", "SNOW or ICE", levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 355
#levels(dat$EVTYPE)
#cleaning for Summary
levels(dat$EVTYPE) <- sub("^Summary.*", "Summary", levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 293
#levels(dat$EVTYPE)
#cleaning for Rain or Wet
levels(dat$EVTYPE) <- sub(".*[Ww][Ee][Tt].*|.*[Rr][Aa][Ii][Nn].*", "Rain or Wet", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*SHOWER(S?).*", "Rain or Wet", levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 245
#levels(dat$EVTYPE)
#cleaning for Flood or Surf
levels(dat$EVTYPE) <- sub(".*[Ff][Ll][Oo][Oo](O?)[Dd].*|.*[Ss][Uu][Rr][Ff].*", "Flood or Surf", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*[Ww][Aa][Tt][Ee][Rr].*", "Flood or Surf", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub(".*HIGH TIDE.*|BLOW-OUT TIDE", "Flood or Surf", levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 137
#levels(dat$EVTYPE)
#cleaning for Fire
levels(dat$EVTYPE) <- sub(".*FIRE.*", "FIRE", levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 127
#levels(dat$EVTYPE)
#cleaning for Lightening
levels(dat$EVTYPE) <- sub("\\s+LIGHT(N?)ING(.*)|LIGNTNING|LIGHTING", "LIGHTNING",levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub("LIGHTNING.|LIGHTNING INJURY|LIGHTNING DAMAGE|LIGHTNING WAUSEON",
"LIGHTNING",levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 121
#levels(dat$EVTYPE)
#cleaning for smoke ash and dust
levels(dat$EVTYPE) <- sub("(.*)SMOKE|(.*)Ash|(.*)Dust", "Smoke Ash Dust", levels(dat$EVTYPE))
levels(dat$EVTYPE) <- sub("Smoke Ash Dust Devil|Smoke Ash Dust Plume", "Smoke Ash Dust", levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 116
#levels(dat$EVTYPE)
#cleaning for volcanic activity
levels(dat$EVTYPE) <- sub("VOLCANIC ERUPTION|VOLCANIC ASH|VOLCANIC ASHFALL", "VOLCANIC", levels(dat$EVTYPE))
nlevels(dat$EVTYPE)
## [1] 114
levels(dat$EVTYPE)
## [1] "EXTREME WIND" "SNOW or ICE"
## [3] "Winter or Cold" "Rain or Wet"
## [5] "LIGHTNING" "DENSE FOG"
## [7] "RIP CURRENT" "Flood or Surf"
## [9] "Hot or Dry" "RECORD HIGH"
## [11] "RECORD LOW" "AVALANCHE"
## [13] "MARINE MISHAP" "HIGH SEAS"
## [15] "SEVERE TURBULENCE" "APACHE COUNTY"
## [17] "DUST DEVIL" "HEAVY PRECIPATATION"
## [19] "BLOWING DUST" "URBAN/SMALL"
## [21] "FIRE" "HIGH"
## [23] "MUDSLIDES" "URBAN AND SMALL"
## [25] "GUSTNADO AND" "NORMAL PRECIPITATION"
## [27] "WAYTERSPOUT" "URBAN AND SMALL STREAM"
## [29] "MUD SLIDE" "SMALL STREAM AND"
## [31] "MUD SLIDES" "GUSTNADO"
## [33] "FOG" "COASTAL SURGE"
## [35] "HIGH WAVES" "URBAN/SMALL STREAM"
## [37] "AVALANCE" "BELOW NORMAL PRECIPITATION"
## [39] "RECORD TEMPERATURES" "OTHER"
## [41] "MUDSLIDE" "HEAVY MIX"
## [43] "RIP CURRENTS" "DAM FAILURE"
## [45] "LIGHTNING WAUSEON" "SOUTHEAST"
## [47] "HEAVY PRECIPITATION" "BEACH EROSIN"
## [49] "MUD/ROCK SLIDE" "EXTREME WINDW"
## [51] "LANDSLIDE" "EXCESSIVE"
## [53] "HEAVY SEAS" "?"
## [55] "EXCESSIVE PRECIPITATION" "MILD PATTERN"
## [57] "LANDSLIDES" "SAHARAN DUST"
## [59] "HEAVY SWELLS" "URBAN SMALL"
## [61] "SMALL STREAM" "URBAN/SML STREAM FLD"
## [63] "Other" "Temperature record"
## [65] "Smoke Ash Dust" "Marine Accident"
## [67] "Beach Erosion" "Landslump"
## [69] "Mudslide" "Heavy Precipitation"
## [71] "Record temperature" "MIXED PRECIP"
## [73] "Mudslides" "Summary"
## [75] "Microburst" "No Severe Weather"
## [77] "Record Temperatures" "Sml Stream Fld"
## [79] "MUDSLIDE/LANDSLIDE" "NONE"
## [81] "DAM BREAK" "Flood or SurfS"
## [83] "URBAN/SML STREAM FLDG" "Mixed Precipitation"
## [85] "Record High" "SUMMARY OF MARCH 24-25"
## [87] "SUMMARY OF MARCH 27" "SUMMARY OF MARCH 29"
## [89] "URBAN/SMALL STRM FLDG" "HIGH SWELLS"
## [91] "HIGH SWELLS" "VOLCANIC"
## [93] "BEACH EROSION" "COASTAL EROSION"
## [95] "SEICHE" "ROCK SLIDE"
## [97] "PATCHY DENSE FOG" "RECORD TEMPERATURE"
## [99] "VOG" "MONTHLY PRECIPITATION"
## [101] "MONTHLY TEMPERATURE" "MIXED PRECIPITATION"
## [103] "REMNANTS OF FLOYD" "LANDSPOUT"
## [105] "RECORD PRECIPITATION" "ROUGH SEAS"
## [107] "LANDSLUMP" "RED FLAG CRITERIA"
## [109] "ROGUE WAVE" "DUST DEVEL"
## [111] "NORTHERN LIGHTS" "DROWNING"
## [113] "TSUNAMI" "ASTRONOMICAL LOW TIDE"
##Results
#Analysis for Injuries and Fatalities by weather condition
health <- dat[, c(8, 23:24)]
head(health)
## EVTYPE FATALITIES INJURIES
## 1 EXTREME WIND 0 15
## 2 EXTREME WIND 0 0
## 3 EXTREME WIND 0 2
## 4 EXTREME WIND 0 2
## 5 EXTREME WIND 0 2
## 6 EXTREME WIND 0 6
tail(health)
## EVTYPE FATALITIES INJURIES
## 902292 EXTREME WIND 0 0
## 902293 EXTREME WIND 0 0
## 902294 EXTREME WIND 0 0
## 902295 EXTREME WIND 0 0
## 902296 EXTREME WIND 0 0
## 902297 EXTREME WIND 0 0
health <- health %>% mutate (Total = FATALITIES + INJURIES) %>%
arrange(desc(Total))
head(health)
## EVTYPE FATALITIES INJURIES Total
## 1 EXTREME WIND 42 1700 1742
## 2 EXTREME WIND 1 1568 1569
## 3 EXTREME WIND 90 1228 1318
## 4 EXTREME WIND 158 1150 1308
## 5 EXTREME WIND 36 1150 1186
## 6 EXTREME WIND 116 785 901
str(health)
## 'data.frame': 902297 obs. of 4 variables:
## $ EVTYPE : Factor w/ 114 levels "EXTREME WIND",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ FATALITIES: num 42 1 90 158 36 116 44 2 7 0 ...
## $ INJURIES : num 1700 1568 1228 1150 1150 ...
## $ Total : num 1742 1569 1318 1308 1186 ...
tail(health)
## EVTYPE FATALITIES INJURIES Total
## 902292 EXTREME WIND 0 0 0
## 902293 EXTREME WIND 0 0 0
## 902294 EXTREME WIND 0 0 0
## 902295 EXTREME WIND 0 0 0
## 902296 EXTREME WIND 0 0 0
## 902297 EXTREME WIND 0 0 0
levels(health$EVTYPE)
## [1] "EXTREME WIND" "SNOW or ICE"
## [3] "Winter or Cold" "Rain or Wet"
## [5] "LIGHTNING" "DENSE FOG"
## [7] "RIP CURRENT" "Flood or Surf"
## [9] "Hot or Dry" "RECORD HIGH"
## [11] "RECORD LOW" "AVALANCHE"
## [13] "MARINE MISHAP" "HIGH SEAS"
## [15] "SEVERE TURBULENCE" "APACHE COUNTY"
## [17] "DUST DEVIL" "HEAVY PRECIPATATION"
## [19] "BLOWING DUST" "URBAN/SMALL"
## [21] "FIRE" "HIGH"
## [23] "MUDSLIDES" "URBAN AND SMALL"
## [25] "GUSTNADO AND" "NORMAL PRECIPITATION"
## [27] "WAYTERSPOUT" "URBAN AND SMALL STREAM"
## [29] "MUD SLIDE" "SMALL STREAM AND"
## [31] "MUD SLIDES" "GUSTNADO"
## [33] "FOG" "COASTAL SURGE"
## [35] "HIGH WAVES" "URBAN/SMALL STREAM"
## [37] "AVALANCE" "BELOW NORMAL PRECIPITATION"
## [39] "RECORD TEMPERATURES" "OTHER"
## [41] "MUDSLIDE" "HEAVY MIX"
## [43] "RIP CURRENTS" "DAM FAILURE"
## [45] "LIGHTNING WAUSEON" "SOUTHEAST"
## [47] "HEAVY PRECIPITATION" "BEACH EROSIN"
## [49] "MUD/ROCK SLIDE" "EXTREME WINDW"
## [51] "LANDSLIDE" "EXCESSIVE"
## [53] "HEAVY SEAS" "?"
## [55] "EXCESSIVE PRECIPITATION" "MILD PATTERN"
## [57] "LANDSLIDES" "SAHARAN DUST"
## [59] "HEAVY SWELLS" "URBAN SMALL"
## [61] "SMALL STREAM" "URBAN/SML STREAM FLD"
## [63] "Other" "Temperature record"
## [65] "Smoke Ash Dust" "Marine Accident"
## [67] "Beach Erosion" "Landslump"
## [69] "Mudslide" "Heavy Precipitation"
## [71] "Record temperature" "MIXED PRECIP"
## [73] "Mudslides" "Summary"
## [75] "Microburst" "No Severe Weather"
## [77] "Record Temperatures" "Sml Stream Fld"
## [79] "MUDSLIDE/LANDSLIDE" "NONE"
## [81] "DAM BREAK" "Flood or SurfS"
## [83] "URBAN/SML STREAM FLDG" "Mixed Precipitation"
## [85] "Record High" "SUMMARY OF MARCH 24-25"
## [87] "SUMMARY OF MARCH 27" "SUMMARY OF MARCH 29"
## [89] "URBAN/SMALL STRM FLDG" "HIGH SWELLS"
## [91] "HIGH SWELLS" "VOLCANIC"
## [93] "BEACH EROSION" "COASTAL EROSION"
## [95] "SEICHE" "ROCK SLIDE"
## [97] "PATCHY DENSE FOG" "RECORD TEMPERATURE"
## [99] "VOG" "MONTHLY PRECIPITATION"
## [101] "MONTHLY TEMPERATURE" "MIXED PRECIPITATION"
## [103] "REMNANTS OF FLOYD" "LANDSPOUT"
## [105] "RECORD PRECIPITATION" "ROUGH SEAS"
## [107] "LANDSLUMP" "RED FLAG CRITERIA"
## [109] "ROGUE WAVE" "DUST DEVEL"
## [111] "NORTHERN LIGHTS" "DROWNING"
## [113] "TSUNAMI" "ASTRONOMICAL LOW TIDE"
health.T <- health %>% select(c(1,4) ) %>% group_by(EVTYPE) %>%
summarize(Totals = paste(Total, collapse = ", "), Grand.total = sum(Total))
health.T
## # A tibble: 2 x 3
## EVTYPE Totals Grand.total
## <fct> <chr> <dbl>
## 1 EXTREME W… 1742, 1569, 1318, 1308, 1186, 901, 844, 802, 787,… 144601
## 2 SNOW or I… 201, 159, 136, 116, 102, 80, 72, 72, 72, 67, 52, … 11072
str(health.T)
## Classes 'tbl_df', 'tbl' and 'data.frame': 2 obs. of 3 variables:
## $ EVTYPE : Factor w/ 114 levels "EXTREME WIND",..: 1 2
## $ Totals : chr "1742, 1569, 1318, 1308, 1186, 901, 844, 802, 787, 750, 720, 711, 611, 583, 577, 561, 550, 533, 526, 521, 506, 5"| __truncated__ "201, 159, 136, 116, 102, 80, 72, 72, 72, 67, 52, 51, 51, 47, 45, 44, 43, 41, 41, 40, 34, 32, 32, 31, 31, 31, 30"| __truncated__
## $ Grand.total: num 144601 11072
health.T$EVTYPE <- factor(health.T$EVTYPE)
str(health.T)
## Classes 'tbl_df', 'tbl' and 'data.frame': 2 obs. of 3 variables:
## $ EVTYPE : Factor w/ 2 levels "EXTREME WIND",..: 1 2
## $ Totals : chr "1742, 1569, 1318, 1308, 1186, 901, 844, 802, 787, 750, 720, 711, 611, 583, 577, 561, 550, 533, 526, 521, 506, 5"| __truncated__ "201, 159, 136, 116, 102, 80, 72, 72, 72, 67, 52, 51, 51, 47, 45, 44, 43, 41, 41, 40, 34, 32, 32, 31, 31, 31, 30"| __truncated__
## $ Grand.total: num 144601 11072
health.T <- as.data.frame(health.T)
str(health.T)
## 'data.frame': 2 obs. of 3 variables:
## $ EVTYPE : Factor w/ 2 levels "EXTREME WIND",..: 1 2
## $ Totals : chr "1742, 1569, 1318, 1308, 1186, 901, 844, 802, 787, 750, 720, 711, 611, 583, 577, 561, 550, 533, 526, 521, 506, 5"| __truncated__ "201, 159, 136, 116, 102, 80, 72, 72, 72, 67, 52, 51, 51, 47, 45, 44, 43, 41, 41, 40, 34, 32, 32, 31, 31, 31, 30"| __truncated__
## $ Grand.total: num 144601 11072
levels(health.T$EVTYPE)
## [1] "EXTREME WIND" "SNOW or ICE"
ggplot(health.T, aes(x = EVTYPE, y = Grand.total, fill = EVTYPE)) +
geom_bar(stat = 'identity') +
xlab("Type of Extreme Weather Condition") +
ylab("Number of people injured or killed") +
ggtitle ("Injuries or Fatalities by Extreme Weather Condition") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(fill = guide_legend(title = "Weather Condition"))
#Analysis for Economic Damage (property damage and crop damage) by weather condition
str(dat)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : Factor w/ 114 levels "EXTREME WIND",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
property <- dat[, c("EVTYPE", "PROPDMG")]
head(property)
## EVTYPE PROPDMG
## 1 EXTREME WIND 25.0
## 2 EXTREME WIND 2.5
## 3 EXTREME WIND 25.0
## 4 EXTREME WIND 2.5
## 5 EXTREME WIND 2.5
## 6 EXTREME WIND 2.5
property.reduce <- property %>% group_by(EVTYPE) %>%
summarize (Total.DMG = paste(PROPDMG, collapse = ", "), Grand.Total = sum(PROPDMG))
head(property.reduce)
## # A tibble: 2 x 3
## EVTYPE Total.DMG Grand.Total
## <fct> <chr> <dbl>
## 1 EXTREME W… 25, 2.5, 25, 2.5, 2.5, 2.5, 2.5, 2.5, 25, 25, 2.5… 8688402.
## 2 SNOW or I… 0, 0, 0, 0, 5, 5, 5, 50, 900, 1, 10, 0, 10, 50, 0… 2196098.
str(property.reduce)
## Classes 'tbl_df', 'tbl' and 'data.frame': 2 obs. of 3 variables:
## $ EVTYPE : Factor w/ 114 levels "EXTREME WIND",..: 1 2
## $ Total.DMG : chr "25, 2.5, 25, 2.5, 2.5, 2.5, 2.5, 2.5, 25, 25, 2.5, 2.5, 250, 0, 25, 25, 25, 25, 25, 25, 25, 2.5, 2.5, 25, 25, 2"| __truncated__ "0, 0, 0, 0, 5, 5, 5, 50, 900, 1, 10, 0, 10, 50, 0, 0, 500, 5, 0, 20, 10, 0, 0, 0, 0, 0, 0, 100, 1, 10, 0.05, 0,"| __truncated__
## $ Grand.Total: num 8688402 2196098
property.reduce
## # A tibble: 2 x 3
## EVTYPE Total.DMG Grand.Total
## <fct> <chr> <dbl>
## 1 EXTREME W… 25, 2.5, 25, 2.5, 2.5, 2.5, 2.5, 2.5, 25, 25, 2.5… 8688402.
## 2 SNOW or I… 0, 0, 0, 0, 5, 5, 5, 50, 900, 1, 10, 0, 10, 50, 0… 2196098.
property.reduce$EVTYPE <- factor(property.reduce$EVTYPE)
str(property.reduce)
## Classes 'tbl_df', 'tbl' and 'data.frame': 2 obs. of 3 variables:
## $ EVTYPE : Factor w/ 2 levels "EXTREME WIND",..: 1 2
## $ Total.DMG : chr "25, 2.5, 25, 2.5, 2.5, 2.5, 2.5, 2.5, 25, 25, 2.5, 2.5, 250, 0, 25, 25, 25, 25, 25, 25, 25, 2.5, 2.5, 25, 25, 2"| __truncated__ "0, 0, 0, 0, 5, 5, 5, 50, 900, 1, 10, 0, 10, 50, 0, 0, 500, 5, 0, 20, 10, 0, 0, 0, 0, 0, 0, 100, 1, 10, 0.05, 0,"| __truncated__
## $ Grand.Total: num 8688402 2196098
property.reduce <- as.data.frame(property.reduce)
str(property.reduce)
## 'data.frame': 2 obs. of 3 variables:
## $ EVTYPE : Factor w/ 2 levels "EXTREME WIND",..: 1 2
## $ Total.DMG : chr "25, 2.5, 25, 2.5, 2.5, 2.5, 2.5, 2.5, 25, 25, 2.5, 2.5, 250, 0, 25, 25, 25, 25, 25, 25, 25, 2.5, 2.5, 25, 25, 2"| __truncated__ "0, 0, 0, 0, 5, 5, 5, 50, 900, 1, 10, 0, 10, 50, 0, 0, 500, 5, 0, 20, 10, 0, 0, 0, 0, 0, 0, 100, 1, 10, 0.05, 0,"| __truncated__
## $ Grand.Total: num 8688402 2196098
levels(property.reduce$EVTYPE)
## [1] "EXTREME WIND" "SNOW or ICE"
ggplot(property.reduce, aes(x = EVTYPE, y = Grand.Total, fill = EVTYPE)) +
geom_bar(stat = 'identity') +
xlab("Type of Extreme Weather Condition") +
ylab("Estimated Property Damage (in dollars)") +
ggtitle ("Property Damage by Extreme Weather Condition") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(fill = guide_legend(title = "Weather Condition"))
crop <- dat[, c("EVTYPE","CROPDMG")]
head(crop)
## EVTYPE CROPDMG
## 1 EXTREME WIND 0
## 2 EXTREME WIND 0
## 3 EXTREME WIND 0
## 4 EXTREME WIND 0
## 5 EXTREME WIND 0
## 6 EXTREME WIND 0
crop.reduce <- crop %>% group_by(EVTYPE) %>%
summarize (Total.DMG = paste(CROPDMG, collapse = ", "), Grand.Total = sum(CROPDMG))
crop.reduce
## # A tibble: 2 x 3
## EVTYPE Total.DMG Grand.Total
## <fct> <chr> <dbl>
## 1 EXTREME W… 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… 1172332.
## 2 SNOW or I… 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… 205495.
str(crop.reduce)
## Classes 'tbl_df', 'tbl' and 'data.frame': 2 obs. of 3 variables:
## $ EVTYPE : Factor w/ 114 levels "EXTREME WIND",..: 1 2
## $ Total.DMG : chr "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "| __truncated__ "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0,"| __truncated__
## $ Grand.Total: num 1172332 205495
crop.reduce$EVTYPE <- factor(crop.reduce$EVTYPE)
str(crop.reduce)
## Classes 'tbl_df', 'tbl' and 'data.frame': 2 obs. of 3 variables:
## $ EVTYPE : Factor w/ 2 levels "EXTREME WIND",..: 1 2
## $ Total.DMG : chr "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "| __truncated__ "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0,"| __truncated__
## $ Grand.Total: num 1172332 205495
crop.reduce <- as.data.frame(crop.reduce)
str(crop.reduce)
## 'data.frame': 2 obs. of 3 variables:
## $ EVTYPE : Factor w/ 2 levels "EXTREME WIND",..: 1 2
## $ Total.DMG : chr "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "| __truncated__ "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0,"| __truncated__
## $ Grand.Total: num 1172332 205495
levels(crop.reduce$EVTYPE)
## [1] "EXTREME WIND" "SNOW or ICE"
ggplot(crop.reduce, aes(x = EVTYPE, y = Grand.Total, fill = EVTYPE)) +
geom_bar(stat = 'identity') +
xlab("Type of Extreme Weather Condition") +
ylab("Estimated Crop Damage (in dollars)") +
ggtitle ("Crop Damage by Extreme Weather Condition") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(fill = guide_legend(title = "Weather Condition"))