## [1] "RESULTS SECTION AND FIGURES ARE AFTER THE CODE INPUT"
## [1] "Description: Storm types and their health and economic costs:"
## [1] "Synopsis: Many natural phenomena are destructive to human health and can cause extensive economic losses. Across the US, according to the data provided, certain types of events are far more harmful to both health and economy than others. By most metrics, tornadoes are the most destructive natural phenomena -- they cause the largest number of nationwide fatalities and injuries, as well as causing the maximum damage to property. Heat, excessive heat and flash floods also cause very large numbers of fatalities and injuries. Many phenomena which cause fatalities and injuries have geographical attributes: fatalities and injuries due to tornadoes are found primarily in prairie states such as AL, TX and IN; deaths owing to avalanches are found in mountain states such as CO, UT, WA and AK. Similarly, phenomena with economic consequences also have geographical attributes."
# Strongly penalise scientific notation so large totals print in fixed form
options(scipen = 999)
## [1] "Data Processing"
## REDUCING DATA TO NECESSARY VARIABLES
# NOTE(review): the damage summaries further down sum PROPDMG and CROPDMG
# as-is; PROPDMGEXP / CROPDMGEXP (e.g. "K") are never applied and are dropped
# before the event-by-state summary. Presumably they are magnitude
# multipliers -- confirm against the dataset documentation before comparing
# damage totals across events or states.
# Fatalities and injuries
storm_physical <- storm_data %>%
  select(STATE, EVTYPE, FATALITIES, INJURIES)
# Economic variables
storm_econ <- select(
  storm_data,
  STATE, EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)
# Same measures together with the begin/end dates of each record
storm_date_physical <- select(
  storm_data,
  STATE, EVTYPE, BGN_DATE, END_DATE, FATALITIES, INJURIES
)
storm_date_eco <- select(
  storm_data,
  STATE, EVTYPE, BGN_DATE, END_DATE, PROPDMG, CROPDMG
)
# Group the casualty columns once by state and once by event type;
# the event-type grouping is also printed for inspection.
by_state_physical <- storm_physical %>% group_by(STATE)
by_event_physical <- storm_physical %>%
  group_by(EVTYPE) %>%
  print()
## Source: local data frame [902,297 x 4]
## Groups: EVTYPE [985]
##
## STATE EVTYPE FATALITIES INJURIES
## <fctr> <fctr> <dbl> <dbl>
## 1 AL TORNADO 0 15
## 2 AL TORNADO 0 0
## 3 AL TORNADO 0 2
## 4 AL TORNADO 0 2
## 5 AL TORNADO 0 2
## 6 AL TORNADO 0 6
## 7 AL TORNADO 0 1
## 8 AL TORNADO 0 0
## 9 AL TORNADO 1 14
## 10 AL TORNADO 0 0
## # ... with 902,287 more rows
# Group the damage columns once by state and once by event type;
# the event-type grouping is also printed for inspection.
by_state_eco <- storm_econ %>% group_by(STATE)
by_event_eco <- storm_econ %>%
  group_by(EVTYPE) %>%
  print()
## Source: local data frame [902,297 x 6]
## Groups: EVTYPE [985]
##
## STATE EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## <fctr> <fctr> <dbl> <fctr> <dbl> <fctr>
## 1 AL TORNADO 25.0 K 0
## 2 AL TORNADO 2.5 K 0
## 3 AL TORNADO 25.0 K 0
## 4 AL TORNADO 2.5 K 0
## 5 AL TORNADO 2.5 K 0
## 6 AL TORNADO 2.5 K 0
## 7 AL TORNADO 2.5 K 0
## 8 AL TORNADO 2.5 K 0
## 9 AL TORNADO 25.0 K 0
## 10 AL TORNADO 25.0 K 0
## # ... with 902,287 more rows
## SUMMARIZING PHYSICAL AND ECONOMIC DATA
paste("This sections summarizes Physical and Economic Data")
## [1] "This sections summarizes Physical and Economic Data"
## Summary of Physical Data
# Total fatalities and injuries per event type
summarize_event_physical <- by_event_physical %>%
  summarize_at(vars(FATALITIES, INJURIES), sum)
# Event types ranked by total fatalities, with the INJURIES column removed
ordered_fatalities_event <- summarize_event_physical %>%
  .[order(.$FATALITIES, decreasing = TRUE), ] %>%
  select(-INJURIES) %>%
  print()
## # A tibble: 985 × 2
## EVTYPE FATALITIES
## <fctr> <dbl>
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
## # ... with 975 more rows
# Event types ranked by total injuries, with the FATALITIES column removed
ordered_injuries_event <- summarize_event_physical %>%
  .[order(.$INJURIES, decreasing = TRUE), ] %>%
  select(-FATALITIES)
# Event types of the ten highest fatality totals, as a factor vector
top_events <- head(ordered_fatalities_event$EVTYPE, n = 10) %>% print()
## [1] TORNADO EXCESSIVE HEAT FLASH FLOOD HEAT
## [5] LIGHTNING TSTM WIND FLOOD RIP CURRENT
## [9] HIGH WIND AVALANCHE
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
# Total fatalities and injuries per state
summarize_state_physical <- by_state_physical %>%
  summarize_at(vars(FATALITIES, INJURIES), sum)
# States ranked by total fatalities, with the INJURIES column removed
ordered_fatalities_state <- summarize_state_physical %>%
  .[order(.$FATALITIES, decreasing = TRUE), ] %>%
  select(-INJURIES) %>%
  print()
## # A tibble: 72 × 2
## STATE FATALITIES
## <fctr> <dbl>
## 1 IL 1421
## 2 TX 1366
## 3 PA 846
## 4 AL 784
## 5 MO 754
## 6 FL 746
## 7 MS 555
## 8 CA 550
## 9 AR 530
## 10 TN 521
## # ... with 62 more rows
# States ranked by total injuries, with the FATALITIES column removed
ordered_injuries_state <- summarize_state_physical %>%
  .[order(.$INJURIES, decreasing = TRUE), ] %>%
  select(-FATALITIES)
# Casualty totals for every (state, event type) pair
by_state_event <- by_state_physical %>% group_by(STATE, EVTYPE)
summarize_state_event <- by_state_event %>%
  summarize_at(vars(FATALITIES, INJURIES), sum)
# Pairs ranked by total fatalities, with the INJURIES column removed
ordered_fatalities_state_event <- summarize_state_event %>%
  .[order(.$FATALITIES, decreasing = TRUE), ] %>%
  select(-INJURIES) %>%
  print()
## Source: local data frame [4,258 x 3]
## Groups: STATE [72]
##
## STATE EVTYPE FATALITIES
## <fctr> <fctr> <dbl>
## 1 IL HEAT 653
## 2 AL TORNADO 617
## 3 TX TORNADO 538
## 4 MS TORNADO 450
## 5 MO TORNADO 388
## 6 AR TORNADO 379
## 7 TN TORNADO 368
## 8 PA EXCESSIVE HEAT 359
## 9 IL EXCESSIVE HEAT 330
## 10 OK TORNADO 296
## # ... with 4,248 more rows
# Same pairwise totals, grouped with the event type as the leading key
by_event_state <- by_state_physical %>% group_by(EVTYPE, STATE)
summarize_event_state <- by_event_state %>%
  summarize_at(vars(FATALITIES, INJURIES), sum)
# Pairs ranked by total fatalities, with the INJURIES column removed
ordered_fatalities_event_state <- summarize_event_state %>%
  .[order(.$FATALITIES, decreasing = TRUE), ] %>%
  select(-INJURIES) %>%
  print()
## Source: local data frame [4,258 x 3]
## Groups: EVTYPE [985]
##
## EVTYPE STATE FATALITIES
## <fctr> <fctr> <dbl>
## 1 HEAT IL 653
## 2 TORNADO AL 617
## 3 TORNADO TX 538
## 4 TORNADO MS 450
## 5 TORNADO MO 388
## 6 TORNADO AR 379
## 7 TORNADO TN 368
## 8 EXCESSIVE HEAT PA 359
## 9 EXCESSIVE HEAT IL 330
## 10 TORNADO OK 296
## # ... with 4,248 more rows
# (event type, state) pairs ranked by total injuries, FATALITIES removed
ordered_injuries_event_state <- summarize_event_state %>%
  .[order(.$INJURIES, decreasing = TRUE), ] %>%
  select(-FATALITIES)
## SUMMARIZING ECONOMIC
# Sum of the CROPDMG column per event type, then ranked in decreasing order
summarize_by_event_cropdmg <- by_event_eco %>%
  summarize_at(vars(CROPDMG), sum)
ordered_event_cropdmg <- summarize_by_event_cropdmg %>%
  .[order(.$CROPDMG, decreasing = TRUE), ] %>%
  print()
## # A tibble: 985 × 2
## EVTYPE CROPDMG
## <fctr> <dbl>
## 1 HAIL 579596.28
## 2 FLASH FLOOD 179200.46
## 3 FLOOD 168037.88
## 4 TSTM WIND 109202.60
## 5 TORNADO 100018.52
## 6 THUNDERSTORM WIND 66791.45
## 7 DROUGHT 33898.62
## 8 THUNDERSTORM WINDS 18684.93
## 9 HIGH WIND 17283.21
## 10 HEAVY RAIN 11122.80
## # ... with 975 more rows
# Sum of the PROPDMG column per event type, then ranked in decreasing order
summarize_by_event_propdmg <- by_event_eco %>%
  summarize_at(vars(PROPDMG), sum)
ordered_event_propdmg <- summarize_by_event_propdmg %>%
  .[order(.$PROPDMG, decreasing = TRUE), ] %>%
  print()
## # A tibble: 985 × 2
## EVTYPE PROPDMG
## <fctr> <dbl>
## 1 TORNADO 3212258.2
## 2 FLASH FLOOD 1420124.6
## 3 TSTM WIND 1335965.6
## 4 FLOOD 899938.5
## 5 THUNDERSTORM WIND 876844.2
## 6 HAIL 688693.4
## 7 LIGHTNING 603351.8
## 8 THUNDERSTORM WINDS 446293.2
## 9 HIGH WIND 324731.6
## 10 WINTER STORM 132720.6
## # ... with 975 more rows
# Per-state sums of CROPDMG and PROPDMG, each also ranked in decreasing order
summarize_by_state_cropdmg <- by_state_eco %>%
  summarize_at(vars(CROPDMG), sum)
ordered_state_cropdmg <- summarize_by_state_cropdmg %>%
  .[order(.$CROPDMG, decreasing = TRUE), ]
summarize_by_state_propdmg <- by_state_eco %>%
  summarize_at(vars(PROPDMG), sum)
ordered_state_propdmg <- summarize_by_state_propdmg %>%
  .[order(.$PROPDMG, decreasing = TRUE), ]
# Put the per-event crop sums next to the per-event property table; both
# come from by_event_eco, so the rows are combined by position.
summarize_event_eco_damage <- bind_cols(
  summarize_by_event_cropdmg["CROPDMG"],
  summarize_by_event_propdmg
)
# Displayed only: the TOTALDMG column is not stored back
mutate(summarize_event_eco_damage, TOTALDMG = PROPDMG + CROPDMG)
## # A tibble: 985 × 4
## CROPDMG EVTYPE PROPDMG TOTALDMG
## <dbl> <fctr> <dbl> <dbl>
## 1 0 HIGH SURF ADVISORY 200 200
## 2 0 COASTAL FLOOD 0 0
## 3 0 FLASH FLOOD 50 50
## 4 0 LIGHTNING 0 0
## 5 0 TSTM WIND 108 108
## 6 0 TSTM WIND (G45) 8 8
## 7 0 WATERSPOUT 0 0
## 8 0 WIND 0 0
## 9 0 ? 5 5
## 10 0 ABNORMAL WARMTH 0 0
## # ... with 975 more rows
# Regroup by (event type, state) and drop the two *EXP columns
by_event_state_eco <- by_state_eco %>%
  group_by(EVTYPE, STATE) %>%
  select(-PROPDMGEXP, -CROPDMGEXP)
# Sum of CROPDMG and PROPDMG for every (event type, state) pair
summarize_event_state_eco <- by_event_state_eco %>%
  summarize_at(vars(CROPDMG, PROPDMG), sum) %>%
  print()
## Source: local data frame [4,258 x 4]
## Groups: EVTYPE [?]
##
## EVTYPE STATE CROPDMG PROPDMG
## <fctr> <fctr> <dbl> <dbl>
## 1 HIGH SURF ADVISORY AS 0 200
## 2 COASTAL FLOOD NJ 0 0
## 3 FLASH FLOOD TX 0 50
## 4 LIGHTNING AZ 0 0
## 5 TSTM WIND GA 0 8
## 6 TSTM WIND IL 0 100
## 7 TSTM WIND (G45) FL 0 8
## 8 WATERSPOUT FL 0 0
## 9 WIND WA 0 0
## 10 ? WV 0 5
## # ... with 4,248 more rows
# (event type, state) pairs ranked by summed CROPDMG, PROPDMG column removed
ordered_crop_event_state_eco <- summarize_event_state_eco %>%
  .[order(.$CROPDMG, decreasing = TRUE), ] %>%
  select(-PROPDMG) %>%
  print()
## Source: local data frame [4,258 x 3]
## Groups: EVTYPE [985]
##
## EVTYPE STATE CROPDMG
## <fctr> <fctr> <dbl>
## 1 HAIL NE 201031.15
## 2 HAIL TX 103947.70
## 3 HAIL KS 80734.15
## 4 HAIL IA 47875.76
## 5 FLOOD IA 43273.10
## 6 TSTM WIND NE 37418.00
## 7 HAIL ND 28818.70
## 8 FLASH FLOOD WI 25645.37
## 9 FLASH FLOOD IA 25187.50
## 10 FLASH FLOOD NE 25018.17
## # ... with 4,248 more rows
# (event type, state) pairs ranked by summed PROPDMG, CROPDMG column removed
ordered_prop_event_state_eco <- summarize_event_state_eco %>%
  .[order(.$PROPDMG, decreasing = TRUE), ] %>%
  select(-CROPDMG) %>%
  print()
## Source: local data frame [4,258 x 3]
## Groups: EVTYPE [985]
##
## EVTYPE STATE PROPDMG
## <fctr> <fctr> <dbl>
## 1 TORNADO TX 283097.2
## 2 TORNADO MS 187840.9
## 3 TORNADO AL 167816.2
## 4 TORNADO OK 165167.9
## 5 TORNADO FL 159752.6
## 6 TORNADO IA 152142.8
## 7 TORNADO GA 151349.5
## 8 TSTM WIND TX 144959.0
## 9 TORNADO KS 143209.9
## 10 TORNADO MO 132159.9
## # ... with 4,248 more rows
# One row per state with both damage sums; the two inputs come from the same
# by_state_eco grouping and are combined by position.
summarize_state_damage <- summarize_by_state_propdmg %>%
  bind_cols(summarize_by_state_cropdmg["CROPDMG"]) %>%
  print()
## # A tibble: 72 × 3
## STATE PROPDMG CROPDMG
## <fctr> <dbl> <dbl>
## 1 AK 33995.51 205.00
## 2 AL 363606.66 9666.94
## 3 AM 5653.80 50.00
## 4 AN 294.00 0.00
## 5 AR 361121.58 25819.13
## 6 AS 2954.50 1564.00
## 7 AZ 83046.67 1374.00
## 8 CA 203598.79 21152.16
## 9 CO 81496.79 9290.50
## 10 CT 29155.17 30.00
## # ... with 62 more rows
# Displayed only: the TOTALDMG column is not stored back
mutate(summarize_state_damage, TOTALDMG = PROPDMG + CROPDMG)
## # A tibble: 72 × 4
## STATE PROPDMG CROPDMG TOTALDMG
## <fctr> <dbl> <dbl> <dbl>
## 1 AK 33995.51 205.00 34200.51
## 2 AL 363606.66 9666.94 373273.60
## 3 AM 5653.80 50.00 5703.80
## 4 AN 294.00 0.00 294.00
## 5 AR 361121.58 25819.13 386940.71
## 6 AS 2954.50 1564.00 4518.50
## 7 AZ 83046.67 1374.00 84420.67
## 8 CA 203598.79 21152.16 224750.95
## 9 CO 81496.79 9290.50 90787.29
## 10 CT 29155.17 30.00 29185.17
## # ... with 62 more rows
# Correlation between the per-state PROPDMG and CROPDMG sums
cor(summarize_state_damage["PROPDMG"], summarize_state_damage["CROPDMG"])
## CROPDMG
## PROPDMG 0.5919995
# Side-by-side rankings: columns 1-2 are events ranked by PROPDMG, columns 3-4
# are events ranked by CROPDMG. Each pair is sorted independently, so a row
# does not describe a single event type.
top_events_eco <- ordered_event_propdmg %>%
  bind_cols(ordered_event_cropdmg)
# Keep the first STATE column plus the two damage sums (columns 1, 5 and 6)
state_all_eco <- bind_cols(
  summarize_state_physical,
  summarize_state_damage
)[c(1, 5, 6)]
# States ranked by summed PROPDMG
state_eco_ranked_propdmg <- state_all_eco %>%
  .[order(.$PROPDMG, decreasing = TRUE), ] %>%
  print()
## # A tibble: 72 × 3
## STATE PROPDMG CROPDMG
## <fctr> <dbl> <dbl>
## 1 TX 937138.0 156169.36
## 2 IA 685487.3 157808.71
## 3 OH 559834.4 28328.15
## 4 GA 485873.7 10501.81
## 5 MS 481811.8 56077.89
## 6 KS 387183.8 139798.97
## 7 FL 374428.0 10668.38
## 8 NY 373109.4 18916.15
## 9 AL 363606.7 9666.94
## 10 AR 361121.6 25819.13
## # ... with 62 more rows
# SUMMARIZING THE EFFECTS OF TOP EVENTS
# Keep only the records whose event type is one of top_events. A single
# vectorised filter replaces the former loop, which re-copied the growing
# data frame on every bind_rows() call and hard-coded ten iterations.
# Rows now stay in their original order instead of being stacked event by
# event; the grouped sums below do not depend on row order.
uni_top_events <- by_state_physical %>%
  ungroup() %>%
  filter(EVTYPE %in% top_events)
uni_top_events_by_state <- group_by(uni_top_events, STATE)
uni_top_events_by_event <- group_by(uni_top_events, EVTYPE)
# Fatalities per state caused by the top event types only
summarize_at(uni_top_events_by_state, vars(FATALITIES), sum)
## # A tibble: 55 × 2
## STATE FATALITIES
## <fctr> <dbl>
## 1 AK 37
## 2 AL 738
## 3 AR 503
## 4 AS 9
## 5 AZ 156
## 6 CA 236
## 7 CO 124
## 8 CT 24
## 9 DC 24
## 10 DE 17
## # ... with 45 more rows
# Fatalities per event type, restricted to the top event types
uni_top_events_by_event %>%
  summarize_at("FATALITIES", sum) %>%
  print()
## # A tibble: 10 × 2
## EVTYPE FATALITIES
## <fctr> <dbl>
## 1 AVALANCHE 224
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 FLOOD 470
## 5 HEAT 937
## 6 HIGH WIND 248
## 7 LIGHTNING 816
## 8 RIP CURRENT 368
## 9 TORNADO 5633
## 10 TSTM WIND 504
paste("RESULTS")
## [1] "RESULTS"
paste("The events that cause the most fatalities are")
## [1] "The events that cause the most fatalities are"
# Same ranking as computed earlier and stored in ordered_fatalities_event
print(ordered_fatalities_event)
## # A tibble: 985 × 2
## EVTYPE FATALITIES
## <fctr> <dbl>
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
## # ... with 975 more rows
paste("The events that cause the most injuries are")
## [1] "The events that cause the most injuries are"
# Show the ranking by injuries together with the INJURIES totals. The previous
# expression sorted by INJURIES but then dropped column 3 (INJURIES itself),
# so the table displayed FATALITIES instead. ordered_injuries_event holds the
# same ordering with the FATALITIES column removed.
# NOTE: the recorded output below still shows the old FATALITIES column and
# must be regenerated.
print(ordered_injuries_event)
## # A tibble: 985 × 2
## EVTYPE FATALITIES
## <fctr> <dbl>
## 1 TORNADO 5633
## 2 TSTM WIND 504
## 3 FLOOD 470
## 4 EXCESSIVE HEAT 1903
## 5 LIGHTNING 816
## 6 HEAT 937
## 7 ICE STORM 89
## 8 FLASH FLOOD 978
## 9 THUNDERSTORM WIND 133
## 10 HAIL 15
## # ... with 975 more rows
paste("The events that cause the most damage to crops are")
## [1] "The events that cause the most damage to crops are"
# Same ranking as computed earlier and stored in ordered_event_cropdmg
print(ordered_event_cropdmg)
## # A tibble: 985 × 2
## EVTYPE CROPDMG
## <fctr> <dbl>
## 1 HAIL 579596.28
## 2 FLASH FLOOD 179200.46
## 3 FLOOD 168037.88
## 4 TSTM WIND 109202.60
## 5 TORNADO 100018.52
## 6 THUNDERSTORM WIND 66791.45
## 7 DROUGHT 33898.62
## 8 THUNDERSTORM WINDS 18684.93
## 9 HIGH WIND 17283.21
## 10 HEAVY RAIN 11122.80
## # ... with 975 more rows
# Barchart of Crop Damage
# Plot the ten event types with the largest summed CROPDMG. The data are read
# straight from ordered_event_cropdmg (already sorted in decreasing order)
# instead of columns 3:4 of top_events_eco: that frame is a bind_cols() of two
# tables that both carry an EVTYPE column, so selecting by position and then
# mapping `EVTYPE` breaks as soon as bind_cols() repairs duplicated names.
# The x axis text is blanked; event types are identified by the fill legend.
g <- ggplot(
  head(ordered_event_cropdmg, n = 10),
  aes(x = factor(EVTYPE), y = CROPDMG, fill = as.factor(EVTYPE))
)
g +
  geom_bar(stat = "identity") +
  labs(x = "Event", y = "(Crop Damages)") +
  ggtitle("Crop Damage by Event") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
paste("The events that cause the most damage to property are")
## [1] "The events that cause the most damage to property are"
# Same ranking as computed earlier and stored in ordered_event_propdmg
print(ordered_event_propdmg)
## # A tibble: 985 × 2
## EVTYPE PROPDMG
## <fctr> <dbl>
## 1 TORNADO 3212258.2
## 2 FLASH FLOOD 1420124.6
## 3 TSTM WIND 1335965.6
## 4 FLOOD 899938.5
## 5 THUNDERSTORM WIND 876844.2
## 6 HAIL 688693.4
## 7 LIGHTNING 603351.8
## 8 THUNDERSTORM WINDS 446293.2
## 9 HIGH WIND 324731.6
## 10 WINTER STORM 132720.6
## # ... with 975 more rows
# Barchart of Property Damage
# Plot the ten event types with the largest summed PROPDMG. The data are read
# straight from ordered_event_propdmg (already sorted in decreasing order)
# instead of columns 1:2 of top_events_eco: that frame is a bind_cols() of two
# tables that both carry an EVTYPE column, so selecting by position and then
# mapping `EVTYPE` breaks as soon as bind_cols() repairs duplicated names.
# The x axis text is blanked; event types are identified by the fill legend.
g <- ggplot(
  head(ordered_event_propdmg, n = 10),
  aes(x = factor(EVTYPE), y = PROPDMG, fill = as.factor(EVTYPE))
)
g +
  geom_bar(stat = "identity") +
  labs(x = "Event", y = "(Property Damages)") +
  ggtitle("Property Damage by Event") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
paste("The 10 states suffering the most fatalities due to weather are")
## [1] "The 10 states suffering the most fatalities due to weather are"
# Same ranking as computed earlier and stored in ordered_fatalities_state
print(ordered_fatalities_state)
## # A tibble: 72 × 2
## STATE FATALITIES
## <fctr> <dbl>
## 1 IL 1421
## 2 TX 1366
## 3 PA 846
## 4 AL 784
## 5 MO 754
## 6 FL 746
## 7 MS 555
## 8 CA 550
## 9 AR 530
## 10 TN 521
## # ... with 62 more rows
# Single-panel layout for the base-graphics chart
par(mfrow = c(1, 1))
# Bars for the first ten rows of the fatality ranking by event type; the
# legend carries the event names.
barplot(
  height = ordered_fatalities_event$FATALITIES[1:10],
  main = "Events with Most Fatalities",
  ylab = "Fatalities",
  legend.text = ordered_fatalities_event$EVTYPE[1:10],
  col = topo.colors(10)
)
paste("The 10 states suffering the most injuries due to weather are")
## [1] "The 10 states suffering the most injuries due to weather are"
# Same ranking as computed earlier and stored in ordered_injuries_state
print(ordered_injuries_state)
## # A tibble: 72 × 2
## STATE INJURIES
## <fctr> <dbl>
## 1 TX 17667
## 2 MO 8998
## 3 AL 8742
## 4 OH 7112
## 5 MS 6675
## 6 FL 5918
## 7 OK 5710
## 8 IL 5563
## 9 AR 5550
## 10 TN 5202
## # ... with 62 more rows
paste("The 10 states that suffered the worst crop damage are and the phenomena that caused the damage are")
## [1] "The 10 states that suffered the worst crop damage are and the phenomena that caused the damage are"
# Same ranking as computed earlier and stored in ordered_crop_event_state_eco
print(ordered_crop_event_state_eco)
## Source: local data frame [4,258 x 3]
## Groups: EVTYPE [985]
##
## EVTYPE STATE CROPDMG
## <fctr> <fctr> <dbl>
## 1 HAIL NE 201031.15
## 2 HAIL TX 103947.70
## 3 HAIL KS 80734.15
## 4 HAIL IA 47875.76
## 5 FLOOD IA 43273.10
## 6 TSTM WIND NE 37418.00
## 7 HAIL ND 28818.70
## 8 FLASH FLOOD WI 25645.37
## 9 FLASH FLOOD IA 25187.50
## 10 FLASH FLOOD NE 25018.17
## # ... with 4,248 more rows
paste("The 10 states that suffered the worst property damage are and the phenomena that caused the damage are")
## [1] "The 10 states that suffered the worst property damage are and the phenomena that caused the damage are"
# Same ranking as computed earlier and stored in ordered_prop_event_state_eco
print(ordered_prop_event_state_eco)
## Source: local data frame [4,258 x 3]
## Groups: EVTYPE [985]
##
## EVTYPE STATE PROPDMG
## <fctr> <fctr> <dbl>
## 1 TORNADO TX 283097.2
## 2 TORNADO MS 187840.9
## 3 TORNADO AL 167816.2
## 4 TORNADO OK 165167.9
## 5 TORNADO FL 159752.6
## 6 TORNADO IA 152142.8
## 7 TORNADO GA 151349.5
## 8 TSTM WIND TX 144959.0
## 9 TORNADO KS 143209.9
## 10 TORNADO MO 132159.9
## # ... with 4,248 more rows
```