## [1] "RESULTS SECTION AND FIGURES ARE AFTER THE CODE INPUT"
## [1] "Description: Storm types and their health and economic costs:"
## [1] "Synopsis: Many natural phenomena are destructive to human health and can cause extensive economic losses. Across the US, according to the data provided, certain types of events are far more harmful to both health and economy than others. By most metrics, tornadoes are the most destructive natural phenomena -- they cause the largest number of nationwide fatalities and injuries, as well as causing the maximum damage to property. Heat, excessive heat and flash floods also cause very large numbers of fatalities and injuries. Many phenomena which cause fatalities and injuries have geographical attributes: fatalities and injuries due to tornadoes are found primarily in prairie states such as AL, TX and IN; deaths owing to avalanches are found in mountain states such as CO, UT, WA and AK. Similarly, phenomena with economic consequences also have geographical attributes."
# Bias number formatting strongly toward fixed (non-scientific) notation.
options(scipen = 999)
## [1] "Data Processing"
## REDUCING DATA TO NECESSARY VARIABLES

# Casualty counts per record, keyed by state and event type.
storm_physical <- storm_data %>%
  select(STATE, EVTYPE, FATALITIES, INJURIES)
# Damage figures together with their *EXP companion columns.
# NOTE(review): PROPDMGEXP / CROPDMGEXP are carried along here but nothing
# visible in this script applies them to PROPDMG / CROPDMG -- presumably they
# are magnitude codes (e.g. "K"); confirm before comparing damage totals.
storm_econ <- select(
  storm_data,
  STATE, EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)
# The same measures again, this time keeping each record's begin/end dates.
storm_date_physical <- select(
  storm_data,
  STATE, EVTYPE, BGN_DATE, END_DATE, FATALITIES, INJURIES
)
storm_date_eco <- select(
  storm_data,
  STATE, EVTYPE, BGN_DATE, END_DATE, PROPDMG, CROPDMG
)
# Group the casualty table two ways: per state and per event type.
by_state_physical <- storm_physical %>% group_by(STATE)
# The grouped table is echoed and assigned in the same statement.
by_event_physical <- print(group_by(storm_physical, EVTYPE))
## Source: local data frame [902,297 x 4]
## Groups: EVTYPE [985]
## 
##     STATE  EVTYPE FATALITIES INJURIES
##    <fctr>  <fctr>      <dbl>    <dbl>
## 1      AL TORNADO          0       15
## 2      AL TORNADO          0        0
## 3      AL TORNADO          0        2
## 4      AL TORNADO          0        2
## 5      AL TORNADO          0        2
## 6      AL TORNADO          0        6
## 7      AL TORNADO          0        1
## 8      AL TORNADO          0        0
## 9      AL TORNADO          1       14
## 10     AL TORNADO          0        0
## # ... with 902,287 more rows
# Group the economic table two ways: per state and per event type.
by_state_eco <- storm_econ %>% group_by(STATE)
# The grouped table is echoed and assigned in the same statement.
by_event_eco <- print(group_by(storm_econ, EVTYPE))
## Source: local data frame [902,297 x 6]
## Groups: EVTYPE [985]
## 
##     STATE  EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
##    <fctr>  <fctr>   <dbl>     <fctr>   <dbl>     <fctr>
## 1      AL TORNADO    25.0          K       0           
## 2      AL TORNADO     2.5          K       0           
## 3      AL TORNADO    25.0          K       0           
## 4      AL TORNADO     2.5          K       0           
## 5      AL TORNADO     2.5          K       0           
## 6      AL TORNADO     2.5          K       0           
## 7      AL TORNADO     2.5          K       0           
## 8      AL TORNADO     2.5          K       0           
## 9      AL TORNADO    25.0          K       0           
## 10     AL TORNADO    25.0          K       0           
## # ... with 902,287 more rows
## SUMMARIZING PHYSICAL AND ECONOMIC DATA
paste("This sections summarizes Physical and Economic Data")
## [1] "This sections summarizes Physical and Economic Data"
## Summary of Physical Data
# Total fatalities and injuries for every event type.
summarize_event_physical <- by_event_physical %>%
  summarize_at(c("FATALITIES", "INJURIES"), sum)
# Event types ranked by total fatalities (largest first); INJURIES is dropped.
ordered_fatalities_event <- print(select(
  summarize_event_physical[order(-summarize_event_physical$FATALITIES), ],
  -INJURIES
))
## # A tibble: 985 × 2
##            EVTYPE FATALITIES
##            <fctr>      <dbl>
## 1         TORNADO       5633
## 2  EXCESSIVE HEAT       1903
## 3     FLASH FLOOD        978
## 4            HEAT        937
## 5       LIGHTNING        816
## 6       TSTM WIND        504
## 7           FLOOD        470
## 8     RIP CURRENT        368
## 9       HIGH WIND        248
## 10      AVALANCHE        224
## # ... with 975 more rows
# Event types ranked by total injuries (largest first); FATALITIES is dropped.
ordered_injuries_event <- select(
  summarize_event_physical[order(-summarize_event_physical$INJURIES), ],
  -FATALITIES
)
# First column (the event labels) of the ten rows with the most fatalities.
top_events <- print(head(ordered_fatalities_event, n = 10)[[1]])
##  [1] TORNADO        EXCESSIVE HEAT FLASH FLOOD    HEAT          
##  [5] LIGHTNING      TSTM WIND      FLOOD          RIP CURRENT   
##  [9] HIGH WIND      AVALANCHE     
## 985 Levels:    HIGH SURF ADVISORY  COASTAL FLOOD ... WND
# Total fatalities and injuries for every state.
summarize_state_physical <- by_state_physical %>%
  summarize_at(c("FATALITIES", "INJURIES"), sum)
# States ranked by total fatalities (largest first); INJURIES is dropped.
ordered_fatalities_state <- print(select(
  summarize_state_physical[order(-summarize_state_physical$FATALITIES), ],
  -INJURIES
))
## # A tibble: 72 × 2
##     STATE FATALITIES
##    <fctr>      <dbl>
## 1      IL       1421
## 2      TX       1366
## 3      PA        846
## 4      AL        784
## 5      MO        754
## 6      FL        746
## 7      MS        555
## 8      CA        550
## 9      AR        530
## 10     TN        521
## # ... with 62 more rows
# States ranked by total injuries (largest first); FATALITIES is dropped.
ordered_injuries_state <- select(
  summarize_state_physical[order(-summarize_state_physical$INJURIES), ],
  -FATALITIES
)
# Casualty totals for every state/event-type pair.
by_state_event <- by_state_physical %>% group_by(STATE, EVTYPE)
summarize_state_event <- by_state_event %>%
  summarize_at(c("FATALITIES", "INJURIES"), sum)
# State/event pairs ranked by fatalities (largest first); INJURIES is dropped.
ordered_fatalities_state_event <- print(select(
  summarize_state_event[order(-summarize_state_event$FATALITIES), ],
  -INJURIES
))
## Source: local data frame [4,258 x 3]
## Groups: STATE [72]
## 
##     STATE         EVTYPE FATALITIES
##    <fctr>         <fctr>      <dbl>
## 1      IL           HEAT        653
## 2      AL        TORNADO        617
## 3      TX        TORNADO        538
## 4      MS        TORNADO        450
## 5      MO        TORNADO        388
## 6      AR        TORNADO        379
## 7      TN        TORNADO        368
## 8      PA EXCESSIVE HEAT        359
## 9      IL EXCESSIVE HEAT        330
## 10     OK        TORNADO        296
## # ... with 4,248 more rows
# Same pairing as the state/event summary, grouped event-first.
by_event_state <- by_state_physical %>% group_by(EVTYPE, STATE)
summarize_event_state <- by_event_state %>%
  summarize_at(c("FATALITIES", "INJURIES"), sum)
# Event/state pairs ranked by fatalities (largest first); INJURIES is dropped.
ordered_fatalities_event_state <- print(select(
  summarize_event_state[order(-summarize_event_state$FATALITIES), ],
  -INJURIES
))
## Source: local data frame [4,258 x 3]
## Groups: EVTYPE [985]
## 
##            EVTYPE  STATE FATALITIES
##            <fctr> <fctr>      <dbl>
## 1            HEAT     IL        653
## 2         TORNADO     AL        617
## 3         TORNADO     TX        538
## 4         TORNADO     MS        450
## 5         TORNADO     MO        388
## 6         TORNADO     AR        379
## 7         TORNADO     TN        368
## 8  EXCESSIVE HEAT     PA        359
## 9  EXCESSIVE HEAT     IL        330
## 10        TORNADO     OK        296
## # ... with 4,248 more rows
# Event/state pairs ranked by injuries (largest first); FATALITIES is dropped.
ordered_injuries_event_state <- select(
  summarize_event_state[order(-summarize_event_state$INJURIES), ],
  -FATALITIES
)
## SUMMARIZING ECONOMIC
# Sum of the CROPDMG column for every event type.
# NOTE(review): CROPDMG is summed as-is, without CROPDMGEXP -- if that column
# is a per-row magnitude code, these sums mix units; confirm.
summarize_by_event_cropdmg <- by_event_eco %>% summarize_at("CROPDMG", sum)
# Event types ranked by that sum, largest first.
ordered_event_cropdmg <- print(
  summarize_by_event_cropdmg[order(-summarize_by_event_cropdmg$CROPDMG), ]
)
## # A tibble: 985 × 2
##                EVTYPE   CROPDMG
##                <fctr>     <dbl>
## 1                HAIL 579596.28
## 2         FLASH FLOOD 179200.46
## 3               FLOOD 168037.88
## 4           TSTM WIND 109202.60
## 5             TORNADO 100018.52
## 6   THUNDERSTORM WIND  66791.45
## 7             DROUGHT  33898.62
## 8  THUNDERSTORM WINDS  18684.93
## 9           HIGH WIND  17283.21
## 10         HEAVY RAIN  11122.80
## # ... with 975 more rows
# Sum of the PROPDMG column for every event type.
# NOTE(review): PROPDMG is summed as-is, without PROPDMGEXP -- if that column
# is a per-row magnitude code, these sums mix units; confirm.
summarize_by_event_propdmg <- by_event_eco %>% summarize_at("PROPDMG", sum)
# Event types ranked by that sum, largest first.
ordered_event_propdmg <- print(
  summarize_by_event_propdmg[order(-summarize_by_event_propdmg$PROPDMG), ]
)
## # A tibble: 985 × 2
##                EVTYPE   PROPDMG
##                <fctr>     <dbl>
## 1             TORNADO 3212258.2
## 2         FLASH FLOOD 1420124.6
## 3           TSTM WIND 1335965.6
## 4               FLOOD  899938.5
## 5   THUNDERSTORM WIND  876844.2
## 6                HAIL  688693.4
## 7           LIGHTNING  603351.8
## 8  THUNDERSTORM WINDS  446293.2
## 9           HIGH WIND  324731.6
## 10       WINTER STORM  132720.6
## # ... with 975 more rows
# Per-state sums of CROPDMG and PROPDMG, each with a largest-first ranking.
summarize_by_state_cropdmg <- by_state_eco %>% summarize_at("CROPDMG", sum)
ordered_state_cropdmg <-
  summarize_by_state_cropdmg[order(-summarize_by_state_cropdmg$CROPDMG), ]
summarize_by_state_propdmg <- by_state_eco %>% summarize_at("PROPDMG", sum)
ordered_state_propdmg <-
  summarize_by_state_propdmg[order(-summarize_by_state_propdmg$PROPDMG), ]
# Per-event crop and property sums side by side. The crop table gives up its
# EVTYPE column first, so the result is ordered CROPDMG, EVTYPE, PROPDMG.
summarize_event_eco_damage <- summarize_by_event_cropdmg %>%
  select(-EVTYPE) %>%
  bind_cols(summarize_by_event_propdmg)
# Displayed only: the table with TOTALDMG added is not assigned anywhere.
mutate(summarize_event_eco_damage, TOTALDMG = PROPDMG + CROPDMG)
## # A tibble: 985 × 4
##    CROPDMG                EVTYPE PROPDMG TOTALDMG
##      <dbl>                <fctr>   <dbl>    <dbl>
## 1        0    HIGH SURF ADVISORY     200      200
## 2        0         COASTAL FLOOD       0        0
## 3        0           FLASH FLOOD      50       50
## 4        0             LIGHTNING       0        0
## 5        0             TSTM WIND     108      108
## 6        0       TSTM WIND (G45)       8        8
## 7        0            WATERSPOUT       0        0
## 8        0                  WIND       0        0
## 9        0                     ?       5        5
## 10       0       ABNORMAL WARMTH       0        0
## # ... with 975 more rows
# Regroup the economic table by event type and state, then drop columns 4 and
# 6 by position. With the column order set where storm_econ is selected, those
# are PROPDMGEXP and CROPDMGEXP, so only STATE, EVTYPE, PROPDMG and CROPDMG
# remain. The positional indices silently depend on that column order.
by_event_state_eco <- group_by(by_state_eco, EVTYPE, STATE)[-c(4,6)]
# Sum crop and property damage for every event/state pair and echo the result.
summarize_event_state_eco <- summarize_at(by_event_state_eco, c("CROPDMG", "PROPDMG"), sum) %>% print()
## Source: local data frame [4,258 x 4]
## Groups: EVTYPE [?]
## 
##                   EVTYPE  STATE CROPDMG PROPDMG
##                   <fctr> <fctr>   <dbl>   <dbl>
## 1     HIGH SURF ADVISORY     AS       0     200
## 2          COASTAL FLOOD     NJ       0       0
## 3            FLASH FLOOD     TX       0      50
## 4              LIGHTNING     AZ       0       0
## 5              TSTM WIND     GA       0       8
## 6              TSTM WIND     IL       0     100
## 7        TSTM WIND (G45)     FL       0       8
## 8             WATERSPOUT     FL       0       0
## 9                   WIND     WA       0       0
## 10                     ?     WV       0       5
## # ... with 4,248 more rows
# Event/state pairs ranked by summed crop damage (largest first); the PROPDMG
# column is dropped from the ranked table.
ordered_crop_event_state_eco <- print(select(
  summarize_event_state_eco[order(-summarize_event_state_eco$CROPDMG), ],
  -PROPDMG
))
## Source: local data frame [4,258 x 3]
## Groups: EVTYPE [985]
## 
##         EVTYPE  STATE   CROPDMG
##         <fctr> <fctr>     <dbl>
## 1         HAIL     NE 201031.15
## 2         HAIL     TX 103947.70
## 3         HAIL     KS  80734.15
## 4         HAIL     IA  47875.76
## 5        FLOOD     IA  43273.10
## 6    TSTM WIND     NE  37418.00
## 7         HAIL     ND  28818.70
## 8  FLASH FLOOD     WI  25645.37
## 9  FLASH FLOOD     IA  25187.50
## 10 FLASH FLOOD     NE  25018.17
## # ... with 4,248 more rows
# Event/state pairs ranked by summed property damage (largest first); the
# CROPDMG column is dropped from the ranked table.
ordered_prop_event_state_eco <- print(select(
  summarize_event_state_eco[order(-summarize_event_state_eco$PROPDMG), ],
  -CROPDMG
))
## Source: local data frame [4,258 x 3]
## Groups: EVTYPE [985]
## 
##       EVTYPE  STATE  PROPDMG
##       <fctr> <fctr>    <dbl>
## 1    TORNADO     TX 283097.2
## 2    TORNADO     MS 187840.9
## 3    TORNADO     AL 167816.2
## 4    TORNADO     OK 165167.9
## 5    TORNADO     FL 159752.6
## 6    TORNADO     IA 152142.8
## 7    TORNADO     GA 151349.5
## 8  TSTM WIND     TX 144959.0
## 9    TORNADO     KS 143209.9
## 10   TORNADO     MO 132159.9
## # ... with 4,248 more rows
# One row per state with both damage sums. Both inputs come from the same
# by-state grouping, so the CROPDMG column is attached row-for-row.
summarize_state_damage <- summarize_by_state_propdmg %>%
  bind_cols(summarize_by_state_cropdmg[, "CROPDMG"]) %>%
  print()
## # A tibble: 72 × 3
##     STATE   PROPDMG  CROPDMG
##    <fctr>     <dbl>    <dbl>
## 1      AK  33995.51   205.00
## 2      AL 363606.66  9666.94
## 3      AM   5653.80    50.00
## 4      AN    294.00     0.00
## 5      AR 361121.58 25819.13
## 6      AS   2954.50  1564.00
## 7      AZ  83046.67  1374.00
## 8      CA 203598.79 21152.16
## 9      CO  81496.79  9290.50
## 10     CT  29155.17    30.00
## # ... with 62 more rows
# Displayed only: per-state PROPDMG + CROPDMG; the result is not assigned.
mutate(summarize_state_damage, TOTALDMG = PROPDMG + CROPDMG)
## # A tibble: 72 × 4
##     STATE   PROPDMG  CROPDMG  TOTALDMG
##    <fctr>     <dbl>    <dbl>     <dbl>
## 1      AK  33995.51   205.00  34200.51
## 2      AL 363606.66  9666.94 373273.60
## 3      AM   5653.80    50.00   5703.80
## 4      AN    294.00     0.00    294.00
## 5      AR 361121.58 25819.13 386940.71
## 6      AS   2954.50  1564.00   4518.50
## 7      AZ  83046.67  1374.00  84420.67
## 8      CA 203598.79 21152.16 224750.95
## 9      CO  81496.79  9290.50  90787.29
## 10     CT  29155.17    30.00  29185.17
## # ... with 62 more rows
# Correlation between the per-state property and crop damage sums. One-column
# tables are passed so the result keeps the PROPDMG / CROPDMG labels.
cor(
  summarize_state_damage[, "PROPDMG"],
  summarize_state_damage[, "CROPDMG"]
)
##           CROPDMG
## PROPDMG 0.5919995
# Ranked property and crop tables placed side by side.
# NOTE(review): the two inputs are sorted independently, so a given row pairs
# unrelated event types, and each input contributes its own EVTYPE column.
# Consumers read this table by column position (1:2 property, 3:4 crop).
top_events_eco <- bind_cols(ordered_event_propdmg, ordered_event_cropdmg)
# summarize_state_damage already holds exactly STATE, PROPDMG and CROPDMG.
# Use it directly instead of binding it onto the casualty table and then
# dropping the casualty columns and the duplicate STATE by position.
state_all_eco <- summarize_state_damage
# States ranked by summed property damage, largest first.
state_eco_ranked_propdmg <- state_all_eco[
  order(state_all_eco$PROPDMG, decreasing = TRUE),
] %>%
  print()
## # A tibble: 72 × 3
##     STATE  PROPDMG   CROPDMG
##    <fctr>    <dbl>     <dbl>
## 1      TX 937138.0 156169.36
## 2      IA 685487.3 157808.71
## 3      OH 559834.4  28328.15
## 4      GA 485873.7  10501.81
## 5      MS 481811.8  56077.89
## 6      KS 387183.8 139798.97
## 7      FL 374428.0  10668.38
## 8      NY 373109.4  18916.15
## 9      AL 363606.7   9666.94
## 10     AR 361121.6  25819.13
## # ... with 62 more rows
# SUMMARIZING THE EFFECTS OF TOP EVENTS

# Keep only the casualty records whose event type is one of `top_events`.
# A single vectorised filter replaces a loop that re-copied the growing table
# on every bind_rows() call and hard-coded the number of top events as 1:10.
# Rows now stay in their original order instead of being stacked event by
# event; the visible uses of this table only aggregate it by group.
uni_top_events <- filter(
  storm_physical,
  EVTYPE %in% as.character(top_events)
)

# Group the top-event records per state and per event type.
uni_top_events_by_state <- uni_top_events %>% group_by(STATE)
uni_top_events_by_event <- uni_top_events %>% group_by(EVTYPE)

# Fatalities per state, counting the top events only.
uni_top_events_by_state %>% summarize_at(vars(FATALITIES), sum)
## # A tibble: 55 × 2
##     STATE FATALITIES
##    <fctr>      <dbl>
## 1      AK         37
## 2      AL        738
## 3      AR        503
## 4      AS          9
## 5      AZ        156
## 6      CA        236
## 7      CO        124
## 8      CT         24
## 9      DC         24
## 10     DE         17
## # ... with 45 more rows
# Fatalities per event type, restricted to the top events.
uni_top_events_by_event %>%
  summarize_at(vars(FATALITIES), sum) %>%
  print()
## # A tibble: 10 × 2
##            EVTYPE FATALITIES
##            <fctr>      <dbl>
## 1       AVALANCHE        224
## 2  EXCESSIVE HEAT       1903
## 3     FLASH FLOOD        978
## 4           FLOOD        470
## 5            HEAT        937
## 6       HIGH WIND        248
## 7       LIGHTNING        816
## 8     RIP CURRENT        368
## 9         TORNADO       5633
## 10      TSTM WIND        504
paste("RESULTS")
## [1] "RESULTS"
paste("The events that cause the most fatalities are")
## [1] "The events that cause the most fatalities are"
# Event types ranked by total fatalities (largest first), INJURIES dropped.
print(select(
  summarize_event_physical[order(-summarize_event_physical$FATALITIES), ],
  -INJURIES
))
## # A tibble: 985 × 2
##            EVTYPE FATALITIES
##            <fctr>      <dbl>
## 1         TORNADO       5633
## 2  EXCESSIVE HEAT       1903
## 3     FLASH FLOOD        978
## 4            HEAT        937
## 5       LIGHTNING        816
## 6       TSTM WIND        504
## 7           FLOOD        470
## 8     RIP CURRENT        368
## 9       HIGH WIND        248
## 10      AVALANCHE        224
## # ... with 975 more rows
paste("The events that cause the most injuries are")
## [1] "The events that cause the most injuries are"
# Rank event types by total injuries and show the INJURIES column. The earlier
# form dropped column 3 (INJURIES) and so listed FATALITIES under this
# heading; column 2 (FATALITIES) is the one to drop here.
# NOTE(review): the echoed table that follows in this document was captured
# before this fix and still shows FATALITIES -- re-knit to refresh it.
summarize_event_physical[order((summarize_event_physical$INJURIES), decreasing = TRUE),][-c(2)] %>% print()
## # A tibble: 985 × 2
##               EVTYPE FATALITIES
##               <fctr>      <dbl>
## 1            TORNADO       5633
## 2          TSTM WIND        504
## 3              FLOOD        470
## 4     EXCESSIVE HEAT       1903
## 5          LIGHTNING        816
## 6               HEAT        937
## 7          ICE STORM         89
## 8        FLASH FLOOD        978
## 9  THUNDERSTORM WIND        133
## 10              HAIL         15
## # ... with 975 more rows
paste("The events that cause the most damage to crops are")
## [1] "The events that cause the most damage to crops are"
# Event types ranked by summed CROPDMG, largest first.
print(
  summarize_by_event_cropdmg[order(-summarize_by_event_cropdmg$CROPDMG), ]
)
## # A tibble: 985 × 2
##                EVTYPE   CROPDMG
##                <fctr>     <dbl>
## 1                HAIL 579596.28
## 2         FLASH FLOOD 179200.46
## 3               FLOOD 168037.88
## 4           TSTM WIND 109202.60
## 5             TORNADO 100018.52
## 6   THUNDERSTORM WIND  66791.45
## 7             DROUGHT  33898.62
## 8  THUNDERSTORM WINDS  18684.93
## 9           HIGH WIND  17283.21
## 10         HEAVY RAIN  11122.80
## # ... with 975 more rows
# Barchart of Crop Damage
# Plots the ten event types with the largest summed CROPDMG, read straight
# from the ranked crop table. The previous lookup took columns 3:4 of
# top_events_eco, which only works while bind_cols() keeps two columns that
# are both named EVTYPE; newer dplyr releases rename such duplicates, after
# which aes(EVTYPE) can no longer be resolved.
g <- ggplot(
  head(ordered_event_cropdmg, n = 10),
  aes(x = factor(EVTYPE), y = CROPDMG, fill = as.factor(EVTYPE))
)
# Event names are carried by the fill legend, so the x axis title, labels and
# ticks are all blanked out.
g + geom_bar(stat = "identity") +
  labs(x = "Event", y = "(Crop Damages)") +
  ggtitle("Crop Damage by Event") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

paste("The events that cause the most damage to property are")
## [1] "The events that cause the most damage to property are"
# Event types ranked by summed PROPDMG, largest first.
print(
  summarize_by_event_propdmg[order(-summarize_by_event_propdmg$PROPDMG), ]
)
## # A tibble: 985 × 2
##                EVTYPE   PROPDMG
##                <fctr>     <dbl>
## 1             TORNADO 3212258.2
## 2         FLASH FLOOD 1420124.6
## 3           TSTM WIND 1335965.6
## 4               FLOOD  899938.5
## 5   THUNDERSTORM WIND  876844.2
## 6                HAIL  688693.4
## 7           LIGHTNING  603351.8
## 8  THUNDERSTORM WINDS  446293.2
## 9           HIGH WIND  324731.6
## 10       WINTER STORM  132720.6
## # ... with 975 more rows
# Barchart of Property Damage
# Plots the ten event types with the largest summed PROPDMG, read straight
# from the ranked property table. The previous lookup took columns 1:2 of
# top_events_eco, which only works while bind_cols() keeps two columns that
# are both named EVTYPE; newer dplyr releases rename such duplicates, after
# which aes(EVTYPE) can no longer be resolved.
g <- ggplot(
  head(ordered_event_propdmg, n = 10),
  aes(x = factor(EVTYPE), y = PROPDMG, fill = as.factor(EVTYPE))
)
# Event names are carried by the fill legend, so the x axis title, labels and
# ticks are all blanked out.
g + geom_bar(stat = "identity") +
  labs(x = "Event", y = "(Property Damages)") +
  ggtitle("Property Damage by Event") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

paste("The 10 states suffering the most fatalities due to weather are")
## [1] "The 10 states suffering the most fatalities due to weather are"
# States ranked by total fatalities (largest first), INJURIES dropped. The
# whole ranking is passed to print(), not just ten rows.
print(select(
  summarize_state_physical[order(-summarize_state_physical$FATALITIES), ],
  -INJURIES
))
## # A tibble: 72 × 2
##     STATE FATALITIES
##    <fctr>      <dbl>
## 1      IL       1421
## 2      TX       1366
## 3      PA        846
## 4      AL        784
## 5      MO        754
## 6      FL        746
## 7      MS        555
## 8      CA        550
## 9      AR        530
## 10     TN        521
## # ... with 62 more rows
# Single-panel layout for the base-graphics plot that follows.
par(mfrow = c(1, 1))
# Bar heights are the first ten FATALITIES values of the fatalities ranking;
# the legend carries the matching event labels, one colour per bar.
# NOTE(review): this chart is about event types although it sits between the
# two per-state tables.
barplot(
  height = ordered_fatalities_event$FATALITIES[1:10],
  main = "Events with Most Fatalities",
  ylab = "Fatalities",
  legend.text = ordered_fatalities_event$EVTYPE[1:10],
  col = topo.colors(10)
)

paste("The 10 states suffering the most injuries due to weather are")
## [1] "The 10 states suffering the most injuries due to weather are"
# States ranked by total injuries (largest first), FATALITIES dropped.
print(select(
  summarize_state_physical[order(-summarize_state_physical$INJURIES), ],
  -FATALITIES
))
## # A tibble: 72 × 2
##     STATE INJURIES
##    <fctr>    <dbl>
## 1      TX    17667
## 2      MO     8998
## 3      AL     8742
## 4      OH     7112
## 5      MS     6675
## 6      FL     5918
## 7      OK     5710
## 8      IL     5563
## 9      AR     5550
## 10     TN     5202
## # ... with 62 more rows
paste("The 10 states that suffered the worst crop damage are and the phenomena that caused the damage are")
## [1] "The 10 states that suffered the worst crop damage are and the phenomena that caused the damage are"
# Event/state pairs ranked by summed crop damage (largest first), PROPDMG
# dropped.
print(select(
  summarize_event_state_eco[order(-summarize_event_state_eco$CROPDMG), ],
  -PROPDMG
))
## Source: local data frame [4,258 x 3]
## Groups: EVTYPE [985]
## 
##         EVTYPE  STATE   CROPDMG
##         <fctr> <fctr>     <dbl>
## 1         HAIL     NE 201031.15
## 2         HAIL     TX 103947.70
## 3         HAIL     KS  80734.15
## 4         HAIL     IA  47875.76
## 5        FLOOD     IA  43273.10
## 6    TSTM WIND     NE  37418.00
## 7         HAIL     ND  28818.70
## 8  FLASH FLOOD     WI  25645.37
## 9  FLASH FLOOD     IA  25187.50
## 10 FLASH FLOOD     NE  25018.17
## # ... with 4,248 more rows
paste("The 10 states that suffered the worst property damage are and the phenomena that caused the damage are")
## [1] "The 10 states that suffered the worst property damage are and the phenomena that caused the damage are"
# Event/state pairs ranked by summed property damage (largest first), CROPDMG
# dropped.
print(select(
  summarize_event_state_eco[order(-summarize_event_state_eco$PROPDMG), ],
  -CROPDMG
))
## Source: local data frame [4,258 x 3]
## Groups: EVTYPE [985]
## 
##       EVTYPE  STATE  PROPDMG
##       <fctr> <fctr>    <dbl>
## 1    TORNADO     TX 283097.2
## 2    TORNADO     MS 187840.9
## 3    TORNADO     AL 167816.2
## 4    TORNADO     OK 165167.9
## 5    TORNADO     FL 159752.6
## 6    TORNADO     IA 152142.8
## 7    TORNADO     GA 151349.5
## 8  TSTM WIND     TX 144959.0
## 9    TORNADO     KS 143209.9
## 10   TORNADO     MO 132159.9
## # ... with 4,248 more rows











```