Before start, load the data

rm(list=ls())
getwd()
## [1] "E:/Dropbox/Git/Reproducible2"
list.files()
## [1] "code.R"                         "markdown.html"                 
## [3] "markdown.Rmd"                   "repdata_data_StormData.csv.bz2"
## [5] "Reproducible2.Rproj"
data<-read.csv("repdata_data_StormData.csv.bz2")
table(data$STATE__)
## 
##     1     2     4     5     6     8     9    10    11    12    13    15    16 
## 22739  4390  6156 27102 10780 20473  3294  1913   450 22124 25259  2547  4767 
##    17    18    19    20    21    22    23    24    25    26    27    28    29 
## 28488 21506 31069 53441 22092 17323  4524  8173  5651 17911 23609 22192 35648 
##    30    31    32    33    34    35    36    37    38    39    40    41    42 
## 14695 30271  3139  3022  8074  7130 21058 25351 14630 24923 46802  4821 22226 
##    44    45    46    47    48    49    50    51    53    54    55    56    60 
##   839 17125 21728 21721 83728  4135  3871 21189  3312  9099 19781  7332   257 
##    66    68    72    78    81    83    84    85    86    87    88    89    90 
##   306     1  3016   338   274     1    28  5337    96  1879  3250    23   654 
##    91    92    93    94    95 
##  1347   262     9    70  1526
colnames(data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
head(table(data$EVTYPE))
## 
##    HIGH SURF ADVISORY         COASTAL FLOOD           FLASH FLOOD 
##                     1                     1                     1 
##             LIGHTNING             TSTM WIND       TSTM WIND (G45) 
##                     1                     4                     1

Population health can be monitored by looking up injuries + fatalities

data1<-data.frame(cbind(data$EVTYPE, data$INJURIES + data$FATALITIES))
colnames(data1)<-c("disaster", "effect")
data1$effect<-as.numeric(data1$effect)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
disaster_group<-group_by(data1, disaster)
summary1<-disaster_group %>% summarise(sum = sum(effect))
head(table(summary1$sum))
## 
##   0   1   2   3   4   5 
## 765  55  30  10  11   9
summary1<-subset(summary1, summary1$sum != 0)

Huge typos! have to modify & unite groups

Finding groups of strings

library(stringdist)
## Warning: package 'stringdist' was built under R version 4.0.5
dist.matrix<-stringdistmatrix(tolower(summary1$disaster),tolower(summary1$disaster), method = 'jw')
row.names(dist.matrix)<-summary1$disaster
names(dist.matrix)<-summary1$disaster
dist.matrix<-as.dist(dist.matrix)
clusts<-hclust(dist.matrix, method = 'ward.D2')
plot(clusts)

Clust level of 0.4 seems reasonable

summary1$groups<-cutree(clusts,h=0.4)
#A function that matches the gruop and name
Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}
summary1<-summary1%>%
  group_by(groups)%>%
  mutate(disaster_type=Mode(disaster))
summary1<-data.frame(summary1$disaster_type, summary1$sum)
summary1<-group_by(summary1, summary1.disaster_type)
summary1<-summary1 %>% summarise(sum = sum(summary1.sum))
table(summary1$summary1.disaster_type)
## 
##                       AVALANCE                      BLACK ICE 
##                              1                              1 
##                       BLIZZARD                   blowing snow 
##                              1                              1 
##                  COASTAL FLOOD                  Coastal Storm 
##                              1                              1 
##                           Cold               Cold Temperature 
##                              1                              1 
##                COLD/WIND CHILL                      DENSE FOG 
##                              1                              1 
##                        DROUGHT                       DROWNING 
##                              1                              1 
##                 DRY MICROBURST                     Dust Devil 
##                              1                              1 
##                 EXCESSIVE HEAT                  Extended Cold 
##                              1                              1 
##                    FLASH FLOOD                          FLOOD 
##                              1                              1 
##                            FOG                         FREEZE 
##                              1                              1 
##                   FUNNEL CLOUD                          GLAZE 
##                              1                              1 
##                     GUSTY WIND                           HAIL 
##                              1                              1 
##                 HAZARDOUS SURF                           HEAT 
##                              1                              1 
##                     HEAVY RAIN                     HEAVY SNOW 
##                              1                              1 
##                           HIGH                      HIGH WIND 
##                              1                              1 
##                      HURRICANE          HYPERTHERMIA/EXPOSURE 
##                              1                              1 
##                            ICE                    ICE ON ROAD 
##                              1                              1 
##                      ICE STORM                      LANDSLIDE 
##                              1                              1 
##                      LIGHTNING                Marine Accident 
##                              1                              1 
##                 MINOR FLOODING                   MIXED PRECIP 
##                              1                              1 
##         NON-SEVERE WIND DAMAGE                  NON TSTM WIND 
##                              1                              1 
##                      RAIN/SNOW           RAPIDLY RISING WATER 
##                              1                              1 
##                    RECORD COLD                    RIP CURRENT 
##                              1                              1 
##                    RIVER FLOOD                     ROGUE WAVE 
##                              1                              1 
##                          SLEET                     SMALL HAIL 
##                              1                              1 
##                           Snow                    SNOW SQUALL 
##                              1                              1 
##                    STORM SURGE                    THUNDERSNOW 
##                              1                              1 
##                        TORNADO                 TROPICAL STORM 
##                              1                              1 
##                      TSTM WIND                        TSUNAMI 
##                              1                              1 
##                        TYPHOON              UNSEASONABLY COLD 
##                              1                              1 
## URBAN AND SMALL STREAM FLOODIN                     WATERSPOUT 
##                              1                              1 
##                     WILD FIRES                           WIND 
##                              1                              1 
##                   WINTER STORM 
##                              1
colnames(summary1)<-c("disaster", "sum")
barplot(summary1$sum~summary1$disaster)

It seems that one specific incident caused the most casualties

subset(summary1, summary1$sum==max(summary1$sum))
## # A tibble: 1 x 2
##   disaster   sum
##   <chr>    <dbl>
## 1 TORNADO  97026

Tornado caused most casualties in the US.

Q2

Regarding ecnomic consequences, new subdataset can be extracted. Ecnomic damage was estimated using the sum of property and crop damage.

Other processes were analogous to the previous analysis.

colnames(data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Damage is summation of property damage and crop damage
data2<-data.frame(cbind(data$EVTYPE, data$PROPDMG+data$CROPDMG))
colnames(data2)<-c("disaster", "damage")
data2$damage<-as.numeric(data2$damage)
disaster_group<-group_by(data2, disaster)
summary1<-disaster_group %>% summarise(sum = sum(damage))
summary2<-disaster_group %>% summarise(sum = sum(damage), average = mean(damage))
#Eliminate occasions with zero damage
summary1<-subset(summary1, summary1$sum != 0)
#Finding groups of strings
dist.matrix<-stringdistmatrix(tolower(summary1$disaster),tolower(summary1$disaster), method = 'jw')
row.names(dist.matrix)<-summary1$disaster
names(dist.matrix)<-summary1$disaster
dist.matrix<-as.dist(dist.matrix)
clusts<-hclust(dist.matrix, method = 'ward.D2')
plot(clusts)

Also, clust level of 0.4 seems reasonable, therefore

summary1$groups<-cutree(clusts,h=0.4)

#A function that matches the gruop and name
Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

summary1<-summary1%>%
  group_by(groups)%>%
  mutate(disaster_type=Mode(disaster))
summary1<-data.frame(summary1$disaster_type, summary1$sum)
summary1<-group_by(summary1, summary1.disaster_type)
summary1<-summary1 %>% summarise(sum = sum(summary1.sum))
table(summary1$summary1.disaster_type)
## 
##        HIGH SURF ADVISORY               FLASH FLOOD                 TSTM WIND 
##                         1                         1                         1 
##                         ?       AGRICULTURAL FREEZE             APACHE COUNTY 
##                         1                         1                         1 
##    ASTRONOMICAL HIGH TIDE                 AVALANCHE             Beach Erosion 
##                         1                         1                         1 
##                  BLIZZARD              BLOWING DUST          BREAKUP FLOODING 
##                         1                         1                         1 
##                BRUSH FIRE COASTAL  FLOODING/EROSION           COASTAL EROSION 
##                         1                         1                         1 
##                      Cold                 DAM BREAK                 DENSE FOG 
##                         1                         1                         1 
##                 DOWNBURST                   DROUGHT            DRY MICROBURST 
##                         1                         1                         1 
##                Dust Devil                DUST STORM        Erosion/Cstl Flood 
##                         1                         1                         1 
##            EXCESSIVE HEAT             Extended Cold         FLASH FLOOD/FLOOD 
##                         1                         1                         1 
##                     FLOOD          FLOOD/RAIN/WINDS                    Freeze 
##                         1                         1                         1 
##          Freezing drizzle                     FROST              FUNNEL CLOUD 
##                         1                         1                         1 
##                     Glaze           GROUND BLIZZARD                  GUSTNADO 
##                         1                         1                         1 
##                GUSTY WIND                      HAIL               HAIL DAMAGE 
##                         1                         1                         1 
##                 HAIL/WIND                 HAILSTORM                      HEAT 
##                         1                         1                         1 
##           HEAVY LAKE SNOW                 HEAVY MIX       HEAVY PRECIPITATION 
##                         1                         1                         1 
##      HEAVY RAIN AND FLOOD      Heavy Rain/High Surf        HEAVY SNOW-SQUALLS 
##                         1                         1                         1 
##  HEAVY SNOW/FREEZING RAIN                Heavy Surf               HIGH  WINDS 
##                         1                         1                         1 
##                 HIGH SEAS                 HURRICANE                       ICE 
##                         1                         1                         1 
##                 ICE FLOES          ICE/STRONG WINDS          LAKE-EFFECT SNOW 
##                         1                         1                         1 
##                LAKE FLOOD                 LANDSLIDE          LATE SEASON SNOW 
##                         1                         1                         1 
##       LIGHT FREEZING RAIN                Light snow               MAJOR FLOOD 
##                         1                         1                         1 
##           Marine Accident                Microburst                 MUD SLIDE 
##                         1                         1                         1 
##    NON-SEVERE WIND DAMAGE                     Other               RECORD COLD 
##                         1                         1                         1 
##               RIP CURRENT               RIVER FLOOD               RURAL FLOOD 
##                         1                         1                         1 
##       SEVERE THUNDERSTORM                SMALL HAIL                      Snow 
##                         1                         1                         1 
##       SNOW AND HEAVY SNOW               SNOW SQUALL         SNOW/ BITTER COLD 
##                         1                         1                         1 
##         STORM FORCE WINDS         THUDERSTORM WINDS                   TORNADO 
##                         1                         1                         1 
##       TROPICAL DEPRESSION                 Tstm Wind            TSTM WIND/HAIL 
##                         1                         1                         1 
##         Unseasonable Cold           URBAN AND SMALL                WATERSPOUT 
##                         1                         1                         1 
##                WILD FIRES              WINTER STORM            WINTER WEATHER 
##                         1                         1                         1
colnames(summary1)<-c("disaster", "sum")
barplot(summary1$sum~summary1$disaster)

subset(summary1, summary1$sum==max(summary1$sum))
## # A tibble: 1 x 2
##   disaster      sum
##   <chr>       <dbl>
## 1 TORNADO  3314563.

Therefore, tornado caused most economic damages in the US also.

For the average damage per incident, a barplot was established.

barplot(summary2$average~summary2$disaster)

subset(summary2, summary2$average==max(summary2$average))
## # A tibble: 1 x 3
##   disaster                sum average
##   <chr>                 <dbl>   <dbl>
## 1 TROPICAL STORM GORDON  1000    1000

Therefore, in average per incident, tropical storm exhibited the highest damage