Synopsis

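This report examines which types of severe weather events in the U.S. storm events dataset (902,297 records) are most harmful to population health and which cause the greatest economic damage. Health impact is measured as fatalities plus injuries per event type; economic impact is measured as property plus crop damage after applying the K/M/B magnitude codes. Tornadoes are by far the most harmful to population health (96,979 combined fatalities and injuries), while floods cause the greatest total economic damage (about 150 billion USD), followed by hurricanes/typhoons and tornadoes.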

Data Processing

# Read the raw storm events CSV into a data frame; text columns stay as
# character vectors instead of being converted to factors.
storm <- read.csv(
  file = "repdata-data-StormData.csv",
  stringsAsFactors = FALSE
)
# Show the dimensions (rows, columns) of the loaded table.
dim(storm)
## [1] 902297     37
# Preview the first six rows.
head(storm, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the columns used by the analysis, then clean two of them:
#   * BGN_DATE is parsed from "month/day/year hour:minute:second" text
#     into a Date.
#   * EVTYPE is upper-cased and stripped of surrounding whitespace so that
#     differently-cased or padded labels group together.
storm_sub <- storm %>%
  select(
    BGN_DATE, EVTYPE,
    FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
  ) %>%
  mutate(
    BGN_DATE = as.Date(BGN_DATE, format = "%m/%d/%Y %H:%M:%S"),
    EVTYPE   = trimws(toupper(EVTYPE))
  )

# Convert damage magnitude codes into numeric multipliers.
#
# Args:
#   exp: Vector of magnitude codes (character, or anything coercible to
#        character). Matching ignores case and surrounding whitespace.
#
# Returns:
#   An unnamed numeric vector the same length as `exp`:
#   "K" -> 1e3, "M" -> 1e6, "B" -> 1e9. Every other value, including the
#   empty string, "+", "-", "?", "0" and NA, maps to 1, so the damage
#   figure is used unscaled.
exp_to_num <- function(exp) {
  code <- toupper(trimws(as.character(exp)))
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)

  # match() yields NA for unrecognised or missing codes, so one fallback
  # assignment covers every case that is not K/M/B.
  idx <- match(code, names(multipliers))
  out <- unname(multipliers)[idx]
  out[is.na(out)] <- 1
  out
}

# Translate the magnitude codes into multipliers and derive dollar amounts:
# property damage, crop damage, and their sum per record.
storm_sub <- storm_sub %>%
  mutate(
    PROP_EXP_NUM = exp_to_num(PROPDMGEXP),
    CROP_EXP_NUM = exp_to_num(CROPDMGEXP),
    PROP_DAMAGE  = PROPDMG * PROP_EXP_NUM,
    CROP_DAMAGE  = CROPDMG * CROP_EXP_NUM,
    TOTAL_DAMAGE = PROP_DAMAGE + CROP_DAMAGE
  )

# Aggregate fatalities and injuries per event type, then rank event types by
# their combined count (fatalities + injuries), largest first.
health_by_event <- storm_sub %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities    = sum(FATALITIES, na.rm = TRUE),
    total_injuries      = sum(INJURIES, na.rm = TRUE),
    total_health_impact = total_fatalities + total_injuries
  ) %>%
  arrange(desc(total_health_impact))

# Display the ten highest-ranked event types.
head(health_by_event, n = 10)
## # A tibble: 10 × 4
##    EVTYPE            total_fatalities total_injuries total_health_impact
##    <chr>                        <dbl>          <dbl>               <dbl>
##  1 TORNADO                       5633          91346               96979
##  2 EXCESSIVE HEAT                1903           6525                8428
##  3 TSTM WIND                      504           6957                7461
##  4 FLOOD                          470           6789                7259
##  5 LIGHTNING                      816           5230                6046
##  6 HEAT                           937           2100                3037
##  7 FLASH FLOOD                    978           1777                2755
##  8 ICE STORM                       89           1975                2064
##  9 THUNDERSTORM WIND              133           1488                1621
## 10 WINTER STORM                   206           1321                1527
library(ggplot2)

# Take the first ten rows of the ranked health table for plotting.
top_health <- health_by_event[seq_len(10), ]

# Horizontal bar chart of combined fatalities and injuries; bars are ordered
# by that total so the most harmful event type appears at the top.
ggplot(
  data = top_health,
  mapping = aes(
    x = reorder(EVTYPE, total_health_impact),
    y = total_health_impact
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Event Type",
    y = "Total Fatalities + Injuries",
    title = "Top 10 Weather Events Most Harmful to Population Health in the U.S."
  )

# Aggregate economic damage per event type and rank event types by total
# damage (property + crop), largest first.
#
# All three totals are built from the scaled dollar columns (PROP_DAMAGE,
# CROP_DAMAGE, TOTAL_DAMAGE). The crop total must use CROP_DAMAGE rather
# than the raw CROPDMG figure, which has not had its magnitude multiplier
# applied and would not be comparable with the other two columns.
econ_by_event <- storm_sub %>%
  group_by(EVTYPE) %>%
  summarise(
    total_property_damage = sum(PROP_DAMAGE,  na.rm = TRUE),
    total_crop_damage     = sum(CROP_DAMAGE,  na.rm = TRUE),
    total_econ_damage     = sum(TOTAL_DAMAGE, na.rm = TRUE)
  ) %>%
  arrange(desc(total_econ_damage))

# Display the ten event types with the largest total damage.
head(econ_by_event, 10)
## # A tibble: 10 × 4
##    EVTYPE            total_property_damage total_crop_damage total_econ_damage
##    <chr>                             <dbl>             <dbl>             <dbl>
##  1 FLOOD                     144657709807            168038.     150319678257 
##  2 HURRICANE/TYPHOON          69305840000              4798.      71913712800 
##  3 TORNADO                    56937160779.           100019.      57352114049.
##  4 STORM SURGE                43323536000                 5       43323541000 
##  5 HAIL                       15732267048.           579596.      18758221521.
##  6 FLASH FLOOD                16140862067.           179200.      17562179167.
##  7 DROUGHT                     1046106000             33899.      15018672000 
##  8 HURRICANE                  11868319010              5339.      14610229010 
##  9 RIVER FLOOD                 5118945500              3490       10148404500 
## 10 ICE STORM                   3944927860              1689.       8967041360
# Take the first ten rows of the ranked economic table for plotting.
top_econ <- econ_by_event[seq_len(10), ]

# Horizontal bar chart of total damage, expressed in billions of dollars;
# bars are ordered by total damage so the costliest event type is at the top.
ggplot(
  data = top_econ,
  mapping = aes(
    x = reorder(EVTYPE, total_econ_damage),
    y = total_econ_damage / 1e9
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Event Type",
    y = "Total Damage (Billions of USD)",
    title = "Top 10 Weather Events by Total Economic Damage in the U.S."
  )