cache = TRUE

# Print the R version, platform, locale and loaded packages for this run, so
# the environment the report was built in is on record.
sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.6 (El Capitan)
## 
## locale:
## [1] ru_RU.UTF-8/ru_RU.UTF-8/ru_RU.UTF-8/C/ru_RU.UTF-8/ru_RU.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] magrittr_1.5    formatR_1.4     tools_3.3.1     htmltools_0.3.5
##  [5] yaml_2.1.13     Rcpp_0.12.5     stringi_1.1.1   rmarkdown_1.0  
##  [9] knitr_1.13      stringr_1.0.0   digest_0.6.9    evaluate_0.9

Загружаем данные

# Switch to the author's project directory when it exists, so that the
# relative "data/..." paths used later resolve against it. On any other
# machine this directory is missing; the guard skips the call instead of
# aborting the whole analysis with "cannot change working directory", and
# the relative paths are then resolved against the current directory.
project_dir <- "~/Documents/Документы/Самообразование/Data Science/R5 Reproducible Research/Course Project 2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# One-time download and parsing of the raw storm data. Left commented out:
# uncomment for the first run, when no local copy of the data exists yet.
# dir.create("data")
# download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", "data/StormData.csv.bz2", method = "curl")
# data <- read.csv("data/StormData.csv.bz2", na.strings = "")

Сохраняем фрейм в виде объекта для ускорения последующей загрузки.

# Uncomment once, right after parsing the CSV, to write the data frame to disk:
# save(data, file = "data/StormData.RData")
# Restore the objects stored in the .RData file into the current workspace.
load(file = "data/StormData.RData")

Проверяем вид таблицы.

# Preview the first six rows of the loaded data frame.
head(data, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6

Сколько раз встречается каждый из типов явлений в таблице? Какие из них самые частые?

# Count the number of records per event type (EVTYPE column), then show the
# 20 most frequent types.
evtypenumber <- table(data$EVTYPE)
# TRUE is spelled out: the alias `T` is an ordinary variable and can be
# reassigned, which would silently flip the sort order.
head(sort(evtypenumber, decreasing = TRUE), 20)
## 
##                     HAIL                TSTM WIND        THUNDERSTORM WIND 
##                   288661                   219940                    82563 
##                  TORNADO              FLASH FLOOD                    FLOOD 
##                    60652                    54277                    25326 
##       THUNDERSTORM WINDS                HIGH WIND                LIGHTNING 
##                    20843                    20212                    15754 
##               HEAVY SNOW               HEAVY RAIN             WINTER STORM 
##                    15708                    11723                    11433 
##           WINTER WEATHER             FUNNEL CLOUD         MARINE TSTM WIND 
##                     7026                     6839                     6175 
## MARINE THUNDERSTORM WIND               WATERSPOUT              STRONG WIND 
##                     5812                     3796                     3566 
##     URBAN/SML STREAM FLD                 WILDFIRE 
##                     3392                     2761

Что в конце таблицы? Какие события редки?

# Show the first 20 event types after sorting the counts in ascending order,
# i.e. the rarest ones. FALSE is spelled out because the alias `F` is an
# ordinary variable that can be reassigned.
head(sort(evtypenumber, decreasing = FALSE), 20)
## 
##          HIGH SURF ADVISORY               COASTAL FLOOD 
##                           1                           1 
##                 FLASH FLOOD                   LIGHTNING 
##                           1                           1 
##             TSTM WIND (G45)                  WATERSPOUT 
##                           1                           1 
##                        WIND                           ? 
##                           1                           1 
##              ABNORMALLY WET               APACHE COUNTY 
##                           1                           1 
##                    AVALANCE                BEACH EROSIN 
##                           1                           1 
##               Beach Erosion BEACH EROSION/COASTAL FLOOD 
##                           1                           1 
##           BITTER WIND CHILL     BLIZZARD AND HEAVY SNOW 
##                           1                           1 
##            Blizzard Summary            BLIZZARD WEATHER 
##                           1                           1 
##      BLIZZARD/FREEZING RAIN          BLIZZARD/HIGH WIND 
##                           1                           1

Есть неизвестные события (“?”), а есть те, которые повторяют частотные. Но есть строки со словом “summary”.

# List the labels of the first 20 event types in the table's own order
# (unsorted by count), to inspect how the raw EVTYPE values are spelled.
names(evtypenumber)[1:20]
##  [1] "   HIGH SURF ADVISORY"  " COASTAL FLOOD"        
##  [3] " FLASH FLOOD"           " LIGHTNING"            
##  [5] " TSTM WIND"             " TSTM WIND (G45)"      
##  [7] " WATERSPOUT"            " WIND"                 
##  [9] "?"                      "ABNORMAL WARMTH"       
## [11] "ABNORMALLY DRY"         "ABNORMALLY WET"        
## [13] "ACCUMULATED SNOWFALL"   "AGRICULTURAL FREEZE"   
## [15] "APACHE COUNTY"          "ASTRONOMICAL HIGH TIDE"
## [17] "ASTRONOMICAL LOW TIDE"  "AVALANCE"              
## [19] "AVALANCHE"              "BEACH EROSIN"

Первые 8 элементов (в алфавитном порядке) начинаются с пробелов. Их меньше 10, так что этим можно пренебречь.

Суммируем количество травм по каждому типу. Выводим первые 20.

# Total the INJURIES column within each event type, then show the 20 types
# with the largest totals.
injuriessum <- tapply(data$INJURIES, data$EVTYPE, sum)
# TRUE is spelled out: the alias `T` is an ordinary, reassignable variable.
head(sort(injuriessum, decreasing = TRUE), 20)
##            TORNADO          TSTM WIND              FLOOD 
##              91346               6957               6789 
##     EXCESSIVE HEAT          LIGHTNING               HEAT 
##               6525               5230               2100 
##          ICE STORM        FLASH FLOOD  THUNDERSTORM WIND 
##               1975               1777               1488 
##               HAIL       WINTER STORM  HURRICANE/TYPHOON 
##               1361               1321               1275 
##          HIGH WIND         HEAVY SNOW           WILDFIRE 
##               1137               1021                911 
## THUNDERSTORM WINDS           BLIZZARD                FOG 
##                908                805                734 
##   WILD/FOREST FIRE         DUST STORM 
##                545                440

Количество жертв:

# Total the FATALITIES column within each event type, then show the 20 types
# with the largest totals.
fatalitiessum <- tapply(data$FATALITIES, data$EVTYPE, sum)
# TRUE is spelled out: the alias `T` is an ordinary, reassignable variable.
head(sort(fatalitiessum, decreasing = TRUE), 20)
##                 TORNADO          EXCESSIVE HEAT             FLASH FLOOD 
##                    5633                    1903                     978 
##                    HEAT               LIGHTNING               TSTM WIND 
##                     937                     816                     504 
##                   FLOOD             RIP CURRENT               HIGH WIND 
##                     470                     368                     248 
##               AVALANCHE            WINTER STORM            RIP CURRENTS 
##                     224                     206                     204 
##               HEAT WAVE            EXTREME COLD       THUNDERSTORM WIND 
##                     172                     160                     133 
##              HEAVY SNOW EXTREME COLD/WIND CHILL             STRONG WIND 
##                     127                     125                     103 
##                BLIZZARD               HIGH SURF 
##                     101                     101

Просто график

# Draw a histogram of the per-type record counts held in evtypenumber.
hist(x = evtypenumber)