(энд чинь Synopsis-ийн текст байна…)
# Load the raw storm CSV. Text columns stay as character vectors (no factors)
# so they can be cleaned with ordinary string functions later on.
storm <- read.csv(
  file = "repdata-data-StormData.csv",
  stringsAsFactors = FALSE
)

# Row and column counts of the loaded table.
dim(storm)
## [1] 902297 37

# Preview the first six records.
head(storm, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the columns used by the health and economic summaries, and
# normalise the date and event-type fields in the same pipeline.
storm_sub <- storm %>%
  select(
    BGN_DATE, EVTYPE,
    FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
  ) %>%
  mutate(
    # Raw values look like "4/18/1950 0:00:00"; only the calendar date is kept.
    BGN_DATE = as.Date(BGN_DATE, format = "%m/%d/%Y %H:%M:%S"),
    # Upper-case first, then strip surrounding whitespace, so labels that
    # differ only by case or padding end up in the same group.
    EVTYPE = trimws(toupper(EVTYPE))
  )
# Convert damage-exponent codes into numeric multipliers.
#
# Args:
#   exp: Vector of exponent codes (e.g. the PROPDMGEXP / CROPDMGEXP columns).
#        Matching ignores case and surrounding whitespace.
#
# Returns:
#   A numeric vector the same length as `exp`:
#     "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9,
#     anything else (blank, "+", "-", "?", digits, NA) -> 1,
#   i.e. unrecognised codes leave the raw damage figure unscaled.
#   Zero-length input yields numeric(0).
#
# NOTE(review): "H" is read as hundreds by analogy with K/M/B; digit codes
# keep the original multiplier of 1. Confirm both against the dataset
# documentation.
exp_to_num <- function(exp) {
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

  code <- toupper(trimws(exp))

  # match() returns NA for codes outside the table (including NA itself), so
  # a single default assignment covers every unrecognised case.
  out <- unname(multipliers[match(code, names(multipliers))])
  out[is.na(out)] <- 1
  out
}
# Derive dollar amounts: turn each exponent code into a multiplier, scale the
# reported mantissas by it, and add property and crop losses together.
# mutate() evaluates its arguments in order, so later columns can use the
# multipliers defined just above them.
storm_sub <- storm_sub %>%
  mutate(
    PROP_EXP_NUM = exp_to_num(PROPDMGEXP),
    CROP_EXP_NUM = exp_to_num(CROPDMGEXP),
    PROP_DAMAGE = PROPDMG * PROP_EXP_NUM,
    CROP_DAMAGE = CROPDMG * CROP_EXP_NUM,
    TOTAL_DAMAGE = PROP_DAMAGE + CROP_DAMAGE
  )
# Health impact per event type: fatalities, injuries and their sum, ordered
# from the most to the least harmful event type.
health_by_event <- storm_sub %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE),
    # Built from the two per-group totals computed just above.
    total_health_impact = total_fatalities + total_injuries
  ) %>%
  arrange(desc(total_health_impact))

# Show the ten event types with the largest combined count.
head(health_by_event, n = 10)
## # A tibble: 10 × 4
## EVTYPE total_fatalities total_injuries total_health_impact
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
library(ggplot2)
# health_by_event is already sorted in descending order, so its first ten rows
# are the ten most harmful event types.
top_health <- health_by_event[seq_len(10), ]

# Horizontal bar chart; reorder() sorts the bars by combined casualties.
ggplot(
  data = top_health,
  mapping = aes(
    x = reorder(EVTYPE, total_health_impact),
    y = total_health_impact
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Event Type",
    y = "Total Fatalities + Injuries",
    title = "Top 10 Weather Events Most Harmful to Population Health in the U.S."
  )
# Economic impact per event type, ordered from the largest to the smallest
# combined loss.
#
# All three totals are sums of the exponent-scaled dollar columns
# (PROP_DAMAGE, CROP_DAMAGE, TOTAL_DAMAGE), so they share one unit.
# The crop total must come from CROP_DAMAGE: summing the raw CROPDMG mantissa
# ignores the K/M/B multiplier and reports figures that are orders of
# magnitude too small and inconsistent with total_econ_damage. Output recorded
# from runs that summed CROPDMG shows those unscaled crop values.
econ_by_event <- storm_sub %>%
  group_by(EVTYPE) %>%
  summarise(
    total_property_damage = sum(PROP_DAMAGE, na.rm = TRUE),
    total_crop_damage = sum(CROP_DAMAGE, na.rm = TRUE),
    total_econ_damage = sum(TOTAL_DAMAGE, na.rm = TRUE)
  ) %>%
  arrange(desc(total_econ_damage))

# Show the ten costliest event types.
head(econ_by_event, 10)
## # A tibble: 10 × 4
## EVTYPE total_property_damage total_crop_damage total_econ_damage
## <chr> <dbl> <dbl> <dbl>
## 1 FLOOD 144657709807 168038. 150319678257
## 2 HURRICANE/TYPHOON 69305840000 4798. 71913712800
## 3 TORNADO 56937160779. 100019. 57352114049.
## 4 STORM SURGE 43323536000 5 43323541000
## 5 HAIL 15732267048. 579596. 18758221521.
## 6 FLASH FLOOD 16140862067. 179200. 17562179167.
## 7 DROUGHT 1046106000 33899. 15018672000
## 8 HURRICANE 11868319010 5339. 14610229010
## 9 RIVER FLOOD 5118945500 3490 10148404500
## 10 ICE STORM 3944927860 1689. 8967041360
# econ_by_event is already sorted in descending order, so its first ten rows
# are the ten costliest event types.
top_econ <- econ_by_event[seq_len(10), ]

# Horizontal bar chart of combined damage, rescaled to billions for the axis;
# reorder() sorts the bars by the unscaled total.
ggplot(
  data = top_econ,
  mapping = aes(
    x = reorder(EVTYPE, total_econ_damage),
    y = total_econ_damage / 1e9
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Event Type",
    y = "Total Damage (Billions of USD)",
    title = "Top 10 Weather Events by Total Economic Damage in the U.S."
  )