The data used here were collected by the National Weather Service. They record the storms, floods, heat waves and other kinds of events that affected public health or the economy from 1950 to 2011, and contain 902297 observations.
The conclusions are as follows:
# Load the raw storm events table from the CSV file in the working directory.
storm <- read.csv(file = "StormData.csv", sep = ",")
Head of the data
# Preview the first six rows to see which columns are available.
head(storm, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
Fatalities
# Total number of fatalities over all recorded events.
sum(storm[["FATALITIES"]])
## [1] 15145
Injuries
# Total number of injuries over all recorded events.
sum(storm[["INJURIES"]])
## [1] 140528
From 1950 to 2011, tornadoes, floods and other events caused about 15 thousand deaths and 140 thousand injuries.
index_health = fatalities * k + injuries
where the ratio k = total injuries / total fatalities, so that one fatality is weighted as k injuries
# Ratio k: total injuries divided by total fatalities over the whole data set.
total_ratio_injuries <- sum(storm[["INJURIES"]])
k <- total_ratio_injuries / sum(storm[["FATALITIES"]])
rm(total_ratio_injuries)
# Per-event health index: fatalities scaled by k, plus injuries.
index_health <- k * storm[["FATALITIES"]] + storm[["INJURIES"]]
PROPDMGEXP (the magnitude code of PROPDMG) is not clean enough
# List the distinct magnitude codes. Wrapping the column in factor() keeps
# this working when read.csv() returns character columns (the default since
# R 4.0), where levels() on its own would return NULL.
levels(factor(storm$PROPDMGEXP))
## [1] "" "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"
Checking the counts
# Count the records per magnitude code. The column name is written out in
# full: `storm$PROPDMGE` only resolved through partial matching of `$`,
# which raised a "Name partially matched in data frame" warning.
tapply(storm$PROPDMGEXP, storm$PROPDMGEXP, length)
## - ? + 0 1 2 3 4 5
## 465934 1 8 5 216 25 13 4 4 28
## 6 7 8 B h H K m M
## 4 5 1 40 1 6 424665 7 11330
About 0.04% of the records (a share of 0.0003635) have an unclear PROPDMGEXP code.
# Records per magnitude code. The column name is written out in full so that
# `$` does not have to fall back on partial matching (which warns).
a <- tapply(storm$PROPDMGEXP, storm$PROPDMGEXP, length)
# Share of records whose code is neither empty nor one of "B", "K", "M".
# Selecting by name replaces the chained negative indices
# a[-1][-13][-15][-16], which removed exactly these four entries but only
# for one particular ordering of the codes.
sum(a[!(names(a) %in% c("", "B", "K", "M"))]) / sum(a)
## [1] 0.0003635
Summary of the public health index
# Five-number summary and mean of the per-event health index.
summary(object = index_health)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 0 0 5410
Health index with injuries or fatalities:
# Number of events whose health index is positive.
sum(0 < index_health)
## [1] 21929
21929 events had an influence on public health; they are stored in data_health.
# Rows of storm whose health index is positive.
data_health1 <- storm[index_health > 0, ]
# Reduced table: event type, start date, state and the matching index values.
data_health <- data.frame(
  EVTYPE = data_health1$EVTYPE,
  BGN_DATE = data_health1$BGN_DATE,
  STATE = data_health1$STATE,
  index_health = index_health[index_health > 0]
)
data_health holds the event type, date, state, and index of public health. For the economy data, keep the events whose PROPDMGEXP is "M" or "B".
# Rows of storm whose property damage magnitude code is "M" or "B".
data_eco1 <- with(storm, storm[PROPDMGEXP == "M" | PROPDMGEXP == "B", ])
Convert the variable PROPDMG into dollars
# Property damage in dollars: PROPDMG times 10^6 when the magnitude code is
# "M", otherwise times 10^9 (data_eco1 only holds "M" and "B" rows).
# Computed on the whole column at once instead of growing the vector one
# element per loop iteration; unlike `for (i in 1:dim(data_eco1)[1])` this
# also works when data_eco1 has zero rows.
index_eco_consequance <- data_eco1$PROPDMG *
  ifelse(data_eco1$PROPDMGEXP == "M", 10^6, 10^9)
Create the data_eco
# Reduced economy table: event type, start date, state and damage in dollars.
data_eco <- data.frame(
  EVTYPE = data_eco1$EVTYPE,
  BGN_DATE = data_eco1$BGN_DATE,
  STATE = data_eco1$STATE,
  index_eco = index_eco_consequance
)
# Show the 40 events with the largest health index. TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
head(data_health[order(data_health$index_health, decreasing = TRUE), ], 40)
## EVTYPE BGN_DATE STATE index_health
## 7667 HEAT 7/12/1995 0:00:00 IL 5409.6
## 21439 TORNADO 5/22/2011 0:00:00 MO 2616.1
## 6360 TORNADO 4/10/1979 0:00:00 TX 2089.7
## 3112 TORNADO 6/9/1953 0:00:00 MA 2063.1
## 3153 TORNADO 6/8/1953 0:00:00 MI 1861.3
## 5988 TORNADO 5/11/1953 0:00:00 TX 1654.8
## 8476 ICE STORM 2/8/1994 0:00:00 OH 1577.3
## 4812 TORNADO 4/3/1974 0:00:00 OH 1484.0
## 21396 TORNADO 4/27/2011 0:00:00 AL 1208.3
## 3590 TORNADO 3/3/1966 0:00:00 MS 1032.9
## 2274 TORNADO 5/25/1955 0:00:00 KS 965.9
## 12435 EXCESSIVE HEAT 7/28/1999 0:00:00 IL 918.6
## 21306 TORNADO 4/27/2011 0:00:00 AL 885.6
## 16458 HURRICANE/TYPHOON 8/13/2004 0:00:00 FL 845.0
## 12840 EXCESSIVE HEAT 7/4/1999 0:00:00 PA 821.6
## 11965 FLOOD 10/17/1998 0:00:00 TX 818.6
## 1616 TORNADO 4/21/1967 0:00:00 IL 806.2
## 417 TORNADO 3/21/1952 0:00:00 AR 788.9
## 12612 EXCESSIVE HEAT 7/18/1999 0:00:00 MO 786.7
## 11978 FLOOD 10/17/1998 0:00:00 TX 750.0
## 6212 TORNADO 5/11/1970 0:00:00 TX 741.2
## 1847 TORNADO 4/11/1965 0:00:00 IN 717.7
## 11964 FLOOD 10/17/1998 0:00:00 TX 702.1
## 3641 TORNADO 2/21/1971 0:00:00 MS 689.4
## 520 TORNADO 5/15/1968 0:00:00 AR 665.5
## 311 TORNADO 11/15/1989 0:00:00 AL 657.9
## 1611 TORNADO 4/21/1967 0:00:00 IL 632.7
## 3520 TORNADO 12/5/1953 0:00:00 MS 622.6
## 8623 EXCESSIVE HEAT 7/1/1995 0:00:00 PA 621.7
## 1765 TORNADO 8/28/1990 0:00:00 IL 619.1
## 2356 TORNADO 6/8/1966 0:00:00 KS 598.5
## 3640 TORNADO 2/21/1971 0:00:00 MS 574.0
## 2121 TORNADO 5/15/1968 0:00:00 IA 570.6
## 11972 FLOOD 10/17/1998 0:00:00 TX 555.7
## 11009 TORNADO 4/8/1998 0:00:00 AL 554.9
## 11968 FLOOD 10/17/1998 0:00:00 TX 550.0
## 2573 TORNADO 4/3/1974 0:00:00 KY 544.6
## 1841 TORNADO 4/11/1965 0:00:00 IN 539.6
## 1846 TORNADO 4/11/1965 0:00:00 IN 539.6
## 18665 EXCESSIVE HEAT 8/4/2007 0:00:00 MO 537.6
Number of events
# The 40 events with the highest health index. TRUE is spelled out because
# `T` is an ordinary variable that can be reassigned.
plot_data2 <- head(
  data_health[order(data_health$index_health, decreasing = TRUE), ],
  40
)
# Number of those events per event type, most frequent first.
aaa <- tapply(plot_data2$index_health, plot_data2$EVTYPE, length)
aaa <- sort(aaa, decreasing = TRUE)
# Names of the counted event types, in order of frequency.
names_health <- names(aaa)
# Keep the rows whose event type is among names_health.
plot_data2 <- plot_data2[plot_data2$EVTYPE %in% names_health, ]
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.1
### use package ggplot2
# Bar chart of how often each event type appears in plot_data2.
g <- ggplot(plot_data2, aes(EVTYPE))
# The title is set through labs(title = ...). The previous
# theme(title = element_text("...")) passed the title text as the font
# family, so no title was drawn and every text element raised a
# "font family ... not found" warning.
g + geom_bar(aes(fill = EVTYPE)) +
  labs(
    title = "the most Harmful Types of Events for Public Health ",
    x = "types of events",
    y = "count of types in first 40 harmful types"
  )
# Show the 40 events with the largest property damage. TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
head(data_eco[order(data_eco$index_eco, decreasing = TRUE), ], 40)
## EVTYPE BGN_DATE STATE index_eco
## 8081 FLOOD 1/1/2006 0:00:00 CA 1.150e+11
## 7839 STORM SURGE 8/29/2005 0:00:00 LA 3.130e+10
## 7838 HURRICANE/TYPHOON 8/28/2005 0:00:00 LA 1.693e+10
## 7885 STORM SURGE 8/29/2005 0:00:00 MS 1.126e+10
## 7768 HURRICANE/TYPHOON 10/24/2005 0:00:00 FL 1.000e+10
## 7884 HURRICANE/TYPHOON 8/28/2005 0:00:00 MS 7.350e+09
## 7886 HURRICANE/TYPHOON 8/29/2005 0:00:00 MS 5.880e+09
## 7269 HURRICANE/TYPHOON 8/13/2004 0:00:00 FL 5.420e+09
## 6361 TROPICAL STORM 6/5/2001 0:00:00 TX 5.150e+09
## 2774 WINTER STORM 3/12/1993 0:00:00 AL 5.000e+09
## 3011 RIVER FLOOD 8/31/1993 0:00:00 IL 5.000e+09
## 7275 HURRICANE/TYPHOON 9/4/2004 0:00:00 FL 4.830e+09
## 7282 HURRICANE/TYPHOON 9/13/2004 0:00:00 FL 4.000e+09
## 7843 HURRICANE/TYPHOON 9/23/2005 0:00:00 LA 4.000e+09
## 9635 STORM SURGE/TIDE 9/12/2008 0:00:00 TX 4.000e+09
## 4725 FLOOD 4/18/1997 0:00:00 ND 3.000e+09
## 5631 HURRICANE 9/15/1999 0:00:00 NC 3.000e+09
## 10934 TORNADO 5/22/2011 0:00:00 MO 2.800e+09
## 3161 HEAVY RAIN/SEVERE WEATHER 5/8/1995 0:00:00 LA 2.500e+09
## 7224 HURRICANE/TYPHOON 9/13/2004 0:00:00 AL 2.500e+09
## 2918 HURRICANE OPAL 10/3/1995 0:00:00 FL 2.100e+09
## 8031 HURRICANE/TYPHOON 9/23/2005 0:00:00 TX 2.090e+09
## 11012 FLOOD 5/1/2011 0:00:00 TN 2.000e+09
## 10504 HAIL 10/5/2010 0:00:00 AZ 1.800e+09
## 5419 HURRICANE 9/21/1998 0:00:00 PR 1.700e+09
## 2919 TORNADOES, TSTM WIND, HAIL 3/12/1993 0:00:00 FL 1.600e+09
## 5931 WILD/FOREST FIRE 5/4/2000 0:00:00 NM 1.500e+09
## 7752 HURRICANE/TYPHOON 7/9/2005 0:00:00 FL 1.500e+09
## 10284 FLOOD 5/1/2010 0:00:00 TN 1.500e+09
## 10906 TORNADO 4/27/2011 0:00:00 AL 1.500e+09
## 7272 HIGH WIND 8/13/2004 0:00:00 FL 1.300e+09
## 3902 SEVERE THUNDERSTORM 5/5/1995 0:00:00 TX 1.200e+09
## 6750 WILDFIRE 10/25/2003 0:00:00 CA 1.040e+09
## 2923 HURRICANE OPAL 10/4/1995 0:00:00 FL 1.000e+09
## 6726 FLASH FLOOD 5/7/2003 0:00:00 AL 1.000e+09
## 7677 HURRICANE/TYPHOON 8/27/2005 0:00:00 AL 1.000e+09
## 9634 HURRICANE 9/12/2008 0:00:00 TX 1.000e+09
## 10743 TORNADO 4/27/2011 0:00:00 AL 1.000e+09
## 11009 FLOOD 5/1/2011 0:00:00 MS 1.000e+09
## 7270 HIGH WIND 8/13/2004 0:00:00 FL 9.290e+08
Count the number of events in the first 40 data
# The 40 events with the largest property damage. TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
plot_data <- head(data_eco[order(data_eco$index_eco, decreasing = TRUE), ], 40)
# Number of those events per event type, most frequent first.
aaa <- tapply(plot_data$EVTYPE, plot_data$EVTYPE, length)
aaa <- sort(aaa, decreasing = TRUE)
# Up to seven most frequent event types. head() is used instead of aaa[1:7]
# so that no NA names appear when fewer than seven types are present.
names_eco <- head(names(aaa), 7)
plot_data <- plot_data[plot_data$EVTYPE %in% names_eco, ]
# Bar chart of how often each of these event types appears in plot_data.
g <- ggplot(plot_data, aes(EVTYPE))
# The title is set through labs(title = ...). The previous
# theme(title = element_text("...")) passed the title text as the font
# family, so no title was drawn and every text element raised a
# "font family ... not found" warning.
g + geom_bar(aes(fill = EVTYPE)) +
  labs(
    title = "the most Harmful Types of Events for Economy ",
    x = "types of events",
    y = "count of types in first 40 harmful types"
  )
- Tornadoes, floods and excessive heat had the biggest influence on public health.
- Hurricanes/typhoons, floods and tornadoes had the biggest influence on the economy.