美國2012-2014年遭槍殺分析

這個分析檢查了2012-2014年美國的槍殺率,數據來源是CDC(美國疾病管制與預防中心)。

這裡可以看到數據。

資料匯入

guns <- data.table::fread("/Users/yuzhe/Downloads/guns.csv", header = T, drop = 1)
head(guns)
##    year month  intent police sex age                           race
## 1: 2012    01 Suicide      0   M  34         Asian/Pacific Islander
## 2: 2012    01 Suicide      0   F  21                          White
## 3: 2012    01 Suicide      0   M  60                          White
## 4: 2012    02 Suicide      0   M  64                          White
## 5: 2012    02 Suicide      0   M  31                          White
## 6: 2012    02 Suicide      0   M  17 Native American/Native Alaskan
##    hispanic           place education
## 1:      100            Home         4
## 2:      100          Street         3
## 3:      100 Other specified         4
## 4:      100            Home         4
## 5:      100 Other specified         2
## 6:      100            Home         1
guns <- na.omit(guns)

# The purpose of *10000 and the *100 are to convert 2012, 01, 01 into 20120101 for readability
guns$year <- as.numeric(guns$year)
guns$month <- as.numeric(guns$month)

guns$date <- (guns$year*10000 + guns$month*100 + 1) %>% anydate()

monthly_rates <- aggregate(x = guns$date, by = list(month = guns$date), length)

圖表

2012年死亡人數折線圖

ggplot(monthly_rates[1:12, ], aes(x = month, y = x, color = "steelblue")) +
  geom_line() +
  ylim(0, 3500) +
  labs(title = "2012 Gun Deaths Line Chart") +
  xlab("Monthly Gun Death Count in the US, 2014") +
  ylab("Gun Deaths Count")

2013年死亡人數折線圖

ggplot(monthly_rates[13:24, ], aes(x = month, y = x, color = "steelblue")) +
  geom_line() +
  ylim(0, 3500) +
  labs(title = "2013 Gun Deaths Line Chart") +
  xlab("Monthly Gun Death Count in the US, 2013") +
  ylab("Gun Deaths Count")

2014年死亡人數折線圖

ggplot(monthly_rates[25:36, ], aes(x = month, y = x, color = "steelblue")) +
  geom_line() +
  ylim(0, 3500) +
  labs(title = "2014 Gun Deaths Line Chart") +
  xlab("Monthly Gun Death Count in the US, 2014") +
  ylab("Gun Deaths Count")

2012-2014年死亡人數與男女 長條圖

intent_sex <- aggregate(guns$sex, by = list(intent = guns$intent, sex = guns$sex), length)
ggplot(intent_sex, aes(x = intent, y = x, fill = sex)) +
  geom_bar(stat = "identity", position = "stack") +
  ylab("Count") +
  ggtitle("Gender distribution\nGun Deaths US: 2012-2014")

2012-2014年死亡人數與教育程度 長條圖

guns$education = as.factor(guns$education)
intent_edu <- aggregate(guns$education, by = list(intent = guns$intent, education = guns$education), length)
ggplot(intent_edu, aes(x = intent, y = x, fill = education)) +
  geom_bar(stat = "identity", position = "stack") +
  coord_flip() +
  xlab("Count") +
  ylab("Intent") +
  ggtitle("Education distribution\nin Gun Deaths US: 2012-2014")

2012-2014年死亡人數與地區分佈 長條圖

intent_place <- aggregate(guns$intent, by = list(place = guns$place, intent = guns$intent), length)
ggplot(intent_place, aes(x = intent, y = x, fill = place)) +
  geom_bar(stat = "identity", position = "stack") +
  coord_flip() +
  xlab("Count") +
  ylab("Intent") +
  ggtitle("Location distribution\nin Gun Deaths US: 2012-2014")

2012-2014年死亡人數與地區分佈 長條圖

Trade/service areaIndustrial/construction視為Street,並把剩下除了HomeStreet設為Other

#These are too many categories and it's hard to arrive to conclusions
# let's merge 'street' with 'trade/service area' and the rest to 'Other'
guns[which(guns$place == "Trade/service area" | guns$place == "Industrial/construction"), "place"]  = "Street"
guns[which(guns$place != "Street" & guns$place != "Home"), "place"] = "Other"

intent_place <- aggregate(guns$intent, by = list(place = guns$place, intent = guns$intent), length)
ggplot(intent_place, aes(x = intent, y = x, fill = place)) +
  geom_bar(stat = "identity", position = "stack") +
  coord_flip() +
  xlab("Count") +
  ylab("Intent") +
  ggtitle("Location distribution\nin Gun Deaths US: 2012-2014")

2012-2014年死亡人數與地區分佈(比例) 長條圖

place_died <- guns %>%
        group_by(place, intent) %>%
        tally() %>%
        group_by(place) %>%
        mutate(pct = n / sum(n))

ggplot(place_died, aes(factor(place), pct, fill = intent)) +
  geom_bar(stat = "identity", color = "grey40") +
  xlab("Place") +
  ylab("Percentage")+
  labs(fill = "Intent")

性別和死亡種類數量 長條圖

ggplot(intent_sex, aes(x = sex, y = x, fill = intent)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Gender") +
  ylab("Freqency") +
  ggtitle("Gender Distribution by Intent")

教育程度和死亡種類 長條圖

ggplot(intent_edu, aes(x = education, y = x, fill = intent)) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle("Education Distribution by Intent")

年齡分佈 直方圖

ggplot(guns, aes(x = age)) +
  geom_histogram(breaks = seq(0, 100, 10), fill="lightblue", color="lightyellow")

Suicide和Homicide年齡分佈比較 直方圖

suicide = guns[guns$intent == 'Suicide', ]
homicide = guns[guns$intent == 'Homicide', ] 

p1 <- ggplot(suicide, aes(x = age))+
  geom_histogram(breaks = seq(0, 100, 10), color="lightyellow", fill="lightskyblue2") +
  ggtitle("Suicide gun deaths\nAge Distribution")
p2 <- ggplot(homicide, aes(x = age))+
  geom_histogram(breaks = seq(0, 100, 10), color="lightyellow", fill="lightskyblue2") +
  ggtitle("Homicide gun deaths\nAge Distribution")

multiplot(p1, p2, cols = 2)

意外種類和年齡 箱形圖

guns_box <- guns[which(guns$intent != "Undetermined"),]
qplot(factor(intent), age, data = guns_box, geom = "boxplot", xlab = "Intent")

意外種類、年齡和性別 箱形圖

guns_box <- guns[which(guns$intent != "Undetermined"), ]
qplot(intent, age, fill = sex, data = guns_box, geom = "boxplot", xlab = "Intent")