library(Zelig)
library(plyr)
library(stargazer)
library(texreg)
library(DescTools)
library(magrittr)
library(faraway)
library(dplyr)
library(tidyr)
library(survival)
library(readr)
library(foreign)
library(nnet)
library(ggplot2)
library(reshape2)
library(plotly)
library(rgl)
library(sjmisc)
library(ZeligChoice)
library(scales)
d_csv <- read_csv("C:/Users/AHMED/Desktop/guns.csv", col_names = TRUE)
d_csv2 <- read_csv("C:/Users/AHMED/Desktop/guns2.csv", col_names = TRUE)
data(d_csv)
head(d_csv)
## # A tibble: 6 × 12
## X1 year month intent police sex sex1 age
## <int> <int> <int> <chr> <int> <chr> <int> <int>
## 1 1 2012 1 Suicide 0 M 1 34
## 2 2 2012 1 Suicide 0 F 0 21
## 3 3 2012 1 Suicide 0 M 1 60
## 4 4 2012 2 Suicide 0 M 1 64
## 5 5 2012 2 Suicide 0 M 1 31
## 6 6 2012 2 Suicide 0 M 1 17
## # ... with 4 more variables: race <chr>, hispanic <int>, place <chr>,
## # education <int>
table(d_csv$intent, d_csv$sex)
##
## F M
## Accidental 215 1410
## Homicide 5293 28457
## Suicide 8687 54475
## Undetermined 169 637
g2 <- ggplot()+
geom_bar(data = d_csv, aes(x = intent, y = sex, fill=intent), stat = "identity")+ facet_wrap("sex") +
theme(axis.text.x=element_blank())+
ylab("Sex") + coord_flip()
g2 + ggtitle("Relationship Between Sex and Intent") +
theme(plot.title = element_text(lineheight=.8, face="bold"))

table(d_csv$intent, d_csv$month)
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## Accidental 151 126 132 97 115 112 147 164 116 129 159 177
## Homicide 2711 2095 2660 2720 2845 2985 3130 2987 2859 2862 2809 3087
## Suicide 5219 4729 5309 5438 5506 5367 5513 5419 5343 5254 5084 4981
## Undetermined 72 56 66 73 72 68 57 73 80 51 78 60
g3 <- ggplot()+
geom_bar(data = d_csv, aes(x = intent, y = month, fill=intent), stat = "identity")+ facet_wrap("month") +
theme(axis.text.x=element_blank())+
ylab("Months") + coord_flip()
g3 + ggtitle("Relationship Between Month and Intent") +
theme(plot.title = element_text(lineheight=.8, face="bold"))

table(d_csv$intent, d_csv$race)
##
## Asian/Pacific Islander Black Hispanic
## Accidental 12 321 145
## Homicide 527 19145 5348
## Suicide 745 3331 3169
## Undetermined 10 125 72
##
## Native American/Native Alaskan White
## Accidental 21 1126
## Homicide 300 8430
## Suicide 554 55363
## Undetermined 14 585
g4 <- ggplot()+
geom_bar(data = d_csv, aes(x = intent, y = race, fill=intent), stat = "identity")+ facet_wrap("race") +
theme(axis.text.x=element_blank())+
ylab("Race") + coord_flip()
g4 + ggtitle("Relationship Between Race and Intent") +
theme(plot.title = element_text(lineheight=.8, face="bold"))

table(d_csv$intent, d_csv$education)
##
## 1 2 3 4 5
## Accidental 492 633 327 146 27
## Homicide 11465 14980 5391 1493 421
## Suicide 9291 26321 15532 11147 871
## Undetermined 200 324 180 93 9
g5 <- ggplot(d_csv, mapping = aes(x = education, y = intent))
g5 <- g5 + geom_bin2d()
g5 + ggtitle("Relationship Between Education and Intent") +
theme(plot.title = element_text(lineheight=.8, face="bold"))

g5

g6 <- ggplot()+
geom_bar(data = d_csv, aes(x = intent, y = education, fill=intent), stat = "identity")+ facet_wrap("education") +
theme(axis.text.x=element_blank())+
ylab("Race") + coord_flip()
g6 + ggtitle("Relationship Between Education and Intent") +
theme(plot.title = element_text(lineheight=.8, face="bold"))

g7 <- ggplot(data = d_csv, mapping = aes(x=race)) + geom_bar()
g7

g8 <- ggplot(data = d_csv, mapping = aes(x=intent)) + geom_bar()
g8

d_csv3 <- d_csv2
d_csv3$sex <-factor(d_csv3$sex, labels = c("M", "F"))
d_csv3$place <-factor(d_csv3$place, labels = c("Farm", "Home", "Industrial", "Other specified", "Other unspecified", "Residential", "School", "Sports", "Street", "Trade"))
d_csv3$intent <-factor(d_csv3$intent, labels = c("S", "H"))
qplot(sex, age, data = d_csv3, geom="boxplot")+ geom_point() +
facet_grid(.~place, scales="free",space="free")

ds <- ddply(d_csv3, .(intent), summarise, mean = mean(age), sd = sd(age))
ggplot()+
geom_point(data= d_csv3, aes(x=intent, y=age)) + scale_y_continuous() + scale_x_discrete() + xlab("Suicide Rates vs. Non-Suicide Rates") +
geom_point(data=ds, aes(x=intent, y = mean, colour = 'red', size = 3)) +
geom_errorbar(data=ds, aes(x = intent, y = mean, ymin = mean - sd, ymax = mean + sd),
colour = 'red', width = 0.4) +
facet_grid(sex ~ place, margins = TRUE)

head(d_csv2)
## # A tibble: 6 × 14
## X1 year month intent intentn intentalt police sex sex1 age
## <int> <int> <int> <chr> <int> <chr> <int> <chr> <int> <int>
## 1 1 2012 1 Suicide 1 Suicide 0 M 1 34
## 2 2 2012 1 Suicide 1 Suicide 0 F 0 21
## 3 3 2012 1 Suicide 1 Suicide 0 M 1 60
## 4 4 2012 2 Suicide 1 Suicide 0 M 1 64
## 5 5 2012 2 Suicide 1 Suicide 0 M 1 31
## 6 6 2012 2 Suicide 1 Suicide 0 M 1 17
## # ... with 4 more variables: race <chr>, hispanic <int>, place <chr>,
## # education <int>
gunx.z1 <- zelig(intentn ~ sex, data=d_csv2, model="logit", x=TRUE, cite=F)
gunx.z1
## Model:
##
## Call:
## z5$zelig(formula = intentn ~ sex, data = d_csv2, x = TRUE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4626 -1.4626 0.9168 0.9168 0.9755
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.49544 0.01744 28.414 < 2e-16
## sexM 0.15391 0.01891 8.139 3.97e-16
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 125280 on 96911 degrees of freedom
## Residual deviance: 125214 on 96910 degrees of freedom
## AIC: 125218
##
## Number of Fisher Scoring iterations: 4
##
## Next step: Use 'setx' method
gunx.z2 <- zelig(intentn ~ sex + race + place + education + month + age, data=d_csv2, model="logit", x=TRUE, cite=F)
gunx.z2
## Model:
##
## Call:
## z5$zelig(formula = intentn ~ sex + race + place + education +
## month + age, data = d_csv2, x = TRUE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.9746 -0.4548 0.3244 0.5427 3.0725
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.0862571 0.1658869 -12.576 < 2e-16
## sexM 0.9482412 0.0253295 37.436 < 2e-16
## raceBlack -1.7252139 0.0660373 -26.125 < 2e-16
## raceHispanic -0.4843252 0.0678370 -7.140 9.36e-13
## raceNative American/Native Alaskan 0.4503317 0.1003137 4.489 7.15e-06
## raceWhite 1.3182337 0.0641480 20.550 < 2e-16
## placeHome 0.0647881 0.1473945 0.440 0.66026
## placeIndustrial/construction -0.5618730 0.2307180 -2.435 0.01488
## placeOther specified -0.4762156 0.1487508 -3.201 0.00137
## placeOther unspecified -0.5891241 0.1498838 -3.931 8.48e-05
## placeResidential institution -0.7153453 0.2506228 -2.854 0.00431
## placeSchool/instiution -1.0223602 0.1806663 -5.659 1.52e-08
## placeSports 1.0471779 0.3292144 3.181 0.00147
## placeStreet -1.7175874 0.1499785 -11.452 < 2e-16
## placeTrade/service area -0.9353560 0.1538732 -6.079 1.21e-09
## education 0.3556944 0.0105113 33.839 < 2e-16
## month -0.0160029 0.0027830 -5.750 8.91e-09
## age 0.0310136 0.0005868 52.854 < 2e-16
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 125280 on 96911 degrees of freedom
## Residual deviance: 72210 on 96894 degrees of freedom
## AIC: 72246
##
## Number of Fisher Scoring iterations: 5
##
## Next step: Use 'setx' method
gunx.z3 <- zelig(intentn ~ sex*race + place + education + month + age, data=d_csv2, model="logit", x=TRUE, cite=F)
gunx.z3
## Model:
##
## Call:
## z5$zelig(formula = intentn ~ sex * race + place + education +
## month + age, data = d_csv2, x = TRUE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.9834 -0.4565 0.3199 0.5393 2.8797
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -2.2656057 0.2080135 -10.892
## sexM 1.1805791 0.1576851 7.487
## raceBlack -0.9691772 0.1536191 -6.309
## raceHispanic 0.0768284 0.1580061 0.486
## raceNative American/Native Alaskan 0.7635798 0.2407202 3.172
## raceWhite 1.3837138 0.1433743 9.651
## placeHome 0.0697131 0.1477255 0.472
## placeIndustrial/construction -0.5585805 0.2317518 -2.410
## placeOther specified -0.4681542 0.1490992 -3.140
## placeOther unspecified -0.5802699 0.1502337 -3.862
## placeResidential institution -0.7040451 0.2506062 -2.809
## placeSchool/instiution -1.0179252 0.1811162 -5.620
## placeSports 1.0986128 0.3305341 3.324
## placeStreet -1.7095106 0.1503514 -11.370
## placeTrade/service area -0.9302352 0.1542617 -6.030
## education 0.3511560 0.0105032 33.433
## month -0.0157089 0.0027850 -5.641
## age 0.0309402 0.0005877 52.650
## sexM:raceBlack -0.8627399 0.1700902 -5.072
## sexM:raceHispanic -0.6605768 0.1746581 -3.782
## sexM:raceNative American/Native Alaskan -0.3875324 0.2645256 -1.465
## sexM:raceWhite -0.0724273 0.1601308 -0.452
## Pr(>|z|)
## (Intercept) < 2e-16
## sexM 7.05e-14
## raceBlack 2.81e-10
## raceHispanic 0.626799
## raceNative American/Native Alaskan 0.001514
## raceWhite < 2e-16
## placeHome 0.636991
## placeIndustrial/construction 0.015941
## placeOther specified 0.001690
## placeOther unspecified 0.000112
## placeResidential institution 0.004964
## placeSchool/instiution 1.91e-08
## placeSports 0.000888
## placeStreet < 2e-16
## placeTrade/service area 1.64e-09
## education < 2e-16
## month 1.69e-08
## age < 2e-16
## sexM:raceBlack 3.93e-07
## sexM:raceHispanic 0.000156
## sexM:raceNative American/Native Alaskan 0.142919
## sexM:raceWhite 0.651052
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 125280 on 96911 degrees of freedom
## Residual deviance: 72057 on 96890 degrees of freedom
## AIC: 72101
##
## Number of Fisher Scoring iterations: 5
##
## Next step: Use 'setx' method
screenreg(list(gunx.z1, gunx.z2, gunx.z3))
##
## ====================================================================================
## Model 1 Model 2 Model 3
## ------------------------------------------------------------------------------------
## (Intercept) 0.50 *** -2.09 *** -2.27 ***
## (0.02) (0.17) (0.21)
## sexM 0.15 *** 0.95 *** 1.18 ***
## (0.02) (0.03) (0.16)
## raceBlack -1.73 *** -0.97 ***
## (0.07) (0.15)
## raceHispanic -0.48 *** 0.08
## (0.07) (0.16)
## raceNative American/Native Alaskan 0.45 *** 0.76 **
## (0.10) (0.24)
## raceWhite 1.32 *** 1.38 ***
## (0.06) (0.14)
## placeHome 0.06 0.07
## (0.15) (0.15)
## placeIndustrial/construction -0.56 * -0.56 *
## (0.23) (0.23)
## placeOther specified -0.48 ** -0.47 **
## (0.15) (0.15)
## placeOther unspecified -0.59 *** -0.58 ***
## (0.15) (0.15)
## placeResidential institution -0.72 ** -0.70 **
## (0.25) (0.25)
## placeSchool/instiution -1.02 *** -1.02 ***
## (0.18) (0.18)
## placeSports 1.05 ** 1.10 ***
## (0.33) (0.33)
## placeStreet -1.72 *** -1.71 ***
## (0.15) (0.15)
## placeTrade/service area -0.94 *** -0.93 ***
## (0.15) (0.15)
## education 0.36 *** 0.35 ***
## (0.01) (0.01)
## month -0.02 *** -0.02 ***
## (0.00) (0.00)
## age 0.03 *** 0.03 ***
## (0.00) (0.00)
## sexM:raceBlack -0.86 ***
## (0.17)
## sexM:raceHispanic -0.66 ***
## (0.17)
## sexM:raceNative American/Native Alaskan -0.39
## (0.26)
## sexM:raceWhite -0.07
## (0.16)
## ------------------------------------------------------------------------------------
## AIC 125218.27 72246.42 72100.77
## BIC 125237.24 72417.09 72309.36
## Log Likelihood -62607.14 -36105.21 -36028.38
## Deviance 125214.27 72210.42 72056.77
## Num. obs. 96912 96912 96912
## ====================================================================================
## *** p < 0.001, ** p < 0.01, * p < 0.05