For this assignment I am using the General Social Survey of 2012. In the following analysis I examine how respondents' basic demographics affect whether they believe that the racial disparities between blacks and whites are due to discrimination.
library(Zelig)
library(DescTools)
library(stargazer)
library(dplyr)
library(scatterplot3d)
library(tidyr)
library(memisc)
library(pander)
library(gmodels)
library(Hmisc)
library(car)
library(foreign)
library(readstata13)
# Load the GSS extract from its Stata (.dta) file and list its variables.
gss <- read.dta(file = "C:/Users/Xiomara/Desktop/R/GSS.dta")
names(gss)
## [1] "CASEID" "WORKBLKS" "RACDIF1" "RACMAR" "RACDIF2" "RACDIF3"
## [7] "HELPBLK" "HELPPOOR" "YEAR" "SEX" "AGE" "RACE"
## [13] "REALINC" "REALRINC" "EDUC" "DEGREE" "PRESTG80" "PAPRES80"
## [19] "MARITAL" "DIVORCE" "CHILDS" "RELIG" "WRKSLF" "UNEMP"
## [25] "REGION" "SIZE" "RACLIVE" "FEAR" "GUN" "POLVIEWS"
## [31] "FECHLD" "FEFAM"
# Keep only the attitude items and demographics used in the analysis below.
# `select` is namespace-qualified because several attached packages can mask
# it (MASS, which modelling packages pull in, also exports a `select`).
Data1 <- dplyr::select(
  gss,
  WORKBLKS, RACDIF1, RACDIF2, RACDIF3, RACMAR, HELPBLK, HELPPOOR,
  SEX, AGE, RACE, REALINC, EDUC, RELIG
)
names(Data1)
## [1] "WORKBLKS" "RACDIF1" "RACDIF2" "RACDIF3" "RACMAR" "HELPBLK"
## [7] "HELPPOOR" "SEX" "AGE" "RACE" "REALINC" "EDUC"
## [13] "RELIG"
# Small reference frame of the RACDIF1 answer categories.
# Fixed: the argument is `stringsAsFactors`; the misspelled `stringAsFactor`
# was silently added as a second column instead of controlling conversion.
df <- data.frame(RACDIF1 = c("NO", "YES", NA), stringsAsFactors = FALSE)

# Coerce the demographic predictors to numeric so they enter the models as
# continuous terms.
# NOTE(review): if read.dta() imported any of these as factors, as.numeric()
# yields the internal level codes, not the original survey values - confirm.
Data1$AGE <- as.numeric(Data1$AGE)
Data1$SEX <- as.numeric(Data1$SEX)
Data1$EDUC <- as.numeric(Data1$EDUC)
Data1$RELIG <- as.numeric(Data1$RELIG)
Data1$RACE <- as.numeric(Data1$RACE)

# Two-group age indicator: 1 = older than 46, 2 = 46 or younger.
# Fixed: the original pair of assignments (> 46 and < 46) left respondents
# aged exactly 46 as NA; ifelse() covers every non-missing AGE and keeps NA
# where AGE itself is missing.
Data1$age2 <- ifelse(Data1$AGE > 46, 1, 2)
The following three models show how these basic demographics affect whether a respondent believes that racial disparities exist because of discrimination. Model 1 (m1) tests whether age and sex affect respondents' answers. Model 2 (m2) adds education to the previous model to see whether statistical significance changes, and model 3 (m3) adds religion.
# Three nested logit models of RACDIF1: each one adds a predictor to the
# previous specification (age + sex, then education, then religion).
m1 <- zelig(
  RACDIF1 ~ AGE + SEX,
  model = "logit",
  data = Data1,
  cite = FALSE
)
m2 <- zelig(
  RACDIF1 ~ AGE + SEX + EDUC,
  model = "logit",
  data = Data1,
  cite = FALSE
)
m3 <- zelig(
  RACDIF1 ~ AGE + SEX + EDUC + RELIG,
  model = "logit",
  data = Data1,
  cite = FALSE
)

# Side-by-side regression table, emitted as HTML.
stargazer(m1, m2, m3, type = "html")
| Dependent variable: | |||
| RACDIF1 | |||
| (1) | (2) | (3) | |
| AGE | 0.002** | 0.002** | 0.001 |
| (0.001) | (0.001) | (0.001) | |
| SEX | -0.264*** | -0.263*** | -0.273*** |
| (0.027) | (0.027) | (0.028) | |
| EDUC | 0.010** | 0.011** | |
| (0.005) | (0.005) | ||
| RELIG | -0.044*** | ||
| (0.008) | |||
| Constant | 0.739*** | 0.600*** | 0.748*** |
| (0.056) | (0.087) | (0.092) | |
| Observations | 22,773 | 22,724 | 22,645 |
| Log Likelihood | -15,296.990 | -15,261.920 | -15,194.420 |
| Akaike Inf. Crit. | 30,599.990 | 30,531.850 | 30,398.850 |
| Note: | p<0.1; p<0.05; p<0.01 | ||
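To determine the best model, we compare the Akaike Information Criterion (AIC) values: model 3 has the lowest AIC (30,398.85), so it is the best fit of the three. Note that the models are estimated on slightly different numbers of observations, so this comparison is approximate.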
# Logit models of RACDIF1 on race and age: m4 is additive, m5 adds the
# RACE-by-AGE interaction term.
m4 <- zelig(
  RACDIF1 ~ RACE + AGE,
  model = "logit",
  data = Data1,
  cite = FALSE
)
m5 <- zelig(
  RACDIF1 ~ RACE + AGE + RACE:AGE,
  model = "logit",
  data = Data1,
  cite = FALSE
)

# Compare the two specifications in one HTML table.
stargazer(m4, m5, type = "html")
| Dependent variable: | ||
| RACDIF1 | ||
| (1) | (2) | |
| RACE | -0.618*** | -0.235*** |
| (0.026) | (0.072) | |
| AGE | -0.001 | 0.020*** |
| (0.001) | (0.004) | |
| RACE:AGE | -0.010*** | |
| (0.002) | ||
| Constant | 1.840*** | 0.993*** |
| (0.073) | (0.167) | |
| Observations | 22,773 | 22,773 |
| Log Likelihood | -15,039.560 | -15,023.580 |
| Akaike Inf. Crit. | 30,085.110 | 30,055.170 |
| Note: | p<0.1; p<0.05; p<0.01 | |
# Logit model used for the simulated scenarios that follow.
s1 <- zelig(
  RACDIF1 ~ RACE + EDUC + AGE,
  model = "logit",
  data = Data1,
  cite = FALSE
)

# Scenario: RACE fixed at code 2; the other covariates are left to setx().
s2 <- setx(s1, RACE = "2")
s3 <- sim(s1, x = s2)
summary(s3)
##
## Model: logit
## Number of simulations: 1000
##
## Values of X
## (Intercept) RACE EDUC AGE
## 7591 1 2 13.06139 45.7206
##
## Expected Values: E(Y|X)
## mean sd 2.5% 97.5%
## 1 0.6334724 0.003369527 0.6272744 0.6399982
##
## Predicted Values: Y|X
## 0 1
## 1 0.369 0.631
# Scenario: same model (s1), RACE fixed at code 3.
v1 <- setx(s1, RACE = "3")
v2 <- sim(s1, x = v1)
summary(v2)
##
## Model: logit
## Number of simulations: 1000
##
## Values of X
## (Intercept) RACE EDUC AGE
## 7591 1 3 13.06139 45.7206
##
## Expected Values: E(Y|X)
## mean sd 2.5% 97.5%
## 1 0.4825899 0.005990585 0.4710358 0.4945211
##
## Predicted Values: Y|X
## 0 1
## 1 0.494 0.506
# Scenario: same model (s1), RACE fixed at code 4.
n1 <- setx(s1, RACE = "4")
n2 <- sim(s1, x = n1)
summary(n2)
##
## Model: logit
## Number of simulations: 1000
##
## Values of X
## (Intercept) RACE EDUC AGE
## 7591 1 4 13.06139 45.7206
##
## Expected Values: E(Y|X)
## mean sd 2.5% 97.5%
## 1 0.3336668 0.01058983 0.3128387 0.3538362
##
## Predicted Values: Y|X
## 0 1
## 1 0.653 0.347
Data1$RACDIF1 <- as.factor(Data1$RACDIF1)

# Difference-in-differences of expected values from the interaction model m5:
# the effect of raising AGE by one standard deviation above its mean, for
# RACE code 2 versus RACE code 3.
#
# Fixed: the scenarios previously set `age2`, which is not a term in m5
# (RACDIF1 ~ RACE + AGE + RACE:AGE), so "high" and "low" profiles were
# identical and `eff` was pure simulation noise. They also used
# `Data1$age` (no such column; it only resolved through `$` partial matching)
# and `sd(Data1$RACDIF1)`, the sd of a factor. AGE is now varied directly,
# and missing ages are dropped when computing the mean and sd.
age_mean <- mean(Data1$AGE, na.rm = TRUE)
age_sd <- sd(Data1$AGE, na.rm = TRUE)

xh1 <- setx(m5, AGE = age_mean + age_sd, RACE = 2)
xl1 <- setx(m5, AGE = age_mean, RACE = 2)
xh0 <- setx(m5, AGE = age_mean + age_sd, RACE = 3)
xl0 <- setx(m5, AGE = age_mean, RACE = 3)

zh1 <- sim(m5, x = xh1)
zl1 <- sim(m5, x = xl1)
zh0 <- sim(m5, x = xh0)
zl0 <- sim(m5, x = xl0)

# (high - low age | RACE = 2) minus (high - low age | RACE = 3), per draw.
eff <- (zh1$qi$ev - zl1$qi$ev) - (zh0$qi$ev - zl0$qi$ev)
summary(eff)
## V1
## Min. :-0.031957
## 1st Qu.:-0.008158
## Median :-0.000861
## Mean :-0.001003
## 3rd Qu.: 0.005876
## Max. : 0.036489
# Histogram of the simulated difference-in-differences stored in `eff`.
hist(eff)
# Treat RACMAR as numeric and fit a Poisson regression on race and age.
# NOTE(review): Poisson models event counts; confirm RACMAR is a count
# variable after as.numeric() - if it is a coded category, another model
# family may be more appropriate.
Data1$RACMAR <- as.numeric(Data1$RACMAR)
model.c <- zelig(
  RACMAR ~ RACE + AGE,
  model = "poisson",
  data = Data1
)
How to cite this model in Zelig: Kosuke Imai, Gary King, and Oliva Lau. 2007. “poisson: Poisson Regression for Event Count Dependent Variables” in Kosuke Imai, Gary King, and Olivia Lau, “Zelig: Everyone’s Statistical Software,” http://gking.harvard.edu/zelig
# Regression table for the Poisson model, emitted as HTML.
stargazer(model.c, type="html")
| Dependent variable: | |
| RACMAR | |
| RACE | 0.037*** |
| (0.008) | |
| AGE | -0.002*** |
| (0.0002) | |
| Constant | 1.042*** |
| (0.020) | |
| Observations | 28,689 |
| Log Likelihood | -42,574.340 |
| Akaike Inf. Crit. | 85,154.670 |
| Note: | p<0.1; p<0.05; p<0.01 |
# Poisson model of RACMAR on RACE and AGE (same formula and data as model.c),
# kept under a separate name for the simulation below.
model1<-zelig(RACMAR ~ RACE + AGE, data=Data1, model= "poisson")
## How to cite this model in Zelig:
## Kosuke Imai, Gary King, and Oliva Lau. 2007. "poisson: Poisson Regression for Event Count Dependent Variables" in Kosuke Imai, Gary King, and Olivia Lau, "Zelig: Everyone's Statistical Software," http://gking.harvard.edu/zelig
# Scenario for the Poisson model: RACE fixed at code 2, then simulate
# quantities of interest and summarise them.
model2 <- setx(model1, RACE = "2")
model3 <- sim(model1, x = model2)
summary(model3)
##
## Model: poisson
## Number of simulations: 1000
##
## Values of X
## (Intercept) RACE AGE
## 1 1 2 44.94541
##
## Expected Values: E(Y|X)
## mean sd 2.5% 97.5%
## 1 2.749163 0.01079807 2.728488 2.770411
##
## Predicted Values: Y|X
## mean sd 2.5% 97.5%
## 1 2.666 1.57827 0 6
# Plot the simulation results held in model3.
plot(model3)