Introduction

For this assignment I am using the General Social Survey of 2012. In the following assessment I want to see how the basic demographics of respondents affect whether they believe that the racial disparities between blacks and whites are due to discrimination.

library(Zelig)
library(DescTools)
library(stargazer)
library(dplyr)
library(scatterplot3d)
library(tidyr)
library(memisc)
library(pander)
library(gmodels)
library(Hmisc)
library(car)
library(foreign)
library(readstata13)
# Load the GSS extract (Stata format) and list the available variables.
# NOTE(review): the path is absolute and machine-specific.
gss <- read.dta(
  "C:/Users/Xiomara/Desktop/R/GSS.dta"
)
names(gss)
##  [1] "CASEID"   "WORKBLKS" "RACDIF1"  "RACMAR"   "RACDIF2"  "RACDIF3" 
##  [7] "HELPBLK"  "HELPPOOR" "YEAR"     "SEX"      "AGE"      "RACE"    
## [13] "REALINC"  "REALRINC" "EDUC"     "DEGREE"   "PRESTG80" "PAPRES80"
## [19] "MARITAL"  "DIVORCE"  "CHILDS"   "RELIG"    "WRKSLF"   "UNEMP"   
## [25] "REGION"   "SIZE"     "RACLIVE"  "FEAR"     "GUN"      "POLVIEWS"
## [31] "FECHLD"   "FEFAM"
# Keep only the attitude items and demographics used in this analysis.
# select() is namespace-qualified because several of the attached packages
# pull in MASS, whose own select() masks dplyr's when attached later.
Data1 <- dplyr::select(
  gss,
  WORKBLKS, RACDIF1, RACDIF2, RACDIF3, RACMAR, HELPBLK, HELPPOOR,
  SEX, AGE, RACE, REALINC, EDUC, RELIG
)
names(Data1)
##  [1] "WORKBLKS" "RACDIF1"  "RACDIF2"  "RACDIF3"  "RACMAR"   "HELPBLK" 
##  [7] "HELPPOOR" "SEX"      "AGE"      "RACE"     "REALINC"  "EDUC"    
## [13] "RELIG"
# Small frame listing the RACDIF1 response labels. The argument is spelled
# `stringsAsFactors`; the previous misspelling was not recognised as an
# option and instead created an extra column named "stringAsFactor".
df <- data.frame(RACDIF1 = c("NO", "YES", NA), stringsAsFactors = FALSE)

# Coerce the predictors to numeric.
# NOTE(review): as.numeric() on a factor returns the internal level codes,
# not the original labels -- confirm how read.dta() typed these columns.
Data1$AGE <- as.numeric(Data1$AGE)
Data1$SEX <- as.numeric(Data1$SEX)
Data1$EDUC <- as.numeric(Data1$EDUC)
Data1$RELIG <- as.numeric(Data1$RELIG)
Data1$RACE <- as.numeric(Data1$RACE)

# Two-level age indicator: 1 = 46 or older, 2 = younger than 46.
# Respondents aged exactly 46 previously matched neither `> 46` nor `< 46`
# and were left NA; they are now placed in group 1. Missing AGE stays NA.
Data1$age2 <- ifelse(Data1$AGE >= 46, 1, 2)

Logit Model #1

The following three models are used to demonstrate how these basic demographics affect whether a respondent believes that racial disparities exist because of discrimination. Model 1 (m1) shows whether age and sex affect the respondent's answer. Model 2 (m2) adds education to the previous model to see if statistical significance changes, and Model 3 (m3) adds religion.

# Nested logit models of RACDIF1:
#   m1: age and sex; m2: adds education; m3: adds religion.
m1 <- zelig(
  RACDIF1 ~ AGE + SEX,
  data = Data1, model = "logit", cite = FALSE
)
m2 <- zelig(
  RACDIF1 ~ AGE + SEX + EDUC,
  data = Data1, model = "logit", cite = FALSE
)
m3 <- zelig(
  RACDIF1 ~ AGE + SEX + EDUC + RELIG,
  data = Data1, model = "logit", cite = FALSE
)

# Side-by-side coefficient table, emitted as HTML.
stargazer(m1, m2, m3, type = "html")
Dependent variable:
RACDIF1
(1) (2) (3)
AGE 0.002** 0.002** 0.001
(0.001) (0.001) (0.001)
SEX -0.264*** -0.263*** -0.273***
(0.027) (0.027) (0.028)
EDUC 0.010** 0.011**
(0.005) (0.005)
RELIG -0.044***
(0.008)
Constant 0.739*** 0.600*** 0.748***
(0.056) (0.087) (0.092)
Observations 22,773 22,724 22,645
Log Likelihood -15,296.990 -15,261.920 -15,194.420
Akaike Inf. Crit. 30,599.990 30,531.850 30,398.850
Note: *p<0.1; **p<0.05; ***p<0.01

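To determine the best model we compare the Akaike Information Criterion (Akaike Inf. Crit.), where a lower value indicates a better fit. Model 3 has the lowest value (30,398.85), so it is the best fit of the three. Note that the models were estimated on slightly different numbers of observations (22,773, 22,724 and 22,645), so this comparison is only approximate.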

Logit Model #2

# m4: additive logit of RACDIF1 on race and age.
m4 <- zelig(
  RACDIF1 ~ RACE + AGE,
  data = Data1, model = "logit", cite = FALSE
)

# m5: same terms plus the race-by-age interaction.
m5 <- zelig(
  RACDIF1 ~ RACE + AGE + RACE:AGE,
  data = Data1, model = "logit", cite = FALSE
)

# Compare the additive and interaction models in one HTML table.
stargazer(m4, m5, type = "html")
Dependent variable:
RACDIF1
(1) (2)
RACE -0.618*** -0.235***
(0.026) (0.072)
AGE -0.001 0.020***
(0.001) (0.004)
RACE:AGE -0.010***
(0.002)
Constant 1.840*** 0.993***
(0.073) (0.167)
Observations 22,773 22,773
Log Likelihood -15,039.560 -15,023.580
Akaike Inf. Crit. 30,085.110 30,055.170
Note: *p<0.1; **p<0.05; ***p<0.01

Simulation

# Logit model used for the simulations: RACDIF1 on race, education and age.
s1 <- zelig(RACDIF1 ~ RACE + EDUC + AGE, data = Data1, model = "logit",
            cite = FALSE)

# Scenario with RACE fixed at 2. RACE is a numeric column (it is coerced
# with as.numeric() earlier), so the value is given as a number rather
# than the string "2"; this matches the numeric RACE values used in the
# later setx() calls on m5.
s2 <- setx(s1, RACE = 2)

# Simulate quantities of interest for that scenario and summarise them.
s3 <- sim(s1, x = s2)
summary(s3)
## 
##   Model: logit 
##   Number of simulations: 1000 
## 
## Values of X 
##      (Intercept) RACE     EDUC     AGE
## 7591           1    2 13.06139 45.7206
## 
## Expected Values: E(Y|X)
##        mean          sd      2.5%     97.5%
## 1 0.6334724 0.003369527 0.6272744 0.6399982
## 
## Predicted Values: Y|X
##       0     1
## 1 0.369 0.631
# Scenario with RACE fixed at 3, supplied as a number because RACE is a
# numeric column (previously passed as the string "3").
v1 <- setx(s1, RACE = 3)

# Simulate and summarise the quantities of interest for this scenario.
v2 <- sim(s1, x = v1)
summary(v2)
## 
##   Model: logit 
##   Number of simulations: 1000 
## 
## Values of X 
##      (Intercept) RACE     EDUC     AGE
## 7591           1    3 13.06139 45.7206
## 
## Expected Values: E(Y|X)
##        mean          sd      2.5%     97.5%
## 1 0.4825899 0.005990585 0.4710358 0.4945211
## 
## Predicted Values: Y|X
##       0     1
## 1 0.494 0.506
# Scenario with RACE fixed at 4, supplied as a number because RACE is a
# numeric column (previously passed as the string "4").
n1 <- setx(s1, RACE = 4)

# Simulate and summarise the quantities of interest for this scenario.
n2 <- sim(s1, x = n1)
summary(n2)
## 
##   Model: logit 
##   Number of simulations: 1000 
## 
## Values of X 
##      (Intercept) RACE     EDUC     AGE
## 7591           1    4 13.06139 45.7206
## 
## Expected Values: E(Y|X)
##        mean         sd      2.5%     97.5%
## 1 0.3336668 0.01058983 0.3128387 0.3538362
## 
## Predicted Values: Y|X
##       0     1
## 1 0.653 0.347
Data1$RACDIF1 <- as.factor(Data1$RACDIF1)

# Difference-in-differences of expected values from m5: the change in
# E(Y|X) when AGE rises by one standard deviation from its mean, for
# RACE = 2 minus the same change for RACE = 3.
#
# m5 is fitted on RACE, AGE and RACE:AGE, so the scenarios must vary AGE.
# The earlier version set `age2`, which is not a term in m5, and computed
# the spread from `Data1$age` and from the factor RACDIF1, so the "high"
# and "low" scenarios never differed in AGE. NA values are dropped
# explicitly so the mean and SD are defined.
# NOTE: any printed output that follows this chunk predates this fix and
# should be regenerated.
age_mean <- mean(Data1$AGE, na.rm = TRUE)
age_sd <- sd(Data1$AGE, na.rm = TRUE)

xh1 <- setx(m5, AGE = age_mean + age_sd, RACE = 2)
xl1 <- setx(m5, AGE = age_mean, RACE = 2)
xh0 <- setx(m5, AGE = age_mean + age_sd, RACE = 3)
xl0 <- setx(m5, AGE = age_mean, RACE = 3)

# One set of simulations per scenario.
zh1 <- sim(m5, x = xh1)
zl1 <- sim(m5, x = xl1)
zh0 <- sim(m5, x = xh0)
zl0 <- sim(m5, x = xl0)

# (age effect at RACE = 2) - (age effect at RACE = 3), per simulation draw.
eff <- (zh1$qi$ev - zl1$qi$ev) - (zh0$qi$ev - zl0$qi$ev)

summary(eff)
##        V1           
##  Min.   :-0.031957  
##  1st Qu.:-0.008158  
##  Median :-0.000861  
##  Mean   :-0.001003  
##  3rd Qu.: 0.005876  
##  Max.   : 0.036489
# Histogram of the simulated difference-in-differences stored in `eff`.
hist(eff)

GLM using Count Data

# Coerce the outcome to numeric before fitting.
# NOTE(review): a Poisson model assumes a non-negative event count --
# confirm that RACMAR, after as.numeric(), really is one.
Data1$RACMAR <- as.numeric(Data1$RACMAR)

# Poisson regression of RACMAR on race and age.
model.c <- zelig(
  RACMAR ~ RACE + AGE,
  data = Data1,
  model = "poisson"
)

How to cite this model in Zelig: Kosuke Imai, Gary King, and Oliva Lau. 2007. “poisson: Poisson Regression for Event Count Dependent Variables” in Kosuke Imai, Gary King, and Olivia Lau, “Zelig: Everyone’s Statistical Software,” http://gking.harvard.edu/zelig

# Coefficient table for the Poisson model, emitted as HTML.
stargazer(model.c, type="html")
Dependent variable:
RACMAR
RACE 0.037***
(0.008)
AGE -0.002***
(0.0002)
Constant 1.042***
(0.020)
Observations 28,689
Log Likelihood -42,574.340
Akaike Inf. Crit. 85,154.670
Note: *p<0.1; **p<0.05; ***p<0.01
# Poisson model used for the simulation below; this is the same formula,
# data and model type as `model.c`.
model1 <- zelig(
  RACMAR ~ RACE + AGE,
  data = Data1,
  model = "poisson"
)
## How to cite this model in Zelig:
## Kosuke Imai, Gary King, and Oliva Lau. 2007. "poisson: Poisson Regression for Event Count Dependent Variables" in Kosuke Imai, Gary King, and Olivia Lau, "Zelig: Everyone's Statistical Software," http://gking.harvard.edu/zelig
# Scenario with RACE fixed at 2, supplied as a number because RACE is a
# numeric column (previously passed as the string "2").
# Despite their names, model2 holds the setx() scenario and model3 the
# sim() result; the names are kept because later code refers to them.
model2 <- setx(model1, RACE = 2)

# Simulate and summarise the quantities of interest for this scenario.
model3 <- sim(model1, x = model2)
summary(model3)
## 
##   Model: poisson 
##   Number of simulations: 1000 
## 
## Values of X 
##   (Intercept) RACE      AGE
## 1           1    2 44.94541
## 
## Expected Values: E(Y|X)
##       mean         sd     2.5%    97.5%
## 1 2.749163 0.01079807 2.728488 2.770411
## 
## Predicted Values: Y|X
##    mean      sd 2.5% 97.5%
## 1 2.666 1.57827    0     6
# Plot the simulation result held in `model3` (the sim() output above).
plot(model3)