library(haven)
library(foreign) 
library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(broom)
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library(readr) 
library(MASS)

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

library(car) 
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(alr3)

## 
## Attaching package: 'alr3'

## The following object is masked from 'package:MASS':
## 
##     forbes

library(zoo)
library(nortest)
library(plotrix)
library(scales)

## 
## Attaching package: 'scales'

## The following object is masked from 'package:plotrix':
## 
##     rescale

## The following object is masked from 'package:readr':
## 
##     col_factor

library(tableone)
library(Weighted.Desc.Stat)
library(mitools)
library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(VGAM)

## Loading required package: stats4

## Loading required package: splines

## 
## Attaching package: 'VGAM'

## The following object is masked from 'package:survey':
## 
##     calibrate

## The following object is masked from 'package:lmtest':
## 
##     lrtest

## The following object is masked from 'package:car':
## 
##     logit

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

library(sandwich)

anes2016<-read_dta("C:\\Users\\Jaire\\OneDrive\\Desktop\\Stats for Dem Data 2\\Homework 4\\ANES2016.dta")

Count data models:

Research Question -

To what extent are race and highest level of education predictors of the amount of news media Americans consume within a week?

Dependent Variable -

The count outcome of interest is the amount of news media consumed within a week by number of days. The question reads, “During a typical week, how many days do you watch, read, or listen to news on TV, radio, printed newspapers, or the Internet, not including sports?”

Responses are “0. None 1. One day 2. Two days 3. Three days 4. Four days 5. Five days 6. Six days 7. Seven days”.

Offset Term:

The offset term is the amount of days new media is consumed within a week over the U.S. population.

# Raw data
table (anes2016$V161008)

## 
##   -9   -8    0    1    2    3    4    5    6    7 
##    2    2   73  187  213  287  266  641  307 2293

# Recoded variable
anes2016$NewsMedDays_Wk<-recode(anes2016$V161008, recodes = "-9:-8=NA")
  table(anes2016$NewsMediaWeekly)

## Warning: Unknown or uninitialised column: 'NewsMediaWeekly'.

## < table of extent 0 >

Predictors:

Self-Identified Race -

# Raw data:
table(anes2016$V161310X)

## 
##   -9    1    2    3    4    5    6 
##   33 3038  398  148   27  450  177

# Transformation:
anes2016$Race<-recode(anes2016$V161310X, recodes = "-9=NA; 1='White'; 2='Black'; 3='AsNatPac'; 4='NatAmAlsk'; 5='Hisp'; 6='Other'", as.factor = T)

# Race from anes2016$V161310X
table(anes2016$Race)

## 
##  AsNatPac     Black      Hisp NatAmAlsk     Other     White 
##       148       398       450        27       177      3038

Highest Level of Education -

# Highest Level of Education Raw Data:

table(anes2016$V161270)

## 
##  -9   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  90  95 
##  15   1   3  15  22  32  40  62 107 810 899 313 288 955 499  88  93   5  24

# Highest Level of Education Transformations:
anes2016$HLEducation<-recode(anes2016$V161270, recodes = "15:95=NA;-9=NA;10=NA;1:8='LessHSDipGED'; 9='HSDipGED'; 11:12='Assoc'; 13='Bachlrs'; 14='Mastrs'", as.factor = T)

# Highest Level of Education from V161270:

table(anes2016$HLEducation)

## 
##        Assoc      Bachlrs     HSDipGED LessHSDipGED       Mastrs 
##          601          955          810          282          499

Survey design and analytic data:

sub<-dplyr::select(anes2016, NewsMedDays_Wk,HLEducation,Race) %>%
  filter(complete.cases(.))


options(survey.lonely.psu = "adjust")
des<-svydesign(nest = TRUE, ids=~anes2016$V160202, strata =~anes2016$V160201, weights =~anes2016$V160101, data=anes2016)

Poisson analysis:

svyhist(~NewsMedDays_Wk, des)

svyby(~NewsMedDays_Wk, ~HLEducation+Race, des, svymean, na.rm=T)

##                         HLEducation      Race NewsMedDays_Wk         se
## Assoc.AsNatPac                Assoc  AsNatPac       5.630261 0.50137371
## Bachlrs.AsNatPac            Bachlrs  AsNatPac       5.215142 0.37671031
## HSDipGED.AsNatPac          HSDipGED  AsNatPac       5.060120 0.50931521
## LessHSDipGED.AsNatPac  LessHSDipGED  AsNatPac       3.312181 1.60246350
## Mastrs.AsNatPac              Mastrs  AsNatPac       5.519003 0.29657870
## Assoc.Black                   Assoc     Black       5.389373 0.27744039
## Bachlrs.Black               Bachlrs     Black       5.366685 0.26445907
## HSDipGED.Black             HSDipGED     Black       5.123662 0.27249870
## LessHSDipGED.Black     LessHSDipGED     Black       5.524967 0.33298083
## Mastrs.Black                 Mastrs     Black       6.478856 0.19493130
## Assoc.Hisp                    Assoc      Hisp       5.373412 0.32354460
## Bachlrs.Hisp                Bachlrs      Hisp       5.644516 0.24133401
## HSDipGED.Hisp              HSDipGED      Hisp       4.367205 0.21163535
## LessHSDipGED.Hisp      LessHSDipGED      Hisp       5.242054 0.26844876
## Mastrs.Hisp                  Mastrs      Hisp       5.381923 0.44078116
## Assoc.NatAmAlsk               Assoc NatAmAlsk       7.000000 0.00000000
## Bachlrs.NatAmAlsk           Bachlrs NatAmAlsk       3.879474 0.87914448
## HSDipGED.NatAmAlsk         HSDipGED NatAmAlsk       4.697120 0.75614465
## LessHSDipGED.NatAmAlsk LessHSDipGED NatAmAlsk       3.902801 1.21815725
## Mastrs.NatAmAlsk             Mastrs NatAmAlsk       6.213157 0.67497868
## Assoc.Other                   Assoc     Other       5.352571 0.40295070
## Bachlrs.Other               Bachlrs     Other       5.647385 0.30486206
## HSDipGED.Other             HSDipGED     Other       4.342879 0.59757381
## LessHSDipGED.Other     LessHSDipGED     Other       4.250437 0.95345052
## Mastrs.Other                 Mastrs     Other       6.335459 0.22634687
## Assoc.White                   Assoc     White       5.298228 0.13069650
## Bachlrs.White               Bachlrs     White       5.830701 0.06393591
## HSDipGED.White             HSDipGED     White       5.315730 0.11007105
## LessHSDipGED.White     LessHSDipGED     White       4.988245 0.22790459
## Mastrs.White                 Mastrs     White       6.033667 0.08655150

fit1<-svyglm(NewsMedDays_Wk~factor(HLEducation)+factor(Race), design=des, family=poisson)
summary(fit1)

## 
## Call:
## svyglm(formula = NewsMedDays_Wk ~ factor(HLEducation) + factor(Race), 
##     design = des, family = poisson)
## 
## Survey design:
## svydesign(nest = TRUE, ids = ~anes2016$V160202, strata = ~anes2016$V160201, 
##     weights = ~anes2016$V160101, data = anes2016)
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.606720   0.051463  31.221  < 2e-16 ***
## factor(HLEducation)Bachlrs       0.072760   0.023711   3.069  0.00264 ** 
## factor(HLEducation)HSDipGED     -0.039383   0.027672  -1.423  0.15719    
## factor(HLEducation)LessHSDipGED -0.041657   0.037770  -1.103  0.27219    
## factor(HLEducation)Mastrs        0.119727   0.023910   5.007 1.85e-06 ***
## factor(Race)Black                0.077848   0.051765   1.504  0.13516    
## factor(Race)Hisp                 0.006971   0.055937   0.125  0.90102    
## factor(Race)NatAmAlsk           -0.065576   0.139100  -0.471  0.63816    
## factor(Race)Other                0.009206   0.072345   0.127  0.89894    
## factor(Race)White                0.081586   0.050110   1.628  0.10604    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 0.7792628)
## 
## Number of Fisher Scoring iterations: 4

round(exp(summary(fit1)$coef[-1,1]), 3)

##      factor(HLEducation)Bachlrs     factor(HLEducation)HSDipGED 
##                           1.075                           0.961 
## factor(HLEducation)LessHSDipGED       factor(HLEducation)Mastrs 
##                           0.959                           1.127 
##               factor(Race)Black                factor(Race)Hisp 
##                           1.081                           1.007 
##           factor(Race)NatAmAlsk               factor(Race)Other 
##                           0.937                           1.009 
##               factor(Race)White 
##                           1.085

Model Deviance -

fit2<-glm(anes2016$NewsMedDays_Wk~factor(anes2016$HLEducation)+factor(anes2016$Race), family=poisson)
summary(fit2)

## 
## Call:
## glm(formula = anes2016$NewsMedDays_Wk ~ factor(anes2016$HLEducation) + 
##     factor(anes2016$Race), family = poisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.4753  -0.4358   0.3814   0.6027   0.9930  
## 
## Coefficients:
##                                           Estimate Std. Error z value Pr(>|z|)
## (Intercept)                               1.638664   0.044237  37.042  < 2e-16
## factor(anes2016$HLEducation)Bachlrs       0.052283   0.022237   2.351 0.018717
## factor(anes2016$HLEducation)HSDipGED     -0.048605   0.023469  -2.071 0.038355
## factor(anes2016$HLEducation)LessHSDipGED -0.031485   0.031908  -0.987 0.323770
## factor(anes2016$HLEducation)Mastrs        0.089099   0.025618   3.478 0.000505
## factor(anes2016$Race)Black                0.063296   0.048218   1.313 0.189284
## factor(anes2016$Race)Hisp                -0.005956   0.047782  -0.125 0.900795
## factor(anes2016$Race)NatAmAlsk           -0.044485   0.107596  -0.413 0.679279
## factor(anes2016$Race)Other                0.019787   0.056208   0.352 0.724817
## factor(anes2016$Race)White                0.070445   0.041759   1.687 0.091610
##                                             
## (Intercept)                              ***
## factor(anes2016$HLEducation)Bachlrs      *  
## factor(anes2016$HLEducation)HSDipGED     *  
## factor(anes2016$HLEducation)LessHSDipGED    
## factor(anes2016$HLEducation)Mastrs       ***
## factor(anes2016$Race)Black                  
## factor(anes2016$Race)Hisp                   
## factor(anes2016$Race)NatAmAlsk              
## factor(anes2016$Race)Other                  
## factor(anes2016$Race)White               .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2817.2  on 3121  degrees of freedom
## Residual deviance: 2754.5  on 3112  degrees of freedom
##   (1149 observations deleted due to missingness)
## AIC: 13568
## 
## Number of Fisher Scoring iterations: 4

Deviance -

There is roughly a 300 to 358 point dispersion or difference between the deviance and degress of freedom within fit2. However, the ratio (variance = mean) is close to “1”(0.9408128) as well as the model fit statistic (0.9999988).

scale<-sqrt(fit2$deviance/fit2$df.residual)
scale

## [1] 0.9408128

1-pchisq(fit2$deviance, df = fit2$df.residual)

## [1] 0.9999988

Remarks

The poisson model seemed to work well; the ratio of the variance value to the mean is close to “1”(0.9408128) as well as the model fit statistic (0.9999988).

Alternative modeling -

fit3<-glm(anes2016$NewsMedDays_Wk~factor(anes2016$HLEducation)+factor(anes2016$Race), family=quasipoisson)
summary(fit3)

## 
## Call:
## glm(formula = anes2016$NewsMedDays_Wk ~ factor(anes2016$HLEducation) + 
##     factor(anes2016$Race), family = quasipoisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.4753  -0.4358   0.3814   0.6027   0.9930  
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                               1.638664   0.036477  44.923  < 2e-16
## factor(anes2016$HLEducation)Bachlrs       0.052283   0.018336   2.851  0.00438
## factor(anes2016$HLEducation)HSDipGED     -0.048605   0.019352  -2.512  0.01207
## factor(anes2016$HLEducation)LessHSDipGED -0.031485   0.026311  -1.197  0.23153
## factor(anes2016$HLEducation)Mastrs        0.089099   0.021124   4.218 2.54e-05
## factor(anes2016$Race)Black                0.063296   0.039759   1.592  0.11149
## factor(anes2016$Race)Hisp                -0.005956   0.039400  -0.151  0.87985
## factor(anes2016$Race)NatAmAlsk           -0.044485   0.088721  -0.501  0.61612
## factor(anes2016$Race)Other                0.019787   0.046347   0.427  0.66947
## factor(anes2016$Race)White                0.070445   0.034433   2.046  0.04085
##                                             
## (Intercept)                              ***
## factor(anes2016$HLEducation)Bachlrs      ** 
## factor(anes2016$HLEducation)HSDipGED     *  
## factor(anes2016$HLEducation)LessHSDipGED    
## factor(anes2016$HLEducation)Mastrs       ***
## factor(anes2016$Race)Black                  
## factor(anes2016$Race)Hisp                   
## factor(anes2016$Race)NatAmAlsk              
## factor(anes2016$Race)Other                  
## factor(anes2016$Race)White               *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 0.6799266)
## 
##     Null deviance: 2817.2  on 3121  degrees of freedom
## Residual deviance: 2754.5  on 3112  degrees of freedom
##   (1149 observations deleted due to missingness)
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4

Negative binomial analysis:

coeftest(fit2, vcov=vcovHC(fit2, type="HC1",cluster="ststr"))

## 
## z test of coefficients:
## 
##                                            Estimate Std. Error z value
## (Intercept)                               1.6386640  0.0365272 44.8615
## factor(anes2016$HLEducation)Bachlrs       0.0522828  0.0181706  2.8773
## factor(anes2016$HLEducation)HSDipGED     -0.0486050  0.0206094 -2.3584
## factor(anes2016$HLEducation)LessHSDipGED -0.0314849  0.0295579 -1.0652
## factor(anes2016$HLEducation)Mastrs        0.0890989  0.0193091  4.6144
## factor(anes2016$Race)Black                0.0632957  0.0398996  1.5864
## factor(anes2016$Race)Hisp                -0.0059564  0.0402400 -0.1480
## factor(anes2016$Race)NatAmAlsk           -0.0444851  0.0989931 -0.4494
## factor(anes2016$Race)Other                0.0197866  0.0475040  0.4165
## factor(anes2016$Race)White                0.0704451  0.0342954  2.0541
##                                           Pr(>|z|)    
## (Intercept)                              < 2.2e-16 ***
## factor(anes2016$HLEducation)Bachlrs       0.004011 ** 
## factor(anes2016$HLEducation)HSDipGED      0.018354 *  
## factor(anes2016$HLEducation)LessHSDipGED  0.286788    
## factor(anes2016$HLEducation)Mastrs       3.943e-06 ***
## factor(anes2016$Race)Black                0.112654    
## factor(anes2016$Race)Hisp                 0.882325    
## factor(anes2016$Race)NatAmAlsk            0.653161    
## factor(anes2016$Race)Other                0.677025    
## factor(anes2016$Race)White                0.039969 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anes2016$fullwt<-as.numeric(anes2016$V160102+anes2016$V160101)

fit.nb1<-glm.nb(anes2016$NewsMedDays_Wk~factor(anes2016$HLEducation),
              data=sub,
              weights=anes2016$fullwt/mean(anes2016$fullwt, na.rm=T))

## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached

## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached

fit.nb2<-glm.nb(anes2016$NewsMedDays_Wk~factor(anes2016$HLEducation)+factor(anes2016$Race),
              data=sub,
              weights=anes2016$fullwt/mean(anes2016$fullwt, na.rm=T))

## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached

## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached

tests1<-coeftest(fit.nb1, vcov=vcovHC(fit.nb2, type="HC1",cluster="ststr"))
tests<-coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type="HC1",cluster="ststr"))

library(stargazer)

stargazer(fit.nb1, fit.nb2,style="demography", type = "text", t.auto=F,p.auto=F,coef=list(tests1[, 1],tests[,1]),  se =list(tests1[, 2], tests[, 2]), p=list(tests1[,4],tests[, 4])   )

## 
## ----------------------------------------------------------------------------
##                                            NewsMedDays_Wk                   
##                                   Model 1                   Model 2         
## ----------------------------------------------------------------------------
## HLEducation)Bachlrs              0.076***                   0.075**         
##                                   (0.023)                   (0.023)         
## HLEducation)HSDipGED              -0.040                    -0.038          
##                                   (0.027)                   (0.027)         
## HLEducation)LessHSDipGED          -0.037                    -0.025          
##                                   (0.037)                   (0.037)         
## HLEducation)Mastrs               0.117***                  0.120***         
##                                   (0.024)                   (0.024)         
## Race)Black                                                   0.090          
##                                                             (0.053)         
## Race)Hisp                                                    0.020          
##                                                             (0.053)         
## Race)NatAmAlsk                                              -0.046          
##                                                             (0.119)         
## Race)Other                                                   0.012          
##                                                             (0.070)         
## Race)White                                                   0.089          
##                                                             (0.047)         
## Constant                         1.678***                  1.601***         
##                                   (0.049)                   (0.049)         
## N                                  3,144                     3,122          
## Log Likelihood                  -7,236.404                -7,192.231        
## theta                    122,348.900 (327,531.400) 123,664.800 (329,993.000)
## AIC                             14,482.810                14,404.460        
## ----------------------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001

Results:

The AIC for poisson model 2 (fit2) was 13,568. This was less than the AICs of the negative binomial models (14,482.810 fit.nb1) (14,404.460 fit.nb2). The akaike information criterion (AIC) is a measure of model fit. The lower the AIC is, the better its goodness of fit.

Poisson model 2 (fit2) provides the best model used in this analysis to measure the average number of days Americans consume news media within a weekly period by race and highest level of education.

Additionally, Americans holding a bachelor’s degree or higher consume news media at a significantly higher rate each week, than those without.

Homework 4

Jairemy Edwards

4/06/2020