#Question 1 ###All the variables in this dataset are from Add Health
##A ordinal variable is a variable where its responses are in a specific order. For example, the ordinal variable that I will be using is Alcohol usage, which asks during the past 12 months, on how many days did you drink alcohol? It is originally coded as 1=everyday, 2=3 to 5 times a week, 3=1 to 2 times a week, 4=2 to 3 times a month, 5=once a month, 6=1 to 2 times a year and 7=never. I prefer the order to be from least to greatest so it will be recoded as the following: 1=never, 2=1 to2 times a year, 3=once a month, 4=2 to 3 times a month, 5=1 to 2 times a week, 6=3 to 5 times a week 7= everyday.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(haven)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.6.2
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
## Warning: package 'questionr' was built under R version 3.6.2
library(ggplot2)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
d1<-read_dta("C:/Users/tim1203/Desktop/utsa/C. Sparks/Dem 7283/Data sets/wholeaddhealthstata.dta")
w1<-read_dta("C:/Users/tim1203/Desktop/utsa/C. Sparks/Dem 7283/Data sets/21600-0004-Data.dta")
d2<-merge(d1, w1, by="AID")
###Recode suicide
d2$suicide<-Recode(d2$H1SU1, recodes="0=0; 1=1; else=NA")
table(d2$suicide)
##
## 0 1
## 5614 821
is.numeric(d2$suicide)
## [1] TRUE
###Recode grade
d2$grade<-Recode(d2$H1GI20, recodes="7=7; 8=8; 9=9; 10=10; 11=11; 12=12; else=NA")
table(d2$grade)
##
## 7 8 9 10 11 12
## 979 992 1107 1144 1122 993
table(d2$H1GI20)
##
## 7 8 9 10 11 12 96 97 98 99
## 979 992 1107 1144 1122 993 1 128 3 35
is.numeric(d2$grade)
## [1] TRUE
###Recode Hispanic
table(d2$H1GI4)
##
## 0 1 6 8
## 5738 743 3 20
d2$Hispanic<-Recode(d2$H1GI4, recodes="0=0; 1=1; else=NA")
table(d2$Hispanic)
##
## 0 1
## 5738 743
is.numeric(d2$Hispanic)
## [1] TRUE
is.character(d2$Hispanic)
## [1] FALSE
table(d2$BIO_SEX)
##
## 1 2 6
## 3147 3356 1
d2$SEX<-Recode(d2$BIO_SEX, recodes="1=1; 2=0; else=NA")
table(d2$SEX)
##
## 0 1
## 3356 3147
#males equals 1, females equals 0.
###Recode Alcohol
table(d2$H1TO15)
##
## 1 2 3 4 5 6 7 96 97 98
## 59 166 387 512 743 1097 581 8 2944 7
d2$alcohol<-Recode(d2$H1TO15, recodes="1=7; 2=6; 3=5; 4=4; 5=3; 6=2; 7=1; else=NA")
table(d2$alcohol)
##
## 1 2 3 4 5 6 7
## 581 1097 743 512 387 166 59
table(d2$H1RE4)
##
## 1 2 3 4 6 7 8
## 2812 2218 391 193 3 879 8
d2$religion<-Recode(d2$H1RE4, recodes="1=4; 2=3; 3=2; 4=1; else=NA")
table(d2$religion)
##
## 1 2 3 4
## 193 391 2218 2812
d2 <- d2 %>%
mutate(
grade = as.factor(grade),
Hispanic = as.factor(Hispanic),
alcohol = as.factor(alcohol),
SEX=as.factor(SEX),
suicide=as.factor(suicide),
religion=as.factor(religion))
#Question 2 Do Hispanic males who have suicide thoughts have a greater likelihood to drink alcohol more frequently?
d2$alcohol<-Recode(d2$H1TO15, recodes="1=7; 2=6; 3=5; 4=4; 5=3; 6=2; 7=1; else=NA", as.factor=T)
d2$alcohol<-relevel(d2$alcohol, ref="1")
d2$alcohol2<-car::Recode(d2$alcohol, recodes="1=7; 2=6; 3=5; 4=4; 5=3; 6=2; 7=1; else=NA", as.factor=F)
sub<-d2%>%
select(alcohol, alcohol2, Hispanic, SEX, suicide, religion, GSWGT1)%>%
filter(complete.cases(.))
options(survey.lonely.psu = "PSUSCID")
des<-svydesign(ids=~CLUSTER2, weights=~GSWGT1, data=d2)
fit.solr1<-svyolr(alcohol~grade+SEX+Hispanic+suicide, des)
summary(fit.solr1)
## Call:
## svyolr(alcohol ~ grade + SEX + Hispanic + suicide, des)
##
## Coefficients:
## Value Std. Error t value
## grade8 0.3304395 0.17162961 1.925306
## grade9 0.6891641 0.15315148 4.499885
## grade10 0.8686001 0.16111639 5.391134
## grade11 1.0621094 0.14894544 7.130862
## grade12 1.3413259 0.16500344 8.129079
## SEX1 0.2238010 0.06720762 3.329995
## Hispanic1 -0.1430155 0.12944653 -1.104823
## suicide1 0.4861899 0.09216900 5.274983
##
## Intercepts:
## Value Std. Error t value
## 1|2 -0.7179 0.1440 -4.9835
## 2|3 0.8495 0.1406 6.0434
## 3|4 1.7640 0.1474 11.9654
## 4|5 2.5846 0.1588 16.2792
## 5|6 3.6755 0.1595 23.0423
## 6|7 5.0779 0.2103 24.1465
## (3066 observations deleted due to missingness)
#Results that have a t value that are greater than 2 are considered significant. This includes students in grades 9 through 12th, females, and those who have suicidal thoughts. Students in grades 9th through 12th have a higher likelihood to drink alcohol than students in 7th grade. Males also are more likely to drink alcohol than females. Last, students who have suicide thoughts are also more likely to drink alcohol.
fit.solr1$deviance+2*length(fit.solr1$coefficients)
## [1] 11717.15
ex1<-svyglm(I(alcohol2>1)~grade+SEX+Hispanic+suicide,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
ex2<-svyglm(I(alcohol2>2)~grade+SEX+Hispanic+suicide,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
plot(coef(ex1)[-1], ylim=c(-3, 3), type="l",xaxt="n",
ylab="Beta", main=c("Comparison of betas for", " proportional odds assumption"))
lines(coef(ex2)[-1], col=1, lty=2)
axis(side=1, at=1:12, labels=F)
text(x=1:12, y=-4, srt = 90, pos = 1, xpd = TRUE,cex=.8,
labels = c("8th","9th" ,"10th", "11th", "12th", "Male", "Hispanic","suicide thoughts"))
legend("bottomright",col=c(1,1),lty=c(1,2), legend=c(">1", ">2"))
lines(coef(fit.solr1)[c(-13:-16)], col=4, lwd=3)
###Printing odds ratios
round(exp(rbind(coef(ex1)[-1], coef(ex2)[-1])),3)
## grade8 grade9 grade10 grade11 grade12 SEX1 Hispanic1 suicide1
## [1,] 1.682 0.934 1.146 1.818 1.239 0.475 0.818 0.405
## [2,] 1.268 0.828 0.933 0.952 0.546 0.535 1.116 0.448
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:car':
##
## logit
## The following object is masked from 'package:survey':
##
## calibrate
#Proporitional Assumptions
##Proportional Odds
fit.vgam<-vglm(as.ordered(alcohol)~grade+SEX+Hispanic+suicide,
d2, weights =GSWGT1/mean(GSWGT1, na.rm=T),
family=cumulative(parallel = T, reverse = T)) #<-parallel = T == proportional odds
summary(fit.vgam)
##
## Call:
## vglm(formula = as.ordered(alcohol) ~ grade + SEX + Hispanic +
## suicide, family = cumulative(parallel = T, reverse = T),
## data = d2, weights = GSWGT1/mean(GSWGT1, na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -4.874 0.1315 0.24617 0.54616 1.820
## logitlink(P[Y>=3]) -3.049 -0.8386 0.16787 0.59742 4.353
## logitlink(P[Y>=4]) -2.692 -0.4713 -0.25304 0.46760 7.130
## logitlink(P[Y>=5]) -2.888 -0.3257 -0.19853 -0.11466 6.303
## logitlink(P[Y>=6]) -2.239 -0.1994 -0.13089 -0.09394 11.648
## logitlink(P[Y>=7]) -2.346 -0.1038 -0.07697 -0.05874 19.904
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.71788 0.10812 6.639 3.15e-11 ***
## (Intercept):2 -0.84952 0.10796 -7.869 3.57e-15 ***
## (Intercept):3 -1.76393 0.11087 -15.910 < 2e-16 ***
## (Intercept):4 -2.58461 0.11510 -22.456 < 2e-16 ***
## (Intercept):5 -3.67549 0.12639 -29.080 < 2e-16 ***
## (Intercept):6 -5.07788 0.16664 -30.472 < 2e-16 ***
## grade8 0.33042 0.12877 2.566 0.01029 *
## grade9 0.68914 0.12138 5.678 1.37e-08 ***
## grade10 0.86858 0.12115 7.170 7.52e-13 ***
## grade11 1.06209 0.12102 8.776 < 2e-16 ***
## grade12 1.34130 0.11959 11.216 < 2e-16 ***
## SEX1 0.22380 0.06112 3.662 0.00025 ***
## Hispanic1 -0.14301 0.09325 -1.534 0.12512
## suicide1 0.48619 0.08123 5.985 2.16e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 6
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4]), logitlink(P[Y>=5]), logitlink(P[Y>=6]),
## logitlink(P[Y>=7])
##
## Residual deviance: 11761.39 on 20614 degrees of freedom
##
## Log-likelihood: -5880.697 on 20614 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):6'
##
##
## Exponentiated coefficients:
## grade8 grade9 grade10 grade11 grade12 SEX1 Hispanic1
## 1.3915516 1.9920079 2.3835198 2.8924026 3.8240301 1.2508262 0.8667425
## suicide1
## 1.6261102
##Non-Proportional odds
fit.vgam2<-vglm(as.ordered(alcohol)~grade+SEX+Hispanic+suicide,
d2, weights =GSWGT1/mean(GSWGT1, na.rm=T),
family=cumulative(parallel = F, reverse = T)) #<-parallel = F == Nonproportional odds
summary(fit.vgam2)
##
## Call:
## vglm(formula = as.ordered(alcohol) ~ grade + SEX + Hispanic +
## suicide, family = cumulative(parallel = F, reverse = T),
## data = d2, weights = GSWGT1/mean(GSWGT1, na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -4.757 0.1266 0.24050 0.51427 2.057
## logitlink(P[Y>=3]) -2.746 -0.8558 0.17437 0.58273 4.498
## logitlink(P[Y>=4]) -2.634 -0.4793 -0.25516 0.47517 6.318
## logitlink(P[Y>=5]) -2.930 -0.3228 -0.19082 -0.11124 7.219
## logitlink(P[Y>=6]) -2.652 -0.1967 -0.12573 -0.09024 10.945
## logitlink(P[Y>=7]) -2.770 -0.1004 -0.07204 -0.05287 14.800
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.840946 0.133408 6.304 2.91e-10 ***
## (Intercept):2 -0.814026 0.124057 -6.562 5.32e-11 ***
## (Intercept):3 -1.615858 0.142282 -11.357 < 2e-16 ***
## (Intercept):4 -2.575718 0.185578 -13.879 < 2e-16 ***
## (Intercept):5 -3.323776 0.252571 -13.160 < 2e-16 ***
## (Intercept):6 -4.503453 0.402819 -11.180 < 2e-16 ***
## grade8:1 0.387199 0.161827 2.393 0.016726 *
## grade8:2 0.375086 0.148866 2.520 0.011748 *
## grade8:3 -0.004383 0.174432 -0.025 0.979952
## grade8:4 0.050535 0.224782 0.225 0.822120
## grade8:5 -0.220840 0.313227 -0.705 0.480781
## grade8:6 -0.517808 0.494597 -1.047 0.295132
## grade9:1 0.645442 0.155881 4.141 3.46e-05 ***
## grade9:2 0.757570 0.140050 5.409 6.33e-08 ***
## grade9:3 0.468356 0.158100 2.962 0.003053 **
## grade9:4 0.350019 0.204541 1.711 0.087035 .
## grade9:5 0.184874 0.274402 0.674 0.500480
## grade9:6 -0.003058 0.408271 -0.007 0.994023
## grade10:1 1.056968 0.165490 6.387 1.69e-10 ***
## grade10:2 0.899134 0.139849 6.429 1.28e-10 ***
## grade10:3 0.587667 0.156942 3.744 0.000181 ***
## grade10:4 0.475232 0.202413 2.348 0.018883 *
## grade10:5 0.070026 0.280183 0.250 0.802643
## grade10:6 -0.218603 0.430230 -0.508 0.611378
## grade11:1 1.170599 0.168319 6.955 3.53e-12 ***
## grade11:2 1.074237 0.140014 7.672 1.69e-14 ***
## grade11:3 0.807237 0.154900 5.211 1.87e-07 ***
## grade11:4 0.773051 0.196636 3.931 8.45e-05 ***
## grade11:5 0.014412 0.279813 0.052 0.958921
## grade11:6 -0.838922 0.495641 -1.693 0.090532 .
## grade12:1 1.487805 0.174360 8.533 < 2e-16 ***
## grade12:2 1.341322 0.139182 9.637 < 2e-16 ***
## grade12:3 1.055234 0.152112 6.937 4.00e-12 ***
## grade12:4 1.098895 0.191315 5.744 9.25e-09 ***
## grade12:5 0.622093 0.258029 2.411 0.015911 *
## grade12:6 -0.395787 0.438902 -0.902 0.367181
## SEX1:1 -0.155995 0.094999 -1.642 0.100574
## SEX1:2 0.139583 0.070000 1.994 0.046147 *
## SEX1:3 0.415156 0.074895 5.543 2.97e-08 ***
## SEX1:4 0.656871 0.093234 7.045 1.85e-12 ***
## SEX1:5 0.676006 0.139872 4.833 1.34e-06 ***
## SEX1:6 0.879992 0.262448 3.353 0.000799 ***
## Hispanic1:1 -0.131969 0.137570 -0.959 0.337414
## Hispanic1:2 -0.160784 0.106112 -1.515 0.129714
## Hispanic1:3 -0.172408 0.116498 -1.480 0.138895
## Hispanic1:4 -0.069054 0.141772 -0.487 0.626203
## Hispanic1:5 -0.100737 0.212638 -0.474 0.635678
## Hispanic1:6 0.470415 0.311796 1.509 0.131369
## suicide1:1 0.546001 0.143224 3.812 0.000138 ***
## suicide1:2 0.389638 0.094338 4.130 3.62e-05 ***
## suicide1:3 0.493589 0.095648 5.160 2.46e-07 ***
## suicide1:4 0.565035 0.111526 5.066 4.05e-07 ***
## suicide1:5 0.798247 0.152779 5.225 1.74e-07 ***
## suicide1:6 1.048922 0.258883 4.052 5.08e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 6
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]),
## logitlink(P[Y>=4]), logitlink(P[Y>=5]), logitlink(P[Y>=6]),
## logitlink(P[Y>=7])
##
## Residual deviance: 11653.04 on 20574 degrees of freedom
##
## Log-likelihood: -5826.518 on 20574 degrees of freedom
##
## Number of Fisher scoring iterations: 11
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):5', '(Intercept):6'
##
##
## Exponentiated coefficients:
## grade8:1 grade8:2 grade8:3 grade8:4 grade8:5 grade8:6
## 1.4728500 1.4551171 0.9956263 1.0518338 0.8018452 0.5958249
## grade9:1 grade9:2 grade9:3 grade9:4 grade9:5 grade9:6
## 1.9068298 2.1330874 1.5973658 1.4190951 1.2030668 0.9969464
## grade10:1 grade10:2 grade10:3 grade10:4 grade10:5 grade10:6
## 2.8776330 2.4574731 1.7997839 1.6083880 1.0725356 0.8036404
## grade11:1 grade11:2 grade11:3 grade11:4 grade11:5 grade11:6
## 3.2239236 2.9277596 2.2417060 2.1663648 1.0145167 0.4321762
## grade12:1 grade12:2 grade12:3 grade12:4 grade12:5 grade12:6
## 4.4273681 3.8240949 2.8726460 3.0008485 1.8628226 0.6731502
## SEX1:1 SEX1:2 SEX1:3 SEX1:4 SEX1:5 SEX1:6
## 0.8555635 1.1497948 1.5146071 1.9287476 1.9660089 2.4108803
## Hispanic1:1 Hispanic1:2 Hispanic1:3 Hispanic1:4 Hispanic1:5 Hispanic1:6
## 0.8763679 0.8514759 0.8416357 0.9332760 0.9041706 1.6006590
## suicide1:1 suicide1:2 suicide1:3 suicide1:4 suicide1:5 suicide1:6
## 1.7263360 1.4764461 1.6381852 1.7595086 2.2216426 2.8545732
AIC(fit.vgam)
## [1] 11789.39
AIC(fit.vgam2)
## [1] 11761.04
#Based on the AIC, the non-proportional odds model fits better than the proportional odds model.
Do Hispanic males who have suicide thoughts have a greater likelihood to drink alcohol more frequently?
Males have who have a suicide thoughts have a higher likelihood to drink alcohol. However, being Hispanic was not signficant.