#Question 1 ###All the variables in this dataset are from Add Health

##A ordinal variable is a variable where its responses are in a specific order. For example, the ordinal variable that I will be using is Alcohol usage, which asks during the past 12 months, on how many days did you drink alcohol? It is originally coded as 1=everyday, 2=3 to 5 times a week, 3=1 to 2 times a week, 4=2 to 3 times a month, 5=once a month, 6=1 to 2 times a year and 7=never. I prefer the order to be from least to greatest so it will be recoded as the following: 1=never, 2=1 to2 times a year, 3=once a month, 4=2 to 3 times a month, 5=1 to 2 times a week, 6=3 to 5 times a week 7= everyday.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(haven)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.6.2
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(questionr)
## Warning: package 'questionr' was built under R version 3.6.2
library(ggplot2)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
d1<-read_dta("C:/Users/tim1203/Desktop/utsa/C. Sparks/Dem 7283/Data sets/wholeaddhealthstata.dta") 
w1<-read_dta("C:/Users/tim1203/Desktop/utsa/C. Sparks/Dem 7283/Data sets/21600-0004-Data.dta")

d2<-merge(d1, w1, by="AID")

###Recode suicide

d2$suicide<-Recode(d2$H1SU1, recodes="0=0; 1=1; else=NA")
table(d2$suicide)
## 
##    0    1 
## 5614  821
is.numeric(d2$suicide)
## [1] TRUE

###Recode grade

d2$grade<-Recode(d2$H1GI20, recodes="7=7; 8=8; 9=9; 10=10; 11=11; 12=12; else=NA")
table(d2$grade)
## 
##    7    8    9   10   11   12 
##  979  992 1107 1144 1122  993
table(d2$H1GI20)
## 
##    7    8    9   10   11   12   96   97   98   99 
##  979  992 1107 1144 1122  993    1  128    3   35
is.numeric(d2$grade)
## [1] TRUE

###Recode Hispanic

table(d2$H1GI4)
## 
##    0    1    6    8 
## 5738  743    3   20
d2$Hispanic<-Recode(d2$H1GI4, recodes="0=0; 1=1; else=NA")
table(d2$Hispanic)
## 
##    0    1 
## 5738  743
is.numeric(d2$Hispanic)
## [1] TRUE
is.character(d2$Hispanic)
## [1] FALSE
table(d2$BIO_SEX)
## 
##    1    2    6 
## 3147 3356    1
d2$SEX<-Recode(d2$BIO_SEX, recodes="1=1; 2=0; else=NA")
table(d2$SEX)
## 
##    0    1 
## 3356 3147

#males equals 1, females equals 0.

###Recode Alcohol

table(d2$H1TO15)
## 
##    1    2    3    4    5    6    7   96   97   98 
##   59  166  387  512  743 1097  581    8 2944    7
d2$alcohol<-Recode(d2$H1TO15, recodes="1=7; 2=6; 3=5; 4=4; 5=3; 6=2; 7=1; else=NA")
table(d2$alcohol)
## 
##    1    2    3    4    5    6    7 
##  581 1097  743  512  387  166   59
table(d2$H1RE4)
## 
##    1    2    3    4    6    7    8 
## 2812 2218  391  193    3  879    8
d2$religion<-Recode(d2$H1RE4, recodes="1=4; 2=3; 3=2; 4=1; else=NA")
table(d2$religion)
## 
##    1    2    3    4 
##  193  391 2218 2812
d2 <- d2 %>% 
  mutate(
    grade = as.factor(grade),
    Hispanic = as.factor(Hispanic),
    alcohol = as.factor(alcohol),
    SEX=as.factor(SEX),
    suicide=as.factor(suicide),
    religion=as.factor(religion))

#Question 2 Do Hispanic males who have suicide thoughts have a greater likelihood to drink alcohol more frequently?

d2$alcohol<-Recode(d2$H1TO15, recodes="1=7; 2=6; 3=5; 4=4; 5=3; 6=2; 7=1; else=NA", as.factor=T)
d2$alcohol<-relevel(d2$alcohol, ref="1")
d2$alcohol2<-car::Recode(d2$alcohol, recodes="1=7; 2=6; 3=5; 4=4; 5=3; 6=2; 7=1; else=NA", as.factor=F)
sub<-d2%>%
select(alcohol, alcohol2, Hispanic, SEX, suicide, religion, GSWGT1)%>%
  filter(complete.cases(.))
options(survey.lonely.psu = "PSUSCID")
des<-svydesign(ids=~CLUSTER2, weights=~GSWGT1, data=d2)
fit.solr1<-svyolr(alcohol~grade+SEX+Hispanic+suicide, des)
summary(fit.solr1)
## Call:
## svyolr(alcohol ~ grade + SEX + Hispanic + suicide, des)
## 
## Coefficients:
##                Value Std. Error   t value
## grade8     0.3304395 0.17162961  1.925306
## grade9     0.6891641 0.15315148  4.499885
## grade10    0.8686001 0.16111639  5.391134
## grade11    1.0621094 0.14894544  7.130862
## grade12    1.3413259 0.16500344  8.129079
## SEX1       0.2238010 0.06720762  3.329995
## Hispanic1 -0.1430155 0.12944653 -1.104823
## suicide1   0.4861899 0.09216900  5.274983
## 
## Intercepts:
##     Value   Std. Error t value
## 1|2 -0.7179  0.1440    -4.9835
## 2|3  0.8495  0.1406     6.0434
## 3|4  1.7640  0.1474    11.9654
## 4|5  2.5846  0.1588    16.2792
## 5|6  3.6755  0.1595    23.0423
## 6|7  5.0779  0.2103    24.1465
## (3066 observations deleted due to missingness)

#Results that have a t value that are greater than 2 are considered significant. This includes students in grades 9 through 12th, females, and those who have suicidal thoughts. Students in grades 9th through 12th have a higher likelihood to drink alcohol than students in 7th grade. Males also are more likely to drink alcohol than females. Last, students who have suicide thoughts are also more likely to drink alcohol.

fit.solr1$deviance+2*length(fit.solr1$coefficients)
## [1] 11717.15
ex1<-svyglm(I(alcohol2>1)~grade+SEX+Hispanic+suicide,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
ex2<-svyglm(I(alcohol2>2)~grade+SEX+Hispanic+suicide,des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
plot(coef(ex1)[-1], ylim=c(-3, 3), type="l",xaxt="n",
     ylab="Beta", main=c("Comparison of betas for", " proportional odds assumption"))
lines(coef(ex2)[-1], col=1, lty=2) 
axis(side=1, at=1:12, labels=F)
text(x=1:12, y=-4,  srt = 90, pos = 1, xpd = TRUE,cex=.8,
     labels = c("8th","9th" ,"10th", "11th", "12th", "Male", "Hispanic","suicide thoughts"))
legend("bottomright",col=c(1,1),lty=c(1,2), legend=c(">1", ">2"))
lines(coef(fit.solr1)[c(-13:-16)], col=4, lwd=3)

###Printing odds ratios

round(exp(rbind(coef(ex1)[-1], coef(ex2)[-1])),3)
##      grade8 grade9 grade10 grade11 grade12  SEX1 Hispanic1 suicide1
## [1,]  1.682  0.934   1.146   1.818   1.239 0.475     0.818    0.405
## [2,]  1.268  0.828   0.933   0.952   0.546 0.535     1.116    0.448
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
## 
## Attaching package: 'VGAM'
## The following object is masked from 'package:car':
## 
##     logit
## The following object is masked from 'package:survey':
## 
##     calibrate

#Proporitional Assumptions

##Proportional Odds

fit.vgam<-vglm(as.ordered(alcohol)~grade+SEX+Hispanic+suicide,
               d2, weights =GSWGT1/mean(GSWGT1, na.rm=T),
               family=cumulative(parallel = T, reverse = T)) #<-parallel = T == proportional odds
summary(fit.vgam)
## 
## Call:
## vglm(formula = as.ordered(alcohol) ~ grade + SEX + Hispanic + 
##     suicide, family = cumulative(parallel = T, reverse = T), 
##     data = d2, weights = GSWGT1/mean(GSWGT1, na.rm = T))
## 
## Pearson residuals:
##                       Min      1Q   Median       3Q    Max
## logitlink(P[Y>=2]) -4.874  0.1315  0.24617  0.54616  1.820
## logitlink(P[Y>=3]) -3.049 -0.8386  0.16787  0.59742  4.353
## logitlink(P[Y>=4]) -2.692 -0.4713 -0.25304  0.46760  7.130
## logitlink(P[Y>=5]) -2.888 -0.3257 -0.19853 -0.11466  6.303
## logitlink(P[Y>=6]) -2.239 -0.1994 -0.13089 -0.09394 11.648
## logitlink(P[Y>=7]) -2.346 -0.1038 -0.07697 -0.05874 19.904
## 
## Coefficients: 
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1  0.71788    0.10812   6.639 3.15e-11 ***
## (Intercept):2 -0.84952    0.10796  -7.869 3.57e-15 ***
## (Intercept):3 -1.76393    0.11087 -15.910  < 2e-16 ***
## (Intercept):4 -2.58461    0.11510 -22.456  < 2e-16 ***
## (Intercept):5 -3.67549    0.12639 -29.080  < 2e-16 ***
## (Intercept):6 -5.07788    0.16664 -30.472  < 2e-16 ***
## grade8         0.33042    0.12877   2.566  0.01029 *  
## grade9         0.68914    0.12138   5.678 1.37e-08 ***
## grade10        0.86858    0.12115   7.170 7.52e-13 ***
## grade11        1.06209    0.12102   8.776  < 2e-16 ***
## grade12        1.34130    0.11959  11.216  < 2e-16 ***
## SEX1           0.22380    0.06112   3.662  0.00025 ***
## Hispanic1     -0.14301    0.09325  -1.534  0.12512    
## suicide1       0.48619    0.08123   5.985 2.16e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  6 
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]), 
## logitlink(P[Y>=4]), logitlink(P[Y>=5]), logitlink(P[Y>=6]), 
## logitlink(P[Y>=7])
## 
## Residual deviance: 11761.39 on 20614 degrees of freedom
## 
## Log-likelihood: -5880.697 on 20614 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):6'
## 
## 
## Exponentiated coefficients:
##    grade8    grade9   grade10   grade11   grade12      SEX1 Hispanic1 
## 1.3915516 1.9920079 2.3835198 2.8924026 3.8240301 1.2508262 0.8667425 
##  suicide1 
## 1.6261102

##Non-Proportional odds

fit.vgam2<-vglm(as.ordered(alcohol)~grade+SEX+Hispanic+suicide,
               d2, weights =GSWGT1/mean(GSWGT1, na.rm=T),
               family=cumulative(parallel = F, reverse = T)) #<-parallel = F == Nonproportional odds
summary(fit.vgam2)
## 
## Call:
## vglm(formula = as.ordered(alcohol) ~ grade + SEX + Hispanic + 
##     suicide, family = cumulative(parallel = F, reverse = T), 
##     data = d2, weights = GSWGT1/mean(GSWGT1, na.rm = T))
## 
## Pearson residuals:
##                       Min      1Q   Median       3Q    Max
## logitlink(P[Y>=2]) -4.757  0.1266  0.24050  0.51427  2.057
## logitlink(P[Y>=3]) -2.746 -0.8558  0.17437  0.58273  4.498
## logitlink(P[Y>=4]) -2.634 -0.4793 -0.25516  0.47517  6.318
## logitlink(P[Y>=5]) -2.930 -0.3228 -0.19082 -0.11124  7.219
## logitlink(P[Y>=6]) -2.652 -0.1967 -0.12573 -0.09024 10.945
## logitlink(P[Y>=7]) -2.770 -0.1004 -0.07204 -0.05287 14.800
## 
## Coefficients: 
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1  0.840946   0.133408   6.304 2.91e-10 ***
## (Intercept):2 -0.814026   0.124057  -6.562 5.32e-11 ***
## (Intercept):3 -1.615858   0.142282 -11.357  < 2e-16 ***
## (Intercept):4 -2.575718   0.185578 -13.879  < 2e-16 ***
## (Intercept):5 -3.323776   0.252571 -13.160  < 2e-16 ***
## (Intercept):6 -4.503453   0.402819 -11.180  < 2e-16 ***
## grade8:1       0.387199   0.161827   2.393 0.016726 *  
## grade8:2       0.375086   0.148866   2.520 0.011748 *  
## grade8:3      -0.004383   0.174432  -0.025 0.979952    
## grade8:4       0.050535   0.224782   0.225 0.822120    
## grade8:5      -0.220840   0.313227  -0.705 0.480781    
## grade8:6      -0.517808   0.494597  -1.047 0.295132    
## grade9:1       0.645442   0.155881   4.141 3.46e-05 ***
## grade9:2       0.757570   0.140050   5.409 6.33e-08 ***
## grade9:3       0.468356   0.158100   2.962 0.003053 ** 
## grade9:4       0.350019   0.204541   1.711 0.087035 .  
## grade9:5       0.184874   0.274402   0.674 0.500480    
## grade9:6      -0.003058   0.408271  -0.007 0.994023    
## grade10:1      1.056968   0.165490   6.387 1.69e-10 ***
## grade10:2      0.899134   0.139849   6.429 1.28e-10 ***
## grade10:3      0.587667   0.156942   3.744 0.000181 ***
## grade10:4      0.475232   0.202413   2.348 0.018883 *  
## grade10:5      0.070026   0.280183   0.250 0.802643    
## grade10:6     -0.218603   0.430230  -0.508 0.611378    
## grade11:1      1.170599   0.168319   6.955 3.53e-12 ***
## grade11:2      1.074237   0.140014   7.672 1.69e-14 ***
## grade11:3      0.807237   0.154900   5.211 1.87e-07 ***
## grade11:4      0.773051   0.196636   3.931 8.45e-05 ***
## grade11:5      0.014412   0.279813   0.052 0.958921    
## grade11:6     -0.838922   0.495641  -1.693 0.090532 .  
## grade12:1      1.487805   0.174360   8.533  < 2e-16 ***
## grade12:2      1.341322   0.139182   9.637  < 2e-16 ***
## grade12:3      1.055234   0.152112   6.937 4.00e-12 ***
## grade12:4      1.098895   0.191315   5.744 9.25e-09 ***
## grade12:5      0.622093   0.258029   2.411 0.015911 *  
## grade12:6     -0.395787   0.438902  -0.902 0.367181    
## SEX1:1        -0.155995   0.094999  -1.642 0.100574    
## SEX1:2         0.139583   0.070000   1.994 0.046147 *  
## SEX1:3         0.415156   0.074895   5.543 2.97e-08 ***
## SEX1:4         0.656871   0.093234   7.045 1.85e-12 ***
## SEX1:5         0.676006   0.139872   4.833 1.34e-06 ***
## SEX1:6         0.879992   0.262448   3.353 0.000799 ***
## Hispanic1:1   -0.131969   0.137570  -0.959 0.337414    
## Hispanic1:2   -0.160784   0.106112  -1.515 0.129714    
## Hispanic1:3   -0.172408   0.116498  -1.480 0.138895    
## Hispanic1:4   -0.069054   0.141772  -0.487 0.626203    
## Hispanic1:5   -0.100737   0.212638  -0.474 0.635678    
## Hispanic1:6    0.470415   0.311796   1.509 0.131369    
## suicide1:1     0.546001   0.143224   3.812 0.000138 ***
## suicide1:2     0.389638   0.094338   4.130 3.62e-05 ***
## suicide1:3     0.493589   0.095648   5.160 2.46e-07 ***
## suicide1:4     0.565035   0.111526   5.066 4.05e-07 ***
## suicide1:5     0.798247   0.152779   5.225 1.74e-07 ***
## suicide1:6     1.048922   0.258883   4.052 5.08e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  6 
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3]), 
## logitlink(P[Y>=4]), logitlink(P[Y>=5]), logitlink(P[Y>=6]), 
## logitlink(P[Y>=7])
## 
## Residual deviance: 11653.04 on 20574 degrees of freedom
## 
## Log-likelihood: -5826.518 on 20574 degrees of freedom
## 
## Number of Fisher scoring iterations: 11 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):5', '(Intercept):6'
## 
## 
## Exponentiated coefficients:
##    grade8:1    grade8:2    grade8:3    grade8:4    grade8:5    grade8:6 
##   1.4728500   1.4551171   0.9956263   1.0518338   0.8018452   0.5958249 
##    grade9:1    grade9:2    grade9:3    grade9:4    grade9:5    grade9:6 
##   1.9068298   2.1330874   1.5973658   1.4190951   1.2030668   0.9969464 
##   grade10:1   grade10:2   grade10:3   grade10:4   grade10:5   grade10:6 
##   2.8776330   2.4574731   1.7997839   1.6083880   1.0725356   0.8036404 
##   grade11:1   grade11:2   grade11:3   grade11:4   grade11:5   grade11:6 
##   3.2239236   2.9277596   2.2417060   2.1663648   1.0145167   0.4321762 
##   grade12:1   grade12:2   grade12:3   grade12:4   grade12:5   grade12:6 
##   4.4273681   3.8240949   2.8726460   3.0008485   1.8628226   0.6731502 
##      SEX1:1      SEX1:2      SEX1:3      SEX1:4      SEX1:5      SEX1:6 
##   0.8555635   1.1497948   1.5146071   1.9287476   1.9660089   2.4108803 
## Hispanic1:1 Hispanic1:2 Hispanic1:3 Hispanic1:4 Hispanic1:5 Hispanic1:6 
##   0.8763679   0.8514759   0.8416357   0.9332760   0.9041706   1.6006590 
##  suicide1:1  suicide1:2  suicide1:3  suicide1:4  suicide1:5  suicide1:6 
##   1.7263360   1.4764461   1.6381852   1.7595086   2.2216426   2.8545732
AIC(fit.vgam)
## [1] 11789.39
AIC(fit.vgam2)
## [1] 11761.04

#Based on the AIC, the non-proportional odds model fits better than the proportional odds model.

Do Hispanic males who have suicide thoughts have a greater likelihood to drink alcohol more frequently?

Males have who have a suicide thoughts have a higher likelihood to drink alcohol. However, being Hispanic was not signficant.