Data: Swiss Health Survey
Dependent Variable: Intensity of smoking
library(ordinal)
ordsmok <- clm(as.factor(smokekat)~alter+alko+yeareduc+man+gesbed0+gesbed1+gesbed2,
data=smokerdata,
link=c("probit"))
summary(ordsmok)
## formula:
## as.factor(smokekat) ~ alter + alko + yeareduc + man + gesbed0 + gesbed1 + gesbed2
## data: smokerdata
##
## link threshold nobs logLik AIC niter max.grad cond.H
## probit flexible 714 -941.57 1901.14 5(0) 9.39e-13 2.1e+05
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## alter 0.007874 0.002903 2.712 0.006681 **
## alko 0.120165 0.081117 1.481 0.138505
## yeareduc -0.063028 0.019032 -3.312 0.000928 ***
## man 0.437750 0.085261 5.134 2.83e-07 ***
## gesbed0 0.365162 0.134658 2.712 0.006693 **
## gesbed1 -0.061403 0.106959 -0.574 0.565917
## gesbed2 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 1|2 -0.66875 0.24258 -2.757
## 2|3 0.06835 0.24099 0.284
## 3|4 0.96877 0.24394 3.971
## (2633 Beobachtungen als fehlend gelöscht)
Mutually exclusive dummy variables have one linear dependent variable
(here gesbed2).
\(\Rightarrow\) exclude variable for
better results and easier handling
ordsmok <- clm(as.factor(smokekat)~alter+alko+yeareduc+man+gesbed1+gesbed2,
data=smokerdata,
link=c("probit"))
summary(ordsmok)
## formula:
## as.factor(smokekat) ~ alter + alko + yeareduc + man + gesbed1 + gesbed2
## data: smokerdata
##
## link threshold nobs logLik AIC niter max.grad cond.H
## probit flexible 714 -941.57 1901.14 5(0) 1.64e-12 2.2e+05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## alter 0.007874 0.002903 2.712 0.006681 **
## alko 0.120165 0.081117 1.481 0.138505
## yeareduc -0.063028 0.019032 -3.312 0.000928 ***
## man 0.437750 0.085261 5.134 2.83e-07 ***
## gesbed1 -0.426564 0.108636 -3.927 8.62e-05 ***
## gesbed2 -0.365162 0.134658 -2.712 0.006693 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 1|2 -1.0339 0.2484 -4.163
## 2|3 -0.2968 0.2462 -1.205
## 3|4 0.6036 0.2475 2.439
## (2633 Beobachtungen als fehlend gelöscht)
library(ordinal)
ordsmoknew <- clm(as.factor(smokekat)~alter+alko+yeareduc+man+gesbed1+gesbed2
+kids_d+nowork+landei+nosleep,
data=smokerdata,
link=c("probit"))
summary(ordsmoknew)
## formula:
## as.factor(smokekat) ~ alter + alko + yeareduc + man + gesbed1 + gesbed2 + kids_d + nowork + landei + nosleep
## data: smokerdata
##
## link threshold nobs logLik AIC niter max.grad cond.H
## probit flexible 669 -879.69 1785.37 5(0) 8.39e-13 2.4e+05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## alter 0.008626 0.003079 2.802 0.00509 **
## alko 0.131080 0.082482 1.589 0.11202
## yeareduc -0.057174 0.019626 -2.913 0.00358 **
## man 0.403691 0.088118 4.581 4.62e-06 ***
## gesbed1 -0.427525 0.114131 -3.746 0.00018 ***
## gesbed2 -0.327199 0.140855 -2.323 0.02018 *
## kids_d -0.023760 0.093100 -0.255 0.79856
## nowork 0.367094 0.199710 1.838 0.06604 .
## landei -0.057133 0.091298 -0.626 0.53146
## nosleep -0.161553 0.084834 -1.904 0.05687 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 1|2 -1.0340 0.2669 -3.875
## 2|3 -0.2958 0.2645 -1.118
## 3|4 0.6144 0.2659 2.310
## (2678 Beobachtungen als fehlend gelöscht)
LR <- -2*(ordsmok$logLik-ordsmoknew$logLik)
LR
## [1] 123.7664
LR-Test: LR=123.7663672 > 9.4877 = \(\chi^2(0.95,4)\)
pchisq(LR, 4, lower.tail = FALSE)
## [1] 8.375473e-26
v <- vcov(ordsmok)[1:3, 1:3]
v
## 1|2 2|3 3|4
## 1|2 0.06168468 0.06004102 0.05919682
## 2|3 0.06004102 0.06062413 0.05937645
## 3|4 0.05919682 0.05937645 0.06125308
test <- (coef(ordsmok)[2]-coef(ordsmok)[1])/sqrt(v[2,2]+v[1,1]-2*v[1,2])
test
## 2|3
## 15.62043
15.6204306 \(> 2.33 \Rightarrow\) reject \(H_0\) as not significant. Therefore \(\tau_1\) and \(\tau_2\) can indeed be distinguished given the data.
| x | |
|---|---|
| 1|2 | -1.0339154 |
| 2|3 | -0.2968099 |
| 3|4 | 0.6036057 |
| alter | 0.0078738 |
| alko | 0.1201646 |
| yeareduc | -0.0630285 |
| man | 0.4377496 |
| gesbed1 | -0.4265642 |
| gesbed2 | -0.3651615 |
library(ordinal)
cond <- data.frame(smokekat=c(1,2,3,4),alter=45, alko=0.5, yeareduc=12, man=1, gesbed0=1, gesbed1=0, gesbed2=0)
predict(ordsmok, cond, type="prob")
## $fit
## 1 2 3 4
## 1 0.1292958 0.1292958 0.1292958 0.1292958
## 2 0.2180040 0.2180040 0.2180040 0.2180040
## 3 0.3469016 0.3469016 0.3469016 0.3469016
## 4 0.3057986 0.3057986 0.3057986 0.3057986
xbeta <- coef(ordsmok)[4]*45+coef(ordsmok)[5]*0.5+coef(ordsmok)[6]*12+coef(ordsmok)[7]*1+coef(ordsmok)[8]*1+coef(ordsmok)[9]*0
\(x_i\beta=\beta_{alter}*45+\beta_{alko}*0.5+\beta_{yeareduc}*12+\beta_{man}*1+\beta_{gesbed0}*1+\beta_{gesbed1}*0+\beta_{gesbed2}*0=0.4609723\) \(P(y=3|x)=P(\tau_2<y*\leq \tau_3)=\Phi(\tau_3-x_i\beta)-\Phi(\tau_2-x_i\beta)\)
library(margins)
margins_summary(ordsmok, category=3)
## factor AME SE z p lower upper
## alko -0.0029 NA NA NA NA NA
## alter -0.0002 NA NA NA NA NA
## gesbed1 0.0104 NA NA NA NA NA
## gesbed2 0.0089 NA NA NA NA NA
## man -0.0107 NA NA NA NA NA
## yeareduc 0.0015 NA NA NA NA NA
pred1<-predict(ordsmok, data.frame(smokekat=c(1,2,3,4), alter=40, alko=0, yeareduc=15, man=1, gesbed0=1, gesbed1=0, gesbed2=0), type='prob')
pred2<-predict(ordsmok, data.frame(smokekat=c(1,2,3,4), alter=40, alko=0, yeareduc=15, man=0, gesbed0=1, gesbed1=0, gesbed2=0), type='prob')
pred1$fit[2]-pred2$fit[2]
## [1] -0.02894127
library(ordinal)
#transform variable first
smokerdata$smokekat <- as.factor(smokerdata$smokekat)
#run ordinal probit model
ordsmok <- clm(smokekat ~ alter + alko + yeareduc + man + gesbed1 + gesbed2, data=smokerdata, link = c('probit'))
smokeman1 <- data.frame(smokekat="1", alter=seq(18,80,1), alko=mean(na.omit(smokerdata$alko)), yeareduc=mean(na.omit(smokerdata$yeareduc)), man=1,gesbed1=mean(na.omit(smokerdata$gesbed1)),gesbed2=mean(na.omit(smokerdata$gesbed2)))
smokeman1$smokekat <- as.factor(smokeman1$smokekat)
smokeman4 <- data.frame(smokekat=4, alter=c(18:80), alko=mean(na.omit(smokerdata$alko)), yeareduc=mean(na.omit(smokerdata$yeareduc)), man=1,gesbed1=mean(na.omit(smokerdata$gesbed1)),gesbed2=mean(na.omit(smokerdata$gesbed2)))
#pred_prob_1 <- predict(ordsmok, newdata= smokeman1, type="prob", se.fit=TRUE, interval = "confidence")
#pred_prob_4 <- predict(ordsmok, newdata= smokeman4 , type="prob", se.fit=TRUE, interval = "confidence")
#conf_intervall1 <- data.frame(pred=pred_prob_1$fit, lower=pred_prob_1$fit- 1.96*pred_prob_1$se.fit, upper= pred_prob_1$fit+ 1.96*pred_prob_1$se.fit)
#conf_intervall4 <- data.frame(pred=pred_prob_4$fit, lower=pred_prob_4$fit- 1.96*pred_prob_4$se.fit, upper= pred_prob_4$fit+ 1.96*pred_prob_4$se.fit)
x_axis <- c(18:80)
#plot(0, type="n", xlab="Age", ylab="Predicted probability of being a smoker", main="Prediction with 95%-CIs", xlim=c(20, 80), ylim=c(0, 1.0))
#lines(x_axis, unlist(conf_intervall1['pred'],use.name=FALSE), col='blue')
#lines(x_axis, unlist(conf_intervall4['pred'],use.name=FALSE), col='red')
#polygon(x = c(x_axis, rev(x_axis)), y = c(unlist(conf_intervall1['lower'], use.names=FALSE), rev(unlist(conf_intervall1['upper'],use.names=FALSE))), col = adjustcolor('lightblue', alpha = 0.8), border = NA)
#polygon(x = c(x_axis, rev(x_axis)), y = c(unlist(conf_intervall4['lower'], use.names=FALSE), rev(unlist(conf_intervall4['upper'],use.names=FALSE))), col = adjustcolor('green', alpha = 0.4), border = NA)
#legend("topright", c("smokekat=1", "smokekat=4"), col = c("blue", "red"), lty = 1, cex = 1)
Error: variable ‘smokekat’ was fitted with type “factor” but type “numeric” was supplied
library(jtools)
library(MASS)
logitsmok <- clm(as.factor(smokekat)~alter+alko+yeareduc+man+gesbed1+gesbed2,
data=smokerdata,
link=c("logit"))
logitsmok <- polr(as.factor(smokekat)~alter+alko+yeareduc+man+gesbed1+gesbed2,
data=smokerdata,
method=c("logistic"))
export_summs(logitsmok, ordsmok, model.names =c("Ordinal Probit Model", "Ordinal Logit Model"))
| Ordinal Probit Model | Ordinal Logit Model | |
|---|---|---|
| alter | 0.01 | 0.01 ** |
| (0.00) | (0.00) | |
| alko | 0.22 | 0.12 |
| (0.14) | (0.08) | |
| yeareduc | -0.12 | -0.06 *** |
| (0.03) | (0.02) | |
| man | 0.72 | 0.44 *** |
| (0.14) | (0.09) | |
| gesbed1 | -0.75 | -0.43 *** |
| (0.19) | (0.11) | |
| gesbed2 | -0.64 | -0.37 ** |
| (0.23) | (0.13) | |
| 1|2 | -1.88 | -1.03 *** |
| (0.43) | (0.25) | |
| 2|3 | -0.67 | -0.30 |
| (0.42) | (0.25) | |
| 3|4 | 0.85 | 0.60 * |
| (0.42) | (0.25) | |
| nobs | 714.00 | 714.00 |
| edf | 9.00 | 9.00 |
| logLik | -940.87 | -941.57 |
| AIC | 1899.74 | 1901.14 |
| BIC | 1940.87 | 1942.28 |
| deviance | 1881.74 | |
| df.residual | 705.00 | 705.00 |
| nobs.1 | 714.00 | 714.00 |
| *** p < 0.001; ** p < 0.01; * p < 0.05. | ||
cond <- data.frame(smokekat=c(1,2,3,4), alter=45, alko=0.5, yeareduc=12, man=1, gesbed0=1, gesbed1=0, gesbed2=0)
predict(logitsmok, cond, type="prob")
## 1 2 3 4
## 1 0.1306068 0.2042521 0.362834 0.3023071
## 2 0.1306068 0.2042521 0.362834 0.3023071
## 3 0.1306068 0.2042521 0.362834 0.3023071
## 4 0.1306068 0.2042521 0.362834 0.3023071
xbetalogit <- coef(logitsmok)[1]*45+coef(logitsmok)[2]*0.5+coef(logitsmok)[3]*12+coef(logitsmok)[4]*1+coef(logitsmok)[5]*1+coef(logitsmok)[6]*0
1/(1+exp(-(1.49462211-xbetalogit)))-1/(1+exp(-(-0.02800562-xbetalogit)))
## alter
## 0.2332849
\(x_i\beta=\beta_{alter}*45+\beta_{alko}*0.5+\beta_{yeareduc}*12+\beta_{man}*1+\beta_{gesbed0}*1+\beta_{gesbed1}*0+\beta_{gesbed2}*0=\)
-0.733926
\(P(y=3|x)=P(\tau_2<y*\leq
\tau_3)=\Lambda(\tau_3-x_i\beta)-\Lambda(\tau_2-x_i\beta)=\frac{1}{1+exp(-(\tau_3-x_i\beta))}-\frac{1}{1+exp(-(\tau_2-x_i\beta))}\)
\(=\frac{1}{1+exp(-(1.49462211
--0.733926))}-\frac{1}{1+exp(-(-0.02800562--0.733926))}\)
0.3628334*(1-0.3628334)*0.013004
## [1] 0.003006334
smokerdata$binary1 <- ifelse(smokerdata$smokekat==1, 0, 1)
smokerdata$binary2 <- ifelse(smokerdata$smokekat==1 | smokerdata$smokekat==2, 0, 1)
smokerdata$binary3 <- ifelse(smokerdata$smokekat==1 | smokerdata$smokekat==2 | smokerdata$smokekat==3, 0, 1)
smokerdata$binary4 <- ifelse(smokerdata$smokekat==1 | smokerdata$smokekat==2 | smokerdata$smokekat==3 | smokerdata$smokekat==4, 0, 1)
binary1 <- glm(binary1~alter+alko+yeareduc+man+gesbed1+gesbed2,
data=smokerdata, family = "binomial")
binary2 <- glm(binary2~alter+alko+yeareduc+man+gesbed1+gesbed2,
data=smokerdata, family = "binomial")
binary3 <- glm(binary3~alter+alko+yeareduc+man+gesbed1+gesbed2,
data=smokerdata, family = "binomial")
data.frame(Model1=binary1$coefficients, Model2=binary2$coefficients, Model3=binary3$coefficients)
## Model1 Model2 Model3
## (Intercept) 1.684762784 0.89635898 -1.94519931
## alter 0.009228628 0.01371047 0.01848193
## alko 0.167419028 0.22309195 0.20371301
## yeareduc -0.110480976 -0.13561698 -0.04031966
## man 0.628280561 0.65040438 1.04452836
## gesbed1 -0.378509275 -0.82403602 -0.91544898
## gesbed2 -0.281215549 -0.66298374 -0.87123012
\(\Rightarrow\) Parallel Regression Assumption does not hold.
library(brant)
brant(logitsmok)
## --------------------------------------------
## Test for X2 df probability
## --------------------------------------------
## Omnibus 18.46 12 0.1
## alter 1.29 2 0.53
## alko 0.12 2 0.94
## yeareduc 5.88 2 0.05
## man 3.58 2 0.17
## gesbed1 5.61 2 0.06
## gesbed2 3.06 2 0.22
## --------------------------------------------
##
## H0: Parallel Regression Assumption holds