knitr::opts_chunk$set(echo = TRUE)
options(scipen=999, digits=6)
source("xval.glm.R")
library(foreach)
library(doParallel)
## Loading required package: iterators
## Loading required package: parallel
library(ggplot2)
library(gridExtra)
dt = read.csv("3_Study1data.csv")
dtg = subset(dt, goodsub==1)
dtg0 = dtg
dtg$gender_z = factor(dtg$gender_z)
dtg$education_z = factor(dtg$education_z)
dtg$income_z = factor(dtg$income_z)
models = vector(mode = "list", length = 2)
models[[1]] = socialdistancing_z ~ 1 + age_z + gender_z + education_z + income_z + PHQ_z + GAD_z + PSQI_z
models[[2]] = socialdistancing_z ~ 1 + age_z + gender_z + education_z + income_z + PHQ_z + GAD_z + PSQI_z + K_z
models[[3]] = socialdistancing_z ~ 1 + age_z + gender_z + education_z + income_z + PHQ_z + GAD_z + PSQI_z + poly(K_z, 3)
This gives almost identical results as Table 1.
fit1 = glm(models[[1]], data = dtg0)
summary(fit1)
##
## Call:
## glm(formula = models[[1]], data = dtg0)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.254 -0.316 0.225 0.544 1.830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.000000000000000238 0.045750997700160737 0.00 1.0000
## age_z 0.122723926121425769 0.048147402538218843 2.55 0.0112 *
## gender_z -0.151033565436406003 0.046670112272815187 -3.24 0.0013 **
## education_z 0.029006515206507211 0.048986728214616568 0.59 0.5541
## income_z 0.008717184236623998 0.048161295346713880 0.18 0.8565
## PHQ_z -0.444020532142960056 0.094775203699089233 -4.68 0.0000039 ***
## GAD_z 0.178342826068621452 0.089405065648086410 1.99 0.0468 *
## PSQI_z -0.080594799761740263 0.061678946084681469 -1.31 0.1921
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.830982)
##
## Null deviance: 396.00 on 396 degrees of freedom
## Residual deviance: 323.25 on 389 degrees of freedom
## AIC: 1063
##
## Number of Fisher Scoring iterations: 2
fit2 = glm(models[[2]], data = dtg0)
summary(fit2)
##
## Call:
## glm(formula = models[[2]], data = dtg0)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.318 -0.310 0.218 0.545 1.714
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.000000000000000269 0.045029255345376072 0.00 1.00000
## age_z 0.147985386398776481 0.047881467571003672 3.09 0.00214 **
## gender_z -0.135579243436681651 0.046125056290229456 -2.94 0.00349 **
## education_z 0.037016852595417815 0.048262952492987218 0.77 0.44356
## income_z -0.005059552761494530 0.047548834565387027 -0.11 0.91531
## PHQ_z -0.352424729170637552 0.096537226921704897 -3.65 0.00030 ***
## GAD_z 0.151528952645651610 0.088295208640466236 1.72 0.08693 .
## PSQI_z -0.078338728977675270 0.060709022517028727 -1.29 0.19768
## K_z 0.182098976063317874 0.049433155354782710 3.68 0.00026 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.804971)
##
## Null deviance: 396.00 on 396 degrees of freedom
## Residual deviance: 312.33 on 388 degrees of freedom
## AIC: 1051
##
## Number of Fisher Scoring iterations: 2
output = xval.glm(data = dtg0, models)
## Running Cross-validation...done [ 12.7 sec ]
## Results for (10-fold, 200 repeats)
## Model: | Wins | 2.5% | mean | 97.5% |
## [ 1] socialdistancing_z ~ 1 + age_z + gender_ | 0% | 0.917 | 0.922 | 0.929 |
## [ 2] socialdistancing_z ~ 1 + age_z + gender_ | 100% | 0.903 | 0.909 | 0.916 |
## [ 3] socialdistancing_z ~ 1 + age_z + gender_ | 0% | 0.907 | 0.914 | 0.923 |
For K, no big difference from the results without dummy.
fit1 = glm(models[[1]], data = dtg)
summary(fit1)
##
## Call:
## glm(formula = models[[1]], data = dtg)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.191 -0.329 0.224 0.575 1.797
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2362 0.1173 2.01 0.0447 *
## age_z 0.1173 0.0481 2.44 0.0151 *
## gender_z0.996227203217905 -0.2843 0.0931 -3.05 0.0024 **
## education_z-0.104987458573891 -0.1358 0.1202 -1.13 0.2592
## education_z1.19751319935845 -0.1036 0.1496 -0.69 0.4890
## education_z2.50001385729079 0.3782 0.2506 1.51 0.1321
## income_z0.365413191926448 -0.0453 0.1027 -0.44 0.6594
## income_z1.99540237389049 0.1026 0.1731 0.59 0.5538
## PHQ_z -0.4107 0.0952 -4.31 0.00002 ***
## GAD_z 0.1568 0.0895 1.75 0.0805 .
## PSQI_z -0.0973 0.0623 -1.56 0.1190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.823252)
##
## Null deviance: 396.00 on 396 degrees of freedom
## Residual deviance: 317.78 on 386 degrees of freedom
## AIC: 1062
##
## Number of Fisher Scoring iterations: 2
fit2 = glm(models[[2]], data = dtg)
summary(fit2)
##
## Call:
## glm(formula = models[[2]], data = dtg)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.281 -0.303 0.213 0.545 1.687
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2030 0.1161 1.75 0.08119 .
## age_z 0.1423 0.0480 2.97 0.00321 **
## gender_z0.996227203217905 -0.2572 0.0922 -2.79 0.00554 **
## education_z-0.104987458573891 -0.1083 0.1189 -0.91 0.36257
## education_z1.19751319935845 -0.0596 0.1481 -0.40 0.68756
## education_z2.50001385729079 0.3794 0.2472 1.53 0.12563
## income_z0.365413191926448 -0.0402 0.1013 -0.40 0.69156
## income_z1.99540237389049 0.0308 0.1721 0.18 0.85806
## PHQ_z -0.3303 0.0968 -3.41 0.00071 ***
## GAD_z 0.1356 0.0885 1.53 0.12633
## PSQI_z -0.0942 0.0615 -1.53 0.12605
## K_z 0.1700 0.0498 3.41 0.00071 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.801166)
##
## Null deviance: 396.00 on 396 degrees of freedom
## Residual deviance: 308.45 on 385 degrees of freedom
## AIC: 1052
##
## Number of Fisher Scoring iterations: 2
fit3 = glm(models[[3]], data = dtg)
summary(fit3)
##
## Call:
## glm(formula = models[[3]], data = dtg)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.285 -0.305 0.209 0.546 1.684
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2034 0.1165 1.75 0.08146 .
## age_z 0.1421 0.0481 2.95 0.00333 **
## gender_z0.996227203217905 -0.2572 0.0925 -2.78 0.00572 **
## education_z-0.104987458573891 -0.1090 0.1194 -0.91 0.36198
## education_z1.19751319935845 -0.0589 0.1491 -0.39 0.69308
## education_z2.50001385729079 0.3780 0.2486 1.52 0.12924
## income_z0.365413191926448 -0.0407 0.1026 -0.40 0.69193
## income_z1.99540237389049 0.0313 0.1726 0.18 0.85616
## PHQ_z -0.3295 0.0976 -3.37 0.00082 ***
## GAD_z 0.1353 0.0888 1.52 0.12840
## PSQI_z -0.0943 0.0617 -1.53 0.12716
## poly(K_z, 3)1 3.3878 0.9953 3.40 0.00073 ***
## poly(K_z, 3)2 -0.0940 0.9159 -0.10 0.91828
## poly(K_z, 3)3 -0.0587 0.9117 -0.06 0.94868
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.805318)
##
## Null deviance: 396.00 on 396 degrees of freedom
## Residual deviance: 308.44 on 383 degrees of freedom
## AIC: 1056
##
## Number of Fisher Scoring iterations: 2
output = xval.glm(data = dtg0, models)
## Running Cross-validation...done [ 12.7 sec ]
## Results for (10-fold, 200 repeats)
## Model: | Wins | 2.5% | mean | 97.5% |
## [ 1] socialdistancing_z ~ 1 + age_z + gender_ | 0% | 0.917 | 0.922 | 0.930 |
## [ 2] socialdistancing_z ~ 1 + age_z + gender_ | 100% | 0.903 | 0.909 | 0.916 |
## [ 3] socialdistancing_z ~ 1 + age_z + gender_ | 0% | 0.906 | 0.914 | 0.921 |