library(ipumsr)
setwd("~/Downloads")
ddi <- read_ipums_ddi("nhis_00004.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
data<- haven::zap_labels(data)
names(data) <- tolower(gsub(pattern = "_",replacement = "",x = names(data)))
Data Recoding
data$male <- ifelse(data$sex == 1,1,0)
#Numeric Outcome variable
data$generalhealth<-Recode(data$health,
recodes="1=5;2=4;3=3; 4=2; 5=1; else=NA",
as.factor = F)
data$anxiety_disoreder<- car::Recode(data$anxietyev,
recodes="1= 0; 2= 1;else=NA")
data$educ<- car::Recode(data$educ,
recodes="102:116='1Less than HS'; 201:202='2hsgrad'; 301:503='3More than HS';997:999=NA;000=NA",
as.factor=T)
data$educ<-fct_relevel(data$educ,'1Less than HS','2hsgrad','3More than HS')
# ever had depression
data$hadpression<- car::Recode(data$depressev,
recodes="1= 0 ; 2= 1;else=NA")
data$familyincome_num <- data$famtotinc
#had depression or anxiety
data$dep_anx <- paste( data$anxiety_disoreder, data$hadpression, sep ="")
data$dep_anx<- as.numeric(data$dep_anx)
## Warning: NAs introduced by coercion
#race/ethnicity
data$nhwhite<- car::Recode(data$hisprace,
recodes="02=1; 99=NA; else=0")
data$other_race_eth<- car::Recode(data$hisprace,
recodes="c(1,3,4,5,6,7)=1; 99=NA; else=0")
data$race_eth<-car::Recode(data$hisprace,
recodes="02='NH_White'; c(1,3,4,5,6,7)='other_race_eth';else=NA",
as.factor = T)
data$race_eth<-relevel(data$race_eth,
ref = "NH_White")
#Categorial outcome variable
data$dep_anx2<- car::Recode(data$dep_anx,
recodes="01:11=1; 00 = 0; else=NA")
data <- data%>%
filter(familyincome_num >=0 & age<=999996)
data <- data%>%
filter(age >=18 & age<=099)
Selecting only needed variables
data2<- data %>%
select( strata, sampweight, dep_anx2, race_eth, nhwhite, other_race_eth, educ, generalhealth, age, familyincome_num, male)%>%
filter(complete.cases(.))
Survey Design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~strata, weights=~sampweight, data = data2 )
des
## Stratified Independent Sampling design (with replacement)
## svydesign(ids = ~1, strata = ~strata, weights = ~sampweight,
## data = data2)
Checking to see the selected continuous variable are linear
Since the above are significant it shows that the age variable is not linear
Plotting to know if the selected continuous variable are linear
plot(datagam)



This gives more evidence that the above is not linear
2) Using the gam() function, estimate a model with only linear terms in your model
datagam_l<-gam(dep_anx2~age+ familyincome_num +generalhealth + educ + male + nhwhite,
data=data2,
weights = sampweight/mean(sampweight, na.rm=T),
family=binomial)
summary(datagam_l)
##
## Family: binomial
## Link function: logit
##
## Formula:
## dep_anx2 ~ age + familyincome_num + generalhealth + educ + male +
## nhwhite
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.700e+00 5.820e-02 29.202 < 2e-16 ***
## age -1.861e-02 6.114e-04 -30.438 < 2e-16 ***
## familyincome_num -3.030e-06 2.038e-07 -14.870 < 2e-16 ***
## generalhealth -6.627e-01 1.094e-02 -60.599 < 2e-16 ***
## educ2hsgrad -3.501e-02 3.647e-02 -0.960 0.337
## educ3More than HS 2.054e-01 3.479e-02 5.904 3.55e-09 ***
## male -7.643e-01 2.163e-02 -35.335 < 2e-16 ***
## nhwhite 8.878e-01 2.418e-02 36.713 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.121 Deviance explained = 11.1%
## UBRE = -0.090304 Scale est. = 1 n = 62931
3) Repeat Step 2, but include a smooth term for at least one continuous variable
Numerical outcome
datagam_s<-gam(dep_anx2~s(age)+ familyincome_num +generalhealth + educ + male + nhwhite,
data=data2,
weights = sampweight/mean(sampweight, na.rm=T),
family=binomial)
summary(datagam_s)
##
## Family: binomial
## Link function: logit
##
## Formula:
## dep_anx2 ~ s(age) + familyincome_num + generalhealth + educ +
## male + nhwhite
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.228e-01 4.354e-02 16.601 < 2e-16 ***
## familyincome_num -3.209e-06 2.079e-07 -15.434 < 2e-16 ***
## generalhealth -6.574e-01 1.097e-02 -59.901 < 2e-16 ***
## educ2hsgrad -4.806e-02 3.668e-02 -1.311 0.19
## educ3More than HS 1.819e-01 3.520e-02 5.167 2.38e-07 ***
## male -7.672e-01 2.165e-02 -35.437 < 2e-16 ***
## nhwhite 8.979e-01 2.426e-02 37.020 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(age) 8.33 8.874 958.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.123 Deviance explained = 11.2%
## UBRE = -0.091175 Scale est. = 1 n = 62931
4) Test if the model in step 3 is fitting the data better than the purely linear model.
anova( datagam_l, datagam_s, test="Chisq")
Comparing the two models, the model with a smooth term for the age variable fits the model better. This is evident because of the very small p-value.
5) Produce a plot of the smooth effect from the model in step 3
plot(datagam_s)

LS0tCnRpdGxlOiAiQXNzaWdubWVudCA5IgphdXRob3I6ICJKb3NlcGggSmFpeWVvbGEiCmRhdGU6ICAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknKWAiCm91dHB1dDoKICAgaHRtbF9kb2N1bWVudDoKICAgIGRmX3ByaW50OiBwYWdlZAogICAgZmlnX2hlaWdodDogNwogICAgZmlnX3dpZHRoOiA3CiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKLS0tCgoKCgoKYGBge3IgaW5jbHVkZT1GQUxTRX0KbGlicmFyeShmb3JjYXRzKQpsaWJyYXJ5KHN0YXJnYXplciwgcXVpZXRseSA9IFQpCmxpYnJhcnkoc3VydmV5LCBxdWlldGx5ID0gVCkKbGlicmFyeShjYXIsIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KHF1ZXN0aW9uciwgcXVpZXRseSA9IFQpCmxpYnJhcnkoZHBseXIsIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KGZvcmNhdHMsIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KHRpZHl2ZXJzZSwgcXVpZXRseSA9IFQpCmxpYnJhcnkoc3J2eXIsIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KCBndHN1bW1hcnksIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KGNhcmV0LCBxdWlldGx5ID0gVCkKbGlicmFyeSh0YWJsZW9uZSwgIHF1aWV0bHkgPSBUKQpgYGAKCmBgYHtyIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoY2FyKQpsaWJyYXJ5KHN0YXJnYXplciwgcXVpZXRseSA9IFQpCmxpYnJhcnkoc3VydmV5LCBxdWlldGx5ID0gVCkKbGlicmFyeShnZ3Bsb3QyLCBxdWlldGx5ID0gVCkKbGlicmFyeShwYW5kZXIsIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KGtuaXRyLCBxdWlldGx5ID0gVCkKbGlicmFyeShkcGx5ciwgcXVpZXRseSA9IFQpCmxpYnJhcnkoZmFjdG9leHRyYSwgcXVpZXRseSA9IFQpCmxpYnJhcnkoRmFjdG9NaW5lUiwgcXVpZXRseSA9IFQpCgoKbGlicmFyeShjYXIpCmxpYnJhcnkobWljZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGRwbHlyKQpgYGAKCgpgYGB7ciBpbmNsdWRlPUZBTFNFfQpsaWJyYXJ5KG1nY3YpICNvbmUgbGlicmFyeSBmb3IgR0FNcwpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoY2FyKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KGVtbWVhbnMpCmxpYnJhcnkoc3VydmV5KQpsaWJyYXJ5KHRhYmxlb25lLCAgcXVpZXRseSA9IFQpCmxpYnJhcnkoc3BsaW5lcykKbGlicmFyeShtZ2N2KQpgYGAKCgpgYGB7cn0KbGlicmFyeShpcHVtc3IpCmBgYAoKYGBge3J9CnNldHdkKCJ+L0Rvd25sb2FkcyIpCmBgYAoKCgpgYGB7cn0KZGRpIDwtIHJlYWRfaXB1bXNfZGRpKCJuaGlzXzAwMDA0LnhtbCIpCmRhdGEgPC0gcmVhZF9pcHVtc19taWNybyhkZGkpCmRhdGE8LSBoYXZlbjo6emFwX2xhYmVscyhkYXRhKQpgYGAKCgoKYGBge3J9Cm5hbWVzKGRhdGEpIDwtIHRvbG93ZXIoZ3N1YihwYXR0ZXJuID0gIl8iLHJlcGxhY2VtZW50ID0gICIiLHggPSAgbmFtZXMoZGF0YSkpKQpgYGAKCgoKIyBEYXRhIFJlY29kaW5nCmBgYHtyfQoKZGF0YSRtYWxlIDwtIGlmZWxzZShkYXRhJHNleCA9PSAxLDEsMCkKCiNOdW1lcmljIE91dGNvbWUgdmFyaWFibGUKZGF0YSRnZW5lcmFsaGVhbHRoPC1SZWNvZGUoZGF0YSRoZWFsdGgsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByZWNvZGVzPSIxPTU7Mj00OzM9MzsgND0yOyA1PTE7ICBlbHNlPU5BIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFzLmZhY3RvciA9IEYpCgoKZGF0YSRhbnhpZXR5X2Rpc29yZWRlcjwtIGNhcjo6UmVjb2RlKGRhdGEkYW54aWV0eWV2LAogICAgICAgICAgICAgICAgICAgICAgIHJlY29kZXM9IjE9IDA7IDI9IDE7ZWxzZT1OQSIpCgoKCmRhdGEkZWR1YzwtIGNhcjo6UmVjb2RlKGRhdGEkZWR1YywKICAgICAgICAgICAgICAgICAgICAgcmVjb2Rlcz0iMTAyOjExNj0nMUxlc3MgdGhhbiBIUyc7IDIwMToyMDI9JzJoc2dyYWQnOyAzMDE6NTAzPSczTW9yZSB0aGFuIEhTJzs5OTc6OTk5PU5BOzAwMD1OQSIsCiAgICAgICAgICAgICAgICAgICAgIGFzLmZhY3Rvcj1UKQpkYXRhJGVkdWM8LWZjdF9yZWxldmVsKGRhdGEkZWR1YywnMUxlc3MgdGhhbiBIUycsJzJoc2dyYWQnLCczTW9yZSB0aGFuIEhTJykgCgoKIyBldmVyIGhhZCBkZXByZXNzaW9uCmRhdGEkaGFkcHJlc3Npb248LSBjYXI6OlJlY29kZShkYXRhJGRlcHJlc3NldiwKICAgICAgICAgICAgICAgICAgICAgICByZWNvZGVzPSIxPSAwIDsgMj0gMTtlbHNlPU5BIikKCgpkYXRhJGZhbWlseWluY29tZV9udW0gPC0gZGF0YSRmYW10b3RpbmMKCiNoYWQgZGVwcmVzc2lvbiBvciBhbnhpZXR5CgoKZGF0YSRkZXBfYW54IDwtIHBhc3RlKCBkYXRhJGFueGlldHlfZGlzb3JlZGVyLCBkYXRhJGhhZHByZXNzaW9uLCBzZXAgPSIiKQoKZGF0YSRkZXBfYW54PC0gYXMubnVtZXJpYyhkYXRhJGRlcF9hbngpCgoKIAoKCiNyYWNlL2V0aG5pY2l0eQpkYXRhJG5od2hpdGU8LSBjYXI6OlJlY29kZShkYXRhJGhpc3ByYWNlLAogICAgICAgICAgICAgICAgICAgICAgIHJlY29kZXM9IjAyPTE7IDk5PU5BOyBlbHNlPTAiKQoKZGF0YSRvdGhlcl9yYWNlX2V0aDwtIGNhcjo6UmVjb2RlKGRhdGEkaGlzcHJhY2UsCiAgICAgICAgICAgICAgICAgICAgICByZWNvZGVzPSJjKDEsMyw0LDUsNiw3KT0xOyA5OT1OQTsgZWxzZT0wIikKCgpkYXRhJHJhY2VfZXRoPC1jYXI6OlJlY29kZShkYXRhJGhpc3ByYWNlLApyZWNvZGVzPSIwMj0nTkhfV2hpdGUnOyBjKDEsMyw0LDUsNiw3KT0nb3RoZXJfcmFjZV9ldGgnO2Vsc2U9TkEiLAphcy5mYWN0b3IgPSBUKQpkYXRhJHJhY2VfZXRoPC1yZWxldmVsKGRhdGEkcmFjZV9ldGgsCiAgICAgICAgICAgICAgICAgICAgICAgICAgcmVmID0gIk5IX1doaXRlIikKCiNDYXRlZ29yaWFsIG91dGNvbWUgdmFyaWFibGUKCmRhdGEkZGVwX2FueDI8LSBjYXI6OlJlY29kZShkYXRhJGRlcF9hbngsCiAgICAgICAgICAgICAgICAgICAgICAgcmVjb2Rlcz0iMDE6MTE9MTsgMDAgPSAwOyBlbHNlPU5BIikKYGBgCgoKCmBgYHtyfQoKCmRhdGEgPC0gZGF0YSU+JQpmaWx0ZXIoZmFtaWx5aW5jb21lX251bSA+PTAgJiBhZ2U8PTk5OTk5NikKCgpkYXRhIDwtIGRhdGElPiUKZmlsdGVyKGFnZSA+PTE4ICYgYWdlPD0wOTkpCgpgYGAKCgojIFNlbGVjdGluZyBvbmx5IG5lZWRlZCB2YXJpYWJsZXMKYGBge3J9CmRhdGEyPC0gZGF0YSAlPiUgCiAgc2VsZWN0KCBzdHJhdGEsIHNhbXB3ZWlnaHQsIGRlcF9hbngyLCByYWNlX2V0aCwgbmh3aGl0ZSwgb3RoZXJfcmFjZV9ldGgsIGVkdWMsIGdlbmVyYWxoZWFsdGgsIGFnZSwgZmFtaWx5aW5jb21lX251bSwgbWFsZSklPiUKICBmaWx0ZXIoY29tcGxldGUuY2FzZXMoLikpCmBgYAoKCgoKIyBTdXJ2ZXkgRGVzaWduCmBgYHtyfQpvcHRpb25zKHN1cnZleS5sb25lbHkucHN1ID0gImFkanVzdCIpCgpkZXM8LXN2eWRlc2lnbihpZHM9fjEsIHN0cmF0YT1+c3RyYXRhLCB3ZWlnaHRzPX5zYW1wd2VpZ2h0LCBkYXRhID0gZGF0YTIgKQpkZXMKYGBgCgoKIyAxKSBkZWZpbmUgYW4gb3V0Y29tZSwgdGhpcyBjYW4gYmUgb2YgYW55IGZvcm0sIGJ1dCB1c2UgYW4gYXBwcm9wcmlhdGUgZGlzdHJpYnV0aW9uIGZvciB5b3VyIG91dGNvbWUuIEFsc28gZGVmaW5lIGF0IGxlYXN0IDEgY29udGludW91cyBwcmVkaWN0b3IuCgojIyBDYXRlZ29yaWFsIE91dGNvbWU6IEFueGlldHkvRGVwcmVzc2lvbgojIyBDb250aW51b3VzIE91dGNvbWU6IEZhbWlseSBJbmNvbWUKCgojIENoZWNraW5nIHRvIHNlZSB0aGUgc2VsZWN0ZWQgY29udGludW91cyB2YXJpYWJsZSBhcmUgbGluZWFyCgoKCmBgYHtyIGluY2x1ZGU9RkFMU0V9CmRhdGFnYW08LWdhbShkZXBfYW54MiB+IHMoYWdlLCBieSA9IGVkdWMpKyAgZWR1YywKICAgICAgICAgICAgZGF0YT1kYXRhMiwKICAgICAgICAgICAgd2VpZ2h0cyA9IHNhbXB3ZWlnaHQvbWVhbihzYW1wd2VpZ2h0LCBuYS5ybT1UKSwKICAgICAgICAgICAgZmFtaWx5PWJpbm9taWFsKQpzdW1tYXJ5KGRhdGFnYW0pCmBgYAoKCiMjIyBTaW5jZSB0aGUgYWJvdmUgYXJlIHNpZ25pZmljYW50IGl0IHNob3dzIHRoYXQgdGhlIGFnZSB2YXJpYWJsZSBpcyBub3QgbGluZWFyCgoKCiMgUGxvdHRpbmcgdG8ga25vdyBpZiB0aGUgc2VsZWN0ZWQgY29udGludW91cyB2YXJpYWJsZSBhcmUgbGluZWFyCmBgYHtyfQpwbG90KGRhdGFnYW0pCmBgYAoKCiMjIyBUaGlzIGdpdmVzIG1vcmUgZXZpZGVuY2UgdGhhdCB0aGUgYWJvdmUgaXMgbm90IGxpbmVhcgoKCiMgMikgVXNpbmcgdGhlIGdhbSgpIGZ1bmN0aW9uLCBlc3RpbWF0ZSBhIG1vZGVsIHdpdGggb25seSBsaW5lYXIgdGVybXMgaW4geW91ciBtb2RlbAoKCgpgYGB7ciwgd2FybmluZz1GQUxTRX0KZGF0YWdhbV9sPC1nYW0oZGVwX2FueDJ+YWdlKyBmYW1pbHlpbmNvbWVfbnVtICtnZW5lcmFsaGVhbHRoICsgZWR1YyArIG1hbGUgKyBuaHdoaXRlLAogICAgICAgICAgICAgIGRhdGE9ZGF0YTIsCiAgICAgICAgICAgICAgICB3ZWlnaHRzID0gc2FtcHdlaWdodC9tZWFuKHNhbXB3ZWlnaHQsIG5hLnJtPVQpLAogICAgICAgICAgICAgIGZhbWlseT1iaW5vbWlhbCkKCnN1bW1hcnkoZGF0YWdhbV9sKQpgYGAKCgojIDMpIFJlcGVhdCBTdGVwIDIsIGJ1dCBpbmNsdWRlIGEgc21vb3RoIHRlcm0gZm9yIGF0IGxlYXN0IG9uZSBjb250aW51b3VzIHZhcmlhYmxlCgoKIyMgTnVtZXJpY2FsIG91dGNvbWUgCgpgYGB7ciwgd2FybmluZz1GQUxTRX0KZGF0YWdhbV9zPC1nYW0oZGVwX2FueDJ+cyhhZ2UpKyBmYW1pbHlpbmNvbWVfbnVtICtnZW5lcmFsaGVhbHRoICsgZWR1YyArIG1hbGUgKyBuaHdoaXRlLAogICAgICAgICAgICAgIGRhdGE9ZGF0YTIsCiAgICAgICAgICAgICAgICB3ZWlnaHRzID0gc2FtcHdlaWdodC9tZWFuKHNhbXB3ZWlnaHQsIG5hLnJtPVQpLAogICAgICAgICAgICAgIGZhbWlseT1iaW5vbWlhbCkKCnN1bW1hcnkoZGF0YWdhbV9zKQpgYGAKCgojIDQpIFRlc3QgaWYgdGhlIG1vZGVsIGluIHN0ZXAgMyBpcyBmaXR0aW5nIHRoZSBkYXRhIGJldHRlciB0aGFuIHRoZSBwdXJlbHkgbGluZWFyIG1vZGVsLgoKCmBgYHtyfQphbm92YSggZGF0YWdhbV9sLCBkYXRhZ2FtX3MsIHRlc3Q9IkNoaXNxIikKYGBgCgpDb21wYXJpbmcgdGhlIHR3byBtb2RlbHMsIHRoZSBtb2RlbCB3aXRoIGEgc21vb3RoIHRlcm0gZm9yIHRoZSBhZ2UgdmFyaWFibGUgZml0cyB0aGUgbW9kZWwgYmV0dGVyLiBUaGlzIGlzIGV2aWRlbnQgYmVjYXVzZSBvZiB0aGUgdmVyeSBzbWFsbCBwLXZhbHVlLgoKCgoKCiMgNSkgUHJvZHVjZSBhIHBsb3Qgb2YgdGhlIHNtb290aCBlZmZlY3QgZnJvbSB0aGUgbW9kZWwgaW4gc3RlcCAzCgoKCmBgYHtyfQpwbG90KGRhdGFnYW1fcykKYGBgCgoK