library(ipumsr)
setwd("~/Downloads")
ddi <- read_ipums_ddi("nhis_00004.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
data<- haven::zap_labels(data)
names(data) <- tolower(gsub(pattern = "_",replacement =  "",x =  names(data)))

Data Recoding

data$male <- ifelse(data$sex == 1,1,0)

#Numeric Outcome variable
data$generalhealth<-Recode(data$health,
                               recodes="1=5;2=4;3=3; 4=2; 5=1;  else=NA",
                               as.factor = F)


data$anxiety_disoreder<- car::Recode(data$anxietyev,
                       recodes="1= 0; 2= 1;else=NA")



data$educ<- car::Recode(data$educ,
                     recodes="102:116='1Less than HS'; 201:202='2hsgrad'; 301:503='3More than HS';997:999=NA;000=NA",
                     as.factor=T)
data$educ<-fct_relevel(data$educ,'1Less than HS','2hsgrad','3More than HS') 


# ever had depression
data$hadpression<- car::Recode(data$depressev,
                       recodes="1= 0 ; 2= 1;else=NA")


data$familyincome_num <- data$famtotinc

#had depression or anxiety


data$dep_anx <- paste( data$anxiety_disoreder, data$hadpression, sep ="")

data$dep_anx<- as.numeric(data$dep_anx)
## Warning: NAs introduced by coercion
#race/ethnicity
data$nhwhite<- car::Recode(data$hisprace,
                       recodes="02=1; 99=NA; else=0")

data$other_race_eth<- car::Recode(data$hisprace,
                      recodes="c(1,3,4,5,6,7)=1; 99=NA; else=0")


data$race_eth<-car::Recode(data$hisprace,
recodes="02='NH_White'; c(1,3,4,5,6,7)='other_race_eth';else=NA",
as.factor = T)
data$race_eth<-relevel(data$race_eth,
                          ref = "NH_White")

#Categorial outcome variable

data$dep_anx2<- car::Recode(data$dep_anx,
                       recodes="01:11=1; 00 = 0; else=NA")
data <- data%>%
filter(familyincome_num >=0 & age<=999996)


data <- data%>%
filter(age >=18 & age<=099)

Selecting only needed variables

data2<- data %>% 
  select( strata, sampweight, dep_anx2, race_eth, nhwhite, other_race_eth, educ, generalhealth, age, familyincome_num, male)%>%
  filter(complete.cases(.))

Survey Design

options(survey.lonely.psu = "adjust")

des<-svydesign(ids=~1, strata=~strata, weights=~sampweight, data = data2 )
des
## Stratified Independent Sampling design (with replacement)
## svydesign(ids = ~1, strata = ~strata, weights = ~sampweight, 
##     data = data2)

1) define an outcome, this can be of any form, but use an appropriate distribution for your outcome. Also define at least 1 continuous predictor.

Categorial Outcome: Anxiety/Depression

Continuous Outcome: Family Income

Checking to see the selected continuous variable are linear

Since the above are significant it shows that the age variable is not linear

Plotting to know if the selected continuous variable are linear

plot(datagam)

This gives more evidence that the above is not linear

2) Using the gam() function, estimate a model with only linear terms in your model

datagam_l<-gam(dep_anx2~age+ familyincome_num +generalhealth + educ + male + nhwhite,
              data=data2,
                weights = sampweight/mean(sampweight, na.rm=T),
              family=binomial)

summary(datagam_l)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## dep_anx2 ~ age + familyincome_num + generalhealth + educ + male + 
##     nhwhite
## 
## Parametric coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.700e+00  5.820e-02  29.202  < 2e-16 ***
## age               -1.861e-02  6.114e-04 -30.438  < 2e-16 ***
## familyincome_num  -3.030e-06  2.038e-07 -14.870  < 2e-16 ***
## generalhealth     -6.627e-01  1.094e-02 -60.599  < 2e-16 ***
## educ2hsgrad       -3.501e-02  3.647e-02  -0.960    0.337    
## educ3More than HS  2.054e-01  3.479e-02   5.904 3.55e-09 ***
## male              -7.643e-01  2.163e-02 -35.335  < 2e-16 ***
## nhwhite            8.878e-01  2.418e-02  36.713  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## R-sq.(adj) =  0.121   Deviance explained = 11.1%
## UBRE = -0.090304  Scale est. = 1         n = 62931

3) Repeat Step 2, but include a smooth term for at least one continuous variable

Numerical outcome

datagam_s<-gam(dep_anx2~s(age)+ familyincome_num +generalhealth + educ + male + nhwhite,
              data=data2,
                weights = sampweight/mean(sampweight, na.rm=T),
              family=binomial)

summary(datagam_s)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## dep_anx2 ~ s(age) + familyincome_num + generalhealth + educ + 
##     male + nhwhite
## 
## Parametric coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        7.228e-01  4.354e-02  16.601  < 2e-16 ***
## familyincome_num  -3.209e-06  2.079e-07 -15.434  < 2e-16 ***
## generalhealth     -6.574e-01  1.097e-02 -59.901  < 2e-16 ***
## educ2hsgrad       -4.806e-02  3.668e-02  -1.311     0.19    
## educ3More than HS  1.819e-01  3.520e-02   5.167 2.38e-07 ***
## male              -7.672e-01  2.165e-02 -35.437  < 2e-16 ***
## nhwhite            8.979e-01  2.426e-02  37.020  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##         edf Ref.df Chi.sq p-value    
## s(age) 8.33  8.874  958.5  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.123   Deviance explained = 11.2%
## UBRE = -0.091175  Scale est. = 1         n = 62931

4) Test if the model in step 3 is fitting the data better than the purely linear model.

anova( datagam_l, datagam_s, test="Chisq")

Comparing the two models, the model with a smooth term for the age variable fits the model better. This is evident because of the very small p-value.

5) Produce a plot of the smooth effect from the model in step 3

plot(datagam_s)

LS0tCnRpdGxlOiAiQXNzaWdubWVudCA5IgphdXRob3I6ICJKb3NlcGggSmFpeWVvbGEiCmRhdGU6ICAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknKWAiCm91dHB1dDoKICAgaHRtbF9kb2N1bWVudDoKICAgIGRmX3ByaW50OiBwYWdlZAogICAgZmlnX2hlaWdodDogNwogICAgZmlnX3dpZHRoOiA3CiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKLS0tCgoKCgoKYGBge3IgaW5jbHVkZT1GQUxTRX0KbGlicmFyeShmb3JjYXRzKQpsaWJyYXJ5KHN0YXJnYXplciwgcXVpZXRseSA9IFQpCmxpYnJhcnkoc3VydmV5LCBxdWlldGx5ID0gVCkKbGlicmFyeShjYXIsIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KHF1ZXN0aW9uciwgcXVpZXRseSA9IFQpCmxpYnJhcnkoZHBseXIsIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KGZvcmNhdHMsIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KHRpZHl2ZXJzZSwgcXVpZXRseSA9IFQpCmxpYnJhcnkoc3J2eXIsIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KCBndHN1bW1hcnksIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KGNhcmV0LCBxdWlldGx5ID0gVCkKbGlicmFyeSh0YWJsZW9uZSwgIHF1aWV0bHkgPSBUKQpgYGAKCmBgYHtyIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoY2FyKQpsaWJyYXJ5KHN0YXJnYXplciwgcXVpZXRseSA9IFQpCmxpYnJhcnkoc3VydmV5LCBxdWlldGx5ID0gVCkKbGlicmFyeShnZ3Bsb3QyLCBxdWlldGx5ID0gVCkKbGlicmFyeShwYW5kZXIsIHF1aWV0bHkgPSBUKQpsaWJyYXJ5KGtuaXRyLCBxdWlldGx5ID0gVCkKbGlicmFyeShkcGx5ciwgcXVpZXRseSA9IFQpCmxpYnJhcnkoZmFjdG9leHRyYSwgcXVpZXRseSA9IFQpCmxpYnJhcnkoRmFjdG9NaW5lUiwgcXVpZXRseSA9IFQpCgoKbGlicmFyeShjYXIpCmxpYnJhcnkobWljZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGRwbHlyKQpgYGAKCgpgYGB7ciBpbmNsdWRlPUZBTFNFfQpsaWJyYXJ5KG1nY3YpICNvbmUgbGlicmFyeSBmb3IgR0FNcwpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoY2FyKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KGVtbWVhbnMpCmxpYnJhcnkoc3VydmV5KQpsaWJyYXJ5KHRhYmxlb25lLCAgcXVpZXRseSA9IFQpCmxpYnJhcnkoc3BsaW5lcykKbGlicmFyeShtZ2N2KQpgYGAKCgpgYGB7cn0KbGlicmFyeShpcHVtc3IpCmBgYAoKYGBge3J9CnNldHdkKCJ+L0Rvd25sb2FkcyIpCmBgYAoKCgpgYGB7cn0KZGRpIDwtIHJlYWRfaXB1bXNfZGRpKCJuaGlzXzAwMDA0LnhtbCIpCmRhdGEgPC0gcmVhZF9pcHVtc19taWNybyhkZGkpCmRhdGE8LSBoYXZlbjo6emFwX2xhYmVscyhkYXRhKQpgYGAKCgoKYGBge3J9Cm5hbWVzKGRhdGEpIDwtIHRvbG93ZXIoZ3N1YihwYXR0ZXJuID0gIl8iLHJlcGxhY2VtZW50ID0gICIiLHggPSAgbmFtZXMoZGF0YSkpKQpgYGAKCgoKIyBEYXRhIFJlY29kaW5nCmBgYHtyfQoKZGF0YSRtYWxlIDwtIGlmZWxzZShkYXRhJHNleCA9PSAxLDEsMCkKCiNOdW1lcmljIE91dGNvbWUgdmFyaWFibGUKZGF0YSRnZW5lcmFsaGVhbHRoPC1SZWNvZGUoZGF0YSRoZWFsdGgsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByZWNvZGVzPSIxPTU7Mj00OzM9MzsgND0yOyA1PTE7ICBlbHNlPU5BIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFzLmZhY3RvciA9IEYpCgoKZGF0YSRhbnhpZXR5X2Rpc29yZWRlcjwtIGNhcjo6UmVjb2RlKGRhdGEkYW54aWV0eWV2LAogICAgICAgICAgICAgICAgICAgICAgIHJlY29kZXM9IjE9IDA7IDI9IDE7ZWxzZT1OQSIpCgoKCmRhdGEkZWR1YzwtIGNhcjo6UmVjb2RlKGRhdGEkZWR1YywKICAgICAgICAgICAgICAgICAgICAgcmVjb2Rlcz0iMTAyOjExNj0nMUxlc3MgdGhhbiBIUyc7IDIwMToyMDI9JzJoc2dyYWQnOyAzMDE6NTAzPSczTW9yZSB0aGFuIEhTJzs5OTc6OTk5PU5BOzAwMD1OQSIsCiAgICAgICAgICAgICAgICAgICAgIGFzLmZhY3Rvcj1UKQpkYXRhJGVkdWM8LWZjdF9yZWxldmVsKGRhdGEkZWR1YywnMUxlc3MgdGhhbiBIUycsJzJoc2dyYWQnLCczTW9yZSB0aGFuIEhTJykgCgoKIyBldmVyIGhhZCBkZXByZXNzaW9uCmRhdGEkaGFkcHJlc3Npb248LSBjYXI6OlJlY29kZShkYXRhJGRlcHJlc3NldiwKICAgICAgICAgICAgICAgICAgICAgICByZWNvZGVzPSIxPSAwIDsgMj0gMTtlbHNlPU5BIikKCgpkYXRhJGZhbWlseWluY29tZV9udW0gPC0gZGF0YSRmYW10b3RpbmMKCiNoYWQgZGVwcmVzc2lvbiBvciBhbnhpZXR5CgoKZGF0YSRkZXBfYW54IDwtIHBhc3RlKCBkYXRhJGFueGlldHlfZGlzb3JlZGVyLCBkYXRhJGhhZHByZXNzaW9uLCBzZXAgPSIiKQoKZGF0YSRkZXBfYW54PC0gYXMubnVtZXJpYyhkYXRhJGRlcF9hbngpCgoKIAoKCiNyYWNlL2V0aG5pY2l0eQpkYXRhJG5od2hpdGU8LSBjYXI6OlJlY29kZShkYXRhJGhpc3ByYWNlLAogICAgICAgICAgICAgICAgICAgICAgIHJlY29kZXM9IjAyPTE7IDk5PU5BOyBlbHNlPTAiKQoKZGF0YSRvdGhlcl9yYWNlX2V0aDwtIGNhcjo6UmVjb2RlKGRhdGEkaGlzcHJhY2UsCiAgICAgICAgICAgICAgICAgICAgICByZWNvZGVzPSJjKDEsMyw0LDUsNiw3KT0xOyA5OT1OQTsgZWxzZT0wIikKCgpkYXRhJHJhY2VfZXRoPC1jYXI6OlJlY29kZShkYXRhJGhpc3ByYWNlLApyZWNvZGVzPSIwMj0nTkhfV2hpdGUnOyBjKDEsMyw0LDUsNiw3KT0nb3RoZXJfcmFjZV9ldGgnO2Vsc2U9TkEiLAphcy5mYWN0b3IgPSBUKQpkYXRhJHJhY2VfZXRoPC1yZWxldmVsKGRhdGEkcmFjZV9ldGgsCiAgICAgICAgICAgICAgICAgICAgICAgICAgcmVmID0gIk5IX1doaXRlIikKCiNDYXRlZ29yaWFsIG91dGNvbWUgdmFyaWFibGUKCmRhdGEkZGVwX2FueDI8LSBjYXI6OlJlY29kZShkYXRhJGRlcF9hbngsCiAgICAgICAgICAgICAgICAgICAgICAgcmVjb2Rlcz0iMDE6MTE9MTsgMDAgPSAwOyBlbHNlPU5BIikKYGBgCgoKCmBgYHtyfQoKCmRhdGEgPC0gZGF0YSU+JQpmaWx0ZXIoZmFtaWx5aW5jb21lX251bSA+PTAgJiBhZ2U8PTk5OTk5NikKCgpkYXRhIDwtIGRhdGElPiUKZmlsdGVyKGFnZSA+PTE4ICYgYWdlPD0wOTkpCgpgYGAKCgojIFNlbGVjdGluZyBvbmx5IG5lZWRlZCB2YXJpYWJsZXMKYGBge3J9CmRhdGEyPC0gZGF0YSAlPiUgCiAgc2VsZWN0KCBzdHJhdGEsIHNhbXB3ZWlnaHQsIGRlcF9hbngyLCByYWNlX2V0aCwgbmh3aGl0ZSwgb3RoZXJfcmFjZV9ldGgsIGVkdWMsIGdlbmVyYWxoZWFsdGgsIGFnZSwgZmFtaWx5aW5jb21lX251bSwgbWFsZSklPiUKICBmaWx0ZXIoY29tcGxldGUuY2FzZXMoLikpCmBgYAoKCgoKIyBTdXJ2ZXkgRGVzaWduCmBgYHtyfQpvcHRpb25zKHN1cnZleS5sb25lbHkucHN1ID0gImFkanVzdCIpCgpkZXM8LXN2eWRlc2lnbihpZHM9fjEsIHN0cmF0YT1+c3RyYXRhLCB3ZWlnaHRzPX5zYW1wd2VpZ2h0LCBkYXRhID0gZGF0YTIgKQpkZXMKYGBgCgoKIyAxKSBkZWZpbmUgYW4gb3V0Y29tZSwgdGhpcyBjYW4gYmUgb2YgYW55IGZvcm0sIGJ1dCB1c2UgYW4gYXBwcm9wcmlhdGUgZGlzdHJpYnV0aW9uIGZvciB5b3VyIG91dGNvbWUuIEFsc28gZGVmaW5lIGF0IGxlYXN0IDEgY29udGludW91cyBwcmVkaWN0b3IuCgojIyBDYXRlZ29yaWFsIE91dGNvbWU6IEFueGlldHkvRGVwcmVzc2lvbgojIyBDb250aW51b3VzIE91dGNvbWU6IEZhbWlseSBJbmNvbWUKCgojIENoZWNraW5nIHRvIHNlZSB0aGUgc2VsZWN0ZWQgY29udGludW91cyB2YXJpYWJsZSBhcmUgbGluZWFyCgoKCmBgYHtyIGluY2x1ZGU9RkFMU0V9CmRhdGFnYW08LWdhbShkZXBfYW54MiB+IHMoYWdlLCBieSA9IGVkdWMpKyAgZWR1YywKICAgICAgICAgICAgZGF0YT1kYXRhMiwKICAgICAgICAgICAgd2VpZ2h0cyA9IHNhbXB3ZWlnaHQvbWVhbihzYW1wd2VpZ2h0LCBuYS5ybT1UKSwKICAgICAgICAgICAgZmFtaWx5PWJpbm9taWFsKQpzdW1tYXJ5KGRhdGFnYW0pCmBgYAoKCiMjIyBTaW5jZSB0aGUgYWJvdmUgYXJlIHNpZ25pZmljYW50IGl0IHNob3dzIHRoYXQgdGhlIGFnZSB2YXJpYWJsZSBpcyBub3QgbGluZWFyCgoKCiMgUGxvdHRpbmcgdG8ga25vdyBpZiB0aGUgc2VsZWN0ZWQgY29udGludW91cyB2YXJpYWJsZSBhcmUgbGluZWFyCmBgYHtyfQpwbG90KGRhdGFnYW0pCmBgYAoKCiMjIyBUaGlzIGdpdmVzIG1vcmUgZXZpZGVuY2UgdGhhdCB0aGUgYWJvdmUgaXMgbm90IGxpbmVhcgoKCiMgMikgVXNpbmcgdGhlIGdhbSgpIGZ1bmN0aW9uLCBlc3RpbWF0ZSBhIG1vZGVsIHdpdGggb25seSBsaW5lYXIgdGVybXMgaW4geW91ciBtb2RlbAoKCgpgYGB7ciwgd2FybmluZz1GQUxTRX0KZGF0YWdhbV9sPC1nYW0oZGVwX2FueDJ+YWdlKyBmYW1pbHlpbmNvbWVfbnVtICtnZW5lcmFsaGVhbHRoICsgZWR1YyArIG1hbGUgKyBuaHdoaXRlLAogICAgICAgICAgICAgIGRhdGE9ZGF0YTIsCiAgICAgICAgICAgICAgICB3ZWlnaHRzID0gc2FtcHdlaWdodC9tZWFuKHNhbXB3ZWlnaHQsIG5hLnJtPVQpLAogICAgICAgICAgICAgIGZhbWlseT1iaW5vbWlhbCkKCnN1bW1hcnkoZGF0YWdhbV9sKQpgYGAKCgojIDMpIFJlcGVhdCBTdGVwIDIsIGJ1dCBpbmNsdWRlIGEgc21vb3RoIHRlcm0gZm9yIGF0IGxlYXN0IG9uZSBjb250aW51b3VzIHZhcmlhYmxlCgoKIyMgTnVtZXJpY2FsIG91dGNvbWUgCgpgYGB7ciwgd2FybmluZz1GQUxTRX0KZGF0YWdhbV9zPC1nYW0oZGVwX2FueDJ+cyhhZ2UpKyBmYW1pbHlpbmNvbWVfbnVtICtnZW5lcmFsaGVhbHRoICsgZWR1YyArIG1hbGUgKyBuaHdoaXRlLAogICAgICAgICAgICAgIGRhdGE9ZGF0YTIsCiAgICAgICAgICAgICAgICB3ZWlnaHRzID0gc2FtcHdlaWdodC9tZWFuKHNhbXB3ZWlnaHQsIG5hLnJtPVQpLAogICAgICAgICAgICAgIGZhbWlseT1iaW5vbWlhbCkKCnN1bW1hcnkoZGF0YWdhbV9zKQpgYGAKCgojIDQpIFRlc3QgaWYgdGhlIG1vZGVsIGluIHN0ZXAgMyBpcyBmaXR0aW5nIHRoZSBkYXRhIGJldHRlciB0aGFuIHRoZSBwdXJlbHkgbGluZWFyIG1vZGVsLgoKCmBgYHtyfQphbm92YSggZGF0YWdhbV9sLCBkYXRhZ2FtX3MsIHRlc3Q9IkNoaXNxIikKYGBgCgpDb21wYXJpbmcgdGhlIHR3byBtb2RlbHMsIHRoZSBtb2RlbCB3aXRoIGEgc21vb3RoIHRlcm0gZm9yIHRoZSBhZ2UgdmFyaWFibGUgZml0cyB0aGUgbW9kZWwgYmV0dGVyLiBUaGlzIGlzIGV2aWRlbnQgYmVjYXVzZSBvZiB0aGUgdmVyeSBzbWFsbCBwLXZhbHVlLgoKCgoKCiMgNSkgUHJvZHVjZSBhIHBsb3Qgb2YgdGhlIHNtb290aCBlZmZlY3QgZnJvbSB0aGUgbW9kZWwgaW4gc3RlcCAzCgoKCmBgYHtyfQpwbG90KGRhdGFnYW1fcykKYGBgCgoK