Part 1: Description of project. Apply tidyverse tools (dplyr, ggplot2) to manipulate and visualize real world public data; UPDATED FROM FILE “August2.2023_rmb_html_.rmb.Rmd”. As described in the README.md file, I am interested in substance use disorders and how they differ by gender and other factors such as socioeconomic status in the community. I sought to apply dplyr and ggplot2 tools to data I downloaded from Nutritional Health and Nutrition Examination Survey (NHANES, https://wwwn.cdc.gov/nchs/nhanes/).The income:family size ratio was used as a marker of degree of poverty (higher numbers = less poverty) and the substances considered in this exploratory project include marijuana, cocaine, heroin, and methamphetamine. There are 4 cohorts from different time points (year of survey participation) included: 2011-2012, 2013-2014, 2015-2016, 2017-2018. The main update here is that DATA IS NOW WEIGHTED BY DEMOGRAPHICS AND NOW ACCURATELY REFLECT US CIVILIAN POPULATION (see https://wwwn.cdc.gov/nchs/data/nhanes/analyticguidelines/11-16-analytic-guidelines.pdf). Descriptions interpreting data are directly below plots.

Part 2: Packages Required

#for data wrangling; dplyr::select, mutate, select, recode
library(dplyr) 
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#for data visualization with various plots
library(ggplot2)

####August 11, 2023 - today I found out that R has an NHANES package, making it 
# easy to browse, select and import databases of interest!! Thanks, chatgpt AI!

#for fully customizable browsing, selection and retrieving data directly from 
# NHANES website
#install.packages("nhanesA")
library(nhanesA)

##used for handling survey data and applying survey weights
# install.packages("survey") 
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
##allows for dplyr-like syntax to survey data
# install.packages("srvyr") 
library(srvyr, warn.conflicts = FALSE)

## appends large datasets together
# install.packages("data.table") 
library("data.table")
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

Part 3: Dataset description For the current project, I focused on adults 18+ years, from cross-sectional cohorts assessed at 4 different time points (2011-2012, 2013-2014, 2015-2016, 2017-2018). Demographic (“Demographic Variables and Sample Weights” data file) and questionnaire (“Drug Use” data file) data. These data sets were merged.

### Demographic data ###########################################################

nhanesTables(data_group = 'DEMO', year = 2011)
##   Data.File.Name                  Data.File.Description
## 1         DEMO_G Demographic Variables & Sample Weights
demo_2011_2012 <- nhanes('DEMO_G')
names(demo_2011_2012)
##  [1] "SEQN"     "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
##  [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGY" "RIDEXAGM" "DMQMILIZ"
## [13] "DMQADFC"  "DMDBORN4" "DMDCITZN" "DMDYRSUS" "DMDEDUC3" "DMDEDUC2"
## [19] "DMDMARTL" "RIDEXPRG" "SIALANG"  "SIAPROXY" "SIAINTRP" "FIALANG" 
## [25] "FIAPROXY" "FIAINTRP" "MIALANG"  "MIAPROXY" "MIAINTRP" "AIALANGA"
## [31] "WTINT2YR" "WTMEC2YR" "SDMVPSU"  "SDMVSTRA" "INDHHIN2" "INDFMIN2"
## [37] "INDFMPIR" "DMDHHSIZ" "DMDFMSIZ" "DMDHHSZA" "DMDHHSZB" "DMDHHSZE"
## [43] "DMDHRGND" "DMDHRAGE" "DMDHRBR4" "DMDHREDU" "DMDHRMAR" "DMDHSEDU"
nhanesTables(data_group = 'DEMO', year = 2013)
##   Data.File.Name                    Data.File.Description
## 1         DEMO_H Demographic Variables and Sample Weights
demo_2013_2014 <- nhanes('DEMO_H')
names(demo_2013_2014)
##  [1] "SEQN"     "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
##  [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM" "DMQMILIZ" "DMQADFC" 
## [13] "DMDBORN4" "DMDCITZN" "DMDYRSUS" "DMDEDUC3" "DMDEDUC2" "DMDMARTL"
## [19] "RIDEXPRG" "SIALANG"  "SIAPROXY" "SIAINTRP" "FIALANG"  "FIAPROXY"
## [25] "FIAINTRP" "MIALANG"  "MIAPROXY" "MIAINTRP" "AIALANGA" "DMDHHSIZ"
## [31] "DMDFMSIZ" "DMDHHSZA" "DMDHHSZB" "DMDHHSZE" "DMDHRGND" "DMDHRAGE"
## [37] "DMDHRBR4" "DMDHREDU" "DMDHRMAR" "DMDHSEDU" "WTINT2YR" "WTMEC2YR"
## [43] "SDMVPSU"  "SDMVSTRA" "INDHHIN2" "INDFMIN2" "INDFMPIR"
nhanesTables(data_group = 'DEMO', year = 2014)
##   Data.File.Name                    Data.File.Description
## 1         DEMO_H Demographic Variables and Sample Weights
nhanesTables(data_group = 'DEMO', year = 2015)
##   Data.File.Name                    Data.File.Description
## 1         DEMO_I Demographic Variables and Sample Weights
demo_2015_2016 <- nhanes('DEMO_I')
names(demo_2015_2016)
##  [1] "SEQN"     "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
##  [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM" "DMQMILIZ" "DMQADFC" 
## [13] "DMDBORN4" "DMDCITZN" "DMDYRSUS" "DMDEDUC3" "DMDEDUC2" "DMDMARTL"
## [19] "RIDEXPRG" "SIALANG"  "SIAPROXY" "SIAINTRP" "FIALANG"  "FIAPROXY"
## [25] "FIAINTRP" "MIALANG"  "MIAPROXY" "MIAINTRP" "AIALANGA" "DMDHHSIZ"
## [31] "DMDFMSIZ" "DMDHHSZA" "DMDHHSZB" "DMDHHSZE" "DMDHRGND" "DMDHRAGE"
## [37] "DMDHRBR4" "DMDHREDU" "DMDHRMAR" "DMDHSEDU" "WTINT2YR" "WTMEC2YR"
## [43] "SDMVPSU"  "SDMVSTRA" "INDHHIN2" "INDFMIN2" "INDFMPIR"
nhanesTables(data_group = 'DEMO', year = 2016)
##   Data.File.Name                    Data.File.Description
## 1         DEMO_I Demographic Variables and Sample Weights
nhanesTables(data_group = 'DEMO', year = 2017)
##   Data.File.Name                    Data.File.Description
## 1         DEMO_J Demographic Variables and Sample Weights
nhanesTables(data_group = 'DEMO', year = 2018)
##   Data.File.Name                    Data.File.Description
## 1         DEMO_J Demographic Variables and Sample Weights
demo_2017_2018 <- nhanes('DEMO_J')
names(demo_2017_2018)
##  [1] "SEQN"     "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
##  [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM" "DMQMILIZ" "DMQADFC" 
## [13] "DMDBORN4" "DMDCITZN" "DMDYRSUS" "DMDEDUC3" "DMDEDUC2" "DMDMARTL"
## [19] "RIDEXPRG" "SIALANG"  "SIAPROXY" "SIAINTRP" "FIALANG"  "FIAPROXY"
## [25] "FIAINTRP" "MIALANG"  "MIAPROXY" "MIAINTRP" "AIALANGA" "DMDHHSIZ"
## [31] "DMDFMSIZ" "DMDHHSZA" "DMDHHSZB" "DMDHHSZE" "DMDHRGND" "DMDHRAGZ"
## [37] "DMDHREDZ" "DMDHRMAZ" "DMDHSEDZ" "WTINT2YR" "WTMEC2YR" "SDMVPSU" 
## [43] "SDMVSTRA" "INDHHIN2" "INDFMIN2" "INDFMPIR"
### Questionnaire Drug Use Data ################################################

nhanesTables(data_group = 'Q', year = 2011)
##    Data.File.Name                 Data.File.Description
## 1           TBQ_G                          Tuberculosis
## 2           HIQ_G                      Health Insurance
## 3           BPQ_G          Blood Pressure & Cholesterol
## 4           CDQ_G                 Cardiovascular Health
## 5           RDQ_G                    Respiratory Health
## 6           PAQ_G                     Physical Activity
## 7           PFQ_G                  Physical Functioning
## 8        RXQASA_G                Preventive Aspirin Use
## 9           CKQ_G                       Creatine Kinase
## 10          CSQ_G                         Taste & Smell
## 11          DIQ_G                              Diabetes
## 12          HUQ_G Hospital Utilization & Access to Care
## 13          OHQ_G                           Oral Health
## 14          ECQ_G                       Early Childhood
## 15          SXQ_G                       Sexual Behavior
## 16       SMQRTU_G          Smoking - Recent Tobacco Use
## 17          SMQ_G               Smoking - Cigarette Use
## 18       SMQFAM_G           Smoking - Household Smokers
## 19          OCQ_G                            Occupation
## 20          HOQ_G               Housing Characteristics
## 21       PUQMEC_G                         Pesticide Use
## 22          MCQ_G                    Medical Conditions
## 23          IMQ_G                          Immunization
## 24          HSQ_G                 Current Health Status
## 25          DUQ_G                              Drug Use
## 26          DPQ_G   Mental Health - Depression Screener
## 27          SLQ_G                       Sleep Disorders
## 28          ALQ_G                           Alcohol Use
## 29          RHQ_G                   Reproductive Health
## 30          AUQ_G                            Audiometry
## 31       WHQMEC_G                Weight History - Youth
## 32          DEQ_G                           Dermatology
## 33          ACQ_G                         Acculturation
## 34       RXQ_RX_G              Prescription Medications
## 35          HCQ_G                 Hepatitis C Follow Up
## 36          WHQ_G                        Weight History
## 37        KIQ_U_G           Kidney Conditions - Urology
## 38          DBQ_G             Diet Behavior & Nutrition
## 39          INQ_G                                Income
## 40          CBQ_G                     Consumer Behavior
## 41          FSQ_G                         Food Security
## 42          CFQ_G                 Cognitive Functioning
## 43          VTQ_G         Volatile Toxicant (Subsample)
nhanesTables(data_group = 'Q', year = 2012)
##    Data.File.Name                 Data.File.Description
## 1           TBQ_G                          Tuberculosis
## 2           HIQ_G                      Health Insurance
## 3           BPQ_G          Blood Pressure & Cholesterol
## 4           CDQ_G                 Cardiovascular Health
## 5           RDQ_G                    Respiratory Health
## 6           PAQ_G                     Physical Activity
## 7           PFQ_G                  Physical Functioning
## 8        RXQASA_G                Preventive Aspirin Use
## 9           CKQ_G                       Creatine Kinase
## 10          CSQ_G                         Taste & Smell
## 11          DIQ_G                              Diabetes
## 12          HUQ_G Hospital Utilization & Access to Care
## 13          OHQ_G                           Oral Health
## 14          ECQ_G                       Early Childhood
## 15          SXQ_G                       Sexual Behavior
## 16       SMQRTU_G          Smoking - Recent Tobacco Use
## 17          SMQ_G               Smoking - Cigarette Use
## 18       SMQFAM_G           Smoking - Household Smokers
## 19          OCQ_G                            Occupation
## 20          HOQ_G               Housing Characteristics
## 21       PUQMEC_G                         Pesticide Use
## 22          MCQ_G                    Medical Conditions
## 23          IMQ_G                          Immunization
## 24          HSQ_G                 Current Health Status
## 25          DUQ_G                              Drug Use
## 26          DPQ_G   Mental Health - Depression Screener
## 27          SLQ_G                       Sleep Disorders
## 28          ALQ_G                           Alcohol Use
## 29          RHQ_G                   Reproductive Health
## 30          AUQ_G                            Audiometry
## 31       WHQMEC_G                Weight History - Youth
## 32          DEQ_G                           Dermatology
## 33          ACQ_G                         Acculturation
## 34       RXQ_RX_G              Prescription Medications
## 35          HCQ_G                 Hepatitis C Follow Up
## 36          WHQ_G                        Weight History
## 37        KIQ_U_G           Kidney Conditions - Urology
## 38          DBQ_G             Diet Behavior & Nutrition
## 39          INQ_G                                Income
## 40          CBQ_G                     Consumer Behavior
## 41          FSQ_G                         Food Security
## 42          CFQ_G                 Cognitive Functioning
## 43          VTQ_G         Volatile Toxicant (Subsample)
DUQ_2011_2012 <- nhanes('DUQ_G')
names(DUQ_2011_2012)
##  [1] "SEQN"    "DUQ200"  "DUQ210"  "DUQ211"  "DUQ213"  "DUQ215Q" "DUQ215U"
##  [8] "DUQ217"  "DUQ219"  "DUQ220Q" "DUQ220U" "DUQ230"  "DUQ240"  "DUQ250" 
## [15] "DUQ260"  "DUQ270Q" "DUQ270U" "DUQ272"  "DUQ280"  "DUQ290"  "DUQ300" 
## [22] "DUQ310Q" "DUQ310U" "DUQ320"  "DUQ330"  "DUQ340"  "DUQ350Q" "DUQ350U"
## [29] "DUQ352"  "DUQ360"  "DUQ370"  "DUQ380A" "DUQ380B" "DUQ380C" "DUQ380D"
## [36] "DUQ380E" "DUQ390"  "DUQ400Q" "DUQ400U" "DUQ410"  "DUQ420"  "DUQ430"
nhanesTables(data_group = 'Q', year = 2013)
##    Data.File.Name                 Data.File.Description
## 1           DLQ_H                            Disability
## 2           DEQ_H                           Dermatology
## 3           OSQ_H                          Osteoporosis
## 4           IMQ_H                          Immunization
## 5           SXQ_H                       Sexual Behavior
## 6           CDQ_H                 Cardiovascular Health
## 7           BPQ_H          Blood Pressure & Cholesterol
## 8           MCQ_H                    Medical Conditions
## 9           HIQ_H                      Health Insurance
## 10          HUQ_H Hospital Utilization & Access to Care
## 11          PAQ_H                     Physical Activity
## 12          PFQ_H                  Physical Functioning
## 13          HEQ_H                             Hepatitis
## 14          ECQ_H                       Early Childhood
## 15          DIQ_H                              Diabetes
## 16       SMQFAM_H           Smoking - Household Smokers
## 17          SMQ_H               Smoking - Cigarette Use
## 18       SMQRTU_H          Smoking - Recent Tobacco Use
## 19          HOQ_H               Housing Characteristics
## 20       PUQMEC_H                         Pesticide Use
## 21       SMQSHS_H   Smoking - Secondhand Smoke Exposure
## 22          INQ_H                                Income
## 23          CSQ_H                         Taste & Smell
## 24          DBQ_H             Diet Behavior & Nutrition
## 25          CBQ_H                     Consumer Behavior
## 26          HSQ_H                 Current Health Status
## 27          SLQ_H                       Sleep Disorders
## 28       RXQASA_H                Preventive Aspirin Use
## 29          DUQ_H                              Drug Use
## 30       WHQMEC_H                Weight History - Youth
## 31          ALQ_H                           Alcohol Use
## 32          DPQ_H   Mental Health - Depression Screener
## 33          ACQ_H                         Acculturation
## 34          WHQ_H                        Weight History
## 35          RHQ_H                   Reproductive Health
## 36          FSQ_H                         Food Security
## 37          OHQ_H                           Oral Health
## 38          OCQ_H                            Occupation
## 39       RXQ_RX_H              Prescription Medications
## 40        KIQ_U_H           Kidney Conditions - Urology
## 41          CKQ_H                       Creatine Kinase
## 42          VTQ_H         Volatile Toxicant (Subsample)
## 43          CFQ_H                 Cognitive Functioning
nhanesTables(data_group = 'Q', year = 2014)
##    Data.File.Name                 Data.File.Description
## 1           DLQ_H                            Disability
## 2           DEQ_H                           Dermatology
## 3           OSQ_H                          Osteoporosis
## 4           IMQ_H                          Immunization
## 5           SXQ_H                       Sexual Behavior
## 6           CDQ_H                 Cardiovascular Health
## 7           BPQ_H          Blood Pressure & Cholesterol
## 8           MCQ_H                    Medical Conditions
## 9           HIQ_H                      Health Insurance
## 10          HUQ_H Hospital Utilization & Access to Care
## 11          PAQ_H                     Physical Activity
## 12          PFQ_H                  Physical Functioning
## 13          HEQ_H                             Hepatitis
## 14          ECQ_H                       Early Childhood
## 15          DIQ_H                              Diabetes
## 16       SMQFAM_H           Smoking - Household Smokers
## 17          SMQ_H               Smoking - Cigarette Use
## 18       SMQRTU_H          Smoking - Recent Tobacco Use
## 19          HOQ_H               Housing Characteristics
## 20       PUQMEC_H                         Pesticide Use
## 21       SMQSHS_H   Smoking - Secondhand Smoke Exposure
## 22          INQ_H                                Income
## 23          CSQ_H                         Taste & Smell
## 24          DBQ_H             Diet Behavior & Nutrition
## 25          CBQ_H                     Consumer Behavior
## 26          HSQ_H                 Current Health Status
## 27          SLQ_H                       Sleep Disorders
## 28       RXQASA_H                Preventive Aspirin Use
## 29          DUQ_H                              Drug Use
## 30       WHQMEC_H                Weight History - Youth
## 31          ALQ_H                           Alcohol Use
## 32          DPQ_H   Mental Health - Depression Screener
## 33          ACQ_H                         Acculturation
## 34          WHQ_H                        Weight History
## 35          RHQ_H                   Reproductive Health
## 36          FSQ_H                         Food Security
## 37          OHQ_H                           Oral Health
## 38          OCQ_H                            Occupation
## 39       RXQ_RX_H              Prescription Medications
## 40        KIQ_U_H           Kidney Conditions - Urology
## 41          CKQ_H                       Creatine Kinase
## 42          VTQ_H         Volatile Toxicant (Subsample)
## 43          CFQ_H                 Cognitive Functioning
DUQ_2013_2014 <- nhanes('DUQ_H')
names(DUQ_2013_2014)
##  [1] "SEQN"    "DUQ200"  "DUQ210"  "DUQ211"  "DUQ213"  "DUQ215Q" "DUQ215U"
##  [8] "DUQ217"  "DUQ219"  "DUQ220Q" "DUQ220U" "DUQ230"  "DUQ240"  "DUQ250" 
## [15] "DUQ260"  "DUQ270Q" "DUQ270U" "DUQ272"  "DUQ280"  "DUQ290"  "DUQ300" 
## [22] "DUQ310Q" "DUQ310U" "DUQ320"  "DUQ330"  "DUQ340"  "DUQ350Q" "DUQ350U"
## [29] "DUQ352"  "DUQ360"  "DUQ370"  "DUQ380A" "DUQ380B" "DUQ380C" "DUQ380D"
## [36] "DUQ380E" "DUQ390"  "DUQ400Q" "DUQ400U" "DUQ410"  "DUQ420"  "DUQ430"
nhanesTables(data_group = 'Q', year = 2015)
##    Data.File.Name                 Data.File.Description
## 1           PFQ_I                  Physical Functioning
## 2           CDQ_I                 Cardiovascular Health
## 3           MCQ_I                    Medical Conditions
## 4           BPQ_I          Blood Pressure & Cholesterol
## 5           HEQ_I                             Hepatitis
## 6           DLQ_I                            Disability
## 7           HIQ_I                      Health Insurance
## 8           PAQ_I                     Physical Activity
## 9           HUQ_I Hospital Utilization & Access to Care
## 10          IMQ_I                          Immunization
## 11       SMQSHS_I   Smoking - Secondhand Smoke Exposure
## 12       SMQRTU_I          Smoking - Recent Tobacco Use
## 13       SMQFAM_I           Smoking - Household Smokers
## 14          SMQ_I               Smoking - Cigarette Use
## 15          DIQ_I                              Diabetes
## 16          OHQ_I                           Oral Health
## 17          DEQ_I                           Dermatology
## 18          HOQ_I               Housing Characteristics
## 19          DPQ_I   Mental Health - Depression Screener
## 20          HSQ_I                 Current Health Status
## 21          SXQ_I                       Sexual Behavior
## 22          INQ_I                                Income
## 23          ACQ_I                         Acculturation
## 24          ECQ_I                       Early Childhood
## 25          OCQ_I                            Occupation
## 26       PUQMEC_I                         Pesticide Use
## 27          RHQ_I                   Reproductive Health
## 28          WHQ_I                        Weight History
## 29       WHQMEC_I                Weight History - Youth
## 30          SLQ_I                       Sleep Disorders
## 31          AUQ_I                            Audiometry
## 32          ALQ_I                           Alcohol Use
## 33        KIQ_U_I           Kidney Conditions - Urology
## 34          DUQ_I                              Drug Use
## 35          VTQ_I                     Volatile Toxicant
## 36       RXQASA_I                Preventive Aspirin Use
## 37          CBQ_I                     Consumer Behavior
## 38          DBQ_I             Diet Behavior & Nutrition
## 39       RXQ_RX_I              Prescription Medications
## 40          FSQ_I                         Food Security
nhanesTables(data_group = 'Q', year = 2016)
##    Data.File.Name                 Data.File.Description
## 1           PFQ_I                  Physical Functioning
## 2           CDQ_I                 Cardiovascular Health
## 3           MCQ_I                    Medical Conditions
## 4           BPQ_I          Blood Pressure & Cholesterol
## 5           HEQ_I                             Hepatitis
## 6           DLQ_I                            Disability
## 7           HIQ_I                      Health Insurance
## 8           PAQ_I                     Physical Activity
## 9           HUQ_I Hospital Utilization & Access to Care
## 10          IMQ_I                          Immunization
## 11       SMQSHS_I   Smoking - Secondhand Smoke Exposure
## 12       SMQRTU_I          Smoking - Recent Tobacco Use
## 13       SMQFAM_I           Smoking - Household Smokers
## 14          SMQ_I               Smoking - Cigarette Use
## 15          DIQ_I                              Diabetes
## 16          OHQ_I                           Oral Health
## 17          DEQ_I                           Dermatology
## 18          HOQ_I               Housing Characteristics
## 19          DPQ_I   Mental Health - Depression Screener
## 20          HSQ_I                 Current Health Status
## 21          SXQ_I                       Sexual Behavior
## 22          INQ_I                                Income
## 23          ACQ_I                         Acculturation
## 24          ECQ_I                       Early Childhood
## 25          OCQ_I                            Occupation
## 26       PUQMEC_I                         Pesticide Use
## 27          RHQ_I                   Reproductive Health
## 28          WHQ_I                        Weight History
## 29       WHQMEC_I                Weight History - Youth
## 30          SLQ_I                       Sleep Disorders
## 31          AUQ_I                            Audiometry
## 32          ALQ_I                           Alcohol Use
## 33        KIQ_U_I           Kidney Conditions - Urology
## 34          DUQ_I                              Drug Use
## 35          VTQ_I                     Volatile Toxicant
## 36       RXQASA_I                Preventive Aspirin Use
## 37          CBQ_I                     Consumer Behavior
## 38          DBQ_I             Diet Behavior & Nutrition
## 39       RXQ_RX_I              Prescription Medications
## 40          FSQ_I                         Food Security
DUQ_2015_2016 <- nhanes('DUQ_I')
names(DUQ_2015_2016)
##  [1] "SEQN"    "DUQ200"  "DUQ210"  "DUQ211"  "DUQ213"  "DUQ215Q" "DUQ215U"
##  [8] "DUQ217"  "DUQ219"  "DUQ220Q" "DUQ220U" "DUQ230"  "DUQ240"  "DUQ250" 
## [15] "DUQ260"  "DUQ270Q" "DUQ270U" "DUQ272"  "DUQ280"  "DUQ290"  "DUQ300" 
## [22] "DUQ310Q" "DUQ310U" "DUQ320"  "DUQ330"  "DUQ340"  "DUQ350Q" "DUQ350U"
## [29] "DUQ352"  "DUQ360"  "DUQ370"  "DUQ380A" "DUQ380B" "DUQ380C" "DUQ380D"
## [36] "DUQ380E" "DUQ390"  "DUQ400Q" "DUQ400U" "DUQ410"  "DUQ420"  "DUQ430"
nhanesTables(data_group = 'Q', year = 2017)
##    Data.File.Name                            Data.File.Description
## 1           BPQ_J                     Blood Pressure & Cholesterol
## 2           HUQ_J            Hospital Utilization & Access to Care
## 3           HEQ_J                                        Hepatitis
## 4           CDQ_J                            Cardiovascular Health
## 5          PAQY_J                        Physical Activity - Youth
## 6           PAQ_J                                Physical Activity
## 7           IMQ_J                                     Immunization
## 8           HIQ_J                                 Health Insurance
## 9           DLQ_J                                       Disability
## 10          DIQ_J                                         Diabetes
## 11          PFQ_J                             Physical Functioning
## 12       SMQRTU_J                     Smoking - Recent Tobacco Use
## 13       SMQFAM_J                      Smoking - Household Smokers
## 14       SMQSHS_J              Smoking - Secondhand Smoke Exposure
## 15          SMQ_J                          Smoking - Cigarette Use
## 16          MCQ_J                               Medical Conditions
## 17          OHQ_J                                      Oral Health
## 18          HSQ_J                            Current Health Status
## 19          HOQ_J                          Housing Characteristics
## 20          DEQ_J                                      Dermatology
## 21          INQ_J                                           Income
## 22          ACQ_J                                    Acculturation
## 23          RHQ_J                              Reproductive Health
## 24          SLQ_J                                  Sleep Disorders
## 25          OCQ_J                                       Occupation
## 26          DPQ_J              Mental Health - Depression Screener
## 27          DUQ_J                                         Drug Use
## 28          ECQ_J                                  Early Childhood
## 29          OSQ_J                                     Osteoporosis
## 30        KIQ_U_J                      Kidney Conditions - Urology
## 31          CBQ_J                                Consumer Behavior
## 32          DBQ_J                        Diet Behavior & Nutrition
## 33       RXQ_RX_J                         Prescription Medications
## 34          WHQ_J                                   Weight History
## 35       WHQMEC_J                           Weight History - Youth
## 36       RXQASA_J                           Preventive Aspirin Use
## 37          AUQ_J                                       Audiometry
## 38       PUQMEC_J                                    Pesticide Use
## 39          ALQ_J                                      Alcohol Use
## 40          VTQ_J                                Volatile Toxicant
## 41       CBQPFC_J Consumer Behavior Phone Follow-up Module - Child
## 42       CBQPFA_J Consumer Behavior Phone Follow-up Module - Adult
## 43          FSQ_J                                    Food Security
nhanesTables(data_group = 'Q', year = 2018)
##    Data.File.Name                            Data.File.Description
## 1           BPQ_J                     Blood Pressure & Cholesterol
## 2           HUQ_J            Hospital Utilization & Access to Care
## 3           HEQ_J                                        Hepatitis
## 4           CDQ_J                            Cardiovascular Health
## 5          PAQY_J                        Physical Activity - Youth
## 6           PAQ_J                                Physical Activity
## 7           IMQ_J                                     Immunization
## 8           HIQ_J                                 Health Insurance
## 9           DLQ_J                                       Disability
## 10          DIQ_J                                         Diabetes
## 11          PFQ_J                             Physical Functioning
## 12       SMQRTU_J                     Smoking - Recent Tobacco Use
## 13       SMQFAM_J                      Smoking - Household Smokers
## 14       SMQSHS_J              Smoking - Secondhand Smoke Exposure
## 15          SMQ_J                          Smoking - Cigarette Use
## 16          MCQ_J                               Medical Conditions
## 17          OHQ_J                                      Oral Health
## 18          HSQ_J                            Current Health Status
## 19          HOQ_J                          Housing Characteristics
## 20          DEQ_J                                      Dermatology
## 21          INQ_J                                           Income
## 22          ACQ_J                                    Acculturation
## 23          RHQ_J                              Reproductive Health
## 24          SLQ_J                                  Sleep Disorders
## 25          OCQ_J                                       Occupation
## 26          DPQ_J              Mental Health - Depression Screener
## 27          DUQ_J                                         Drug Use
## 28          ECQ_J                                  Early Childhood
## 29          OSQ_J                                     Osteoporosis
## 30        KIQ_U_J                      Kidney Conditions - Urology
## 31          CBQ_J                                Consumer Behavior
## 32          DBQ_J                        Diet Behavior & Nutrition
## 33       RXQ_RX_J                         Prescription Medications
## 34          WHQ_J                                   Weight History
## 35       WHQMEC_J                           Weight History - Youth
## 36       RXQASA_J                           Preventive Aspirin Use
## 37          AUQ_J                                       Audiometry
## 38       PUQMEC_J                                    Pesticide Use
## 39          ALQ_J                                      Alcohol Use
## 40          VTQ_J                                Volatile Toxicant
## 41       CBQPFC_J Consumer Behavior Phone Follow-up Module - Child
## 42       CBQPFA_J Consumer Behavior Phone Follow-up Module - Adult
## 43          FSQ_J                                    Food Security
DUQ_2017_2018 <- nhanes('DUQ_J')
names(DUQ_2017_2018)
##  [1] "SEQN"    "DUQ200"  "DUQ210"  "DUQ211"  "DUQ213"  "DUQ215Q" "DUQ215U"
##  [8] "DUQ217"  "DUQ219"  "DUQ220Q" "DUQ220U" "DUQ230"  "DUQ240"  "DUQ250" 
## [15] "DUQ260"  "DUQ270Q" "DUQ270U" "DUQ272"  "DUQ280"  "DUQ290"  "DUQ300" 
## [22] "DUQ310Q" "DUQ310U" "DUQ320"  "DUQ330"  "DUQ340"  "DUQ350Q" "DUQ350U"
## [29] "DUQ352"  "DUQ360"  "DUQ370"  "DUQ380A" "DUQ380B" "DUQ380C" "DUD380F"
## [36] "DUQ390"  "DUQ400Q" "DUQ400U" "DUQ410"  "DUQ420"  "DUQ430"
### Translate numeric code to text for categorical variables####################

demo_2011_2012_trans <- nhanesTranslate('DEMO_G', names(demo_2011_2012), 
                                        data = demo_2011_2012)
## Translated columns: RIDSTATR RIAGENDR RIDRETH1 RIDRETH3 RIDEXMON DMQMILIZ DMQADFC DMDBORN4 DMDCITZN DMDYRSUS DMDEDUC3 DMDEDUC2 DMDMARTL RIDEXPRG SIALANG SIAPROXY SIAINTRP FIALANG FIAPROXY FIAINTRP MIALANG MIAPROXY MIAINTRP AIALANGA INDHHIN2 INDFMIN2 DMDHRGND DMDHRBR4 DMDHREDU DMDHRMAR DMDHSEDU
demo_2013_2014_trans <- nhanesTranslate('DEMO_H', names(demo_2013_2014), 
                                        data = demo_2013_2014)
## Translated columns: RIDSTATR RIAGENDR RIDRETH1 RIDRETH3 RIDEXMON DMQMILIZ DMQADFC DMDBORN4 DMDCITZN DMDYRSUS DMDEDUC3 DMDEDUC2 DMDMARTL RIDEXPRG SIALANG SIAPROXY SIAINTRP FIALANG FIAPROXY FIAINTRP MIALANG MIAPROXY MIAINTRP AIALANGA DMDHHSIZ DMDFMSIZ DMDHHSZA DMDHHSZB DMDHHSZE DMDHRGND DMDHRBR4 DMDHREDU DMDHRMAR DMDHSEDU INDHHIN2 INDFMIN2
demo_2015_2016_trans <- nhanesTranslate('DEMO_I', names(demo_2015_2016),
                                        data = demo_2015_2016)
## Translated columns: RIDSTATR RIAGENDR RIDRETH1 RIDRETH3 RIDEXMON DMQMILIZ DMQADFC DMDBORN4 DMDCITZN DMDYRSUS DMDEDUC3 DMDEDUC2 DMDMARTL RIDEXPRG SIALANG SIAPROXY SIAINTRP FIALANG FIAPROXY FIAINTRP MIALANG MIAPROXY MIAINTRP AIALANGA DMDHHSIZ DMDFMSIZ DMDHHSZA DMDHHSZB DMDHHSZE DMDHRGND DMDHRBR4 DMDHREDU DMDHRMAR DMDHSEDU INDHHIN2 INDFMIN2
demo_2017_2018_trans <- nhanesTranslate('DEMO_J', names(demo_2017_2018),
                                        data = demo_2017_2018)
## Translated columns: RIDSTATR RIAGENDR RIDRETH1 RIDRETH3 RIDEXMON DMQMILIZ DMQADFC DMDBORN4 DMDCITZN DMDYRSUS DMDEDUC3 DMDEDUC2 DMDMARTL RIDEXPRG SIALANG SIAPROXY SIAINTRP FIALANG FIAPROXY FIAINTRP MIALANG MIAPROXY MIAINTRP AIALANGA DMDHHSIZ DMDFMSIZ DMDHHSZA DMDHHSZB DMDHHSZE DMDHRGND DMDHRAGZ DMDHREDZ DMDHRMAZ DMDHSEDZ INDHHIN2 INDFMIN2
DUQ_2011_2012_trans <- nhanesTranslate('DUQ_G', names(DUQ_2011_2012),
                                       data = DUQ_2011_2012)
## Translated columns: DUQ200 DUQ211 DUQ215U DUQ217 DUQ219 DUQ220U DUQ240 DUQ250 DUQ270U DUQ272 DUQ290 DUQ310U DUQ330 DUQ350U DUQ352 DUQ370 DUQ380A DUQ400U DUQ410 DUQ420 DUQ430
DUQ_2013_2014_trans <- nhanesTranslate('DUQ_H', names(DUQ_2013_2014),
                                       data = DUQ_2013_2014)
## Translated columns: DUQ200 DUQ211 DUQ215U DUQ217 DUQ219 DUQ220U DUQ240 DUQ250 DUQ270U DUQ272 DUQ290 DUQ310U DUQ330 DUQ350U DUQ352 DUQ370 DUQ380A DUQ400U DUQ410 DUQ420 DUQ430
DUQ_2015_2016_trans <- nhanesTranslate('DUQ_I', names(DUQ_2015_2016),
                                       data =   DUQ_2015_2016)
## Translated columns: DUQ200 DUQ211 DUQ215U DUQ217 DUQ219 DUQ220U DUQ240 DUQ250 DUQ270U DUQ272 DUQ290 DUQ310U DUQ330 DUQ350U DUQ352 DUQ370 DUQ400U DUQ410 DUQ420 DUQ430
DUQ_2017_2018_trans <- nhanesTranslate('DUQ_J', names(DUQ_2017_2018),
                                       data = DUQ_2017_2018)
## Translated columns: DUQ200 DUQ211 DUQ215U DUQ217 DUQ219 DUQ220U DUQ240 DUQ250 DUQ270U DUQ272 DUQ290 DUQ310U DUQ330 DUQ350U DUQ352 DUQ370 DUQ400U DUQ410 DUQ420 DUQ430
### Retain only useful variables ###############################################

demo_2011_2012_select <- demo_2011_2012_trans[c("SEQN", #Respondent sequence 
                                                #number
                                                "SDDSRVYR", #data release cycle
                                                "RIAGENDR", #gender
                                                "RIDAGEYR", #age in yrs at 
                                                #screening
                                                "RIDRETH3", #race with NH Asian
                                                "DMDEDUC2", #education level 
                                                #adults 20+
                                                "WTINT2YR", #full sample 2 yr 
                                                #interview weight
                                                "WTMEC2YR", #full sample 2 yr 
                                                #MEC exam weight
                                                "SDMVPSU", #masked variance 
                                                #psuedo PSU
                                                "SDMVSTRA", # masked variance 
                                                #psuedo-stratum
                                                "INDFMPIR" #ratio of family 
                                                #income to poverty
                                                )]

demo_2013_2014_select <- demo_2013_2014_trans[c("SEQN", #Respondent sequence 
                                                #number
                                                "SDDSRVYR", #data release cycle
                                                "RIAGENDR", #gender
                                                "RIDAGEYR", #age in yrs at 
                                                #screening
                                                "RIDRETH3", #race with NH Asian
                                                "DMDEDUC2", #education level 
                                                #adults 20+
                                                "WTINT2YR", #full sample 2 yr 
                                                #interview weight
                                                "WTMEC2YR", #full sample 2 yr 
                                                #MEC exam weight
                                                "SDMVPSU", #masked variance 
                                                #psuedo PSU
                                                "SDMVSTRA", # masked variance 
                                                #psuedo-stratum
                                                "INDFMPIR" #ratio of family 
                                                #income to poverty
                                                )]

demo_2015_2016_select <- demo_2015_2016_trans[c("SEQN", #Respondent sequence 
                                                #number
                                                "SDDSRVYR", #data release cycle
                                                "RIAGENDR", #gender
                                                "RIDAGEYR", #age in yrs at 
                                                #screening
                                                "RIDRETH3", #race with NH Asian
                                                "DMDEDUC2", #education level 
                                                #adults 20+
                                                "WTINT2YR", #full sample 2 yr 
                                                #interview weight
                                                "WTMEC2YR", #full sample 2 yr 
                                                #MEC exam weight
                                                "SDMVPSU", #masked variance 
                                                #psuedo PSU
                                                "SDMVSTRA", # masked variance 
                                                #psuedo-stratum
                                                "INDFMPIR" #ratio of family 
                                                #income to poverty
                                                )]

demo_2017_2018_select <- demo_2017_2018_trans[c("SEQN", #Respondent sequence 
                                                #number
                                                "SDDSRVYR", #data release cycle
                                                "RIAGENDR", #gender
                                                "RIDAGEYR", #age in yrs at '
                                                #screening
                                                "RIDRETH3", #race with NH Asian
                                                "DMDEDUC2", #education level 
                                                #adults 20+
                                                "WTINT2YR", #full sample 2 yr 
                                                #interview weight
                                                "WTMEC2YR", #full sample 2 yr 
                                                #MEC exam weight
                                                "SDMVPSU", #masked variance 
                                                #psuedo PSU
                                                "SDMVSTRA", # masked variance 
                                                #psuedo-stratum
                                                "INDFMPIR" #ratio of family 
                                                #income to poverty
                                                )]

DUQ_2011_2012_select <- DUQ_2011_2012_trans[c("SEQN", #Respondent sequence 
                                              #number
                                                "DUQ200", #ever used mj/hashish
                                                "DUQ210", #age 1st try mj
                                                "DUQ213", #age started using mj 
                                              #regular
                                                "DUQ230", # no. days used mj in 
                                              #past month
                                                "DUQ240", #ever used coc, heroin 
                                              #or meth
                                                "DUQ260", #age 1st try coc
                                                "DUQ280", #no days used coc in 
                                              #last month
                                                "DUQ300", #age 1st try heroin
                                                "DUQ320", # no days used heroin 
                                              #in last month
                                                "DUQ340", # age first try meth
                                                "DUQ360" #no days used meth in 
                                              #last month
                                                )]

DUQ_2013_2014_select <- DUQ_2013_2014_trans[c("SEQN", #Respondent sequence 
                                              #number
                                              "DUQ200", #ever used mj/hashish
                                              "DUQ210", #age 1st try mj
                                              "DUQ213", #age started using mj 
                                              #regular
                                              "DUQ230", # no. days used mj in 
                                              #past month
                                              "DUQ240", #ever used coc, heroin 
                                              #or meth
                                              "DUQ260", #age 1st try coc
                                              "DUQ280", #no days used coc in 
                                              #last month
                                              "DUQ300", #age 1st try heroin
                                              "DUQ320", # no days used heroin in 
                                              #last month
                                              "DUQ340", # age first try meth
                                              "DUQ360" #no days used meth in 
                                              #last month
                                              )]

DUQ_2015_2016_select <- DUQ_2015_2016_trans[c("SEQN", #Respondent sequence 
                                              #number
                                              "DUQ200", #ever used mj/hashish
                                              "DUQ210", #age 1st try mj
                                              "DUQ213", #age started using mj 
                                              #regular
                                              "DUQ230", # no. days used mj in 
                                              #past month
                                              "DUQ240", #ever used coc, heroin 
                                              #or meth
                                              "DUQ260", #age 1st try coc
                                              "DUQ280", #no days used coc in 
                                              #last month
                                              "DUQ300", #age 1st try heroin
                                              "DUQ320", # no days used heroin in 
                                              #last month
                                              "DUQ340", # age first try meth
                                              "DUQ360" #no days used meth in 
                                              #last month
                                              )]

DUQ_2017_2018_select <- DUQ_2017_2018_trans[c("SEQN", #Respondent sequence 
                                              #number
                                              "DUQ200", #ever used mj/hashish
                                              "DUQ210", #age 1st try mj
                                              "DUQ213", #age started using mj 
                                              #regular
                                              "DUQ230", # no. days used mj in 
                                              #past month
                                              "DUQ240", #ever used coc, heroin 
                                              #or meth
                                              "DUQ260", #age 1st try coc
                                              "DUQ280", #no days used coc in 
                                              #last month
                                              "DUQ300", #age 1st try heroin
                                              "DUQ320", # no days used heroin in 
                                              #last month
                                              "DUQ340", # age first try meth
                                              "DUQ360" #no days used meth in 
                                              #last month
                                              )]
### Merge data tables ##########################################################

merged_data_2011_2012 <- merge(demo_2011_2012_select, DUQ_2011_2012_select, 
                               by = c("SEQN"), all = TRUE)

head(merged_data_2011_2012)
##    SEQN SDDSRVYR RIAGENDR RIDAGEYR           RIDRETH3
## 1 62161        7     Male       22 Non-Hispanic White
## 2 62162        7   Female        3   Mexican American
## 3 62163        7     Male       14 Non-Hispanic Asian
## 4 62164        7   Female       44 Non-Hispanic White
## 5 62165        7   Female       14 Non-Hispanic Black
## 6 62166        7     Male        9 Non-Hispanic White
##                           DMDEDUC2   WTINT2YR   WTMEC2YR SDMVPSU SDMVSTRA
## 1 High school graduate/GED or equi 102641.406 104236.583       1       91
## 2                             <NA>  15457.737  16116.354       3       92
## 3                             <NA>   7397.685   7869.485       3       90
## 4        Some college or AA degree 127351.373 127965.226       1       94
## 5                             <NA>  12209.745  13384.042       2       90
## 6                             <NA>  60593.637  64068.123       1       91
##   INDFMPIR DUQ200 DUQ210 DUQ213 DUQ230 DUQ240 DUQ260 DUQ280 DUQ300 DUQ320
## 1     3.15     No     NA     NA     NA     No     NA     NA     NA     NA
## 2     0.60   <NA>     NA     NA     NA   <NA>     NA     NA     NA     NA
## 3     4.07   <NA>     NA     NA     NA   <NA>     NA     NA     NA     NA
## 4     1.67   <NA>     NA     NA     NA   <NA>     NA     NA     NA     NA
## 5     0.57   <NA>     NA     NA     NA   <NA>     NA     NA     NA     NA
## 6       NA   <NA>     NA     NA     NA   <NA>     NA     NA     NA     NA
##   DUQ340 DUQ360
## 1     NA     NA
## 2     NA     NA
## 3     NA     NA
## 4     NA     NA
## 5     NA     NA
## 6     NA     NA
merged_data_2013_2014 <- merge(demo_2013_2014_select, DUQ_2013_2014_select, 
                               by = c("SEQN"), all = TRUE)

head(merged_data_2013_2014)
##    SEQN SDDSRVYR RIAGENDR RIDAGEYR           RIDRETH3
## 1 73557        8     Male       69 Non-Hispanic Black
## 2 73558        8     Male       54 Non-Hispanic White
## 3 73559        8     Male       72 Non-Hispanic White
## 4 73560        8     Male        9 Non-Hispanic White
## 5 73561        8   Female       73 Non-Hispanic White
## 6 73562        8     Male       56   Mexican American
##                           DMDEDUC2 WTINT2YR WTMEC2YR SDMVPSU SDMVSTRA INDFMPIR
## 1 High school graduate/GED or equi 13281.24 13481.04       1      112     0.84
## 2 High school graduate/GED or equi 23682.06 24471.77       1      108     1.78
## 3        Some college or AA degree 57214.80 57193.29       1      109     4.51
## 4                             <NA> 55201.18 55766.51       2      109     2.52
## 5        College graduate or above 63709.67 65541.87       2      116     5.00
## 6        Some college or AA degree 24978.14 25344.99       1      111     4.79
##   DUQ200 DUQ210 DUQ213 DUQ230  DUQ240 DUQ260 DUQ280 DUQ300 DUQ320 DUQ340 DUQ360
## 1   <NA>     NA     NA     NA Refused     NA     NA     NA     NA     NA     NA
## 2    Yes     18     18     20     Yes     23     NA     NA     NA     NA     NA
## 3   <NA>     NA     NA     NA    <NA>     NA     NA     NA     NA     NA     NA
## 4   <NA>     NA     NA     NA    <NA>     NA     NA     NA     NA     NA     NA
## 5   <NA>     NA     NA     NA    <NA>     NA     NA     NA     NA     NA     NA
## 6    Yes     16     24     NA      No     NA     NA     NA     NA     NA     NA
merged_data_2015_2016 <- merge(demo_2015_2016_select, DUQ_2015_2016_select, 
                               by = c("SEQN"), all = TRUE)

head(merged_data_2015_2016)
##    SEQN SDDSRVYR RIAGENDR RIDAGEYR           RIDRETH3
## 1 83732        9     Male       62 Non-Hispanic White
## 2 83733        9     Male       53 Non-Hispanic White
## 3 83734        9     Male       78 Non-Hispanic White
## 4 83735        9   Female       56 Non-Hispanic White
## 5 83736        9   Female       42 Non-Hispanic Black
## 6 83737        9   Female       72   Mexican American
##                           DMDEDUC2  WTINT2YR  WTMEC2YR SDMVPSU SDMVSTRA
## 1        College graduate or above 134671.37 135629.51       1      125
## 2 High school graduate/GED or equi  24328.56  25282.43       1      125
## 3 High school graduate/GED or equi  12400.01  12575.84       1      131
## 4        College graduate or above 102718.00 102078.63       1      131
## 5        Some college or AA degree  17627.67  18234.74       2      126
## 6 9-11th grade (Includes 12th grad  11252.31  10878.68       1      128
##   INDFMPIR DUQ200 DUQ210 DUQ213 DUQ230 DUQ240 DUQ260 DUQ280 DUQ300 DUQ320
## 1     4.39   <NA>     NA     NA     NA    Yes     NA     NA     NA     NA
## 2     1.32     No     NA     NA     NA     No     NA     NA     NA     NA
## 3     1.51   <NA>     NA     NA     NA   <NA>     NA     NA     NA     NA
## 4     5.00     No     NA     NA     NA     No     NA     NA     NA     NA
## 5     1.23    Yes     25     25     30     No     NA     NA     NA     NA
## 6     2.82   <NA>     NA     NA     NA   <NA>     NA     NA     NA     NA
##   DUQ340 DUQ360
## 1     NA     NA
## 2     NA     NA
## 3     NA     NA
## 4     NA     NA
## 5     NA     NA
## 6     NA     NA
merged_data_2017_2018 <- merge(demo_2017_2018_select, DUQ_2017_2018_select, 
                               by = c("SEQN"), all = TRUE)

head(merged_data_2017_2018)
##    SEQN SDDSRVYR RIAGENDR RIDAGEYR                         RIDRETH3
## 1 93703       10   Female        2               Non-Hispanic Asian
## 2 93704       10     Male        2               Non-Hispanic White
## 3 93705       10   Female       66               Non-Hispanic Black
## 4 93706       10     Male       18               Non-Hispanic Asian
## 5 93707       10     Male       13 Other Race - Including Multi-Rac
## 6 93708       10   Female       66               Non-Hispanic Asian
##                           DMDEDUC2  WTINT2YR  WTMEC2YR SDMVPSU SDMVSTRA
## 1                             <NA>  9246.492  8539.731       2      145
## 2                             <NA> 37338.768 42566.615       1      143
## 3 9-11th grade (Includes 12th grad  8614.571  8338.420       2      145
## 4                             <NA>  8548.633  8723.440       2      134
## 5                             <NA>  6769.345  7064.610       1      138
## 6              Less than 9th grade 13329.451 14372.489       2      138
##   INDFMPIR DUQ200 DUQ210 DUQ213 DUQ230 DUQ240 DUQ260 DUQ280 DUQ300 DUQ320
## 1     5.00   <NA>     NA     NA     NA   <NA>     NA     NA     NA     NA
## 2     5.00   <NA>     NA     NA     NA   <NA>     NA     NA     NA     NA
## 3     0.82   <NA>     NA     NA     NA     No     NA     NA     NA     NA
## 4       NA     No     NA     NA     NA     No     NA     NA     NA     NA
## 5     1.88   <NA>     NA     NA     NA   <NA>     NA     NA     NA     NA
## 6     1.63   <NA>     NA     NA     NA     No     NA     NA     NA     NA
##   DUQ340 DUQ360
## 1     NA     NA
## 2     NA     NA
## 3     NA     NA
## 4     NA     NA
## 5     NA     NA
## 6     NA     NA
### Append tables together #####################################################

table_2011_2018 <- rbind(merged_data_2011_2012, merged_data_2013_2014, 
                         merged_data_2015_2016, merged_data_2017_2018)

dim(table_2011_2018) 
## [1] 39156    22

Handling weighted NHANES multiple year sample data

From https:https://wwwn.cdc.gov/nchs/data/nhanes/analyticguidelines/11-16 -analytic-guidelines.pdf

“Specific subsample file documentation can be found via the link next to the respective data file on the NHANES website. Importantly, the documentation will provide detailed information on the component, the eligible sample, the laboratory method used, and it will include analytic notes containing information on the specific subsample weights that the analyst should use.”

From https://wwwn.cdc.gov/nchs/nhanes/tutorials/Weighting.aspx

Selecting the Correct Weight in NHANES Various sample weights are available on the data release files – such as the interview weight (wtint2yr), the MEC exam weight (wtmec2yr), and several subsample weights. Use of the correct sample weight for NHANES analyses depends on the variables being used. A good rule of thumb is to use “the least common denominator” where the variable of interest that was collected on the smallest number of respondents is the “least common denominator.” The sample weight that applies to that variable is the appropriate one to use for that particular analysis. Review the documentation file for each component included in your analysis; the analytic notes often recommend the appropriate weight to be used for analyzing variables from that component. Be aware that some questionnaire components were administered during the MEC session rather than during the in-home interview, and therefore the MEC exam weights must be used for these components. All interview and MEC exam weights can be found on the demographic file for the respective survey cycle. Weights for a given component conducted on only a subsample of the original NHANES sample (e.g. many environment chemicals) are available on the data file for that particular component.

My decision: Drug use is part of the MEC and is the LCD with smallest number of respondents, so use MEC weight and not interview weight.

From https://wwwn.cdc.gov/nchs/nhanes/tutorials/Weighting.aspx: To get correct MEC weight across these 4 2-yr cycles: if sddsrvyr in (7,8,9,10) then MEC8YR = 1/4 * WTMEC2YR;

Example R-code: https://wwwn.cdc.gov/nchs/data/Tutorials/Code/DB303_R.r. Example of NHANES study where dependent variable is depression.


Part 4: Descriptive statistics and visualizations WEIGHTED BY DEMOGRAPHICS regarding self-reported drug use in men and women from surveys obtained 2011-2018.

Part 4A: Number of and percent of participants who responded “Yes” to ever having used A) marijuana or B) cocaine, heroin and/or meth. Described and visualized with bar plots.

# A) Ever use marijuana ########################################################

## Define dataset containing data of interest

table_nhanes_1 <- table_2011_2018 %>%
  # Set 7 = Refused and 9 = Don't Know To Missing for variable 200  ##
  mutate(DUQ200 = replace(DUQ200, DUQ200 >= 7, NA)) %>%
  #create factor variables
  mutate(# create indicator for overall summary
    table_nhanes_1 = 1,
    Year = factor(SDDSRVYR, labels = c("2011-2012", "2013-2014", 
                                       "2015-2016", "2017-2018")),
    Gender = factor(RIAGENDR, labels=c("Men", "Women")),
    Age.Group = cut(RIDAGEYR, breaks=c(-Inf,19,39,59,Inf),
                    labels=c("Under 20", "20-39","40-59","60 and over")),
    # Generate 8-year MEC weight (Divide weight by 4 because we are 
    #appending 4 survey cycles)                                                             
    # Note: using the MEC Exam Weights (WTMEC2YR), per the analytic notes 
    #on the NHANES documentation  
    WTMEC8YR = WTMEC2YR/4,
    # Define indicator for analysis population of interest: DUQ200, no missing
    #data
    inAnalysis = (!is.na(DUQ200)))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `DUQ200 = replace(DUQ200, DUQ200 >= 7, NA)`.
## Caused by warning in `Ops.factor()`:
## ! '>=' not meaningful for factors
## Define survey design ######################################################

# Define survey design for entire dataset
# NHANES_all_2 <- svydesign(data = table_nhanes_1, id= ~SDMVPSU, 
# strata= ~SDMVSTRA, weights=~WTMEC8YR, nest=TRUE)
# Create a survey design object for the subset of interest: !is.na for DUQ200 
# Subsetting the original survey design object ensures we keep the design 
#information about the number of clusters and strata

NHANES_all_2 <- as_survey_design(table_nhanes_1, id=SDMVPSU, strata=SDMVSTRA, 
                        weights=WTMEC8YR, nest=TRUE)

NHANES_DUQ200 <- subset(NHANES_all_2, inAnalysis)
## Use dplyr to manipulate data, obtain tibbles, and ggplot2 to visualize data

#By gender
total_count <- table_nhanes_1 %>%
  filter(DUQ200 %in% c("Yes", "No")) %>%
  group_by(Gender,DUQ200) %>%
  summarize(n = n()) %>%  
  mutate(count = n, percent = (n/sum(n))*100)
## `summarise()` has grouped output by 'Gender'. You can override using the
## `.groups` argument.
total_count
## # A tibble: 4 × 5
## # Groups:   Gender [2]
##   Gender DUQ200     n count percent
##   <fct>  <fct>  <int> <int>   <dbl>
## 1 Men    Yes     3931  3931    58.9
## 2 Men    No      2743  2743    41.1
## 3 Women  Yes     3287  3287    47.2
## 4 Women  No      3683  3683    52.8
total_count_yes <- total_count %>%
  filter(DUQ200 %in% "Yes") 

total_count_yes
## # A tibble: 2 × 5
## # Groups:   Gender [2]
##   Gender DUQ200     n count percent
##   <fct>  <fct>  <int> <int>   <dbl>
## 1 Men    Yes     3931  3931    58.9
## 2 Women  Yes     3287  3287    47.2
ggplot(total_count_yes, aes(x = Gender, y = count, fill = Gender)) + 
  geom_col() + 
  scale_fill_brewer(palette = "Set2") + 
  ylab("Number of individuals reporting ever trying marijuana") + 
  ylim(NA,5000)

ggplot(total_count_yes, aes(x = Gender, y = percent, fill = Gender)) + 
  geom_col() + 
  scale_fill_brewer(palette = "Set2") + 
  ylab("Percent of individuals reporting ever trying marijuana") + 
  ylim(NA,100)

#By age group
total_count <- table_nhanes_1 %>%
  filter(DUQ200 %in% c("Yes", "No")) %>%
  group_by(Age.Group,DUQ200) %>%
  summarize(n = n()) %>%  
  mutate(count = n, percent = (n/sum(n))*100)
## `summarise()` has grouped output by 'Age.Group'. You can override using the
## `.groups` argument.
total_count
## # A tibble: 6 × 5
## # Groups:   Age.Group [3]
##   Age.Group DUQ200     n count percent
##   <fct>     <fct>  <int> <int>   <dbl>
## 1 Under 20  Yes      560   560    51.5
## 2 Under 20  No       527   527    48.5
## 3 20-39     Yes     3603  3603    56.8
## 4 20-39     No      2738  2738    43.2
## 5 40-59     Yes     3055  3055    49.1
## 6 40-59     No      3161  3161    50.9
total_count_yes <- total_count %>%
  filter(DUQ200 %in% "Yes") 

total_count_yes
## # A tibble: 3 × 5
## # Groups:   Age.Group [3]
##   Age.Group DUQ200     n count percent
##   <fct>     <fct>  <int> <int>   <dbl>
## 1 Under 20  Yes      560   560    51.5
## 2 20-39     Yes     3603  3603    56.8
## 3 40-59     Yes     3055  3055    49.1
ggplot(total_count_yes, aes(x = Age.Group, y = count, fill = Age.Group)) + 
  geom_col() + 
  scale_fill_brewer(palette = "Set2") + 
  ylab("Number of individuals reporting ever trying marijuana") + 
  ylim(NA,5000)

ggplot(total_count_yes, aes(x = Age.Group, y = percent, fill = Age.Group)) + 
  geom_col() + 
  scale_fill_brewer(palette = "Set2") + 
  ylab("Percent of individuals reporting ever trying marijuana") + 
  ylim(NA,100)

Description of above: By gender (x-axis = Gender): More male than female participants responded “yes” to having ever tried marijuana. About half of each gender responded “Yes” (59% male, 47% female). By age group (x-axis = Age.Group): Most participants who responded “Yes” were older than 20 years. Across age groups, about half of participants responded “Yes”, but this ratio was a bit higher in the 20-39 years group.

# B) Ever use cocaine/heroin/meth ##############################################

table_nhanes_1 <- table_2011_2018 %>%
  # Set 7 = Refused and 9 = Don't Know To Missing for variable 240  ##
  mutate(DUQ240 = replace(DUQ240, DUQ240 >= 7, NA)) %>%
  #create factor variables
  mutate(# create indicator for overall summary
    table_nhanes_1 = 1,
    Year = factor(SDDSRVYR, labels = c("2011-2012", "2013-2014", 
                                       "2015-2016", "2017-2018")),
    Gender = factor(RIAGENDR, labels=c("Men", "Women")),
    Age.Group = cut(RIDAGEYR, breaks=c(-Inf,19,39,59,Inf),
                    labels=c("Under 20", "20-39","40-59","60 and over")),
    # Generate 8-year MEC weight (Divide weight by 4 because we are 
    #appending 4 survey cycles)                                                             
    # Note: using the MEC Exam Weights (WTMEC2YR), per the analytic notes 
    #on the NHANES documentation  
    WTMEC8YR = WTMEC2YR/4,
    # Define indicator for analysis population of interest: DUQ240 with a 
    #valid age 
    inAnalysis = (!is.na(DUQ240)))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `DUQ240 = replace(DUQ240, DUQ240 >= 7, NA)`.
## Caused by warning in `Ops.factor()`:
## ! '>=' not meaningful for factors
NHANES_all_2 <- as_survey_design(table_nhanes_1, id=SDMVPSU, strata=SDMVSTRA, 
                                 weights=WTMEC8YR, nest=TRUE)

NHANES_DUQ240 <- subset(NHANES_all_2, inAnalysis)
## Use dplyr to manipulate data, obtain tibbles, and ggplot2 to visualize data

#By gender
total_count <- table_nhanes_1 %>%
  filter(DUQ240 %in% c("Yes", "No")) %>%
  group_by(Gender,DUQ240) %>%
  summarize(n = n()) %>%  
  mutate(count = n, percent = (n/sum(n))*100)
## `summarise()` has grouped output by 'Gender'. You can override using the
## `.groups` argument.
total_count
## # A tibble: 4 × 5
## # Groups:   Gender [2]
##   Gender DUQ240     n count percent
##   <fct>  <fct>  <int> <int>   <dbl>
## 1 Men    Yes     1712  1712    20.5
## 2 Men    No      6632  6632    79.5
## 3 Women  Yes     1014  1014    11.7
## 4 Women  No      7659  7659    88.3
total_count_yes <- total_count %>%
  filter(DUQ240 %in% "Yes") 

total_count_yes
## # A tibble: 2 × 5
## # Groups:   Gender [2]
##   Gender DUQ240     n count percent
##   <fct>  <fct>  <int> <int>   <dbl>
## 1 Men    Yes     1712  1712    20.5
## 2 Women  Yes     1014  1014    11.7
ggplot(total_count_yes, aes(x = Gender, y = count, fill = Gender)) + 
  geom_col() + 
  scale_fill_brewer(palette = "Set2") + 
  ylab("Number of individuals reporting ever trying cocaine, heroin or meth") + 
  ylim(NA, 5000)

ggplot(total_count_yes, aes(x = Gender, y = percent, fill = Gender)) + 
  geom_col() + 
  scale_fill_brewer(palette = "Set2") + 
  ylab("Percent of individuals reporting ever trying cocaine, heroin or meth") + 
  ylim(NA, 100)

#By age group
total_count <- table_nhanes_1 %>%
  filter(DUQ240 %in% c("Yes", "No")) %>%
  group_by(Age.Group,DUQ240) %>%
  summarize(n = n()) %>%  
  mutate(count = n, percent = (n/sum(n))*100) 
## `summarise()` has grouped output by 'Age.Group'. You can override using the
## `.groups` argument.
total_count
## # A tibble: 8 × 5
## # Groups:   Age.Group [4]
##   Age.Group   DUQ240     n count percent
##   <fct>       <fct>  <int> <int>   <dbl>
## 1 Under 20    Yes       51    51    4.70
## 2 Under 20    No      1035  1035   95.3 
## 3 20-39       Yes      964   964   15.2 
## 4 20-39       No      5377  5377   84.8 
## 5 40-59       Yes     1189  1189   19.1 
## 6 40-59       No      5028  5028   80.9 
## 7 60 and over Yes      522   522   15.5 
## 8 60 and over No      2851  2851   84.5
total_count_yes <- total_count %>%
  filter(DUQ240 %in% "Yes") 

total_count_yes
## # A tibble: 4 × 5
## # Groups:   Age.Group [4]
##   Age.Group   DUQ240     n count percent
##   <fct>       <fct>  <int> <int>   <dbl>
## 1 Under 20    Yes       51    51    4.70
## 2 20-39       Yes      964   964   15.2 
## 3 40-59       Yes     1189  1189   19.1 
## 4 60 and over Yes      522   522   15.5
ggplot(total_count_yes, aes(x = Age.Group, y = count, fill = Age.Group)) + 
  geom_col() + 
  scale_fill_brewer(palette = "Set2") + 
  ylab("Number of individuals reporting ever trying cocaine, heroin or meth") + 
  ylim(NA,5000)

ggplot(total_count_yes, aes(x = Age.Group, y = percent, fill = Age.Group)) + 
  geom_col() + 
  scale_fill_brewer(palette = "Set2") + 
  ylab("Percent of individuals reporting ever trying cocaine, heroin or meth") + 
  ylim(NA,100)

Description of above: Y-axis scales are the same as for marijuana use to illustrate differences between number of participants ever having tried marijuana vs cocaine/heroin/meth. By gender (x-axis = Gender): Similar to marijuana, more male than female participants responded “yes” to having ever tried cocaine, heroin or meth. However, only 12% of women and 21% of men responded “Yes”, much less than for marijuana. By age group (x-axis = Age.Group): Most participants who responded “Yes” were older than 20 years. Across age groups, about 15-20% of participants responded “Yes”, but this ratio was smaller in the Under 20 years age group.


Part 4B: Descriptive statistics and visualizations of self-reported number of days during past month participant used A) marijuana, B) cocaine, C) heroin, D) methamphetamine (meth). Includes weighted means and standard errors described and visualized with bar plots, weighted frequencies visualized with histograms, and relationships to income:family size ratio, an indicator of poverty level. Data is described and visualized as overall, by gender, and by age group.

## A) Number of days used marijuana in past month

## Define dataset containing data of interest

table_nhanes_1 <- table_2011_2018 %>%
  mutate(DUQ230 = replace(DUQ230, DUQ230 >= 777, NA)) %>%
  #create factor variables
  mutate(# create indicator for overall summary
    table_nhanes_1 = 1,
    Year = factor(SDDSRVYR, labels = c("2011-2012", "2013-2014", 
                                       "2015-2016", "2017-2018")),
    Gender = factor(RIAGENDR, labels=c("Men", "Women")),
    Age.Group = cut(RIDAGEYR, breaks=c(-Inf,19,39,59,Inf),
                    labels=c("Under 20", "20-39","40-59","60 and over")),
    # Generate 8-year MEC weight (Divide weight by 4 because we are 
    #appending 4 survey cycles)                                                             
    # Note: using the MEC Exam Weights (WTMEC2YR), per the analytic notes 
    #on the NHANES documentation  
    WTMEC8YR = WTMEC2YR/4,
    # Define indicator for analysis population of interest: DUQ230 with a 
    #valid age 
    inAnalysis = (!is.na(DUQ230)))

## Define survey design 

NHANES_all_2 <- as_survey_design(table_nhanes_1, id=SDMVPSU, strata=SDMVSTRA, 
                                 weights=WTMEC8YR, nest=TRUE)

NHANES_DUQ230 <- subset(NHANES_all_2, inAnalysis)
# Weighted data descriptive statistics and visualizations

# Define a function to call svymean and unweighted count

getSummary <- function(varformula, byformula, design){
  # Get mean, stderr, and unweighted sample size
  c <- svyby(varformula, byformula, design, unwtd.count ) 
  p <- svyby(varformula, byformula, design, svymean) 
  outSum <- left_join(select(c,-se), p) 
  outSum
}

# Get results (see output for all results tables and plots)

getSummary <- function(varformula, byformula, design){
  # Get mean, stderr, and unweighted sample size
  c <- svyby(varformula, byformula, design, unwtd.count ) 
  p <- svyby(varformula, byformula, design, svymean) 
  outSum <- left_join(select(c,-se), p) 
  outSum
}

#Overall
getSummary(~DUQ230, ~table_nhanes_1, NHANES_DUQ230)
## Joining with `by = join_by(table_nhanes_1)`
##   table_nhanes_1 counts  DUQ230        se
## 1              1   2251 13.2987 0.3918085
out <- svyby(formula = ~DUQ230,
             by = ~table_nhanes_1,
             design = NHANES_DUQ230,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ230 - 1*se,
                  upper = DUQ230 + 1*se)

ggplot(out_col, aes(table_nhanes_1, DUQ230, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "purple") + 
  ylim(0,NA) + 
  ylab("Average number of days used marijuana in past month") + 
  geom_errorbar(width = 0.8)

####filter out 0 values
table_nhanes_1_exclude_0 <- table_nhanes_1 %>% 
  filter(DUQ230 > 0)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ230, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE)+
  xlab("Number of days used marijuana in past month") + 
  xlim(1,NA)

# By Gender
getSummary(~DUQ230, ~Gender, NHANES_DUQ230)
## Joining with `by = join_by(Gender)`
##   Gender counts   DUQ230        se
## 1    Men   1355 14.16068 0.4780267
## 2  Women    896 11.96931 0.4433332
out <- svyby(formula = ~DUQ230,
             by = ~Gender,
             design = NHANES_DUQ230,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ230 - 1*se,
                  upper = DUQ230 + 1*se)

ggplot(out_col, aes(x = Gender, y = DUQ230, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "purple") + 
  ylim(0,NA) + 
  ylab("Average number of days used marijuana in past month") + 
  geom_errorbar(width = 0.8) 

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ230, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Gender) +
  xlab("Number of days used marijuana in past month") +
  xlim(1,NA)

# By age group
getSummary(~DUQ230, ~Age.Group, NHANES_DUQ230)
## Joining with `by = join_by(Age.Group)`
##   Age.Group counts   DUQ230        se
## 1  Under 20    271 12.14662 1.1448777
## 2     20-39   1323 13.52292 0.4218369
## 3     40-59    657 13.15761 0.7078820
out <- svyby(formula = ~DUQ230,
             by = ~Age.Group,
             design = NHANES_DUQ230,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ230 - 1*se,
                  upper = DUQ230 + 1*se)

ggplot(out_col, aes(x = Age.Group, y = DUQ230, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "purple") + 
  ylim(0,NA) + 
  ylab("Average number of days used marijuana in past month") + 
  geom_errorbar(width = 0.8) 

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ230, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Age.Group) +
  xlab("Number of days used marijuana in past month") +
  xlim(1,NA)

################Relationship to Income:Family size ratio#######################

table_nhanes_1_exclude_0 <- table_nhanes_1 %>%
  filter(DUQ230 > 0)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ230, y = 
                             INDFMPIR, alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) + 
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used marijuana in past month") + 
  xlim(1,30)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ230, y = 
                             INDFMPIR, 
                           color = Gender, 
                           alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used marijuana in past month") + 
  facet_wrap(~Gender) + 
  xlim(1,30)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ230, y = 
                              INDFMPIR, 
                            color = Gender, 
                            alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used marijuana in past month") + 
  facet_wrap(~Age.Group) + 
  xlim(1,30)

Description of above: Darker shading in dot plots indicates more heavily-weighted data points.Histogram “counts” are weighted. 2,251 participants self-reported the number of days they used marijuana in the past month. More men (n = 1355) than women (n = 896) responded. Overall, the average number of days marijuana was used during the past month was ~13 days. Across all age groups, a bimodal frequency distribution occurred where men and women tended to report using marijuana either very few days or nearly every day during the past month. Women tended to use marijuana on fewer days (mean = 12 days) than men (14 days) during the past month. While it appears that marijuana was used more often in people with lower income to family size ratio (higher poverty level), it was also used by people with higher income:family size ratios, with high variability in frequency of use during the past month across gender, age group and poverty levels.

# B) Number of days used cocaine in past month (DUQ280)

table_nhanes_1 <- table_2011_2018 %>%
  mutate(DUQ280 = replace(DUQ280, DUQ280 >= 777, NA)) %>%
  #create factor variables
  mutate(# create indicator for overall summary
    table_nhanes_1 = 1,
    Year = factor(SDDSRVYR, labels = c("2011-2012", "2013-2014", 
                                       "2015-2016", "2017-2018")),
    Gender = factor(RIAGENDR, labels=c("Men", "Women")),
    Age.Group = cut(RIDAGEYR, breaks=c(-Inf,19,39,59,Inf),
                    labels=c("Under 20", "20-39","40-59","60 and over")),
    # Generate 8-year MEC weight (Divide weight by 4 because we are 
    #appending 4 survey cycles)                                                             
    # Note: using the MEC Exam Weights (WTMEC2YR), per the analytic notes 
    #on the NHANES documentation  
    WTMEC8YR = WTMEC2YR/4,
    # Define indicator for analysis population of interest: DUQ280 with a 
    #valid age 
    inAnalysis = (!is.na(DUQ280)))

NHANES_all_2 <- as_survey_design(table_nhanes_1, id=SDMVPSU, strata=SDMVSTRA, 
                                 weights=WTMEC8YR, nest=TRUE)

NHANES_DUQ280 <- subset(NHANES_all_2, inAnalysis)
# Weighted data descriptive statistics and visualizations

# Define a function to call svymean and unweighted count

getSummary <- function(varformula, byformula, design){
  # Get mean, stderr, and unweighted sample size
  c <- svyby(varformula, byformula, design, unwtd.count ) 
  p <- svyby(varformula, byformula, design, svymean) 
  outSum <- left_join(select(c,-se), p) 
  outSum
}

# Get results (see output for all results tables and plots)

getSummary <- function(varformula, byformula, design){
  # Get mean, stderr, and unweighted sample size
  c <- svyby(varformula, byformula, design, unwtd.count ) 
  p <- svyby(varformula, byformula, design, svymean) 
  outSum <- left_join(select(c,-se), p) 
  outSum
}

#DUQ280 weighted mean

getSummary(~DUQ280, ~table_nhanes_1, NHANES_DUQ280)
## Joining with `by = join_by(table_nhanes_1)`
##   table_nhanes_1 counts   DUQ280        se
## 1              1    227 3.297038 0.3246398
out <- svyby(formula = ~DUQ280,
             by = ~table_nhanes_1,
             design = NHANES_DUQ280,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ280 - 1*se,
                  upper = DUQ280 + 1*se)

ggplot(out_col, aes(table_nhanes_1, DUQ280, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "blue") + 
  ylim(0,NA) + 
  ylab("Average number of days used cocaine in past month") + 
  geom_errorbar(width = 0.8)

table_nhanes_1_exclude_0 <- table_nhanes_1 %>%
  filter(DUQ280 > 0)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ280, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE)+
  xlab("Number of days used cocaine in past month") +
  xlim(1,NA)

# By Gender
getSummary(~DUQ280, ~Gender, NHANES_DUQ280)
## Joining with `by = join_by(Gender)`
##   Gender counts   DUQ280        se
## 1    Men    158 2.850153 0.2604982
## 2  Women     69 4.270214 0.9167835
out <- svyby(formula = ~DUQ280,
             by = ~Gender,
             design = NHANES_DUQ280,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ280 - 1*se,
                  upper = DUQ280 + 1*se)

ggplot(out_col, aes(x = Gender, y = DUQ280, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "blue") + 
  ylim(0,NA) + 
  ylab("Average number of days used cocaine in past month") + 
  geom_errorbar(width = 0.8) 

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ280, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Gender) +
  xlab("Number of days used cocaine in past month") +
  xlim(1,NA)

#By age
getSummary(~DUQ280, ~Age.Group, NHANES_DUQ280)
## Joining with `by = join_by(Age.Group)`
##   Age.Group counts   DUQ280        se
## 1  Under 20     12 2.462129 0.6574136
## 2     20-39    128 3.038265 0.4354862
## 3     40-59     87 3.887171 0.5128187
out <- svyby(formula = ~DUQ280,
             by = ~Age.Group,
             design = NHANES_DUQ280,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ280 - 1*se,
                  upper = DUQ280 + 1*se)

ggplot(out_col, aes(x = Age.Group, y = DUQ280, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "blue") + 
  ylim(0,NA) + 
  ylab("Average number of days used cocaine in past month") + 
  geom_errorbar(width = 0.8) 

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ280, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Age.Group) +
  xlab("Number of days used cocaine in past month") +
  xlim(1,NA)

################Relationship to Income:Family size ratio#######################

table_nhanes_1_exclude_0 <- table_nhanes_1 %>%
  filter(DUQ280 > 0)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ280, y = 
                             INDFMPIR, alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) + 
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used cocaine in past month") + 
  xlim(1,30)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ280, y = 
                             INDFMPIR, 
                           color = Gender, 
                           alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used cocaine in past month") + 
  facet_wrap(~Gender) + 
  xlim(1,30)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ280, y = 
                              INDFMPIR, 
                            color = Gender, 
                            alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used cocaine in past month") + 
  facet_wrap(~Age.Group) + 
  xlim(1,30)

Description of above: Darker shading in dot plots indicates more heavily-weighted data points. Histogram “counts” are weighted. 227 participants self-reported the number of days they used cocaine in the past month, far fewer than for marijuana. Similar to marijuana, more men (n = 158) than women (n = 69) responded. Overall, the average number of days cocaine was used during the past month was ~3 days, much less than the average for marijuana use. In contrast to marijuana, women used cocaine more frequently (~4 days) than men (~3 days) during the last month. Across gender and all age groups, the frequency distribution was negatively skewed, with some individuals reporting using cocaine anywhere from 3 - 30 days during the last month, although <10 days was most frequent. Cocaine was reported as used more frequently during the past month in men and women with lower income to family size ratio (higher poverty level) in older age groups, particularly in 40-59 year olds.

# C) Number of days used heroin in past month (DUQ320)

table_nhanes_1 <- table_2011_2018 %>%
  mutate(DUQ320 = replace(DUQ320, DUQ320 >= 77, NA)) %>%
  #create factor variables
  mutate(# create indicator for overall summary
    table_nhanes_1 = 1,
    Year = factor(SDDSRVYR, labels = c("2011-2012", "2013-2014", 
                                       "2015-2016", "2017-2018")),
    Gender = factor(RIAGENDR, labels=c("Men", "Women")),
    Age.Group = cut(RIDAGEYR, breaks=c(-Inf,19,39,59,Inf),
                    labels=c("Under 20", "20-39","40-59","60 and over")),
    # Generate 8-year MEC weight (Divide weight by 4 because we are 
    #appending 4 survey cycles)                                                             
    # Note: using the MEC Exam Weights (WTMEC2YR), per the analytic notes 
    #on the NHANES documentation  
    WTMEC8YR = WTMEC2YR/4,
    # Define indicator for analysis population of interest: DUQ320 with a 
    #valid age 
    inAnalysis = (!is.na(DUQ320)))

NHANES_all_2 <- as_survey_design(table_nhanes_1, id=SDMVPSU, strata=SDMVSTRA, 
                                 weights=WTMEC8YR, nest=TRUE)

NHANES_DUQ320 <- subset(NHANES_all_2, inAnalysis)
# Weighted data descriptive statistics and visualizations

# Define a function to call svymean and unweighted count

getSummary <- function(varformula, byformula, design){
  # Get mean, stderr, and unweighted sample size
  c <- svyby(varformula, byformula, design, unwtd.count ) 
  p <- svyby(varformula, byformula, design, svymean) 
  outSum <- left_join(select(c,-se), p) 
  outSum
}

# Get results (see output for all results tables and plots)

getSummary <- function(varformula, byformula, design){
  # Get mean, stderr, and unweighted sample size
  c <- svyby(varformula, byformula, design, unwtd.count ) 
  p <- svyby(varformula, byformula, design, svymean) 
  outSum <- left_join(select(c,-se), p) 
  outSum
}

#DUQ320 weighted mean

getSummary(~DUQ320, ~table_nhanes_1, NHANES_DUQ320)
## Joining with `by = join_by(table_nhanes_1)`
##   table_nhanes_1 counts   DUQ320       se
## 1              1     30 13.06192 2.099767
out <- svyby(formula = ~DUQ320,
             by = ~table_nhanes_1,
             design = NHANES_DUQ320,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ320 - 1*se,
                  upper = DUQ320 + 1*se)

ggplot(out_col, aes(table_nhanes_1, DUQ320, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "green") + 
  ylim(0,NA) + 
  ylab("Average number of days used heroin in past month") + 
  geom_errorbar(width = 0.8)

table_nhanes_1_exclude_0 <- table_nhanes_1 %>%
  filter(DUQ320 > 0)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ320, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE)+
  xlab("Number of days used heroin in past month") +
  xlim(1,NA)

# By Gender
getSummary(~DUQ320, ~Gender, NHANES_DUQ320)
## Joining with `by = join_by(Gender)`
##   Gender counts   DUQ320       se
## 1    Men     23 14.02679 2.788799
## 2  Women      7 10.32785 3.904974
out <- svyby(formula = ~DUQ320,
             by = ~Gender,
             design = NHANES_DUQ320,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ320 - 1*se,
                  upper = DUQ320 + 1*se)

ggplot(out_col, aes(x = Gender, y = DUQ320, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "green") + 
  ylim(0,NA) + 
  ylab("Average number of days used heroin in past month") + 
  geom_errorbar(width = 0.8) 

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ320, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Gender) +
  xlab("Number of days used heroin in past month") +
  xlim(1,NA)

# By Age Group
getSummary(~DUQ320, ~Age.Group, NHANES_DUQ320)
## Joining with `by = join_by(Age.Group)`
##   Age.Group counts    DUQ320       se
## 1     20-39     17 15.165675 2.413688
## 2     40-59     13  7.276432 3.065020
out <- svyby(formula = ~DUQ320,
             by = ~Age.Group,
             design = NHANES_DUQ320,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ320 - 1*se,
                  upper = DUQ320 + 1*se)

ggplot(out_col, aes(x = Age.Group, y = DUQ320, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "green") + 
  ylim(0,NA) + 
  ylab("Average number of days used heroin in past month") + 
  geom_errorbar(width = 0.8) 

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ320, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Age.Group) +
  xlab("Number of days used heroin in past month") +
  xlim(1,NA)

################Relationship to Income:Family size ratio#######################

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ320, 
                           y = INDFMPIR, 
                           alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) + 
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used heroin in last month") + 
  xlim(1,30)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ320, 
                           y = INDFMPIR, 
                           color = Gender, 
                           alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used heroin in last month") + 
  facet_wrap(~Gender) + 
  xlim(1,30)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ320, 
                           y = INDFMPIR, 
                           color = Gender, 
                           alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used heroin in last month") + 
  facet_wrap(~Age.Group) + 
  xlim(1,30)

Description of above: Darker shading in dot plots indicates more heavily-weighted data points. Histogram “counts” are weighted. 30 participants self-reported the number of days they used heroin in the past month, far fewer than for marijuana or cocaine. No one under 20 years reported using heroin during the past month. Similar to marijuana and cocaine, more men (n = 23) than women (n = 7) responded. Overall, the average number of days heroin was used during the past month was 13 days, less frequently than the average for marijuana use but more frequently than cocaine use. Similar to marijuana but in contrast to cocaine, women used heroin less frequently (~10 days) than men (~14 days) during the last month. Frequency distribution of heroin use was variable, with individuals reporting using heroin anywhere from very few to most days during the last month. Heroin was reported as used more frequently during the past month in individuals with lower income to family size ratio (higher poverty level) in older age groups, particularly in 20-39 year olds.

# D) Number of days used methamphetamine in past month (DUQ360)

table_nhanes_1 <- table_2011_2018 %>%
  mutate(DUQ360 = replace(DUQ360, DUQ360 >= 77, NA)) %>%
  #create factor variables
  mutate(# create indicator for overall summary
    table_nhanes_1 = 1,
    Year = factor(SDDSRVYR, labels = c("2011-2012", "2013-2014", 
                                       "2015-2016", "2017-2018")),
    Gender = factor(RIAGENDR, labels=c("Men", "Women")),
    Age.Group = cut(RIDAGEYR, breaks=c(-Inf,19,39,59,Inf),
                    labels=c("Under 20", "20-39","40-59","60 and over")),
    # Generate 8-year MEC weight (Divide weight by 4 because we are 
    #appending 4 survey cycles)                                                             
    # Note: using the MEC Exam Weights (WTMEC2YR), per the analytic notes 
    #on the NHANES documentation  
    WTMEC8YR = WTMEC2YR/4,
    # Define indicator for analysis population of interest: DUQ360 with a 
    #valid age 
    inAnalysis = (!is.na(DUQ360)))

NHANES_all_2 <- as_survey_design(table_nhanes_1, id=SDMVPSU, strata=SDMVSTRA, 
                                 weights=WTMEC8YR, nest=TRUE)

NHANES_DUQ360 <- subset(NHANES_all_2, inAnalysis)
# Weighted data descriptive statistics and visualizations

# Define a function to call svymean and unweighted count

getSummary <- function(varformula, byformula, design){
  # Get mean, stderr, and unweighted sample size
  c <- svyby(varformula, byformula, design, unwtd.count ) 
  p <- svyby(varformula, byformula, design, svymean) 
  outSum <- left_join(select(c,-se), p) 
  outSum
}

# Get results (see output for all results tables and plots)

#DUQ360 weighted mean
getSummary(~DUQ360, ~table_nhanes_1, NHANES_DUQ360)
## Joining with `by = join_by(table_nhanes_1)`
##   table_nhanes_1 counts   DUQ360       se
## 1              1    108 7.784202 1.189926
out <- svyby(formula = ~DUQ360,
             by = ~table_nhanes_1,
             design = NHANES_DUQ360,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ360 - 1*se,
                  upper = DUQ360 + 1*se)

ggplot(out_col, aes(table_nhanes_1, DUQ360, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "orange") + 
  ylim(0,NA) + 
  ylab("Average number of days used meth in past month") + 
  geom_errorbar(width = 0.8)

table_nhanes_1_exclude_0 <- table_nhanes_1 %>%
  filter(DUQ360 > 0)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ360, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE)+
  xlab("Number of days used meth in past month") +
  xlim(1,NA)

# By Gender
getSummary(~DUQ360, ~Gender, NHANES_DUQ360)
## Joining with `by = join_by(Gender)`
##   Gender counts   DUQ360       se
## 1    Men     78 8.589133 1.870559
## 2  Women     30 5.675083 1.392610
out <- svyby(formula = ~DUQ360,
             by = ~Gender,
             design = NHANES_DUQ360,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ360 - 1*se,
                  upper = DUQ360 + 1*se)

ggplot(out_col, aes(x = Gender, y = DUQ360, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "orange") + 
  ylim(0,NA) + 
  ylab("Average number of days used meth in past month") + 
  geom_errorbar(width = 0.8) 

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ360, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Gender) +
  xlab("Number of days used meth in past month") +
  xlim(1,NA)

# By Age Group
getSummary(~DUQ360, ~Age.Group, NHANES_DUQ360)
## Joining with `by = join_by(Age.Group)`
##   Age.Group counts   DUQ360        se
## 1  Under 20      2 2.160070 1.1647877
## 2     20-39     51 5.871812 0.9931428
## 3     40-59     55 9.587373 1.6971213
out <- svyby(formula = ~DUQ360,
             by = ~Age.Group,
             design = NHANES_DUQ360,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ360 - 1*se,
                  upper = DUQ360 + 1*se)

ggplot(out_col, aes(x = Age.Group, y = DUQ360, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "orange") + 
  ylim(0,NA) + 
  ylab("Average number of days used meth in past month") + 
  geom_errorbar(width = 0.8) 

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ360, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Age.Group) +
  xlab("Number of days used meth in past month") +
  xlim(1,NA)

################Relationship to Income:Family size ratio#######################


ggplot(table_nhanes_1_exclude_0, aes(x = DUQ360, 
                           y = INDFMPIR, 
                           alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) + 
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used meth in past month") + 
  xlim(1,30)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ360, 
                           y = INDFMPIR, 
                           color = Gender, 
                           alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used meth in past month") + 
  facet_wrap(~Gender) + 
  xlim(1,30)

ggplot(table_nhanes_1_exclude_0, aes(x = DUQ360, 
                           y = INDFMPIR, 
                           color = Gender, 
                           alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Number of days used meth in past month") + 
  facet_wrap(~Age.Group) + 
  xlim(1,30)

Description of above: Darker shading in dot plots indicates more heavily-weighted data points. Histogram “counts” are weighted. 108 participants self-reported the number of days they used methamphetamine (meth) in the past month, fewer than for marijuana or cocaine but greater than for heroin use. Only two participants under 20 years reported using meth during the past month. Similar to marijuana, cocaine and heroin, more men (n = 78) than women (n = 30) responded. Overall, the average number of days meth was used during the past month was 8 days, less frequently than the average for marijuana and heroin use but more frequently than cocaine use. Similar to marijuana and heroin use but in contrast to cocaine use, women used meth less frequently (~6 days) than men (~9 days) during the last month. Frequency distribution of meth use over the past month was variable. The distribution of meth use was variable and positively skewed in both men and women across number of days. This was true in 20-39 year olds as well. In 40-59 year olds, frequency of meth use was more bimodal, with <10 or >25 days of use being most frequent. For both individuals under 20 years old, reported use was <10 days over the last month. Meth was reported as used more frequently during the past month in individuals with lower income to family size ratio (higher poverty level) in older age groups, similar to cocaine and heroin use.


Part 4C: Descriptive statistics and visualizations of self-reported age when first trying A) marijuana, B) cocaine, C) heroin, and D) methamphetamine (meth). Includes weighted means and standard errors described and visualized with bar plots, weighted frequencies visualized with histograms, and relationships to income:family size ratio, an indicator of poverty level. Data is described and visualized as overall, by gender, and by age group.

## A) age first tried marijuana

## Define dataset containing data of interest

table_nhanes_1 <- table_2011_2018 %>%
  # Set 777 = Refused and 999 = Don't Know To Missing for variable 210  ##
  mutate(DUQ210 = replace(DUQ210, DUQ210 >= 777, NA)) %>%
  #create factor variables
    mutate(# create indicator for overall summary
    table_nhanes_1 = 1,
    Year = factor(SDDSRVYR, labels = c("2011-2012", "2013-2014", 
                                       "2015-2016", "2017-2018")),
    Gender = factor(RIAGENDR, labels=c("Men", "Women")),
         Age.Group = cut(RIDAGEYR, breaks=c(-Inf,19,39,59,Inf),
                         labels=c("Under 20", "20-39","40-59","60 and over")),
         # Generate 8-year MEC weight (Divide weight by 4 because we are 
         #appending 4 survey cycles)                                                             
         # Note: using the MEC Exam Weights (WTMEC2YR), per the analytic notes 
         #on the NHANES documentation  
         WTMEC8YR = WTMEC2YR/4,
        # Define indicator for analysis population of interest: DUQ210 with a 
        #valid age 
        inAnalysis = (!is.na(DUQ210)))

### Define survey design 

NHANES_all_2 <- as_survey_design(table_nhanes_1, id=SDMVPSU, strata=SDMVSTRA, 
                                 weights=WTMEC8YR, nest=TRUE)

NHANES_DUQ210 <- subset(NHANES_all_2, inAnalysis)
# Weighted data descriptive statistics and visualizations

# Define a function to call svymean and unweighted count

getSummary <- function(varformula, byformula, design){
  # Get mean, stderr, and unweighted sample size
  c <- svyby(varformula, byformula, design, unwtd.count ) 
  p <- svyby(varformula, byformula, design, svymean) 
  outSum <- left_join(select(c,-se), p) 
  outSum
}

# Get results (see output for all results tables and plots)

#DUQ210 weighted mean and frequency overall

getSummary(~DUQ210, ~table_nhanes_1, NHANES_DUQ210)
## Joining with `by = join_by(table_nhanes_1)`
##   table_nhanes_1 counts   DUQ210         se
## 1              1   7213 17.25957 0.07149401
out <- svyby(formula = ~DUQ210,
             by = ~table_nhanes_1,
             design = NHANES_DUQ210,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ210 - 1*se,
                  upper = DUQ210 + 1*se)

ggplot(out_col, aes(table_nhanes_1, DUQ210, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "purple") + 
  ylab("Average age first tried marijuana (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA, 30)

ggplot(table_nhanes_1, aes(DUQ210, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  xlab("Age first tried marijuana (years)")

# DUQ210 Weighted Mean and frequency By Gender

getSummary(~DUQ210, ~Gender, NHANES_DUQ210)
## Joining with `by = join_by(Gender)`
##   Gender counts   DUQ210         se
## 1    Men   3928 16.87636 0.08488343
## 2  Women   3285 17.71431 0.10855156
out <- svyby(formula = ~DUQ210,
             by = ~Gender,
             design = NHANES_DUQ210,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ210 - 1*se,
                  upper = DUQ210 + 1*se)

ggplot(out_col, aes(x = Gender, y = DUQ210, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "purple") + 
  ylab("Average age first tried marijuana (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA, 30)

ggplot(table_nhanes_1, aes(DUQ210, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  xlab("Age first tried marijuana (years)") + 
  facet_wrap(~Gender)

# DUQ210 weighted mean and frequency By age group

getSummary(~DUQ210, ~Age.Group, NHANES_DUQ210)
## Joining with `by = join_by(Age.Group)`
##   Age.Group counts   DUQ210         se
## 1  Under 20    559 15.53352 0.14285290
## 2     20-39   3603 17.13847 0.09460781
## 3     40-59   3051 17.53541 0.10405569
out <- svyby(formula = ~DUQ210,
             by = ~Age.Group,
             design = NHANES_DUQ210,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ210 - 1*se,
                  upper = DUQ210 + 1*se)

ggplot(out_col, aes(x = Age.Group, y = DUQ210, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "purple") + 
  ylab("Average age first tried marijuana (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA, 30)

ggplot(table_nhanes_1, aes(DUQ210, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  xlab("Age first tried marijuana (years)") + 
  facet_wrap(~Age.Group)

################Relationship to Income:Family size ratio#######################

ggplot(table_nhanes_1, aes(x = DUQ210, y = INDFMPIR, alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) + 
  scale_color_brewer(palette = "Set2") + 
    ylab("Income to Family Ratio") + 
    xlab("Age first tried marijuana") 

ggplot(table_nhanes_1, aes(x = DUQ210, y = INDFMPIR, 
                           color = Gender, 
                           alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Age first tried marijuana") + 
  facet_wrap(~Gender)

ggplot(table_nhanes_1, aes(x = DUQ210, y = INDFMPIR, 
                            color = Gender, 
                            alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Age first tried marijuana") + 
  facet_wrap(~Age.Group)

Description of above: Darker shading in dot plots indicates more heavily-weighted data points. Histogram “counts” are weighted. More men (n = 3928) than women (n = 3285) reported age at first trying marijuana. The overall average age at first trying marijuana was ~17 yrs and was similar for men and women, with women being slightly older (<1 year). Average age at first trying marijuana decreased as age group decreased, implying that participants aged 20-39 and 40-59 tried marijuana at slightly older ages (~17-18) than under 20 participants (~age 16). The histograms show that in these older age groups, average ages at first trying marijuana were skewed towards older age due to participants reporting trying marijuana for the first time in their 30’s and 40’s. The scatterplots do not demonstrate a clear relationship between income:family size ratio (lower ratios indicate greater poverty) overall, by gender, or by age group. Interestingly, many older participants, across the income:family size ratio spectra, reported using marijuana for the first time at over 20 years.

# B) Age first used cocaine (DUQ260)

table_nhanes_1 <- table_2011_2018 %>%
  mutate(DUQ260 = replace(DUQ260, DUQ260 >= 777, NA)) %>%
  #create factor variables
  mutate(# create indicator for overall summary
    table_nhanes_1 = 1,
    Year = factor(SDDSRVYR, labels = c("2011-2012", "2013-2014", 
                                       "2015-2016", "2017-2018")),
    Gender = factor(RIAGENDR, labels=c("Men", "Women")),
    Age.Group = cut(RIDAGEYR, breaks=c(-Inf,19,39,59,Inf),
                    labels=c("Under 20", "20-39","40-59","60 and over")),
    # Generate 8-year MEC weight (Divide weight by 4 because we are 
    #appending 4 survey cycles)                                                             
    # Note: using the MEC Exam Weights (WTMEC2YR), per the analytic notes 
    #on the NHANES documentation  
    WTMEC8YR = WTMEC2YR/4,
    # Define indicator for analysis population of interest: DUQ260 with a 
    #valid age 
    inAnalysis = (!is.na(DUQ260)))

NHANES_all_2 <- as_survey_design(table_nhanes_1, id=SDMVPSU, strata=SDMVSTRA, 
                                 weights=WTMEC8YR, nest=TRUE)

NHANES_DUQ260 <- subset(NHANES_all_2, inAnalysis)
# Weighted data descriptive statistics and visualizations

getSummary <- function(varformula, byformula, design){
  # Get mean, stderr, and unweighted sample size
  c <- svyby(varformula, byformula, design, unwtd.count ) 
  p <- svyby(varformula, byformula, design, svymean) 
  outSum <- left_join(select(c,-se), p) 
  outSum
}

# Get results (see output for all results tables and plots)

#DUQ260 weighted mean and frequency overall

getSummary(~DUQ260, ~table_nhanes_1, NHANES_DUQ260)
## Joining with `by = join_by(table_nhanes_1)`
##   table_nhanes_1 counts   DUQ260        se
## 1              1   2093 21.07566 0.1485499
out <- svyby(formula = ~DUQ260,
             by = ~table_nhanes_1,
             design = NHANES_DUQ260,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ260 - 1*se,
                  upper = DUQ260 + 1*se)

ggplot(out_col, aes(table_nhanes_1, DUQ260, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "blue") + 
  ylab("Age first tried cocaine (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA,30)

ggplot(table_nhanes_1, aes(DUQ260, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", na.rm = TRUE) +
  xlab("Age first tried cocaine (years)")

# By Gender

getSummary(~DUQ260, ~Gender, NHANES_DUQ260)
## Joining with `by = join_by(Gender)`
##   Gender counts   DUQ260        se
## 1    Men   1298 21.00088 0.1730902
## 2  Women    795 21.19966 0.2138307
out <- svyby(formula = ~DUQ260,
             by = ~Gender,
             design = NHANES_DUQ260,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ260 - 1*se,
                  upper = DUQ260 + 1*se)

ggplot(out_col, aes(x = Gender, y = DUQ260, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "blue") + 
  ylim(0,NA) + 
  ylab("Age first tried cocaine (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA,30)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

ggplot(table_nhanes_1, aes(DUQ260, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", na.rm = TRUE) +
  xlab("Age first tried cocaine (years)") + 
  facet_wrap(~Gender)

# By age
getSummary(~DUQ260, ~Age.Group, NHANES_DUQ260)
## Joining with `by = join_by(Age.Group)`
##   Age.Group counts   DUQ260        se
## 1  Under 20     48 17.33161 0.1471544
## 2     20-39    898 20.22698 0.1815832
## 3     40-59   1147 21.78532 0.2120667
out <- svyby(formula = ~DUQ260,
             by = ~Age.Group,
             design = NHANES_DUQ260,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ260 - 1*se,
                  upper = DUQ260 + 1*se)

ggplot(out_col, aes(x = Age.Group, y = DUQ260, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "blue") + 
  ylim(0,NA) + 
  ylab("Age first tried cocaine (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA,30)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

ggplot(table_nhanes_1, aes(DUQ260, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", na.rm = TRUE) +
  xlab("Age first tried cocaine (years)") + 
  facet_wrap(~Age.Group)

################Relationship to Income:Family size ratio#######################

ggplot(table_nhanes_1, aes(x = DUQ260, y = 
                             INDFMPIR, alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) + 
  scale_color_brewer(palette = "Set3") + 
  ylab("Income to Family Ratio") + 
  xlab("Age first tried cocaine") 

ggplot(table_nhanes_1, aes(x = DUQ260, y = 
                             INDFMPIR, 
                           color = Gender, 
                           alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Age first tried cocaine") + 
  facet_wrap(~Gender)

ggplot(table_nhanes_1, aes(x = DUQ260, y = 
                              INDFMPIR, 
                            color = Gender, 
                            alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Age first tried cocaine") + 
  facet_wrap(~Age.Group) 

Description of above: Darker shading in dot plots indicates more heavily-weighted data points. Histogram “counts” are weighted. Similar to marijuana, more men (n = 1298) than women (n = 795) reported age at first using cocaine. The overall average age at first using cocaine was ~21 yrs, about 4 years older than for marijuana, and was similar among men and women (total n = 2093). Similar to marijuana, average age at first trying cocaine decreased as age group decreased, implying that participants aged 20-59 tried cocaine at slightly older ages than under 20 participants.The histograms show that in these older age groups, age at first trying cocaine was skewed towards older age due to participants reporting trying cocaine for the first time in their 30’s and 40’s, similar to marijuana. Only 48 participants under 20 years reported age at first trying cocaine.The scatterplots do not demonstrate a clear relationship between income:family size ratio (lower ratios indicate greater poverty) and cocaine use by gender. Interestingly, there is a pattern in which many older participants (20-59 years) at higher poverty levels report older than average ages at first trying cocaine compared to participants with lower poverty levels.

# C) Age first tried heroin (DUQ300)

table_nhanes_1 <- table_2011_2018 %>%
  mutate(DUQ300 = replace(DUQ300, DUQ300 >= 777, NA)) %>%
  #create factor variables
  mutate(# create indicator for overall summary
    table_nhanes_1 = 1,
    Year = factor(SDDSRVYR, labels = c("2011-2012", "2013-2014", 
                                       "2015-2016", "2017-2018")),
    Gender = factor(RIAGENDR, labels=c("Men", "Women")),
    Age.Group = cut(RIDAGEYR, breaks=c(-Inf,19,39,59,Inf),
                    labels=c("Under 20", "20-39","40-59","60 and over")),
    # Generate 8-year MEC weight (Divide weight by 4 because we are 
    #appending 4 survey cycles)                                                             
    # Note: using the MEC Exam Weights (WTMEC2YR), per the analytic notes 
    #on the NHANES documentation  
    WTMEC8YR = WTMEC2YR/4,
    # Define indicator for analysis population of interest: DUQ300 with a 
    #valid age 
    inAnalysis = (!is.na(DUQ300)))

NHANES_all_2 <- as_survey_design(table_nhanes_1, id=SDMVPSU, strata=SDMVSTRA, 
                                 weights=WTMEC8YR, nest=TRUE)

NHANES_DUQ300 <- subset(NHANES_all_2, inAnalysis)
# Weighted data descriptive statistics and visualizations

getSummary <- function(varformula, byformula, design){
  # Get mean, stderr, and unweighted sample size
  c <- svyby(varformula, byformula, design, unwtd.count ) 
  p <- svyby(varformula, byformula, design, svymean) 
  outSum <- left_join(select(c,-se), p) 
  outSum
}

# Get results (see output for all results tables and plots)

#DUQ300 weighted mean overall

getSummary(~DUQ300, ~table_nhanes_1, NHANES_DUQ300)
## Joining with `by = join_by(table_nhanes_1)`
##   table_nhanes_1 counts   DUQ300        se
## 1              1    285 24.32627 0.8037168
out <- svyby(formula = ~DUQ300,
             by = ~table_nhanes_1,
             design = NHANES_DUQ300,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ300 - 1*se,
                  upper = DUQ300 + 1*se)

ggplot(out_col, aes(table_nhanes_1, DUQ300, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "green") + 
  ylab("Age first tried heroin (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA,30)

ggplot(table_nhanes_1, aes(x = DUQ300, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE)+
  xlab("Age first tried heroin (years)")

# By Gender
getSummary(~DUQ300, ~Gender, NHANES_DUQ300)
## Joining with `by = join_by(Gender)`
##   Gender counts   DUQ300        se
## 1    Men    190 24.65102 1.0388882
## 2  Women     95 23.56931 0.7411713
out <- svyby(formula = ~DUQ300,
             by = ~Gender,
             design = NHANES_DUQ300,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ300 - 1*se,
                  upper = DUQ300 + 1*se)

ggplot(out_col, aes(x = Gender, y = DUQ300, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "green") + 
  ylab("Age first tried heroin (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA,30)

ggplot(table_nhanes_1, aes(x = DUQ300, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Gender) +
  xlab("Age first tried heroin (years)")

# By age
getSummary(~DUQ300, ~Age.Group, NHANES_DUQ300)
## Joining with `by = join_by(Age.Group)`
##   Age.Group counts   DUQ300        se
## 1  Under 20      1 18.00000 0.0000000
## 2     20-39    125 21.92782 0.5612027
## 3     40-59    159 26.83573 1.3221051
out <- svyby(formula = ~DUQ300,
             by = ~Age.Group,
             design = NHANES_DUQ300,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ300 - 1*se,
                  upper = DUQ300 + 1*se)

ggplot(out_col, aes(x = Age.Group, y = DUQ300, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "green") + 
  ylab("Age first tried heroin (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA,30)

ggplot(table_nhanes_1, aes(x = DUQ300, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Age.Group) +
  xlab("Age first tried heroin (years)")

################Relationship to Income:Family size ratio#######################

ggplot(table_nhanes_1, aes(x = DUQ300, 
                           y = INDFMPIR, 
                           alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) + 
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Age first tried heroin (years)") 

ggplot(table_nhanes_1, aes(x = DUQ300, 
                           y = INDFMPIR, 
                                     color = Gender, 
                                     alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Age first tried heroin (years)") + 
  facet_wrap(~Gender)

ggplot(table_nhanes_1, aes(x = DUQ300, 
                           y = INDFMPIR, 
                                     color = Gender, 
                                     alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Age first tried heroin (years)") + 
  facet_wrap(~Age.Group) 

Description of above: Darker shading in dot plots indicates more heavily-weighted data points. Histogram “counts” are weighted. Only one participant under 20 years reported age at first using heroin. Relative to marijuana and cocaine, a smaller number of participants (total n = 285) reported their age when they first tried heroin. Similar to marijuana and cocaine, more men (n = 190) than women (n = 95) responded. Overall average age at first trying heroin was ~24 yrs, about 7 and 3 years older than for marijuana and cocaine, respectively, as shown above. On average, and dissimilar to marijuana and cocaine, respectively, women were slightly younger than men. Similar to marijuana and cocaine, average age at first trying heroin decreased as age group decreased. Interestingly, the histograms show that older participants, especially men, reported first trying heroin at older ages including 30s, 40s and even 50s. The scatterplots demonstrate a relationship between income:family size ratio (lower ratios indicate greater poverty) in which older participants 20-59 years with greater poverty report first age at trying heroin at both younger and older ages relative to those at lower poverty levels, similar to cocaine use.

# D) Age first tried meth (DUQ340)

table_nhanes_1 <- table_2011_2018 %>%
  mutate(DUQ340 = replace(DUQ340, DUQ340 >= 777, NA)) %>%
  #create factor variables
  mutate(# create indicator for overall summary
    table_nhanes_1 = 1,
    Year = factor(SDDSRVYR, labels = c("2011-2012", "2013-2014", 
                                       "2015-2016", "2017-2018")),
    Gender = factor(RIAGENDR, labels=c("Men", "Women")),
    Age.Group = cut(RIDAGEYR, breaks=c(-Inf,19,39,59,Inf),
                    labels=c("Under 20", "20-39","40-59","60 and over")),
    # Generate 8-year MEC weight (Divide weight by 4 because we are 
    #appending 4 survey cycles)                                                             
    # Note: using the MEC Exam Weights (WTMEC2YR), per the analytic notes 
    #on the NHANES documentation  
    WTMEC8YR = WTMEC2YR/4,
    # Define indicator for analysis population of interest: DUQ340 with a 
    #valid age 
    inAnalysis = (!is.na(DUQ340)))

NHANES_all_2 <- as_survey_design(table_nhanes_1, id=SDMVPSU, strata=SDMVSTRA, 
                                 weights=WTMEC8YR, nest=TRUE)

NHANES_DUQ340 <- subset(NHANES_all_2, inAnalysis)
# Weighted data descriptive statistics and visualizations

getSummary <- function(varformula, byformula, design){
  # Get mean, stderr, and unweighted sample size
  c <- svyby(varformula, byformula, design, unwtd.count ) 
  p <- svyby(varformula, byformula, design, svymean) 
  outSum <- left_join(select(c,-se), p) 
  outSum
}

# Get results (see output for all results tables and plots)

#DUQ340 weighted mean overall

getSummary(~DUQ340, ~table_nhanes_1, NHANES_DUQ340)
## Joining with `by = join_by(table_nhanes_1)`
##   table_nhanes_1 counts   DUQ340        se
## 1              1    900 22.01556 0.3197709
out <- svyby(formula = ~DUQ340,
             by = ~table_nhanes_1,
             design = NHANES_DUQ340,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ340 - 1*se,
                  upper = DUQ340 + 1*se)

ggplot(out_col, aes(table_nhanes_1, DUQ340, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "orange") + 
  ylab("Average age first tried meth (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA,30)

ggplot(table_nhanes_1, aes(x = DUQ340, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE)+
  xlab("Age first tried meth (years)")

# By Gender
getSummary(~DUQ340, ~Gender, NHANES_DUQ340)
## Joining with `by = join_by(Gender)`
##   Gender counts   DUQ340        se
## 1    Men    588 22.19434 0.3468977
## 2  Women    312 21.67210 0.5626809
out <- svyby(formula = ~DUQ340,
             by = ~Gender,
             design = NHANES_DUQ340,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ340 - 1*se,
                  upper = DUQ340 + 1*se)

ggplot(out_col, aes(x = Gender, y = DUQ340, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "orange") + 
  ylab("Average age first tried meth (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA,30)

ggplot(table_nhanes_1, aes(x = DUQ340, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Gender) +
  xlab("Age first tried meth (years)")

# By Age
getSummary(~DUQ340, ~Age.Group, NHANES_DUQ340)
## Joining with `by = join_by(Age.Group)`
##   Age.Group counts   DUQ340        se
## 1  Under 20     12 16.62381 0.6661040
## 2     20-39    402 20.43651 0.2876035
## 3     40-59    486 23.08935 0.4841990
out <- svyby(formula = ~DUQ340,
             by = ~Age.Group,
             design = NHANES_DUQ340,
             FUN = svymean,
             na.rm = TRUE,
             keep.names = FALSE)

out_col <- mutate(out,
                  lower = DUQ340 - 1*se,
                  upper = DUQ340 + 1*se)

ggplot(out_col, aes(x = Age.Group, y = DUQ340, 
                    ymin = lower,
                    ymax = upper)) + 
  geom_col(fill = "orange") + 
  ylab("Average age first tried meth (years)") + 
  geom_errorbar(width = 0.8) + 
  ylim(NA,30)

ggplot(table_nhanes_1, aes(x = DUQ340, weight = WTMEC8YR)) +
  geom_histogram(binwidth = 5, color = "darkblue",
                 fill = "lightblue", 
                 na.rm = TRUE) +
  facet_wrap(~Age.Group) +
  xlab("Age first tried meth (years)")

################Relationship to Income:Family size ratio#######################

ggplot(table_nhanes_1, aes(x = DUQ340, 
                                     y = INDFMPIR, 
                                     alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) + 
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Age first tried meth (years)") 

ggplot(table_nhanes_1, aes(x = DUQ340, 
                                     y = INDFMPIR, 
                                     color = Gender, 
                                     alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Age first tried meth (years)") + 
  facet_wrap(~Gender) 

ggplot(table_nhanes_1, aes(x = DUQ340, 
                                     y = INDFMPIR, 
                                     color = Gender, 
                                     alpha = WTMEC8YR)) + 
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set2") + 
  ylab("Income to Family Ratio") + 
  xlab("Age first tried meth (years)") + 
  facet_wrap(~Age.Group) 

Description of above: Darker shading in dot plots indicates more heavily-weighted data points. Histogram “counts” are weighted. Similar to other drugs, more men (n = 588) than women (n = 312) reported age at first using methamphetamine (meth). Only 12 participants under 20 years responded. Similar to cocaine, the overall average age at first trying meth was ~22 yrs. Similar to marijuana, cocaine, and heroin, average age at first trying meth decreased as age group decreased, implying that participants aged 20-39 and 40-59 tried meth at older ages than the under 20 participants who responded. Interestingly, similar to heroin, the histograms show that men and women reported first trying meth at older ages including 30s, 40s and even 50s. The scatterplots demonstrate a pattern in which men and women 20-59 years old report trying meth for the first time at older ages relative to their cohorts at lower poverty levels.

```