#------------ Working directory
# NOTE(review): this absolute, machine-specific path makes the script
# non-portable; consider running from the project root instead.
setwd("C:/Users/C00252837/Dropbox/StudentParentsData")

#------------ Read in data
# `header = TRUE` rather than `T`: `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
# NOTE(review): the first column name in the recorded output below
# ("ï..Column1") looks like a byte-order mark read as text; if so,
# `fileEncoding = "UTF-8-BOM"` would clean it up -- confirm before changing.
data <- read.csv("sutdentparents_survey_weighted.csv", header = TRUE)
data <- as_tibble(data)

# List the available columns.
names(data)

##  [1] "ï..Column1"                       "x1"                              
##  [3] "Ã.Column1"                        "x"                               
##  [5] "classification"                   "enrollment"                      
##  [7] "department"                       "q4"                              
##  [9] "finanacial_assistance"            "q24_8_text"                      
## [11] "q23"                              "q38"                             
## [13] "q5"                               "q43"                             
## [15] "employment_status"                "gender"                          
## [17] "gender2"                          "q10_5_text"                      
## [19] "age_original"                     "race"                            
## [21] "race2"                            "q46"                             
## [23] "q46_4_text"                       "q47"                             
## [25] "q47_11_text"                      "pregnancy"                       
## [27] "parenthood"                       "numb_children"                   
## [29] "age_children"                     "q35_13"                          
## [31] "q35_14"                           "q35_15"                          
## [33] "q62"                              "column226"                       
## [35] "res_aware_both_mean"              "res_aware_patuniq_mean"          
## [37] "res_use_both_mean"                "res_use_patuniq_mean"            
## [39] "socialsupport_both_mean"          "socialsupport_patuniq_mean"      
## [41] "positive_exp_both_mean"           "positive_exp_patuniq_mean"       
## [43] "negative_exp_gen_both_mean"       "negative_exp_gen_patuniq_mean"   
## [45] "academic_diffty_both_mean"        "academic_diffty_patuniq_mean"    
## [47] "financial_ins_both_mean"          "financial_ins_patuniq_mean"      
## [49] "housing_ins_both_mean"            "physical_health_both_mean"       
## [51] "psycsocemo_health_both_mean"      "psycsocemo_health_patuniq_mean"  
## [53] "expectations_both_mean"           "expectations_patuniq_mean"       
## [55] "pat_childcare_patuniq_mean"       "child_issues_patuniq_mean"       
## [57] "pregnancy_patuniq_mean"           "age_recoded"                     
## [59] "age_recoded_narm"                 "parenthood_age"                  
## [61] "parenthood_age_narm"              "res_aware_both_mean_recode"      
## [63] "res_use_both_mean_recode"         "socialsupport_both_mean_recode"  
## [65] "positive_exp_both_mean_recode"    "negative_exp_gen_both_mean_recod"
## [67] "academic_diffty_both_mean_recode" "financial_ins_both_mean_recode"  
## [69] "housing_ins_both_mean_recode"     "physical_health_both_mean_recode"
## [71] "psycsocemo_health_both_mean_reco" "nurs_or_not"                     
## [73] "age"                              "agegroup"                        
## [75] "binary_gender_original"           "binary_gender"                   
## [77] "race3"                            "graduate"                        
## [79] "agegroup_tot"                     "binary_gender_tot"               
## [81] "race3_tot"                        "graduate_tot"                    
## [83] "counter1"                         "sample_tot"                      
## [85] "baseweight"                       "finalweight"

# Give the respondent identifier column `x` the clearer name `ID`.
data <- rename(data, ID = x)

Complex Survey Analysis

Differential respondent weighting (following https://rpubs.com/corey_sparks/53683)

# Total number of respondents in the sample
data %>% count()

## # A tibble: 1 x 1
##       n
##   <int>
## 1   738

# Parenthood status: un-weighted frequencies
table(data[["parenthood"]])

## 
##  No Yes 
## 571 167

# Parenthood status: un-weighted proportions
prop.table(table(data[["parenthood"]]))

## 
##        No       Yes 
## 0.7737127 0.2262873

# Graduate status: un-weighted frequencies (0 = undergraduate, 1 = graduate)
table(data[["graduate"]])

## 
##   0   1 
## 535 203

# Graduate status: un-weighted proportions
prop.table(table(data[["graduate"]]))

## 
##         0         1 
## 0.7249322 0.2750678

# Parenthood (rows) by graduate status (columns): un-weighted frequencies
table(data[["parenthood"]], data[["graduate"]])

##      
##         0   1
##   No  445 126
##   Yes  90  77

# Column proportions: share of parents within each graduate-status group
prop.table(
  table(data[["parenthood"]], data[["graduate"]]),
  margin = 2
)

##      
##               0         1
##   No  0.8317757 0.6206897
##   Yes 0.1682243 0.3793103

## Create a survey design object with the survey package:
## no clustering (ids = ~1), respondents weighted by `finalweight`.

library(survey)
des <- svydesign(ids = ~1, weights = ~finalweight, data = data)

# Re-do the analysis from above using the sample weights
library(questionr)
wtd.table(data[["parenthood"]], weights = data[["finalweight"]])

##    No   Yes 
## 12521  2203

# Parenthood status: weighted proportions
prop.table(
  wtd.table(data[["parenthood"]], weights = data[["finalweight"]])
)

##        No       Yes 
## 0.8503803 0.1496197

# Graduate status: weighted frequencies (0 = undergraduate, 1 = graduate)
wtd.table(data[["graduate"]], weights = data[["finalweight"]])

##     0     1 
## 12353  2371

# Graduate status: weighted proportions
prop.table(
  wtd.table(data[["graduate"]], weights = data[["finalweight"]])
)

##         0         1 
## 0.8389704 0.1610296

# Parenthood (rows) by graduate status (columns): weighted frequencies
wtd.table(
  data[["parenthood"]],
  data[["graduate"]],
  weights = data[["finalweight"]]
)

##         0     1
## No  10949  1572
## Yes  1404   799

# Weighted column proportions: share of parents within each graduate group
prop.table(
  wtd.table(
    data[["parenthood"]],
    data[["graduate"]],
    weights = data[["finalweight"]]
  ),
  margin = 2
)

##             0         1
## No  0.8863434 0.6630114
## Yes 0.1136566 0.3369886

t-test

# Un-weighted distribution summary of res_use_both_mean
# (presumably the resource-use scale score -- confirm against the codebook).
# The expression is left as written because describe() prints it as the label.
Hmisc::describe(data$res_use_both_mean)

## data$res_use_both_mean 
##        n  missing distinct     Info     Mean      Gmd 
##      519      219        5    0.755    2.796   0.6127 
## 
## lowest : 1 2 3 4 5, highest: 1 2 3 4 5
##                                         
## Value          1     2     3     4     5
## Frequency      3   152   313    50     1
## Proportion 0.006 0.293 0.603 0.096 0.002

# Un-weighted pooled-variance two-sample t-test: parents vs. non-parents
t.test(
  res_use_both_mean ~ parenthood,
  data = data,
  var.equal = TRUE
)

## 
##  Two Sample t-test
## 
## data:  res_use_both_mean by parenthood
## t = -0.22078, df = 517, p-value = 0.8254
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
##  -0.1427821  0.1139322
## sample estimates:
##  mean in group No mean in group Yes 
##          2.792593          2.807018

# Weighted distribution summary of res_use_both_mean, using finalweight.
# In the recorded output below, n and missing are weighted totals, not row counts.
Hmisc::describe(data$res_use_both_mean, weights = data$finalweight)

## data$res_use_both_mean 
##        n  missing distinct     Info     Mean 
##    10249     4475        5    0.762     2.77 
## 
## lowest : 1 2 3 4 5, highest: 1 2 3 4 5
##                                         
## Value          1     2     3     4     5
## Frequency     95  3122  6086   936    10
## Proportion 0.009 0.305 0.594 0.091 0.001

# Design-based (weighted) two-sample t-test: parents vs. non-parents.
# stats::t.test() has no `weights` argument; it was silently swallowed by `...`,
# so the previous call reproduced the un-weighted result exactly.
# svyttest() uses the survey design `des` (weights = finalweight) instead.
# NOTE: the output recorded below came from the old, un-weighted call and
# must be regenerated.
survey::svyttest(res_use_both_mean ~ parenthood, design = des)

## 
##  Two Sample t-test
## 
## data:  res_use_both_mean by parenthood
## t = -0.22078, df = 517, p-value = 0.8254
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
##  -0.1427821  0.1139322
## sample estimates:
##  mean in group No mean in group Yes 
##          2.792593          2.807018

##### Social Support #################################################################
# Un-weighted distribution summary of socialsupport_both_mean.
# The expression is left as written because describe() prints it as the label.
Hmisc::describe(data$socialsupport_both_mean)

## data$socialsupport_both_mean 
##        n  missing distinct     Info     Mean      Gmd 
##      607      131        5    0.878    3.489   0.9084 
## 
## lowest : 1 2 3 4 5, highest: 1 2 3 4 5
##                                         
## Value          1     2     3     4     5
## Frequency      6    63   229   246    63
## Proportion 0.010 0.104 0.377 0.405 0.104

# Un-weighted pooled-variance two-sample t-test: parents vs. non-parents
t.test(
  socialsupport_both_mean ~ parenthood,
  data = data,
  var.equal = TRUE
)

## 
##  Two Sample t-test
## 
## data:  socialsupport_both_mean by parenthood
## t = -2.8398, df = 605, p-value = 0.004665
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
##  -0.4107835 -0.0749049
## sample estimates:
##  mean in group No mean in group Yes 
##          3.440083          3.682927

# Weighted distribution summary of socialsupport_both_mean, using finalweight.
# In the recorded output below, n and missing are weighted totals, not row counts.
Hmisc::describe(data$socialsupport_both_mean, weights = data$finalweight)

## data$socialsupport_both_mean 
##        n  missing distinct     Info     Mean 
##    12375     2349        5    0.881    3.445 
## 
## lowest : 1 2 3 4 5, highest: 1 2 3 4 5
##                                         
## Value          1     2     3     4     5
## Frequency    148  1466  4679  4896  1186
## Proportion 0.012 0.118 0.378 0.396 0.096

# Design-based (weighted) two-sample t-test: parents vs. non-parents.
# stats::t.test() has no `weights` argument; it was silently swallowed by `...`,
# so the previous call reproduced the un-weighted result exactly.
# svyttest() uses the survey design `des` (weights = finalweight) instead.
# NOTE: the output recorded below came from the old, un-weighted call and
# must be regenerated.
survey::svyttest(socialsupport_both_mean ~ parenthood, design = des)

## 
##  Two Sample t-test
## 
## data:  socialsupport_both_mean by parenthood
## t = -2.8398, df = 605, p-value = 0.004665
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
##  -0.4107835 -0.0749049
## sample estimates:
##  mean in group No mean in group Yes 
##          3.440083          3.682927

##### academic_difficulty
# Un-weighted distribution summary of academic_diffty_both_mean.
# The expression is left as written because describe() prints it as the label.
Hmisc::describe(data$academic_diffty_both_mean)

## data$academic_diffty_both_mean 
##        n  missing distinct     Info     Mean      Gmd 
##      542      196        5    0.899    3.321    1.001 
## 
## lowest : 1 2 3 4 5, highest: 1 2 3 4 5
##                                         
## Value          1     2     3     4     5
## Frequency      7    93   218   167    57
## Proportion 0.013 0.172 0.402 0.308 0.105

# Un-weighted pooled-variance two-sample t-test: parents vs. non-parents
t.test(
  academic_diffty_both_mean ~ parenthood,
  data = data,
  var.equal = TRUE
)

## 
##  Two Sample t-test
## 
## data:  academic_diffty_both_mean by parenthood
## t = 3.0432, df = 540, p-value = 0.002454
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
##  0.1058207 0.4911664
## sample estimates:
##  mean in group No mean in group Yes 
##          3.381062          3.082569

# Weighted distribution summary of academic_diffty_both_mean, using finalweight.
# In the recorded output below, n and missing are weighted totals, not row counts.
Hmisc::describe(data$academic_diffty_both_mean, weights = data$finalweight)

## data$academic_diffty_both_mean 
##        n  missing distinct     Info     Mean 
##    11049     3675        5    0.897    3.381 
## 
## lowest : 1 2 3 4 5, highest: 1 2 3 4 5
##                                         
## Value          1     2     3     4     5
## Frequency     70  1719  4545  3356  1359
## Proportion 0.006 0.156 0.411 0.304 0.123

# Design-based (weighted) two-sample t-test: parents vs. non-parents.
# stats::t.test() has no `weights` argument; it was silently swallowed by `...`,
# so the previous call reproduced the un-weighted result exactly.
# svyttest() uses the survey design `des` (weights = finalweight) instead.
# NOTE: the output recorded below came from the old, un-weighted call and
# must be regenerated.
survey::svyttest(academic_diffty_both_mean ~ parenthood, design = des)

## 
##  Two Sample t-test
## 
## data:  academic_diffty_both_mean by parenthood
## t = 3.0432, df = 540, p-value = 0.002454
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
##  0.1058207 0.4911664
## sample estimates:
##  mean in group No mean in group Yes 
##          3.381062          3.082569

##### psycsocemo_health_issues ##########################################################
# Un-weighted distribution summary of psycsocemo_health_both_mean.
# The expression is left as written because describe() prints it as the label.
Hmisc::describe(data$psycsocemo_health_both_mean)

## data$psycsocemo_health_both_mean 
##        n  missing distinct     Info     Mean      Gmd 
##      557      181        5    0.883    3.578   0.9382 
## 
## lowest : 1 2 3 4 5, highest: 1 2 3 4 5
##                                         
## Value          1     2     3     4     5
## Frequency      1    66   174   242    74
## Proportion 0.002 0.118 0.312 0.434 0.133

# Un-weighted pooled-variance two-sample t-test: parents vs. non-parents
t.test(
  psycsocemo_health_both_mean ~ parenthood,
  data = data,
  var.equal = TRUE
)

## 
##  Two Sample t-test
## 
## data:  psycsocemo_health_both_mean by parenthood
## t = 2.4613, df = 555, p-value = 0.01415
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
##  0.04668813 0.41572232
## sample estimates:
##  mean in group No mean in group Yes 
##          3.621681          3.390476

# Weighted distribution summary of psycsocemo_health_both_mean, using finalweight.
# In the recorded output below, n and missing are weighted totals, not row counts.
Hmisc::describe(data$psycsocemo_health_both_mean, weights = data$finalweight)

## data$psycsocemo_health_both_mean 
##        n  missing distinct     Info     Mean 
##    11361     3363        5    0.888    3.592 
## 
## lowest : 1 2 3 4 5, highest: 1 2 3 4 5
##                                         
## Value          1     2     3     4     5
## Frequency     42  1343  3469  4866  1641
## Proportion 0.004 0.118 0.305 0.428 0.144

# Design-based (weighted) two-sample t-test: parents vs. non-parents.
# stats::t.test() has no `weights` argument; it was silently swallowed by `...`,
# so the previous call reproduced the un-weighted result exactly.
# svyttest() uses the survey design `des` (weights = finalweight) instead.
# NOTE: the output recorded below came from the old, un-weighted call and
# must be regenerated.
survey::svyttest(psycsocemo_health_both_mean ~ parenthood, design = des)

## 
##  Two Sample t-test
## 
## data:  psycsocemo_health_both_mean by parenthood
## t = 2.4613, df = 555, p-value = 0.01415
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
##  0.04668813 0.41572232
## sample estimates:
##  mean in group No mean in group Yes 
##          3.621681          3.390476

# Convert the categorical predictors to factors so the regressions below
# estimate one coefficient per level (binary_gender: 1 = female, 2 = male).
data <- data %>%
  mutate(
    across(
      c(parenthood, agegroup, binary_gender, race3, graduate),
      as.factor
    )
  )

# Weighted linear model of psycsocemo_health_both_mean on demographics,
# parenthood, social support, financial insecurity and physical health.
# Changes from the previous call:
#   * dropped `var.equal = TRUE`: it is a t.test() option, not an lm()
#     argument, and lm() only warns that it is disregarded;
#   * removed the stray second `+` before binary_gender;
#   * `weights = finalweight` is looked up in `data`, like the formula terms.
# Coefficient estimates are unchanged; only the printed Call differs.
# NOTE(review): lm() does not treat `finalweight` as survey sampling weights,
# so the standard errors are not design-based. Consider survey::svyglm() on a
# design rebuilt after the factor conversion.
reg_psycsocemo_health_both <- lm(
  psycsocemo_health_both_mean ~ agegroup + binary_gender + race3 + graduate +
    parenthood + socialsupport_both_mean + financial_ins_both_mean +
    physical_health_both_mean,
  data = data,
  weights = finalweight
)

summary(reg_psycsocemo_health_both)

## 
## Call:
## lm(formula = psycsocemo_health_both_mean ~ agegroup + +binary_gender + 
##     race3 + graduate + parenthood + socialsupport_both_mean + 
##     financial_ins_both_mean + physical_health_both_mean, data = data, 
##     weights = data$finalweight, var.equal = TRUE)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.4769  -1.7348   0.1566   1.7664   9.6573 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                3.46015    0.24627  14.050  < 2e-16 ***
## agegroup2                  0.01410    0.07774   0.181   0.8562    
## agegroup3                 -0.09927    0.08576  -1.158   0.2476    
## agegroup4                 -0.11089    0.12690  -0.874   0.3827    
## binary_gender2            -0.07775    0.06110  -1.272   0.2039    
## race31                    -0.15926    0.21054  -0.756   0.4498    
## race32                    -0.29322    0.11798  -2.485   0.0133 *  
## race33                     0.06492    0.14910   0.435   0.6634    
## race34                    -0.21396    0.10235  -2.090   0.0371 *  
## graduate1                 -0.15944    0.10349  -1.541   0.1241    
## parenthoodYes             -0.09344    0.12287  -0.760   0.4474    
## socialsupport_both_mean   -0.31729    0.03860  -8.221 1.95e-15 ***
## financial_ins_both_mean    0.25892    0.03925   6.597 1.12e-10 ***
## physical_health_both_mean  0.20535    0.03107   6.609 1.04e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.965 on 474 degrees of freedom
##   (250 observations deleted due to missingness)
## Multiple R-squared:  0.466,  Adjusted R-squared:  0.4514 
## F-statistic: 31.82 on 13 and 474 DF,  p-value: < 2.2e-16

# Weighted linear model of academic_diffty_both_mean on demographics,
# parenthood, social support, financial insecurity, physical health and
# psycsocemo health.
# Changes from the previous call:
#   * dropped `var.equal = TRUE`: it is a t.test() option, not an lm()
#     argument, and lm() only warns that it is disregarded;
#   * `weights = finalweight` is looked up in `data`, like the formula terms.
# Coefficient estimates are unchanged; only the printed Call differs.
# NOTE(review): lm() does not treat `finalweight` as survey sampling weights,
# so the standard errors are not design-based. Consider survey::svyglm() on a
# design rebuilt after the factor conversion.
reg_academic_diffty_both_mean <- lm(
  academic_diffty_both_mean ~ agegroup + binary_gender + race3 + graduate +
    parenthood + socialsupport_both_mean + financial_ins_both_mean +
    physical_health_both_mean + psycsocemo_health_both_mean,
  data = data,
  weights = finalweight
)

summary(reg_academic_diffty_both_mean)

## 
## Call:
## lm(formula = academic_diffty_both_mean ~ agegroup + binary_gender + 
##     race3 + graduate + parenthood + socialsupport_both_mean + 
##     financial_ins_both_mean + physical_health_both_mean + psycsocemo_health_both_mean, 
##     data = data, weights = data$finalweight, var.equal = TRUE)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.5957 -1.6734  0.0118  1.6559  9.1170 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  1.266906   0.278066   4.556 6.74e-06 ***
## agegroup2                    0.090008   0.073289   1.228  0.22005    
## agegroup3                   -0.051923   0.080802  -0.643  0.52081    
## agegroup4                   -0.175296   0.121199  -1.446  0.14878    
## binary_gender2               0.172855   0.057788   2.991  0.00293 ** 
## race31                      -0.003832   0.197575  -0.019  0.98453    
## race32                      -0.191330   0.113004  -1.693  0.09113 .  
## race33                      -0.259703   0.142764  -1.819  0.06957 .  
## race34                      -0.325543   0.097481  -3.340  0.00091 ***
## graduate1                   -0.092642   0.100140  -0.925  0.35540    
## parenthoodYes                0.056598   0.117851   0.480  0.63128    
## socialsupport_both_mean     -0.092298   0.039290  -2.349  0.01925 *  
## financial_ins_both_mean      0.399052   0.038786  10.288  < 2e-16 ***
## physical_health_both_mean    0.040564   0.030463   1.332  0.18368    
## psycsocemo_health_both_mean  0.340292   0.043302   7.859 2.95e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.73 on 445 degrees of freedom
##   (278 observations deleted due to missingness)
## Multiple R-squared:  0.5745, Adjusted R-squared:  0.5611 
## F-statistic: 42.92 on 14 and 445 DF,  p-value: < 2.2e-16

sutdentparents_survey_weighted

Hung-Chu Lin

11/10/2021

Complex Survey Analysis

Differential respondent weighting (following https://rpubs.com/corey_sparks/53683)

t-test