201201_Math217_FP

Setup Stuff

NB: family_sample_x was exported as a .csv file under the name family_sample_clean. family_sample_x is what I edited in NCSS.

Rename to a simpler name

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.0.2

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0

## Warning: package 'tibble' was built under R version 4.0.3

## Warning: package 'tidyr' was built under R version 4.0.2

## Warning: package 'dplyr' was built under R version 4.0.2

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(infer)

## Warning: package 'infer' was built under R version 4.0.2

library(gmodels)

## Warning: package 'gmodels' was built under R version 4.0.3

getwd()

## [1] "C:/Users/Jerome/Documents/From_Toshiba_HD_Work_Files/0000_Montgomery_College/Math_217/Final_Project/Working_Folder"

fsc  <- read.csv("family_sample_clean.csv")

Eliminate NA in food_sec; check it w/ the table command

fsc$food_sec [is.na(fsc$food_sec)] <- 0

table(fsc$food_sec)

## 
##   0   1 
## 764  92

table(fsc$health_ins)

## 
##   0   1 
## 773  83

Save the file after eliminating NA in food_sec

write.csv(fsc, file = "fsc.csv", row.names = FALSE)

Eliminate an unneeded variable

fsc <- fsc[-c(102),]

Verify codes in HOUSEOWN

table(fsc$HOUSEOWN)

## 
##   1   2   3 
## 532 294  29

write.csv(fsc, file = "fsc.csv", row.names = FALSE)

Some confusion here – when did I create fsc_fix_ind?

fsc_fix_ind <- read.csv("fsc_fix_ind.csv")

In the next 2 chunks I update (fix?) the 2 indicators, health_ins & food_sec

fsc_fix_ind <- mutate(fsc_fix_ind, food_sec = ifelse(FSRUNOUT == 1 | FSLAST == 1 | FSSKIP == 1 | FSBALANC == 1 | FSLESS == 1 | FSHUNGRY == 1 | FSWEIGHT == 1 | FSNOTEAT == 1, 1,0))

fsc_fix_ind <- mutate(fsc_fix_ind, health_ins = ifelse(FHIPRVCT > 0 | FHISINCT > 0  | FHICARCT > 0 | FHICADCT >0 | FHICHPCT >0 | FHIMILCT > 0  | FHIPUBCT > 0 | FHIOGVCT > 0 | FHIIHSCT> 0 | FHIEXCT > 0, 1,0))

table(fsc_fix_ind$health_ins)

## 
##   0   1 
##  46 809

Save the file after updating the indicators

write.csv(fsc_fix_ind, file = "fsc_fix_ind.csv", row.names = FALSE)

Remove the NA code in food_sec. health_ins did not get NA codes but food_sec did. Why????

fsc_fix_ind$food_sec[is.na(fsc_fix_ind$food_sec)] = 0

write.csv(fsc_fix_ind, file = "fsc_fix_ind.csv", row.names = FALSE)

Check values in the indicator variables

table(fsc_fix_ind$food_sec)

## 
##   0   1 
## 763  92

table(fsc_fix_ind$health_ins)

## 
##   0   1 
##  46 809

Prepare the Plots

hist(fsc_fix_ind$FM_EDUC1)

hist(fsc_fix_ind$FM_TYPE)

Analyze the codes for Health Status in preparation for creating another indicator and doin chi-square

table(fsc_fix_ind$FHSTATEX)

## 
##   0   1   2   3   4   5   6 
## 501 166  95  55  31   4   3

table(fsc_fix_ind$FHSTATVG)

## 
##   0   1   2   3   4   5   6   7   8 
## 478 204 114  32  20   4   1   1   1

table(fsc_fix_ind$FHSTATG)

## 
##   0   1   2   3   4   5   6   7 
## 569 197  70   6   6   5   1   1

table(fsc_fix_ind$FHSTATFR)

## 
##   0   1   2   3   5 
## 731 100  21   2   1

table(fsc_fix_ind$FHSTATPR)

## 
##   0   1   2   3 
## 808  40   6   1

hs_chi_sq <- read.csv("hs_chi_sq.csv")

Chi-square on health status - not used in final report

expected = c(0.2, 0.2, 0.2, 0.2, 0.2)
health_status = c(354, 377, 286, 124, 47)
chisq.test(health_status, p=expected) # p is not the second option, so must be labeled

## 
##  Chi-squared test for given probabilities
## 
## data:  health_status
## X-squared = 355.88, df = 4, p-value < 2.2e-16

There is strong evidence the health status of the individuals in these families is disproportional, with the majority of persons in excellent to good health.

The health indicators data were collected as counts of # of persons in the family in that category. Changed the counts to binary variables.

fsc_fix_ind <- mutate(fsc_fix_ind, excel = ifelse(FHSTATEX > 0,  1,0))
fsc_fix_ind <- mutate(fsc_fix_ind, veryg = ifelse(FHSTATVG > 0,  1,0))
fsc_fix_ind <- mutate(fsc_fix_ind, good = ifelse(FHSTATG > 0,  1,0))
fsc_fix_ind <- mutate(fsc_fix_ind, fair = ifelse(FHSTATFR > 0,  1,0))
fsc_fix_ind <- mutate(fsc_fix_ind, poor = ifelse(FHSTATPR > 0,  1,0))

Save file

write.csv(fsc_fix_ind, file = "fsc_fix_ind.csv", row.names = FALSE)

create new file to have a health status indicator

family_scored <- read.csv("family_scored.csv")

family_scored <- mutate(family_scored, score = ifelse(excel > 0 | veryg > 0  | good > 0, 1,0))

write.csv(family_scored, file = "family_scored.csv", row.names = FALSE)

More plots for report. From here to Line 328, all plotting.

ggplot(family_scored, aes(x=FM_SIZE))+
  geom_bar()+
  facet_grid(~health_ins)+
  ggtitle("Family Size by Health Insurance Status")

healthinsnew <- family_scored %>%
  mutate(health_ins=recode(health_ins, "0"="No Ins", "1"="Have Ins"))

table(healthinsnew$health_ins)

## 
## Have Ins   No Ins 
##      809       46

p1 <- healthinsnew %>%
  ggplot(aes(x=FM_SIZE))+
  geom_bar()+
  facet_grid(~health_ins)+
  ggtitle("Family Size by Health Insurance Status") +
  scale_x_continuous(breaks=c(0,1,2,3,4,5,6,7,8,9))
p1

table(family_scored$FM_SIZE)

## 
##   1   2   3   4   5   6   7   8   9 
## 308 261 122  95  48  12   5   1   3

table(healthinsnew$food_sec)

## 
##   0   1 
## 763  92

indicators <- healthinsnew %>%
  mutate(food_sec=recode(food_sec, "1"="Food Insecure", "0"="Food Secure"))
table(indicators$food_sec)

## 
## Food Insecure   Food Secure 
##            92           763

p2 <- indicators %>%
  ggplot(aes(x=FM_SIZE))+
  geom_bar()+
  facet_grid(~food_sec)+
  ggtitle("Family Size by Food Security Status") +
  scale_x_continuous(breaks=c(0,1,2,3,4,5,6,7,8,9))
p2

write.csv(healthinsnew, file = "healthinsnew.csv", row.names = FALSE)
write.csv(indicators, file = "indicators.csv", row.names = FALSE)

yes_tablefs <- table(indicators$FM_SIZE, indicators$food_sec == 0)
no_tablefs <- table(indicators$FM_SIZE, indicators$food_sec == 1)
yes_tablehi <- table(indicators$FM_SIZE, indicators$health_ins == 1)
no_tableshi <- table(indicators$FM_SIZE, indicators$health_ins == 0)

educ_numbers <-table(indicators$FM_EDUC1)
table(educ_numbers)

## educ_numbers
##  18  21  43  45  76 142 158 160 192 
##   1   1   1   1   1   1   1   1   1

educ_numbers

## 
##   1   2   3   4   5   6   7   8   9 
##  21  45  18 142 160  76  43 192 158

p3 <- indicators %>%
  ggplot(aes(x=FM_EDUC1))+
  geom_bar()+
  #facet_grid(~food_sec)+
   ggtitle("Number of Persons at Each Education Level") +
  xlab("Education Levels") +
  scale_x_continuous(breaks=c(0,1,2,3,4,5,6,7,8,9))
p3

p4 <- indicators %>%
  ggplot(aes(x=FM_TYPE))+
  geom_bar()+
  #facet_grid(~food_sec)+
  ggtitle("Number of Families of Each Type") +
  xlab("Family Type") +
  scale_x_continuous(breaks=c(0, 1,2,3,4))
p4

p5 <- indicators %>%
  ggplot(aes(x=HOUSEOWN))+
  geom_bar()+
  facet_grid(~food_sec)+
  ggtitle("Housing Tenure Status by Food Security") +
   xlab("Housing Tenure Status") +
  scale_x_continuous(breaks=c(0,1,2,3))
p5

p6 <- indicators %>%
  ggplot(aes(x=HOUSEOWN))+
  geom_bar()+
  facet_grid(~health_ins)+
  ggtitle("Housing Tenure Status by Health Insurance") +
   xlab("Housing Tenure Status") +
  scale_x_continuous(breaks=c(0,1,2,3))
p6

Verify data

table(indicators$FM_SIZE, indicators$food_sec == 0)

##    
##     FALSE
##   1   308
##   2   261
##   3   122
##   4    95
##   5    48
##   6    12
##   7     5
##   8     1
##   9     3

This is the code I used to creae the binary health status indicator.

indicators <- indicators %>%
  mutate(score1=recode(score, "0"="Fair/Poor", "1"="Good to Excellent"))

Created numeric binary health_ins and food_sec codes

indicators <- indicators %>%
 mutate(health_ins1 = recode(health_ins, "Have Ins" = "1", "No Ins" = "0"))

indicators <- indicators %>%
  mutate(food_sec1=recode(food_sec, "Food Insecure" = "1", "Food Secure" = "0"))

Verify the mutate worked.

table(indicators$health_ins)

## 
## Have Ins   No Ins 
##      809       46

table(indicators$health_ins1)

## 
##   0   1 
##  46 809

table(indicators$food_sec)

## 
## Food Insecure   Food Secure 
##            92           763

table(indicators$food_sec1)

## 
##   0   1 
## 763  92

Plots after I fixed the data to have more descriptive titles. From here to Line 410.

p7 <- indicators %>%
  ggplot(aes(x=score1))+
  geom_bar()+
  facet_grid(~health_ins)+
  ggtitle("HealthStatus by Health Insurance Status") +
   xlab("Health Status") 
  
p7

p8 <- indicators %>%
  ggplot(aes(x=score1))+
  geom_bar()+
  facet_grid(~food_sec)+
  ggtitle("HealthStatus by Food Security Status") +
   xlab("Health Status") 
 
p8

Save the file

write.csv(indicators, file = "indicators.csv", row.names = FALSE)

Check the file

dim(indicators)

## [1] 855  55

My first attempt at the logistic regression run. Put everything in. It didn’t work.

#fit0 <- glm(indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + as.factor(indicators$INCGRPS) + #as.factor(indicators$RAT_CAT5 + as.factor(indicators$FANYLCT) + as.factor(indicators$FM_EDUC1) + indicators$HOUSEOWN + #indicators$FNMEDYN, family = binomial()))

#summary(fit0)

At Professor Saidi’s suggestion, ran the logistic regression by adding one variable at a time. Started w/ health_ins as that was thought to be the strongest indicator.

fit0 <- glm(indicators$score ~ indicators$health_ins1, family = binomial())
summary(fit0)

## 
## Call:
## glm(formula = indicators$score ~ indicators$health_ins1, family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2189   0.4223   0.4223   0.4223   0.4797  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               2.1041     0.4737   4.442 8.92e-06 ***
## indicators$health_ins11   0.2684     0.4901   0.548    0.584    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 503.28  on 853  degrees of freedom
## AIC: 507.28
## 
## Number of Fisher Scoring iterations: 5

Add Food Security

fit1 <- glm(indicators$score ~ indicators$health_ins1 + indicators$food_sec1,  family = binomial())
summary(fit1)

## 
## Call:
## glm(formula = indicators$score ~ indicators$health_ins1 + indicators$food_sec1, 
##     family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3442   0.3639   0.3639   0.3639   0.8110  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              2.63420    0.50721   5.194 2.06e-07 ***
## indicators$health_ins11  0.04719    0.51100   0.092    0.926    
## indicators$food_sec11   -1.69092    0.27816  -6.079 1.21e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 471.46  on 852  degrees of freedom
## AIC: 477.46
## 
## Number of Fisher Scoring iterations: 5

Check values in Income Groups variable

table(indicators$INCGRP5)

## 
##   1   2   3   4 
## 294 272  99 190

Add Housing tenure status to the model.

fit2 <- glm(indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + as.factor(indicators$HOUSEOWN),  family = binomial())
summary(fit2)

## 
## Call:
## glm(formula = indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + 
##     as.factor(indicators$HOUSEOWN), family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3899   0.3491   0.3491   0.3762   1.0726  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      2.79659    0.52816   5.295 1.19e-07 ***
## indicators$health_ins11         -0.02951    0.51476  -0.057    0.954    
## indicators$food_sec11           -1.64313    0.29082  -5.650 1.60e-08 ***
## as.factor(indicators$HOUSEOWN)2 -0.15406    0.27191  -0.567    0.571    
## as.factor(indicators$HOUSEOWN)3 -0.87231    0.54374  -1.604    0.109    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 469.15  on 850  degrees of freedom
## AIC: 479.15
## 
## Number of Fisher Scoring iterations: 5

Add the variable “Family Member did not get medical care because of cost”

fit3 <- glm(indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN,  family = binomial())
summary(fit3)

## 
## Call:
## glm(formula = indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + 
##     as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN, family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4127   0.3346   0.3346   0.3576   1.3861  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       0.9305     0.7378   1.261 0.207238    
## indicators$health_ins11          -0.3826     0.5345  -0.716 0.474088    
## indicators$food_sec11            -1.3007     0.3155  -4.123 3.74e-05 ***
## as.factor(indicators$HOUSEOWN)2  -0.1367     0.2762  -0.495 0.620533    
## as.factor(indicators$HOUSEOWN)3  -0.8789     0.5460  -1.610 0.107475    
## indicators$FNMEDYN                1.1533     0.3325   3.469 0.000522 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 458.16  on 849  degrees of freedom
## AIC: 470.16
## 
## Number of Fisher Scoring iterations: 5

Add the variable “Problems paying medical bills”

fit4 <- glm(indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL,  family = binomial())
summary(fit4)

## 
## Call:
## glm(formula = indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + 
##     as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL, 
##     family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4259   0.3291   0.3291   0.3527   1.2506  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       0.5081     0.8396   0.605 0.545032    
## indicators$health_ins11          -0.3099     0.5348  -0.579 0.562316    
## indicators$food_sec11            -1.2165     0.3246  -3.747 0.000179 ***
## as.factor(indicators$HOUSEOWN)2  -0.1422     0.2759  -0.515 0.606260    
## as.factor(indicators$HOUSEOWN)3  -0.8656     0.5458  -1.586 0.112740    
## indicators$FNMEDYN                0.9769     0.3708   2.634 0.008428 ** 
## indicators$FMEDBILL               0.3682     0.3500   1.052 0.292817    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 457.09  on 848  degrees of freedom
## AIC: 471.09
## 
## Number of Fisher Scoring iterations: 5

The variable FANYLCT is a count of the # of persons in the family w/ limitations on their activities due to health issues. I recoded this to a binary variable - presence or absense of health limitations in the family. Saved the file and read it back in.

indicators <- mutate(indicators, limit = ifelse(FANYLCT > 0, 1,0))

write.csv(indicators, file = "indicators.csv", row.names = FALSE)

indicators <- read.csv("indicators.csv")

Added the new limitations variable.

fit5 <- glm(indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL + indicators$limit ,  family = binomial())
summary(fit5)

## 
## Call:
## glm(formula = indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + 
##     as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL + 
##     indicators$limit, family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7910   0.2028   0.2041   0.3227   1.4918  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      2.08087    0.90128   2.309   0.0210 *  
## indicators$health_ins1          -0.09081    0.56906  -0.160   0.8732    
## indicators$food_sec1            -0.54540    0.34005  -1.604   0.1087    
## as.factor(indicators$HOUSEOWN)2 -0.26886    0.28727  -0.936   0.3493    
## as.factor(indicators$HOUSEOWN)3 -0.94487    0.60105  -1.572   0.1159    
## indicators$FNMEDYN               0.92907    0.39334   2.362   0.0182 *  
## indicators$FMEDBILL              0.01298    0.36951   0.035   0.9720    
## indicators$limit                -2.16905    0.31023  -6.992 2.72e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 399.99  on 847  degrees of freedom
## AIC: 415.99
## 
## Number of Fisher Scoring iterations: 6

Added the Education variable - person in the family w/ the highest level of education

fit6 <- glm(indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL + indicators$limit + as.factor(indicators$FM_EDUC1),  family = binomial())
summary(fit6)

## 
## Call:
## glm(formula = indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + 
##     as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL + 
##     indicators$limit + as.factor(indicators$FM_EDUC1), family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8059   0.1125   0.2055   0.3240   1.5367  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       0.5879     1.0858   0.541  0.58819    
## indicators$health_ins1           -0.4000     0.5995  -0.667  0.50460    
## indicators$food_sec1             -0.4305     0.3532  -1.219  0.22298    
## as.factor(indicators$HOUSEOWN)2  -0.1206     0.3043  -0.396  0.69200    
## as.factor(indicators$HOUSEOWN)3  -0.7270     0.6271  -1.159  0.24631    
## indicators$FNMEDYN                1.0308     0.4088   2.522  0.01168 *  
## indicators$FMEDBILL               0.1069     0.3819   0.280  0.77964    
## indicators$limit                 -2.0574     0.3185  -6.459 1.06e-10 ***
## as.factor(indicators$FM_EDUC1)2   0.4689     0.7032   0.667  0.50494    
## as.factor(indicators$FM_EDUC1)3   1.3031     1.0036   1.298  0.19413    
## as.factor(indicators$FM_EDUC1)4   0.5782     0.6459   0.895  0.37072    
## as.factor(indicators$FM_EDUC1)5   1.4537     0.6671   2.179  0.02933 *  
## as.factor(indicators$FM_EDUC1)6   2.3201     0.8687   2.671  0.00757 ** 
## as.factor(indicators$FM_EDUC1)7   2.5967     1.2044   2.156  0.03108 *  
## as.factor(indicators$FM_EDUC1)8   1.3843     0.6821   2.029  0.04243 *  
## as.factor(indicators$FM_EDUC1)9   2.8279     0.9402   3.008  0.00263 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 371.87  on 839  degrees of freedom
## AIC: 403.87
## 
## Number of Fisher Scoring iterations: 7

Added the income groups variable

fit7 <- glm(indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL + indicators$limit + as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5),  family = binomial())
summary(fit7)

## 
## Call:
## glm(formula = indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + 
##     as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL + 
##     indicators$limit + as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5), 
##     family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9612   0.1002   0.2042   0.3645   1.5160  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      0.25346    1.07666   0.235  0.81389    
## indicators$health_ins1          -0.36053    0.60004  -0.601  0.54794    
## indicators$food_sec1            -0.16589    0.36221  -0.458  0.64697    
## as.factor(indicators$HOUSEOWN)2  0.08505    0.31246   0.272  0.78547    
## as.factor(indicators$HOUSEOWN)3 -0.29272    0.64301  -0.455  0.64894    
## indicators$FNMEDYN               0.96197    0.41363   2.326  0.02004 *  
## indicators$FMEDBILL              0.06758    0.38845   0.174  0.86189    
## indicators$limit                -1.96128    0.32445  -6.045 1.49e-09 ***
## as.factor(indicators$FM_EDUC1)2  0.43664    0.69934   0.624  0.53239    
## as.factor(indicators$FM_EDUC1)3  1.34468    1.01093   1.330  0.18347    
## as.factor(indicators$FM_EDUC1)4  0.58099    0.64182   0.905  0.36535    
## as.factor(indicators$FM_EDUC1)5  1.24935    0.66119   1.890  0.05882 .  
## as.factor(indicators$FM_EDUC1)6  1.85468    0.87383   2.122  0.03380 *  
## as.factor(indicators$FM_EDUC1)7  2.29559    1.21851   1.884  0.05957 .  
## as.factor(indicators$FM_EDUC1)8  0.71964    0.69584   1.034  0.30104    
## as.factor(indicators$FM_EDUC1)9  2.16847    0.95344   2.274  0.02294 *  
## as.factor(indicators$INCGRP5)2   1.17047    0.38156   3.068  0.00216 ** 
## as.factor(indicators$INCGRP5)3   0.59165    0.55221   1.071  0.28398    
## as.factor(indicators$INCGRP5)4   2.61667    1.06915   2.447  0.01439 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 355.45  on 836  degrees of freedom
## AIC: 393.45
## 
## Number of Fisher Scoring iterations: 8

Added the variable ratio of family’s income to poverty level

fit8 <- glm(indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL + indicators$limit + as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5) + as.factor(indicators$RAT_CAT5),  family = binomial())
summary(fit8)

## 
## Call:
## glm(formula = indicators$score ~ indicators$health_ins1 + indicators$food_sec1 + 
##     as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL + 
##     indicators$limit + as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5) + 
##     as.factor(indicators$RAT_CAT5), family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1835   0.0713   0.1600   0.3277   1.9937  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        1.64917    1.31749   1.252 0.210660    
## indicators$health_ins1            -0.11794    0.60814  -0.194 0.846223    
## indicators$food_sec1              -0.43717    0.40366  -1.083 0.278801    
## as.factor(indicators$HOUSEOWN)2   -0.08546    0.34607  -0.247 0.804949    
## as.factor(indicators$HOUSEOWN)3   -0.63660    0.68317  -0.932 0.351423    
## indicators$FNMEDYN                 1.17535    0.44967   2.614 0.008954 ** 
## indicators$FMEDBILL               -0.15262    0.42536  -0.359 0.719741    
## indicators$limit                  -2.21431    0.34844  -6.355 2.09e-10 ***
## as.factor(indicators$FM_EDUC1)2    0.24328    0.74874   0.325 0.745246    
## as.factor(indicators$FM_EDUC1)3    1.71550    1.05601   1.625 0.104268    
## as.factor(indicators$FM_EDUC1)4    0.63354    0.67714   0.936 0.349468    
## as.factor(indicators$FM_EDUC1)5    1.27178    0.69545   1.829 0.067444 .  
## as.factor(indicators$FM_EDUC1)6    2.02456    0.90818   2.229 0.025798 *  
## as.factor(indicators$FM_EDUC1)7    2.46843    1.25408   1.968 0.049032 *  
## as.factor(indicators$FM_EDUC1)8    0.94796    0.73647   1.287 0.198032    
## as.factor(indicators$FM_EDUC1)9    2.39162    0.97960   2.441 0.014629 *  
## as.factor(indicators$INCGRP5)2     2.01831    0.59481   3.393 0.000691 ***
## as.factor(indicators$INCGRP5)3     1.74416    0.90156   1.935 0.053040 .  
## as.factor(indicators$INCGRP5)4     4.30215    1.34720   3.193 0.001406 ** 
## as.factor(indicators$RAT_CAT5)2   -0.41676    0.84633  -0.492 0.622414    
## as.factor(indicators$RAT_CAT5)3   -1.77109    0.72804  -2.433 0.014987 *  
## as.factor(indicators$RAT_CAT5)4   -1.61631    0.78407  -2.061 0.039262 *  
## as.factor(indicators$RAT_CAT5)5   -1.90040    0.85024  -2.235 0.025409 *  
## as.factor(indicators$RAT_CAT5)6   -1.38167    0.83415  -1.656 0.097645 .  
## as.factor(indicators$RAT_CAT5)7   -0.81903    0.89711  -0.913 0.361262    
## as.factor(indicators$RAT_CAT5)8   -1.16748    0.92036  -1.268 0.204621    
## as.factor(indicators$RAT_CAT5)9   -2.52872    0.87160  -2.901 0.003717 ** 
## as.factor(indicators$RAT_CAT5)10  -2.57473    1.17982  -2.182 0.029086 *  
## as.factor(indicators$RAT_CAT5)11  -1.69326    1.39314  -1.215 0.224202    
## as.factor(indicators$RAT_CAT5)12  -3.28227    1.11267  -2.950 0.003179 ** 
## as.factor(indicators$RAT_CAT5)13  -2.14303    1.46389  -1.464 0.143214    
## as.factor(indicators$RAT_CAT5)14  -3.43785    1.13306  -3.034 0.002412 ** 
## as.factor(indicators$RAT_CAT5)15   0.17093    1.05778   0.162 0.871628    
## as.factor(indicators$RAT_CAT5)16  -2.16446    0.99432  -2.177 0.029493 *  
## as.factor(indicators$RAT_CAT5)17  -1.69690    1.36489  -1.243 0.213776    
## as.factor(indicators$RAT_CAT5)18  11.44231  970.10787   0.012 0.990589    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 329.65  on 819  degrees of freedom
## AIC: 401.65
## 
## Number of Fisher Scoring iterations: 17

Save the file one more time

write.csv(indicators, file = "indicators.csv", row.names = FALSE)

At 06:42 hours on 11 Dec 2020 I finished editing the .Rmd code above. Now I will begin removing variables to create a more parsimonious model. I will remove the variables one at a time, using the p-values from the last run, fit8. Note there were only 4 variables that had p-values > .05. Those were health insurance, food security, housing tenure, and difficulty paying or unable to pay medical bills. Some of the individual values of factor variables were not significant, but because others were, I had to leave those variables in the model.

Remove health insurance

fit90 <- glm(indicators$score ~   indicators$food_sec1 + as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL + indicators$limit + as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5) + as.factor(indicators$RAT_CAT5),  family = binomial())
summary(fit90)

## 
## Call:
## glm(formula = indicators$score ~ indicators$food_sec1 + as.factor(indicators$HOUSEOWN) + 
##     indicators$FNMEDYN + indicators$FMEDBILL + indicators$limit + 
##     as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5) + 
##     as.factor(indicators$RAT_CAT5), family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1887   0.0701   0.1598   0.3307   1.9897  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        1.57876    1.26559   1.247 0.212232    
## indicators$food_sec1              -0.43309    0.40295  -1.075 0.282458    
## as.factor(indicators$HOUSEOWN)2   -0.08166    0.34521  -0.237 0.812995    
## as.factor(indicators$HOUSEOWN)3   -0.62218    0.67877  -0.917 0.359338    
## indicators$FNMEDYN                 1.15587    0.43773   2.641 0.008276 ** 
## indicators$FMEDBILL               -0.14251    0.42151  -0.338 0.735289    
## indicators$limit                  -2.22025    0.34738  -6.391 1.64e-10 ***
## as.factor(indicators$FM_EDUC1)2    0.22188    0.74136   0.299 0.764725    
## as.factor(indicators$FM_EDUC1)3    1.69397    1.05075   1.612 0.106928    
## as.factor(indicators$FM_EDUC1)4    0.61887    0.67381   0.918 0.358379    
## as.factor(indicators$FM_EDUC1)5    1.25226    0.68889   1.818 0.069094 .  
## as.factor(indicators$FM_EDUC1)6    2.01129    0.90734   2.217 0.026645 *  
## as.factor(indicators$FM_EDUC1)7    2.44092    1.24605   1.959 0.050122 .  
## as.factor(indicators$FM_EDUC1)8    0.92188    0.72516   1.271 0.203635    
## as.factor(indicators$FM_EDUC1)9    2.36773    0.97257   2.435 0.014912 *  
## as.factor(indicators$INCGRP5)2     2.01733    0.59477   3.392 0.000694 ***
## as.factor(indicators$INCGRP5)3     1.73942    0.90180   1.929 0.053752 .  
## as.factor(indicators$INCGRP5)4     4.30239    1.34752   3.193 0.001409 ** 
## as.factor(indicators$RAT_CAT5)2   -0.42880    0.84371  -0.508 0.611291    
## as.factor(indicators$RAT_CAT5)3   -1.77845    0.72683  -2.447 0.014411 *  
## as.factor(indicators$RAT_CAT5)4   -1.61464    0.78329  -2.061 0.039269 *  
## as.factor(indicators$RAT_CAT5)5   -1.90982    0.84854  -2.251 0.024404 *  
## as.factor(indicators$RAT_CAT5)6   -1.38003    0.83413  -1.654 0.098036 .  
## as.factor(indicators$RAT_CAT5)7   -0.82591    0.89407  -0.924 0.355609    
## as.factor(indicators$RAT_CAT5)8   -1.16409    0.91911  -1.267 0.205317    
## as.factor(indicators$RAT_CAT5)9   -2.52871    0.87068  -2.904 0.003681 ** 
## as.factor(indicators$RAT_CAT5)10  -2.57616    1.17920  -2.185 0.028914 *  
## as.factor(indicators$RAT_CAT5)11  -1.66366    1.38582  -1.200 0.229951    
## as.factor(indicators$RAT_CAT5)12  -3.27936    1.11235  -2.948 0.003197 ** 
## as.factor(indicators$RAT_CAT5)13  -2.14204    1.46326  -1.464 0.143227    
## as.factor(indicators$RAT_CAT5)14  -3.44074    1.13218  -3.039 0.002373 ** 
## as.factor(indicators$RAT_CAT5)15   0.19331    1.05607   0.183 0.854763    
## as.factor(indicators$RAT_CAT5)16  -2.17858    0.99184  -2.197 0.028055 *  
## as.factor(indicators$RAT_CAT5)17  -1.71275    1.36188  -1.258 0.208525    
## as.factor(indicators$RAT_CAT5)18  11.43650  969.91107   0.012 0.990592    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 329.69  on 820  degrees of freedom
## AIC: 399.69
## 
## Number of Fisher Scoring iterations: 17

Removing health insurance did not change the significance of the food security or housing tenure variables. Some of the p-values of the education categories changed, but one of them remained significant at the .05 level.

Next remove food security

fit91 <- glm(indicators$score ~    as.factor(indicators$HOUSEOWN) + indicators$FNMEDYN + indicators$FMEDBILL + indicators$limit + as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5) + as.factor(indicators$RAT_CAT5),  family = binomial())
summary(fit91)

## 
## Call:
## glm(formula = indicators$score ~ as.factor(indicators$HOUSEOWN) + 
##     indicators$FNMEDYN + indicators$FMEDBILL + indicators$limit + 
##     as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5) + 
##     as.factor(indicators$RAT_CAT5), family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1949   0.0671   0.1560   0.3328   1.9007  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        1.17538    1.19513   0.983 0.325374    
## as.factor(indicators$HOUSEOWN)2   -0.13425    0.34027  -0.395 0.693186    
## as.factor(indicators$HOUSEOWN)3   -0.59796    0.67338  -0.888 0.374544    
## indicators$FNMEDYN                 1.22483    0.43376   2.824 0.004747 ** 
## indicators$FMEDBILL               -0.05262    0.41226  -0.128 0.898428    
## indicators$limit                  -2.27520    0.34286  -6.636 3.22e-11 ***
## as.factor(indicators$FM_EDUC1)2    0.18311    0.73513   0.249 0.803299    
## as.factor(indicators$FM_EDUC1)3    1.64499    1.05198   1.564 0.117886    
## as.factor(indicators$FM_EDUC1)4    0.57528    0.66953   0.859 0.390221    
## as.factor(indicators$FM_EDUC1)5    1.18838    0.68240   1.741 0.081600 .  
## as.factor(indicators$FM_EDUC1)6    1.97350    0.90897   2.171 0.029920 *  
## as.factor(indicators$FM_EDUC1)7    2.41205    1.24944   1.931 0.053545 .  
## as.factor(indicators$FM_EDUC1)8    0.87143    0.72107   1.209 0.226846    
## as.factor(indicators$FM_EDUC1)9    2.31116    0.96828   2.387 0.016992 *  
## as.factor(indicators$INCGRP5)2     2.05823    0.59682   3.449 0.000563 ***
## as.factor(indicators$INCGRP5)3     1.77435    0.90569   1.959 0.050099 .  
## as.factor(indicators$INCGRP5)4     4.34370    1.34885   3.220 0.001281 ** 
## as.factor(indicators$RAT_CAT5)2   -0.46581    0.83390  -0.559 0.576441    
## as.factor(indicators$RAT_CAT5)3   -1.70170    0.71685  -2.374 0.017602 *  
## as.factor(indicators$RAT_CAT5)4   -1.47293    0.76465  -1.926 0.054072 .  
## as.factor(indicators$RAT_CAT5)5   -1.74824    0.82827  -2.111 0.034796 *  
## as.factor(indicators$RAT_CAT5)6   -1.22261    0.81745  -1.496 0.134749    
## as.factor(indicators$RAT_CAT5)7   -0.79146    0.88279  -0.897 0.369964    
## as.factor(indicators$RAT_CAT5)8   -1.05572    0.90613  -1.165 0.243985    
## as.factor(indicators$RAT_CAT5)9   -2.36227    0.84793  -2.786 0.005338 ** 
## as.factor(indicators$RAT_CAT5)10  -2.46418    1.16828  -2.109 0.034923 *  
## as.factor(indicators$RAT_CAT5)11  -1.49038    1.37570  -1.083 0.278649    
## as.factor(indicators$RAT_CAT5)12  -3.13825    1.09881  -2.856 0.004290 ** 
## as.factor(indicators$RAT_CAT5)13  -1.98355    1.45472  -1.364 0.172716    
## as.factor(indicators$RAT_CAT5)14  -3.30650    1.12153  -2.948 0.003196 ** 
## as.factor(indicators$RAT_CAT5)15   0.27995    1.04355   0.268 0.788494    
## as.factor(indicators$RAT_CAT5)16  -2.19097    0.98237  -2.230 0.025729 *  
## as.factor(indicators$RAT_CAT5)17  -1.55347    1.35606  -1.146 0.251971    
## as.factor(indicators$RAT_CAT5)18  11.56923  967.26885   0.012 0.990457    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 330.84  on 821  degrees of freedom
## AIC: 398.84
## 
## Number of Fisher Scoring iterations: 17

Same result as the run removing health insurance. Education, income group, and ratio of income category to income all had significant p-values for at least some of the values. Limitations and did not obtain medical care due to cost remained significant.

fit92 <- glm(indicators$score ~    indicators$FNMEDYN + indicators$FMEDBILL + indicators$limit + as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5) + as.factor(indicators$RAT_CAT5),  family = binomial())
summary(fit92)

## 
## Call:
## glm(formula = indicators$score ~ indicators$FNMEDYN + indicators$FMEDBILL + 
##     indicators$limit + as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5) + 
##     as.factor(indicators$RAT_CAT5), family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1883   0.0658   0.1589   0.3316   1.8938  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        0.9188     1.1384   0.807 0.419615    
## indicators$FNMEDYN                 1.2174     0.4317   2.820 0.004807 ** 
## indicators$FMEDBILL               -0.0310     0.4104  -0.076 0.939788    
## indicators$limit                  -2.2480     0.3390  -6.631 3.35e-11 ***
## as.factor(indicators$FM_EDUC1)2    0.1957     0.7298   0.268 0.788609    
## as.factor(indicators$FM_EDUC1)3    1.7210     1.0500   1.639 0.101218    
## as.factor(indicators$FM_EDUC1)4    0.6541     0.6578   0.994 0.320001    
## as.factor(indicators$FM_EDUC1)5    1.2394     0.6730   1.842 0.065514 .  
## as.factor(indicators$FM_EDUC1)6    1.9396     0.8990   2.157 0.030970 *  
## as.factor(indicators$FM_EDUC1)7    2.5154     1.2417   2.026 0.042795 *  
## as.factor(indicators$FM_EDUC1)8    0.8772     0.7138   1.229 0.219131    
## as.factor(indicators$FM_EDUC1)9    2.3685     0.9629   2.460 0.013909 *  
## as.factor(indicators$INCGRP5)2     2.1002     0.5969   3.519 0.000434 ***
## as.factor(indicators$INCGRP5)3     1.8721     0.8997   2.081 0.037457 *  
## as.factor(indicators$INCGRP5)4     4.4776     1.3397   3.342 0.000831 ***
## as.factor(indicators$RAT_CAT5)2   -0.4338     0.8298  -0.523 0.601156    
## as.factor(indicators$RAT_CAT5)3   -1.6316     0.7152  -2.281 0.022540 *  
## as.factor(indicators$RAT_CAT5)4   -1.3998     0.7594  -1.843 0.065290 .  
## as.factor(indicators$RAT_CAT5)5   -1.6641     0.8228  -2.023 0.043123 *  
## as.factor(indicators$RAT_CAT5)6   -1.1519     0.8111  -1.420 0.155556    
## as.factor(indicators$RAT_CAT5)7   -0.7131     0.8714  -0.818 0.413166    
## as.factor(indicators$RAT_CAT5)8   -0.9697     0.8989  -1.079 0.280703    
## as.factor(indicators$RAT_CAT5)9   -2.2659     0.8230  -2.753 0.005904 ** 
## as.factor(indicators$RAT_CAT5)10  -2.3843     1.1593  -2.057 0.039719 *  
## as.factor(indicators$RAT_CAT5)11  -1.3824     1.3648  -1.013 0.311082    
## as.factor(indicators$RAT_CAT5)12  -3.0601     1.0952  -2.794 0.005204 ** 
## as.factor(indicators$RAT_CAT5)13  -1.9299     1.4439  -1.337 0.181341    
## as.factor(indicators$RAT_CAT5)14  -3.2733     1.1123  -2.943 0.003251 ** 
## as.factor(indicators$RAT_CAT5)15   0.2635     1.0317   0.255 0.798392    
## as.factor(indicators$RAT_CAT5)16  -2.0787     0.9771  -2.127 0.033380 *  
## as.factor(indicators$RAT_CAT5)17  -1.5882     1.3531  -1.174 0.240478    
## as.factor(indicators$RAT_CAT5)18  11.6625   965.7550   0.012 0.990365    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 331.63  on 823  degrees of freedom
## AIC: 395.63
## 
## Number of Fisher Scoring iterations: 17

Same result as above. Education, income group, and ratio of income to poverty level remained significant, at least for some values. Limitations and did not receive medical care due to cost remained significant.

fit93 <- glm(indicators$score ~    indicators$FNMEDYN  + indicators$limit + as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5) + as.factor(indicators$RAT_CAT5),  family = binomial())
summary(fit93)

## 
## Call:
## glm(formula = indicators$score ~ indicators$FNMEDYN + indicators$limit + 
##     as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5) + 
##     as.factor(indicators$RAT_CAT5), family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1884   0.0657   0.1589   0.3303   1.8965  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        0.8862     1.0535   0.841 0.400214    
## indicators$FNMEDYN                 1.2008     0.3723   3.226 0.001257 ** 
## indicators$limit                  -2.2440     0.3350  -6.698 2.11e-11 ***
## as.factor(indicators$FM_EDUC1)2    0.2018     0.7250   0.278 0.780795    
## as.factor(indicators$FM_EDUC1)3    1.7295     1.0437   1.657 0.097495 .  
## as.factor(indicators$FM_EDUC1)4    0.6557     0.6570   0.998 0.318248    
## as.factor(indicators$FM_EDUC1)5    1.2438     0.6700   1.856 0.063398 .  
## as.factor(indicators$FM_EDUC1)6    1.9455     0.8955   2.173 0.029808 *  
## as.factor(indicators$FM_EDUC1)7    2.5185     1.2409   2.030 0.042392 *  
## as.factor(indicators$FM_EDUC1)8    0.8830     0.7093   1.245 0.213161    
## as.factor(indicators$FM_EDUC1)9    2.3697     0.9625   2.462 0.013815 *  
## as.factor(indicators$INCGRP5)2     2.0997     0.5968   3.518 0.000434 ***
## as.factor(indicators$INCGRP5)3     1.8711     0.8988   2.082 0.037377 *  
## as.factor(indicators$INCGRP5)4     4.4735     1.3380   3.343 0.000828 ***
## as.factor(indicators$RAT_CAT5)2   -0.4385     0.8272  -0.530 0.596042    
## as.factor(indicators$RAT_CAT5)3   -1.6276     0.7132  -2.282 0.022488 *  
## as.factor(indicators$RAT_CAT5)4   -1.3952     0.7569  -1.843 0.065284 .  
## as.factor(indicators$RAT_CAT5)5   -1.6620     0.8223  -2.021 0.043267 *  
## as.factor(indicators$RAT_CAT5)6   -1.1498     0.8106  -1.419 0.156029    
## as.factor(indicators$RAT_CAT5)7   -0.7109     0.8709  -0.816 0.414340    
## as.factor(indicators$RAT_CAT5)8   -0.9666     0.8978  -1.077 0.281650    
## as.factor(indicators$RAT_CAT5)9   -2.2653     0.8230  -2.753 0.005912 ** 
## as.factor(indicators$RAT_CAT5)10  -2.3839     1.1590  -2.057 0.039711 *  
## as.factor(indicators$RAT_CAT5)11  -1.3828     1.3648  -1.013 0.310978    
## as.factor(indicators$RAT_CAT5)12  -3.0577     1.0946  -2.793 0.005215 ** 
## as.factor(indicators$RAT_CAT5)13  -1.9290     1.4435  -1.336 0.181461    
## as.factor(indicators$RAT_CAT5)14  -3.2723     1.1116  -2.944 0.003242 ** 
## as.factor(indicators$RAT_CAT5)15   0.2568     1.0271   0.250 0.802569    
## as.factor(indicators$RAT_CAT5)16  -2.0772     0.9773  -2.125 0.033545 *  
## as.factor(indicators$RAT_CAT5)17  -1.5867     1.3524  -1.173 0.240692    
## as.factor(indicators$RAT_CAT5)18  11.6613   966.4303   0.012 0.990373    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 331.64  on 824  degrees of freedom
## AIC: 393.64
## 
## Number of Fisher Scoring iterations: 17

Remove the variable ratio of income group to poverty level.

fit94 <- glm(indicators$score ~    indicators$FNMEDYN  + indicators$limit + as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5),  family = binomial())
summary(fit94)

## 
## Call:
## glm(formula = indicators$score ~ indicators$FNMEDYN + indicators$limit + 
##     as.factor(indicators$FM_EDUC1) + as.factor(indicators$INCGRP5), 
##     family = binomial())
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.98884   0.09939   0.20433   0.37538   1.45369  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      0.05904    0.87829   0.067  0.94640    
## indicators$FNMEDYN               0.99866    0.34085   2.930  0.00339 ** 
## indicators$limit                -2.02919    0.31133  -6.518 7.14e-11 ***
## as.factor(indicators$FM_EDUC1)2  0.34202    0.69098   0.495  0.62062    
## as.factor(indicators$FM_EDUC1)3  1.22873    0.99465   1.235  0.21671    
## as.factor(indicators$FM_EDUC1)4  0.53540    0.63382   0.845  0.39826    
## as.factor(indicators$FM_EDUC1)5  1.18315    0.65321   1.811  0.07010 .  
## as.factor(indicators$FM_EDUC1)6  1.76407    0.86759   2.033  0.04202 *  
## as.factor(indicators$FM_EDUC1)7  2.20301    1.20494   1.828  0.06750 .  
## as.factor(indicators$FM_EDUC1)8  0.58685    0.68054   0.862  0.38850    
## as.factor(indicators$FM_EDUC1)9  2.08218    0.94239   2.209  0.02714 *  
## as.factor(indicators$INCGRP5)2   1.21550    0.36957   3.289  0.00101 ** 
## as.factor(indicators$INCGRP5)3   0.61452    0.54031   1.137  0.25539    
## as.factor(indicators$INCGRP5)4   2.66476    1.05911   2.516  0.01187 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 356.39  on 841  degrees of freedom
## AIC: 384.39
## 
## Number of Fisher Scoring iterations: 8

Remove education.

fit95 <- glm(indicators$score ~    indicators$FNMEDYN  + indicators$limit  + as.factor(indicators$INCGRP5),  family = binomial())
summary(fit95)

## 
## Call:
## glm(formula = indicators$score ~ indicators$FNMEDYN + indicators$limit + 
##     as.factor(indicators$INCGRP5), family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9238   0.0761   0.2076   0.3370   1.2062  
## 
## Coefficients:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      1.0587     0.6498   1.629  0.10326    
## indicators$FNMEDYN               0.8905     0.3277   2.718  0.00658 ** 
## indicators$limit                -2.0167     0.3060  -6.589 4.42e-11 ***
## as.factor(indicators$INCGRP5)2   1.4206     0.3448   4.120 3.80e-05 ***
## as.factor(indicators$INCGRP5)3   0.8287     0.5088   1.629  0.10338    
## as.factor(indicators$INCGRP5)4   3.0034     1.0243   2.932  0.00337 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 372.13  on 849  degrees of freedom
## AIC: 384.13
## 
## Number of Fisher Scoring iterations: 8

Suppose we remove income - what happens?

fit96 <- glm(indicators$score ~    indicators$FNMEDYN  + indicators$limit,  family = binomial())
summary(fit96)

## 
## Call:
## glm(formula = indicators$score ~ indicators$FNMEDYN + indicators$limit, 
##     family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7372   0.2186   0.2186   0.3846   1.0581  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          1.4115     0.6383   2.212  0.02700 *  
## indicators$FNMEDYN   1.1554     0.3172   3.643  0.00027 ***
## indicators$limit    -2.2797     0.2982  -7.645 2.08e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 406.41  on 852  degrees of freedom
## AIC: 412.41
## 
## Number of Fisher Scoring iterations: 6

OOPS! Score goes up. Put the income groups back in for the final analysis.

Correlation between limit and score – Family Member has health limitations and health status indicator.

cor.test(indicators$score, indicators$limit)

## 
##  Pearson's product-moment correlation
## 
## data:  indicators$score and indicators$limit
## t = -10.298, df = 853, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3908629 -0.2715381
## sample estimates:
##        cor 
## -0.3325306

Negative because 0 is fair to poor health and 1 = has a limitation, so the expected correlation is negative. But i thought it would be higher than this.

Trying to show graphically the relationship between family size and income. Question: Why didn’t the y-axis continuous break command work like it did on the x-axis?

p11 <- indicators %>%
  ggplot(aes(x=INCGRP5,y=FM_SIZE))+
  geom_point()+
  
  ggtitle("Family Size by Income Group") +
  xlab("Income Group") +
  ylab("Family Size") +
  scale_x_continuous(breaks=c(0, 1,2,3,4)) +
  scale_y_continuous(breaks = c(0,1,2,3,4,5,6,7,8,9))
p11

Correlation between family size and income groups to see if families w/ larger incomes had larger families. They don’t. This confirms the visual in the scatterplot above.

cor.test(indicators$FM_SIZE, indicators$INCGRP5)

## 
##  Pearson's product-moment correlation
## 
## data:  indicators$FM_SIZE and indicators$INCGRP5
## t = 6.7116, df = 853, p-value = 3.509e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1593074 0.2867032
## sample estimates:
##       cor 
## 0.2239618

Change the values in FNMEDYN

As I wrote the model according to the formula Professor Saidi sent, it occurred to me I may have found significance in the “did not receive medical care due to cost” variable because “Yes” was coded 1, and in the intercept, and “No” was coded 2. I then changed the values to “No” = 0 and “Yes” = 1. I reran the model (fit100, below) but the result still showed that variable significant, but the sign changed to negative and the value in the intercept became significant. This to me is a real conundrum, but I don’t have time to dig into this to see what significance it has. The AIC value is the same, so I will report the results without this wrinkle. I did want to note it here.

indicators <- mutate(indicators, nomedcare = ifelse(FNMEDYN == 1,  1,0))

table(indicators$nomedcare)

## 
##   0   1 
## 776  79

fit100 <- glm(indicators$score ~    indicators$nomedcare  + indicators$limit  + as.factor(indicators$INCGRP5),  family = binomial())
summary(fit100)

## 
## Call:
## glm(formula = indicators$score ~ indicators$nomedcare + indicators$limit + 
##     as.factor(indicators$INCGRP5), family = binomial())
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9238   0.0761   0.2076   0.3370   1.2062  
## 
## Coefficients:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      2.8397     0.2876   9.874  < 2e-16 ***
## indicators$nomedcare            -0.8905     0.3277  -2.718  0.00658 ** 
## indicators$limit                -2.0167     0.3060  -6.589 4.42e-11 ***
## as.factor(indicators$INCGRP5)2   1.4206     0.3448   4.120 3.80e-05 ***
## as.factor(indicators$INCGRP5)3   0.8287     0.5088   1.629  0.10338    
## as.factor(indicators$INCGRP5)4   3.0034     1.0243   2.932  0.00337 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 503.56  on 854  degrees of freedom
## Residual deviance: 372.13  on 849  degrees of freedom
## AIC: 384.13
## 
## Number of Fisher Scoring iterations: 8

201201_Math217_FP_Analysis2

Jerome

12/1/2020