This data is from a 2014 National Health Survey, which asked families about their economic, social, and health characteristics. My group focused on how often families worried that they would run out of food, and on which factors contributed to that insecurity, whether health-related or related to family structure.
library(car)
library(norm)
library(lattice)
library(nutshell)
## Loading required package: nutshell.bbdb
## Loading required package: nutshell.audioscrobbler
# Read the family-level survey file into `fam`, then preview its first six
# rows.
fam <- read.csv(file = "/Users/kimberlyhatlestad/Data Mining/family14.csv")
head(fam, n = 6L)
## RECTYPE SRVY_YR HHX FMX WTFA_FAM FINT_Y_P FINT_M_P TELN_FLG CURWRKN
## 1 60 2014 4 1 5024 2014 1 1 2
## 2 60 2014 7 1 5296 2014 1 1 1
## 3 60 2014 13 1 6048 2014 1 1 2
## 4 60 2014 14 1 4201 2014 2 1 1
## 5 60 2014 16 1 4168 2014 3 1 1
## 6 60 2014 18 1 4174 2014 3 1 1
## TELCELN WRKCELN PHONEUSE FLNGINTV FM_SIZE FM_KIDS FM_ELDR FM_TYPE
## 1 1 1 NA 1 1 0 0 1
## 2 1 2 3 1 3 0 1 2
## 3 1 2 NA 1 3 1 0 4
## 4 1 1 3 1 1 0 1 1
## 5 1 1 3 1 1 0 1 1
## 6 1 5 1 1 3 0 0 2
## FM_STRCP FM_STRP FM_EDUC1 FCHLMYN FCHLMCT FSPEDYN FSPEDCT FLAADLYN
## 1 11 11 2 NA NA NA NA 2
## 2 23 23 5 NA NA NA NA 2
## 3 41 42 5 2 0 2 0 2
## 4 11 11 9 NA NA NA NA 2
## 5 11 11 4 NA NA NA NA 2
## 6 23 23 8 NA NA NA NA 2
## FLAADLCT FLIADLYN FLIADLCT FWKLIMYN FWKLIMCT FWALKYN FWALKCT FREMEMYN
## 1 0 2 0 2 0 2 0 2
## 2 0 2 0 1 1 1 1 1
## 3 0 2 0 2 0 2 0 2
## 4 0 2 0 2 0 2 0 2
## 5 0 2 0 2 0 2 0 2
## 6 0 2 0 2 0 2 0 2
## FREMEMCT FANYLYN FANYLCT FHSTATEX FHSTATVG FHSTATG FHSTATFR FHSTATPR
## 1 0 2 0 0 1 0 0 0
## 2 1 1 2 0 2 1 0 0
## 3 0 2 0 3 0 0 0 0
## 4 0 2 0 1 0 0 0 0
## 5 0 2 0 1 0 0 0 0
## 6 0 2 0 1 2 0 0 0
## FSRUNOUT FSLAST FSBALANC FSSKIP FSSKDAYS FSLESS FSHUNGRY FSWEIGHT
## 1 3 3 3 NA NA NA NA NA
## 2 3 3 3 NA NA NA NA NA
## 3 3 3 3 NA NA NA NA NA
## 4 3 3 3 NA NA NA NA NA
## 5 3 3 3 NA NA NA NA NA
## 6 3 3 3 NA NA NA NA NA
## FSNOTEAT FSNEDAYS FDMEDYN FDMEDCT FNMEDYN FNMEDCT FHOSP2YN FHOSP2CT
## 1 NA NA 1 1 1 1 2 0
## 2 NA NA 2 0 2 0 1 1
## 3 NA NA 2 0 2 0 2 0
## 4 NA NA 2 0 2 0 2 0
## 5 NA NA 1 1 2 0 9 0
## 6 NA NA 2 0 2 0 2 0
## FHCHMYN FHCHMCT FHCPHRYN FHCPHRCT FHCDVYN FHCDVCT F10DVYN F10DVCT
## 1 2 0 2 0 2 0 2 0
## 2 2 0 2 0 2 0 2 0
## 3 2 0 2 0 2 0 2 0
## 4 2 0 2 0 2 0 2 0
## 5 2 0 2 0 2 0 1 1
## 6 2 0 1 1 1 1 2 0
## FHICOVYN FHICOVCT FHIPRVCT FHIEXCT FHISINCT FHICARCT FHICADCT FHICHPCT
## 1 2 0 0 0 0 0 0 0
## 2 1 3 3 0 0 2 0 0
## 3 1 3 3 0 0 0 0 0
## 4 1 1 1 0 0 1 0 0
## 5 1 1 0 0 0 1 0 0
## 6 1 3 3 0 3 0 0 0
## FHIMILCT FHIIHSCT FHIPUBCT FHIOGVCT FPRCOOH FHIEBCCT COVCONF FHICOST
## 1 0 0 0 0 NA NA NA 2
## 2 0 0 0 0 2 0 NA 5
## 3 0 0 0 0 2 2 1 1
## 4 0 0 0 0 NA 0 NA 2
## 5 0 0 0 0 NA NA NA 1
## 6 0 0 0 0 1 3 2 2
## FMEDBILL FMEDBPAY FMEDBNOP FSAF FHDSTCT FDGLWCT1 FDGLWCT2 FWRKLWCT
## 1 1 1 1 2 NA 1 0 1
## 2 1 1 2 2 NA 0 0 NA
## 3 2 2 NA 2 0 2 0 1
## 4 2 2 NA 2 NA 0 0 NA
## 5 2 2 NA 2 NA 0 0 NA
## 6 2 2 NA 2 NA 3 0 2
## FSALYN FSALCT FSEINCYN FSEINCCT FSSRRYN FSSRRCT FPENSYN FPENSCT FOPENSYN
## 1 1 1 2 0 2 0 2 0 2
## 2 2 0 2 0 1 2 2 0 2
## 3 1 2 2 0 2 0 2 0 2
## 4 2 0 2 0 2 0 2 0 1
## 5 2 0 2 0 1 1 2 0 1
## 6 1 2 1 1 2 0 2 0 2
## FOPENSCT FSSIYN FSSICT FTANFYN FTANFCT FOWBENYN FOWBENCT FINTR1YN
## 1 0 2 0 2 0 2 0 2
## 2 0 1 1 2 0 2 0 2
## 3 0 2 0 2 0 2 0 2
## 4 1 2 0 2 0 2 0 1
## 5 1 2 0 2 0 2 0 2
## 6 0 2 0 2 0 2 0 2
## FINTR1CT FDIVDYN FDIVDCT FCHSPYN FCHSPCT FINCOTYN FINCOTCT INCGRP4
## 1 0 2 0 1 1 2 0 1
## 2 0 1 2 2 0 2 0 2
## 3 0 2 0 2 0 2 0 3
## 4 1 1 1 2 0 2 0 5
## 5 0 2 0 2 0 2 0 1
## 6 0 1 2 2 0 2 0 5
## INCGRP5 RAT_CAT4 RAT_CAT5 HOUSEOWN FGAH FSSAPLYN FSSAPLCT FSDAPLYN
## 1 1 3 3 2 2 2 0 1
## 2 2 8 8 2 2 1 1 1
## 3 2 11 11 2 2 2 0 2
## 4 4 14 14 1 NA 2 0 2
## 5 1 6 6 1 NA 2 0 2
## 6 4 14 14 1 NA 2 0 2
## FSDAPLCT FSNAP FSNAPMYR FWICYN FWICCT
## 1 1 1 6 2 0
## 2 1 2 NA NA NA
## 3 0 2 NA 2 0
## 4 0 2 NA NA NA
## 5 0 2 NA NA NA
## 6 0 2 NA 2 0
# Narrow `fam` to the 19 survey columns that the rest of the analysis
# renames and recodes, then preview the first six rows.
fam <- fam[c(
  "FSBALANC", "FSRUNOUT", "FSNAP", "INCGRP5", "TELCELN", "FM_EDUC1",
  "FNMEDYN", "FM_SIZE", "FSWEIGHT", "F10DVYN", "FM_KIDS", "FMEDBNOP",
  "FSALYN", "FM_ELDR", "FSSRRYN", "FTANFYN", "FGAH", "FSNAPMYR", "FWICYN"
)]
head(fam, n = 6L)
## FSBALANC FSRUNOUT FSNAP INCGRP5 TELCELN FM_EDUC1 FNMEDYN FM_SIZE
## 1 3 3 1 1 1 2 1 1
## 2 3 3 2 2 1 5 2 3
## 3 3 3 2 2 1 5 2 3
## 4 3 3 2 4 1 9 2 1
## 5 3 3 2 1 1 4 2 1
## 6 3 3 2 4 1 8 2 3
## FSWEIGHT F10DVYN FM_KIDS FMEDBNOP FSALYN FM_ELDR FSSRRYN FTANFYN FGAH
## 1 NA 2 0 1 1 0 2 2 2
## 2 NA 2 0 2 2 1 1 2 2
## 3 NA 2 1 NA 1 0 2 2 2
## 4 NA 2 0 NA 2 1 2 2 NA
## 5 NA 1 0 NA 2 1 1 2 NA
## 6 NA 2 0 NA 1 0 2 2 NA
## FSNAPMYR FWICYN
## 1 6 2
## 2 NA NA
## 3 NA 2
## 4 NA NA
## 5 NA NA
## 6 NA 2
# Variable renaming ----
# The "unanswered" / "unavailable" response codes sit apart from the real
# answers; folding them into 0 removes that gap.

# Unbalanced_Meals: recoded copy of fam$FSBALANC with codes 7, 8 and 9
# collapsed into a single 0 category. table() shows the resulting counts.
Unbalanced_Meals <- recode(fam$FSBALANC, "c(7,8,9)=0")
table(Unbalanced_Meals)
## Unbalanced_Meals
## 0 1 2 3
## 60 1610 3937 39990
#.na.to.snglcode(Unbalanced_Meals, 0)
# Raw distribution of the income-group codes before any recoding.
table(fam[["INCGRP5"]])
##
## 1 2 3 4 96 99
## 16437 12013 4290 7477 2131 3249
# Inc.Grp: recoded copy of fam$INCGRP5 with codes 96, 97, 98 and 99
# collapsed into a single 0 category. table() shows the resulting counts.
Inc.Grp <- recode(fam$INCGRP5, "c(96,97,98,99)=0")
table(Inc.Grp)
## Inc.Grp
## 0 1 2 3 4
## 5380 16437 12013 4290 7477
#Inc.Grp[is.na(Inc.Grp)] <- "0"
# No_Care: recoded copy of fam$FNMEDYN with codes 7, 8 and 9 collapsed
# into a single 0 category. table() shows the resulting counts.
No_Care <- recode(fam$FNMEDYN, "c(7,8,9)=0")
table(No_Care)
## No_Care
## 0 1 2
## 42 4887 40668
#No_Care[is.na(No_Care)] <- "0"
# ten_Med_Visits: recoded copy of fam$F10DVYN with codes 7, 8 and 9
# collapsed into a single 0 category. table() shows the resulting counts.
ten_Med_Visits <- recode(fam$F10DVYN, "c(7,8,9)=0")
table(ten_Med_Visits)
## ten_Med_Visits
## 0 1 2
## 98 8869 36630
#ten_Med_Visits[is.na(ten_Med_Visits)] <- "0"
#Medicaid<-fam$FHICADCT
#Medicaid[is.na(Medicaid)] <- "0"
# MemIncome: recoded copy of fam$FSALYN with codes 7, 8 and 9 collapsed
# into a single 0 category. table() shows the resulting counts.
MemIncome <- recode(fam$FSALYN, "c(7,8,9)=0")
table(MemIncome)
## MemIncome
## 0 1 2
## 723 32976 11898
#MemIncome[is.na(MemIncome)] <- "0"
# FoodInsecurity: the response variable, a recoded copy of fam$FSRUNOUT
# with codes 7, 8 and 9 collapsed into a single 0 category.
# NOTE(review): 3 is by far the most common value in the table; presumably
# 3 means "never true", so larger values would indicate LESS insecurity.
# Confirm the code direction against the survey codebook before
# interpreting coefficient signs.
FoodInsecurity <- recode(fam$FSRUNOUT, "c(7,8,9)=0")
table(FoodInsecurity)
## FoodInsecurity
## 0 1 2 3
## 45 2257 5068 38227
#FoodInsecurity[is.na(FoodInsecurity)] <- "0"
# Cell: recoded copy of fam$TELCELN with codes 7, 8 and 9 collapsed into
# a single 0 category. table() shows the resulting counts.
Cell <- recode(fam$TELCELN, "c(7,8,9)=0")
table(Cell)
## Cell
## 0 1 2
## 181 40034 5382
#Cell[is.na(Cell)] <- "0"
# FamSz: plain copy of fam$FM_SIZE (no recoding); table() shows how many
# families fall at each size.
FamSz <- fam[["FM_SIZE"]]
table(FamSz)
## FamSz
## 1 2 3 4 5 6 7 8 9 10 11 12
## 14120 14512 6479 5817 2896 1134 391 136 65 24 13 7
## 13 14 16
## 1 1 1
#FamSz[is.na(FamSz)] <- "0"
#summary(FamSz)
#Memunder18<-fam$FM_KIDS
# Num.kids: plain copy of fam$FM_KIDS (no recoding); table() shows the
# count of families at each value.
Num.kids <- fam[["FM_KIDS"]]
table(Num.kids)
## Num.kids
## 0 1 2 3 4 5 6 7 8 9 10
## 30910 6073 5250 2345 731 188 70 21 7 1 1
#Memunder18[is.na(Memunder18)] <- "0"
# Elder: plain copy of fam$FM_ELDR (no recoding); table() shows the count
# of families at each value.
Elder <- fam[["FM_ELDR"]]
table(Elder)
## Elder
## 0 1 2 3 4
## 33805 8039 3697 54 2
# SNAP: recoded copy of fam$FSNAP with codes 7, 8 and 9 collapsed into a
# single 0 category. table() shows the resulting counts.
SNAP <- recode(fam$FSNAP, "c(7,8,9)=0")
table(SNAP)
## SNAP
## 0 1 2
## 748 6810 38039
# EduLevl: recoded copy of fam$FM_EDUC1. Only the two-digit codes 97, 98
# and 99 are collapsed into 0; single-digit codes are left untouched.
EduLevl <- recode(fam$FM_EDUC1, "c(97,98,99)=0")
# Zero-filling of missing values stays disabled:
#EduLevl[is.na(EduLevl)] <- "0"
table(EduLevl)
## EduLevl
## 0 1 2 3 4 5 6 7 8 9
## 340 1487 2965 1050 8841 8769 3880 1978 9560 6727
# WeightLost: recoded copy of fam$FSWEIGHT with codes 7, 8 and 9 collapsed
# into a single 0 category. Missing values stay NA, so table() counts only
# the families that have a value for this item.
WeightLost <- recode(fam$FSWEIGHT, "c(7,8,9)=0")
# Zero-filling of missing values stays disabled:
#WeightLost[is.na(WeightLost)] <- "0"
table(WeightLost)
## WeightLost
## 0 1 2
## 102 1257 7243
# UnpaidMedBills: recoded copy of fam$FMEDBNOP with codes 7, 8 and 9
# collapsed into a single 0 category. Missing values stay NA, so table()
# counts only the families that have a value for this item.
UnpaidMedBills <- recode(fam$FMEDBNOP, "c(7,8,9)=0")
# Zero-filling of missing values stays disabled:
#UnpaidMedBills[is.na(UnpaidMedBills)] <- "0"
table(UnpaidMedBills)
## UnpaidMedBills
## 0 1 2
## 221 3773 3180
# SSIMem: recoded copy of fam$FSSRRYN with codes 7, 8 and 9 collapsed into
# a single 0 category.
# NOTE(review): the raw file also has a separate FSSIYN column. FSSRRYN
# looks like the Social Security / Railroad Retirement flag rather than
# SSI, so the name "SSIMem" may be misleading. Confirm against the survey
# codebook.
SSIMem <- recode(fam$FSSRRYN, "c(7,8,9)=0")
# Zero-filling of missing values stays disabled:
#SSIMem[is.na(SSIMem)] <- "0"
table(SSIMem)
## SSIMem
## 0 1 2
## 738 13090 31769
# WelfareMem: recoded copy of fam$FTANFYN with codes 7, 8 and 9 collapsed
# into a single 0 category. table() shows the resulting counts.
WelfareMem <- recode(fam$FTANFYN, "c(7,8,9)=0")
# Zero-filling of missing values stays disabled:
#WelfareMem[is.na(WelfareMem)] <- "0"
table(WelfareMem)
## WelfareMem
## 0 1 2
## 737 932 43928
# Reduced_Rent: recoded copy of fam$FGAH with codes 7, 8 and 9 collapsed
# into a single 0 category.
#
# A former extra step, recode(Reduced_Rent, "'NA'=0"), has been removed.
# The quoted 'NA' is the literal string "NA", not a missing value, and that
# string never occurs in this numeric column, so the call changed nothing.
# Missing values therefore remain NA, and table() keeps leaving them out of
# the counts. To fold missing values into 0 instead, the recode
# specification would be the unquoted "NA=0".
Reduced_Rent <- recode(fam$FGAH, "c(7,8,9)=0")
# Zero-filling of missing values stays disabled:
#Reduced_Rent[is.na(Reduced_Rent)] <- "0"
table(Reduced_Rent)
## Reduced_Rent
## 0 1 2
## 95 2296 14609
# Time_on_Fd_Stmps: recoded copy of fam$FSNAPMYR. Codes 97, 98 and 99 are
# collapsed into 0, and missing values are zero-filled as well.
#
# The fill value is the number 0, not the string "0". Assigning a string
# coerces the whole vector to character, which makes table() order the
# values as text (0, 1, 10, 11, 12, 2, ...) and leaves a non-numeric
# variable for plotting and modelling. The counts below are unchanged; only
# their order now follows the numeric values.
Time_on_Fd_Stmps <- recode(fam$FSNAPMYR, "c(97,98,99)=0")
Time_on_Fd_Stmps[is.na(Time_on_Fd_Stmps)] <- 0
table(Time_on_Fd_Stmps)
## Time_on_Fd_Stmps
## 0 1 2 3 4 5 6 7 8 9 10 11
## 38906 137 198 226 153 139 472 82 99 95 111 61
## 12
## 4918
# Mem_on_WIC: recoded copy of fam$FWICYN with codes 7, 8 and 9 collapsed
# into a single 0 category. Missing values stay NA, so table() counts only
# the families that have a value for this item.
Mem_on_WIC <- recode(fam$FWICYN, "c(7,8,9)=0")
# Zero-filling of missing values and the preview stay disabled:
#Mem_on_WIC[is.na(Mem_on_WIC)] <- "0"
#head(Mem_on_WIC)
table(Mem_on_WIC)
## Mem_on_WIC
## 0 1 2
## 438 2217 22973
Variable visualization: checking which variables show any correlation, positive or negative, with food insecurity.
# Draw one plot of FoodInsecurity against each candidate predictor, one
# figure per formula, in the order listed.
# The plotted vectors live in the workspace rather than in `fam`; they are
# found through the formula's environment even though `data = fam` is
# passed.
fi_plot_formulas <- list(
  FoodInsecurity ~ Unbalanced_Meals,
  FoodInsecurity ~ FamSz,
  FoodInsecurity ~ Num.kids,
  FoodInsecurity ~ Elder,
  FoodInsecurity ~ EduLevl,
  FoodInsecurity ~ WeightLost,
  FoodInsecurity ~ UnpaidMedBills,
  FoodInsecurity ~ SSIMem,
  FoodInsecurity ~ WelfareMem,
  FoodInsecurity ~ Reduced_Rent,
  FoodInsecurity ~ Time_on_Fd_Stmps,
  FoodInsecurity ~ Mem_on_WIC
)
for (fi_formula in fi_plot_formulas) {
  plot(fi_formula, data = fam)
}
# Bind the response and the fifteen candidate predictors column-wise into
# one matrix; each column takes the name of the vector it was built from.
xx <- cbind(
  FoodInsecurity,
  Unbalanced_Meals,
  FamSz,
  EduLevl,
  WeightLost,
  UnpaidMedBills,
  SSIMem,
  WelfareMem,
  Reduced_Rent,
  SNAP,
  Mem_on_WIC,
  MemIncome,
  ten_Med_Visits,
  No_Care,
  Cell,
  Inc.Grp
)
First linear model: every variable in xx is included as a predictor.
# Full model: FoodInsecurity regressed on every other column of xx.
# lm() drops any row with a missing value in any column, so only the
# families with complete data on all sixteen variables are fitted.
m1 <- lm(formula = FoodInsecurity ~ ., data = data.frame(xx))
summary(m1)
##
## Call:
## lm(formula = FoodInsecurity ~ ., data = data.frame(xx))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.49672 -0.38985 0.00984 0.24636 1.88378
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.628699 0.225155 2.792 0.00531 **
## Unbalanced_Meals 0.222901 0.023335 9.552 < 2e-16 ***
## FamSz 0.003247 0.011074 0.293 0.76944
## EduLevl 0.004630 0.008675 0.534 0.59362
## WeightLost 0.257086 0.036146 7.112 1.84e-12 ***
## UnpaidMedBills 0.067656 0.033250 2.035 0.04207 *
## SSIMem 0.065413 0.045786 1.429 0.15333
## WelfareMem 0.002333 0.059258 0.039 0.96860
## Reduced_Rent 0.025957 0.045825 0.566 0.57120
## SNAP 0.106584 0.035120 3.035 0.00245 **
## Mem_on_WIC -0.054603 0.044878 -1.217 0.22393
## MemIncome -0.024841 0.045379 -0.547 0.58419
## ten_Med_Visits -0.041783 0.034879 -1.198 0.23115
## No_Care 0.007087 0.032436 0.219 0.82707
## Cell -0.011851 0.055043 -0.215 0.82956
## Inc.Grp -0.002402 0.027942 -0.086 0.93150
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.583 on 1350 degrees of freedom
## (44231 observations deleted due to missingness)
## Multiple R-squared: 0.1597, Adjusted R-squared: 0.1504
## F-statistic: 17.11 on 15 and 1350 DF, p-value: < 2.2e-16
Second linear model: only the variables that were significant in the first linear model.
# Reduced model: FoodInsecurity regressed on four predictors taken from
# xx. Rows with a missing value in any of these variables are dropped by
# lm().
m2 <- lm(
  formula = FoodInsecurity ~ Unbalanced_Meals + WeightLost +
    UnpaidMedBills + SNAP,
  data = data.frame(xx)
)
summary(m2)
##
## Call:
## lm(formula = FoodInsecurity ~ Unbalanced_Meals + WeightLost +
## UnpaidMedBills + SNAP, data = data.frame(xx))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.47941 -0.38987 0.00592 0.23506 1.86612
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.62192 0.05554 11.198 < 2e-16 ***
## Unbalanced_Meals 0.22914 0.01450 15.804 < 2e-16 ***
## WeightLost 0.25599 0.02342 10.930 < 2e-16 ***
## UnpaidMedBills 0.06269 0.02052 3.055 0.00226 **
## SNAP 0.11006 0.01940 5.673 1.52e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.588 on 3354 degrees of freedom
## (42238 observations deleted due to missingness)
## Multiple R-squared: 0.1541, Adjusted R-squared: 0.1531
## F-statistic: 152.8 on 4 and 3354 DF, p-value: < 2.2e-16
Our group's R-squared value was, sadly, not high enough for this to be a valuable model. I think that with more time to really sift through the variables, we could have come up with a better model.
I would have loved to be available when my group decided which variables to use. I would have liked to look more closely at how income and family structure, specifically, affect how a family feels about its food security. Our variables covered too broad a range of topics for my liking, as our low R-squared suggests.