## Preliminary look at the data, before the further analysis in R.


## Read family.csv into the data frame `Chopped` and print its first six rows.
## NOTE(review): the path is absolute and machine-specific, so the script only
## runs where this exact file location exists.
Chopped <- read.csv(file = "c:/users/abbey/Desktop/Data Mining/family.csv")
head(Chopped, n = 6L)
##   FM_SIZE FM_KIDS FM_EDUC1 FSRUNOUT FSLAST FSBALANC FSSKIP FSSKDAYS FSLESS
## 1       3       2        4        1      2        2      1        8      1
## 2       4       3        9        1      2        2      2       NA      2
## 3       3       1       97        1      1        1      1       15      1
## 4       2       0        4        1      2        2      1        3      1
## 5       5       3        5        1      1        1      1       30      1
## 6       2       1        2        1      2        3      2       NA      2
##   FSHUNGRY FSWEIGHT FSNOTEAT FSNEDAYS FDMEDYN INCGRP4 FSNAP
## 1        1        1        1        7       2       1     1
## 2        2        2       NA       NA       1       2     2
## 3        1        1        2       NA       2      99     7
## 4        1        2        2       NA       2       1     1
## 5        1        2        2       NA       1       3     2
## 6        2        2       NA       NA       2       1     1
library (car)

## Linear regression (lm) of FSRUNOUT on six other columns of `Chopped`;
## summary() prints the coefficient table and the overall fit statistics.
## NOTE(review): head(Chopped) shows INCGRP4 = 99 in row 3. If 99 is a
## non-response code it is entering the model as an ordinary number --
## confirm against the codebook.
fit <- lm(
  formula = FSRUNOUT ~ FSBALANC + FSLAST + FDMEDYN + FSWEIGHT +
    FSHUNGRY + INCGRP4,
  data = Chopped
)
summary(fit)
## 
## Call:
## lm(formula = FSRUNOUT ~ FSBALANC + FSLAST + FDMEDYN + FSWEIGHT + 
##     FSHUNGRY + INCGRP4, data = Chopped)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.71119 -0.27918  0.07383  0.31155  0.74443 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.8222234  0.0230502  35.671  < 2e-16 ***
## FSBALANC    0.0873303  0.0070310  12.421  < 2e-16 ***
## FSLAST      0.2908153  0.0079020  36.803  < 2e-16 ***
## FDMEDYN     0.0236071  0.0086057   2.743  0.00610 ** 
## FSWEIGHT    0.0011734  0.0051847   0.226  0.82096    
## FSHUNGRY    0.0299511  0.0093403   3.207  0.00135 ** 
## INCGRP4     0.0004679  0.0002001   2.338  0.01941 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3866 on 7318 degrees of freedom
## Multiple R-squared:  0.2995, Adjusted R-squared:  0.2989 
## F-statistic: 521.5 on 6 and 7318 DF,  p-value: < 2.2e-16
## According to the linear regression (lm), whether respondents said they were worried (FSRUNOUT) is associated
## with whether or not they were constantly eating balanced meals, whether they had run out of food in the last 12 months,
## whether they went to the doctor when they needed medical attention, whether they experienced hunger and did not eat, and their income.
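## The coefficients are all positive, so a higher value of each independent variable is associated with a higher
## predicted FSRUNOUT (this is a linear model, so the coefficients are changes in the response, not probabilities).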
## The overall adjusted R-squared is about 0.30, i.e. the model explains roughly 30% of the variance in FSRUNOUT,
## which is relatively low, so the overall F statistic of the model is checked next.
## F statistic ----
## Overall F test of the model, recomputed from the fitted object rather than
## from rounded numbers copied out of the printed summary. The F statistic is
## defined with the multiple R-squared (not the adjusted R-squared):
##   F = (R^2 / numerator df) / ((1 - R^2) / residual df)
fit_summary <- summary(fit)
num_df <- unname(fit_summary$fstatistic["numdf"])
den_df <- unname(fit_summary$fstatistic["dendf"])
f_manual <- (fit_summary$r.squared / num_df) /
  ((1 - fit_summary$r.squared) / den_df)
f_manual
## Sanity check: the hand-computed value should equal the F-statistic that
## summary(fit) reports (521.5 on 6 and 7318 DF in the output above).
isTRUE(all.equal(f_manual, unname(fit_summary$fstatistic["value"])))
## p-value of the overall F test (upper tail of the F distribution).
pf(f_manual, num_df, den_df, lower.tail = FALSE)
## The overall F test is highly significant (summary(fit) reports a p-value
## < 2.2e-16, far below 0.05), so the model as a whole is statistically
## significant even though the adjusted R-squared is relatively low.