#Research Question: What is the relationship between age, food, gender, and being overweight?
#H0: There is no significant relationship between age, food, gender, and being overweight.
#H1: There is a significant relationship between age, food, gender, and being overweight. 
library("readxl")
data <- read_excel("C:/Users/apoor/Downloads/LRclassP.xlsx")
summary(data)
##       Age             Food           Gender              Weight      
##  Min.   :30.00   Min.   :0.0000   Length:58          Min.   :0.0000  
##  1st Qu.:39.00   1st Qu.:0.0000   Class :character   1st Qu.:0.0000  
##  Median :56.00   Median :1.0000   Mode  :character   Median :1.0000  
##  Mean   :51.76   Mean   :0.9655                      Mean   :0.5862  
##  3rd Qu.:66.00   3rd Qu.:2.0000                      3rd Qu.:1.0000  
##  Max.   :74.00   Max.   :2.0000                      Max.   :1.0000  
##       HC           
##  Length:58         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
library("car")
## Loading required package: carData
scatterplot(data$Age, data$Weight)

scatterplot(data$Food, data$Food)

#Factoring 
data$Age <- factor(data$Age)
factor(data$Age)
##  [1] 31 35 44 34 60 31 47 71 69 66 33 72 61 56 69 58 42 64 30 69 59 46 41 73 57
## [26] 40 30 39 74 31 35 44 34 60 31 47 71 69 66 33 72 61 56 69 58 42 64 30 69 59
## [51] 46 41 73 57 40 30 39 74
## 25 Levels: 30 31 33 34 35 39 40 41 42 44 46 47 56 57 58 59 60 61 64 66 ... 74
data$Food <- factor(data$Food)
data$Food
##  [1] 1 1 1 2 1 1 2 0 0 2 0 2 0 0 1 2 0 2 0 1 2 0 0 1 2 1 2 0 1 1 1 1 2 1 1 2 0 0
## [39] 2 0 2 0 0 1 2 0 2 0 1 2 0 0 1 2 1 2 0 1
## Levels: 0 1 2
data$Gender <- factor(data$Gender)
factor(data$Gender)
##  [1] Female Female Female Female Female Female Female Female Female Female
## [11] Female Female Female Female Male   Male   Male   Male   Male   Male  
## [21] Male   Male   Male   Male   Male   Male   Male   Male   Male   Female
## [31] Female Female Female Female Female Female Female Female Female Female
## [41] Female Female Female Male   Male   Male   Male   Male   Male   Male  
## [51] Male   Male   Male   Male   Male   Male   Male   Male  
## Levels: Female Male
model1 <- glm(Weight ~ Food + Age, data = data, family = "binomial") 
model1
## 
## Call:  glm(formula = Weight ~ Food + Age, family = "binomial", data = data)
## 
## Coefficients:
## (Intercept)        Food1        Food2        Age31        Age33        Age34  
##  -2.257e+01    1.072e-06   -1.765e-06    2.257e+01    4.513e+01   -2.873e-11  
##       Age35        Age39        Age40        Age41        Age42        Age44  
##  -2.837e-06   -1.765e-06   -2.837e-06   -1.765e-06    4.513e+01    4.513e+01  
##       Age46        Age47        Age56        Age57        Age58        Age59  
##   4.513e+01    4.513e+01   -1.765e-06   -5.886e-12   -8.613e-12    4.513e+01  
##       Age60        Age61        Age64        Age66        Age69        Age71  
##   4.513e+01    4.513e+01    4.513e+01    4.513e+01    4.513e+01    4.513e+01  
##       Age72        Age73        Age74  
##  -1.404e-11    4.513e+01    4.513e+01  
## 
## Degrees of Freedom: 57 Total (i.e. Null);  31 Residual
## Null Deviance:       78.67 
## Residual Deviance: 5.545     AIC: 59.55
summary(model1)
## 
## Call:
## glm(formula = Weight ~ Food + Age, family = "binomial", data = data)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.17741  -0.00002   0.00002   0.00002   1.17741  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.257e+01  3.408e+04  -0.001    0.999
## Food1        1.072e-06  4.174e+04   0.000    1.000
## Food2       -1.765e-06  4.820e+04   0.000    1.000
## Age31        2.257e+01  5.388e+04   0.000    1.000
## Age33        4.513e+01  4.820e+04   0.001    0.999
## Age34       -2.873e-11  4.820e+04   0.000    1.000
## Age35       -2.837e-06  6.376e+04   0.000    1.000
## Age39       -1.765e-06  4.820e+04   0.000    1.000
## Age40       -2.837e-06  6.376e+04   0.000    1.000
## Age41       -1.765e-06  4.820e+04   0.000    1.000
## Age42        4.513e+01  4.820e+04   0.001    0.999
## Age44        4.513e+01  6.376e+04   0.001    0.999
## Age46        4.513e+01  4.820e+04   0.001    0.999
## Age47        4.513e+01  4.820e+04   0.001    0.999
## Age56       -1.765e-06  4.820e+04   0.000    1.000
## Age57       -5.886e-12  4.820e+04   0.000    1.000
## Age58       -8.613e-12  4.820e+04   0.000    1.000
## Age59        4.513e+01  4.820e+04   0.001    0.999
## Age60        4.513e+01  6.376e+04   0.001    0.999
## Age61        4.513e+01  4.820e+04   0.001    0.999
## Age64        4.513e+01  4.820e+04   0.001    0.999
## Age66        4.513e+01  4.820e+04   0.001    0.999
## Age69        4.513e+01  4.820e+04   0.001    0.999
## Age71        4.513e+01  4.820e+04   0.001    0.999
## Age72       -1.404e-11  4.820e+04   0.000    1.000
## Age73        4.513e+01  6.376e+04   0.001    0.999
## Age74        4.513e+01  6.376e+04   0.001    0.999
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 78.6723  on 57  degrees of freedom
## Residual deviance:  5.5452  on 31  degrees of freedom
## AIC: 59.545
## 
## Number of Fisher Scoring iterations: 21
vif(model1)
##          GVIF Df GVIF^(1/(2*Df))
## Food 269.9999  2        4.053600
## Age  269.9999 24        1.123708
model1chi <- model1$null.deviance-model1$deviance 
model1chi
## [1] 73.12711
#Finding the degrees of freedom
dof <- model1$df.null-model1$df.residual 
dof
## [1] 26
chisqp <- 1 - pchisq(model1chi, dof) 
chisqp
## [1] 2.301179e-06
R2 <- model1chi/model1$null.deviance
R2
## [1] 0.9295155
model1$coefficients
##   (Intercept)         Food1         Food2         Age31         Age33 
## -2.256607e+01  1.071921e-06 -1.765217e-06  2.256607e+01  4.513214e+01 
##         Age34         Age35         Age39         Age40         Age41 
## -2.873336e-11 -2.837149e-06 -1.765222e-06 -2.837144e-06 -1.765226e-06 
##         Age42         Age44         Age46         Age47         Age56 
##  4.513214e+01  4.513213e+01  4.513214e+01  4.513214e+01 -1.765233e-06 
##         Age57         Age58         Age59         Age60         Age61 
## -5.885508e-12 -8.613305e-12  4.513214e+01  4.513213e+01  4.513214e+01 
##         Age64         Age66         Age69         Age71         Age72 
##  4.513214e+01  4.513214e+01  4.513213e+01  4.513214e+01 -1.404114e-11 
##         Age73         Age74 
##  4.513213e+01  4.513213e+01
exp(model1$coefficients) 
##  (Intercept)        Food1        Food2        Age31        Age33        Age34 
## 1.583732e-10 1.000001e+00 9.999982e-01 6.314192e+09 3.986918e+19 1.000000e+00 
##        Age35        Age39        Age40        Age41        Age42        Age44 
## 9.999972e-01 9.999982e-01 9.999972e-01 9.999982e-01 3.986918e+19 3.986914e+19 
##        Age46        Age47        Age56        Age57        Age58        Age59 
## 3.986918e+19 3.986926e+19 9.999982e-01 1.000000e+00 1.000000e+00 3.986926e+19 
##        Age60        Age61        Age64        Age66        Age69        Age71 
## 3.986914e+19 3.986918e+19 3.986925e+19 3.986926e+19 3.986915e+19 3.986918e+19 
##        Age72        Age73        Age74 
## 1.000000e+00 3.986914e+19 3.986914e+19
model2 <- glm(Weight ~ Age, data = data, family = "binomial") 
model2
## 
## Call:  glm(formula = Weight ~ Age, family = "binomial", data = data)
## 
## Coefficients:
## (Intercept)        Age31        Age33        Age34        Age35        Age39  
##  -2.257e+01    2.257e+01    4.513e+01   -8.886e-07   -8.886e-07   -8.886e-07  
##       Age40        Age41        Age42        Age44        Age46        Age47  
##  -8.886e-07   -8.886e-07    4.513e+01    4.513e+01    4.513e+01    4.513e+01  
##       Age56        Age57        Age58        Age59        Age60        Age61  
##  -8.886e-07   -8.886e-07   -8.886e-07    4.513e+01    4.513e+01    4.513e+01  
##       Age64        Age66        Age69        Age71        Age72        Age73  
##   4.513e+01    4.513e+01    4.513e+01    4.513e+01   -8.886e-07    4.513e+01  
##       Age74  
##   4.513e+01  
## 
## Degrees of Freedom: 57 Total (i.e. Null);  33 Residual
## Null Deviance:       78.67 
## Residual Deviance: 5.545     AIC: 55.55
summary(model2)
## 
## Call:
## glm(formula = Weight ~ Age, family = "binomial", data = data)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.17741  -0.00002   0.00002   0.00002   1.17741  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.257e+01  2.410e+04  -0.001    0.999
## Age31        2.257e+01  2.410e+04   0.001    0.999
## Age33        4.513e+01  4.174e+04   0.001    0.999
## Age34       -8.886e-07  4.174e+04   0.000    1.000
## Age35       -8.886e-07  4.174e+04   0.000    1.000
## Age39       -8.886e-07  4.174e+04   0.000    1.000
## Age40       -8.886e-07  4.174e+04   0.000    1.000
## Age41       -8.886e-07  4.174e+04   0.000    1.000
## Age42        4.513e+01  4.174e+04   0.001    0.999
## Age44        4.513e+01  4.174e+04   0.001    0.999
## Age46        4.513e+01  4.174e+04   0.001    0.999
## Age47        4.513e+01  4.174e+04   0.001    0.999
## Age56       -8.886e-07  4.174e+04   0.000    1.000
## Age57       -8.886e-07  4.174e+04   0.000    1.000
## Age58       -8.886e-07  4.174e+04   0.000    1.000
## Age59        4.513e+01  4.174e+04   0.001    0.999
## Age60        4.513e+01  4.174e+04   0.001    0.999
## Age61        4.513e+01  4.174e+04   0.001    0.999
## Age64        4.513e+01  4.174e+04   0.001    0.999
## Age66        4.513e+01  4.174e+04   0.001    0.999
## Age69        4.513e+01  3.111e+04   0.001    0.999
## Age71        4.513e+01  4.174e+04   0.001    0.999
## Age72       -8.886e-07  4.174e+04   0.000    1.000
## Age73        4.513e+01  4.174e+04   0.001    0.999
## Age74        4.513e+01  4.174e+04   0.001    0.999
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 78.6723  on 57  degrees of freedom
## Residual deviance:  5.5452  on 33  degrees of freedom
## AIC: 55.545
## 
## Number of Fisher Scoring iterations: 21
model3 <- glm(Weight ~ Gender, data = data, family = "binomial")
model3
## 
## Call:  glm(formula = Weight ~ Gender, family = "binomial", data = data)
## 
## Coefficients:
## (Intercept)   GenderMale  
##      0.5878      -0.4543  
## 
## Degrees of Freedom: 57 Total (i.e. Null);  56 Residual
## Null Deviance:       78.67 
## Residual Deviance: 77.95     AIC: 81.95
summary(model3)
## 
## Call:
## glm(formula = Weight ~ Gender, family = "binomial", data = data)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.435  -1.235   0.940   1.121   1.121  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   0.5878     0.3944   1.490    0.136
## GenderMale   -0.4543     0.5380  -0.844    0.399
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 78.672  on 57  degrees of freedom
## Residual deviance: 77.954  on 56  degrees of freedom
## AIC: 81.954
## 
## Number of Fisher Scoring iterations: 4
model4 <- glm(Weight ~ Age + Food, data = data, family = "binomial") 
model4
## 
## Call:  glm(formula = Weight ~ Age + Food, family = "binomial", data = data)
## 
## Coefficients:
## (Intercept)        Age31        Age33        Age34        Age35        Age39  
##  -2.257e+01    2.257e+01    4.513e+01   -3.534e-12   -2.556e-06   -1.823e-06  
##       Age40        Age41        Age42        Age44        Age46        Age47  
##  -2.556e-06   -1.823e-06    4.513e+01    4.513e+01    4.513e+01    4.513e+01  
##       Age56        Age57        Age58        Age59        Age60        Age61  
##  -1.823e-06   -1.608e-12   -1.879e-12    4.513e+01    4.513e+01    4.513e+01  
##       Age64        Age66        Age69        Age71        Age72        Age73  
##   4.513e+01    4.513e+01    4.513e+01    4.513e+01    3.966e-13    4.513e+01  
##       Age74        Food1        Food2  
##   4.513e+01    7.327e-07   -1.823e-06  
## 
## Degrees of Freedom: 57 Total (i.e. Null);  31 Residual
## Null Deviance:       78.67 
## Residual Deviance: 5.545     AIC: 59.55
summary(model4)
## 
## Call:
## glm(formula = Weight ~ Age + Food, family = "binomial", data = data)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.17741  -0.00002   0.00002   0.00002   1.17741  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.257e+01  3.408e+04  -0.001    0.999
## Age31        2.257e+01  5.388e+04   0.000    1.000
## Age33        4.513e+01  4.820e+04   0.001    0.999
## Age34       -3.534e-12  4.820e+04   0.000    1.000
## Age35       -2.556e-06  6.376e+04   0.000    1.000
## Age39       -1.823e-06  4.820e+04   0.000    1.000
## Age40       -2.556e-06  6.376e+04   0.000    1.000
## Age41       -1.823e-06  4.820e+04   0.000    1.000
## Age42        4.513e+01  4.820e+04   0.001    0.999
## Age44        4.513e+01  6.376e+04   0.001    0.999
## Age46        4.513e+01  4.820e+04   0.001    0.999
## Age47        4.513e+01  4.820e+04   0.001    0.999
## Age56       -1.823e-06  4.820e+04   0.000    1.000
## Age57       -1.608e-12  4.820e+04   0.000    1.000
## Age58       -1.879e-12  4.820e+04   0.000    1.000
## Age59        4.513e+01  4.820e+04   0.001    0.999
## Age60        4.513e+01  6.376e+04   0.001    0.999
## Age61        4.513e+01  4.820e+04   0.001    0.999
## Age64        4.513e+01  4.820e+04   0.001    0.999
## Age66        4.513e+01  4.820e+04   0.001    0.999
## Age69        4.513e+01  4.820e+04   0.001    0.999
## Age71        4.513e+01  4.820e+04   0.001    0.999
## Age72        3.966e-13  4.820e+04   0.000    1.000
## Age73        4.513e+01  6.376e+04   0.001    0.999
## Age74        4.513e+01  6.376e+04   0.001    0.999
## Food1        7.327e-07  4.174e+04   0.000    1.000
## Food2       -1.823e-06  4.820e+04   0.000    1.000
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 78.6723  on 57  degrees of freedom
## Residual deviance:  5.5452  on 31  degrees of freedom
## AIC: 59.545
## 
## Number of Fisher Scoring iterations: 21
model5 <- glm(Weight ~ Age + Gender, data = data, family = "binomial")
model5
## 
## Call:  glm(formula = Weight ~ Age + Gender, family = "binomial", data = data)
## 
## Coefficients:
## (Intercept)        Age31        Age33        Age34        Age35        Age39  
##  -2.257e+01    2.257e+01    4.513e+01   -2.470e-06   -2.470e-06   -5.623e-07  
##       Age40        Age41        Age42        Age44        Age46        Age47  
##  -5.623e-07   -5.623e-07    4.513e+01    4.513e+01    4.513e+01    4.513e+01  
##       Age56        Age57        Age58        Age59        Age60        Age61  
##  -2.470e-06   -5.623e-07   -5.623e-07    4.513e+01    4.513e+01    4.513e+01  
##       Age64        Age66        Age69        Age71        Age72        Age73  
##   4.513e+01    4.513e+01    4.513e+01    4.513e+01   -2.470e-06    4.513e+01  
##       Age74   GenderMale  
##   4.513e+01   -1.907e-06  
## 
## Degrees of Freedom: 57 Total (i.e. Null);  32 Residual
## Null Deviance:       78.67 
## Residual Deviance: 5.545     AIC: 57.55
summary(model5)
## 
## Call:
## glm(formula = Weight ~ Age + Gender, family = "binomial", data = data)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.17741  -0.00002   0.00002   0.00002   1.17741  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.257e+01  4.820e+04   0.000    1.000
## Age31        2.257e+01  4.820e+04   0.000    1.000
## Age33        4.513e+01  5.903e+04   0.001    0.999
## Age34       -2.469e-06  5.903e+04   0.000    1.000
## Age35       -2.469e-06  5.903e+04   0.000    1.000
## Age39       -5.623e-07  4.174e+04   0.000    1.000
## Age40       -5.623e-07  4.174e+04   0.000    1.000
## Age41       -5.623e-07  4.174e+04   0.000    1.000
## Age42        4.513e+01  4.174e+04   0.001    0.999
## Age44        4.513e+01  5.903e+04   0.001    0.999
## Age46        4.513e+01  4.174e+04   0.001    0.999
## Age47        4.513e+01  5.903e+04   0.001    0.999
## Age56       -2.469e-06  5.903e+04   0.000    1.000
## Age57       -5.623e-07  4.174e+04   0.000    1.000
## Age58       -5.623e-07  4.174e+04   0.000    1.000
## Age59        4.513e+01  4.174e+04   0.001    0.999
## Age60        4.513e+01  5.903e+04   0.001    0.999
## Age61        4.513e+01  5.903e+04   0.001    0.999
## Age64        4.513e+01  4.174e+04   0.001    0.999
## Age66        4.513e+01  5.903e+04   0.001    0.999
## Age69        4.513e+01  3.408e+04   0.001    0.999
## Age71        4.513e+01  5.903e+04   0.001    0.999
## Age72       -2.469e-06  5.903e+04   0.000    1.000
## Age73        4.513e+01  4.174e+04   0.001    0.999
## Age74        4.513e+01  4.174e+04   0.001    0.999
## GenderMale  -1.907e-06  4.174e+04   0.000    1.000
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 78.6723  on 57  degrees of freedom
## Residual deviance:  5.5452  on 32  degrees of freedom
## AIC: 57.545
## 
## Number of Fisher Scoring iterations: 21
#Compare different models
anova(model1, model2, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: Weight ~ Food + Age
## Model 2: Weight ~ Age
##   Resid. Df Resid. Dev Df    Deviance Pr(>Chi)
## 1        31     5.5452                        
## 2        33     5.5452 -2 -8.8818e-16        1
#Summary
#The ANOVA table shows that the p-value for the deviance difference is greater than 0.05, which suggests that there is no significant difference between the two models. Therefore, we fail to reject H0 and can conclude that the food variable does not significantly contribute to the prediction of overweight status beyond what is explained by age alone.

#Based on this analysis, we can conclude that age is a significant predictor of overweight status, while the inclusion of the food variable in the model does not significantly improve the prediction of overweight status beyond what is explained by age alone.