#Research Question: What is the relationship between age, food, gender, and being overweight?
#H0: There is no significant relationship between age, food, gender, and being overweight.
#H1: There is a significant relationship between age, food, gender, and being overweight.
library("readxl")
data <- read_excel("C:/Users/apoor/Downloads/LRclassP.xlsx")
summary(data)
## Age Food Gender Weight
## Min. :30.00 Min. :0.0000 Length:58 Min. :0.0000
## 1st Qu.:39.00 1st Qu.:0.0000 Class :character 1st Qu.:0.0000
## Median :56.00 Median :1.0000 Mode :character Median :1.0000
## Mean :51.76 Mean :0.9655 Mean :0.5862
## 3rd Qu.:66.00 3rd Qu.:2.0000 3rd Qu.:1.0000
## Max. :74.00 Max. :2.0000 Max. :1.0000
## HC
## Length:58
## Class :character
## Mode :character
##
##
##
library("car")
## Loading required package: carData
scatterplot(data$Age, data$Weight)

scatterplot(data$Food, data$Food)

#Factoring
data$Age <- factor(data$Age)
factor(data$Age)
## [1] 31 35 44 34 60 31 47 71 69 66 33 72 61 56 69 58 42 64 30 69 59 46 41 73 57
## [26] 40 30 39 74 31 35 44 34 60 31 47 71 69 66 33 72 61 56 69 58 42 64 30 69 59
## [51] 46 41 73 57 40 30 39 74
## 25 Levels: 30 31 33 34 35 39 40 41 42 44 46 47 56 57 58 59 60 61 64 66 ... 74
data$Food <- factor(data$Food)
data$Food
## [1] 1 1 1 2 1 1 2 0 0 2 0 2 0 0 1 2 0 2 0 1 2 0 0 1 2 1 2 0 1 1 1 1 2 1 1 2 0 0
## [39] 2 0 2 0 0 1 2 0 2 0 1 2 0 0 1 2 1 2 0 1
## Levels: 0 1 2
data$Gender <- factor(data$Gender)
factor(data$Gender)
## [1] Female Female Female Female Female Female Female Female Female Female
## [11] Female Female Female Female Male Male Male Male Male Male
## [21] Male Male Male Male Male Male Male Male Male Female
## [31] Female Female Female Female Female Female Female Female Female Female
## [41] Female Female Female Male Male Male Male Male Male Male
## [51] Male Male Male Male Male Male Male Male
## Levels: Female Male
model1 <- glm(Weight ~ Food + Age, data = data, family = "binomial")
model1
##
## Call: glm(formula = Weight ~ Food + Age, family = "binomial", data = data)
##
## Coefficients:
## (Intercept) Food1 Food2 Age31 Age33 Age34
## -2.257e+01 1.072e-06 -1.765e-06 2.257e+01 4.513e+01 -2.873e-11
## Age35 Age39 Age40 Age41 Age42 Age44
## -2.837e-06 -1.765e-06 -2.837e-06 -1.765e-06 4.513e+01 4.513e+01
## Age46 Age47 Age56 Age57 Age58 Age59
## 4.513e+01 4.513e+01 -1.765e-06 -5.886e-12 -8.613e-12 4.513e+01
## Age60 Age61 Age64 Age66 Age69 Age71
## 4.513e+01 4.513e+01 4.513e+01 4.513e+01 4.513e+01 4.513e+01
## Age72 Age73 Age74
## -1.404e-11 4.513e+01 4.513e+01
##
## Degrees of Freedom: 57 Total (i.e. Null); 31 Residual
## Null Deviance: 78.67
## Residual Deviance: 5.545 AIC: 59.55
summary(model1)
##
## Call:
## glm(formula = Weight ~ Food + Age, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.17741 -0.00002 0.00002 0.00002 1.17741
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.257e+01 3.408e+04 -0.001 0.999
## Food1 1.072e-06 4.174e+04 0.000 1.000
## Food2 -1.765e-06 4.820e+04 0.000 1.000
## Age31 2.257e+01 5.388e+04 0.000 1.000
## Age33 4.513e+01 4.820e+04 0.001 0.999
## Age34 -2.873e-11 4.820e+04 0.000 1.000
## Age35 -2.837e-06 6.376e+04 0.000 1.000
## Age39 -1.765e-06 4.820e+04 0.000 1.000
## Age40 -2.837e-06 6.376e+04 0.000 1.000
## Age41 -1.765e-06 4.820e+04 0.000 1.000
## Age42 4.513e+01 4.820e+04 0.001 0.999
## Age44 4.513e+01 6.376e+04 0.001 0.999
## Age46 4.513e+01 4.820e+04 0.001 0.999
## Age47 4.513e+01 4.820e+04 0.001 0.999
## Age56 -1.765e-06 4.820e+04 0.000 1.000
## Age57 -5.886e-12 4.820e+04 0.000 1.000
## Age58 -8.613e-12 4.820e+04 0.000 1.000
## Age59 4.513e+01 4.820e+04 0.001 0.999
## Age60 4.513e+01 6.376e+04 0.001 0.999
## Age61 4.513e+01 4.820e+04 0.001 0.999
## Age64 4.513e+01 4.820e+04 0.001 0.999
## Age66 4.513e+01 4.820e+04 0.001 0.999
## Age69 4.513e+01 4.820e+04 0.001 0.999
## Age71 4.513e+01 4.820e+04 0.001 0.999
## Age72 -1.404e-11 4.820e+04 0.000 1.000
## Age73 4.513e+01 6.376e+04 0.001 0.999
## Age74 4.513e+01 6.376e+04 0.001 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 78.6723 on 57 degrees of freedom
## Residual deviance: 5.5452 on 31 degrees of freedom
## AIC: 59.545
##
## Number of Fisher Scoring iterations: 21
vif(model1)
## GVIF Df GVIF^(1/(2*Df))
## Food 269.9999 2 4.053600
## Age 269.9999 24 1.123708
model1chi <- model1$null.deviance-model1$deviance
model1chi
## [1] 73.12711
#Finding the degrees of freedom
dof <- model1$df.null-model1$df.residual
dof
## [1] 26
chisqp <- 1 - pchisq(model1chi, dof)
chisqp
## [1] 2.301179e-06
R2 <- model1chi/model1$null.deviance
R2
## [1] 0.9295155
model1$coefficients
## (Intercept) Food1 Food2 Age31 Age33
## -2.256607e+01 1.071921e-06 -1.765217e-06 2.256607e+01 4.513214e+01
## Age34 Age35 Age39 Age40 Age41
## -2.873336e-11 -2.837149e-06 -1.765222e-06 -2.837144e-06 -1.765226e-06
## Age42 Age44 Age46 Age47 Age56
## 4.513214e+01 4.513213e+01 4.513214e+01 4.513214e+01 -1.765233e-06
## Age57 Age58 Age59 Age60 Age61
## -5.885508e-12 -8.613305e-12 4.513214e+01 4.513213e+01 4.513214e+01
## Age64 Age66 Age69 Age71 Age72
## 4.513214e+01 4.513214e+01 4.513213e+01 4.513214e+01 -1.404114e-11
## Age73 Age74
## 4.513213e+01 4.513213e+01
exp(model1$coefficients)
## (Intercept) Food1 Food2 Age31 Age33 Age34
## 1.583732e-10 1.000001e+00 9.999982e-01 6.314192e+09 3.986918e+19 1.000000e+00
## Age35 Age39 Age40 Age41 Age42 Age44
## 9.999972e-01 9.999982e-01 9.999972e-01 9.999982e-01 3.986918e+19 3.986914e+19
## Age46 Age47 Age56 Age57 Age58 Age59
## 3.986918e+19 3.986926e+19 9.999982e-01 1.000000e+00 1.000000e+00 3.986926e+19
## Age60 Age61 Age64 Age66 Age69 Age71
## 3.986914e+19 3.986918e+19 3.986925e+19 3.986926e+19 3.986915e+19 3.986918e+19
## Age72 Age73 Age74
## 1.000000e+00 3.986914e+19 3.986914e+19
model2 <- glm(Weight ~ Age, data = data, family = "binomial")
model2
##
## Call: glm(formula = Weight ~ Age, family = "binomial", data = data)
##
## Coefficients:
## (Intercept) Age31 Age33 Age34 Age35 Age39
## -2.257e+01 2.257e+01 4.513e+01 -8.886e-07 -8.886e-07 -8.886e-07
## Age40 Age41 Age42 Age44 Age46 Age47
## -8.886e-07 -8.886e-07 4.513e+01 4.513e+01 4.513e+01 4.513e+01
## Age56 Age57 Age58 Age59 Age60 Age61
## -8.886e-07 -8.886e-07 -8.886e-07 4.513e+01 4.513e+01 4.513e+01
## Age64 Age66 Age69 Age71 Age72 Age73
## 4.513e+01 4.513e+01 4.513e+01 4.513e+01 -8.886e-07 4.513e+01
## Age74
## 4.513e+01
##
## Degrees of Freedom: 57 Total (i.e. Null); 33 Residual
## Null Deviance: 78.67
## Residual Deviance: 5.545 AIC: 55.55
summary(model2)
##
## Call:
## glm(formula = Weight ~ Age, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.17741 -0.00002 0.00002 0.00002 1.17741
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.257e+01 2.410e+04 -0.001 0.999
## Age31 2.257e+01 2.410e+04 0.001 0.999
## Age33 4.513e+01 4.174e+04 0.001 0.999
## Age34 -8.886e-07 4.174e+04 0.000 1.000
## Age35 -8.886e-07 4.174e+04 0.000 1.000
## Age39 -8.886e-07 4.174e+04 0.000 1.000
## Age40 -8.886e-07 4.174e+04 0.000 1.000
## Age41 -8.886e-07 4.174e+04 0.000 1.000
## Age42 4.513e+01 4.174e+04 0.001 0.999
## Age44 4.513e+01 4.174e+04 0.001 0.999
## Age46 4.513e+01 4.174e+04 0.001 0.999
## Age47 4.513e+01 4.174e+04 0.001 0.999
## Age56 -8.886e-07 4.174e+04 0.000 1.000
## Age57 -8.886e-07 4.174e+04 0.000 1.000
## Age58 -8.886e-07 4.174e+04 0.000 1.000
## Age59 4.513e+01 4.174e+04 0.001 0.999
## Age60 4.513e+01 4.174e+04 0.001 0.999
## Age61 4.513e+01 4.174e+04 0.001 0.999
## Age64 4.513e+01 4.174e+04 0.001 0.999
## Age66 4.513e+01 4.174e+04 0.001 0.999
## Age69 4.513e+01 3.111e+04 0.001 0.999
## Age71 4.513e+01 4.174e+04 0.001 0.999
## Age72 -8.886e-07 4.174e+04 0.000 1.000
## Age73 4.513e+01 4.174e+04 0.001 0.999
## Age74 4.513e+01 4.174e+04 0.001 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 78.6723 on 57 degrees of freedom
## Residual deviance: 5.5452 on 33 degrees of freedom
## AIC: 55.545
##
## Number of Fisher Scoring iterations: 21
model3 <- glm(Weight ~ Gender, data = data, family = "binomial")
model3
##
## Call: glm(formula = Weight ~ Gender, family = "binomial", data = data)
##
## Coefficients:
## (Intercept) GenderMale
## 0.5878 -0.4543
##
## Degrees of Freedom: 57 Total (i.e. Null); 56 Residual
## Null Deviance: 78.67
## Residual Deviance: 77.95 AIC: 81.95
summary(model3)
##
## Call:
## glm(formula = Weight ~ Gender, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.435 -1.235 0.940 1.121 1.121
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5878 0.3944 1.490 0.136
## GenderMale -0.4543 0.5380 -0.844 0.399
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 78.672 on 57 degrees of freedom
## Residual deviance: 77.954 on 56 degrees of freedom
## AIC: 81.954
##
## Number of Fisher Scoring iterations: 4
model4 <- glm(Weight ~ Age + Food, data = data, family = "binomial")
model4
##
## Call: glm(formula = Weight ~ Age + Food, family = "binomial", data = data)
##
## Coefficients:
## (Intercept) Age31 Age33 Age34 Age35 Age39
## -2.257e+01 2.257e+01 4.513e+01 -3.534e-12 -2.556e-06 -1.823e-06
## Age40 Age41 Age42 Age44 Age46 Age47
## -2.556e-06 -1.823e-06 4.513e+01 4.513e+01 4.513e+01 4.513e+01
## Age56 Age57 Age58 Age59 Age60 Age61
## -1.823e-06 -1.608e-12 -1.879e-12 4.513e+01 4.513e+01 4.513e+01
## Age64 Age66 Age69 Age71 Age72 Age73
## 4.513e+01 4.513e+01 4.513e+01 4.513e+01 3.966e-13 4.513e+01
## Age74 Food1 Food2
## 4.513e+01 7.327e-07 -1.823e-06
##
## Degrees of Freedom: 57 Total (i.e. Null); 31 Residual
## Null Deviance: 78.67
## Residual Deviance: 5.545 AIC: 59.55
summary(model4)
##
## Call:
## glm(formula = Weight ~ Age + Food, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.17741 -0.00002 0.00002 0.00002 1.17741
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.257e+01 3.408e+04 -0.001 0.999
## Age31 2.257e+01 5.388e+04 0.000 1.000
## Age33 4.513e+01 4.820e+04 0.001 0.999
## Age34 -3.534e-12 4.820e+04 0.000 1.000
## Age35 -2.556e-06 6.376e+04 0.000 1.000
## Age39 -1.823e-06 4.820e+04 0.000 1.000
## Age40 -2.556e-06 6.376e+04 0.000 1.000
## Age41 -1.823e-06 4.820e+04 0.000 1.000
## Age42 4.513e+01 4.820e+04 0.001 0.999
## Age44 4.513e+01 6.376e+04 0.001 0.999
## Age46 4.513e+01 4.820e+04 0.001 0.999
## Age47 4.513e+01 4.820e+04 0.001 0.999
## Age56 -1.823e-06 4.820e+04 0.000 1.000
## Age57 -1.608e-12 4.820e+04 0.000 1.000
## Age58 -1.879e-12 4.820e+04 0.000 1.000
## Age59 4.513e+01 4.820e+04 0.001 0.999
## Age60 4.513e+01 6.376e+04 0.001 0.999
## Age61 4.513e+01 4.820e+04 0.001 0.999
## Age64 4.513e+01 4.820e+04 0.001 0.999
## Age66 4.513e+01 4.820e+04 0.001 0.999
## Age69 4.513e+01 4.820e+04 0.001 0.999
## Age71 4.513e+01 4.820e+04 0.001 0.999
## Age72 3.966e-13 4.820e+04 0.000 1.000
## Age73 4.513e+01 6.376e+04 0.001 0.999
## Age74 4.513e+01 6.376e+04 0.001 0.999
## Food1 7.327e-07 4.174e+04 0.000 1.000
## Food2 -1.823e-06 4.820e+04 0.000 1.000
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 78.6723 on 57 degrees of freedom
## Residual deviance: 5.5452 on 31 degrees of freedom
## AIC: 59.545
##
## Number of Fisher Scoring iterations: 21
model5 <- glm(Weight ~ Age + Gender, data = data, family = "binomial")
model5
##
## Call: glm(formula = Weight ~ Age + Gender, family = "binomial", data = data)
##
## Coefficients:
## (Intercept) Age31 Age33 Age34 Age35 Age39
## -2.257e+01 2.257e+01 4.513e+01 -2.470e-06 -2.470e-06 -5.623e-07
## Age40 Age41 Age42 Age44 Age46 Age47
## -5.623e-07 -5.623e-07 4.513e+01 4.513e+01 4.513e+01 4.513e+01
## Age56 Age57 Age58 Age59 Age60 Age61
## -2.470e-06 -5.623e-07 -5.623e-07 4.513e+01 4.513e+01 4.513e+01
## Age64 Age66 Age69 Age71 Age72 Age73
## 4.513e+01 4.513e+01 4.513e+01 4.513e+01 -2.470e-06 4.513e+01
## Age74 GenderMale
## 4.513e+01 -1.907e-06
##
## Degrees of Freedom: 57 Total (i.e. Null); 32 Residual
## Null Deviance: 78.67
## Residual Deviance: 5.545 AIC: 57.55
summary(model5)
##
## Call:
## glm(formula = Weight ~ Age + Gender, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.17741 -0.00002 0.00002 0.00002 1.17741
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.257e+01 4.820e+04 0.000 1.000
## Age31 2.257e+01 4.820e+04 0.000 1.000
## Age33 4.513e+01 5.903e+04 0.001 0.999
## Age34 -2.469e-06 5.903e+04 0.000 1.000
## Age35 -2.469e-06 5.903e+04 0.000 1.000
## Age39 -5.623e-07 4.174e+04 0.000 1.000
## Age40 -5.623e-07 4.174e+04 0.000 1.000
## Age41 -5.623e-07 4.174e+04 0.000 1.000
## Age42 4.513e+01 4.174e+04 0.001 0.999
## Age44 4.513e+01 5.903e+04 0.001 0.999
## Age46 4.513e+01 4.174e+04 0.001 0.999
## Age47 4.513e+01 5.903e+04 0.001 0.999
## Age56 -2.469e-06 5.903e+04 0.000 1.000
## Age57 -5.623e-07 4.174e+04 0.000 1.000
## Age58 -5.623e-07 4.174e+04 0.000 1.000
## Age59 4.513e+01 4.174e+04 0.001 0.999
## Age60 4.513e+01 5.903e+04 0.001 0.999
## Age61 4.513e+01 5.903e+04 0.001 0.999
## Age64 4.513e+01 4.174e+04 0.001 0.999
## Age66 4.513e+01 5.903e+04 0.001 0.999
## Age69 4.513e+01 3.408e+04 0.001 0.999
## Age71 4.513e+01 5.903e+04 0.001 0.999
## Age72 -2.469e-06 5.903e+04 0.000 1.000
## Age73 4.513e+01 4.174e+04 0.001 0.999
## Age74 4.513e+01 4.174e+04 0.001 0.999
## GenderMale -1.907e-06 4.174e+04 0.000 1.000
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 78.6723 on 57 degrees of freedom
## Residual deviance: 5.5452 on 32 degrees of freedom
## AIC: 57.545
##
## Number of Fisher Scoring iterations: 21
#Compare different models
anova(model1, model2, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Weight ~ Food + Age
## Model 2: Weight ~ Age
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 31 5.5452
## 2 33 5.5452 -2 -8.8818e-16 1
#Summary
#The ANOVA table shows that the p-value for the deviance difference is greater than 0.05, which suggests that there is no significant difference between the two models. Therefore, we fail to reject H0 and can conclude that the food variable does not significantly contribute to the prediction of overweight status beyond what is explained by age alone.
#Based on this analysis, we can conclude that age is a significant predictor of overweight status, while the inclusion of the food variable in the model does not significantly improve the prediction of overweight status beyond what is explained by age alone.