Obesity and Diabetes

Examining the difference in BMI between individuals with and without diabetes

Yung Qi Chin (S3819792)

Last updated: 25 May, 2023

Introduction

Introduction Cont.

Problem Statement

Problem Statement Cont.

Data

Data Cont.

Descriptive Statistics and Visualisation

# BMI values recorded as 0 are overwritten with the mean BMI of the column.
# NOTE(review): the mean is taken while the 0 entries are still present, so
# they pull the replacement value down -- confirm this is intended.
diabetes$BMI <- replace(
  diabetes$BMI,
  diabetes$BMI == 0,
  mean(diabetes$BMI, na.rm = TRUE)
)

# Side-by-side boxplots of BMI, one box per Outcome group.
boxplot(BMI ~ Outcome, data = diabetes, ylab = "BMI")

Descriptive Statistics and Visualisation Cont.

# Split the data by Outcome (0 is plotted as "Non-diabetic", 1 as "Diabetic").
diabetes_False <- diabetes[diabetes$Outcome == 0, ]
diabetes_True <- diabetes[diabetes$Outcome == 1, ]

# Kernel density estimates of BMI per group; adjust = 2 widens the default
# bandwidth for a smoother curve.
d <- density(diabetes_True$BMI, adjust = 2)
d2 <- density(diabetes_False$BMI, adjust = 2)

# The braces keep the three plotting calls together as one expression.
{
  plot(
    d,
    main = "Density Plot of BMI", xlab = "BMI", col = "red", lwd = 2,
    # Axis limits span BOTH densities; otherwise the axes come from `d` alone
    # and the curve added by lines(d2) can be clipped.
    xlim = range(d$x, d2$x),
    ylim = range(0, d$y, d2$y)
  )
  lines(d2, lwd = 2)
  legend(
    65, 0.05,
    legend = c("Diabetic", "Non-diabetic"),
    col = c("red", "black"),
    lty = 1, lwd = 2, cex = 0.6, text.font = 4, text.width = 7
  )
}

Descriptive Statistics and Visualisation Cont.

Descriptive Statistics Cont.

# Per-Outcome summary of BMI: five-number summary, mean, standard deviation,
# group size and number of missing BMI values.
table1 <- diabetes %>%
  group_by(Outcome) %>%
  summarise(
    Min = min(BMI, na.rm = TRUE),
    Q1 = quantile(BMI, probs = .25, na.rm = TRUE),
    Median = median(BMI, na.rm = TRUE),
    Q3 = quantile(BMI, probs = .75, na.rm = TRUE),
    Max = max(BMI, na.rm = TRUE),
    Mean = mean(BMI, na.rm = TRUE),
    SD = sd(BMI, na.rm = TRUE),
    n = n(),
    Missing = sum(is.na(BMI))
  )

# Render the summary as a table in the knitted document.
knitr::kable(table1)
Outcome Min Q1 Median Q3 Max Mean SD n Missing
0 18.2 25.75 30.40 35.300 57.3 30.88007 6.503051 500 0
1 22.9 30.90 34.25 38.775 67.1 35.38129 6.596733 268 0

Hypothesis Testing

# Keep only the rows with Outcome == 0.
BMI_nondiabetic <- filter(diabetes, Outcome == 0)

# Normal Q-Q plot of BMI for this group, to judge the normality assumption.
# The pipe is kept so the value is passed to qqPlot exactly as before.
BMI_nondiabetic$BMI %>%
  qqPlot(dist = "norm")

## [1] 443 152

Hypothesis Testing Cont.

# Keep only the rows with Outcome == 1.
BMI_diabetic <- filter(diabetes, Outcome == 1)

# Normal Q-Q plot of BMI for this group, to judge the normality assumption.
# The pipe is kept so the value is passed to qqPlot exactly as before.
BMI_diabetic$BMI %>%
  qqPlot(dist = "norm")

## [1]  63 171

Hypothesis Testing Cont.

# Levene's test of equal BMI variance across the Outcome groups.
# NOTE(review): presumably this is what justifies var.equal = TRUE in the
# t-test that follows -- its output is not shown here, so confirm.
leveneTest(
  BMI ~ Outcome,
  data = diabetes
)

Hypothesis Testing Cont.

\[H_0: \mu_1 = \mu_2 \]

\[H_A: \mu_1 \ne \mu_2\]

# Two-sample t-test of mean BMI between the Outcome groups, using a pooled
# variance (var.equal = TRUE) and a two-sided alternative.
t.test(BMI ~ Outcome, data = diabetes,
       var.equal = TRUE, alternative = "two.sided")
## 
##  Two Sample t-test
## 
## data:  BMI by Outcome
## t = -9.097, df = 766, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -5.472549 -3.529894
## sample estimates:
## mean in group 0 mean in group 1 
##        30.88007        35.38129

Hypothesis Testing Cont.

Discussion

References