#Part 1
library(readxl)
library(dplyr)
##
## Dołączanie pakietu: 'dplyr'
## Następujące obiekty zostały zakryte z 'package:stats':
##
## filter, lag
## Następujące obiekty zostały zakryte z 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(car)
## Ładowanie wymaganego pakietu: carData
##
## Dołączanie pakietu: 'car'
## Następujący obiekt został zakryty z 'package:dplyr':
##
## recode
library(rstatix)
##
## Dołączanie pakietu: 'rstatix'
## Następujący obiekt został zakryty z 'package:stats':
##
## filter
library(ggpubr)
library(FSA)
## Registered S3 methods overwritten by 'FSA':
## method from
## confint.boot car
## hist.boot car
## ## FSA v0.10.1. See citation('FSA') if used in publication.
## ## Run fishR() for related website and fishR('IFAR') for related book.
##
## Dołączanie pakietu: 'FSA'
## Następujący obiekt został zakryty z 'package:car':
##
## bootCase
#INSERT WORKING DIRECTORY HERE!!!!!!!!!!!!
diabetes_data <- read.csv("diabetes_controll.csv")
#Step 1 - loading data
str(diabetes_data)
## 'data.frame': 200 obs. of 9 variables:
## $ patient_id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ age : int 63 35 58 60 54 53 51 56 48 65 ...
## $ sex : chr "Female" "Female" "Female" "Male" ...
## $ intervention : chr "Standard_Care" "Standard_Care" "Standard_Care" "Standard_Care" ...
## $ baseline_hba1c : num 7.2 7.4 9.9 7.5 8.9 6.6 9.6 8.6 6.6 7.1 ...
## $ followup_hba1c : num 8.1 7.2 8.5 8.3 7.1 7.7 7.5 7.6 9.1 6.8 ...
## $ fasting_glucose: int 176 145 102 175 166 199 120 154 174 199 ...
## $ exercise_freq : chr "3+_per_week" "None" "3+_per_week" "1-2_per_week" ...
## $ hba1c_change : num -0.9 0.2 1.4 -0.8 1.8 ...
head(diabetes_data)
## patient_id age sex intervention baseline_hba1c followup_hba1c
## 1 1 63 Female Standard_Care 7.2 8.1
## 2 2 35 Female Standard_Care 7.4 7.2
## 3 3 58 Female Standard_Care 9.9 8.5
## 4 4 60 Male Standard_Care 7.5 8.3
## 5 5 54 Female Standard_Care 8.9 7.1
## 6 6 53 Female Standard_Care 6.6 7.7
## fasting_glucose exercise_freq hba1c_change
## 1 176 3+_per_week -0.9
## 2 145 None 0.2
## 3 102 3+_per_week 1.4
## 4 175 1-2_per_week -0.8
## 5 166 None 1.8
## 6 199 None -1.1
#Step 2 - descriptive stats
baseline_summary <- diabetes_data |>
group_by(sex,intervention,exercise_freq) |>
summarise(
n = n(),
Mean = mean(baseline_hba1c),
SD = sd(baseline_hba1c),
Median = median(baseline_hba1c),
Q1 = quantile(baseline_hba1c,0.25),
Q3 = quantile(baseline_hba1c,0.75)
)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by sex, intervention, and exercise_freq.
## ℹ Output is grouped by sex and intervention.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(sex, intervention, exercise_freq))` for per-operation
## grouping (`?dplyr::dplyr_by`) instead.
print(baseline_summary)
## # A tibble: 24 × 9
## # Groups: sex, intervention [8]
## sex intervention exercise_freq n Mean SD Median Q1 Q3
## <chr> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Female Diet_Counseling 1-2_per_week 5 9.54 1.69 10 9.8 10.6
## 2 Female Diet_Counseling 3+_per_week 10 7.91 0.877 7.95 7.52 8.28
## 3 Female Diet_Counseling None 10 9.18 0.666 9.25 8.93 9.4
## 4 Female Diet_Plus_Exercise 1-2_per_week 9 8.59 1.08 8.8 8.3 9.1
## 5 Female Diet_Plus_Exercise 3+_per_week 8 8.12 1.65 7.85 6.88 9.65
## 6 Female Diet_Plus_Exercise None 7 9.24 1.21 8.8 8.45 10.2
## 7 Female Intensive_Program 1-2_per_week 10 7.54 1.42 7.4 7.1 7.9
## 8 Female Intensive_Program 3+_per_week 6 8.95 0.787 8.7 8.33 9.68
## 9 Female Intensive_Program None 5 8.52 1.25 8.6 7.9 8.7
## 10 Female Standard_Care 1-2_per_week 7 8.81 1.05 9.3 8.1 9.55
## # ℹ 14 more rows
follow_summary <- diabetes_data |>
group_by(sex,intervention,exercise_freq) |>
summarise(
n = n(),
Mean = mean(followup_hba1c),
SD = sd(followup_hba1c),
Median = median(followup_hba1c),
Q1 = quantile(followup_hba1c,0.25),
Q3 = quantile(followup_hba1c,0.75)
)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by sex, intervention, and exercise_freq.
## ℹ Output is grouped by sex and intervention.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(sex, intervention, exercise_freq))` for per-operation
## grouping (`?dplyr::dplyr_by`) instead.
print(follow_summary)
## # A tibble: 24 × 9
## # Groups: sex, intervention [8]
## sex intervention exercise_freq n Mean SD Median Q1 Q3
## <chr> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Female Diet_Counseling 1-2_per_week 5 8.54 1.26 9.2 7.3 9.2
## 2 Female Diet_Counseling 3+_per_week 10 7.18 0.857 7.15 6.6 7.62
## 3 Female Diet_Counseling None 10 7.21 0.640 7.3 6.93 7.68
## 4 Female Diet_Plus_Exercise 1-2_per_week 9 6.97 0.876 6.8 6.5 7.7
## 5 Female Diet_Plus_Exercise 3+_per_week 8 7.04 0.894 6.95 6.48 7.5
## 6 Female Diet_Plus_Exercise None 7 6.71 0.867 6.3 6.15 7.1
## 7 Female Intensive_Program 1-2_per_week 10 7.07 0.958 7 6.67 7.75
## 8 Female Intensive_Program 3+_per_week 6 6.57 0.532 6.45 6.15 7.05
## 9 Female Intensive_Program None 5 6.56 0.835 6.1 5.9 7.2
## 10 Female Standard_Care 1-2_per_week 7 8.44 0.627 8.5 8 8.85
## # ℹ 14 more rows
change_summary <- diabetes_data |>
group_by(sex,intervention,exercise_freq) |>
summarise(
n = n(),
Mean = mean(hba1c_change),
SD = sd(hba1c_change),
Median = median(hba1c_change),
Q1 = quantile(hba1c_change,0.25),
Q3 = quantile(hba1c_change,0.75)
)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by sex, intervention, and exercise_freq.
## ℹ Output is grouped by sex and intervention.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(sex, intervention, exercise_freq))` for per-operation
## grouping (`?dplyr::dplyr_by`) instead.
print(change_summary)
## # A tibble: 24 × 9
## # Groups: sex, intervention [8]
## sex intervention exercise_freq n Mean SD Median Q1 Q3
## <chr> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Female Diet_Counseling 1-2_per_week 5 1 2.48 1.5 1.4 2.7
## 2 Female Diet_Counseling 3+_per_week 10 0.73 1.16 0.75 0.0250 1.5
## 3 Female Diet_Counseling None 10 1.97 0.994 2 1.45 2.25
## 4 Female Diet_Plus_Exerci… 1-2_per_week 9 1.62 1.24 2.1 1.4 2.3
## 5 Female Diet_Plus_Exerci… 3+_per_week 8 1.09 1.93 1.6 -0.300 2.42
## 6 Female Diet_Plus_Exerci… None 7 2.53 1.13 2.4 1.95 2.55
## 7 Female Intensive_Program 1-2_per_week 10 0.47 1.46 0.200 -0.5 0.95
## 8 Female Intensive_Program 3+_per_week 6 2.38 0.382 2.5 2.15 2.7
## 9 Female Intensive_Program None 5 1.96 1.89 2 1.4 2.6
## 10 Female Standard_Care 1-2_per_week 7 0.371 1.19 0 -0.150 0.95
## # ℹ 14 more rows
diabetes_data <- diabetes_data |>
mutate(hba1c_increase = ifelse(hba1c_change > 0,"YES","NO"),
glucose_healthy = ifelse(fasting_glucose < 70,"Low",
ifelse(fasting_glucose < 100,"Normal","High"))
)
diabetes_data |> count(hba1c_increase,glucose_healthy)
## hba1c_increase glucose_healthy n
## 1 NO High 43
## 2 NO Normal 1
## 3 YES High 149
## 4 YES Normal 7
#Part 2
boxplot_baseline <- ggplot(diabetes_data,aes(x = sex,y = baseline_hba1c,fill = sex))+
geom_boxplot(alpha = 0.7,outlier.shape = 16,outlier.size = 2)+
geom_jitter(width = 0.1,alpha = 0.3,size = 1.5)+
scale_fill_manual(values = c("Male" = "#d41b0b",
"Female" = "#0b51d4"))+
labs(
title = "Baseline Hemoglobin Level by Sex",
x = "Sex",
y = "Baseline Hemoglobin",
caption = "Boxplot with data overlaid"
)+
theme_classic()+
theme(
plot.title = element_text(size = 14,face = "bold",hjust = 0.5),
axis.title = element_text(size = 11,face = "bold"),
plot.caption = element_text(size = 10,color = "gray40"),
legend.position = "none"
)
print(boxplot_baseline)
hist_change <- ggplot(diabetes_data,aes(x = hba1c_change))+
geom_histogram(
binwidth = 0.5,
color = "#a11212",fill = "#db516a"
)+
labs(
title = "Distribution of Change in Hemoglobin",
x = "Change",
y = "Count"
)+
theme_classic()+
theme(
plot.title = element_text(size = 14,face = "bold",hjust = 0.5),
axis.title = element_text(size = 11,face = "bold",color = "gray30"),
legend.position = "none"
)
print(hist_change)
bar_data <- diabetes_data |>
group_by(hba1c_increase) |>
summarise(
n = n(),
Mean = mean(hba1c_change)
)
barplot_change <- ggplot(bar_data,aes(x = hba1c_increase,y = Mean,fill = hba1c_increase))+
geom_bar(stat = "identity")+
scale_fill_manual(values = c("YES" = "#26910c",
"NO" = "#910c1d"))+
labs(
title = "Mean Change Hemoglobin by whether it increases or not",
subtitle = "Comparison by Increasing/Decreasing groups",
x = "Group",
y = "Mean hemoglobin change"
)+
theme_classic()+
theme(
plot.title = element_text(size = 14,face = "bold",hjust = 0.5),
plot.subtitle = element_text(size = 11,color = "gray50",hjust = 0.5),
axis.title = element_text(size = 11,face = "bold"),
legend.position = "none"
)
print(barplot_change)
#Part 3:
Comparing hemoglobin change in different exercise groups
Question : “Does exercise influence hemoglobin levels significantly?
cat("\n--- Independence check ---\n")
##
## --- Independence check ---
cat("Independent groups\n")
## Independent groups
cat("\n--- Normal Distribution Check ---\n")
##
## --- Normal Distribution Check ---
cat("\nPerforming shapiro - wilk test:\n")
##
## Performing shapiro - wilk test:
shapiro_1 <- shapiro.test(diabetes_data$hba1c_change[diabetes_data$exercise_freq == "None"])
shapiro_2 <- shapiro.test(diabetes_data$hba1c_change[diabetes_data$exercise_freq == "1-2_per_week"])
shapiro_3 <- shapiro.test(diabetes_data$hba1c_change[diabetes_data$exercise_freq == "3+_per_week"])
if(shapiro_1$p.value > 0.05 & shapiro_2$p.value > 0.05 & shapiro_3$p.value > 0.05){
cat("\nNormality satisfied (p > 0.05)\n")
}else{
cat("\nNormality violated (p < 0.05)\n")
cat("Perform a test for non-parametric\n")
}
##
## Normality satisfied (p > 0.05)
cat("\nQ-Q Plot test:\n")
##
## Q-Q Plot test:
par(mfrow = c(1,3))
qqnorm(diabetes_data$hba1c_change[diabetes_data$exercise_freq == "None"],
main = "Q-Q Plot: No exc")
qqline(diabetes_data$hba1c_change[diabetes_data$exercise_freq == "None"],
col = "red",lwd = 2)
qqnorm(diabetes_data$hba1c_change[diabetes_data$exercise_freq == "1-2_per_week"],
main = "Q-Q Plot: 1-2 times exc")
qqline(diabetes_data$hba1c_change[diabetes_data$exercise_freq == "1-2_per_week"],
col = "red",lwd = 2)
qqnorm(diabetes_data$hba1c_change[diabetes_data$exercise_freq == "3+_per_week"],
main = "Q-Q Plot: 3+ times exc")
qqline(diabetes_data$hba1c_change[diabetes_data$exercise_freq == "3+_per_week"],
col = "red",lwd = 2)
par(mfrow = c(1,1))
cat("\n--- Equal variance in groups check ---\n")
##
## --- Equal variance in groups check ---
cat("\nLevene test:\n")
##
## Levene test:
levene_change <- leveneTest(hba1c_change ~ exercise_freq,data = diabetes_data)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
if(levene_change$`Pr(>F)`[1] > 0.05){
cat("\nEqual variance\n")
cat("Can perform ANOVA\n")
}else{
cat("\nNot equal variance\n")
}
##
## Equal variance
## Can perform ANOVA
cat("\nANOVA test:\n")
##
## ANOVA test:
anova_test <- aov(hba1c_change ~ exercise_freq,data = diabetes_data)
anova_summary <- summary(anova_test)
f_statistic <- anova_summary[[1]]$`F value`[1]
p_value_anova <- anova_summary[[1]]$`Pr(>F)`[1]
if(p_value_anova < 0.05){
cat("\n(p < 0.05) - reject H₀\n")
cat("Significantly different change detected\n")
cat("Perform POST-HOC next\n")
}else{
cat("\n(p > 0.05) - do not reject H₀\n")
cat("No significant difference\n")
}
##
## (p > 0.05) - do not reject H₀
## No significant difference
cat("\nChecking data significance:")
##
## Checking data significance:
ss_sup <- anova_summary[[1]]$`Sum Sq`[1]
ss_total <- sum(anova_summary[[1]]$`Sum Sq`)
eta_squared <- ss_sup / ss_total
cat("Effect size: ",round(eta_squared,3),"\n")
## Effect size: 0.018
cat("Interpretation:",
ifelse(eta_squared < 0.01,"Negligible",
ifelse(eta_squared < 0.06,"Small",
ifelse(eta_squared < 0.14,"Medium","Large"))),"\n\n")
## Interpretation: Small
#Hypothesis(all means are equal) not rejected
stat_test <- diabetes_data |>
t_test(hba1c_change ~ exercise_freq) |>
adjust_pvalue(method = "bonferroni") |>
add_significance("p.adj")
pub_anova_plot <- ggplot(diabetes_data,
aes(x = exercise_freq,y = hba1c_change,fill = exercise_freq))+
geom_boxplot(alpha = 0.7,outlier.shape = 16,width = 0.6)+
geom_jitter(width = 0.15,alpha = 0.3,size = 1.5)+
stat_summary(fun = mean,geom = "point",shape = 23,size = 3,
fill = "white",color = "black",stroke = 1.5)+
scale_fill_manual(values = c("None" = "#cc1f0c",
"1-2_per_week" = "#18990c",
"3+_per_week" = "#130c99"))+
labs(
title = "Hemoglobin Change by Exercise",
subtitle = sprintf("One-way ANOVA: F(%d,%d) = %.2f, p %s",
anova_summary[[1]]$Df[1],
anova_summary[[1]]$Df[2],
f_statistic,
ifelse(p_value_anova < 0.001,"< 0.001",
paste("=",round(p_value_anova,3)))),
x = "Exericse",
y = "Hemoglobin change",
caption = "white diamond = mean; boxes = median and IQR"
)+
theme_classic()+
theme(
plot.title = element_text(size = 14,face = "bold",hjust = 0.5),
plot.subtitle = element_text(size = 11,color = "gray30",hjust = 0.5),
axis.title = element_text(size = 11,face = "bold"),
plot.caption = element_text(size = 10,color = "gray40",hjust = 0.5),
legend.position = "none",
panel.grid.major.y = element_line(color = "gray90", linewidth = 0.3)
)
print(pub_anova_plot)
#Part 4
Null Hypothesis not rejected - means are similar across groups
Low eta squared - low influence of hemoglobin change by exercise
groups
Limitations of the study: size of the groups not very big - could differ
with population