df <- readxl::read_excel(“DM.xlsx”) df <- df[-c(62:67), ] # drop rows 62–67 names(df) <- trimws(names(df))# trim header spaces options(dplyr.summarise.inform = FALSE)

install.packages(c( “readxl”, # Excel import “dplyr”, # data manipulation “tidyr”, # reshaping “ggplot2”, # plotting “knitr”, # tables / Rmd “kableExtra”,# nicer tables “janitor” # clean_names(), tabyl() ))

library(readxl) library(dplyr)

# Quick checks
dim(df)        # rows and columns

## NULL

names(df)      # column names

## NULL

head(df)       # first few rows

##                                               
## 1 function (x, df1, df2, ncp, log = FALSE)    
## 2 {                                           
## 3     if (missing(ncp))                       
## 4         .Call(C_df, x, df1, df2, log)       
## 5     else .Call(C_dnf, x, df1, df2, ncp, log)
## 6 }

Descriptive Statistics

Load libraries

library(readxl) library(ggplot2)

—————————-

1. Age distribution

—————————-

ggplot(df, aes(x = Age)) + geom_histogram(binwidth = 5, fill = “steelblue”, color = “white”) + labs(title = “Age Distribution”, x = “Age (years)”, y = “Count”)

—————————-

2. Gender (Legal Sex)

—————————-

ggplot(df, aes(x = Legal Sex)) + geom_bar(fill = “darkorange”) + labs(title = “Gender (Legal Sex)”, x = “Legal Sex”, y = “Count”)

—————————-

3. Race

—————————-

ggplot(df, aes(x = Race)) + geom_bar(fill = “purple”) + labs(title = “Race”, x = “Race”, y = “Count”) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

—————————-

4. Ethnicity

—————————-

ggplot(df, aes(x = Ethnicity)) + geom_bar(fill = “seagreen”) + labs(title = “Ethnicity”, x = “Ethnicity”, y = “Count”) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

—————————-

5. HbA1c values

—————————-

library(dplyr)

ggplot(df, aes(x = HbA1c Value (Last 3 Months))) + geom_histogram(binwidth = 0.5, fill = “tomato”, color = “white”) + labs(title = “HbA1c Distribution”, x = “HbA1c (%)”, y = “Count”)

— Age: summary table —

age_table <- df %>% summarise( count = sum(!is.na(Age)), mean_age = mean(Age, na.rm = TRUE), median_age = median(Age, na.rm = TRUE), min_age = min(Age, na.rm = TRUE), max_age = max(Age, na.rm = TRUE) )

— Gender (Legal Sex) —

gender_table <- df %>% count(Legal Sex, name = “n”) %>% mutate(percent = round(100 * n / sum(n), 1))

— Race —

race_table <- df %>% count(Race, name = “n”) %>% mutate(percent = round(100 * n / sum(n), 1))

— Ethnicity —

ethnicity_table <- df %>% count(Ethnicity, name = “n”) %>% mutate(percent = round(100 * n / sum(n), 1))

— A1c —

a1c_table <- df %>% summarise( median_A1c = median(HbA1c Value (Last 3 Months), na.rm = TRUE), min_A1c = min(HbA1c Value (Last 3 Months), na.rm = TRUE), max_A1c = max(HbA1c Value (Last 3 Months), na.rm = TRUE) )

show tables

age_table gender_table race_table ethnicity_table a1c_table

1) Normalize column names a bit (trim stray spaces)

names(df) <- trimws(names(df))

2) Find the three question columns by patterns (case-insensitive, handles couldn’t/couldn’t)

find1 <- grep(“worried.run out”, names(df), ignore.case = TRUE) find2 <- grep(”could.afford.not last|couldn’?t\sget more”, names(df), ignore.case = TRUE) find3 <- grep(“last month.*skip meals|skip meals”, names(df), ignore.case = TRUE)

Safety checks: ensure we found one match each

stopifnot(length(find1) >= 1, length(find2) >= 1, length(find3) >= 1)

c(names(df)[find1[1]], names(df)[find2[1]], names(df)[find3[1]]) # print what was matched

3) Permanently rename those columns in df to VS1, VS2, VS3

names(df)[c(find1[1], find2[1], find3[1])] <- c(“VS1”,“VS2”,“VS3”)

4) Build the TRUE/FALSE matrix for “Yes” (robust to Y/YES/SI/SÍ and spaces/case)

norm <- function(x) trimws(toupper(as.character(x))) yes_matrix <- data.frame( VS1 = norm(df$VS1) %in% c("YES","Y","SI","SÍ"), VS2 = norm(df$VS2) %in% c(“YES”,“Y”,“SI”,“SÍ”), VS3 = norm(df$VS3) %in% c(“YES”,“Y”,“SI”,“SÍ”) )

5) Counts & percents for 0/1/2/3 “Yes”

yes_count <- rowSums(yes_matrix, na.rm = TRUE) table_yes <- table(yes_count) percent_yes <- round(100 * prop.table(table_yes), 1)

table_yes percent_yes

6) Which questions were “Yes” (patterns: VS1, VS1, VS2, etc.)

yes_patterns <- apply(yes_matrix, 1, function(x) { cols <- names(yes_matrix)[x] if (length(cols) == 0) “None” else paste(cols, collapse = “,”) }) pattern_table <- sort(table(yes_patterns[yes_count > 0]), decreasing = TRUE) pattern_table

————————————————————

Setup

————————————————————

If needed, install once:

if (!requireNamespace(“readxl”, quietly = TRUE)) install.packages(“readxl”) if (!requireNamespace(“dplyr”, quietly = TRUE)) install.packages(“dplyr”) if (!requireNamespace(“tidyr”, quietly = TRUE)) install.packages(“tidyr”) if (!requireNamespace(“ggplot2”, quietly = TRUE)) install.packages(“ggplot2”) if (!requireNamespace(“scales”, quietly = TRUE)) install.packages(“scales”)

library(readxl); library(dplyr); library(tidyr); library(ggplot2); library(scales)

Load & clean

Helper to find first header that matches a pattern (case-insensitive)

find_col <- function(pattern, nms = names(df)) { hits <- grep(pattern, nms, ignore.case = TRUE, value = TRUE) if (length(hits) == 0) stop(“No column matched:”, pattern) hits[1] }

Helper for % labels in stacked bars

percent_format_1 <- function(x) scales::percent(x, accuracy = 1)

============================================================

Block 1 — Glucose Monitoring

Participants reported how often they checked blood sugar at T0, T1, and T2.

We expect a shift from lower frequencies toward “Daily” over time.

————————————————————

glucose_cols <- c( find_col(“^T0-\sIn the past week.check.blood sugar”), find_col(”^T1-\sIn the past week.check.blood sugar”), find_col(“^T2-\sIn the past week.check.*blood sugar”) )

glucose_long <- df %>% select(all_of(glucose_cols)) %>% setNames(c(“T0”,“T1”,“T2”)) %>% pivot_longer(everything(), names_to=“Timepoint”, values_to=“Frequency”)

p_glucose <- ggplot(glucose_long, aes(Timepoint, fill = Frequency)) + geom_bar(position = “fill”) + scale_y_continuous(labels = percent_format_1) + labs(title = “Glucose Monitoring Frequency Over Time”, y = “% of participants”, x = NULL, fill = “Frequency”) p_glucose

============================================================

Block 2 — Glycemic Control (Self-Reported)

Patients answered whether sugars were usually in target range last week.

Increasing % “Yes” suggests improved self-management.

————————————————————

glycemic_cols <- c( find_col(“^T0-\sWere your sugar levels.target range”), find_col(“^T1-\sWere your sugar levels.target range”), find_col(“^T2-\sWere your sugar levels.target range”) )

glycemic_long <- df %>% select(all_of(glycemic_cols)) %>% setNames(c(“T0”,“T1”,“T2”)) %>% pivot_longer(everything(), names_to=“Timepoint”, values_to=“TargetInRange”)

p_glycemic <- ggplot(glycemic_long, aes(Timepoint, fill = TargetInRange)) + geom_bar(position = “fill”) + scale_y_continuous(labels = percent_format_1) + labs(title = “Self-Reported In-Target Glucose (T0→T2)”, y = “% of participants”, x = NULL, fill = “Response”) p_glycemic

============================================================

Block 3 — Medication Adherence

Patients reported whether they missed any diabetes medication doses in the past week.

A decline in “Yes (missed)” over time indicates better adherence.

————————————————————

miss_cols <- c( find_col(“^T0-\sIn the past week.miss any doses.diabetes medication”), find_col(”^T1-\sIn the past week.miss any doses.diabetes medication”), find_col(“^T2-\sIn the past week.miss any doses.*diabetes medication”) )

miss_long <- df %>% select(all_of(miss_cols)) %>% setNames(c(“T0”,“T1”,“T2”)) %>% pivot_longer(everything(), names_to=“Timepoint”, values_to=“MissedDose”)

p_miss <- ggplot(miss_long, aes(Timepoint, fill = MissedDose)) + geom_bar(position = “fill”) + scale_y_continuous(labels = percent_format_1) + labs(title = “Missed Diabetes Medication Doses (T0→T2)”, y = “% of participants”, x = NULL, fill = “Response”) p_miss

============================================================

Block 4 — Confidence in Food Choices

Confidence in making food choices that help control blood sugar is expected to rise T0→T2.

————————————————————

food_conf_cols <- c( find_col(“^T0-\sAre you.Confident.making food choices.control.blood sugar”), find_col(”^T1-\sAre you.Confident.making food choices.control.blood sugar”), find_col(“^T2-\sAre you.Confident.making food choices.control.*blood sugar”) )

food_conf_long <- df %>% select(all_of(food_conf_cols)) %>% setNames(c(“T0”,“T1”,“T2”)) %>% pivot_longer(everything(), names_to=“Timepoint”, values_to=“Confidence”)

p_food_conf <- ggplot(food_conf_long, aes(Timepoint, fill = Confidence)) + geom_bar(position = “fill”) + scale_y_continuous(labels = percent_format_1) + labs(title = “Confidence in Food Choices (T0→T2)”, y = “% of participants”, x = NULL, fill = “Response”) p_food_conf

============================================================

Block 5 — Confidence in Reading Labels

Nutrition-label literacy should strengthen across follow-ups.

————————————————————

label_conf_cols <- c( find_col(“^T0-\sAre you.confident.read(ing)? food labels”), find_col(”^T1-\sAre you.confident.read(ing)? food labels”), find_col(“^T2-\sAre you.confident.*read(ing)? food labels”) )

label_conf_long <- df %>% select(all_of(label_conf_cols)) %>% setNames(c(“T0”,“T1”,“T2”)) %>% pivot_longer(everything(), names_to=“Timepoint”, values_to=“LabelConfidence”)

p_label_conf <- ggplot(label_conf_long, aes(Timepoint, fill = LabelConfidence)) + geom_bar(position = “fill”) + scale_y_continuous(labels = percent_format_1) + labs(title = “Confidence Reading Food Labels (T0→T2)”, y = “% of participants”, x = NULL, fill = “Response”) p_label_conf

============================================================

Block 6 — Program Utilization

T1/T2 capture how many meals were made with program food, ease of using it,

and whether the portion-control magnet affected plate sizes.

————————————————————

———- Meals made with program food (T1 vs T2) ———-

—- Check what R really calls the meals columns —-

library(dplyr) # — Check and select the correct meals columns, then plot — # See all columns that mention “meals” grep(“meals”, names(df), value = TRUE)

Replace the names below with the exact ones shown by grep output:

———- Meals made with program food (T1 vs T2) ———-

Robust: find the 2 columns by pattern, then select by their actual names

———- Meals made with program food (T1 vs T2) ———-

meals_cols <- c( “T1- In the past week, how many meals did you make using food from the program?…158”, “T2- In the past week, how many meals did you make using food from the program?…159” )

meals_long <- df %>% dplyr::select(dplyr::all_of(meals_cols)) %>% setNames(c(“T1”,“T2”)) %>% tidyr::pivot_longer(dplyr::everything(), names_to = “Timepoint”, values_to = “MealsFromProgram”)

ggplot(meals_long, aes(x = MealsFromProgram)) + geom_bar() + facet_wrap(~ Timepoint, nrow = 1) + labs(title = “Meals Made Using Program Food (T1 vs T2)”, x = “# meals in past week”, y = “Count”)

idx_meals <- grep( “^T[12]-\sIn the past week.how many meals.*(program|program food)“, names(df), ignore.case = TRUE ) meals_cols <- names(df)[idx_meals] meals_cols

Treat responses as categories (works even if entries are text like “More than 10”)

============================================================

Block 7 — Healthcare Engagement

Follow-ups ask whether participants had a diabetes appointment since last check-in.

Increases suggest improved engagement with medical care.

————————————————————

appt_cols <- c( find_col(“^T1-\sSince our last check.appointment.diabetes”), find_col(”^T2-\sSince our last check.appointment.diabetes”) )

appt_long <- df %>% select(all_of(appt_cols)) %>% setNames(c(“T1”,“T2”)) %>% pivot_longer(everything(), names_to=“Timepoint”, values_to=“HadAppt”)

p_appt <- ggplot(appt_long, aes(Timepoint, fill = HadAppt)) + geom_bar(position = “fill”) + scale_y_continuous(labels = percent_format_1) + labs(title = “Diabetes Appointment Since Last Check-In”, y = “% of participants”, x = NULL, fill = “Response”) p_appt

============================================================

Block 8 — Clinical Outcomes

Pre/Post HbA1c (and acute utilization) gauge clinical impact of the program.

Lower post-A1c or fewer ED/hospital days indicate improvement.

————————————————————

—- Block 8: Clinical Outcomes —-

Use exact column names as they appear in your data

col_a1c_pre <- “PRE – HbA1c Value (Last 3 Months)” col_a1c_post <- “POST – HbA1c Value (Last 3 Months)”

a1c_long <- df %>% dplyr::transmute(PRE = .data[[col_a1c_pre]], POST = .data[[col_a1c_post]]) %>% tidyr::pivot_longer(dplyr::everything(), names_to = “Time”, values_to = “A1c”)

ggplot(a1c_long, aes(Time, A1c)) + geom_boxplot() + labs(title = “HbA1c: PRE vs POST”, x = NULL, y = “HbA1c (%)”)

a1c_long <- df %>% transmute(PRE = .data[[col_a1c_pre]], POST = .data[[col_a1c_post]]) %>% pivot_longer(everything(), names_to = “Time”, values_to = “A1c”)

p_a1c <- ggplot(a1c_long, aes(Time, A1c)) + geom_boxplot() + labs(title = “HbA1c: PRE vs POST”, x = NULL, y = “HbA1c (%)”) p_a1c

(Optional) ED visits / Hospital days pre vs post (if present)

ed_pre <- grep(“^PRE-\sED”, names(df), ignore.case = TRUE, value = TRUE) ed_post <- grep(”^POST-\sED”, names(df), ignore.case = TRUE, value = TRUE) hosp_pre <- grep(“^PRE-\sHosp”, names(df), ignore.case = TRUE, value = TRUE) hosp_post<- grep(”^POST-\sHosp”,names(df), ignore.case = TRUE, value = TRUE)

if (length(ed_pre) & length(ed_post)) { ed_long <- df %>% transmute(PRE = .data[[ed_pre[1]]], POST = .data[[ed_post[1]]]) %>% pivot_longer(everything(), names_to=“Time”, values_to=“ED_Visits”) p_ed <- ggplot(ed_long, aes(Time, ED_Visits)) + stat_summary(fun = mean, geom = “bar”, fill = “grey70”) + stat_summary(fun.data = mean_se, geom = “errorbar”, width = .2) + labs(title = “ED Visits: PRE vs POST (Mean ± SE)”, x = NULL, y = “Visits”) p_ed }

if (length(hosp_pre) & length(hosp_post)) { hosp_long <- df %>% transmute(PRE = .data[[hosp_pre[1]]], POST = .data[[hosp_post[1]]]) %>% pivot_longer(everything(), names_to=“Time”, values_to=“Hosp_Days”) p_hosp <- ggplot(hosp_long, aes(Time, Hosp_Days)) + stat_summary(fun = mean, geom = “bar”, fill = “grey70”) + stat_summary(fun.data = mean_se, geom = “errorbar”, width = .2) + labs(title = “Hospital Days: PRE vs POST (Mean ± SE)”, x = NULL, y = “Days”) p_hosp }

============================================================

T1/T2 ask whether the magnet helped with portion sizes and whether information

was shared with others, indicating reach and behavior influence.

————————————————————

share_cols <- grep(“^T[12]-\sDid you share.”, names(df), ignore.case = TRUE, value = TRUE)

if (length(share_cols)) { # Expect two columns (T1 & T2); keep first two if more share_cols <- share_cols[seq_len(min(2, length(share_cols)))] share_long <- df %>% select(all_of(share_cols)) %>% setNames(sub(“^.(T[12]).”, “\1”, share_cols)) %>% pivot_longer(everything(), names_to=“Timepoint”, values_to=“Shared”) p_share <- ggplot(share_long, aes(Timepoint, fill = Shared)) + geom_bar(position = “fill”) + scale_y_continuous(labels = percent_format_1) + labs(title = “Shared Program Information (T1 vs T2)”, y = “% of participants”, x = NULL, fill = “Response”) p_share }

ls(pattern = “^p_”)

p_age <- ggplot(df, aes(x = Age)) + geom_histogram(binwidth = 5, color = “white”) + labs(title = “Age Distribution”, x = “Age (years)”, y = “Count”)

Gender (Legal Sex)

p_gender <- ggplot(df, aes(x = Legal Sex)) + geom_bar() + labs(title = “Gender (Legal Sex)”, x = “Legal Sex”, y = “Count”)

Race

p_race <- ggplot(df, aes(x = Race)) + geom_bar() + labs(title = “Race”, x = “Race”, y = “Count”) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Ethnicity

p_ethnicity <- ggplot(df, aes(x = Ethnicity)) + geom_bar() + labs(title = “Ethnicity”, x = “Ethnicity”, y = “Count”) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Print them so they render in the knitted report

p_age; p_gender; p_race; p_ethnicity

============================================================

Demographic Visualizations

============================================================

—- Age Distribution —-

p_age <- ggplot(df, aes(x = Age)) + geom_histogram(binwidth = 5, fill = “steelblue”, color = “white”) + labs(title = “Age Distribution”, x = “Age (years)”, y = “Count”) p_age

—- Gender —-

p_gender <- ggplot(df, aes(x = Legal Sex)) + geom_bar(fill = “darkorange”) + labs(title = “Gender Distribution”, x = “Legal Sex”, y = “Count”) p_gender

—- Race —-

p_race <- ggplot(df, aes(x = Race)) + geom_bar(fill = “purple”) + labs(title = “Race Distribution”, x = “Race”, y = “Count”) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) p_race

—- Ethnicity —-

p_ethnicity <- ggplot(df, aes(x = Ethnicity)) + geom_bar(fill = “seagreen”) + labs(title = “Ethnicity Distribution”, x = “Ethnicity”, y = “Count”) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) p_ethnicity

DM_FIM_Rmarkdown

2025-09-30

Descriptive Statistics

Load libraries

—————————-

1. Age distribution

—————————-

—————————-

2. Gender (Legal Sex)

—————————-

—————————-

3. Race

—————————-

—————————-

4. Ethnicity

—————————-

—————————-

5. HbA1c values

—————————-

— Age: summary table —

— Gender (Legal Sex) —

— Race —

— Ethnicity —

— A1c —

show tables

1) Normalize column names a bit (trim stray spaces)

2) Find the three question columns by patterns (case-insensitive, handles couldn’t/couldn’t)

Safety checks: ensure we found one match each

3) Permanently rename those columns in df to VS1, VS2, VS3

4) Build the TRUE/FALSE matrix for “Yes” (robust to Y/YES/SI/SÍ and spaces/case)

5) Counts & percents for 0/1/2/3 “Yes”

6) Which questions were “Yes” (patterns: VS1, VS1, VS2, etc.)

————————————————————

Setup

————————————————————

If needed, install once:

Load & clean

Helper to find first header that matches a pattern (case-insensitive)

Helper for % labels in stacked bars

============================================================

Block 1 — Glucose Monitoring

Participants reported how often they checked blood sugar at T0, T1, and T2.

We expect a shift from lower frequencies toward “Daily” over time.

————————————————————

============================================================

Block 2 — Glycemic Control (Self-Reported)

Patients answered whether sugars were usually in target range last week.

Increasing % “Yes” suggests improved self-management.

————————————————————

============================================================

Block 3 — Medication Adherence

Patients reported whether they missed any diabetes medication doses in the past week.

A decline in “Yes (missed)” over time indicates better adherence.

————————————————————

============================================================

Block 4 — Confidence in Food Choices

Confidence in making food choices that help control blood sugar is expected to rise T0→T2.

————————————————————

============================================================

Block 5 — Confidence in Reading Labels

Nutrition-label literacy should strengthen across follow-ups.

————————————————————

============================================================

Block 6 — Program Utilization

T1/T2 capture how many meals were made with program food, ease of using it,

and whether the portion-control magnet affected plate sizes.

————————————————————

———- Meals made with program food (T1 vs T2) ———-

—- Check what R really calls the meals columns —-

Replace the names below with the exact ones shown by grep output:

———- Meals made with program food (T1 vs T2) ———-

Robust: find the 2 columns by pattern, then select by their actual names

———- Meals made with program food (T1 vs T2) ———-

Treat responses as categories (works even if entries are text like “More than 10”)

============================================================

Block 7 — Healthcare Engagement

Follow-ups ask whether participants had a diabetes appointment since last check-in.

Increases suggest improved engagement with medical care.

————————————————————

============================================================