# Load packages
{
  library(readxl)
  library(writexl)
  library(dplyr)
  library(plyr)
  library(ggplot2)
  library(tidyr)
  library(reshape2)
  library(gridExtra)
}

# Import the pre- and post-survey workbooks and keep the analysis columns
{
  pre <- read_xlsx(
    "GDB182D_CHOJ/QI HLUB Clinic Pre-Survey FINAL_May 19, 2026_12.57_AV_05212026.xlsx",
    sheet = 1
  )
  post <- read_xlsx(
    "GDB182D_CHOJ/QI HLUB Post Survey_ FINALV1May13_AV_05212026.xlsx",
    sheet = 1
  )

  # The first data row is paired with the column names to form a lookup
  # table (column name next to the content of row 1) for each survey.
  predictionary <- cbind(names(pre), unlist(pre[1, ]))
  postdictionary <- cbind(names(post), unlist(post[1, ]))

  # Remove that first row so only the remaining rows are analysed.
  pre <- pre[-1, ]
  post <- post[-1, ]

  # Keep the same seven fields from each survey; only the name of the first
  # column differs between the two files.
  dfpre <- pre[, c("Q1...11", "Q1 _1", "Q22", "Q19_1", "Q21", "Q23_1", "Q24")]
  dfpost <- post[, c("Q1...2", "Q1 _1", "Q22", "Q19_1", "Q21", "Q23_1", "Q24")]

  # Label every row with the survey it came from.
  dfpre[["TIME"]] <- "PRE"
  dfpost[["TIME"]] <- "POST"
}
# Rename the post-survey columns (by position) to the pre-survey names so the
# two tables share one set of column names.
names(dfpost) <- names(dfpre)

# Pre-survey IDs that are also present in the post-survey, in pre-survey order.
matchedID <- dfpre[["Q1...11"]][is.element(dfpre[["Q1...11"]], dfpost[["Q1...11"]])]
cat(matchedID)
## AF0311 AK1234 AL0129 AL1125 AM1121 AB0802 AP6103 AY0121 AY0209 AY0823 EO0308 EH0609 GV1207 JG1126 JK0609 JS0122 JH0119 JV0910 JV8187 KC0215 KG9219 KR0908 KT0814 KX0106 KY0914 MT0424 NM0922 NT0425 OP0204 RC0429 SW0529 SX1129 YV0424

A paired t-test was used to compare pre- and post-survey responses on three items, each measured on a 5-point Likert scale (1 = least, 5 = most).

  • Please indicate your level of knowledge of the cancer screening services offered in the HLUB Clinic.
  • How comfortable are you at encouraging your patient to obtain the cancer screening they are due for?
  • Please indicate your level of knowledge of general cancer screening guidelines for the specific cancer services we offer in the HLUB Clinic, including Sunday clinic screening guidelines.

Analysis was performed in R version 4.5.0 (using RStudio).

# Stack the pre- and post-survey rows and keep only participants whose ID
# appears in matchedID.
df <- rbind(dfpre, dfpost)
df <- df[df$Q1...11 %in% matchedID, ]

# Convert the three Likert items to numeric one column at a time. lapply()
# always returns one element per column, whereas sapply() changes its return
# shape (matrix, vector or list) depending on the input.
likert_cols <- c("Q1 _1", "Q19_1", "Q23_1")
df[likert_cols] <- lapply(df[likert_cols], as.numeric)

# Fix the level order so PRE is listed before POST.
df$TIME <- factor(df$TIME, levels = c("PRE", "POST"))

# Mean response per time point for each of the three Likert items.
# dplyr::summarise is called with its namespace because plyr is attached as
# well and also provides a summarise().

# Item `Q1 _1`
scoreKnowledgeServices <- df %>%
  group_by(TIME) %>%
  dplyr::summarise(Knowledge = mean(`Q1 _1`))

# Item `Q19_1`
scoreEncourageScreen <- df %>%
  group_by(TIME) %>%
  dplyr::summarise(Encourage = mean(`Q19_1`))

# Item `Q23_1`
scoreKnowledgeScreen <- df %>%
  group_by(TIME) %>%
  dplyr::summarise(Knowledge = mean(`Q23_1`))

# Paired t-tests comparing PRE and POST responses for each Likert item.
#
# t.test(paired = TRUE) pairs the two vectors element by element, so the POST
# rows are re-ordered to follow the participant IDs of the PRE rows before
# testing. Without this step a pair could mix two different participants
# whenever the two surveys list respondents in a different order.
#
# data       data frame with a TIME column ("PRE"/"POST") and an ID column
# column     name of the numeric item to test
# id_column  name of the participant ID column
# Returns the p-value of the paired t-test.
paired_p_value <- function(data, column, id_column = "Q1...11") {
  pre_rows <- data[data$TIME %in% "PRE", ]
  post_rows <- data[data$TIME %in% "POST", ]

  # A paired design needs exactly one row per participant and time point.
  if (anyDuplicated(pre_rows[[id_column]]) > 0 ||
        anyDuplicated(post_rows[[id_column]]) > 0) {
    stop("Each participant ID must appear once per time point for a paired test.",
         call. = FALSE)
  }

  # Put the POST rows in the same participant order as the PRE rows.
  post_rows <- post_rows[match(pre_rows[[id_column]], post_rows[[id_column]]), ]

  # The vectors are passed directly, so no `data` argument is needed.
  t.test(pre_rows[[column]], post_rows[[column]], paired = TRUE)$p.value
}

# NOTE: lines starting with `##` below are console output recorded from a run
# made before the ID alignment was added; re-run to refresh them.
p.value.knowledgeservices <- paired_p_value(df, "Q1 _1")
cat("Knowledge of cancer screening services p-value: ", p.value.knowledgeservices)
## Knowledge of cancer screening services p-value:  5.53045e-10
p.value.encouragescreen <- paired_p_value(df, "Q19_1")
cat("Confidence to encourage cancer screening p-value: ", p.value.encouragescreen)
## Confidence to encourage cancer screening p-value:  5.243222e-06
p.value.knowledgescreen <- paired_p_value(df, "Q23_1")
cat("Knowledge of cancer screening guidelines p-value: ", p.value.knowledgescreen)
## Knowledge of cancer screeing guidelines p-value:  1.506424e-11
# Line plots of the mean PRE/POST score for each item, shown side by side.

# Number of matched participants: df holds one PRE and one POST row for each.
# (dim(df) / 2 would return two numbers, rows / 2 and columns / 2, and so
# produce two title strings.)
n_matched <- nrow(df) / 2

# Turn a p-value into the label printed on the plot.
format_p_label <- function(p) {
  if (is.na(p)) {
    "p=NA"
  } else if (p < 0.001) {
    "p<0.001"
  } else {
    sprintf("p=%.3f", p)
  }
}

# Build one PRE/POST mean-score plot.
#
# scores        summary table with a TIME column and one mean-score column
# score_col     name of the mean-score column in `scores`
# p_value       p-value shown as a text annotation above the POST position
# title_prefix  title text; the participant count is appended to it
plot_mean_score <- function(scores, score_col, p_value, title_prefix) {
  ggplot(scores, aes(x = TIME, y = .data[[score_col]], group = 1)) +
    geom_point() +
    geom_line() +
    ylim(c(0, 5)) +
    # annotate() draws the label once; geom_text() with constant values
    # would draw it once per row of `scores`.
    annotate("text", x = "POST", y = 1, label = format_p_label(p_value)) +
    # Mean value printed below each point, always with two decimals.
    geom_text(
      aes(label = sprintf("%.2f", .data[[score_col]])),
      size = 3.5,
      vjust = 2.8
    ) +
    labs(
      x = "Timepoint",
      y = "Average Score",
      title = paste0(title_prefix, n_matched)
    )
}

out1 <- plot_mean_score(
  scoreKnowledgeServices, "Knowledge", p.value.knowledgeservices,
  "Knowledge in cancer\nservices, n="
)

out2 <- plot_mean_score(
  scoreEncourageScreen, "Encourage", p.value.encouragescreen,
  "Confidence to encourage\ncancer screening, n="
)

out3 <- plot_mean_score(
  scoreKnowledgeScreen, "Knowledge", p.value.knowledgescreen,
  "Knowledge in cancer\nscreening guidelines, n="
)


grid.arrange(out1, out2, out3, ncol = 3)