library(tidyverse)
library(lubridate)
library(xlsx)
library(openxlsx)
library(formattable)
library(kableExtra)
Initial medication groups used for stratification
population <- as_tibble(read.xlsx2("../../PEN Clinical Audit/202002Medication5plus.xlsx",
startRow = 4, sheetIndex = 1,
colClasses = c(ID = 'numeric'),
stringsAsFactors = FALSE))
names(population) <- gsub("\\.", "", names(population)) # remove periods '.'
population <- population[-nrow(population),] %>%
mutate(MedCount = as.numeric(MedCount))
refugeeorasylum <- as_tibble(read.csv("../../PEN Clinical Audit/20200206RefugeeAsylum.csv",
stringsAsFactors = FALSE,
colClasses = c(INTERNALID = 'numeric'))) %>%
select(INTERNALID)
analgesia <- as_tibble(read.xlsx2("../../PEN Clinical Audit/202002MedicationAnalgesia.xlsx",
startRow = 4, sheetIndex = 1,
colClasses = c(ID = 'numeric'),
stringsAsFactors = FALSE))
names(analgesia) <- gsub("\\.", "", names(analgesia)) # remove periods '.'
analgesia <- analgesia[-nrow(analgesia),] %>%
select(ID, Medications)
BP <- as_tibble(read.xlsx2("../../PEN Clinical Audit/202002MedicationBP.xlsx",
startRow = 4, sheetIndex = 1,
colClasses = c(ID = 'numeric'),
stringsAsFactors = FALSE))
names(BP) <- gsub("\\.", "", names(BP)) # remove periods '.'
BP <- BP[-nrow(BP),] %>%
select(ID, Medications)
anticoagulant <- as_tibble(read.xlsx2("../../PEN Clinical Audit/202002MedicationAnticoagulant.xlsx",
startRow = 4, sheetIndex = 1,
colClasses = c(ID = 'numeric'),
stringsAsFactors = FALSE))
names(anticoagulant) <- gsub("\\.", "", names(anticoagulant)) # remove periods '.'
anticoagulant <- anticoagulant[-nrow(anticoagulant),] %>%
select(ID, Medications)
hypoglycaemic <- as_tibble(read.xlsx2("../../PEN Clinical Audit/202002MedicationHypoglycaemic.xlsx",
startRow = 4, sheetIndex = 1,
colClasses = c(ID = 'numeric'),
stringsAsFactors = FALSE))
names(hypoglycaemic) <- gsub("\\.", "", names(hypoglycaemic)) # remove periods '.'
hypoglycaemic <- hypoglycaemic[-nrow(hypoglycaemic),] %>%
select(ID, Medications)
antipsychoticmood <- as_tibble(read.xlsx2("../../PEN Clinical Audit/202002MedicationAntipsychoticMoodStabilizers.xlsx",
startRow = 4, sheetIndex = 1,
colClasses = c(ID = 'numeric'),
stringsAsFactors = FALSE))
names(antipsychoticmood) <- gsub("\\.", "", names(antipsychoticmood)) # remove periods '.'
antipsychoticmood <- antipsychoticmood[-nrow(antipsychoticmood),] %>%
select(ID, Medications)
antidepressant <- as_tibble(read.xlsx2("../../PEN Clinical Audit/202002MedicationAntidepressant.xlsx",
startRow = 4, sheetIndex = 1,
colClasses = c(ID = 'numeric'),
stringsAsFactors = FALSE))
names(antidepressant) <- gsub("\\.", "", names(antidepressant)) # remove periods '.'
antidepressant <- antidepressant[-nrow(antidepressant),] %>%
select(ID, Medications)
antianxiety <- as_tibble(read.xlsx2("../../PEN Clinical Audit/202002MedicationAntianxiety.xlsx",
startRow = 4, sheetIndex = 1,
colClasses = c(ID = 'numeric'),
stringsAsFactors = FALSE))
names(antianxiety) <- gsub("\\.", "", names(antianxiety)) # remove periods '.'
antianxiety <- antianxiety[-nrow(antianxiety),] %>%
select(ID, Medications)
respiratory <- as_tibble(read.xlsx2("../../PEN Clinical Audit/202002MedicationRespiratory.xlsx",
startRow = 4, sheetIndex = 1,
colClasses = c(ID = 'numeric'),
stringsAsFactors = FALSE))
names(respiratory) <- gsub("\\.", "", names(respiratory)) # remove periods '.'
respiratory <- respiratory[-nrow(respiratory),] %>%
select(ID, Medications)
# Calculate age
#
# By default, calculates the typical "age in years", with a
# \code{floor} applied so that you are, e.g., 5 years old from
# 5th birthday through the day before your 6th birthday. Set
# \code{floor = FALSE} to return decimal ages, and change \code{units}
# for units other than years.
# @param dob date-of-birth, the day to start calculating age.
# @param age.day the date on which age is to be calculated.
# @param units unit to measure age in. Defaults to \code{"years"}. Passed to \link{\code{duration}}.
# @param floor boolean for whether or not to floor the result. Defaults to \code{TRUE}.
# @return Age in \code{units}. Will be an integer if \code{floor = TRUE}.
# @examples
# my.dob <- as.Date('1983-10-20')
# age(my.dob)
# age(my.dob, units = "minutes")
# age(my.dob, floor = FALSE)
# code by 'Gregor'
# https://stackoverflow.com/questions/27096485/change-a-column-from-birth-date-to-age-in-r
# requires library 'lubridate'
age <- function(dob, age.day = today(), units = "years", floor = TRUE) {
calc.age = interval(dob, age.day) / duration(num = 1, units = units)
if (floor) return(as.integer(floor(calc.age)))
return(calc.age)
}
population <- population %>%
# add columns for whether they have seen the doctor who is making the telephone calls
# or are of known refugee or asylum seeker background
# (?proxy for low English language literacy)
mutate(RefugeeOrAsylum = ID %in% refugeeorasylum$INTERNALID,
analgesia = ID %in% analgesia$ID,
BP = ID %in% BP$ID,
anticoagulant = ID %in% anticoagulant$ID,
antipsychoticmood = ID %in% antipsychoticmood$ID,
antidepressant = ID %in% antidepressant$ID,
antianxiety = ID %in% antianxiety$ID,
respiratory = ID %in% respiratory$ID,
DOB = as.Date(substr(DOBAge, 1, 10), format = "%d/%m/%Y")) %>%
mutate(Age = age(DOB, age.day = as.Date("2020/02/01"))) %>%
# note that age is on 1st Feb 2020
mutate(AgeGroup10 = ((Age %/% 10)*10),
AgeGroup5 = ((Age %/% 5)* 5)) # 5 and 10-year age groups, labelled with minimum age
population %>%
select(c(DOB, Age, Sex, RefugeeOrAsylum,
analgesia, BP, anticoagulant, antipsychoticmood, antidepressant, antianxiety, respiratory,
AgeGroup10)) %>%
summary()
## DOB Age Sex RefugeeOrAsylum
## Min. :1920-06-15 Min. : 1.00 Length:775 Mode :logical
## 1st Qu.:1947-01-21 1st Qu.:48.00 Class :character FALSE:605
## Median :1958-06-09 Median :61.00 Mode :character TRUE :170
## Mean :1961-01-08 Mean :58.64
## 3rd Qu.:1971-05-29 3rd Qu.:73.00
## Max. :2018-06-18 Max. :99.00
## analgesia BP anticoagulant antipsychoticmood
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:415 FALSE:371 FALSE:595 FALSE:665
## TRUE :360 TRUE :404 TRUE :180 TRUE :110
##
##
##
## antidepressant antianxiety respiratory AgeGroup10
## Mode :logical Mode :logical Mode :logical Min. : 0.0
## FALSE:511 FALSE:688 FALSE:498 1st Qu.:40.0
## TRUE :264 TRUE :87 TRUE :277 Median :60.0
## Mean :54.1
## 3rd Qu.:70.0
## Max. :90.0
Number of patients at each age group, and recorded gender.
ggplot(population, aes(x = Age, fill=Sex)) +
geom_histogram(binwidth = 5, boundary = 25) +
xlab("Age (years)") +
ggtitle("Patients with ")
Total population : 775
medcount_age10 <- as.data.frame.matrix(table(population$MedCount, population$AgeGroup10))
Totals <- medcount_age10 %>%
mutate(Total = select(., `0`:`90`) %>% apply(1, sum, na.rm = TRUE)) %>%
pull(Total)
medcount_age10 %>%
mutate(MedCount = row.names(.)) %>%
mutate_if(is.numeric, function(x) {color_tile("#DeF7E9", "#71CA97")(x)}) %>%
select(MedCount, everything()) %>%
mutate(Total = Totals) %>%
kable("html", align = c("l", rep("r", 11)),
escape = F) %>%
kable_styling("hover", full_width = FALSE) %>%
column_spec(c(2,3,4,5,6,7,8, 9, 10, 11), width = "1.5cm", bold = TRUE) %>%
add_header_above(c(" ", "Age group (decade)" = 10, " "))
MedCount | 0 | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | Total |
---|---|---|---|---|---|---|---|---|---|---|---|
5 | 9 | 7 | 11 | 18 | 30 | 36 | 32 | 24 | 12 | 2 | 181 |
6 | 4 | 4 | 4 | 14 | 16 | 33 | 19 | 20 | 8 | 0 | 122 |
7 | 4 | 2 | 7 | 6 | 18 | 22 | 26 | 16 | 10 | 1 | 112 |
8 | 1 | 0 | 5 | 4 | 5 | 10 | 16 | 12 | 11 | 2 | 66 |
9 | 1 | 0 | 1 | 4 | 6 | 15 | 18 | 13 | 9 | 1 | 68 |
10 | 0 | 0 | 0 | 1 | 6 | 11 | 11 | 7 | 6 | 2 | 44 |
11 | 0 | 0 | 0 | 2 | 5 | 4 | 11 | 6 | 6 | 1 | 35 |
12 | 0 | 0 | 0 | 2 | 1 | 7 | 13 | 3 | 10 | 0 | 36 |
13 | 0 | 0 | 0 | 2 | 5 | 1 | 6 | 5 | 2 | 1 | 22 |
14 | 0 | 0 | 0 | 1 | 1 | 3 | 5 | 6 | 5 | 1 | 22 |
15 | 0 | 0 | 0 | 0 | 0 | 2 | 7 | 6 | 3 | 2 | 20 |
16 | 0 | 0 | 0 | 1 | 1 | 3 | 0 | 2 | 3 | 0 | 10 |
17 | 0 | 0 | 0 | 0 | 1 | 0 | 3 | 5 | 0 | 0 | 9 |
18 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 2 | 0 | 7 |
19 | 0 | 0 | 0 | 1 | 0 | 2 | 2 | 2 | 1 | 0 | 8 |
20 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 4 |
21 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 2 | 1 | 0 | 5 |
23 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
28 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
29 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
population <- population %>%
filter(Age >= 50 & Age <= 89) %>%
filter(MedCount >= 8)
medcount_conditions_crosstable <- function(population) {
mc_refugeeorasylum <- as.data.frame.matrix(table(population$MedCount, population$RefugeeOrAsylum)) %>%
select(Refugee = `TRUE`)
mc_analgesia <- as.data.frame.matrix(table(population$MedCount, population$analgesia)) %>%
select(Analgesia = `TRUE`)
mc_BP <- as.data.frame.matrix(table(population$MedCount, population$BP)) %>%
select(BP = `TRUE`)
mc_anticoagulant <- as.data.frame.matrix(table(population$MedCount, population$anticoagulant)) %>%
select(anticoagulant = `TRUE`)
mc_antipsychoticmood <- as.data.frame.matrix(table(population$MedCount, population$antipsychoticmood)) %>%
select(antipsychotic = `TRUE`)
mc_antidepressant <- as.data.frame.matrix(table(population$MedCount, population$antidepressant)) %>%
select(antidepressant = `TRUE`)
mc_antianxiety <- as.data.frame.matrix(table(population$MedCount, population$antianxiety)) %>%
select(antianxiety = `TRUE`)
mc_respiratory <- as.data.frame.matrix(table(population$MedCount, population$respiratory)) %>%
select(respiratory = `TRUE`)
mc_conditions <- cbind(mc_refugeeorasylum, mc_analgesia, mc_BP,
mc_anticoagulant, mc_antipsychoticmood, mc_antidepressant,
mc_antianxiety, mc_respiratory)
return(mc_conditions)
}
medcount_conditions_crosstable(population) %>%
mutate(MedCount = row.names(.)) %>%
mutate_if(is.numeric, function(x) {color_tile("#DeF7E9", "#71CA97")(x)}) %>%
select(MedCount, everything()) %>%
kable("html", align = c("l", rep("r", 8)),
escape = F) %>%
kable_styling("hover", full_width = FALSE) %>%
column_spec(c(2,3,4,5,6,7,8,9), width = "2cm", width_min = "1.5cm", bold = TRUE) %>%
add_header_above(c(" ", "Conditions" = 8))
MedCount | Refugee | Analgesia | BP | anticoagulant | antipsychotic | antidepressant | antianxiety | respiratory |
---|---|---|---|---|---|---|---|---|
8 | 12 | 25 | 33 | 12 | 2 | 18 | 4 | 18 |
9 | 10 | 29 | 39 | 21 | 6 | 17 | 7 | 22 |
10 | 6 | 22 | 26 | 11 | 5 | 14 | 7 | 11 |
11 | 1 | 11 | 23 | 13 | 4 | 8 | 3 | 11 |
12 | 4 | 21 | 27 | 14 | 4 | 13 | 7 | 14 |
13 | 1 | 9 | 9 | 5 | 1 | 6 | 4 | 12 |
14 | 4 | 16 | 16 | 9 | 1 | 5 | 4 | 8 |
15 | 2 | 17 | 14 | 6 | 2 | 6 | 3 | 14 |
16 | 0 | 6 | 4 | 4 | 2 | 5 | 2 | 4 |
17 | 0 | 7 | 7 | 4 | 1 | 5 | 2 | 5 |
18 | 2 | 5 | 4 | 2 | 0 | 2 | 0 | 3 |
19 | 0 | 6 | 6 | 5 | 0 | 6 | 3 | 6 |
20 | 1 | 3 | 3 | 3 | 0 | 4 | 1 | 3 |
21 | 1 | 3 | 4 | 2 | 0 | 1 | 0 | 2 |
23 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 1 |
28 | 0 | 1 | 1 | 0 | 1 | 1 | 0 | 1 |
31 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 1 |
set.seed(6291602)
# set random number seed
# chosen by time on my watch at that particular second
# use 'createDataPartition' function from 'caret' library
# paste which can ignore NA and empty strings
#
# Acts the same as regular paste, unless na.rm = TRUE,
# in which case empty strings and NA are removed
#
# based on code by Moody_Mudskipper at
# https://stackoverflow.com/questions/13673894/suppress-nas-in-paste
# with additional code from
# https://stackoverflow.com/questions/14270950/suppress-separator-in-paste-when-values-are-missing
#
# @param ... the list of strings to paste
# @param sep the separator string, " " by default
# @param collapse the collapse string, NULL by default
# @param na.rm whether to remove NA and empty strings
#
# @return string
paste2 <- function(..., sep = " ", collapse = NULL, na.rm = FALSE){
# in default case, use paste
if(!na.rm) return(paste(..., sep = sep, collapse = collapse))
# cbind is convenient to recycle, it warns though so use suppressWarnings
dots <- suppressWarnings(cbind(...))
res <- apply(dots, 1, function(...) {
x <- c(...)
x <- x[nchar(x) > 0] # get rid of empty strings
x <- x[length(x) > 0] # get rid of character(0)
if (all(is.na(x))) return(c(""))
do.call(paste, as.list(c(na.omit(x), sep = sep)))
})
if(is.null(collapse)) res else
paste(na.omit(res), collapse = collapse)
}
# balance across possible co-variants of gender, age (10),
# refugee/asylum seeker, BP, antidepressant medication
population <- population %>%
mutate(Subgroup = paste2(Sex,
if_else(RefugeeOrAsylum, "Refugee", as.character(NA)),
if_else(BP, "BP", as.character(NA)),
if_else(antidepressant, "Antidepressant", as.character(NA)),
sep = ", ", na.rm = TRUE)) %>%
mutate(SubgroupMed = paste(Subgroup, # stratification is done on 'SubgroupMed'
# 'hi' or 'low' (relatively) medication count
if_else(MedCount > 11, "H", "L"))) %>%
mutate(Subgroup = if_else(Subgroup == "", "None", Subgroup))
phase1 <- NULL
phase2 <- NULL
for (i in sort(unique(population$AgeGroup10))) {
coinflip <- runif(1)>.5
for (k in sort(unique(population$SubgroupMed))) {
subsection <- population[population$AgeGroup10 == i & population$SubgroupMed == k,]
if (nrow(subsection) > 0) {
# print(paste(k, ",", i, ":", nrow(subsection)))
subsection$rank <- runif(nrow(subsection))
if ((nrow(subsection) %% 2) == 1)
{coinflip <- 1 - coinflip} # toggle from favouring phase1 or phase2
new_phase2 <- top_n(subsection, as.integer(nrow(subsection)/2 + coinflip*.5), rank)
new_phase1 <- anti_join(subsection, new_phase2, by = "ID")
phase2 <- rbind(phase2, new_phase2)
phase1 <- rbind(phase1, new_phase1)
}
}
}
ggplot(phase1, aes(x = AgeGroup10, fill = Subgroup)) +
geom_histogram(binwidth = 10, boundary = 24.95)
ggplot(phase1 %>% mutate(MedCount = as.character(MedCount)),
aes(x = AgeGroup10, fill = as.character(MedCount))) +
geom_histogram(binwidth = 10, boundary = 24.95)
ggplot(phase2, aes(x = AgeGroup10, fill = Subgroup)) +
geom_histogram(binwidth = 10, boundary = 24.95)
ggplot(phase2 %>% mutate(MedCount = as.character(MedCount)),
aes(x = AgeGroup10, fill = as.character(MedCount))) +
geom_histogram(binwidth = 10, boundary = 24.95)
Re-attach patient names and demographic details to sub-groups, and export to Excel file (Phase 1 = Treat, Phase 2 = Control) for use by surveyors.
# set {r eval = FALSE when 'knitting' to form a HTML file}
# as identifying data should not be in a public space!
RecordingVariables <- matrix(c("Ineligible - reason", "Interpreter needed", "Interpreter used",
"Phone attempts", "Message left", "Contact made",
"Declined appt - why",
"Accepted appt", "Date of appt", "Attended appt",
"Review completed", "Result", "LMO Review", "HMR Referral"),
nrow = 1, byrow = FALSE)
# list of recording variables to add to Excel sheets
patient_details <- population %>%
select(ID, Surname, FirstName, KnownAs, Sex, PhoneHW, PhoneM, Address, City, DOB, Age,
MedCount, AgeGroup5, AgeGroup10)
# select columns required for export
phase1_details <- phase1 %>%
select(ID) %>%
# will store AgeGroup5 groups in separate sheets
left_join(patient_details, by = "ID")
phase2_details <- phase2 %>%
select(ID) %>%
# will store AgeGroup5 groups in separate sheets
left_join(patient_details, by = "ID")
wb_phase1 <- createWorkbook() # create blank workbook
for (i in sort(unique(phase1_details$AgeGroup5))) {
subsection <- phase1_details[phase1_details$AgeGroup5 == i,]
sheetname <- paste("Phase 1 - ", i)
addWorksheet(wb_phase1, sheetname)
writeData(wb_phase1, sheetname, subsection %>% select(-c("AgeGroup5", "AgeGroup10")))
writeData(wb_phase1, sheetname, RecordingVariables, startCol = 13, colNames = FALSE)
}
saveWorkbook(wb_phase1, file = "202002PolypharmacyPhase1.xlsx", overwrite = TRUE)
wb_phase2 <- createWorkbook() # create blank workbook
for (i in sort(unique(phase2_details$AgeGroup5))) {
subsection <- phase2_details[phase2_details$AgeGroup5 == i,]
sheetname <- paste("Phase 2 - ", i)
addWorksheet(wb_phase2, sheetname)
writeData(wb_phase2, sheetname, subsection %>% select(-c("AgeGroup5", "AgeGroup10")))
writeData(wb_phase2, sheetname, RecordingVariables, startCol = 13, colNames = FALSE)
}
saveWorkbook(wb_phase2, file = "202002PolypharmacyPhase2.xlsx", overwrite = TRUE)