knitr::opts_chunk$set(echo = params$printcode)
I am an Executive Director Medical Services of Hospital A (denoted in Col A in PCOR File.xlsx) and our hospital has been contributing to Prostate Cancer Outcomes Registry (PCOR) since 2016. Please provide me with the completeness of PSA assessment (denoted in Col C in PCOR File.xlsx) at diagnosis (denoted in Col B in PCOR File.xlsx) in my hospital compared to others? Please display this indicator (PSA assessment completed and documented at diagnosis) in my hospital over time.
library(readr) #Load data
library(dplyr) #Process data
library(tidyr) #Data Wrangling
library(lubridate) #Dealing with date/time variables
library(ggplot2) #Graphical plotting
library(plotly) #Interactive graphical plotting
library(knitr) #Output package
library(kableExtra)
library(magrittr) #R piping function
The data set table is comprised of 3 columns
1. Hospital
a. Hospital Name
2. DateofDiagnosis
a. Date of Diagnosis
3. Indicator_X
a. PSA assessment completed and documented at diagnosis
- 1: Yes
- 0: No
# Load excel file
pcor <- readxl::read_xlsx("PCOR.xlsx", col_names = TRUE, na = "", trim_ws = TRUE)
#List headers
names(pcor)
## [1] "Hospital" "Dateofdiagnosis" "Indicator_X"
#Change column names
colnames(pcor) <- c("Hospital", "Date_of_Diagnosis", "PSA_Assessment_Completed")
# Check class for earch data column
class(pcor$Hospital)
## [1] "character"
class(pcor$Date_of_Diagnosis)
## [1] "POSIXct" "POSIXt"
class(pcor$PSA_Assessment_Completed)
## [1] "numeric"
The data type for ‘Hospital’ is character while ‘Date of Diagnosis’ is date-time and ‘PAC Assessment Completed’ is numeric. But ‘PAC Assessment Completed’ is binary so we should change this to factor.
# Change class of 'PAC Assessment Completed' to factor
pcor$PSA_Assessment_Completed <- factor(pcor$PSA_Assessment_Completed, levels = c(0, 1), labels = c("No", "Yes"))
# Check class of 'PSA Assessment Completed'
class(pcor$PSA_Assessment_Completed)
## [1] "factor"
summary(pcor)
## Hospital Date_of_Diagnosis PSA_Assessment_Completed
## Length:8164 Min. :2016-01-01 00:00:00 No :3358
## Class :character 1st Qu.:2016-10-03 00:00:00 Yes:4806
## Mode :character Median :2017-06-26 00:00:00
## Mean :2017-06-28 18:28:13
## 3rd Qu.:2018-03-30 00:00:00
## Max. :2018-12-31 00:00:00
# Calculate PSA assessments completed and not completed for each hospital
hospitals_PSA_sum <- pcor %>%
group_by(Hospital, PSA_Assessment_Completed) %>%
summarize(count=n())
# Calculate proportion of PSA assessments ocmpleted for each hospital
hospitals_PSA_sum$Proportion_of_total <- hospitals_PSA_sum$count/nrow(pcor)
# PSA assessments completed for each hospital as percentage of total
hospitals_PSA_sum$Percent_of_total <- hospitals_PSA_sum$count/nrow(pcor)*100
# Percentage of PSA assessments completed for each hospital
PSA_complete_hospital <- hospitals_PSA_sum %>%
group_by(Hospital) %>%
mutate(Percent_per_hospital = (count/sum(count) * 100))
# Display first 6 lines
head(PSA_complete_hospital)
# Calculate number of PSA assessments on each day
PSA_num <- pcor %>%
group_by(Hospital, Date_of_Diagnosis, PSA_Assessment_Completed) %>%
summarize(count=n())
# Calculate cumulative sum of PSA assessments
PSA_num$cumSumTotal <- unlist(tapply(PSA_num$count, PSA_num$Hospital, cumsum))
# Display first 6 lines
head(PSA_num)
# Create table containing only PSA assessments completed
PSA_num_yes <- PSA_num %>% filter(PSA_Assessment_Completed == 'Yes')
# Calculate cumulative sum for PSA assessments completed
PSA_num_yes$cumSumYes <- unlist(tapply(PSA_num_yes$count, PSA_num_yes$Hospital, cumsum))
# Display first 6 rows
head(PSA_num_yes)
# Join tables
PSA_percent_complete <- left_join(PSA_num, PSA_num_yes)
# Display first 6 rows
head(PSA_percent_complete)
# Calculate precent PSA assessments completed over time
PSA_percent_complete <- transform(PSA_percent_complete, percentComplete = (cumSumYes/cumSumTotal)*100)
# Display first 6 rows
head(PSA_percent_complete)
#Create new table containing Hospital, PSA_Assessments_completed and Percent_per_hospital (Percent completed for each hospital)
PSA_complete_hospital_transpose <- PSA_complete_hospital %>% select(Hospital, PSA_Assessment_Completed, Percent_per_hospital)
#Transpose table so that data can be plotted in plotly
PSA_complete_hospital_transpose <- spread(PSA_complete_hospital_transpose, PSA_Assessment_Completed, Percent_per_hospital)
#Create interactive plotly graph
plot_ly(PSA_complete_hospital_transpose, x=~Hospital, y=~Yes, type = "bar", name = "Yes") %>%
add_trace(y=~No, name = "No") %>%
layout(
xaxis = list(
title = "Hospital",
titlefont = list(
size = 20,
color = 'rgb(107, 107, 107)'),
tickfont = list(
size = 16,
color = 'rgb(107, 107, 107)')),
yaxis = list(
title = 'Percent (%)',
titlefont = list(
size = 20,
color = 'rgb(107, 107, 107)'),
tickfont = list(
size = 16,
color = 'rgb(107, 107, 107)')),
legend = list(bgcolor = 'rgba(255, 255, 255, 0)', bordercolor = 'rgba(255, 255, 255, 0)'),
barmode = 'group', bargap = 0.15, bargroupgap = 0.1)
Fig. 1 Completeness of PSA assessments for each hospital. Blue bars - Percentage of PSA assessments completed and documented. Orange bars - Percentage of PSA assessments not completed.
# Remove rows containing missing values
PSA_percent_complete_noNa <- PSA_percent_complete %>% na.omit()
# Legend title
legendtitle <- list(yref='paper',xref="paper",y=1.05,x=1.1, text="<b>Hospital</b>",showarrow=F)
# Plot PSA completeness for each hospital
plot_ly(PSA_percent_complete_noNa,
x=~Date_of_Diagnosis,
y=~percentComplete,
type = 'scatter',
mode = 'lines',
name = ~Hospital,
color = ~Hospital == 'A',
colors = c("lightgrey", "red"),
text = paste("<b>Hospital:</b> ", PSA_percent_complete_noNa$Hospital)) %>%
layout(
annotations=legendtitle,
xaxis = list(
title = "",
tickangle = -45,
titlefont = list(
size = 12,
color = 'rgb(107, 107, 107)'),
tickfont = list(
size = 14,
color = 'rgb(107, 107, 107)')),
yaxis = list(
automargin = TRUE,
title = 'Percent Completed (%)',
titlefont = list(
size = 20,
color = 'rgb(107, 107, 107)'),
tickfont = list(
size = 16,
color = 'rgb(107, 107, 107)')),
legend = list(bgcolor = 'rgba(255, 255, 255, 0)',
bordercolor = 'rgba(255, 255, 255, 0)'),
barmode = 'group', bargap = 0.15, bargroupgap = 0.1)
Fig. 2 Percentage PSA assessments completed and documented at diagnosis. Comparison of hospital A (red line) with other hospitals in the PSA registry.