Title - Note writing project#2 - Golob JF, Como JJ

To run this file, set the value of “q” to “quarter 1”, “Jan thru May”, or “- All”. The program needs two input files for the chosen period, one for 2014 and one for 2015.

# Reporting-period label; it is pasted into the input and output CSV file names and into the plot titles.
q<-"Jan thru May"

Next, we will read in the data from a CSV file.

# Build the per-year input file names from the period label `q` and load both
# exports. The first row of each CSV is read as the column names.
datafile2014<-paste0("Golob - Trauma Notes 2014 ", q, ".csv")
datafile2015<-paste0("Golob - Trauma Notes 2015 ", q, ".csv")
# `header` is written out in full; `head=` only worked through partial argument matching.
notesData2014<-read.csv(datafile2014, header=TRUE)
notesData2015<-read.csv(datafile2015, header=TRUE)

##  notesData2014<-subset(notesData2014, (Note.Author!="Kreiner, Laura A., MD"))
##  notesData2015<-subset(notesData2015, (Note.Author!="Kreiner, Laura A., MD"))

We first count the notes of each type, and then keep only notes of the type “Progress Notes” with a non-blank admit time (inpatient progress notes). We are keeping patients with a blank Discharge date - these patients were likely still in the hospital at the conclusion of the study period. The number of notes in each category over this time period is printed:

# Print the note counts for one study year and return that year's inpatient
# progress notes (type "Progress Notes" with a non-blank admit time).
#   allNotes  - data frame holding Note.Type and Admit.Date.Time columns
#   yearLabel - text placed after "In " on every printed line
reportNoteCounts<-function(allNotes, yearLabel) {
  say<-function(howMany, what) {
    print(paste0("In ", yearLabel, " - ", howMany, " ", what), quote=FALSE)
  }
  # An NA comparison is counted as "no match", which is how subset() treats it.
  known<-function(flag) {
    flag&!is.na(flag)
  }
  noteType<-allNotes[["Note.Type"]]
  blankAdmit<-allNotes[["Admit.Date.Time"]]==" "
  isProgress<-noteType=="Progress Notes"

  say(nrow(allNotes), "total notes")
  say(sum(known(noteType=="Consults")), "consult notes")
  say(sum(known(noteType=="H&P")), "H&P's")
  say(sum(known(noteType=="OP Note")), "operative notes")
  say(sum(known(noteType=="Procedures")), "procedures")
  say(sum(known(isProgress)), "total progress notes")
  say(sum(known(isProgress&blankAdmit)), "outpatient progress notes")
  inpatient<-known(isProgress&!blankAdmit)
  say(sum(inpatient), "inpatient progress notes")
  allNotes[inpatient, , drop=FALSE]
}

notesDataSubset2014<-reportNoteCounts(notesData2014, "2014")
## [1] In 2014 - 9726 total notes
## [1] In 2014 - 288 consult notes
## [1] In 2014 - 1335 H&P's
## [1] In 2014 - 513 operative notes
## [1] In 2014 - 362 procedures
## [1] In 2014 - 7228 total progress notes
## [1] In 2014 - 779 outpatient progress notes
## [1] In 2014 - 6449 inpatient progress notes
notesDataSubset2015<-reportNoteCounts(notesData2015, "2015")
## [1] In 2015 - 10933 total notes
## [1] In 2015 - 313 consult notes
## [1] In 2015 - 1495 H&P's
## [1] In 2015 - 548 operative notes
## [1] In 2015 - 311 procedures
## [1] In 2015 - 8265 total progress notes
## [1] In 2015 - 934 outpatient progress notes
## [1] In 2015 - 7331 inpatient progress notes

Converting all dates to the POSIXct class.

library(lubridate)
## Warning: package 'lubridate' was built under R version 3.2.1
# Column 4 is parsed with a two-digit year (%y), column 9 with a four-digit
# year (%Y); both carry the time as "HHMM" with no separator.
# Column 4 is presumably the time the note was written and column 9 the
# discharge time (that is how the late-note comparison uses them) - confirm against the export.
writtenFormat<-"%m/%d/%y %H%M"
dischargeFormat<-"%m/%d/%Y %H%M"

# 2014: week-of-year index, then time of day as decimal hours (hour + minute/60).
notesDataSubset2014[,4]<-as.POSIXct(notesDataSubset2014[,4], format=writtenFormat)
written2014<-notesDataSubset2014[,4]
notesPeriod2014<-ceiling(yday(written2014)/7)
notesHour2014<-as.numeric(format(written2014, "%H"))
notesMinutePct2014<-as.numeric(format(written2014, "%M"))/60
notesTime2014<-notesHour2014+notesMinutePct2014
notesDataSubset2014[,9]<-as.POSIXct(notesDataSubset2014[,9], format=dischargeFormat)

# 2015: same derivation.
notesDataSubset2015[,4]<-as.POSIXct(notesDataSubset2015[,4], format=writtenFormat)
written2015<-notesDataSubset2015[,4]
notesPeriod2015<-ceiling(yday(written2015)/7)
notesHour2015<-as.numeric(format(written2015, "%H"))
notesMinutePct2015<-as.numeric(format(written2015, "%M"))/60
notesTime2015<-notesHour2015+notesMinutePct2015
notesDataSubset2015[,9]<-as.POSIXct(notesDataSubset2015[,9], format=dischargeFormat)

Calculation of any note that was written after patient discharge. Outputs are the number of late notes, the percentage of late notes, and the percentage of late notes by attending surgeon.

# ---- 2014 ----
# A note is flagged late when column 4 is later than column 9; the flag is NA
# when either timestamp is missing.
lateNote2014<-notesDataSubset2014[[4]]>notesDataSubset2014[[9]]
numberLateNotes2014<-sum(lateNote2014, na.rm=TRUE)
# NOTE(review): the denominator counts every row, including rows whose flag is
# NA, whereas the per-attending means below drop NA flags (na.rm=TRUE).
percentLateNote2014<-numberLateNotes2014/nrow(notesDataSubset2014)
# The flag is stored as column 11 of the data frame.
notesDataSubset2014[,11]<-lateNote2014
# Fraction of late notes per value of column 3 (Note.Author in the output below).
attendingLateNote2014<-aggregate(lateNote2014, by=notesDataSubset2014[3], FUN=mean, na.rm=TRUE)
print("Note written one minute or more after patient discharge - 2014")
## [1] "Note written one minute or more after patient discharge - 2014"
numberLateNotes2014
## [1] 819
percentLateNote2014
## [1] 0.1269964
attendingLateNote2014
##                Note.Author           x
## 1 Claridge, Jeffrey A., MD 0.144268775
## 2           Como, John, MD 0.080393765
## 3 Golob Jr., Joseph F., MD 0.005865103
## 4    Khandelwal, Anjay, MD 0.220443350
## 5    McDonald, Amy Ann, MD 0.194444444
## 6        Patel, Nimitt, MD 0.283495146
## 7   Yowler, Charles J., MD 0.003171247
# ---- 2015 ---- (same calculation)
lateNote2015<-notesDataSubset2015[[4]]>notesDataSubset2015[[9]]
numberLateNotes2015<-sum(lateNote2015, na.rm=TRUE)
percentLateNote2015<-numberLateNotes2015/nrow(notesDataSubset2015)
notesDataSubset2015[,11]<-lateNote2015
attendingLateNote2015<-aggregate(lateNote2015, by=notesDataSubset2015[3], FUN=mean, na.rm=TRUE)
print("Note written one minute or more after patient discharge - 2015")
## [1] "Note written one minute or more after patient discharge - 2015"
numberLateNotes2015
## [1] 618
percentLateNote2015
## [1] 0.08429955
attendingLateNote2015
##                Note.Author           x
## 1 Claridge, Jeffrey A., MD 0.089086860
## 2           Como, John, MD 0.072983355
## 3 Golob Jr., Joseph F., MD 0.005980861
## 4    Khandelwal, Anjay, MD 0.152462121
## 5    Kreiner, Laura A., MD 0.117647059
## 6    McDonald, Amy Ann, MD 0.051851852
## 7        Patel, Nimitt, MD 0.171052632
## 8   Yowler, Charles J., MD 0.006730769

The average hour of the day that a note is written, first by total average, then by attending surgeon.

# ---- 2014 ----
# Store the decimal-hour note time as column 12 and name columns 11 and 12.
notesDataSubset2014[,12]<-notesTime2014
colnames(notesDataSubset2014)[11:12]<-c("Late.Note", "Note.Time")
# Mean note time per value of column 3 (Note.Author in the output below).
attendingNoteTime2014<-aggregate(notesDataSubset2014[,12], by=notesDataSubset2014[3], mean, na.rm=TRUE)
# na.rm=TRUE matches the per-attending aggregate above; without it a single
# unparseable timestamp would turn the overall mean into NA.
MeanNoteTime2014<-mean(notesDataSubset2014[,12], na.rm=TRUE)
print("Average hour of the day note written - 2014")
## [1] "Average hour of the day note written - 2014"
MeanNoteTime2014
## [1] 14.69686
attendingNoteTime2014
##                Note.Author        x
## 1 Claridge, Jeffrey A., MD 15.03409
## 2           Como, John, MD 15.22648
## 3 Golob Jr., Joseph F., MD 12.08425
## 4    Khandelwal, Anjay, MD 15.54108
## 5    McDonald, Amy Ann, MD 16.61100
## 6        Patel, Nimitt, MD 16.01561
## 7   Yowler, Charles J., MD 12.76856
# ---- 2015 ---- (same calculation)
notesDataSubset2015[,12]<-notesTime2015
colnames(notesDataSubset2015)[11:12]<-c("Late.Note", "Note.Time")
attendingNoteTime2015<-aggregate(notesDataSubset2015[,12], by=notesDataSubset2015[3], mean, na.rm=TRUE)
MeanNoteTime2015<-mean(notesDataSubset2015[,12], na.rm=TRUE)
print("Average hour of the day note written - 2015")
## [1] "Average hour of the day note written - 2015"
MeanNoteTime2015
## [1] 14.0886
attendingNoteTime2015
##                Note.Author        x
## 1 Claridge, Jeffrey A., MD 14.70364
## 2           Como, John, MD 15.16721
## 3 Golob Jr., Joseph F., MD 12.38947
## 4    Khandelwal, Anjay, MD 13.46397
## 5    Kreiner, Laura A., MD 14.08864
## 6    McDonald, Amy Ann, MD 15.43901
## 7        Patel, Nimitt, MD 15.65165
## 8   Yowler, Charles J., MD 12.29393

Histograms of the above:

# Count histograms of note time in six 4-hour bins, one per year. The returned
# objects keep the per-bin counts that the chi-square tests read.
fourHourBreaks<-seq(0, 24, by=4)
clockLabels<-c("MN", "4AM", "8AM", "NOON", "4PM", "8PM", "MN")

histNoteHour2014<-hist(notesDataSubset2014$Note.Time, breaks=fourHourBreaks, labels=TRUE, col="grey", xaxt="n", ylim=c(0, 4000), main=paste0("2014 ", q, " Histogram"), xlab="Hour of the Day in Which Notes Written", ylab="Number of Notes Written")
axis(side=1, at=fourHourBreaks, labels=clockLabels)

histNoteHour2015<-hist(notesDataSubset2015$Note.Time, breaks=fourHourBreaks, labels=TRUE, col="grey", xaxt="n", ylim=c(0, 4000), main=paste0("2015 ", q, " Histogram"), xlab="Hour of the Day in Which Notes Written", ylab="Number of Notes Written")
axis(side=1, at=fourHourBreaks, labels=clockLabels)

Chi square test to compare percentage of notes done between MN and 4AM between 2014 and 2015:

# 2x2 table filled by column: rows = notes inside / outside the first (MN-4AM) bin, columns = 2014 / 2015.
binCounts2014<-histNoteHour2014$counts
binCounts2015<-histNoteHour2015$counts
matrMN<-matrix(c(binCounts2014[1], sum(binCounts2014[-1]), binCounts2015[1], sum(binCounts2015[-1])), nrow=2, ncol=2)
# Run the test once; print it and reuse its p-value.
testMN<-chisq.test(matrMN)
testMN
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  matrMN
## X-squared = 0.6143, df = 1, p-value = 0.4332
# pmat collects one significance flag (p < 0.05) per time bin, starting here.
pmat<-testMN$p.value<0.05

Chi square test to compare percentage of notes done between 4AM and 8AM between 2014 and 2015:

# 2x2 table filled by column: rows = notes inside / outside the second (4AM-8AM) bin, columns = 2014 / 2015.
binCounts2014<-histNoteHour2014$counts
binCounts2015<-histNoteHour2015$counts
matr4AM<-matrix(c(binCounts2014[2], sum(binCounts2014[-2]), binCounts2015[2], sum(binCounts2015[-2])), nrow=2, ncol=2)
# Run the test once; print it and reuse its p-value.
test4AM<-chisq.test(matr4AM)
test4AM
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  matr4AM
## X-squared = 11.526, df = 1, p-value = 0.0006865
# Append this bin's significance flag (p < 0.05).
pmat<-c(pmat, test4AM$p.value<0.05)

Chi square test to compare percentage of notes done between 8AM and NOON between 2014 and 2015:

# 2x2 table filled by column: rows = notes inside / outside the third (8AM-NOON) bin, columns = 2014 / 2015.
binCounts2014<-histNoteHour2014$counts
binCounts2015<-histNoteHour2015$counts
matr8AM<-matrix(c(binCounts2014[3], sum(binCounts2014[-3]), binCounts2015[3], sum(binCounts2015[-3])), nrow=2, ncol=2)
# Run the test once; print it and reuse its p-value.
test8AM<-chisq.test(matr8AM)
test8AM
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  matr8AM
## X-squared = 0.77178, df = 1, p-value = 0.3797
# Append this bin's significance flag (p < 0.05).
pmat<-c(pmat, test8AM$p.value<0.05)

Chi square test to compare percentage of notes done between NOON and 4PM between 2014 and 2015:

# 2x2 table filled by column: rows = notes inside / outside the fourth (NOON-4PM) bin, columns = 2014 / 2015.
binCounts2014<-histNoteHour2014$counts
binCounts2015<-histNoteHour2015$counts
matrNOON<-matrix(c(binCounts2014[4], sum(binCounts2014[-4]), binCounts2015[4], sum(binCounts2015[-4])), nrow=2, ncol=2)
# Run the test once; print it and reuse its p-value.
testNOON<-chisq.test(matrNOON)
testNOON
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  matrNOON
## X-squared = 60.596, df = 1, p-value = 7.006e-15
# Append this bin's significance flag (p < 0.05).
pmat<-c(pmat, testNOON$p.value<0.05)

Chi square test to compare percentage of notes done between 4PM and 8PM between 2014 and 2015:

# 2x2 table filled by column: rows = notes inside / outside the fifth (4PM-8PM) bin, columns = 2014 / 2015.
binCounts2014<-histNoteHour2014$counts
binCounts2015<-histNoteHour2015$counts
matr4PM<-matrix(c(binCounts2014[5], sum(binCounts2014[-5]), binCounts2015[5], sum(binCounts2015[-5])), nrow=2, ncol=2)
# Run the test once; print it and reuse its p-value.
test4PM<-chisq.test(matr4PM)
test4PM
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  matr4PM
## X-squared = 3.6139, df = 1, p-value = 0.0573
# Append this bin's significance flag (p < 0.05).
pmat<-c(pmat, test4PM$p.value<0.05)

Chi square test to compare percentage of notes done between 8PM and MN between 2014 and 2015:

# 2x2 table filled by column: rows = notes inside / outside the sixth (8PM-MN) bin, columns = 2014 / 2015.
binCounts2014<-histNoteHour2014$counts
binCounts2015<-histNoteHour2015$counts
matr8PM<-matrix(c(binCounts2014[6], sum(binCounts2014[-6]), binCounts2015[6], sum(binCounts2015[-6])), nrow=2, ncol=2)
# Run the test once; print it and reuse its p-value.
test8PM<-chisq.test(matr8PM)
test8PM
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  matr8PM
## X-squared = 207.09, df = 1, p-value < 2.2e-16
# Append this bin's significance flag (p < 0.05).
pmat<-c(pmat, test8PM$p.value<0.05)

Chi square test comparing the full 2014 and 2015 distributions across the six time-of-day bins, followed by a grouped barplot of the counts:

# 2 x 6 table of note counts: row 1 = 2014, row 2 = 2015, one column per 4-hour bin.
matrAll<-matrix(c(histNoteHour2014$counts, histNoteHour2015$counts), nrow=2, ncol=6, byrow=TRUE)
chisq.test(matrAll)
## 
##  Pearson's Chi-squared test
## 
## data:  matrAll
## X-squared = 236.59, df = 5, p-value < 2.2e-16
# Grouped bars (2014 black, 2015 grey). `names.arg` is written out in full;
# `names=` only worked through partial argument matching.
bp<-barplot(matrAll, main="Total Inpatient Progress Notes With and Without Scribes", sub="*p<0.05", col=c("black", "grey"), xlab="Time of day note was completed", ylab="Number of notes written", ylim=c(0,3500), names.arg=c("MN-4AM", "4-8AM", "8AM-12PM", "12-4PM", "4-8PM", "8PM-MN"), beside=TRUE)
# Put an asterisk above each bin flagged in pmat. The x position is the centre
# of the bar pair, taken from the midpoints barplot() returned rather than a
# hard-coded 3k-1; y is the taller of the two bars.
text(x=colMeans(bp)[pmat], y=apply(matrAll, 2, max)[pmat], labels="*", cex=2, pos=3)
legend("topright", lty=1, lwd=15, legend=c("2014 - No scribes", "2015 - Scribes"), col=c("black", "grey"), cex=1)

Frequency histograms of above:

# Density-scaled histograms of note time in twelve 2-hour bins, each with a
# kernel density curve drawn on top. The returned objects keep the bin counts.
twoHourBreaks<-seq(0, 24, by=2)
clockTicks<-seq(0, 24, by=4)
clockTickLabels<-c("MN", "4AM", "8AM", "NOON", "4PM", "8PM", "MN")

fhist2014<-hist(notesDataSubset2014$Note.Time, breaks=twoHourBreaks, probability=TRUE, labels=TRUE, xaxt="n", ylim=c(0, 0.16), main=paste0("2014 ", q, " Histogram - Frequency"), xlab="Hour of the Day in Which Notes Written", ylab="Frequency with which Notes Written")
axis(side=1, at=clockTicks, labels=clockTickLabels)
lines(density(notesDataSubset2014$Note.Time))

fhist2015<-hist(notesDataSubset2015$Note.Time, breaks=twoHourBreaks, probability=TRUE, labels=TRUE, xaxt="n", ylim=c(0, 0.16), main=paste0("2015 ", q, " Histogram - Frequency"), xlab="Hour of the Day in Which Notes Written", ylab="Frequency with which Notes Written")
axis(side=1, at=clockTicks, labels=clockTickLabels)
lines(density(notesDataSubset2015$Note.Time))

# 2x2 contingency table filled by column: rows = notes inside / outside the
# 6-8AM bin, columns = 2014 / 2015.
# - With 2-hour bins starting at midnight (0-2, 2-4, 4-6, 6-8, ...) the 6-8AM
#   bin is the 4th one; index 3 is 4-6AM.
# - nrow/ncol must be given: without them matrix() builds a single column and
#   chisq.test() falls back to a goodness-of-fit test on four counts
#   ("Chi-squared test for given probabilities", df = 3) instead of comparing years.
matr6to8AM<-matrix(c(fhist2014$counts[4], sum(fhist2014$counts[-4]), fhist2015$counts[4], sum(fhist2015$counts[-4])), nrow=2, ncol=2)
chisq.test(matr6to8AM)
# NOTE: the recorded output that follows was produced before this fix and must be regenerated.
## 
##  Chi-squared test for given probabilities
## 
## data:  matr6to8AM
## X-squared = 13505, df = 3, p-value < 2.2e-16

Same frequency histograms, but sorted by weekdays and weekends:

# Draw a density-scaled histogram of note times in 1-hour bins with a kernel
# density curve on top.
#   noteTimes - numeric vector of decimal hours (0-24)
#   plotTitle - main title of the plot
plotHourlyFrequency<-function(noteTimes, plotTitle) {
  hist(noteTimes, main=plotTitle, breaks=c(0:24), xaxt="n", xlab="Hour of the Day in Which Notes Written", ylab="Frequency with which Notes Written", probability=TRUE, ylim=c(0, 0.16))
  axis(side=1, at=c(0, 4, 8, 12, 16, 20, 24), labels=c("MN", "4AM", "8AM", "NOON", "4PM", "8PM", "MN"))
  lines(density(noteTimes))
  invisible(NULL)
}

# TRUE for Saturday/Sunday timestamps, FALSE for Monday-Friday, NA when the
# timestamp is NA. Uses the numeric day of week (0 = Sunday ... 6 = Saturday),
# so it does not depend on the locale the way comparing weekdays() names to
# English strings does.
isWeekend<-function(stamps) {
  dayNumber<-as.POSIXlt(stamps)$wday
  dayNumber==0|dayNumber==6
}

# 2014: add the weekday name as column 13 ("day"), then split weekdays from
# weekends. which() drops rows whose timestamp is NA from both groups.
notesDataSubset2014[,13]<-weekdays(notesDataSubset2014[,4])
colnames(notesDataSubset2014)[13]<-"day"
weekend2014<-isWeekend(notesDataSubset2014[,4])
notesDataSubsetWD2014<-notesDataSubset2014[which(!weekend2014), , drop=FALSE]
notesDataSubsetWE2014<-notesDataSubset2014[which(weekend2014), , drop=FALSE]

plotHourlyFrequency(notesDataSubsetWD2014$Note.Time, paste0("2014 ", q, " Histogram - Weekdays"))

plotHourlyFrequency(notesDataSubsetWE2014$Note.Time, paste0("2014 ", q, " Histogram - Weekends"))

# 2015: same steps.
notesDataSubset2015[,13]<-weekdays(notesDataSubset2015[,4])
colnames(notesDataSubset2015)[13]<-"day"
weekend2015<-isWeekend(notesDataSubset2015[,4])
notesDataSubsetWD2015<-notesDataSubset2015[which(!weekend2015), , drop=FALSE]
notesDataSubsetWE2015<-notesDataSubset2015[which(weekend2015), , drop=FALSE]

plotHourlyFrequency(notesDataSubsetWD2015$Note.Time, paste0("2015 ", q, " Histogram - Weekdays"))

plotHourlyFrequency(notesDataSubsetWE2015$Note.Time, paste0("2015 ", q, " Histogram - Weekends"))

T test

# Announce, then run, a two-sample t-test on the decimal-hour note times of 2015 (x) versus 2014 (y).
# NOTE(review): the printed message contains the typo "betwwen".
print("Comparison of time of day note was written betwwen 2015 and 2014")
## [1] "Comparison of time of day note was written betwwen 2015 and 2014"
# Default t.test() settings, which the recorded output reports as a Welch test with a two-sided alternative.
t.test(notesDataSubset2015$Note.Time, notesDataSubset2014$Note.Time)
## 
##  Welch Two Sample t-test
## 
## data:  notesDataSubset2015$Note.Time and notesDataSubset2014$Note.Time
## t = -8.1132, df = 13065, p-value = 5.37e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7552148 -0.4613033
## sample estimates:
## mean of x mean of y 
##  14.08860  14.69686

Adding column about weeks of the year in which the note was written and comparing 2014 and 2015 with a t test

# Store the week index (ceiling of day-of-year / 7) as column 14, "Week.of.Year".
notesDataSubset2014[,14]<-notesPeriod2014
notesDataSubset2015[,14]<-notesPeriod2015
colnames(notesDataSubset2014)[14]<-"Week.of.Year"
colnames(notesDataSubset2015)[14]<-"Week.of.Year"
# Notes per week, with the last week present in the data left out.
# seq_len() is used instead of 1:(n-1), which would yield c(1, 0) and keep the
# only week when a year has a single week; each table is also built just once.
weekCounts2014<-table(notesDataSubset2014[,14])
weekCounts2015<-table(notesDataSubset2015[,14])
notesByWeek2014<-weekCounts2014[seq_len(max(length(weekCounts2014)-1, 0))]
notesByWeek2015<-weekCounts2015[seq_len(max(length(weekCounts2015)-1, 0))]

##  plot(notesByWeek2014, type="l", main=paste0("Comparison of notes by week of year in 2014 and 2015 - ", q), xlab="Week of Year", ylab="Notes per Week", ylim=c(0,500), col="red")
##  lines(notesByWeek2015, col="blue")
##  legend("bottomright", lty=1, col=c("red", "blue"), legend=c("2014", "2015"), cex=1.0)

# Two-sample t-test on the weekly note counts, 2015 (x) versus 2014 (y).
t.test(notesByWeek2015, notesByWeek2014)
## 
##  Welch Two Sample t-test
## 
## data:  notesByWeek2015 and notesByWeek2014
## t = 2.7821, df = 39.995, p-value = 0.008202
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  12.24368 77.28013
## sample estimates:
## mean of x mean of y 
##  342.8571  298.0952

Now we will re-run the data, looking to see how many days after discharge the notes were written. We will only take notes of the type “Progress Notes” and those with a non-blank admit time. We also eliminate all patients with a blank discharge date - these patients were likely still in the hospital at the end date of the study. I then print the number of Progress Notes over this time period:

# Rebuild both yearly subsets from the raw data: progress notes with a
# non-blank admit time and a non-blank discharge time. Columns 4 and 9 are
# then reduced to calendar dates (the time-of-day part is parsed and dropped
# by as.Date), so later comparisons are in whole days.
noteDayFormat<-"%m/%d/%y %H%M"
dischargeDayFormat<-"%m/%d/%Y %H%M"

notesDataSubset2014<-subset(notesData2014, Note.Type=="Progress Notes" & Admit.Date.Time!=" " & Disch.Date.Time!=" ")
notesDataSubset2014[[4]]<-as.Date(notesDataSubset2014[[4]], format=noteDayFormat)
notesDataSubset2014[[9]]<-as.Date(notesDataSubset2014[[9]], format=dischargeDayFormat)
# Result matrix for 2014, starts empty.
matr2014<-NULL

notesDataSubset2015<-subset(notesData2015, Note.Type=="Progress Notes" & Admit.Date.Time!=" " & Disch.Date.Time!=" ")
notesDataSubset2015[[4]]<-as.Date(notesDataSubset2015[[4]], format=noteDayFormat)
notesDataSubset2015[[9]]<-as.Date(notesDataSubset2015[[9]], format=dischargeDayFormat)
# Result matrix for 2015, starts empty.
matr2015<-NULL

Functions that compute, by attending surgeon and overall, the fraction of notes written more than n days after discharge:

# Append one column to the global matrix matr2014 for a cutoff of n days.
# The column holds, per value of column 3 of notesDataSubset2014, the fraction
# of notes whose date (column 4) is more than n days after column 9, followed
# by the overall fraction (late notes divided by all rows).
#   n - number of days added to column 9 before the comparison
# Reads the global notesDataSubset2014; updates matr2014 through `<<-`.
delayedNotes2014<-function(n) {
  cutoffDay<-n+notesDataSubset2014[[9]]
  isLate<-notesDataSubset2014[[4]]>cutoffDay
  overallFraction<-sum(isLate, na.rm=TRUE)/nrow(notesDataSubset2014)
  # Work on a local copy with the flag stored as column 11.
  flagged<-notesDataSubset2014
  flagged[,11]<-isLate
  perAttending<-aggregate(flagged[,11], by=flagged[3], FUN=mean, na.rm=TRUE)
  matr2014<<-cbind(matr2014, c(perAttending[,2], overallFraction))
}

# Append one column to the global matrix matr2015 for a cutoff of n days.
# The column holds, per value of column 3 of notesDataSubset2015, the fraction
# of notes whose date (column 4) is more than n days after column 9, followed
# by the overall fraction (late notes divided by all rows).
#   n - number of days added to column 9 before the comparison
# Reads the global notesDataSubset2015; updates matr2015 through `<<-`.
delayedNotes2015<-function(n) {
  cutoffDay<-n+notesDataSubset2015[[9]]
  isLate<-notesDataSubset2015[[4]]>cutoffDay
  overallFraction<-sum(isLate, na.rm=TRUE)/nrow(notesDataSubset2015)
  # Work on a local copy with the flag stored as column 11.
  flagged<-notesDataSubset2015
  flagged[,11]<-isLate
  perAttending<-aggregate(flagged[,11], by=flagged[3], FUN=mean, na.rm=TRUE)
  matr2015<<-cbind(matr2015, c(perAttending[,2], overallFraction))
}

Run the function by days late:

# Each pass appends one column to matr2014 and to matr2015, for cutoffs of 0 through 29 days.
for (i in seq(from=0, to=29)) {
  delayedNotes2014(i)
  delayedNotes2015(i)
}

Labels the result matrices, writes them to CSV files, and generates the plot of progress notes written after discharge - by day.

# Label and export the 2014 matrix. The attending names are derived from the
# data frame the matrix rows were computed from, using the same grouping call
# (aggregate by column 3), rather than from the attendingLateNote2014 object
# left over from the earlier analysis, which was built on a different subset.
attendingNames2014<-as.character(aggregate(rep(1, nrow(notesDataSubset2014)), by=notesDataSubset2014[3], FUN=length)[,1])
# One row per attending plus the trailing overall row.
stopifnot(nrow(matr2014)==length(attendingNames2014)+1)
rownames(matr2014)<-c(attendingNames2014, "Total")
# Columns are numbered from 1 in the order they were appended.
colnames(matr2014)<-seq_len(ncol(matr2014))
write.csv(matr2014, file=paste0("Trauma 2014 ", q, ".csv"))

# Same for 2015.
attendingNames2015<-as.character(aggregate(rep(1, nrow(notesDataSubset2015)), by=notesDataSubset2015[3], FUN=length)[,1])
stopifnot(nrow(matr2015)==length(attendingNames2015)+1)
rownames(matr2015)<-c(attendingNames2015, "Total")
colnames(matr2015)<-seq_len(ncol(matr2015))
write.csv(matr2015, file=paste0("Trauma 2015 ", q, ".csv"))

# The last row of each matrix is the overall ("Total") fraction; plot it against the column index, 2014 in red and 2015 in blue.
overall2014<-matr2014[nrow(matr2014),]
overall2015<-matr2015[nrow(matr2015),]
plot(overall2014, type="l", col="red", ylim=c(0,0.08), main=paste0("Progress Notes Written After Discharge - ", q), xlab="Days after Discharge", ylab="Fraction notes not yet written")
lines(overall2015, col="blue")
legend("topright", legend=c("2014", "2015"), col=c("red", "blue"), lty=1, cex=1.5)

T test

# Announce, then run, a two-sample t-test on the late-note fractions of 2015 (x) versus 2014 (y).
print("Comparison of 2015 and 2014 notes written after patient discharge")
## [1] "Comparison of 2015 and 2014 notes written after patient discharge"
# Both arguments are whole matrices, so every cell (each attending row and the
# "Total" row, for every day column) enters the test as a separate observation.
# NOTE(review): confirm that pooling all cells is intended rather than comparing the "Total" rows only.
t.test(matr2015, matr2014)
## 
##  Welch Two Sample t-test
## 
## data:  matr2015 and matr2014
## t = -5.0593, df = 351.14, p-value = 6.798e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02523599 -0.01110779
## sample estimates:
##  mean of x  mean of y 
## 0.01820315 0.03637504