1 Load Packages

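Be sure to have the package readxl installed on your computer, since it is loaded below. Note that the data in this example is a CSV file and is read with read.csv, which is part of base R.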

library(readxl)

2 Read in Data

We are going to read in the hospital's Radiology Workflow data for the year 2016-2017.

# Download the radiology workflow CSV from GitHub and preview the first rows.
dat <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/RadDat_IMSE.csv"
)
head(dat)
##   Unique.Identifier PatientAge Radiology.Technician
## 1                 1         75                   65
## 2                 2         87                   65
## 3                 3         35                   16
## 4                 4         51                   24
## 5                 5         67                   37
## 6                 6         54                    7
##                      CatalogCode Ordering.Physician PatientTypeMnemonic
## 1 DX Abdomen 2 vw w/single chest                  4                  IP
## 2 DX Abdomen 2 vw w/single chest                  4                  IP
## 3 DX Abdomen 2 vw w/single chest                150                  IP
## 4 DX Abdomen 2 vw w/single chest                130                  IP
## 5 DX Abdomen 2 vw w/single chest                173                  IP
## 6 DX Abdomen 2 vw w/single chest                349                  IP
##   Priority  OrderDateTime ExamCompleteDateTime  FinalDateTime
## 1  Routine 12/27/16 10:32       12/27/16 11:19 12/28/16 14:32
## 2  Routine  1/13/17 11:44        1/13/17 12:32  1/14/17 16:00
## 3  Routine   1/2/17 17:19         1/2/17 18:00    1/3/17 7:44
## 4  Routine 11/13/16 10:13        11/14/16 9:34 11/14/16 16:40
## 5     STAT  12/13/16 3:22        12/13/16 4:04  12/13/16 3:19
## 6  Routine   1/17/17 5:38         1/17/17 7:47  1/17/17 10:55
##   Ordered.to.Complete...Mins Ordered.to.Complete...Hours Loc.At.Exam.Complete
## 1                         47                   0.7833333                  GTU
## 2                         48                   0.8000000                  GTU
## 3                         41                   0.6833333                   3W
## 4                       1401                  23.3500000                   4W
## 5                         42                   0.7000000        Emergency Ctr
## 6                        129                   2.1500000                   3E
##   Exam.Completed.Bucket Section    Exam.Room
## 1                 8a-8p      DX      DX Rm 1
## 2                 8a-8p      DX      DX Rm 1
## 3                 8a-8p   EC DX DX Rm 5 (EC)
## 4                 8a-8p      DX      DX Rm 1
## 5                12a-8a   EC DX DX Rm 5 (EC)
## 6                12a-8a      DX  DX Portable

2.1 Select Columns

Select only the columns related to the patient age, radiology technician, priority, time to complete, and location of the patient.

# Keep only the five columns of interest, then give them shorter names.
dat <- dat[
  ,
  c(
    "PatientAge", "Radiology.Technician", "Priority",
    "Ordered.to.Complete...Mins", "Loc.At.Exam.Complete"
  ),
  drop = FALSE
]
names(dat) <- c(
  "PatientAge", "Radtech", "Priority", "Completion Time", "Location"
)
head(dat)
##   PatientAge Radtech Priority Completion Time      Location
## 1         75      65  Routine              47           GTU
## 2         87      65  Routine              48           GTU
## 3         35      16  Routine              41            3W
## 4         51      24  Routine            1401            4W
## 5         67      37     STAT              42 Emergency Ctr
## 6         54       7  Routine             129            3E

2.1.1 Convert to Factors

Convert the categorical variables (Radtech, Priority, and Location) to factors.

# Convert the technician ID, priority and location columns to factors in one
# pass, then inspect the resulting structure.
dat[c("Radtech", "Priority", "Location")] <- lapply(
  dat[c("Radtech", "Priority", "Location")],
  as.factor
)
str(dat)
## 'data.frame':    43632 obs. of  5 variables:
##  $ PatientAge     : int  75 87 35 51 67 54 34 65 67 40 ...
##  $ Radtech        : Factor w/ 72 levels "2","3","4","5",..: 63 63 15 23 36 6 38 1 1 33 ...
##  $ Priority       : Factor w/ 2 levels "Routine","STAT": 1 1 1 1 2 1 1 1 1 1 ...
##  $ Completion Time: int  47 48 41 1401 42 129 42 1068 49 47 ...
##  $ Location       : Factor w/ 25 levels "3E","3W","4E",..: 16 16 2 4 14 1 1 1 7 2 ...

2.2 Select Rows

Keep only the observations from 3 West (location code 3W).

# Keep the rows whose location is 3W; all columns are retained.
dat1 <- dat[dat[["Location"]] == "3W", ]
head(dat1)
##    PatientAge Radtech Priority Completion Time Location
## 3          35      16  Routine              41       3W
## 10         40      34  Routine              47       3W
## 15         71      57  Routine            1214       3W
## 17         39      23  Routine              90       3W
## 18         66      23  Routine              90       3W
## 20         53       9  Routine              80       3W

3 Data Visualization

Now let us visualize the data for 3W

3.1 Histogram of Completion Time

The histogram of completion times on 3W is heavily right-skewed, with many outliers.

# Histogram of the completion times (in minutes) for the 3W subset.
hist(
  x = dat1[["Completion Time"]],
  main = "Histogram of Completion time on 3W",
  xlab = "in minutes",
  ylab = "Count",
  col = "blue"
)

3.2 Plot of age by Completion time

A scatter plot of completion time against patient age is given below.

# Scatter plot of completion time against patient age. Uses the 3W subset
# (dat1) rather than the full data set, so that it matches the histogram and
# the stated scope of this section.
plot(
  dat1$PatientAge, dat1$`Completion Time`,
  xlab = "Age", ylab = "Time in minutes", col = "red"
)

4 Descriptive Statistics

Generate some descriptive statistics for 3W

4.1 Mean Patient Age

The mean patient age is approximately 52 years

# Average patient age among the 3W observations.
mean(dat1[["PatientAge"]])
## [1] 52.09769

4.2 Summary of Completion Time

# Five-number summary plus mean of the 3W completion times (minutes).
summary(dat1[["Completion Time"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     2.0    38.0    70.0  1463.1   402.5 29637.0

Note that the completion time has outliers. Filter the data so that only completion times of less than one day (24*60 minutes) are considered.

# Keep only exams completed in under one day (24 * 60 minutes), then
# summarise the remaining completion times.
dat2 <- dat1[dat1[["Completion Time"]] < 24 * 60, ]
summary(dat2[["Completion Time"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2      34      56     152     113    1406

5 Complete Code

library(readxl)
# Read the radiology workflow data and preview it.
dat <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/RadDat_IMSE.csv")
head(dat)

# Keep the columns of interest and give them shorter names.
dat <- subset(
  dat,
  select = c(
    "PatientAge", "Radiology.Technician", "Priority",
    "Ordered.to.Complete...Mins", "Loc.At.Exam.Complete"
  )
)
colnames(dat) <- c(
  "PatientAge", "Radtech", "Priority", "Completion Time", "Location"
)
head(dat)

# Convert the categorical variables to factors.
dat$Radtech <- as.factor(dat$Radtech)
dat$Priority <- as.factor(dat$Priority)
dat$Location <- as.factor(dat$Location)
str(dat)

# Keep only the observations from 3W.
dat1 <- dat[dat$Location == "3W", ]
head(dat1)

# Visualize the 3W data. Both plots use dat1 so that they describe the same
# subset (the scatter plot previously used the unfiltered dat).
hist(
  dat1$`Completion Time`,
  main = "Histogram of Completion time on 3W",
  xlab = "in minutes", ylab = "Count", col = "blue"
)
plot(
  dat1$PatientAge, dat1$`Completion Time`,
  xlab = "Age", ylab = "Time in minutes", col = "red"
)

# Descriptive statistics for 3W.
mean(dat1$PatientAge)
summary(dat1$`Completion Time`)

# Drop outliers: keep only completion times under one day (24 * 60 minutes).
dat2 <- dat1[dat1$`Completion Time` < 24 * 60, ]
summary(dat2$`Completion Time`)