1 Load Packages

Be sure to have the package readxl installed on your computer.

library(readxl)

2 Read in Data

We are going to read in the data from the Radiology Workflow from the hospital for the years 2016-2017.

# Download the radiology workflow CSV into a data frame and show its
# first rows.
dat <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/RadDat_IMSE.csv"
)
head(dat)
##   Unique.Identifier PatientAge Radiology.Technician
## 1                 1         75                   65
## 2                 2         87                   65
## 3                 3         35                   16
## 4                 4         51                   24
## 5                 5         67                   37
## 6                 6         54                    7
##                      CatalogCode Ordering.Physician PatientTypeMnemonic
## 1 DX Abdomen 2 vw w/single chest                  4                  IP
## 2 DX Abdomen 2 vw w/single chest                  4                  IP
## 3 DX Abdomen 2 vw w/single chest                150                  IP
## 4 DX Abdomen 2 vw w/single chest                130                  IP
## 5 DX Abdomen 2 vw w/single chest                173                  IP
## 6 DX Abdomen 2 vw w/single chest                349                  IP
##   Priority  OrderDateTime ExamCompleteDateTime  FinalDateTime
## 1  Routine 12/27/16 10:32       12/27/16 11:19 12/28/16 14:32
## 2  Routine  1/13/17 11:44        1/13/17 12:32  1/14/17 16:00
## 3  Routine   1/2/17 17:19         1/2/17 18:00    1/3/17 7:44
## 4  Routine 11/13/16 10:13        11/14/16 9:34 11/14/16 16:40
## 5     STAT  12/13/16 3:22        12/13/16 4:04  12/13/16 3:19
## 6  Routine   1/17/17 5:38         1/17/17 7:47  1/17/17 10:55
##   Ordered.to.Complete...Mins Ordered.to.Complete...Hours Loc.At.Exam.Complete
## 1                         47                   0.7833333                  GTU
## 2                         48                   0.8000000                  GTU
## 3                         41                   0.6833333                   3W
## 4                       1401                  23.3500000                   4W
## 5                         42                   0.7000000        Emergency Ctr
## 6                        129                   2.1500000                   3E
##   Exam.Completed.Bucket Section    Exam.Room
## 1                 8a-8p      DX      DX Rm 1
## 2                 8a-8p      DX      DX Rm 1
## 3                 8a-8p   EC DX DX Rm 5 (EC)
## 4                 8a-8p      DX      DX Rm 1
## 5                12a-8a   EC DX DX Rm 5 (EC)
## 6                12a-8a      DX  DX Portable

2.1 Select Columns

Select only the columns related to patient age, radiology technician, priority, time to complete (min), and patient location.

# Keep the five columns of interest, then give them shorter names.
dat <- dat[c(
  "PatientAge",
  "Radiology.Technician",
  "Priority",
  "Ordered.to.Complete...Mins",
  "Loc.At.Exam.Complete"
)]
names(dat) <- c(
  "PatientAge",
  "RadTech",
  "Priority",
  "CompletionTime",
  "Location"
)
head(dat)
##   PatientAge RadTech Priority CompletionTime      Location
## 1         75      65  Routine             47           GTU
## 2         87      65  Routine             48           GTU
## 3         35      16  Routine             41            3W
## 4         51      24  Routine           1401            4W
## 5         67      37     STAT             42 Emergency Ctr
## 6         54       7  Routine            129            3E

2.1.1 Convert Factors

Now we want to convert some of the variables to factors.

# Treat technician ID, priority, and location as categorical variables,
# then inspect the resulting structure.
dat[c("RadTech", "Priority", "Location")] <- lapply(
  dat[c("RadTech", "Priority", "Location")],
  as.factor
)
str(dat)
## 'data.frame':    43632 obs. of  5 variables:
##  $ PatientAge    : int  75 87 35 51 67 54 34 65 67 40 ...
##  $ RadTech       : Factor w/ 72 levels "2","3","4","5",..: 63 63 15 23 36 6 38 1 1 33 ...
##  $ Priority      : Factor w/ 2 levels "Routine","STAT": 1 1 1 1 2 1 1 1 1 1 ...
##  $ CompletionTime: int  47 48 41 1401 42 129 42 1068 49 47 ...
##  $ Location      : Factor w/ 25 levels "3E","3W","4E",..: 16 16 2 4 14 1 1 1 7 2 ...

2.2 Select Rows

Keep only those observations on 3 West.

# Keep only the observations recorded on 3 West.
# which() discards rows where Location is NA; indexing with the bare
# logical vector would instead return each of them as an all-NA row.
dat1 <- dat[which(dat$Location == "3W"), ]
head(dat1)
##    PatientAge RadTech Priority CompletionTime Location
## 3          35      16  Routine             41       3W
## 10         40      34  Routine             47       3W
## 15         71      57  Routine           1214       3W
## 17         39      23  Routine             90       3W
## 18         66      23  Routine             90       3W
## 20         53       9  Routine             80       3W

3 Data Visualization

Now let's visualize the data on 3W.

3.1 Histogram of Completion Time

The histogram of completion times on 3W is heavily right-skewed, with many large outliers.

# Histogram of the 3W completion times (in minutes).
hist(
  x = dat1[["CompletionTime"]],
  main = "Histogram of Completion Time on 3W",
  xlab = "minutes",
  ylab = "count",
  col = "blue"
)

3.2 Plot of Age by Completion Time

A scatter plot of completion time (y-axis) against patient age (x-axis) is given below.

# Scatter plot with patient age on the x-axis and completion time on the
# y-axis.
plot(
  x = dat1[["PatientAge"]],
  y = dat1[["CompletionTime"]],
  main = "Plot Age by Completion Time",
  xlab = "Age",
  ylab = "Time(min)",
  col = "red"
)

4 Descriptive Statistics

Generate some descriptive statistics for 3W

4.1 Mean Patient Age

The mean patient age is approximately 52 years

# Average age of the patients in the 3W subset.
mean(dat1[["PatientAge"]])
## [1] 52.09769

4.2 Summary of Completion Times

# Five-number summary plus mean of the 3W completion times.
summary(dat1[["CompletionTime"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     2.0    38.0    70.0  1463.1   402.5 29637.0

Note that the completion time has outliers. Filter the data so that only completion times of less than one day (1,440 minutes) are considered.

# Keep only exams completed in under one day (24 * 60 minutes).
# which() discards rows where CompletionTime is NA; indexing with the
# bare logical vector would instead return each of them as an all-NA row.
dat2 <- dat1[which(dat1$CompletionTime < 24 * 60), ]
summary(dat2$CompletionTime)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2      34      56     152     113    1406

5 Complete Code

library(readxl)
# Read in the radiology workflow data.
dat <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/RadDat_IMSE.csv")
head(dat)

# Keep the five columns of interest and give them shorter names.
dat <- subset(
  dat,
  select = c(
    "PatientAge", "Radiology.Technician", "Priority",
    "Ordered.to.Complete...Mins", "Loc.At.Exam.Complete"
  )
)
colnames(dat) <- c("PatientAge", "RadTech", "Priority", "CompletionTime", "Location")
head(dat)

# Convert the categorical variables to factors.
dat$RadTech <- as.factor(dat$RadTech)
dat$Priority <- as.factor(dat$Priority)
dat$Location <- as.factor(dat$Location)
str(dat)

# Keep only the observations on 3 West.
# which() discards rows where Location is NA; a bare logical index would
# return each of them as an all-NA row.
dat1 <- dat[which(dat$Location == "3W"), ]
head(dat1)

# Visualize the 3W data.
hist(dat1$CompletionTime, main = "Histogram of Completion Time on 3W",
     xlab = "minutes", ylab = "count", col = "blue")
plot(dat1$PatientAge, dat1$CompletionTime, main = "Plot Age by Completion Time",
     xlab = "Age", ylab = "Time(min)", col = "red")

# Descriptive statistics for 3W.
mean(dat1$PatientAge)
summary(dat1$CompletionTime)

# Keep only completion times under one day (24 * 60 minutes); which()
# again guards against NA completion times producing all-NA rows.
dat2 <- dat1[which(dat1$CompletionTime < 24 * 60), ]
summary(dat2$CompletionTime)