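Be sure to have the readxl package installed on your computer; it is loaded below, although the data in this example is read from a CSV file with base R's read.csv().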
library(readxl)
We are going to read in the hospital's Radiology Workflow data for the years 2016-2017.
# Download the radiology workflow CSV and preview its first six rows.
dat <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/RadDat_IMSE.csv"
)
head(dat, n = 6L)
## Unique.Identifier PatientAge Radiology.Technician
## 1 1 75 65
## 2 2 87 65
## 3 3 35 16
## 4 4 51 24
## 5 5 67 37
## 6 6 54 7
## CatalogCode Ordering.Physician PatientTypeMnemonic
## 1 DX Abdomen 2 vw w/single chest 4 IP
## 2 DX Abdomen 2 vw w/single chest 4 IP
## 3 DX Abdomen 2 vw w/single chest 150 IP
## 4 DX Abdomen 2 vw w/single chest 130 IP
## 5 DX Abdomen 2 vw w/single chest 173 IP
## 6 DX Abdomen 2 vw w/single chest 349 IP
## Priority OrderDateTime ExamCompleteDateTime FinalDateTime
## 1 Routine 12/27/16 10:32 12/27/16 11:19 12/28/16 14:32
## 2 Routine 1/13/17 11:44 1/13/17 12:32 1/14/17 16:00
## 3 Routine 1/2/17 17:19 1/2/17 18:00 1/3/17 7:44
## 4 Routine 11/13/16 10:13 11/14/16 9:34 11/14/16 16:40
## 5 STAT 12/13/16 3:22 12/13/16 4:04 12/13/16 3:19
## 6 Routine 1/17/17 5:38 1/17/17 7:47 1/17/17 10:55
## Ordered.to.Complete...Mins Ordered.to.Complete...Hours Loc.At.Exam.Complete
## 1 47 0.7833333 GTU
## 2 48 0.8000000 GTU
## 3 41 0.6833333 3W
## 4 1401 23.3500000 4W
## 5 42 0.7000000 Emergency Ctr
## 6 129 2.1500000 3E
## Exam.Completed.Bucket Section Exam.Room
## 1 8a-8p DX DX Rm 1
## 2 8a-8p DX DX Rm 1
## 3 8a-8p EC DX DX Rm 5 (EC)
## 4 8a-8p DX DX Rm 1
## 5 12a-8a EC DX DX Rm 5 (EC)
## 6 12a-8a DX DX Portable
Select only the columns related to patient age, radiology technician, priority, time to complete (min), and patient location.
# Keep the five analysis columns, then give them shorter names.
dat <- dat[
  ,
  c(
    "PatientAge",
    "Radiology.Technician",
    "Priority",
    "Ordered.to.Complete...Mins",
    "Loc.At.Exam.Complete"
  ),
  drop = FALSE
]
names(dat) <- c("PatientAge", "RadTech", "Priority", "CompletionTime", "Location")
head(dat)
## PatientAge RadTech Priority CompletionTime Location
## 1 75 65 Routine 47 GTU
## 2 87 65 Routine 48 GTU
## 3 35 16 Routine 41 3W
## 4 51 24 Routine 1401 4W
## 5 67 37 STAT 42 Emergency Ctr
## 6 54 7 Routine 129 3E
Now we want to convert some of the variables to factors.
# Technician ID, priority and location are categorical: convert all three
# columns to factors in one step, then inspect the resulting structure.
dat[c("RadTech", "Priority", "Location")] <- lapply(
  dat[c("RadTech", "Priority", "Location")],
  as.factor
)
str(dat)
## 'data.frame': 43632 obs. of 5 variables:
## $ PatientAge : int 75 87 35 51 67 54 34 65 67 40 ...
## $ RadTech : Factor w/ 72 levels "2","3","4","5",..: 63 63 15 23 36 6 38 1 1 33 ...
## $ Priority : Factor w/ 2 levels "Routine","STAT": 1 1 1 1 2 1 1 1 1 1 ...
## $ CompletionTime: int 47 48 41 1401 42 129 42 1068 49 47 ...
## $ Location : Factor w/ 25 levels "3E","3W","4E",..: 16 16 2 4 14 1 1 1 7 2 ...
Keep only those observations on 3 West.
# Subset to the rows whose Location is "3W" and preview the result.
dat1 <- dat[dat[["Location"]] == "3W", ]
head(dat1)
## PatientAge RadTech Priority CompletionTime Location
## 3 35 16 Routine 41 3W
## 10 40 34 Routine 47 3W
## 15 71 57 Routine 1214 3W
## 17 39 23 Routine 90 3W
## 18 66 23 Routine 90 3W
## 20 53 9 Routine 80 3W
Now let's visualize the data on 3W.
The histogram on 3W has many outliers
# Histogram of completion time (minutes) for the 3W subset.
hist(
  x = dat1[["CompletionTime"]],
  main = "Histogram of Completion Time on 3W",
  xlab = "minutes",
  ylab = "count",
  col = "blue"
)
A plot of patient age by completion time is given below
# Scatter plot of patient age against completion time (minutes).
# Uses the 3W subset (dat1) rather than the full data set so that the plot
# matches the histogram and descriptive statistics in this 3W section.
plot(
  dat1$PatientAge, dat1$CompletionTime,
  main = "Plot Age by Completion Time",
  xlab = "Age", ylab = "Time(min)", col = "red"
)
Generate some descriptive statistics for 3W
The mean patient age is approximately 52 years
# Average patient age within the 3W subset.
mean(dat1[["PatientAge"]])
## [1] 52.09769
# Five-number summary plus mean of completion time (minutes) on 3W.
summary(dat1[["CompletionTime"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.0 38.0 70.0 1463.1 402.5 29637.0
Note that the completion time has outliers. Filter the data so that only completion times of less than a day are considered.
# Keep 3W rows completed in under one day (24 * 60 minutes), then
# summarise the remaining completion times.
dat2 <- dat1[dat1[["CompletionTime"]] < 24 * 60, ]
summary(dat2[["CompletionTime"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 34 56 152 113 1406
library(readxl)
# Load the radiology workflow data (CSV hosted on GitHub) and preview it.
dat <- read.csv("https://raw.githubusercontent.com/tmatis12/datafiles/main/RadDat_IMSE.csv")
head(dat)

# Keep only the columns used in the analysis and give them shorter names.
dat <- subset(
  dat,
  select = c(
    "PatientAge", "Radiology.Technician", "Priority",
    "Ordered.to.Complete...Mins", "Loc.At.Exam.Complete"
  )
)
colnames(dat) <- c("PatientAge", "RadTech", "Priority", "CompletionTime", "Location")
head(dat)

# Technician ID, priority and location are categorical, so store them as
# factors; str() confirms the resulting column types.
dat$RadTech <- as.factor(dat$RadTech)
dat$Priority <- as.factor(dat$Priority)
dat$Location <- as.factor(dat$Location)
str(dat)

# Restrict the data to rows whose Location is "3W".
dat1 <- dat[dat$Location == "3W", ]
head(dat1)

# Visualize the 3W subset: distribution of completion time (minutes).
hist(
  dat1$CompletionTime,
  main = "Histogram of Completion Time on 3W",
  xlab = "minutes", ylab = "count", col = "blue"
)

# Patient age against completion time. This uses the 3W subset (dat1), not
# the full data set, so it is consistent with the histogram above and the
# statistics below.
plot(
  dat1$PatientAge, dat1$CompletionTime,
  main = "Plot Age by Completion Time",
  xlab = "Age", ylab = "Time(min)", col = "red"
)

# Descriptive statistics for the 3W subset.
mean(dat1$PatientAge)
summary(dat1$CompletionTime)

# Keep only completion times under one day (24 * 60 minutes) and
# re-summarise.
dat2 <- dat1[dat1$CompletionTime < 24 * 60, ]
summary(dat2$CompletionTime)