library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# Read the radiology records from a CSV file (path relative to the working
# directory)
dat <- read.csv(file = "RadDat.csv")
# Print a compact overview of every column: its type and first few values
str(object = dat)
## 'data.frame': 43636 obs. of 21 variables:
## $ Unique.Identifier : int 1 2 3 4 5 6 7 8 9 10 ...
## $ PatientAge : int 75 87 35 51 67 54 34 65 67 40 ...
## $ Radiology.Technician : int 65 65 16 24 37 7 40 2 2 34 ...
## $ Modality : chr "DX " "DX " "DX " "DX " ...
## $ CatalogCode : chr "DX Abdomen 2 vw w/single chest" "DX Abdomen 2 vw w/single chest" "DX Abdomen 2 vw w/single chest" "DX Abdomen 2 vw w/single chest" ...
## $ In.Rad.Room : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Ordering.Physician : int 4 4 150 130 173 349 4 4 39 4 ...
## $ PatientTypeMnemonic : chr "IP" "IP" "IP" "IP" ...
## $ Encounter.Type : chr "Inpatient" "Inpatient" "Inpatient" "Inpatient" ...
## $ EC.Visit : chr "Yes" "Yes" "Yes" "Yes" ...
## $ Priority : chr "Routine" "Routine" "Routine" "Routine" ...
## $ OrderDateTime : chr "12/27/16 10:32" "1/13/17 11:44" "1/2/17 17:19" "11/13/16 10:13" ...
## $ ExamCompleteDateTime : chr "12/27/16 11:19" "1/13/17 12:32" "1/2/17 18:00" "11/14/16 9:34" ...
## $ FinalDateTime : chr "12/28/16 14:32" "1/14/17 16:00" "1/3/17 7:44" "11/14/16 16:40" ...
## $ Ordered.to.Complete...Mins : int 47 48 41 1401 42 129 42 1068 49 47 ...
## $ Ordered.to.Complete...Hours: num 0.783 0.8 0.683 23.35 0.7 ...
## $ Loc.At.Exam.Complete : chr "GTU" "GTU" "3W" "4W" ...
## $ Exam.Completed.Bucket : chr "8a-8p" "8a-8p" "8a-8p" "8a-8p" ...
## $ Section : chr "DX" "DX" "EC DX" "DX" ...
## $ Subsection : chr "DX" "DX" "EC DX" "DX" ...
## $ Exam.Room : chr "DX Rm 1" "DX Rm 1" "DX Rm 5 (EC)" "DX Rm 1" ...
# Treat the technician and physician ID codes as categorical, not numeric
dat <- dat %>%
  mutate(across(c(Radiology.Technician, Ordering.Physician), as.factor))
# Per-column summary; factor columns are reported as counts per level
summary(dat)
## Unique.Identifier PatientAge Radiology.Technician Modality
## Min. : 1 Min. : 0.0 37 : 2870 Length:43636
## 1st Qu.:10796 1st Qu.: 33.0 9 : 2166 Class :character
## Median :21632 Median : 53.0 57 : 1941 Mode :character
## Mean :21674 Mean : 50.5 51 : 1859
## 3rd Qu.:32540 3rd Qu.: 67.0 45 : 1603
## Max. :43449 Max. :102.0 21 : 1559
## (Other):31638
## CatalogCode In.Rad.Room Ordering.Physician PatientTypeMnemonic
## Length:43636 Min. :0.0000 400 : 1915 Length:43636
## Class :character 1st Qu.:0.0000 243 : 1566 Class :character
## Mode :character Median :0.0000 73 : 1382 Mode :character
## Mean :0.1204 366 : 1180
## 3rd Qu.:0.0000 111 : 1127
## Max. :1.0000 22 : 1117
## (Other):35349
## Encounter.Type EC.Visit Priority OrderDateTime
## Length:43636 Length:43636 Length:43636 Length:43636
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## ExamCompleteDateTime FinalDateTime Ordered.to.Complete...Mins
## Length:43636 Length:43636 Min. : -196
## Class :character Class :character 1st Qu.: 18
## Mode :character Mode :character Median : 33
## Mean : 1646
## 3rd Qu.: 67
## Max. :112168
##
## Ordered.to.Complete...Hours Loc.At.Exam.Complete Exam.Completed.Bucket
## Min. : -3.267 Length:43636 Length:43636
## 1st Qu.: 0.300 Class :character Class :character
## Median : 0.550 Mode :character Mode :character
## Mean : 27.430
## 3rd Qu.: 1.117
## Max. :1869.467
##
## Section Subsection Exam.Room
## Length:43636 Length:43636 Length:43636
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
# Each statement below overwrites dat2 with a fresh result derived from dat
# (except the column drop, which works on the previous dat2).

# Rows with PatientAge above 75
dat2 <- filter(dat, PatientAge > 75)
# Rows with PatientAge exactly 75
dat2 <- filter(dat, PatientAge == 75)
# Rows with PatientAge above 3 and at most 18 (comma-separated conditions
# are combined with AND)
dat2 <- filter(dat, PatientAge > 3, PatientAge <= 18)
# Rows with any PatientAge other than 75
dat2 <- filter(dat, PatientAge != 75)
# Rows whose PatientTypeMnemonic is "OPEC"
dat2 <- filter(dat, PatientTypeMnemonic == "OPEC")
# Keep only the PatientAge and Ordering.Physician columns
dat2 <- select(dat, PatientAge, Ordering.Physician)
# Drop PatientAge from that two-column result
dat2 <- select(dat2, -PatientAge)
# Add AgeSquared (PatientAge squared) to the full dataset
dat2 <- mutate(dat, AgeSquared = PatientAge^2)
# Average PatientAge over all rows
summarise(dat, mean_age = mean(PatientAge))
## mean_age
## 1 50.49544
# Count of unique Ordering.Physician values
summarise(dat, distinct_physicians = n_distinct(Ordering.Physician))
## distinct_physicians
## 1 415
# Average order-to-completion time in minutes over all rows
summarise(dat, mean_minutes = mean(Ordered.to.Complete...Mins))
## mean_minutes
## 1 1645.787
# Average order-to-completion time for exams completed at location "3E" only
dat %>%
  filter(Loc.At.Exam.Complete == "3E") %>%
  summarise(mean_minutes_3E = mean(Ordered.to.Complete...Mins))
## mean_minutes_3E
## 1 4644.828
# Average order-to-completion time per completion location (one row each)
dat %>%
  group_by(Loc.At.Exam.Complete) %>%
  summarise(mean_minutes = mean(Ordered.to.Complete...Mins))
## # A tibble: 25 × 2
## Loc.At.Exam.Complete mean_minutes
## <chr> <dbl>
## 1 3E 4645.
## 2 3W 1462.
## 3 4E 1152.
## 4 4W 1675.
## 5 5E 8020.
## 6 5W 1282.
## 7 BICU 9695.
## 8 BIMC 11873.
## 9 BMT 72.7
## 10 CHCU 1583.
## # ℹ 15 more rows
# Attach each location's mean Ordered.to.Complete...Mins to every row.
# Unlike summarize(), mutate() keeps all rows, so each row gets the mean of
# its own Loc.At.Exam.Complete group in the new mean_minutes column.
# ungroup() drops the grouping afterwards so that later verbs applied to
# dat2 are not silently computed per location.
dat2 <- dat %>%
  group_by(Loc.At.Exam.Complete) %>%
  mutate(mean_minutes = mean(Ordered.to.Complete...Mins)) %>%
  ungroup()