library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
# Read the radiology CSV from the working directory into a data frame
dat <- read.csv(file = "RadDat.csv")
# Show each column's type together with its first few values
str(object = dat)
## 'data.frame':    43636 obs. of  21 variables:
##  $ Unique.Identifier          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ PatientAge                 : int  75 87 35 51 67 54 34 65 67 40 ...
##  $ Radiology.Technician       : int  65 65 16 24 37 7 40 2 2 34 ...
##  $ Modality                   : chr  "DX " "DX " "DX " "DX " ...
##  $ CatalogCode                : chr  "DX Abdomen 2 vw w/single chest" "DX Abdomen 2 vw w/single chest" "DX Abdomen 2 vw w/single chest" "DX Abdomen 2 vw w/single chest" ...
##  $ In.Rad.Room                : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Ordering.Physician         : int  4 4 150 130 173 349 4 4 39 4 ...
##  $ PatientTypeMnemonic        : chr  "IP" "IP" "IP" "IP" ...
##  $ Encounter.Type             : chr  "Inpatient" "Inpatient" "Inpatient" "Inpatient" ...
##  $ EC.Visit                   : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ Priority                   : chr  "Routine" "Routine" "Routine" "Routine" ...
##  $ OrderDateTime              : chr  "12/27/16 10:32" "1/13/17 11:44" "1/2/17 17:19" "11/13/16 10:13" ...
##  $ ExamCompleteDateTime       : chr  "12/27/16 11:19" "1/13/17 12:32" "1/2/17 18:00" "11/14/16 9:34" ...
##  $ FinalDateTime              : chr  "12/28/16 14:32" "1/14/17 16:00" "1/3/17 7:44" "11/14/16 16:40" ...
##  $ Ordered.to.Complete...Mins : int  47 48 41 1401 42 129 42 1068 49 47 ...
##  $ Ordered.to.Complete...Hours: num  0.783 0.8 0.683 23.35 0.7 ...
##  $ Loc.At.Exam.Complete       : chr  "GTU" "GTU" "3W" "4W" ...
##  $ Exam.Completed.Bucket      : chr  "8a-8p" "8a-8p" "8a-8p" "8a-8p" ...
##  $ Section                    : chr  "DX" "DX" "EC DX" "DX" ...
##  $ Subsection                 : chr  "DX" "DX" "EC DX" "DX" ...
##  $ Exam.Room                  : chr  "DX Rm 1" "DX Rm 1" "DX Rm 5 (EC)" "DX Rm 1" ...
# The technician and physician columns hold integer IDs; store them as
# factors so they are treated as categories rather than quantities
id_cols <- c("Radiology.Technician", "Ordering.Physician")
dat[id_cols] <- lapply(dat[id_cols], as.factor)
rm(id_cols)

# Per-column summary: quartiles for numerics, level counts for factors
summary(object = dat)
##  Unique.Identifier   PatientAge    Radiology.Technician   Modality        
##  Min.   :    1     Min.   :  0.0   37     : 2870        Length:43636      
##  1st Qu.:10796     1st Qu.: 33.0   9      : 2166        Class :character  
##  Median :21632     Median : 53.0   57     : 1941        Mode  :character  
##  Mean   :21674     Mean   : 50.5   51     : 1859                          
##  3rd Qu.:32540     3rd Qu.: 67.0   45     : 1603                          
##  Max.   :43449     Max.   :102.0   21     : 1559                          
##                                    (Other):31638                          
##  CatalogCode         In.Rad.Room     Ordering.Physician PatientTypeMnemonic
##  Length:43636       Min.   :0.0000   400    : 1915      Length:43636       
##  Class :character   1st Qu.:0.0000   243    : 1566      Class :character   
##  Mode  :character   Median :0.0000   73     : 1382      Mode  :character   
##                     Mean   :0.1204   366    : 1180                         
##                     3rd Qu.:0.0000   111    : 1127                         
##                     Max.   :1.0000   22     : 1117                         
##                                      (Other):35349                         
##  Encounter.Type       EC.Visit           Priority         OrderDateTime     
##  Length:43636       Length:43636       Length:43636       Length:43636      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  ExamCompleteDateTime FinalDateTime      Ordered.to.Complete...Mins
##  Length:43636         Length:43636       Min.   :  -196            
##  Class :character     Class :character   1st Qu.:    18            
##  Mode  :character     Mode  :character   Median :    33            
##                                          Mean   :  1646            
##                                          3rd Qu.:    67            
##                                          Max.   :112168            
##                                                                    
##  Ordered.to.Complete...Hours Loc.At.Exam.Complete Exam.Completed.Bucket
##  Min.   :  -3.267            Length:43636         Length:43636         
##  1st Qu.:   0.300            Class :character     Class :character     
##  Median :   0.550            Mode  :character     Mode  :character     
##  Mean   :  27.430                                                      
##  3rd Qu.:   1.117                                                      
##  Max.   :1869.467                                                      
##                                                                        
##    Section           Subsection         Exam.Room        
##  Length:43636       Length:43636       Length:43636      
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
## 
# Each example below overwrites dat2 with a different row subset of dat.

# Patients older than 75
dat2 <- filter(dat, PatientAge > 75)

# Patients aged exactly 75
dat2 <- filter(dat, PatientAge == 75)

# Patients older than 3 and at most 18; comma-separated conditions are
# combined with AND
dat2 <- filter(dat, PatientAge > 3, PatientAge <= 18)

# Every patient whose age is not 75
dat2 <- filter(dat, !(PatientAge == 75))

# Rows whose PatientTypeMnemonic equals "OPEC"
dat2 <- filter(dat, PatientTypeMnemonic == "OPEC")
# Keep only the age and ordering-physician columns
dat2 <- select(dat, PatientAge, Ordering.Physician)

# Drop PatientAge again, leaving just Ordering.Physician
dat2 <- select(dat2, !PatientAge)
# Append AgeSquared (PatientAge raised to the power 2) as a new column;
# all existing columns of dat are kept
dat2 <- mutate(dat, AgeSquared = PatientAge^2)
# Collapse dat to a single row holding the average patient age
summarize(dat, mean_age = mean(PatientAge))
##   mean_age
## 1 50.49544
# Count how many different ordering physicians appear in the data
summarize(dat, distinct_physicians = n_distinct(Ordering.Physician))
##   distinct_physicians
## 1                 415
# Average order-to-complete time in minutes across all exams.
# Note: the summary() output above shows a negative minimum and a maximum
# far above the third quartile, so this mean is pulled up by extreme values.
summarize(dat, mean_minutes = mean(Ordered.to.Complete...Mins))
##   mean_minutes
## 1     1645.787
# Average order-to-complete time (minutes) for exams completed at "3E" only:
# restrict the rows first, then collapse them to one mean
summarize(
  filter(dat, Loc.At.Exam.Complete == "3E"),
  mean_minutes_3E = mean(Ordered.to.Complete...Mins)
)
##   mean_minutes_3E
## 1        4644.828
# Average order-to-complete time (minutes) per exam-completion location:
# one output row for each distinct Loc.At.Exam.Complete value
summarize(
  group_by(dat, Loc.At.Exam.Complete),
  mean_minutes = mean(Ordered.to.Complete...Mins)
)
## # A tibble: 25 × 2
##    Loc.At.Exam.Complete mean_minutes
##    <chr>                       <dbl>
##  1 3E                         4645. 
##  2 3W                         1462. 
##  3 4E                         1152. 
##  4 4W                         1675. 
##  5 5E                         8020. 
##  6 5W                         1282. 
##  7 BICU                       9695. 
##  8 BIMC                      11873. 
##  9 BMT                          72.7
## 10 CHCU                       1583. 
## # ℹ 15 more rows
# Attach each location's mean order-to-complete time (minutes) to every row.
# Unlike summarize(), mutate() keeps all rows of dat and repeats the group
# mean within each Loc.At.Exam.Complete group.
dat2 <- dat %>%
  group_by(Loc.At.Exam.Complete) %>%
  mutate(mean_minutes = mean(Ordered.to.Complete...Mins)) %>%
  # Drop the grouping so later verbs on dat2 are not silently applied per group
  ungroup()