Load data and summarize data
rm(list = ls())
### library
library(ggplot2)
library(magrittr)
library(reshape2)
library(caret)
## Loading required package: lattice
library(Boruta)
## Loading required package: ranger
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(grid)
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ranger':
##
## importance
## The following object is masked from 'package:ggplot2':
##
## margin
library(viridis)
### load data
rawdata = read.csv('AccessLog.csv', header = T, sep = ',')
saveRDS(rawdata, 'rawdata.rds')
#rawdata = readRDS('rawdata.rds')
summary(rawdata)
## Date.Time Employee.Name Employee.ID
## 11/19/2013 2:48:50 : 29 Gerardo Munoz :17415 GMunoz3 :17415
## 11/19/2013 12:22:31: 25 Dustin Steele :13447 DSteele9 :13447
## 11/26/2013 12:08:16: 25 Kathleen Wheeler:11521 KWheele10:11521
## 12/2/2013 12:05:00 : 20 Richard Cummings:11484 RCummin0 :11484
## 11/19/2013 12:39:40: 19 Taylor Carroll :10760 TCarrol8 :10760
## 11/20/2013 3:40:00 : 19 Lucas Chavez : 6600 LChavez8 : 6600
## (Other) :96473 (Other) :25383 (Other) :25383
## EMR.Module Patient.Name
## Report :43105 Pat Kelley : 648
## Encounter : 7266 James Sanderson: 563
## Patient Lookup Form : 5441 Sophie Hunt : 561
## Notes Section : 3277 Carolyn Mathis : 552
## Demographics : 3177 Lucy Harrington: 551
## Chart Review Encounters tab: 2977 Michaele Bolton: 550
## (Other) :31367 (Other) :93185
## Medical.Record.Number Patient.ID.Number
## JH94173864: 648 Z93108 : 648
## JH76240207: 561 Z70957 : 561
## JH29532130: 552 Z73483 : 552
## JH22634601: 551 Z98693 : 551
## JH64607870: 550 Z36633 : 550
## JH91736583: 545 Z71324 : 545
## (Other) :93203 (Other):93203
names(rawdata)
## [1] "Date.Time" "Employee.Name" "Employee.ID"
## [4] "EMR.Module" "Patient.Name" "Medical.Record.Number"
## [7] "Patient.ID.Number"
head(rawdata)
## Date.Time Employee.Name Employee.ID
## 1 11/14/2013 6:42:58 Kristin Guzman KGuzman9
## 2 11/14/2013 6:43:18 Kristin Guzman KGuzman9
## 3 11/14/2013 6:43:19 Kristin Guzman KGuzman9
## 4 11/14/2013 6:43:21 Kristin Guzman KGuzman9
## 5 11/14/2013 7:03:14 Kristin Guzman KGuzman9
## 6 11/14/2013 7:09:24 Kristin Guzman KGuzman9
## EMR.Module Patient.Name Medical.Record.Number
## 1 Patient Lookup Form Leonard Perez JH90939388
## 2 Patient Lookup Form Leonard Perez JH90939388
## 3 Chart Review Procedures tab Leonard Perez JH90939388
## 4 Chart Review Encounters tab Leonard Perez JH90939388
## 5 Patient Lookup Form Shelia Bailey JH22901225
## 6 Report Dustin Wagner JH42520855
## Patient.ID.Number
## 1 Z8755695
## 2 Z8755695
## 3 Z8755695
## 4 Z8755695
## 5 Z3442
## 6 Z36493
str(rawdata)
## 'data.frame': 96610 obs. of 7 variables:
## $ Date.Time : Factor w/ 59269 levels "11/14/2013 1:00:32",..: 2340 2341 2342 2343 2353 2354 2355 2356 2357 2358 ...
## $ Employee.Name : Factor w/ 16 levels "Candace Castro",..: 9 9 9 9 9 9 9 9 9 9 ...
## $ Employee.ID : Factor w/ 16 levels "CCastro5","CDaniel2",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ EMR.Module : Factor w/ 75 levels "Addendum","Admin",..: 57 57 17 7 57 62 62 25 57 57 ...
## $ Patient.Name : Factor w/ 3510 levels "Abel Dunn","Abigail Allan",..: 2017 2017 2017 2017 2996 970 970 970 970 970 ...
## $ Medical.Record.Number: Factor w/ 3523 levels "JH10003485","JH10025223",..: 3190 3190 3190 3190 514 1317 1317 1317 1317 1317 ...
## $ Patient.ID.Number : Factor w/ 3492 levels "Z10001","Z10020",..: 3025 3025 3025 3025 968 1036 1036 1036 1036 1036 ...
## look at NA
sum(is.na(rawdata))
## [1] 0
summary(rawdata$Patient.ID.Number)
## Z93108 Z70957 Z73483 Z98693 Z36633 Z71324 Z476353 Z38181 Z63422
## 648 561 552 551 550 545 531 503 492
## Z71829 Z52757 Z52838 Z50210 Z86000 Z68559 Z97242 Z84385 Z32293
## 481 478 449 442 425 412 411 392 387
## Z97384 Z71927 Z85370 Z59178 Z16913 Z19863 Z51300 Z54363 Z849151
## 384 382 379 373 365 350 340 338 337
## Z42308 Z19669 Z23379 Z93470 Z59216 Z85646 Z27585 Z46167 Z46903
## 331 326 323 320 312 312 310 299 299
## Z64453 Z73122 Z86994 Z82742 Z10185 Z91438 Z34198 Z87299 Z73924
## 298 296 296 295 291 285 283 279 276
## Z83207 Z22088 Z21252 Z95777 Z57980 Z59327 Z38586 Z27324 Z78003
## 276 274 271 271 268 261 252 248 248
## Z51775 Z6469 Z83531 Z73363 Z37536 Z61387 Z54908 Z61206 Z74248
## 247 245 245 244 243 243 237 236 236
## Z86939 Z97568 Z20539 Z1287 Z45810 Z74981 Z26490 Z99969 Z53190
## 235 233 228 225 225 225 224 222 221
## Z25661 Z13048 Z88555 Z60160 Z61298 Z19707 Z67319 Z9873 Z44968
## 219 217 215 213 210 208 208 208 206
## Z69432 Z83171 Z18216 Z56038 Z16483 Z79365 Z33827 Z68513 Z79882
## 206 206 203 203 202 201 197 197 194
## Z50803 Z45591 Z222191 Z77125 Z11336 Z31906 Z31169 Z9406 Z1184
## 193 191 190 189 188 188 187 187 186
## (Other)
## 67126
summary(rawdata$Employee.ID)
## CCastro5 CDaniel2 DSteele9 EJennin9 GMatthe5 GMunoz3 KGuzman9
## 1740 2087 13447 4589 335 17415 6446
## KKenned8 KWheele10 LChavez8 MEricks0 NGreene4 RCummin0 RKeller3
## 1953 11521 6600 1657 1824 11484 532
## RRice4 TCarrol8
## 4220 10760
##process time
date_sub = substr(rawdata$Date.Time, 1, 10)
date_data = as.Date(date_sub, format = '%m/%d/%Y')
time_data = as.POSIXct(rawdata$Date.Time, format="%m/%d/%Y %H:%M:%S")
rawdata$date_data = date_data
rawdata$time_data = time_data
## look at each employee record over date
plot(rawdata$Employee.Name, rawdata$date_data)

# this is a table to look at record per day per employee
tapply(rawdata$Patient.ID.Number, FUN = length, INDEX = list(rawdata$Employee.ID,rawdata$date_data))
## 2013-11-14 2013-11-15 2013-11-18 2013-11-19 2013-11-20
## CCastro5 115 81 135 173 233
## CDaniel2 100 169 132 55 188
## DSteele9 993 1009 952 560 636
## EJennin9 234 289 126 238 412
## GMatthe5 50 6 NA 60 56
## GMunoz3 1301 1620 1211 254 1218
## KGuzman9 272 165 197 197 274
## KKenned8 88 108 71 210 144
## KWheele10 NA NA 509 590 878
## LChavez8 109 298 NA 606 198
## MEricks0 54 117 79 179 84
## NGreene4 53 97 123 47 104
## RCummin0 375 661 1167 803 760
## RKeller3 NA 21 NA 29 NA
## RRice4 282 170 150 271 304
## TCarrol8 1068 NA 821 582 479
## 2013-11-21 2013-11-22 2013-11-24 2013-11-25 2013-11-26
## CCastro5 85 34 NA 65 NA
## CDaniel2 104 76 NA 133 44
## DSteele9 803 1192 NA 744 1101
## EJennin9 142 229 NA 100 283
## GMatthe5 NA 73 NA 5 NA
## GMunoz3 1185 141 NA NA 1542
## KGuzman9 418 368 NA 260 372
## KKenned8 NA NA NA NA NA
## KWheele10 321 642 NA 931 471
## LChavez8 282 192 NA 206 520
## MEricks0 89 66 NA 141 111
## NGreene4 134 50 NA 64 90
## RCummin0 619 NA NA 530 706
## RKeller3 19 NA NA NA 13
## RRice4 253 73 NA 210 270
## TCarrol8 604 420 68 773 830
## 2013-11-27 2013-11-29 2013-12-02 2013-12-03 2013-12-04
## CCastro5 NA NA 67 60 211
## CDaniel2 112 NA 138 42 177
## DSteele9 476 NA NA NA NA
## EJennin9 470 NA 118 353 383
## GMatthe5 NA NA 24 NA 46
## GMunoz3 1024 NA 175 358 1846
## KGuzman9 461 NA 300 401 267
## KKenned8 NA NA 214 73 NA
## KWheele10 708 NA 604 735 757
## LChavez8 403 59 NA 521 548
## MEricks0 46 NA NA 151 51
## NGreene4 81 104 NA NA 121
## RCummin0 398 NA 850 975 919
## RKeller3 114 NA 6 325 5
## RRice4 270 64 187 315 207
## TCarrol8 573 NA NA 308 509
## 2013-12-05 2013-12-06 2013-12-07 2013-12-08 2013-12-09
## CCastro5 4 33 NA NA 35
## CDaniel2 128 63 NA NA 25
## DSteele9 NA 797 NA NA 748
## EJennin9 210 208 NA NA NA
## GMatthe5 NA NA NA NA 4
## GMunoz3 70 NA NA NA 173
## KGuzman9 442 147 NA NA 274
## KKenned8 311 205 NA NA 77
## KWheele10 987 658 NA NA 387
## LChavez8 512 678 NA NA NA
## MEricks0 91 57 NA NA 36
## NGreene4 196 77 NA NA NA
## RCummin0 819 460 NA NA 530
## RKeller3 NA NA NA NA NA
## RRice4 184 54 NA NA 125
## TCarrol8 774 425 231 21 339
## 2013-12-10 2013-12-11 2013-12-12 2013-12-13
## CCastro5 197 87 71 54
## CDaniel2 61 112 123 105
## DSteele9 570 1064 866 936
## EJennin9 NA 421 92 281
## GMatthe5 11 NA NA NA
## GMunoz3 1258 1400 1251 1388
## KGuzman9 350 301 400 580
## KKenned8 141 169 112 30
## KWheele10 374 719 661 589
## LChavez8 118 290 420 640
## MEricks0 118 33 26 128
## NGreene4 158 118 159 48
## RCummin0 482 295 NA 135
## RKeller3 NA NA NA NA
## RRice4 200 276 288 67
## TCarrol8 NA 650 713 572
look up patients function
## function 1 ##
## use a index time, search all the employee and return a list of all the employees durng that time
start = rawdata$time_data[1]
end = rawdata$time_data[1] + 60*24*60*60
# end and start difference : 10 hours
time_lookup <- function(start, end) {
# start and end are date
subset = which(rawdata$time_data > start)
subset2 = which(rawdata$time_data < end)
subset3 = intersect(subset, subset2)
return(rawdata[subset3,])
}
## lets try
new_try = time_lookup(start, end)
##find the patients ID who has been looked up by more than one employee
commonpatient <- function(new_try){
new_patient = matrix(0, nrow = 1000, ncol = 16)
new_patient = data.frame(new_patient)
colnames(new_patient) = levels(rawdata$Employee.ID)
for (i in c(1:16)) {
subset = which(new_try$Employee.ID == levels(new_try$Employee.ID)[i])
if (length(subset) > 0){
new_patient[1:length(subset), i] =
new_try[subset,]$Patient.ID.Number
}
}
result = data.frame(names = rep('None', 1),
freq = rep(0, 1))
### this prints the pairwise comparison
for ( i in c(1:16)) {
for (j in c(1:16)) {
if (i != j & i < j) {
subset = which(new_try$Employee.ID == levels(new_try$Employee.ID)[i])
subset2 = which(new_try$Employee.ID == levels(new_try$Employee.ID)[j])
subset3 = intersect(new_try$Patient.ID.Number[subset],
new_try$Patient.ID.Number[subset2])
print(paste(
as.character(levels(new_try$Employee.ID)[i]),
as.character(levels(new_try$Employee.ID)[j]),
sep = ','
))
print(length(subset3))
curdata = data.frame(names =
paste(
as.character(levels(new_try$Employee.ID)[i]),
as.character(levels(new_try$Employee.ID)[j]),
sep = ','
),
freq = length(subset3))
result = rbind(result,
curdata)
}
}
}
return (result)
}
matrix = time_lookup(start, end)
result = commonpatient(matrix)
## [1] "CCastro5,CDaniel2"
## [1] 58
## [1] "CCastro5,DSteele9"
## [1] 5
## [1] "CCastro5,EJennin9"
## [1] 27
## [1] "CCastro5,GMatthe5"
## [1] 4
## [1] "CCastro5,GMunoz3"
## [1] 2
## [1] "CCastro5,KGuzman9"
## [1] 1
## [1] "CCastro5,KKenned8"
## [1] 5
## [1] "CCastro5,KWheele10"
## [1] 32
## [1] "CCastro5,LChavez8"
## [1] 4
## [1] "CCastro5,MEricks0"
## [1] 56
## [1] "CCastro5,NGreene4"
## [1] 76
## [1] "CCastro5,RCummin0"
## [1] 1
## [1] "CCastro5,RKeller3"
## [1] 23
## [1] "CCastro5,RRice4"
## [1] 61
## [1] "CCastro5,TCarrol8"
## [1] 22
## [1] "CDaniel2,DSteele9"
## [1] 6
## [1] "CDaniel2,EJennin9"
## [1] 41
## [1] "CDaniel2,GMatthe5"
## [1] 13
## [1] "CDaniel2,GMunoz3"
## [1] 10
## [1] "CDaniel2,KGuzman9"
## [1] 5
## [1] "CDaniel2,KKenned8"
## [1] 17
## [1] "CDaniel2,KWheele10"
## [1] 70
## [1] "CDaniel2,LChavez8"
## [1] 38
## [1] "CDaniel2,MEricks0"
## [1] 209
## [1] "CDaniel2,NGreene4"
## [1] 179
## [1] "CDaniel2,RCummin0"
## [1] 4
## [1] "CDaniel2,RKeller3"
## [1] 52
## [1] "CDaniel2,RRice4"
## [1] 303
## [1] "CDaniel2,TCarrol8"
## [1] 64
## [1] "DSteele9,EJennin9"
## [1] 2
## [1] "DSteele9,GMatthe5"
## [1] 3
## [1] "DSteele9,GMunoz3"
## [1] 57
## [1] "DSteele9,KGuzman9"
## [1] 0
## [1] "DSteele9,KKenned8"
## [1] 3
## [1] "DSteele9,KWheele10"
## [1] 3
## [1] "DSteele9,LChavez8"
## [1] 3
## [1] "DSteele9,MEricks0"
## [1] 5
## [1] "DSteele9,NGreene4"
## [1] 5
## [1] "DSteele9,RCummin0"
## [1] 3
## [1] "DSteele9,RKeller3"
## [1] 3
## [1] "DSteele9,RRice4"
## [1] 8
## [1] "DSteele9,TCarrol8"
## [1] 4
## [1] "EJennin9,GMatthe5"
## [1] 3
## [1] "EJennin9,GMunoz3"
## [1] 1
## [1] "EJennin9,KGuzman9"
## [1] 0
## [1] "EJennin9,KKenned8"
## [1] 4
## [1] "EJennin9,KWheele10"
## [1] 23
## [1] "EJennin9,LChavez8"
## [1] 5
## [1] "EJennin9,MEricks0"
## [1] 44
## [1] "EJennin9,NGreene4"
## [1] 40
## [1] "EJennin9,RCummin0"
## [1] 1
## [1] "EJennin9,RKeller3"
## [1] 13
## [1] "EJennin9,RRice4"
## [1] 47
## [1] "EJennin9,TCarrol8"
## [1] 19
## [1] "GMatthe5,GMunoz3"
## [1] 1
## [1] "GMatthe5,KGuzman9"
## [1] 0
## [1] "GMatthe5,KKenned8"
## [1] 24
## [1] "GMatthe5,KWheele10"
## [1] 2
## [1] "GMatthe5,LChavez8"
## [1] 1
## [1] "GMatthe5,MEricks0"
## [1] 12
## [1] "GMatthe5,NGreene4"
## [1] 12
## [1] "GMatthe5,RCummin0"
## [1] 2
## [1] "GMatthe5,RKeller3"
## [1] 4
## [1] "GMatthe5,RRice4"
## [1] 15
## [1] "GMatthe5,TCarrol8"
## [1] 5
## [1] "GMunoz3,KGuzman9"
## [1] 4
## [1] "GMunoz3,KKenned8"
## [1] 1
## [1] "GMunoz3,KWheele10"
## [1] 3
## [1] "GMunoz3,LChavez8"
## [1] 2
## [1] "GMunoz3,MEricks0"
## [1] 4
## [1] "GMunoz3,NGreene4"
## [1] 3
## [1] "GMunoz3,RCummin0"
## [1] 3
## [1] "GMunoz3,RKeller3"
## [1] 3
## [1] "GMunoz3,RRice4"
## [1] 11
## [1] "GMunoz3,TCarrol8"
## [1] 3
## [1] "KGuzman9,KKenned8"
## [1] 1
## [1] "KGuzman9,KWheele10"
## [1] 0
## [1] "KGuzman9,LChavez8"
## [1] 1
## [1] "KGuzman9,MEricks0"
## [1] 2
## [1] "KGuzman9,NGreene4"
## [1] 4
## [1] "KGuzman9,RCummin0"
## [1] 4
## [1] "KGuzman9,RKeller3"
## [1] 2
## [1] "KGuzman9,RRice4"
## [1] 8
## [1] "KGuzman9,TCarrol8"
## [1] 2
## [1] "KKenned8,KWheele10"
## [1] 2
## [1] "KKenned8,LChavez8"
## [1] 1
## [1] "KKenned8,MEricks0"
## [1] 16
## [1] "KKenned8,NGreene4"
## [1] 16
## [1] "KKenned8,RCummin0"
## [1] 1
## [1] "KKenned8,RKeller3"
## [1] 5
## [1] "KKenned8,RRice4"
## [1] 21
## [1] "KKenned8,TCarrol8"
## [1] 6
## [1] "KWheele10,LChavez8"
## [1] 8
## [1] "KWheele10,MEricks0"
## [1] 68
## [1] "KWheele10,NGreene4"
## [1] 69
## [1] "KWheele10,RCummin0"
## [1] 3
## [1] "KWheele10,RKeller3"
## [1] 23
## [1] "KWheele10,RRice4"
## [1] 100
## [1] "KWheele10,TCarrol8"
## [1] 31
## [1] "LChavez8,MEricks0"
## [1] 46
## [1] "LChavez8,NGreene4"
## [1] 31
## [1] "LChavez8,RCummin0"
## [1] 4
## [1] "LChavez8,RKeller3"
## [1] 8
## [1] "LChavez8,RRice4"
## [1] 62
## [1] "LChavez8,TCarrol8"
## [1] 6
## [1] "MEricks0,NGreene4"
## [1] 165
## [1] "MEricks0,RCummin0"
## [1] 7
## [1] "MEricks0,RKeller3"
## [1] 42
## [1] "MEricks0,RRice4"
## [1] 289
## [1] "MEricks0,TCarrol8"
## [1] 50
## [1] "NGreene4,RCummin0"
## [1] 3
## [1] "NGreene4,RKeller3"
## [1] 83
## [1] "NGreene4,RRice4"
## [1] 245
## [1] "NGreene4,TCarrol8"
## [1] 62
## [1] "RCummin0,RKeller3"
## [1] 1
## [1] "RCummin0,RRice4"
## [1] 8
## [1] "RCummin0,TCarrol8"
## [1] 3
## [1] "RKeller3,RRice4"
## [1] 65
## [1] "RKeller3,TCarrol8"
## [1] 12
## [1] "RRice4,TCarrol8"
## [1] 98
print(result)
## names freq
## 1 None 0
## 2 CCastro5,CDaniel2 58
## 3 CCastro5,DSteele9 5
## 4 CCastro5,EJennin9 27
## 5 CCastro5,GMatthe5 4
## 6 CCastro5,GMunoz3 2
## 7 CCastro5,KGuzman9 1
## 8 CCastro5,KKenned8 5
## 9 CCastro5,KWheele10 32
## 10 CCastro5,LChavez8 4
## 11 CCastro5,MEricks0 56
## 12 CCastro5,NGreene4 76
## 13 CCastro5,RCummin0 1
## 14 CCastro5,RKeller3 23
## 15 CCastro5,RRice4 61
## 16 CCastro5,TCarrol8 22
## 17 CDaniel2,DSteele9 6
## 18 CDaniel2,EJennin9 41
## 19 CDaniel2,GMatthe5 13
## 20 CDaniel2,GMunoz3 10
## 21 CDaniel2,KGuzman9 5
## 22 CDaniel2,KKenned8 17
## 23 CDaniel2,KWheele10 70
## 24 CDaniel2,LChavez8 38
## 25 CDaniel2,MEricks0 209
## 26 CDaniel2,NGreene4 179
## 27 CDaniel2,RCummin0 4
## 28 CDaniel2,RKeller3 52
## 29 CDaniel2,RRice4 303
## 30 CDaniel2,TCarrol8 64
## 31 DSteele9,EJennin9 2
## 32 DSteele9,GMatthe5 3
## 33 DSteele9,GMunoz3 57
## 34 DSteele9,KGuzman9 0
## 35 DSteele9,KKenned8 3
## 36 DSteele9,KWheele10 3
## 37 DSteele9,LChavez8 3
## 38 DSteele9,MEricks0 5
## 39 DSteele9,NGreene4 5
## 40 DSteele9,RCummin0 3
## 41 DSteele9,RKeller3 3
## 42 DSteele9,RRice4 8
## 43 DSteele9,TCarrol8 4
## 44 EJennin9,GMatthe5 3
## 45 EJennin9,GMunoz3 1
## 46 EJennin9,KGuzman9 0
## 47 EJennin9,KKenned8 4
## 48 EJennin9,KWheele10 23
## 49 EJennin9,LChavez8 5
## 50 EJennin9,MEricks0 44
## 51 EJennin9,NGreene4 40
## 52 EJennin9,RCummin0 1
## 53 EJennin9,RKeller3 13
## 54 EJennin9,RRice4 47
## 55 EJennin9,TCarrol8 19
## 56 GMatthe5,GMunoz3 1
## 57 GMatthe5,KGuzman9 0
## 58 GMatthe5,KKenned8 24
## 59 GMatthe5,KWheele10 2
## 60 GMatthe5,LChavez8 1
## 61 GMatthe5,MEricks0 12
## 62 GMatthe5,NGreene4 12
## 63 GMatthe5,RCummin0 2
## 64 GMatthe5,RKeller3 4
## 65 GMatthe5,RRice4 15
## 66 GMatthe5,TCarrol8 5
## 67 GMunoz3,KGuzman9 4
## 68 GMunoz3,KKenned8 1
## 69 GMunoz3,KWheele10 3
## 70 GMunoz3,LChavez8 2
## 71 GMunoz3,MEricks0 4
## 72 GMunoz3,NGreene4 3
## 73 GMunoz3,RCummin0 3
## 74 GMunoz3,RKeller3 3
## 75 GMunoz3,RRice4 11
## 76 GMunoz3,TCarrol8 3
## 77 KGuzman9,KKenned8 1
## 78 KGuzman9,KWheele10 0
## 79 KGuzman9,LChavez8 1
## 80 KGuzman9,MEricks0 2
## 81 KGuzman9,NGreene4 4
## 82 KGuzman9,RCummin0 4
## 83 KGuzman9,RKeller3 2
## 84 KGuzman9,RRice4 8
## 85 KGuzman9,TCarrol8 2
## 86 KKenned8,KWheele10 2
## 87 KKenned8,LChavez8 1
## 88 KKenned8,MEricks0 16
## 89 KKenned8,NGreene4 16
## 90 KKenned8,RCummin0 1
## 91 KKenned8,RKeller3 5
## 92 KKenned8,RRice4 21
## 93 KKenned8,TCarrol8 6
## 94 KWheele10,LChavez8 8
## 95 KWheele10,MEricks0 68
## 96 KWheele10,NGreene4 69
## 97 KWheele10,RCummin0 3
## 98 KWheele10,RKeller3 23
## 99 KWheele10,RRice4 100
## 100 KWheele10,TCarrol8 31
## 101 LChavez8,MEricks0 46
## 102 LChavez8,NGreene4 31
## 103 LChavez8,RCummin0 4
## 104 LChavez8,RKeller3 8
## 105 LChavez8,RRice4 62
## 106 LChavez8,TCarrol8 6
## 107 MEricks0,NGreene4 165
## 108 MEricks0,RCummin0 7
## 109 MEricks0,RKeller3 42
## 110 MEricks0,RRice4 289
## 111 MEricks0,TCarrol8 50
## 112 NGreene4,RCummin0 3
## 113 NGreene4,RKeller3 83
## 114 NGreene4,RRice4 245
## 115 NGreene4,TCarrol8 62
## 116 RCummin0,RKeller3 1
## 117 RCummin0,RRice4 8
## 118 RCummin0,TCarrol8 3
## 119 RKeller3,RRice4 65
## 120 RKeller3,TCarrol8 12
## 121 RRice4,TCarrol8 98
#now no patient are common
## function to return frequency
# find_freq <- function(patient, new_try) {
# for (name in levels(new_try$Employee.ID)) {
# freq = match(new_try$Patient.ID.Number[new_try$Employee.ID == name], patient)
# print('Employe: %', name)
# print('freq:%' + freq)
# }
# }
ggplot(result, aes(x=result$names, y = result$freq)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90, hjust = 1))

Conclusion
- The given data has one month of time duration. During this time, it seems that there’s no common records shared between these employees (compare 1 with all 15 other employee).
- However, pairwise comparison showed lots of common patient record
- The abover barplot showed the frequency of shared records between different employees
- The employee pair that have high frequency includes: CDaniel2,RRice4; MEricks0,RRice4; NGreene4,RRice4
- It seems that CDaniel2, RRice4 and MEricks0, and NGreene4 worked in the same place.
- Based on my findings, these employees might not work in the same place.
- These employee might from different hospitals.
- It is possible that I searched the wrong variables (patient_ID). I might need search other variables such as record ID.
What features can be generated?
- For each employee, generate a logical varible which indicates whether or nor his accessed data has been visited by other employee or not.
- For each employee, generate the name of other employee who shared the same visit record at the same time.