########################################
# #
# Daniela Poggiali Analysis #
# #
########################################
# Four interesting graphics.
#
# People die in the morning, when most nurses are around.
# Daniela is an ordinary nurse, she is most often on duty for the morning shift.
# She is a hard and reliable worker and consequently works long shifts
# - she usually clocks in before the handover, clocks out after.
# Amazingly many deaths are registered to have occurred between midnight
# and five minutes after midnight. Deaths tend amazingly to happen at
# whole hours and at half past the hour.
# In Italy, less people die at lunch-time.
# Note: death certificates have to be signed by a doctor and the
# time written on the certificate might be a guess,
# might be the time he confirmed that death had occurred.
### (1) Packages, Directory, self defined functions and Data ###
# 1.1: Packages
library(MASS)
require(lubridate)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# 1.2: Directory PUT YOUR DIRECTORY HERE
setwd("~/Desktop/Daniela Poggiali/Data and R code")
rm(list=ls())
# 1.3: Functions
trimws <- function (x) gsub("^\\s+|\\s+$", "", x) # Recodes settore variable avoiding strange strings
daysworked <- function(name) {#
range <- (worktimes$COGNOME == name & times1 %within% micciolo)#
length(times1[range]) - sum(is.na(times1[range]))#
} # Computes number of days worked accordingly to each nurse
hoursworked <- function(name) { # Computes hours worked day by day
range <- (worktimes$COGNOME == name & times1 %within% micciolo & !is.na(times1))#
matrix(c(as.numeric(difftime(times2[range], times1[range], units = "hours")),#
as.numeric(difftime(times4[range], times3[range], units = "hours"))), ncol = 2)#
}
# 1.4: Data and explorative analysis
worktimes <- read.csv("Nurses.csv", header = TRUE,stringsAsFactors = T) # Work time of the nurses
nursenames <- as.character(worktimes$COGNOME) # Recodes surname variable
nurses <- names(table(nursenames))[-1] # Two rows of the spreadsheet are empty! (3392, 3425)
# Recode the variable associated to the time intervals
date <- as.character(worktimes[, 3])#
times1 <- as.POSIXct(paste(date, worktimes[, 6]), format = "%d/%m/%Y %H.%M")#
times2 <- as.POSIXct(paste(date, worktimes[, 7]), format = "%d/%m/%Y %H.%M")#
times3 <- as.POSIXct(paste(date, worktimes[, 8]), format = "%d/%m/%Y %H.%M")#
times4 <- as.POSIXct(paste(date, worktimes[, 9]), format = "%d/%m/%Y %H.%M")#
#micciolo <- interval(times1[23958], times2[24545]) # Define the interval of the investigation
#class(micciolo)
micciolo <- interval("2012-04-08 00:00:00 CEST", "2014-04-08 23:59:00 CEST") # Define the interval of the investigation
nursedat <- NULL # vector reporting, for each nurse, the number of hours worked
hourperday <- NULL # vector reporting, for each nurse, the average hours worked for each day
for(i in 1:length(nurses)) { #
nursedat[i] <- round(sum(hoursworked(nurses[i]), na.rm = TRUE))#
hourperday[i] <- mean(rowSums(hoursworked(nurses[i]), na.rm = TRUE))#
}
names(nursedat) = nurses; names(hourperday) = nurses
# open and recode data associated to each death
fiveponedat <- read.csv("Appendix 5.1.csv", header = TRUE)
deathtimes <- as.POSIXct(paste(as.character(fiveponedat[,4]),
as.character(fiveponedat[,5])),
format = "%d/%m/%Y %H:%M")
#deathsDaPeriod <- deathtimes
deathsDaPeriod <- deathtimes[deathtimes %within% micciolo]
shift1 <- interval(times1, times2)#
shift2 <- interval(times3, times4)#
hours1 <- hour(times1)
hours2 <- hour(times2)
hours3 <- hour(times3)
hours4 <- hour(times4)
hours2[hours2 == 0] <- 24
hours4[hours4 == 0] <- 24
hourworked <- function(halfhour) {
range <- (times1 %within% micciolo & !is.na(times1))
cbind(halfhour < hours2[range] & halfhour > hours1[range],
halfhour < hours4[range] & halfhour > hours3[range])
}
halfhours <- (0:23) + 0.5
hoursworked <- rep(0, 24)
for (i in 1:24) {
test <- hourworked(halfhours[i])
test[is.na(test)] <- FALSE
hoursworked[i] <- sum(test)
}
# jpeg("plot1.jpg")
plot(halfhours, hoursworked, type = "l",
ylim = c(0, 10000),
main = "Total (over 2 years) #nurses at work at each mid-hour",
xlab = "time (0:30, 1:30, ...23:30)",
ylab = "Frequency")
points(halfhours, hoursworked)

# graphics.off()
hoursworked
## [1] 2991 2992 2992 2992 2992 3029 6994 6041 6710 6697 6692 6689 6619 9444 5239
## [16] 4439 4282 4237 4202 4192 5953 3021 2978 2978
DPhourworked <- function(halfhour) {
range <- (worktimes$COGNOME == "POGGIALI" & times1 %within% micciolo & !is.na(times1))
cbind(halfhour < hours2[range] & halfhour > hours1[range],
halfhour < hours4[range] & halfhour > hours3[range])
}
DPhoursworked <- rep(0, 24)
for (i in 1:24) {
test <- DPhourworked(halfhours[i])
test[is.na(test)] <- FALSE
DPhoursworked[i] <- sum(test)
}
# jpeg("plot2.jpg")
plot(halfhours, DPhoursworked, type = "l",
ylim = c(0, 300),
main = "Total (over 2 years) #times DP was at work at each mid-hour",
xlab = "time (0:30, 1:30, ...23:30)",
ylab = "Frequency")
points(halfhours, DPhoursworked)

# graphics.off()
DPhoursworked
## [1] 123 123 123 123 123 123 269 186 158 158 158 158 158 250 159 135 134 134 134
## [20] 134 232 134 123 123
# open and recode data associated to each death
fiveponedat <- read.csv("Appendix 5.1.csv", header = TRUE)
deathtimes <- as.POSIXct(paste(as.character(fiveponedat[,4]),
as.character(fiveponedat[,5])),
format = "%d/%m/%Y %H:%M")
#deathsDaPeriod <- deathtimes
deathsDaPeriod <- deathtimes[deathtimes %within% micciolo]
shift1 <- interval(times1, times2)#
shift2 <- interval(times3, times4)#
# Create matrix of 0's and 1's for presence of a nurse at a death, can take more than 10 minutes to run.#
present <- matrix(0, nrow = length(deathsDaPeriod), ncol = length(nurses))#
for(i in 1:length(deathsDaPeriod)){
present[i, match((nursenames[which(deathsDaPeriod[i] %within% shift1 | deathsDaPeriod[i] %within% shift2)]), nurses)] <- 1#
}
colnames(present) <- nurses
summ.mat = cbind(apply(present, 2,sum),nursedat,hourperday)
summ.mat
## nursedat hourperday
## ALDIGHIERI 136 3456 6.658317
## ALPI 124 3686 7.144348
## ANCARANI 117 3545 6.765331
## ANDALO' 132 3750 7.062963
## ASTOLFI 10 209 7.201724
## BAGNARA 102 3467 6.811984
## BARACANI 0 0 NaN
## BEDESCHI 116 2973 6.851344
## BORTOLOTTI 3 88 6.743590
## BOTA 17 577 6.949598
## BUCCHI 28 582 6.061632
## BULDRINI 0 0 NaN
## CASADEI 75 2250 6.922154
## CASADEI2 107 3324 6.661122
## CASTELLANI 118 3240 7.482987
## CAVALLUCCI 15 563 7.128481
## CERAUDO 0 0 NaN
## CICCHETTI 90 2735 6.837625
## DONIGAGLIA 53 2079 6.977405
## DREI 10 258 6.614530
## FABBRI 13 295 6.711742
## FALZONI 121 3527 6.916209
## FAROLFI 0 0 NaN
## FEBBRAIO 80 2395 6.861652
## FERRI 56 1646 6.831535
## FERRUZZI 11 318 6.628125
## GARCIA 103 3346 6.828537
## GIOVANNARDI 43 1145 6.431835
## GRANATA 64 1435 6.867783
## GRASSI 105 3625 7.066959
## GRAZIANI 10 553 7.274561
## IVAN 0 0 NaN
## MARCHI 108 3531 6.829110
## MATTEUCCI 113 3396 6.633431
## MORRI 103 3327 6.735088
## PEPPI 0 0 NaN
## PIRAZZOLI 11 280 7.003750
## POGGIALI 190 3576 7.067984
## RADUICA 67 2099 6.663386
## RAGNINI 105 3409 6.914503
## RAMBELLI 101 3240 7.105117
## RICCI 113 3257 7.020115
## ROSSI 91 3196 7.329855
## SALVI 62 1708 6.673177
## SARTONI 0 0 NaN
## SAVARINO 103 3709 7.037287
## SILAGHI 138 3421 6.856379
## SINTONI 0 0 NaN
## SOGLIA 37 1547 7.162577
## STELLUTI 106 3293 7.190429
## STERNINI 94 3402 6.928140
## URSACHI 0 0 NaN
## VESPA 25 722 7.081373
## VESTRUCCI 0 0 NaN
## ZALAMBANI 118 3741 6.954027
## ZARDI 83 2758 6.520725
## ZAZZARO 80 2690 6.776868
# TO DO: RICORDIFICA fiveponedat$settore e assicurati che deathsDaPeriod e fiveponedat$settore abbiano la stessa lunghezza
#Read in appendix 6.1 data. #
sixponedat <- read.csv("Appendix 6.1.csv", header = TRUE)#
#
sixpone <- sixponedat[3:length(sixponedat[,1]), ]#
colnames(sixpone) <- c("Name", "DeathsDuty", "DaysDuty", "RateDuty", "DeathsAway", "DaysAway", "RateAway", "Pval", "RR", "AR")#
sixpone$DeathsDuty <- as.numeric(as.character(sixpone$DeathsDuty))#
sixpone$RateDuty <- as.numeric(as.character(sixpone$RateDuty))#
#
# Calculate deaths by hour. Then compare those to deaths per week from appendix 6.1#
# Took the ratio and normalized it, values greater than 1 mean the deaths per week value is relatively higher than actual deaths per hour#
# #
# DeathByHour <- sixpone$DeathsDuty / nursedat[nursedat != 0]#
# Comparison <- sixpone$RateDuty / DeathByHour#
# NormCompare <- (Comparison - mean(Comparison)) / sqrt(var(Comparison))#
# #
# cbind(nurses[nursedat != 0], round(NormCompare, digits = 4), round(hourperday[nursedat != 0], digits = 3))#
#
#Read in appendix 5.1#
fiveponedat <- read.csv("Appendix 5.1.csv", header = TRUE)#
#
# 6 times missing in file#
deathtimes <- as.POSIXct(paste(as.character(fiveponedat[ ,4]), as.character(fiveponedat[,5])), format = "%d/%m/%Y %H:%M")#
# sum(is.na(deathtimes))#
#
# Deaths in file start at first day of Daniela but continue afterwards, take selection of just DaPo's period.#
deathsDaPeriod <- deathtimes[deathtimes %within% micciolo]#
# jpeg("plot3.jpg")
truehist(hour(deathsDaPeriod), h = 1, x0 = -0.5,
main = "Deaths over 2 years, by hour of day",
xlab = "Hour",
ylab = "Frequency",
prob = FALSE)#

# graphics.off()
# jpeg("plot4.jpg")
truehist(minute(deathsDaPeriod), h = 5, x0 = -2.5,
main = "Deaths over 2 years, by minutes of hour",
xlab = "Minute",
ylab = "Frequency",
prob = FALSE)#

# graphics.off()