########################################                                 
#                                      #
#     Daniela Poggiali Analysis        # 
#                                      #
########################################

# Four interesting graphics.
#
# People die in the morning, when most nurses are around. 
# Daniela is an ordinary nurse, she is most often on duty for the morning shift. 
# She is a hard and reliable worker and consequently works long shifts 
# - she usually clocks in before the handover, clocks out after. 
# Amazingly many deaths are registered to have occurred between midnight 
# and five minutes after midnight. Deaths tend amazingly to happen at 
# whole hours and at half past the hour. 
# In Italy, less people die at lunch-time. 
# Note: death certificates have to be signed by a doctor and the 
# time written on the certificate might be a guess, 
# might be the time he confirmed that death had occurred.

### (1) Packages, Directory, self defined functions and Data ###

# 1.1: Packages

library(MASS)
require(lubridate)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# 1.2: Directory PUT YOUR DIRECTORY HERE

setwd("~/Desktop/Daniela Poggiali/Data and R code")

rm(list=ls())


# 1.3: Functions

trimws <- function (x) gsub("^\\s+|\\s+$", "", x) # Recodes settore variable avoiding strange strings

daysworked  <- function(name) {#
  range <- (worktimes$COGNOME == name & times1 %within% micciolo)#
  length(times1[range]) - sum(is.na(times1[range]))#
}               # Computes number of days worked accordingly to each nurse

hoursworked <- function(name) {                   # Computes hours worked day by day
  range <- (worktimes$COGNOME == name & times1 %within% micciolo & !is.na(times1))#
  matrix(c(as.numeric(difftime(times2[range], times1[range], units = "hours")),#
           as.numeric(difftime(times4[range], times3[range], units = "hours"))), ncol = 2)#
}

# 1.4: Data and explorative analysis

worktimes  <- read.csv("Nurses.csv", header = TRUE,stringsAsFactors = T) # Work time of the nurses
nursenames <- as.character(worktimes$COGNOME)                            # Recodes surname variable
nurses     <- names(table(nursenames))[-1]                               # Two rows of the spreadsheet are empty! (3392, 3425)

# Recode the variable associated to the time intervals

date     <- as.character(worktimes[, 3])#
times1   <- as.POSIXct(paste(date, worktimes[, 6]), format = "%d/%m/%Y %H.%M")#
times2   <- as.POSIXct(paste(date, worktimes[, 7]), format = "%d/%m/%Y %H.%M")#
times3   <- as.POSIXct(paste(date, worktimes[, 8]), format = "%d/%m/%Y %H.%M")#
times4   <- as.POSIXct(paste(date, worktimes[, 9]), format = "%d/%m/%Y %H.%M")#

#micciolo <- interval(times1[23958], times2[24545]) # Define the interval of the investigation
#class(micciolo)
micciolo <- interval("2012-04-08 00:00:00 CEST", "2014-04-08 23:59:00 CEST") # Define the interval of the investigation

nursedat   <- NULL # vector reporting, for each nurse, the number of hours worked
hourperday <- NULL # vector reporting, for each nurse, the average hours worked for each day

for(i in 1:length(nurses)) { #
  nursedat[i]   <- round(sum(hoursworked(nurses[i]), na.rm = TRUE))#
  hourperday[i] <- mean(rowSums(hoursworked(nurses[i]), na.rm = TRUE))#
}
names(nursedat) = nurses; names(hourperday) = nurses


# open and recode data associated to each death

fiveponedat    <- read.csv("Appendix 5.1.csv", header = TRUE)
deathtimes     <- as.POSIXct(paste(as.character(fiveponedat[,4]), 
                                   as.character(fiveponedat[,5])), 
                             format = "%d/%m/%Y %H:%M")
#deathsDaPeriod <- deathtimes
deathsDaPeriod <- deathtimes[deathtimes %within% micciolo]


shift1 <- interval(times1, times2)#
shift2 <- interval(times3, times4)#



hours1 <- hour(times1)
hours2 <- hour(times2)
hours3 <- hour(times3)
hours4 <- hour(times4)

hours2[hours2 == 0] <- 24
hours4[hours4 == 0] <- 24

hourworked <- function(halfhour) {
  range <- (times1 %within% micciolo & !is.na(times1))
  cbind(halfhour < hours2[range] & halfhour > hours1[range],
        halfhour < hours4[range] & halfhour > hours3[range])
}

halfhours <- (0:23) + 0.5
hoursworked <- rep(0, 24)

for (i in 1:24) {
    test <- hourworked(halfhours[i])
    test[is.na(test)] <- FALSE
    hoursworked[i] <- sum(test)
}

# jpeg("plot1.jpg")
plot(halfhours, hoursworked, type = "l",
    ylim = c(0, 10000), 
    main = "Total (over 2 years) #nurses at work at each mid-hour",
    xlab = "time (0:30, 1:30, ...23:30)",
    ylab = "Frequency")
points(halfhours, hoursworked)

# graphics.off()

hoursworked
##  [1] 2991 2992 2992 2992 2992 3029 6994 6041 6710 6697 6692 6689 6619 9444 5239
## [16] 4439 4282 4237 4202 4192 5953 3021 2978 2978
DPhourworked <- function(halfhour) {
  range <- (worktimes$COGNOME == "POGGIALI" & times1 %within% micciolo & !is.na(times1))
  cbind(halfhour < hours2[range] & halfhour > hours1[range],
        halfhour < hours4[range] & halfhour > hours3[range])
}

DPhoursworked <- rep(0, 24)

for (i in 1:24) {
  test <- DPhourworked(halfhours[i])
  test[is.na(test)] <- FALSE
  DPhoursworked[i] <- sum(test)
}

# jpeg("plot2.jpg")
plot(halfhours, DPhoursworked, type = "l",
     ylim = c(0, 300),
     main = "Total (over 2 years) #times DP was at work at each mid-hour",
     xlab = "time (0:30, 1:30, ...23:30)",
     ylab = "Frequency")
points(halfhours, DPhoursworked)

# graphics.off()

DPhoursworked
##  [1] 123 123 123 123 123 123 269 186 158 158 158 158 158 250 159 135 134 134 134
## [20] 134 232 134 123 123
# open and recode data associated to each death

fiveponedat    <- read.csv("Appendix 5.1.csv", header = TRUE)
deathtimes     <- as.POSIXct(paste(as.character(fiveponedat[,4]), 
                                   as.character(fiveponedat[,5])), 
                             format = "%d/%m/%Y %H:%M")
#deathsDaPeriod <- deathtimes
deathsDaPeriod <- deathtimes[deathtimes %within% micciolo]


shift1 <- interval(times1, times2)#
shift2 <- interval(times3, times4)#

# Create matrix of 0's and 1's for presence of a nurse at a death, can take more than 10 minutes to run.#
present <- matrix(0, nrow = length(deathsDaPeriod), ncol = length(nurses))#

for(i in 1:length(deathsDaPeriod)){
  present[i, match((nursenames[which(deathsDaPeriod[i] %within% shift1 | deathsDaPeriod[i] %within% shift2)]), nurses)] <- 1#
}

colnames(present) <- nurses

summ.mat = cbind(apply(present, 2,sum),nursedat,hourperday)
summ.mat
##                 nursedat hourperday
## ALDIGHIERI  136     3456   6.658317
## ALPI        124     3686   7.144348
## ANCARANI    117     3545   6.765331
## ANDALO'     132     3750   7.062963
## ASTOLFI      10      209   7.201724
## BAGNARA     102     3467   6.811984
## BARACANI      0        0        NaN
## BEDESCHI    116     2973   6.851344
## BORTOLOTTI    3       88   6.743590
## BOTA         17      577   6.949598
## BUCCHI       28      582   6.061632
## BULDRINI      0        0        NaN
## CASADEI      75     2250   6.922154
## CASADEI2    107     3324   6.661122
## CASTELLANI  118     3240   7.482987
## CAVALLUCCI   15      563   7.128481
## CERAUDO       0        0        NaN
## CICCHETTI    90     2735   6.837625
## DONIGAGLIA   53     2079   6.977405
## DREI         10      258   6.614530
## FABBRI       13      295   6.711742
## FALZONI     121     3527   6.916209
## FAROLFI       0        0        NaN
## FEBBRAIO     80     2395   6.861652
## FERRI        56     1646   6.831535
## FERRUZZI     11      318   6.628125
## GARCIA      103     3346   6.828537
## GIOVANNARDI  43     1145   6.431835
## GRANATA      64     1435   6.867783
## GRASSI      105     3625   7.066959
## GRAZIANI     10      553   7.274561
## IVAN          0        0        NaN
## MARCHI      108     3531   6.829110
## MATTEUCCI   113     3396   6.633431
## MORRI       103     3327   6.735088
## PEPPI         0        0        NaN
## PIRAZZOLI    11      280   7.003750
## POGGIALI    190     3576   7.067984
## RADUICA      67     2099   6.663386
## RAGNINI     105     3409   6.914503
## RAMBELLI    101     3240   7.105117
## RICCI       113     3257   7.020115
## ROSSI        91     3196   7.329855
## SALVI        62     1708   6.673177
## SARTONI       0        0        NaN
## SAVARINO    103     3709   7.037287
## SILAGHI     138     3421   6.856379
## SINTONI       0        0        NaN
## SOGLIA       37     1547   7.162577
## STELLUTI    106     3293   7.190429
## STERNINI     94     3402   6.928140
## URSACHI       0        0        NaN
## VESPA        25      722   7.081373
## VESTRUCCI     0        0        NaN
## ZALAMBANI   118     3741   6.954027
## ZARDI        83     2758   6.520725
## ZAZZARO      80     2690   6.776868
#  TO DO: RICORDIFICA fiveponedat$settore e assicurati che deathsDaPeriod e fiveponedat$settore abbiano la stessa lunghezza

#Read in appendix 6.1 data. #
sixponedat <- read.csv("Appendix 6.1.csv", header = TRUE)#
#
sixpone <- sixponedat[3:length(sixponedat[,1]), ]#
colnames(sixpone) <- c("Name", "DeathsDuty", "DaysDuty", "RateDuty", "DeathsAway", "DaysAway", "RateAway", "Pval", "RR", "AR")#
sixpone$DeathsDuty <- as.numeric(as.character(sixpone$DeathsDuty))#
sixpone$RateDuty <- as.numeric(as.character(sixpone$RateDuty))#
#
# Calculate deaths by hour. Then compare those to deaths per week from appendix 6.1#
# Took the ratio and normalized it, values greater than 1 mean the deaths per week value is relatively higher than actual deaths per hour#
# #
# DeathByHour <- sixpone$DeathsDuty / nursedat[nursedat != 0]#
# Comparison <- sixpone$RateDuty / DeathByHour#
# NormCompare <- (Comparison - mean(Comparison)) / sqrt(var(Comparison))#
# #
# cbind(nurses[nursedat != 0], round(NormCompare, digits = 4), round(hourperday[nursedat != 0], digits = 3))#
#
#Read in appendix 5.1#
fiveponedat <- read.csv("Appendix 5.1.csv", header = TRUE)#
#
# 6 times missing in file#
deathtimes <- as.POSIXct(paste(as.character(fiveponedat[ ,4]), as.character(fiveponedat[,5])), format = "%d/%m/%Y %H:%M")#
# sum(is.na(deathtimes))#
#
# Deaths in file start at first day of Daniela but continue afterwards, take selection of just DaPo's period.#
deathsDaPeriod <- deathtimes[deathtimes %within% micciolo]#

# jpeg("plot3.jpg")
truehist(hour(deathsDaPeriod), h = 1, x0 = -0.5, 
         main = "Deaths over 2 years, by hour of day", 
         xlab = "Hour",
         ylab = "Frequency",
         prob = FALSE)#

# graphics.off()

# jpeg("plot4.jpg")
truehist(minute(deathsDaPeriod), h = 5, x0 = -2.5, 
         main = "Deaths over 2 years, by minutes of hour", 
         xlab = "Minute",
         ylab = "Frequency",
         prob = FALSE)#

# graphics.off()