set.seed(123456789)

library(PROreg)
library(MASS)
library(readxl)
library(rmutil)
## 
## Attaching package: 'rmutil'
## The following object is masked from 'package:stats':
## 
##     nobs
## The following objects are masked from 'package:base':
## 
##     as.data.frame, units
presences.num <- (read.csv("presences.num.csv"))[ , -1] #Remove column 1, row numbers
presences_lessLL <- presences.num[ , -23] # Remove Lucy, now column 23, named "L3"
presences_lessLL_T <- t(presences_lessLL)
dim(presences_lessLL)
## [1] 25 37
colnames(presences_lessLL_T) <- c(" 1", " 2", " 3", " 4", " 5", " 6", " 7", " 8", " 9", 10:25)
presences_lessLL_T
##     1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## A1  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## A2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0
## A3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  1  0  0  0  0  0  0
## A4  0  0  0  0  0  0  0  0  0  0  1  1  0  1  0  0  0  1  1  0  0  0  0  0  0
## B1  0  0  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0  1  1  0  1  0  0  0  0
## B2  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  1  0  0  0  0
## C1  1  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## C2  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  1  0  0
## C3  0  0  0  1  1  0  0  1  0  0  0  1  1  0  1  0  1  0  0  0  0  0  0  0  0
## C4  0  1  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## C5  0  0  0  0  0  0  1  0  0  1  0  0  0  1  0  0  0  0  0  1  0  1  1  1  0
## E1  0  0  0  0  0  0  0  1  0  0  1  0  0  0  0  1  0  0  0  0  0  0  0  0  0
## E2  1  0  1  1  0  0  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## E3  0  0  1  0  0  0  0  1  0  1  1  0  0  0  0  0  0  1  1  0  0  0  0  0  1
## J1  0  0  0  0  0  0  0  1  0  0  0  1  1  0  0  0  0  0  0  0  0  1  1  1  0
## J2  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  1  0  0  0  0  0  0  0  0  0
## J3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0
## J4  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  1  1  0  0  0  0  0  0  0  0
## K1  0  1  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0
## K2  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0
## L1  0  0  0  0  0  0  0  0  0  0  0  1  1  0  1  1  0  0  0  0  0  0  0  0  0
## L2  1  1  0  0  1  0  1  0  0  0  1  0  0  0  0  0  0  0  0  0  1  0  0  0  0
## M1  1  1  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  1  1  0  0  0  0  1  1
## M2  1  0  1  0  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0  1  0  1  1  0  0
## M3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  1
## N1  0  0  1  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  1
## P1  0  0  0  0  0  0  0  1  0  0  1  0  0  0  0  1  0  0  1  0  0  1  1  0  0
## R1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  1  1  0
## S1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  1  0  0
## S2  0  0  0  0  0  0  0  1  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
## S3  0  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0
## S4  1  1  0  0  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## V1  0  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  1  1  1  0
## V2  0  0  0  0  1  1  0  0  1  0  0  0  0  1  0  0  1  0  0  1  0  0  0  0  0
## V3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  1  0  0  0  0  0  0  0  0  0
## Y1  0  0  0  0  0  0  0  1  0  0  1  0  1  0  0  0  0  0  0  0  0  1  1  1  0
## Y2  0  0  0  0  0  0  0  1  1  0  0  0  0  0  0  1  0  0  0  0  0  1  1  0  0
sum(presences_lessLL) / (25 * 37)
## [1] 0.1556757
nurse_totals <- colSums(presences_lessLL)
nurse_totals[1 : 22]
## A1 A2 A3 A4 B1 B2 C1 C2 C3 C4 C5 E1 E2 E3 J1 J2 J3 J4 K1 K2 L1 L2 
##  1  1  2  5  5  2  2  3  7  2  7  3  5  7  6  2  1  3  3  2  4  6
nurse_totals[23 : 37]
## M1 M2 M3 N1 P1 R1 S1 S2 S3 S4 V1 V2 V3 Y1 Y2 
##  7  7  2  3  6  3  2  3  4  4  5  6  2  6  5
rowSums(presences_lessLL)
##  [1]  6  5  5  4  4  5  4 10  4  4 10  4  6  4  5  8  4  5  6  4  6 10 10  7  4
rosterchart <- read_excel("Letby_Liste_plus.xlsx")
rosterchart[[43]][1:25]
##  [1] "N" "N" "N" "N" "N" "N" "N" "D" "N" "N" "D" "N" "N" "N" "N" "D" "N" "D" "D"
## [20] "N" "N" "D" "D" "D" "D"
fit.BB <- BBest(nurse_totals, 25, method = "MM")
fit.BB
## The probability parameter of the beta-binomial distribution: 0.1556757 
## The dispersion parameter of the beta-binomial distribution: 0.007548569 
## 
## Balanced data, maximum score number: 25
alpha <- fit.BB$p / fit.BB$phi
beta <- (1 - fit.BB$p) / fit.BB$phi
alpha; beta
## [1] 20.6232
## [1] 111.8522
###########################################################################
# BBest(nurse_totals, 25, method = "MLE") [fails]
###########################################################################

mean(nurse_totals)
## [1] 3.891892
var(nurse_totals)
## [1] 3.876877
#
#

alpha <- 2.92 # From O'Quigley(2024)
beta <- 16.23 # From O'Quigley(2024)

1 - pbetabinom(24, 61, alpha / (alpha + beta), alpha + beta)  # probability a random nurse has 25 or more
## [1] 0.01330005
                                                              # events in 61 shifts with an event
1 - (pbetabinom(24, 61, alpha / (alpha + beta), alpha + beta))^38 # prob no nurse has more than 25 events
## [1] 0.3987793
                                                      # in 61 shifts with an event
##########################################################################
# Now look at how well the model fits to the actual data: 25 shifts, 37 nurses
##########################################################################                                                      
est.probs <- dbetabinom(0:25, 25, alpha / (alpha + beta), alpha + beta)
nurse_totals_counts <- table(nurse_totals)
truehist(nurse_totals, h = 1, xlim = c(0, 25), prob = FALSE, 
  main = "Histogram of number of nurses (excl LL) on duty in each of 25 shifts",
  ylab = "# nurses (out of 37) present at N out of 25 chosen shifts", 
  xlab = "N = 0, 1, ... 25 shifts")

barplot(est.probs * 37, col = "red", ylim = c(0, 8), names.arg= 0:25,
  main = "Expected number of shifts (out of 25) with \n each number of nurses (out of 37) present", 
  xlab = "Number of nurses", ylab = "Expected number of shifts")

nurse_totals_countsINCLzeros <- c(0, nurse_totals_counts, rep(0, 18) )
names(nurse_totals_countsINCLzeros) <- 0:25
barplots <- rbind(37 * est.probs, nurse_totals_countsINCLzeros)
barplot(as.vector(barplots)[1:32], col = c("red", "cyan"), 
  names.arg = as.vector(rbind(0:15, 0:15)),
  main = "Expected and actual numbers of shifts (out of 25) \n with N nurses excl. LL (out of 37)",
  xlab = "N (nurses)", ylab = "Shifts")

##########################################################################
rbetas <- rbeta(38, alpha, beta)
max(rbetas)
## [1] 0.3260555
mean(rbetas)
## [1] 0.1463406
#
#
presences.sim <- matrix(0, 25, 38)
for (i in 1:38) presences.sim[ , i] <- runif(25) < rbetas[i]
mean (colSums(presences.sim))
## [1] 3.5
var(colSums(presences.sim))
## [1] 6.418919
NightOrDay <- rosterchart[[43]][1:25]
table(NightOrDay, rowSums(presences.num))
##           
## NightOrDay 5 6 7 8 9 11
##          D 1 1 1 1 1  4
##          N 9 4 3 0 0  0
mean(rowSums(presences.num)[NightOrDay == "N"])
## [1] 5.625
mean(rowSums(presences.num)[NightOrDay == "D"])
## [1] 8.777778