Development of sampling strategy to identify bovine digital dermatitis in Australian dairy cows

Show the code
#recode names of farms
herdnumber <- data %>% group_by(herd) %>%
    summarise(herd.number = n())

herdnumber <- herdnumber %>%
  mutate(herd.number = row_number())

data <- merge(herdnumber %>% select(herd,herd.number),data,by="herd")

data$herd <- paste0("Herd ",data$herd.number)

#dput(data)

Prevalence of cow-level DD by farm

Show the code
data %>%
  tabyl(herd,dd) %>%
  adorn_totals("row") %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 1) %>%
  adorn_ns %>%
  adorn_title
dd
herd 0 1
Herd 1 67.0% (71) 33.0% (35)
Herd 2 71.6% (187) 28.4% (74)
Herd 3 82.6% (157) 17.4% (33)
Herd 4 76.3% (74) 23.7% (23)
Herd 5 66.9% (115) 33.1% (57)
Herd 6 78.8% (421) 21.2% (113)
Herd 7 94.4% (236) 5.6% (14)
Total 78.3% (1261) 21.7% (349)

Plot of DD vs milking stage stratified by farm (loess smoother)

Show the code
data$herd.quant <- data$herd.quant / 10
plot.func <- function(herdid){
data %>% filter(herd==herdid) %>%
    ggplot() +
  aes(herd.quant,as.integer(dd)) + 
  geom_smooth(method=loess, se=T) + 
  geom_point() +
  ggtitle(herdid) +
  labs(y = "Risk of cow having DD", x = "Stage of milking") +
  theme(panel.border = element_rect(colour = "black", fill=NA,size=1),
        title=element_text(size=12,face="bold",family="Times"),
        axis.text=element_text(size=12,family="Times"),
        axis.title=element_text(size=11,face="bold",family="Times"),
        axis.text.x = element_text(size = 10),
        panel.background = element_rect(fill = "white", colour = "white",size = 0.5, linetype = "solid"),
        panel.grid.major = element_line(size = 0.2, linetype = 'solid', colour = "grey"),
        panel.grid.minor = element_line(size = 0.2, linetype = 'solid',colour = "grey"),
        legend.position="bottom",
        legend.text = element_text(colour="black",size=8,face="bold",family="Times"))
}

allherds <- data %>%
  ggplot() +
  aes(herd.quant,as.integer(dd)) + 
  geom_smooth(method=loess, se=T) + 
  geom_point() +
  ggtitle("All herds") +
  labs(y = "Risk of cow having DD", x = "Stage of milking") +
  theme(panel.border = element_rect(colour = "black", fill=NA,size=1),
        title=element_text(size=12,face="bold",family="Times"),
        axis.text=element_text(size=12,family="Times"),
        axis.title=element_text(size=11,face="bold",family="Times"),
        axis.text.x = element_text(size = 10),
        panel.background = element_rect(fill = "white", colour = "white",size = 0.5, linetype = "solid"),
        panel.grid.major = element_line(size = 0.2, linetype = 'solid', colour = "grey"),
        panel.grid.minor = element_line(size = 0.2, linetype = 'solid',colour = "grey"),
        legend.position="bottom",
        legend.text = element_text(colour="black",size=8,face="bold",family="Times"))


#data %>% tabyl(herd)
h1 <- plot.func('Herd 1')
h2 <- plot.func('Herd 2')
h3 <- plot.func('Herd 3')
h4 <- plot.func('Herd 4')
h5 <- plot.func('Herd 5')
h6 <- plot.func('Herd 6')
h7 <- plot.func('Herd 7')

allherds

Show the code
h1

Show the code
h2

Show the code
h3

Show the code
h4

Show the code
h5

Show the code
h6

Show the code
h7

GLM and GLMM to investigate association between milking stage and odds of DD

Show the code
library(lme4)
library(broom)
data$herd.quant <- data$herd.quant * 10

glm1 <- glm(dd ~ herd.quant + herd,data=data,family=binomial)
summary(glm1)

Call:
glm(formula = dd ~ herd.quant + herd, family = binomial, data = data)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.9225  -0.7400  -0.6744  -0.3333   2.4264  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.77647    0.23354  -3.325 0.000885 ***
herd.quant   0.01365    0.02142   0.637 0.524109    
herdHerd 2  -0.21940    0.24807  -0.884 0.376458    
herdHerd 3  -0.85237    0.28169  -3.026 0.002479 ** 
herdHerd 4  -0.46144    0.31572  -1.462 0.143861    
herdHerd 5   0.00570    0.26253   0.022 0.982678    
herdHerd 6  -0.60758    0.23216  -2.617 0.008869 ** 
herdHerd 7  -2.11750    0.34401  -6.155 7.49e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1683.4  on 1609  degrees of freedom
Residual deviance: 1604.6  on 1602  degrees of freedom
AIC: 1620.6

Number of Fisher Scoring iterations: 5
Show the code
glm2 <- glm(dd ~ herd.quant + I(herd.quant^2) + herd,data=data,family=binomial)
summary(glm2)

Call:
glm(formula = dd ~ herd.quant + I(herd.quant^2) + herd, family = binomial, 
    data = data)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.9296  -0.7455  -0.6586  -0.3296   2.4813  

Coefficients:
                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)     -0.947938   0.276295  -3.431 0.000602 ***
herd.quant       0.114037   0.087689   1.300 0.193440    
I(herd.quant^2) -0.009944   0.008411  -1.182 0.237063    
herdHerd 2      -0.219663   0.248210  -0.885 0.376164    
herdHerd 3      -0.853202   0.281834  -3.027 0.002467 ** 
herdHerd 4      -0.461927   0.315884  -1.462 0.143650    
herdHerd 5       0.005699   0.262679   0.022 0.982690    
herdHerd 6      -0.608231   0.232292  -2.618 0.008835 ** 
herdHerd 7      -2.118949   0.344117  -6.158 7.38e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1683.4  on 1609  degrees of freedom
Residual deviance: 1603.2  on 1601  degrees of freedom
AIC: 1621.2

Number of Fisher Scoring iterations: 5
Show the code
glmm1 <- glmer(dd ~ herd.quant + (1|herd),data=data,family=binomial)
summary(glmm1)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dd ~ herd.quant + (1 | herd)
   Data: data

     AIC      BIC   logLik deviance df.resid 
  1636.0   1652.2   -815.0   1630.0     1607 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-0.7145 -0.5563 -0.5054 -0.2674  3.7899 

Random effects:
 Groups Name        Variance Std.Dev.
 herd   (Intercept) 0.3935   0.6273  
Number of obs: 1610, groups:  herd, 7

Fixed effects:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -1.37601    0.27064  -5.084 3.69e-07 ***
herd.quant   0.01363    0.02136   0.638    0.523    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
           (Intr)
herd.quant -0.402
Show the code
glmm2 <- glmer(dd ~ herd.quant + I(herd.quant^2) + (1|herd),data=data,family=binomial)
summary(glmm2)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dd ~ herd.quant + I(herd.quant^2) + (1 | herd)
   Data: data

     AIC      BIC   logLik deviance df.resid 
  1636.6   1658.2   -814.3   1628.6     1606 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-0.7212 -0.5611 -0.4922 -0.2644  4.0678 

Random effects:
 Groups Name        Variance Std.Dev.
 herd   (Intercept) 0.3941   0.6278  
Number of obs: 1610, groups:  herd, 7

Fixed effects:
                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)     -1.547565   0.308565  -5.015 5.29e-07 ***
herd.quant       0.113763   0.087421   1.301    0.193    
I(herd.quant^2) -0.009918   0.008385  -1.183    0.237    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) hrd.qn
herd.quant  -0.549       
I(hrd.qn^2)  0.474 -0.969
optimizer (Nelder_Mead) convergence code: 0 (OK)
Model failed to converge with max|grad| = 0.00205391 (tol = 0.002, component 1)
Show the code
tab <- tidy(glm1, conf.int=T)
#tab$term
tab <- tab %>% filter(term=="herd.quant") 
tab$estimate <- tab$estimate %>% exp
tab$conf.low <- (tab$conf.low) %>% exp
tab$conf.high <- (tab$conf.high) %>% exp
tab$OR <- paste0(round(tab$estimate,2)," (",round(tab$conf.low,2),", ",round(tab$conf.high,2),")")
tab %>% select(term,OR)
term OR
herd.quant 1.01 (0.97, 1.06)

Conclusion

There is little evidence to suggest a strong trend for cows with DD to present earlier or later during milking.

Sample size table

Step 1 is to estimate true prevalence

We assume that diagnostic Se = 0.63, Sp = 1.0, based on recent findings by Yang and Laven (2019).

Formula for true prevalence is: TP = [AP + SP - 1] / [Se + Sp - 1]

Show the code
library(epiR)

#Estimate true prevalence in our sample
positive <- data %>% ungroup() %>% count(dd)
neg <- positive$n[positive$dd==0]
positive <- positive$n[positive$dd==1]
test <- neg + positive
true.prevalence <- epi.prev(pos = positive, tested = test, se=0.63, sp=1, method = "wilson", units = 100, conf.level = 0.95)

true.prevalence$tp$est / 100
[1] 0.3440797
Show the code
apparent.prevalence <- (positive / test) * 100
apparent.prevalence
[1] 21.67702

True prevalence is estimated to 34.4%

Step 2 is to estimate required sample size to achieve a sufficiently precise estimate in endemic herds (i.e. 2 or more lesions).

We will aim to have an estimate that is within 10% (on an aboslute scale) of this estimate.

Parameters in this calculation are: Se = 0.63, Sp = 1.0 Confidence interval: 95% Epsilon: 0.1 Underlying prevalence = will check at different prevalences ranging from 0.1 to 0.4. The observed prevalence in our sample of herds is 0.34 (“prev.true” below).

Show the code
list <- c(seq(from=100, to=500,by=20),seq(600,1000,by=100))
list <- list %>% as.data.frame
list <- rename(list, herdsize = .)

#epi.sssimpleestb
#epi.sssimpleestb(Py = true.prevalence$tp$est/100, epsilon = 0.1, se = 0.63, sp = 1, conf.level = 0.95)

list$sample.prev.true <- list$herdsize %>% map_dbl(~epi.sssimpleestb(N = ., Py = true.prevalence$tp$est/100, epsilon = 0.1, se = 0.63, error = "absolute", sp = 1, conf.level = 0.95))

list$sample.prev.20 <- list$herdsize %>% map_dbl(~epi.sssimpleestb(N = ., Py = 0.2, epsilon = 0.1, se = 0.63, error = "absolute", sp = 1, conf.level = 0.95))

list$sample.prev.10 <- list$herdsize %>% map_dbl(~epi.sssimpleestb(N = ., Py = 0.1, epsilon = 0.1, se = 0.63, error = "absolute", sp = 1, conf.level = 0.95))

list$sample.prev.40 <- list$herdsize %>% map_dbl(~epi.sssimpleestb(N = ., Py = 0.4, epsilon = 0.1, se = 0.63, error = "absolute", sp = 1, conf.level = 0.95))
list %>% kable
herdsize sample.prev.true sample.prev.20 sample.prev.10 sample.prev.40
100 63 52 37 65
120 70 57 39 73
140 76 61 41 80
160 82 65 43 86
180 87 68 44 91
200 91 70 45 96
220 95 73 46 100
240 98 75 47 104
260 101 76 47 108
280 104 78 48 111
300 107 79 49 114
320 109 81 49 117
340 112 82 50 119
360 114 83 50 122
380 115 84 50 124
400 117 85 51 126
420 119 86 51 128
440 120 86 51 130
460 122 87 51 131
480 123 88 52 133
500 124 89 52 134
600 130 91 53 141
700 134 93 53 145
800 137 95 54 149
900 140 96 54 152
1000 142 97 55 155

Will use the following table for sampling of herds that have 2 or more lesions.

Show the code
list <- list %>% select(herdsize,sample.prev.true)
list %>% kable
herdsize sample.prev.true
100 63
120 70
140 76
160 82
180 87
200 91
220 95
240 98
260 101
280 104
300 107
320 109
340 112
360 114
380 115
400 117
420 119
440 120
460 122
480 123
500 124
600 130
700 134
800 137
900 140
1000 142

Step 3 is to estimate required sample size to maintain reasonable herd sensitivity

Method based on Wayne Martin paper from 1992. Implemented in epiR - rsu.sssep.rs function.

Martin S, Shoukri M, Thorburn M (1992). Evaluating the health status of herds based on tests applied to individuals. Preventive Veterinary Medicine 14: 33 - 43.

Show the code
list$herd.sens.3.percent <- rsu.sssep.rs(N = list$herdsize,
             pstar = 0.03, 
             se.p = 0.95, 
             se.u = 0.63)

#epi.herdtest(se=0.63, sp=1, P=0.03, N=200,n=125,k=1)

list %>% kable
herdsize sample.prev.true herd.sens.3.percent
100 63 NA
120 70 101
140 76 101
160 82 115
180 87 113
200 91 125
220 95 122
240 98 119
260 101 129
280 104 126
300 107 135
320 109 132
340 112 129
360 114 137
380 115 134
400 117 141
420 119 138
440 120 135
460 122 141
480 123 138
500 124 144
600 130 147
700 134 148
800 137 150
900 140 151
1000 142 151
Show the code
list$herd.size <- list$herdsize

This result is equivalent to the method on the ausvet website. https://epitools.ausvet.com.au/freecalctwo

Calculate using simulation

1000 simulated herds for each combination of herd size and sample size. Estimate how likely we are to find at least 1 positive cow in a positive herd.

Show the code
herd1 <- function(herd.size,se,sp,tp,sample){

herd <- tibble(
  id = seq(1:herd.size),
  status.true = rbinom(herd.size, 1,tp),
)

herd$test <- herd$status.true %>% map_dbl(~rbinom(1,1,.*se))

#Sample cows
tested <- herd %>% slice_sample(n=sample)

test.post <- tested$test %>% sum
detected <- ifelse(test.post > 0,1,0)
detected
}

herd1(herd.size = 50,
      se = 0.63,
      sp = 1.0,
      tp = 0.03,
      sample = 50)
[1] 1
Show the code
# replicate(100,simplify=T,
#           herd1(herd.size = 1000,se = 0.63,sp = 1.0,tp = 0.03,sample = 100))
Show the code
herd2 <- function(sample.size,herd.size){
  
output <- replicate(500,simplify=T,
          herd1(herd.size = herd.size,se = 0.63,sp = 1.0,tp = 0.03,sample = sample.size))

output <- output %>% tabyl %>% slice_tail
output$percent
}

herd2(sample.size = 65,
      herd.size = 77)
[1] 0.682
Show the code
#Create simulation parameter table
sample.size <- seq(50,300,by=5)
herd.size <- seq(100,500,by=20)
simulation <- expand.grid(sample.size,herd.size)
colnames(simulation) <- c("sample.size","herd.size")
simulation <- simulation %>% select(herd.size,sample.size) %>% filter(sample.size <= herd.size)

#Run simulation
simulation$probability <- map2(.x=simulation$herd.size,
     .y=simulation$sample.size,
     ~herd2(
       herd.size=.x,sample.size=.y))

simulation <- simulation %>% filter(probability >= 0.95)
simulation95 <- simulation %>% group_by(herd.size) %>% summarise(
  sample.size.simulation = min(sample.size)
)
simulation95 %>% kable
herd.size sample.size.simulation
160 150
180 160
200 155
220 150
240 155
260 155
280 160
300 155
320 160
340 155
360 150
380 160
400 155
420 155
440 150
460 150
480 150
500 145
Show the code
table <- merge(list,
               simulation95,
               by="herd.size",all.x=T)

table %>% kable
herd.size herdsize sample.prev.true herd.sens.3.percent sample.size.simulation
100 100 63 NA NA
120 120 70 101 NA
140 140 76 101 NA
160 160 82 115 150
180 180 87 113 160
200 200 91 125 155
220 220 95 122 150
240 240 98 119 155
260 260 101 129 155
280 280 104 126 160
300 300 107 135 155
320 320 109 132 160
340 340 112 129 155
360 360 114 137 150
380 380 115 134 160
400 400 117 141 155
420 420 119 138 155
440 440 120 135 150
460 460 122 141 150
480 480 123 138 150
500 500 124 144 145
600 600 130 147 NA
700 700 134 148 NA
800 800 137 150 NA
900 900 140 151 NA
1000 1000 142 151 NA