Development of sampling strategy to identify bovine digital dermatitis in Australian dairy cows

Show the code

#recode names of farms
herdnumber <- data %>% group_by(herd) %>%
    summarise(herd.number = n())

herdnumber <- herdnumber %>%
  mutate(herd.number = row_number())

data <- merge(herdnumber %>% select(herd,herd.number),data,by="herd")

data$herd <- paste0("Herd ",data$herd.number)

#dput(data)

Prevalence of cow-level DD by farm

Show the code

data %>%
  tabyl(herd,dd) %>%
  adorn_totals("row") %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 1) %>%
  adorn_ns %>%
  adorn_title

	dd
herd	0	1
Herd 1	67.0% (71)	33.0% (35)
Herd 2	71.6% (187)	28.4% (74)
Herd 3	82.6% (157)	17.4% (33)
Herd 4	76.3% (74)	23.7% (23)
Herd 5	66.9% (115)	33.1% (57)
Herd 6	78.8% (421)	21.2% (113)
Herd 7	94.4% (236)	5.6% (14)
Total	78.3% (1261)	21.7% (349)

Plot of DD vs milking stage stratified by farm (loess smoother)

Show the code

data$herd.quant <- data$herd.quant / 10
plot.func <- function(herdid){
data %>% filter(herd==herdid) %>%
    ggplot() +
  aes(herd.quant,as.integer(dd)) + 
  geom_smooth(method=loess, se=T) + 
  geom_point() +
  ggtitle(herdid) +
  labs(y = "Risk of cow having DD", x = "Stage of milking") +
  theme(panel.border = element_rect(colour = "black", fill=NA,size=1),
        title=element_text(size=12,face="bold",family="Times"),
        axis.text=element_text(size=12,family="Times"),
        axis.title=element_text(size=11,face="bold",family="Times"),
        axis.text.x = element_text(size = 10),
        panel.background = element_rect(fill = "white", colour = "white",size = 0.5, linetype = "solid"),
        panel.grid.major = element_line(size = 0.2, linetype = 'solid', colour = "grey"),
        panel.grid.minor = element_line(size = 0.2, linetype = 'solid',colour = "grey"),
        legend.position="bottom",
        legend.text = element_text(colour="black",size=8,face="bold",family="Times"))
}

allherds <- data %>%
  ggplot() +
  aes(herd.quant,as.integer(dd)) + 
  geom_smooth(method=loess, se=T) + 
  geom_point() +
  ggtitle("All herds") +
  labs(y = "Risk of cow having DD", x = "Stage of milking") +
  theme(panel.border = element_rect(colour = "black", fill=NA,size=1),
        title=element_text(size=12,face="bold",family="Times"),
        axis.text=element_text(size=12,family="Times"),
        axis.title=element_text(size=11,face="bold",family="Times"),
        axis.text.x = element_text(size = 10),
        panel.background = element_rect(fill = "white", colour = "white",size = 0.5, linetype = "solid"),
        panel.grid.major = element_line(size = 0.2, linetype = 'solid', colour = "grey"),
        panel.grid.minor = element_line(size = 0.2, linetype = 'solid',colour = "grey"),
        legend.position="bottom",
        legend.text = element_text(colour="black",size=8,face="bold",family="Times"))


#data %>% tabyl(herd)
h1 <- plot.func('Herd 1')
h2 <- plot.func('Herd 2')
h3 <- plot.func('Herd 3')
h4 <- plot.func('Herd 4')
h5 <- plot.func('Herd 5')
h6 <- plot.func('Herd 6')
h7 <- plot.func('Herd 7')

allherds

Show the code

h1

Show the code

h2

Show the code

h3

Show the code

h4

Show the code

h5

Show the code

h6

Show the code

h7

GLM and GLMM to investigate association between milking stage and odds of DD

Show the code

library(lme4)
library(broom)
data$herd.quant <- data$herd.quant * 10

glm1 <- glm(dd ~ herd.quant + herd,data=data,family=binomial)
summary(glm1)


Call:
glm(formula = dd ~ herd.quant + herd, family = binomial, data = data)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.9225  -0.7400  -0.6744  -0.3333   2.4264  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.77647    0.23354  -3.325 0.000885 ***
herd.quant   0.01365    0.02142   0.637 0.524109    
herdHerd 2  -0.21940    0.24807  -0.884 0.376458    
herdHerd 3  -0.85237    0.28169  -3.026 0.002479 ** 
herdHerd 4  -0.46144    0.31572  -1.462 0.143861    
herdHerd 5   0.00570    0.26253   0.022 0.982678    
herdHerd 6  -0.60758    0.23216  -2.617 0.008869 ** 
herdHerd 7  -2.11750    0.34401  -6.155 7.49e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1683.4  on 1609  degrees of freedom
Residual deviance: 1604.6  on 1602  degrees of freedom
AIC: 1620.6

Number of Fisher Scoring iterations: 5

Show the code

glm2 <- glm(dd ~ herd.quant + I(herd.quant^2) + herd,data=data,family=binomial)
summary(glm2)


Call:
glm(formula = dd ~ herd.quant + I(herd.quant^2) + herd, family = binomial, 
    data = data)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.9296  -0.7455  -0.6586  -0.3296   2.4813  

Coefficients:
                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)     -0.947938   0.276295  -3.431 0.000602 ***
herd.quant       0.114037   0.087689   1.300 0.193440    
I(herd.quant^2) -0.009944   0.008411  -1.182 0.237063    
herdHerd 2      -0.219663   0.248210  -0.885 0.376164    
herdHerd 3      -0.853202   0.281834  -3.027 0.002467 ** 
herdHerd 4      -0.461927   0.315884  -1.462 0.143650    
herdHerd 5       0.005699   0.262679   0.022 0.982690    
herdHerd 6      -0.608231   0.232292  -2.618 0.008835 ** 
herdHerd 7      -2.118949   0.344117  -6.158 7.38e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1683.4  on 1609  degrees of freedom
Residual deviance: 1603.2  on 1601  degrees of freedom
AIC: 1621.2

Number of Fisher Scoring iterations: 5

Show the code

glmm1 <- glmer(dd ~ herd.quant + (1|herd),data=data,family=binomial)
summary(glmm1)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dd ~ herd.quant + (1 | herd)
   Data: data

     AIC      BIC   logLik deviance df.resid 
  1636.0   1652.2   -815.0   1630.0     1607 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-0.7145 -0.5563 -0.5054 -0.2674  3.7899 

Random effects:
 Groups Name        Variance Std.Dev.
 herd   (Intercept) 0.3935   0.6273  
Number of obs: 1610, groups:  herd, 7

Fixed effects:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -1.37601    0.27064  -5.084 3.69e-07 ***
herd.quant   0.01363    0.02136   0.638    0.523    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
           (Intr)
herd.quant -0.402

Show the code

glmm2 <- glmer(dd ~ herd.quant + I(herd.quant^2) + (1|herd),data=data,family=binomial)
summary(glmm2)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dd ~ herd.quant + I(herd.quant^2) + (1 | herd)
   Data: data

     AIC      BIC   logLik deviance df.resid 
  1636.6   1658.2   -814.3   1628.6     1606 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-0.7212 -0.5611 -0.4922 -0.2644  4.0678 

Random effects:
 Groups Name        Variance Std.Dev.
 herd   (Intercept) 0.3941   0.6278  
Number of obs: 1610, groups:  herd, 7

Fixed effects:
                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)     -1.547565   0.308565  -5.015 5.29e-07 ***
herd.quant       0.113763   0.087421   1.301    0.193    
I(herd.quant^2) -0.009918   0.008385  -1.183    0.237    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) hrd.qn
herd.quant  -0.549       
I(hrd.qn^2)  0.474 -0.969
optimizer (Nelder_Mead) convergence code: 0 (OK)
Model failed to converge with max|grad| = 0.00205391 (tol = 0.002, component 1)

Show the code

tab <- tidy(glm1, conf.int=T)
#tab$term
tab <- tab %>% filter(term=="herd.quant") 
tab$estimate <- tab$estimate %>% exp
tab$conf.low <- (tab$conf.low) %>% exp
tab$conf.high <- (tab$conf.high) %>% exp
tab$OR <- paste0(round(tab$estimate,2)," (",round(tab$conf.low,2),", ",round(tab$conf.high,2),")")
tab %>% select(term,OR)

term	OR
herd.quant	1.01 (0.97, 1.06)

Conclusion

There is little evidence to suggest a strong trend for cows with DD to present earlier or later during milking.

Sample size table

Step 1 is to estimate true prevalence

We assume that diagnostic Se = 0.63, Sp = 1.0, based on recent findings by Yang and Laven (2019).

Formula for true prevalence is: TP = [AP + SP - 1] / [Se + Sp - 1]

Show the code

library(epiR)

#Estimate true prevalence in our sample
positive <- data %>% ungroup() %>% count(dd)
neg <- positive$n[positive$dd==0]
positive <- positive$n[positive$dd==1]
test <- neg + positive
true.prevalence <- epi.prev(pos = positive, tested = test, se=0.63, sp=1, method = "wilson", units = 100, conf.level = 0.95)

true.prevalence$tp$est / 100

[1] 0.3440797

Show the code

apparent.prevalence <- (positive / test) * 100
apparent.prevalence

[1] 21.67702

True prevalence is estimated to 34.4%

Step 2 is to estimate required sample size to achieve a sufficiently precise estimate in endemic herds (i.e. 2 or more lesions).

We will aim to have an estimate that is within 10% (on an aboslute scale) of this estimate.

Parameters in this calculation are: Se = 0.63, Sp = 1.0 Confidence interval: 95% Epsilon: 0.1 Underlying prevalence = will check at different prevalences ranging from 0.1 to 0.4. The observed prevalence in our sample of herds is 0.34 (“prev.true” below).

Show the code

list <- c(seq(from=100, to=500,by=20),seq(600,1000,by=100))
list <- list %>% as.data.frame
list <- rename(list, herdsize = .)

#epi.sssimpleestb
#epi.sssimpleestb(Py = true.prevalence$tp$est/100, epsilon = 0.1, se = 0.63, sp = 1, conf.level = 0.95)

list$sample.prev.true <- list$herdsize %>% map_dbl(~epi.sssimpleestb(N = ., Py = true.prevalence$tp$est/100, epsilon = 0.1, se = 0.63, error = "absolute", sp = 1, conf.level = 0.95))

list$sample.prev.20 <- list$herdsize %>% map_dbl(~epi.sssimpleestb(N = ., Py = 0.2, epsilon = 0.1, se = 0.63, error = "absolute", sp = 1, conf.level = 0.95))

list$sample.prev.10 <- list$herdsize %>% map_dbl(~epi.sssimpleestb(N = ., Py = 0.1, epsilon = 0.1, se = 0.63, error = "absolute", sp = 1, conf.level = 0.95))

list$sample.prev.40 <- list$herdsize %>% map_dbl(~epi.sssimpleestb(N = ., Py = 0.4, epsilon = 0.1, se = 0.63, error = "absolute", sp = 1, conf.level = 0.95))
list %>% kable

herdsize	sample.prev.true	sample.prev.20	sample.prev.10	sample.prev.40
100	63	52	37	65
120	70	57	39	73
140	76	61	41	80
160	82	65	43	86
180	87	68	44	91
200	91	70	45	96
220	95	73	46	100
240	98	75	47	104
260	101	76	47	108
280	104	78	48	111
300	107	79	49	114
320	109	81	49	117
340	112	82	50	119
360	114	83	50	122
380	115	84	50	124
400	117	85	51	126
420	119	86	51	128
440	120	86	51	130
460	122	87	51	131
480	123	88	52	133
500	124	89	52	134
600	130	91	53	141
700	134	93	53	145
800	137	95	54	149
900	140	96	54	152
1000	142	97	55	155

Will use the following table for sampling of herds that have 2 or more lesions.

Show the code

list <- list %>% select(herdsize,sample.prev.true)
list %>% kable

herdsize	sample.prev.true
100	63
120	70
140	76
160	82
180	87
200	91
220	95
240	98
260	101
280	104
300	107
320	109
340	112
360	114
380	115
400	117
420	119
440	120
460	122
480	123
500	124
600	130
700	134
800	137
900	140
1000	142

Step 3 is to estimate required sample size to maintain reasonable herd sensitivity

Method based on Wayne Martin paper from 1992. Implemented in epiR - rsu.sssep.rs function.

Martin S, Shoukri M, Thorburn M (1992). Evaluating the health status of herds based on tests applied to individuals. Preventive Veterinary Medicine 14: 33 - 43.

Show the code

list$herd.sens.3.percent <- rsu.sssep.rs(N = list$herdsize,
             pstar = 0.03, 
             se.p = 0.95, 
             se.u = 0.63)

#epi.herdtest(se=0.63, sp=1, P=0.03, N=200,n=125,k=1)

list %>% kable

herdsize	sample.prev.true	herd.sens.3.percent
100	63	NA
120	70	101
140	76	101
160	82	115
180	87	113
200	91	125
220	95	122
240	98	119
260	101	129
280	104	126
300	107	135
320	109	132
340	112	129
360	114	137
380	115	134
400	117	141
420	119	138
440	120	135
460	122	141
480	123	138
500	124	144
600	130	147
700	134	148
800	137	150
900	140	151
1000	142	151

Show the code

list$herd.size <- list$herdsize

This result is equivalent to the method on the ausvet website. https://epitools.ausvet.com.au/freecalctwo

Calculate using simulation

1000 simulated herds for each combination of herd size and sample size. Estimate how likely we are to find at least 1 positive cow in a positive herd.

Show the code

herd1 <- function(herd.size,se,sp,tp,sample){

herd <- tibble(
  id = seq(1:herd.size),
  status.true = rbinom(herd.size, 1,tp),
)

herd$test <- herd$status.true %>% map_dbl(~rbinom(1,1,.*se))

#Sample cows
tested <- herd %>% slice_sample(n=sample)

test.post <- tested$test %>% sum
detected <- ifelse(test.post > 0,1,0)
detected
}

herd1(herd.size = 50,
      se = 0.63,
      sp = 1.0,
      tp = 0.03,
      sample = 50)

[1] 1

Show the code

# replicate(100,simplify=T,
#           herd1(herd.size = 1000,se = 0.63,sp = 1.0,tp = 0.03,sample = 100))

Show the code

herd2 <- function(sample.size,herd.size){
  
output <- replicate(500,simplify=T,
          herd1(herd.size = herd.size,se = 0.63,sp = 1.0,tp = 0.03,sample = sample.size))

output <- output %>% tabyl %>% slice_tail
output$percent
}

herd2(sample.size = 65,
      herd.size = 77)

[1] 0.682

Show the code

#Create simulation parameter table
sample.size <- seq(50,300,by=5)
herd.size <- seq(100,500,by=20)
simulation <- expand.grid(sample.size,herd.size)
colnames(simulation) <- c("sample.size","herd.size")
simulation <- simulation %>% select(herd.size,sample.size) %>% filter(sample.size <= herd.size)

#Run simulation
simulation$probability <- map2(.x=simulation$herd.size,
     .y=simulation$sample.size,
     ~herd2(
       herd.size=.x,sample.size=.y))

simulation <- simulation %>% filter(probability >= 0.95)
simulation95 <- simulation %>% group_by(herd.size) %>% summarise(
  sample.size.simulation = min(sample.size)
)
simulation95 %>% kable

herd.size	sample.size.simulation
160	150
180	160
200	155
220	150
240	155
260	155
280	160
300	155
320	160
340	155
360	150
380	160
400	155
420	155
440	150
460	150
480	150
500	145

Show the code

table <- merge(list,
               simulation95,
               by="herd.size",all.x=T)

table %>% kable

herd.size	herdsize	sample.prev.true	herd.sens.3.percent	sample.size.simulation
100	100	63	NA	NA
120	120	70	101	NA
140	140	76	101	NA
160	160	82	115	150
180	180	87	113	160
200	200	91	125	155
220	220	95	122	150
240	240	98	119	155
260	260	101	129	155
280	280	104	126	160
300	300	107	135	155
320	320	109	132	160
340	340	112	129	155
360	360	114	137	150
380	380	115	134	160
400	400	117	141	155
420	420	119	138	155
440	440	120	135	150
460	460	122	141	150
480	480	123	138	150
500	500	124	144	145
600	600	130	147	NA
700	700	134	148	NA
800	800	137	150	NA
900	900	140	151	NA
1000	1000	142	151	NA

herd.size	sample.size.simulation
160	150
180	160
200	155
220	150
240	155
260	155
280	160
300	155
320	160
340	155
360	150
380	160
400	155
420	155
440	150
460	150
480	150
500	145

herd.size	sample.size.simulation
160	150
180	160
200	155
220	150
240	155
260	155
280	160
300	155
320	160
340	155
360	150
380	160
400	155
420	155
440	150
460	150
480	150
500	145

herd.size	sample.size.simulation
160	150
180	160
200	155
220	150
240	155
260	155
280	160
300	155
320	160
340	155
360	150
380	160
400	155
420	155
440	150
460	150
480	150
500	145