Loading libraries:

library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(magrittr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(TTR)
library(forecast)
## Loading required package: timeDate
## This is forecast 7.3
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2015). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2. http://CRAN.R-project.org/package=stargazer
library(stats)
NARMS_retailmeat_public <- read.csv("narms_public.csv")
d <- NARMS_retailmeat_public

Subsetting the data to contain only the genuses Salmonella and E coli, and renaming psec.

sec <- subset(d,(d$GENUS=="S"|d$GENUS=="EC"))

Creating a year/month variable in sec:

sec$ym <- as.yearmon(paste(sec$Year, sec$Month, sep = "-"))

Generating a table of proportion of isolates that were Susceptible, Resistant, or Intermediate to ceftriaxone (a cephalosporin drug, abbreviated AXO) by month. Creates the dataset sec.AXO (shorthand for preliminary salmonella e coli ceftriaxone) where ym is a yearmon variable and AXO.SIR is a categorical variable containing I, S and R with proportions indicating how many isolates that month were intermediate, susceptible, or resistant to ceftriaxone.

psec.AXO <- sec %>% group_by(ym, AXO.SIR) %>% summarise (n = n()) %>% mutate(freq = n / sum(n))
head(psec.AXO)
## Source: local data frame [6 x 4]
## Groups: ym [3]
## 
##              ym AXO.SIR     n       freq
##   <S3: yearmon>  <fctr> <int>      <dbl>
## 1      Jan 2002       R     1 0.01063830
## 2      Jan 2002       S    93 0.98936170
## 3      Feb 2002       R     6 0.06122449
## 4      Feb 2002       S    92 0.93877551
## 5      Mar 2002       R     1 0.01123596
## 6      Mar 2002       S    88 0.98876404

Subsetting psec.AXO to contain only proportions of isolates that were resistant to ceftriaxone each month (which groups Intermediate and Susceptible together by default):

sec.AXO <- subset(psec.AXO, AXO.SIR=="S")
head(sec.AXO)
## Source: local data frame [6 x 4]
## Groups: ym [6]
## 
##              ym AXO.SIR     n      freq
##   <S3: yearmon>  <fctr> <int>     <dbl>
## 1      Jan 2002       S    93 0.9893617
## 2      Feb 2002       S    92 0.9387755
## 3      Mar 2002       S    88 0.9887640
## 4      Apr 2002       S    91 0.9891304
## 5      May 2002       S    96 0.9142857
## 6      Jun 2002       S    95 0.9793814

Plotting the prevalence of susceptibility over time:

sec.axo.rprev <- plot(sec.AXO$ym, sec.AXO$freq, main = "Prevalence of ceftriaxone susceptibility in isolates of Salmonella and E. coli from NARMS retail meat", xlab = "Time (months)", ylab = "Proportion of isolates susceptible to ceftriaxone")

Generalized linear model with no covariates assuming that the trendlines before and after the intervention will have to intersect at the breakpoint:

Creating a date field, where each timepoint is treated as the 15th of the month (because isolates were received throughout) and a dummy variable for before/after the law restricting cephalosporins went into effect (assuming that it was in effect starting May 1, 2012), and creating a dateLaw object that is just the date of the law:

sec.AXO$by_date <- strptime(paste("15", sec.AXO$ym), "%d %b %Y")
sec.AXO$date <- as.Date(sec.AXO$by_date)
sec.AXO$dummy <- ifelse(sec.AXO$by_date<"2012-04-05",0,1)
dateLaw <- as.Date("2012-04-05")

Creating a GLM object that predicts the change in slope for a linear model after the date the law went into effect, with no covariates:

glm.sec.AXO <- glm(sec.AXO$freq ~ sec.AXO$date + pmax(sec.AXO$date - dateLaw, 0))
summary(glm.sec.AXO)
## 
## Call:
## glm(formula = sec.AXO$freq ~ sec.AXO$date + pmax(sec.AXO$date - 
##     dateLaw, 0))
## 
## Deviance Residuals: 
##       Min         1Q     Median         3Q        Max  
## -0.116198  -0.018643   0.007962   0.022407   0.063244  
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.216e+00  3.256e-02  37.341  < 2e-16 ***
## sec.AXO$date                    -2.126e-05  2.368e-06  -8.978 7.19e-16 ***
## pmax(sec.AXO$date - dateLaw, 0)  8.819e-05  1.085e-05   8.126 1.13e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.0009905636)
## 
##     Null deviance: 0.24554  on 162  degrees of freedom
## Residual deviance: 0.15849  on 160  degrees of freedom
## AIC: -659.96
## 
## Number of Fisher Scoring iterations: 2

Issues with this model: 1. it treats the effect of the intervention as something that increases linearly starting from the date the law was passed. This is probably fine for the first few years, but would not be as reasonable for a long-term evaluation. 2. it assumes linear relationships between the predictors and outcomes. 3. it assumes observations are independent

Displaying the predictions of the GLM model against the actual prevalence data:

#this creates trendlines from predictions from the glm model:
newdata <- data.frame(sec.AXO$date)
newdata$freq <- predict(glm.sec.AXO, newdata = newdata)
plot.glm.sec.AXO <- plot(sec.AXO$freq ~ sec.AXO$date, type = "l", main = "Frequency of Susceptibility to Cephalosporin in E. coli and Salmonella in Retail Meat Before and After 2012 Restriction", xlab = "Time", ylab = "Proportion of Isolates susceptible") + lines(newdata$freq ~ newdata$sec.AXO.date)

Merging the frequency data into the original dataset to include covariates:

sec$ym <- as.yearmon(paste(sec$Year, sec$Month, sep = "-"))
range(sec$ym)
## [1] "Jan 2002" "Jul 2015"
secm.AXO <- merge(sec,sec.AXO,by="ym")
merged.plot <- plot(secm.AXO$ym, secm.AXO$freq, main = "Prevalence of ceftriaxone susceptibility in isolates of Salmonella and E. coli from NARMS retail meat", xlab = "Time (months)", ylab = "Proportion of isolates susceptible to ceftriaxone")

glm.secm.AXO <- glm(secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date - dateLaw, 0))
summary(glm.secm.AXO)
## 
## Call:
## glm(formula = secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date - 
##     dateLaw, 0))
## 
## Deviance Residuals: 
##       Min         1Q     Median         3Q        Max  
## -0.115493  -0.019467   0.004716   0.021548   0.064240  
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)                       1.222e+00  2.869e-03  426.02   <2e-16
## secm.AXO$date                    -2.176e-05  2.088e-07 -104.24   <2e-16
## pmax(secm.AXO$date - dateLaw, 0)  9.670e-05  1.143e-06   84.61   <2e-16
##                                     
## (Intercept)                      ***
## secm.AXO$date                    ***
## pmax(secm.AXO$date - dateLaw, 0) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.0009084891)
## 
##     Null deviance: 28.088  on 19559  degrees of freedom
## Residual deviance: 17.767  on 19557  degrees of freedom
## AIC: -81479
## 
## Number of Fisher Scoring iterations: 2
newdatam <- data.frame(secm.AXO$date)
newdatam$freq <- predict(glm.secm.AXO, newdatam = newdatam)
plot.glm.secm.AXO <- plot(secm.AXO$freq ~ secm.AXO$date, type = "l", main = "Frequency of Susceptibility to Cephalosporin in E. coli and Salmonella in Retail Meat Before and After 2012 Restriction", xlab = "Time", ylab = "Proportion of Isolates susceptible") + lines(newdatam$freq ~ newdatam$secm.AXO.date)

Here is the issue:

The following includes meat types as a covariate. Meat Type is a categorical variable which should (?) be treated as a factor by the glm() function:

glm.secm.AXO.spec <- glm(secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date - dateLaw, 0) + secm.AXO$MEAT_TYPE)
summary(glm.secm.AXO.spec)
## 
## Call:
## glm(formula = secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date - 
##     dateLaw, 0) + secm.AXO$MEAT_TYPE)
## 
## Deviance Residuals: 
##       Min         1Q     Median         3Q        Max  
## -0.116325  -0.019503   0.005332   0.021397   0.064998  
## 
## Coefficients:
##                                    Estimate Std. Error  t value Pr(>|t|)
## (Intercept)                       1.221e+00  2.903e-03  420.674   <2e-16
## secm.AXO$date                    -2.173e-05  2.092e-07 -103.869   <2e-16
## pmax(secm.AXO$date - dateLaw, 0)  9.661e-05  1.144e-06   84.465   <2e-16
## secm.AXO$MEAT_TYPEGB              1.521e-03  6.203e-04    2.452   0.0142
## secm.AXO$MEAT_TYPEGT              8.914e-04  5.222e-04    1.707   0.0878
## secm.AXO$MEAT_TYPEPC              1.611e-03  6.997e-04    2.303   0.0213
##                                     
## (Intercept)                      ***
## secm.AXO$date                    ***
## pmax(secm.AXO$date - dateLaw, 0) ***
## secm.AXO$MEAT_TYPEGB             *  
## secm.AXO$MEAT_TYPEGT             .  
## secm.AXO$MEAT_TYPEPC             *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.0009082234)
## 
##     Null deviance: 28.088  on 19559  degrees of freedom
## Residual deviance: 17.759  on 19554  degrees of freedom
## AIC: -81482
## 
## Number of Fisher Scoring iterations: 2
confint(glm.secm.AXO.spec)
## Waiting for profiling to be done...
##                                          2.5 %        97.5 %
## (Intercept)                       1.215416e+00  1.226795e+00
## secm.AXO$date                    -2.213907e-05 -2.131903e-05
## pmax(secm.AXO$date - dateLaw, 0)  9.436859e-05  9.885216e-05
## secm.AXO$MEAT_TYPEGB              3.054531e-04  2.736837e-03
## secm.AXO$MEAT_TYPEGT             -1.320885e-04  1.914950e-03
## secm.AXO$MEAT_TYPEPC              2.399432e-04  2.982802e-03
#this creates trendlines from predictions from the glm model:
newdatam.spec <- data.frame(secm.AXO$date)
newdatam.spec$freq <- predict(glm.secm.AXO.spec, newdatam.spec = newdatam.spec)
plot.glm.secm.AXO.spec <- plot(secm.AXO$freq ~ secm.AXO$date, type = "l", main = "Frequency of Susceptibility to Cephalosporin in E. coli and Salmonella in Retail Meat Before and After 2012 Restriction", xlab = "Time", ylab = "Proportion of Isolates susceptible") + lines(newdatam.spec$freq ~ newdatam.spec$secm.AXO.date)

Suddenly the trendlines are jagged.

The same occurs by entering the meat_type variable as dummy variables:

secm.AXO$chicken <- ifelse(secm.AXO$MEAT_TYPE=="CB",1,0)
secm.AXO$beef <- ifelse(secm.AXO$MEAT_TYPE=="GB",1,0)
secm.AXO$turkey <- ifelse(secm.AXO$MEAT_TYPE=="GT",1,0)
secm.AXO$pork <- ifelse(secm.AXO$MEAT_TYPE=="PC",1,0)
glm.secm.AXO.spec <- glm(secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date - dateLaw, 0) + secm.AXO$chicken + secm.AXO$beef + secm.AXO$pork + secm.AXO$turkey)
summary(glm.secm.AXO.spec)
## 
## Call:
## glm(formula = secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date - 
##     dateLaw, 0) + secm.AXO$chicken + secm.AXO$beef + secm.AXO$pork + 
##     secm.AXO$turkey)
## 
## Deviance Residuals: 
##       Min         1Q     Median         3Q        Max  
## -0.116325  -0.019503   0.005332   0.021397   0.064998  
## 
## Coefficients: (1 not defined because of singularities)
##                                    Estimate Std. Error  t value Pr(>|t|)
## (Intercept)                       1.222e+00  2.897e-03  421.879   <2e-16
## secm.AXO$date                    -2.173e-05  2.092e-07 -103.869   <2e-16
## pmax(secm.AXO$date - dateLaw, 0)  9.661e-05  1.144e-06   84.465   <2e-16
## secm.AXO$chicken                 -8.914e-04  5.222e-04   -1.707   0.0878
## secm.AXO$beef                     6.297e-04  6.215e-04    1.013   0.3109
## secm.AXO$pork                     7.199e-04  7.010e-04    1.027   0.3044
## secm.AXO$turkey                          NA         NA       NA       NA
##                                     
## (Intercept)                      ***
## secm.AXO$date                    ***
## pmax(secm.AXO$date - dateLaw, 0) ***
## secm.AXO$chicken                 .  
## secm.AXO$beef                       
## secm.AXO$pork                       
## secm.AXO$turkey                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.0009082234)
## 
##     Null deviance: 28.088  on 19559  degrees of freedom
## Residual deviance: 17.759  on 19554  degrees of freedom
## AIC: -81482
## 
## Number of Fisher Scoring iterations: 2
confint(glm.secm.AXO.spec)
## Waiting for profiling to be done...
##                                          2.5 %        97.5 %
## (Intercept)                       1.216320e+00  1.227674e+00
## secm.AXO$date                    -2.213907e-05 -2.131903e-05
## pmax(secm.AXO$date - dateLaw, 0)  9.436859e-05  9.885216e-05
## secm.AXO$chicken                 -1.914950e-03  1.320885e-04
## secm.AXO$beef                    -5.883510e-04  1.847780e-03
## secm.AXO$pork                    -6.539430e-04  2.093827e-03
## secm.AXO$turkey                             NA            NA
#this creates trendlines from predictions from the glm model:
newdatam.spec <- data.frame(secm.AXO$date)
newdatam.spec$freq <- predict(glm.secm.AXO.spec, newdatam.spec = newdatam.spec)
plot.glm.secm.AXO.spec <- plot(secm.AXO$freq ~ secm.AXO$date, type = "p", main = "Frequency of Susceptibility to Cephalosporin in E. coli and Salmonella in Retail Meat Before and After 2012 Restriction", xlab = "Time", ylab = "Proportion of Isolates susceptible") + lines(newdatam.spec$freq ~ newdatam.spec$secm.AXO.date, col = "blue")