Loading libraries:
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(TTR)
library(forecast)
## Loading required package: timeDate
## This is forecast 7.3
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2015). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2. http://CRAN.R-project.org/package=stargazer
library(stats)
NARMS_retailmeat_public <- read.csv("narms_public.csv")
d <- NARMS_retailmeat_public
Subsetting the data to contain only the genuses Salmonella and E coli, and renaming psec.
sec <- subset(d,(d$GENUS=="S"|d$GENUS=="EC"))
Creating a year/month variable in sec:
sec$ym <- as.yearmon(paste(sec$Year, sec$Month, sep = "-"))
Generating a table of proportion of isolates that were Susceptible, Resistant, or Intermediate to ceftriaxone (a cephalosporin drug, abbreviated AXO) by month. Creates the dataset sec.AXO (shorthand for preliminary salmonella e coli ceftriaxone) where ym is a yearmon variable and AXO.SIR is a categorical variable containing I, S and R with proportions indicating how many isolates that month were intermediate, susceptible, or resistant to ceftriaxone.
psec.AXO <- sec %>% group_by(ym, AXO.SIR) %>% summarise (n = n()) %>% mutate(freq = n / sum(n))
head(psec.AXO)
## Source: local data frame [6 x 4]
## Groups: ym [3]
##
## ym AXO.SIR n freq
## <S3: yearmon> <fctr> <int> <dbl>
## 1 Jan 2002 R 1 0.01063830
## 2 Jan 2002 S 93 0.98936170
## 3 Feb 2002 R 6 0.06122449
## 4 Feb 2002 S 92 0.93877551
## 5 Mar 2002 R 1 0.01123596
## 6 Mar 2002 S 88 0.98876404
Subsetting psec.AXO to contain only proportions of isolates that were resistant to ceftriaxone each month (which groups Intermediate and Susceptible together by default):
sec.AXO <- subset(psec.AXO, AXO.SIR=="S")
head(sec.AXO)
## Source: local data frame [6 x 4]
## Groups: ym [6]
##
## ym AXO.SIR n freq
## <S3: yearmon> <fctr> <int> <dbl>
## 1 Jan 2002 S 93 0.9893617
## 2 Feb 2002 S 92 0.9387755
## 3 Mar 2002 S 88 0.9887640
## 4 Apr 2002 S 91 0.9891304
## 5 May 2002 S 96 0.9142857
## 6 Jun 2002 S 95 0.9793814
Plotting the prevalence of susceptibility over time:
sec.axo.rprev <- plot(sec.AXO$ym, sec.AXO$freq, main = "Prevalence of ceftriaxone susceptibility in isolates of Salmonella and E. coli from NARMS retail meat", xlab = "Time (months)", ylab = "Proportion of isolates susceptible to ceftriaxone")
Generalized linear model with no covariates assuming that the trendlines before and after the intervention will have to intersect at the breakpoint:
Creating a date field, where each timepoint is treated as the 15th of the month (because isolates were received throughout) and a dummy variable for before/after the law restricting cephalosporins went into effect (assuming that it was in effect starting May 1, 2012), and creating a dateLaw object that is just the date of the law:
sec.AXO$by_date <- strptime(paste("15", sec.AXO$ym), "%d %b %Y")
sec.AXO$date <- as.Date(sec.AXO$by_date)
sec.AXO$dummy <- ifelse(sec.AXO$by_date<"2012-04-05",0,1)
dateLaw <- as.Date("2012-04-05")
Creating a GLM object that predicts the change in slope for a linear model after the date the law went into effect, with no covariates:
glm.sec.AXO <- glm(sec.AXO$freq ~ sec.AXO$date + pmax(sec.AXO$date - dateLaw, 0))
summary(glm.sec.AXO)
##
## Call:
## glm(formula = sec.AXO$freq ~ sec.AXO$date + pmax(sec.AXO$date -
## dateLaw, 0))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.116198 -0.018643 0.007962 0.022407 0.063244
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.216e+00 3.256e-02 37.341 < 2e-16 ***
## sec.AXO$date -2.126e-05 2.368e-06 -8.978 7.19e-16 ***
## pmax(sec.AXO$date - dateLaw, 0) 8.819e-05 1.085e-05 8.126 1.13e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.0009905636)
##
## Null deviance: 0.24554 on 162 degrees of freedom
## Residual deviance: 0.15849 on 160 degrees of freedom
## AIC: -659.96
##
## Number of Fisher Scoring iterations: 2
Issues with this model: 1. it treats the effect of the intervention as something that increases linearly starting from the date the law was passed. This is probably fine for the first few years, but would not be as reasonable for a long-term evaluation. 2. it assumes linear relationships between the predictors and outcomes. 3. it assumes observations are independent
Displaying the predictions of the GLM model against the actual prevalence data:
#this creates trendlines from predictions from the glm model:
newdata <- data.frame(sec.AXO$date)
newdata$freq <- predict(glm.sec.AXO, newdata = newdata)
plot.glm.sec.AXO <- plot(sec.AXO$freq ~ sec.AXO$date, type = "l", main = "Frequency of Susceptibility to Cephalosporin in E. coli and Salmonella in Retail Meat Before and After 2012 Restriction", xlab = "Time", ylab = "Proportion of Isolates susceptible") + lines(newdata$freq ~ newdata$sec.AXO.date)
Merging the frequency data into the original dataset to include covariates:
sec$ym <- as.yearmon(paste(sec$Year, sec$Month, sep = "-"))
range(sec$ym)
## [1] "Jan 2002" "Jul 2015"
secm.AXO <- merge(sec,sec.AXO,by="ym")
merged.plot <- plot(secm.AXO$ym, secm.AXO$freq, main = "Prevalence of ceftriaxone susceptibility in isolates of Salmonella and E. coli from NARMS retail meat", xlab = "Time (months)", ylab = "Proportion of isolates susceptible to ceftriaxone")
glm.secm.AXO <- glm(secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date - dateLaw, 0))
summary(glm.secm.AXO)
##
## Call:
## glm(formula = secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date -
## dateLaw, 0))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.115493 -0.019467 0.004716 0.021548 0.064240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.222e+00 2.869e-03 426.02 <2e-16
## secm.AXO$date -2.176e-05 2.088e-07 -104.24 <2e-16
## pmax(secm.AXO$date - dateLaw, 0) 9.670e-05 1.143e-06 84.61 <2e-16
##
## (Intercept) ***
## secm.AXO$date ***
## pmax(secm.AXO$date - dateLaw, 0) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.0009084891)
##
## Null deviance: 28.088 on 19559 degrees of freedom
## Residual deviance: 17.767 on 19557 degrees of freedom
## AIC: -81479
##
## Number of Fisher Scoring iterations: 2
newdatam <- data.frame(secm.AXO$date)
newdatam$freq <- predict(glm.secm.AXO, newdatam = newdatam)
plot.glm.secm.AXO <- plot(secm.AXO$freq ~ secm.AXO$date, type = "l", main = "Frequency of Susceptibility to Cephalosporin in E. coli and Salmonella in Retail Meat Before and After 2012 Restriction", xlab = "Time", ylab = "Proportion of Isolates susceptible") + lines(newdatam$freq ~ newdatam$secm.AXO.date)
The following includes meat types as a covariate. Meat Type is a categorical variable which should (?) be treated as a factor by the glm() function:
glm.secm.AXO.spec <- glm(secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date - dateLaw, 0) + secm.AXO$MEAT_TYPE)
summary(glm.secm.AXO.spec)
##
## Call:
## glm(formula = secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date -
## dateLaw, 0) + secm.AXO$MEAT_TYPE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.116325 -0.019503 0.005332 0.021397 0.064998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.221e+00 2.903e-03 420.674 <2e-16
## secm.AXO$date -2.173e-05 2.092e-07 -103.869 <2e-16
## pmax(secm.AXO$date - dateLaw, 0) 9.661e-05 1.144e-06 84.465 <2e-16
## secm.AXO$MEAT_TYPEGB 1.521e-03 6.203e-04 2.452 0.0142
## secm.AXO$MEAT_TYPEGT 8.914e-04 5.222e-04 1.707 0.0878
## secm.AXO$MEAT_TYPEPC 1.611e-03 6.997e-04 2.303 0.0213
##
## (Intercept) ***
## secm.AXO$date ***
## pmax(secm.AXO$date - dateLaw, 0) ***
## secm.AXO$MEAT_TYPEGB *
## secm.AXO$MEAT_TYPEGT .
## secm.AXO$MEAT_TYPEPC *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.0009082234)
##
## Null deviance: 28.088 on 19559 degrees of freedom
## Residual deviance: 17.759 on 19554 degrees of freedom
## AIC: -81482
##
## Number of Fisher Scoring iterations: 2
confint(glm.secm.AXO.spec)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 1.215416e+00 1.226795e+00
## secm.AXO$date -2.213907e-05 -2.131903e-05
## pmax(secm.AXO$date - dateLaw, 0) 9.436859e-05 9.885216e-05
## secm.AXO$MEAT_TYPEGB 3.054531e-04 2.736837e-03
## secm.AXO$MEAT_TYPEGT -1.320885e-04 1.914950e-03
## secm.AXO$MEAT_TYPEPC 2.399432e-04 2.982802e-03
#this creates trendlines from predictions from the glm model:
newdatam.spec <- data.frame(secm.AXO$date)
newdatam.spec$freq <- predict(glm.secm.AXO.spec, newdatam.spec = newdatam.spec)
plot.glm.secm.AXO.spec <- plot(secm.AXO$freq ~ secm.AXO$date, type = "l", main = "Frequency of Susceptibility to Cephalosporin in E. coli and Salmonella in Retail Meat Before and After 2012 Restriction", xlab = "Time", ylab = "Proportion of Isolates susceptible") + lines(newdatam.spec$freq ~ newdatam.spec$secm.AXO.date)
The same occurs by entering the meat_type variable as dummy variables:
secm.AXO$chicken <- ifelse(secm.AXO$MEAT_TYPE=="CB",1,0)
secm.AXO$beef <- ifelse(secm.AXO$MEAT_TYPE=="GB",1,0)
secm.AXO$turkey <- ifelse(secm.AXO$MEAT_TYPE=="GT",1,0)
secm.AXO$pork <- ifelse(secm.AXO$MEAT_TYPE=="PC",1,0)
glm.secm.AXO.spec <- glm(secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date - dateLaw, 0) + secm.AXO$chicken + secm.AXO$beef + secm.AXO$pork + secm.AXO$turkey)
summary(glm.secm.AXO.spec)
##
## Call:
## glm(formula = secm.AXO$freq ~ secm.AXO$date + pmax(secm.AXO$date -
## dateLaw, 0) + secm.AXO$chicken + secm.AXO$beef + secm.AXO$pork +
## secm.AXO$turkey)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.116325 -0.019503 0.005332 0.021397 0.064998
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.222e+00 2.897e-03 421.879 <2e-16
## secm.AXO$date -2.173e-05 2.092e-07 -103.869 <2e-16
## pmax(secm.AXO$date - dateLaw, 0) 9.661e-05 1.144e-06 84.465 <2e-16
## secm.AXO$chicken -8.914e-04 5.222e-04 -1.707 0.0878
## secm.AXO$beef 6.297e-04 6.215e-04 1.013 0.3109
## secm.AXO$pork 7.199e-04 7.010e-04 1.027 0.3044
## secm.AXO$turkey NA NA NA NA
##
## (Intercept) ***
## secm.AXO$date ***
## pmax(secm.AXO$date - dateLaw, 0) ***
## secm.AXO$chicken .
## secm.AXO$beef
## secm.AXO$pork
## secm.AXO$turkey
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.0009082234)
##
## Null deviance: 28.088 on 19559 degrees of freedom
## Residual deviance: 17.759 on 19554 degrees of freedom
## AIC: -81482
##
## Number of Fisher Scoring iterations: 2
confint(glm.secm.AXO.spec)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 1.216320e+00 1.227674e+00
## secm.AXO$date -2.213907e-05 -2.131903e-05
## pmax(secm.AXO$date - dateLaw, 0) 9.436859e-05 9.885216e-05
## secm.AXO$chicken -1.914950e-03 1.320885e-04
## secm.AXO$beef -5.883510e-04 1.847780e-03
## secm.AXO$pork -6.539430e-04 2.093827e-03
## secm.AXO$turkey NA NA
#this creates trendlines from predictions from the glm model:
newdatam.spec <- data.frame(secm.AXO$date)
newdatam.spec$freq <- predict(glm.secm.AXO.spec, newdatam.spec = newdatam.spec)
plot.glm.secm.AXO.spec <- plot(secm.AXO$freq ~ secm.AXO$date, type = "p", main = "Frequency of Susceptibility to Cephalosporin in E. coli and Salmonella in Retail Meat Before and After 2012 Restriction", xlab = "Time", ylab = "Proportion of Isolates susceptible") + lines(newdatam.spec$freq ~ newdatam.spec$secm.AXO.date, col = "blue")