# install trackdown
if(!require("trackdown")) {
install.packages("trackdown")
stopifnot(require("trackdown"))
}
## Loading required package: trackdown
## Warning: package 'trackdown' was built under R version 4.1.1
# download from google drive
trackdown::download_file("601proj2.Rmd", shared_drive = "NYP Data Science Team 6")
# update the google docs with your local version
trackdown::update_file("601proj2.Rmd", shared_drive = "NYP Data Science Team 6")
ds.nyt = read.csv("us-counties.csv")
ds.nyt$date = as.Date(ds.nyt$date, "%Y-%m-%d")
ds.nyt$county = tolower(ds.nyt$county)
ds.city = read.csv("NYP_City_List.csv")
ds.city$County = tolower(ds.city$County)
ds.city$Mask.Mandate = factor(ds.city$Mask.Mandate, c("No", "Yes"))
county_list = gsub("(.*)\\scounty","\\1", ds.city$County)
getY = function(start_time = "2021-08-20", end_time = "2021-10-20") {
start.date = as.Date(start_time, "%Y-%m-%d")
end.date = as.Date(end_time, "%Y-%m-%d")
y = c()
for (i in 1:length(county_list)) {
tmp = ds.nyt[ ds.nyt$county == county_list[i] &
ds.nyt$state == ds.city$State[i], ]
y[i] = (tmp[tmp$date == end.date, "cases"]
- tmp[tmp$date == start.date, "cases"]) / ds.city$population[i] * 100000
}
y
}
ds.city$y = getY()
get inital model
o = lm(y~Mask.Mandate, ds.city, ds.city$County!="duval county")
summary(o)
##
## Call:
## lm(formula = y ~ Mask.Mandate, data = ds.city, subset = ds.city$County !=
## "duval county")
##
## Residuals:
## Min 1Q Median 3Q Max
## -892.1 -488.5 -165.7 642.6 1109.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2353.3 252.7 9.312 7.34e-08 ***
## Mask.MandateYes -817.0 323.3 -2.527 0.0224 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 668.6 on 16 degrees of freedom
## Multiple R-squared: 0.2853, Adjusted R-squared: 0.2406
## F-statistic: 6.387 on 1 and 16 DF, p-value: 0.02241
g = glm(y~Mask.Mandate, data = ds.city, family = Gamma(),
subset = ds.city$County!="duval county")
summary(g)
##
## Call:
## glm(formula = y ~ Mask.Mandate, family = Gamma(), data = ds.city,
## subset = ds.city$County != "duval county")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.4415 -0.3500 -0.1123 0.3048 0.4777
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.249e-04 5.723e-05 7.424 1.44e-06 ***
## Mask.MandateYes 2.260e-04 9.037e-05 2.500 0.0237 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.1269934)
##
## Null deviance: 2.7772 on 17 degrees of freedom
## Residual deviance: 1.9801 on 16 degrees of freedom
## AIC: 285.81
##
## Number of Fisher Scoring iterations: 5
getYtilde = function(start2="2021-8-7", end2="2021-8-20") {
start2 = as.Date(start2, "%Y-%m-%d")
end2 = as.Date(end2, "%Y-%m-%d")
Ytilde = c()
for (i in 1:length(county_list)) {
tmp = ds.nyt[
ds.nyt$county == county_list[i] &
ds.nyt$state == ds.city$State[i], ]
Ytilde[i] = (tmp[tmp$date == end2, "cases"]
- tmp[tmp$date == start2, "cases"]) / ds.city$population[i] * 100000
}
Ytilde
}
ds.city$Y7 = getYtilde("2021-8-13")
ds.city$Y14 = getYtilde("2021-8-6")
ds.city$Y30 = getYtilde("2021-7-21")
ds.city$Y60 = getYtilde("2021-6-21")
ds.city$Yall = getYtilde("2021-01-21")
layout(matrix(1:5,1,5,T))
par(cex.axis=1.2, cex.main=1.5)
boxplot(Y7~Mask.Mandate, ds.city, main="-7d", ylab = "Cumulative Case", xlab = NA)
boxplot(Y14~Mask.Mandate, ds.city, main="-14d", ylab = NA, xlab = NA)
boxplot(Y30~Mask.Mandate, ds.city, main="-30d", ylab = NA, xlab = NA)
boxplot(Y60~Mask.Mandate, ds.city, main="-60d", ylab = NA, xlab = NA)
boxplot(Yall~Mask.Mandate, ds.city, main="All", ylab = NA, xlab = NA)
# t.test(Y1~Mask.Mandate, ds.city)
o2 = lm(y~Y14+Mask.Mandate, ds.city, subset = ds.city$County!="duval county")
summary(o2)
##
## Call:
## lm(formula = y ~ Y14 + Mask.Mandate, data = ds.city, subset = ds.city$County !=
## "duval county")
##
## Residuals:
## Min 1Q Median 3Q Max
## -989.10 -34.44 18.09 187.12 681.10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 904.4376 311.9109 2.900 0.0110 *
## Y14 2.8670 0.5374 5.334 8.34e-05 ***
## Mask.MandateYes -521.6632 203.8266 -2.559 0.0218 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 405.7 on 15 degrees of freedom
## Multiple R-squared: 0.7533, Adjusted R-squared: 0.7204
## F-statistic: 22.9 on 2 and 15 DF, p-value: 2.762e-05
g2 = glm(y~Y14+Mask.Mandate, ds.city, family = Gamma(),
subset = ds.city$County!="duval county")
summary(g2)
##
## Call:
## glm(formula = y ~ Y14 + Mask.Mandate, family = Gamma(), data = ds.city,
## subset = ds.city$County != "duval county")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.52217 -0.12153 -0.00162 0.12968 0.25432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.992e-04 9.281e-05 9.689 7.56e-08 ***
## Y14 -8.448e-07 1.439e-07 -5.872 3.07e-05 ***
## Mask.MandateYes 1.271e-04 5.051e-05 2.516 0.0238 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.03711809)
##
## Null deviance: 2.77722 on 17 degrees of freedom
## Residual deviance: 0.62792 on 15 degrees of freedom
## AIC: 266.91
##
## Number of Fisher Scoring iterations: 4
plot(y~Y14, ds.city,
col=(ds.city$Mask.Mandate=="Yes")+1,
pch=(ds.city$Mask.Mandate=="Yes")*15+1,
xlab="Prior 14 days cumulative cases", ylab="outcome(60 days cumulative cases)")
abline(lm(y~Y14, ds.city, subset = ds.city$Mask.Mandate=="Yes"), col=2)
abline(lm(y~Y14, ds.city, subset = ds.city$Mask.Mandate=="No"), col=1, lty=2)
abline(lm(y~Y14, ds.city, subset = ds.city$Mask.Mandate=="No" & ds.city$County!="duval county"))
legend("bottomright",
legend = c("Mask required", "Not required", "remove outlier"),
col=c(2,1,1), lty=c(1,2,1), pch=c(16,1,NA))
text(ds.city$Y14[5]-80, ds.city$y[5]+100, labels = "Jacksonville")
outlier is removed
N = 31
Sys.setenv(LANG = "en")
sen = data.frame(
xlab=-10:20,
coef=rep(0,N), p=rep(0,N))
sen$date = sen$xlab + as.Date("2021-08-20", "%Y-%m-%d")
for (i in 1:N) {
ds.city$y1 = getY(
start_time = sen$date[i], end_time = sen$date[i]+60
)
#oo=lm(y1~Mask.Mandate, ds.city, subset = ds.city$County!="duval county")
oo=glm(y1~Mask.Mandate, ds.city, subset = ds.city$County!="duval county", family = Gamma())
sen$coef[i]=coef(oo)["Mask.MandateYes"]
#sen$p[i]=coef(summary(oo)[4])["Mask.MandateYes",4]
sen$p[i]=coef(summary(oo)[12])["Mask.MandateYes",4]
}
plot
layout(matrix(1:2,1,2,T))
par(cex.axis=1.2)
plot(sen$xlab, sen$coef, ylab=NA,
main="Coef[\"Mask\"]", xlab = NA, type = "b")
plot(sen$xlab, sen$p, ylab=NA,
main="P-value", xlab = NA, type = "b")
# mtext("Shifting the time window", side = 1, line = -2, outer = TRUE, cex=1.5)
ds.vac = read.csv("casevac.csv")
ds.vac$date = as.Date(ds.vac$date)
ds.city$v = ds.vac$Series_Complete_Pop_Pct[ds.vac$date == "2021-08-21"]
summary(lm(y~v+Mask.Mandate, ds.city, subset = ds.city$County!="duval county"))
##
## Call:
## lm(formula = y ~ v + Mask.Mandate, data = ds.city, subset = ds.city$County !=
## "duval county")
##
## Residuals:
## Min 1Q Median 3Q Max
## -1027.4 -441.7 -145.9 564.7 1148.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2588.981 492.243 5.260 9.62e-05 ***
## v -5.066 9.009 -0.562 0.5822
## Mask.MandateYes -827.214 330.920 -2.500 0.0245 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 683.4 on 15 degrees of freedom
## Multiple R-squared: 0.3, Adjusted R-squared: 0.2067
## F-statistic: 3.215 on 2 and 15 DF, p-value: 0.06887
N = 31
Sys.setenv(LANG = "en")
sen = data.frame(
xlab=-10:20,
coef=rep(0,N), p=rep(0,N))
sen$date = sen$xlab + as.Date("2021-08-20", "%Y-%m-%d")
for (i in 1:N) {
ds.city$y1 = getY(
start_time = sen$date[i], end_time = sen$date[i]+60
)
ds.city$y2 = getYtilde(start2 = sen$date[i] - 14, end2 = sen$date[i] - 1)
#oo=lm(y1~y2+Mask.Mandate, ds.city, subset = ds.city$County!="duval county")
oo=glm(y1~y2+Mask.Mandate, ds.city, subset = ds.city$County!="duval county", family = Gamma())
sen$coef[i]=coef(oo)["Mask.MandateYes"]
#sen$p[i]=coef(summary(oo)[4])["Mask.MandateYes",4]
sen$p[i]=coef(summary(oo)[12])["Mask.MandateYes",4]
}
layout(matrix(1:2,1,2,T))
par(cex.axis=1.2)
plot(sen$xlab, sen$coef, ylab=NA,
main="Coef[\"Mask\"]", xlab = NA, type = "b")
plot(sen$xlab, sen$p, ylab=NA,
main="P-value", xlab = NA, type = "b")
# mtext("Shifting the time window", side = 1, line = -2, outer = TRUE, cex=1.5)
layout(matrix(1:4, 2, 2, T))
plot(g2)
mtext("With prior case", side=3, line=-1, outer = T)