## Create survey object.
options(digits = 4)
options(survey.lonely.psu = "adjust")
des <- svydesign(ids = ~1, weights = ~weight, data = df[is.na(df$weight) ==
F, ])
# save dataframe
q1 <- data.frame(svytable(~Q1 + PPGENDER + ppagecat + ppagect4 + PPETHM + income +
PPEDUCAT + work + marital + PPMSACAT, des, round = T))
# ggplot templates
title <- ggtitle("Q1. Did you know influenza is different from the stomach flu?")
p <- ggplot(q1, aes(Q1, weight = Freq)) + ptext
fil <- aes(fill = Q1)
# plots: tables from svytable(~Q1 + PPGENDER, des, round = T)
p + geom_bar() + title
# default mosaic plot
plot(svytable(~Q1 + PPGENDER, des), main = "mosaic plot of Q1 and gender")
gen <- p + aes(fill = PPGENDER) + geom_bar(position = "dodge") + title
gen
svyboxplot(PPAGE ~ Q1, des, main = "Q1 by age")
age <- p + aes(fill = ppagect4) + geom_bar(position = "dodge")
age
age2 <- p + aes(ppagecat) + fil + geom_bar(position = "stack")
age2
eth <- p + peth + fil + geom_bar(position = "dodge") + title
eth
eth2 <- p + aes(fill = PPETHM) + geom_bar(position = "fill")
eth2
inc <- p + pinc + fil + geom_bar(position = "stack") + title
inc
inc2 <- p + aes(fill = income) + geom_bar(position = "dodge")
inc2
edu <- p + pedu + fil + geom_bar(position = "dodge") + title
edu
edu2 <- p + aes(fill = PPEDUCAT) + geom_bar(position = "dodge")
edu2
wor <- p + pwor + fil + geom_bar(position = "dodge")
wor
wor2 <- p + aes(fill = work) + geom_bar(position = "dodge")
wor2
mar <- p + pmar + fil + geom_bar(position = "dodge")
mar
met <- p + aes(PPMSACAT) + fil + geom_bar(position = "dodge")
met
met2 <- p + aes(fill = PPMSACAT) + geom_bar(position = "dodge")
met2
# grid.arrange(gen, age, eth) grid.arrange(inc, edu, wor, mar)
# mosaic plots
plot(svytable(~Q1 + PPGENDER + ppagect4, des), main = "mosaic plot of Q1 by gender & age group")
plot(svytable(~Q1 + PPGENDER + PPETHM, des), main = "mosaic plot of Q1 by gender & race")
q2 <- data.frame(svytable(~Q2 + PPGENDER + ppagecat + ppagect4 + PPETHM + income +
PPEDUCAT + work + marital + PPMSACAT, des, round = T))
title <- ggtitle("Q2. Have you had an illness with influenza-like symptoms since August 2015?")
p <- ggplot(q2, aes(Q2, weight = Freq)) + ptext
fil <- aes(fill = Q2)
# plots
all <- p + geom_bar() + title
all
gen <- p + pgen + fil + geom_bar(position = "dodge") + title
gen
gen2 <- p + aes(fill = PPGENDER) + geom_bar(position = "dodge")
gen2
svyboxplot(PPAGE ~ Q2, des, main = "Q2 by age")
age <- p + page + fil + geom_bar(position = "dodge")
age
age2 <- p + aes(fill = ppagect4) + geom_bar(position = "dodge")
age2
eth <- p + peth + fil + geom_bar(position = "stack")
eth
eth2 <- p + aes(fill = PPETHM) + geom_bar(position = "fill")
eth2
inc <- p + pinc + fil + geom_bar(position = "stack") + title
inc
inc2 <- p + aes(fill = income) + geom_bar(position = "dodge")
inc2
edu <- p + pedu + fil + geom_bar(position = "dodge")
edu
edu2 <- p + aes(fill = PPEDUCAT) + geom_bar(position = "dodge")
edu2
wor <- p + pwor + fil + geom_bar(position = "dodge") + title
wor
wor2 <- p + aes(fill = work) + geom_bar(position = "dodge")
wor2
mar <- p + pmar + fil + geom_bar(position = "dodge")
mar
mar2 <- p + aes(fill = marital) + geom_bar(position = "dodge")
mar2
met <- p + aes(PPMSACAT) + fil + geom_bar(position = "dodge") + title
met
met2 <- p + aes(fill = PPMSACAT) + geom_bar(position = "dodge")
met2
# grid.arrange(gen, age, eth) grid.arrange(inc, edu, wor, mar)
## create ggplot template
er <- geom_errorbar(aes(ymin = Q2Yes - se.Q2Yes, ymax = Q2Yes + se.Q2Yes), width = 0.25)
## % of US adults sick last year with ILI by sex
gen <- svyby(~Q2, ~PPGENDER, des, svymean, na.rm = T)
svychisq(~Q2 + PPGENDER, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + PPGENDER, des)
## F = 6.3, ndf = 1, ddf = 2200, p-value = 0.01
ggplot(gen, aes(PPGENDER, Q2Yes)) + geom_point() + xlab("sex") + ylab("% sick") +
er + ggtitle(label = "% of adults sick last year with ILI by sex")
## % of US adults sick last year with ILI by age
age <- svyby(~Q2, ~ppagecat, des, svymean, na.rm = T)
svychisq(~Q2 + ppagecat, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + ppagecat, des)
## F = 2.1, ndf = 5.8, ddf = 13000.0, p-value = 0.06
ggplot(age, aes(ppagecat, Q2Yes)) + geom_point() + xlab("age") + ylab("% sick") +
er + ggtitle(label = "% of adults sick last year with ILI by age")
## % of US adults sick last year with ILI by ethnicity
eth <- svyby(~Q2, ~PPETHM, des, svymean, na.rm = T)
svychisq(~Q2 + PPETHM, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + PPETHM, des)
## F = 4.3, ndf = 3.4, ddf = 7400.0, p-value = 0.003
ggplot(eth, aes(PPETHM, Q2Yes)) + geom_point() + xlab("ethnicity") + ylab("% sick") +
ptext + er + ggtitle(label = "% of adults sick last year with ILI by ethnicity")
## by income
inc <- svyby(~Q2, ~PPINCIMP, des, svymean, na.rm = T)
svychisq(~Q2 + PPINCIMP, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + PPINCIMP, des)
## F = 0.84, ndf = 18, ddf = 38000, p-value = 0.7
ggplot(inc, aes(PPINCIMP, Q2Yes)) + geom_point() + xlab("income") + ylab("% sick") +
coord_flip() + er + ggtitle(label = "% of adults sick last year with ILI by income")
## by education
edu <- svyby(~Q2, ~PPEDUCAT, des, svymean, na.rm = T)
svychisq(~Q2 + PPEDUCAT, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + PPEDUCAT, des)
## F = 0.67, ndf = 2.9, ddf = 6300.0, p-value = 0.6
ggplot(edu, aes(PPEDUCAT, Q2Yes)) + geom_point() + xlab("education") + ylab("% sick") +
er + ggtitle(label = "% of adults sick last year with ILI by education")
## by work status
wor <- svyby(~Q2, ~work, des, svymean, na.rm = T)
svychisq(~Q2 + work, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + work, des)
## F = 2.7, ndf = 1, ddf = 2200, p-value = 0.1
ggplot(wor, aes(work, Q2Yes)) + geom_point() + xlab("work") + ylab("% sick") +
er + ggtitle(label = "% of adults sick last year with ILI by work")
## by marital status
mar <- svyby(~Q2, ~PPMARIT, des, svymean, na.rm = T)
svychisq(~Q2 + PPMARIT, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + PPMARIT, des)
## F = 0.71, ndf = 5, ddf = 11000, p-value = 0.6
ggplot(mar, aes(PPMARIT, Q2Yes)) + geom_point() + xlab("marital status") + ylab("% sick") +
er + ggtitle(label = "% of adults sick last year with ILI by marital status")
# re-grouped marital status
mar2 <- svyby(~Q2, ~marital, des, svymean, na.rm = T)
svychisq(~Q2 + marital, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + marital, des)
## F = 0.094, ndf = 1, ddf = 2200, p-value = 0.8
ggplot(mar2, aes(marital, Q2Yes)) + geom_point() + xlab("") + ylab("% sick") +
er + ggtitle(label = "% of adults sick last year with ILI by marital status")
## by metro status
met <- svyby(~Q2, ~PPMSACAT, des, svymean, na.rm = T)
svychisq(~Q2 + PPMSACAT, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + PPMSACAT, des)
## F = 3.1, ndf = 1, ddf = 2200, p-value = 0.08
ggplot(met, aes(PPMSACAT, Q2Yes)) + geom_point() + xlab("metro status") + ylab("% sick") +
er + ggtitle(label = "% of adults sick last year with ILI by metro status")
## by region cat4
reg <- svyby(~Q2, ~PPREG4, des, svymean, na.rm = T)
svychisq(~Q2 + PPREG4, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + PPREG4, des)
## F = 4.5, ndf = 3, ddf = 6500, p-value = 0.004
ggplot(reg, aes(PPREG4, Q2Yes)) + geom_point() + xlab(" ") + ylab("% sick") +
er + ggtitle(label = "% of adults sick last year with ILI by region")
## by region cat9
reg9 <- svyby(~Q2, ~ppreg9, des, svymean, na.rm = T)
svychisq(~Q2 + ppreg9, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + ppreg9, des)
## F = 2, ndf = 8, ddf = 17000, p-value = 0.04
ggplot(reg9, aes(ppreg9, Q2Yes)) + geom_point() + xlab(" ") + ylab("% sick") +
ptext + er + ggtitle(label = "% of adults sick last year with ILI by region")
## by state
sta <- svyby(~Q2, ~PPSTATEN, des, svymean, na.rm = T)
svychisq(~Q2 + PPSTATEN, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + PPSTATEN, des)
## F = 1.4, ndf = 48, ddf = 100000, p-value = 0.05
ggplot(sta, aes(PPSTATEN, Q2Yes)) + geom_point() + xlab(" ") + ylab("% sick") +
ptext + coord_flip() + er + ggtitle(label = "% of adults sick last year with ILI by state")
## rent status
ren <- svyby(~Q2, ~PPRENT, des, svymean, na.rm = T)
svychisq(~Q2 + PPRENT, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + PPRENT, des)
## F = 2.9, ndf = 2, ddf = 4300, p-value = 0.05
ggplot(ren, aes(PPRENT, Q2Yes)) + geom_point() + xlab(" ") + ylab("% sick") +
ptext + er + ggtitle(label = "% of adults sick last year with ILI by rent")
q3 <- as.data.frame(svytable(~Q3 + ppagect4 + PPETHM + income + work + marital,
des, round = T))
title <- ggtitle("Q3. Has any other person in your household had an illness with influenza like symptoms since August 2015?")
p <- ggplot(q3, aes(Q3, weight = Freq)) + ptext
fil <- aes(fill = Q3)
# plots
p + geom_bar() + title
eth <- p + peth + fil + geom_bar(position = "fill") + title
eth
eth2 <- p + aes(fill = PPETHM) + geom_bar(position = "dodge")
eth2
## % sick with sick household member
svychisq(~Q2 + Q3, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + Q3, des)
## F = 230, ndf = 2, ddf = 4300, p-value <2e-16
q <- svyby(~Q2, ~Q3, des, svymean, na.rm = T)
ggplot(q, aes(Q3, Q2Yes)) + geom_point() + xlab(" ") + ylab("% sick") + er +
ggtitle(label = "% of adults sick vs. having sick household member ")
q4 <- as.data.frame(svytable(~Q4 + Q2 + PPGENDER + ppagect4 + PPETHM + income +
PPEDUCAT, des, round = T))
title <- ggtitle("Q4. Does your job require you to have a lot of contact with the public?")
p <- ggplot(q4, aes(Q4, weight = Freq)) + ptext
fil <- aes(fill = Q4)
all <- p + geom_bar() + title
all
gen <- p + pgen + fil + geom_bar(position = "dodge") + title
gen2 <- p + aes(fill = PPGENDER) + geom_bar(position = "dodge")
age <- p + page + fil + geom_bar(position = "dodge") + title
age2 <- p + aes(fill = ppagect4) + geom_bar(position = "dodge")
eth <- p + peth + fil + geom_bar(position = "stack")
eth2 <- p + aes(fill = PPETHM) + geom_bar(position = "fill")
inc <- p + pinc + fil + geom_bar(position = "stack")
inc2 <- p + aes(fill = income) + geom_bar(position = "dodge")
edu <- p + pedu + fil + geom_bar(position = "dodge")
edu2 <- p + aes(fill = PPEDUCAT) + geom_bar(position = "dodge")
grid.arrange(gen, age, nrow = 2)
grid.arrange(gen2, age2, nrow = 1)
grid.arrange(eth, eth2, nrow = 2)
grid.arrange(inc, inc2)
grid.arrange(edu, edu2)
## % sick plot
svychisq(~Q2 + Q4, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + Q4, des)
## F = 14, ndf = 2, ddf = 4300, p-value = 7e-07
q <- svyby(~Q2, ~Q4, des, svymean, na.rm = T)
ggplot(q, aes(Q4, Q2Yes)) + geom_point() + xlab(" ") + ylab("% sick") + ptext +
er + ggtitle(label = "% of adults sick and having job with public contact ")
## look at those sick people with high-contact jobs
svytable(~Q2 + Q4, des)
## Q4
## Q2 Yes No, I don_t work
## Yes 203.87 125.07
## No 569.44 604.04
## Q4
## Q2 No, my job does not require much contact with the public
## Yes 95.43
## No 539.59
# subset q4
psub <- ggplot(q4[q4$Q4 == "Yes", ], aes(Q4, weight = Freq)) + ptext
# being sick by gender
psub + pgen + aes(fill = Q2) + geom_bar(position = "fill") + ggtitle("People with high-contact jobs vs. being sick")
# by ethnicity
psub + peth + aes(fill = Q2) + geom_bar(position = "fill") + ggtitle("People with high-contact jobs vs. being sick")
q5 <- as.data.frame(svytable(~Q5 + PPGENDER + ppagect4 + PPETHM + income + PPEDUCAT,
des, round = T))
title <- ggtitle("Q5. Do you have a car that you can use to travel to work?")
p <- ggplot(q5, aes(Q5, weight = Freq)) + ptext
p + geom_bar() + title
q6 <- as.data.frame(svytable(~Q6 + PPGENDER + ppagect4 + PPETHM + income + PPEDUCAT +
work + marital + PPMSACAT, des, round = T))
title <- ggtitle("Q6. Do you regularly use public transportation?")
p <- ggplot(q6, aes(Q6, weight = Freq)) + ptext
fil <- aes(fill = Q6)
all <- p + geom_bar() + title
all
gen <- p + pgen + fil + geom_bar(position = "dodge")
gen2 <- p + aes(fill = PPGENDER) + geom_bar(position = "dodge")
age <- p + page + fil + geom_bar(position = "dodge")
age2 <- p + aes(fill = ppagect4) + geom_bar(position = "dodge")
eth <- p + peth + fil + geom_bar(position = "stack")
eth2 <- p + aes(fill = PPETHM) + geom_bar(position = "fill")
inc <- p + pinc + fil + geom_bar(position = "stack")
inc2 <- p + aes(fill = income) + geom_bar(position = "dodge")
met <- p + aes(PPMSACAT) + fil + geom_bar(position = "dodge")
met2 <- p + aes(fill = PPMSACAT) + geom_bar(position = "dodge")
grid.arrange(gen, age, nrow = 2)
grid.arrange(gen2, age2, nrow = 1)
grid.arrange(eth, eth2, nrow = 2)
grid.arrange(inc, inc2)
grid.arrange(met, met2)
## % sick plot
svychisq(~Q2 + Q6, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q2 + Q6, des)
## F = 17, ndf = 1, ddf = 2200, p-value = 3e-05
q <- svyby(~Q2, ~Q6, des, svymean, na.rm = T)
ggplot(q, aes(Q6, Q2Yes)) + geom_point() + xlab(" ") + ylab("% sick") + er +
ggtitle(label = "% of adults sick and frequent public transportation use")
# subset question data
q7df <- df %>% select(CaseID, PPGENDER, ppagect4, PPETHM, income, PPEDUCAT,
work, marital, ppreg9, PPMSACAT, Q2, Q7_1:Q7_7, weight) %>% gather(Q7_q,
Q7_r, Q7_1:Q7_7, na.rm = T) %>% group_by(Q7_q, Q7_r)
# new survey design
des7 <- svydesign(ids = ~1, weights = ~weight, data = q7df[is.na(q7df$weight) ==
F, ])
# weighted data frame
q7 <- data.frame(svytable(~Q7_q + Q7_r + PPGENDER + ppagect4 + PPETHM + income +
PPEDUCAT + work + ppreg9 + PPMSACAT, des7, round = T))
# plot
title <- ggtitle("Q7. What types of public transportation do you regularly use?")
p <- ggplot(q7[(q7$Q7_r) == "Yes", ], aes(Q7_q, weight = Freq)) + ptext
fil <- aes(fill = Q7_q)
p + geom_bar() + title
p + aes(fill = PPGENDER) + geom_bar(position = "dodge")
p + page + fil + geom_bar(position = "dodge")
p + aes(fill = ppagect4) + geom_bar(position = "stack")
p + peth + fil + geom_bar(position = "stack")
p + aes(fill = PPETHM) + geom_bar(position = "fill")
p + pinc + fil + geom_bar(position = "stack") + title
p + aes(fill = income) + geom_bar(position = "dodge")
p + aes(PPMSACAT) + fil + geom_bar(position = "fill") + title
p + aes(fill = PPMSACAT) + geom_bar(position = "stack")
p + aes(fill = Q7_q) + geom_bar(position = "dodge") + facet_wrap(~ppreg9) +
title
# subset question data
q8df <- df %>% select(CaseID, PPGENDER, ppagect4, PPETHM, income, PPEDUCAT,
work, marital, ppreg9, PPMSACAT, Q2, Q8_1:Q8_6, weight) %>% gather(Q8_q,
Q8_r, Q8_1:Q8_6, na.rm = T) %>% group_by(Q8_q, Q8_r)
# new survey design
des8 <- svydesign(ids = ~1, weights = ~weight, data = q8df[is.na(q8df$weight) ==
F, ])
# weighted data frame
q8 <- data.frame(svytable(~Q8_q + Q8_r + PPGENDER + ppagect4 + PPETHM + income +
PPEDUCAT + work + ppreg9 + PPMSACAT, des8, round = T))
# plot
title <- ggtitle("Q8. For what types of activities do you regularly use public transportation?")
p <- ggplot(q8[(q8$Q8_r) == "Yes", ], aes(Q8_q, weight = Freq)) + ptext
fil <- aes(fill = Q8_q)
p + geom_bar() + title
p + aes(fill = PPGENDER) + geom_bar(position = "dodge")
# age
p + page + fil + geom_bar(position = "dodge") + title
p + aes(fill = ppagect4) + geom_bar(position = "stack")
# race
p + peth + fil + geom_bar(position = "stack")
p + aes(fill = PPETHM) + geom_bar(position = "fill")
# income
p + pinc + fil + geom_bar(position = "stack") + title
p + aes(fill = income) + geom_bar(position = "dodge")
# metro location
p + aes(PPMSACAT) + fil + geom_bar(position = "fill")
p + aes(fill = PPMSACAT) + geom_bar(position = "stack")
# region
p + fil + geom_bar(position = "dodge") + facet_wrap(~ppreg9) + title
q9 <- data.frame(svytable(~Q9 + PPETHM + income + PPMSACAT, des, round = T))
title <- ggtitle("Q9. Do other members of your household regularly use public transportation?")
p <- ggplot(q9, aes(Q9, weight = Freq)) + ptext
p + geom_bar() + title
# chisquare
svychisq(~Q15 + Q3, des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~Q15 + Q3, des)
## F = 3.2, ndf = 3.9, ddf = 8600.0, p-value = 0.01
# subset question data
q10df <- df %>% select(CaseID, PPGENDER, ppagect4, PPETHM, income, PPEDUCAT,
work, marital, ppreg9, PPMSACAT, Q2, Q10_1:Q10_8, weight) %>% gather(Q10_q,
Q10_r, Q10_1:Q10_8, na.rm = T) %>% group_by(Q10_q, Q10_r)
# new survey design
des10 <- svydesign(ids = ~1, weights = ~weight, data = q10df[is.na(q10df$weight) ==
F, ])
# weighted data frame
q10 <- data.frame(svytable(~Q10_q + Q10_r + PPGENDER + ppagect4 + PPETHM + income +
PPEDUCAT + work + ppreg9 + PPMSACAT, des10, round = T))
# plot
title <- ggtitle("Q10. What types of public transportation do other members of your household regularly use?")
p <- ggplot(q10[(q10$Q10_r) == "Yes", ], aes(Q10_q, weight = Freq)) + ptext
fil <- aes(fill = Q10_q)
p + geom_bar() + title
p + aes(fill = PPGENDER) + geom_bar(position = "dodge")
# age
p + page + fil + geom_bar(position = "dodge")
p + aes(fill = ppagect4) + geom_bar(position = "stack")
# race
p + peth + fil + geom_bar(position = "stack") + title
p + aes(fill = PPETHM) + geom_bar(position = "fill")
# income
p + pinc + fil + geom_bar(position = "stack")
p + aes(fill = income) + geom_bar(position = "dodge")
# metro location
p + aes(PPMSACAT) + fil + geom_bar(position = "fill") + title
p + aes(fill = PPMSACAT) + geom_bar(position = "stack")
# region
p + fil + geom_bar(position = "dodge") + facet_wrap(~ppreg9) + title