Chapter 6
p <- ggplot(data = gapminder,
mapping = aes(x = log(gdpPercap), y = lifeExp))
p + geom_point(alpha=0.1) +
geom_smooth(color = "tomato", fill="tomato", method = MASS::rlm) +
geom_smooth(color = "steelblue", fill="steelblue", method = "lm")

p + geom_point(alpha=0.1) +
geom_smooth(color = "tomato", method = "lm", size = 1.2,
formula = y ~ splines::bs(x, 3), se = FALSE)

p + geom_point(alpha=0.1) +
geom_quantile(color = "tomato", size = 1.2, method = "rqss",
lambda = 1, quantiles = c(0.20, 0.5, 0.85))

model_colors <- RColorBrewer::brewer.pal(3, "Set1")
p0 <- ggplot(data = gapminder,
mapping = aes(x = log(gdpPercap), y = lifeExp))
p1 <- p0 + geom_point(alpha = 0.2) +
geom_smooth(method = "lm", aes(color = "OLS", fill = "OLS")) +
geom_smooth(method = "lm", formula = y ~ splines::bs(x, df = 3),
aes(color = "Cubic Spline", fill = "Cubic Spline")) +
geom_smooth(method = "loess",
aes(color = "LOESS", fill = "LOESS"))
p1 + scale_color_manual(name = "Models", values = model_colors) +
scale_fill_manual(name = "Models", values = model_colors) +
theme(legend.position = "top")

out <- lm(formula = lifeExp ~ gdpPercap + pop + continent,
data = gapminder)
summary(out)
str(out)
min_gdp <- min(gapminder$gdpPercap)
max_gdp <- max(gapminder$gdpPercap)
med_pop <- median(gapminder$pop)
pred_df <- expand.grid(gdpPercap = (seq(from = min_gdp,
to = max_gdp,
length.out = 100)),
pop = med_pop,
continent = c("Africa", "Americas",
"Asia", "Europe", "Oceania"))
dim(pred_df)
## [1] 500 3
head(pred_df)
pred_out <- predict(object = out,
newdata = pred_df,
interval = "predict")
head(pred_out)
pred_df <- cbind(pred_df, pred_out)
head(pred_df)
p <- ggplot(data = subset(pred_df, continent %in% c("Europe", "Africa")),
aes(x = gdpPercap,
y = fit, ymin = lwr, ymax = upr,
color = continent,
fill = continent,
group = continent))
p + geom_point(data = subset(gapminder,
continent %in% c("Europe", "Africa")),
aes(x = gdpPercap, y = lifeExp,
color = continent),
alpha = 0.5,
inherit.aes = FALSE) +
geom_line() +
geom_ribbon(alpha = 0.2, color = FALSE) +
scale_x_log10(labels = scales::dollar)

out_comp <- tidy(out)
out_comp %>% round_df()
p <- ggplot(out_comp, mapping = aes(x = term, y = estimate))
p + geom_point() + coord_flip()

out_conf <- tidy(out, conf.int = T)
out_conf %>% round_df()
out_conf <- subset(out_conf, term %nin% "(Intercept)")
out_conf$nicelabs <- prefix_strip(out_conf$term, "continent")
p <- ggplot(out_conf, mapping = aes(x = reorder(nicelabs, estimate),
y = estimate, ymin = conf.low, ymax = conf.high))
p + geom_pointrange() + coord_flip() + labs(x="", y="OLS Estimate")

out_aug <- augment(out)
head(out_aug) %>% round_df()
out_aug <- augment(out, data = gapminder)
head(out_aug) %>% round_df()
p <- ggplot(data = out_aug,
mapping = aes(x = .fitted, y = .resid))
p + geom_point()

glance(out) %>% round_df()
library(survival)
out_cph <- coxph(Surv(time, status) ~ age + sex, data = lung)
out_surv <- survfit(out_cph)
out_tidy <- tidy(out_surv)
p <- ggplot(data = out_tidy, mapping = aes(time, estimate))
p + geom_line() +
geom_ribbon(mapping = aes(ymin = conf.low, ymax = conf.high), alpha = .2)

out_le <- gapminder %>%
group_by(continent, year) %>%
nest()
out_le
fit_ols <- function(df) {
lm(lifeExp ~ log(gdpPercap), data = df)
}
out_le <- gapminder %>%
group_by(continent, year) %>%
nest() %>%
mutate(model = map(data, fit_ols))
out_le
fit_ols <- function(df) {
lm(lifeExp ~ log(gdpPercap), data = df)
}
out_tidy <- gapminder %>%
group_by(continent, year) %>%
nest() %>%
mutate(model = map(data, fit_ols),
tidied = map(model, tidy)) %>%
unnest(tidied) %>%
filter(term %nin% "(Intercept)" &
continent %nin% "Oceania")
p <- ggplot(data = out_tidy,
mapping = aes(x = year, y = estimate,
ymin = estimate - 2*std.error,
ymax = estimate + 2*std.error,
group = continent, color = continent))
p + geom_pointrange(position = position_dodge(width = 1)) +
scale_x_continuous(breaks = unique(gapminder$year)) +
theme(legend.position = "top") +
labs(x = "Year", y = "Estimate", color = "Continent")

gss_sm$polviews_m <- relevel(gss_sm$polviews, ref = "Moderate")
out_bo <- glm(obama ~ polviews_m + sex*race,
family = "binomial", data = gss_sm)
summary(out_bo)
bo_m <- margins(out_bo)
summary(bo_m)
plot(bo_m)

bo_gg <- as_tibble(summary(bo_m))
prefixes <- c("polviews_m", "sex")
bo_gg$factor <- prefix_strip(bo_gg$factor, prefixes)
bo_gg$factor <- prefix_replace(bo_gg$factor, "race", "Race: ")
bo_gg %>% select(factor, AME, lower, upper)
p <- ggplot(data = bo_gg, aes(x = reorder(factor, AME),
y = AME, ymin = lower, ymax = upper))
p + geom_hline(yintercept = 0, color = "gray80") +
geom_pointrange() + coord_flip() +
labs(x = NULL, y = "Average Marginal Effect")

pv_cp <- cplot(out_bo, x = "sex", draw = FALSE)
p <- ggplot(data = pv_cp, aes(x = reorder(xvals, yvals),
y = yvals, ymin = lower, ymax = upper))
p + geom_hline(yintercept = 0, color = "gray80") +
geom_pointrange() + coord_flip() +
labs(x = NULL, y = "Conditional Effect")

options(survey.lonely.psu = "adjust")
options(na.action="na.pass")
gss_wt <- subset(gss_lon, year > 1974) %>%
mutate(stratvar = interaction(year, vstrat)) %>%
as_survey_design(ids = vpsu,
strata = stratvar,
weights = wtssall,
nest = TRUE)
out_grp <- gss_wt %>%
filter(year %in% seq(1976, 2016, by = 4)) %>%
group_by(year, race, degree) %>%
summarize(prop = survey_mean(na.rm = TRUE))
out_grp
out_mrg <- gss_wt %>%
filter(year %in% seq(1976, 2016, by = 4)) %>%
mutate(racedeg = interaction(race, degree)) %>%
group_by(year, racedeg) %>%
summarize(prop = survey_mean(na.rm = TRUE))
out_mrg
out_mrg <- gss_wt %>%
filter(year %in% seq(1976, 2016, by = 4)) %>%
mutate(racedeg = interaction(race, degree)) %>%
group_by(year, racedeg) %>%
summarize(prop = survey_mean(na.rm = TRUE)) %>%
separate(racedeg, sep = "\\.", into = c("race", "degree"))
out_mrg
p <- ggplot(data = subset(out_grp, race %nin% "Other"),
mapping = aes(x = degree, y = prop,
ymin = prop - 2*prop_se,
ymax = prop + 2*prop_se,
fill = race,
color = race,
group = race))
dodge <- position_dodge(width=0.9)
p + geom_col(position = dodge, alpha = 0.2) +
geom_errorbar(position = dodge, width = 0.2) +
scale_x_discrete(labels = scales::wrap_format(10)) +
scale_y_continuous(labels = scales::percent) +
scale_color_brewer(type = "qual", palette = "Dark2") +
scale_fill_brewer(type = "qual", palette = "Dark2") +
labs(title = "Educational Attainment by Race",
subtitle = "GSS 1976-2016",
fill = "Race",
color = "Race",
x = NULL, y = "Percent") +
facet_wrap(~ year, ncol = 2) +
theme(legend.position = "top")

p <- ggplot(data = subset(out_grp, race %nin% "Other"),
mapping = aes(x = year, y = prop, ymin = prop - 2*prop_se,
ymax = prop + 2*prop_se, fill = race, color = race,
group = race))
p + geom_ribbon(alpha = 0.3, aes(color = NULL)) +
geom_line() +
facet_wrap(~ degree, ncol = 1) +
scale_y_continuous(labels = scales::percent) +
scale_color_brewer(type = "qual", palette = "Dark2") +
scale_fill_brewer(type = "qual", palette = "Dark2") +
labs(title = "Educational Attainment\nby Race",
subtitle = "GSS 1976-2016", fill = "Race",
color = "Race", x = NULL, y = "Percent") +
theme(legend.position = "top")

out <- lm(formula = lifeExp ~ log(gdpPercap) + pop + continent, data = gapminder)
#plot(out, which = c(1,2), ask=FALSE)
plot(out, which = c(1,2,3,4), ask=FALSE)




library(coefplot)
out <- lm(formula = lifeExp ~ log(gdpPercap) + log(pop) + continent, data = gapminder)
coefplot(out, sort = "magnitude", intercept = FALSE)

organdata_sm <- organdata %>%
select(donors, pop_dens, pubhealth,
roads, consent_law)
ggpairs(data = organdata_sm,
mapping = aes(color = consent_law),
upper = list(continuous = wrap("density"), combo = "box_no_facet"),
lower = list(continuous = wrap("points"), combo = wrap("dot_no_facet")))
