# Below is the script file that I wrote. Use it to fill in anything you may have missed!
# Welcome to the InteRmediate R workshop!
# Please open a script file and load the following:
library(dplyr)
library(ggplot2)
# If they don't work, type
# install.packages("dplyr") and
# install.packages("ggplot2") in the console
# Load the diamonds data set so it is available by name.
data("diamonds")

# Price against carat, coloured by cut, with one panel per clarity level.
# Points are drawn first and a straight-line fit (no confidence band) is
# drawn on top for each cut; the y axis uses a square-root scale.
ggplot(data = diamonds, mapping = aes(x = carat, y = price, colour = cut)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~ clarity) +
  scale_y_sqrt() +
  theme_bw()
# motor trend cars data set
# Load it, open its help page, then look at the first rows and the columns.
data("mtcars")
help("mtcars")
head(mtcars)
glimpse(mtcars) # from dplyr

# Scatterplot of mpg against wt with a readable y-axis label.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  labs(y = "Miles per Gallon") +
  theme_bw()
# barplots
# geom_bar() counts the rows in each x category, so no y aesthetic is given.
ggplot(mtcars, aes(x = factor(cyl))) +
  geom_bar()

# Stacked barplots
# Mapping fill to a second factor splits every bar into stacked segments.
# The fill colours are written as colour names: c("hotpink", 1) would coerce
# the 1 to the string "1", which R looks up in the session's palette()
# (black by default), so the plot would depend on the current palette.
ggplot(mtcars, aes(x = factor(cyl), fill = factor(am))) +
  geom_bar() +
  labs(x = "Number of Cylinders", y = "Count", fill = "Phil") +
  scale_fill_manual(values = c("hotpink", "black"))

# inside aes(): part of our data
# outside aes(): fixed value
# Here the bar outline is a fixed "blue", while fill still comes from the data.
ggplot(mtcars, aes(x = factor(cyl), fill = factor(am))) +
  geom_bar(colour = "blue") +
  labs(x = "Number of Cylinders", y = "Count", fill = "Phil") +
  scale_fill_manual(values = c("hotpink", "black"))
# More barplots
# Bars for each level of am are placed side by side instead of stacked,
# and the coordinates are flipped so the bars run horizontally.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), fill = factor(am))) +
  geom_bar(position = "dodge") +
  coord_flip()
# Histograms, Density Plots
# Histogram of mpg using five bins.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(bins = 5)

# Density curve of the same variable.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_density()

# One density per cyl value; alpha makes the fills semi-transparent so
# overlapping curves remain visible.
ggplot(data = mtcars, mapping = aes(x = mpg, fill = factor(cyl))) +
  geom_density(alpha = 0.4)

# alpha works for points too: very transparent points on the diamonds data.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(alpha = 0.05)
# boxplots
# mpg by cyl, split by am; varwidth = TRUE is set so box widths vary with
# group size.
ggplot(data = mtcars,
       mapping = aes(x = factor(cyl), y = mpg, fill = factor(am))) +
  geom_boxplot(varwidth = TRUE)

# Same layout as violins, with scale = "count".
ggplot(data = mtcars,
       mapping = aes(x = factor(cyl), y = mpg, fill = factor(am))) +
  geom_violin(scale = "count")
# dplyr: a collection of verbs based on sql

# select columns
# Columns come back in the order they are listed.
select(.data = mtcars, mpg)
select(.data = mtcars, mpg, cyl)
select(.data = mtcars, cyl, mpg)
colnames(mtcars)
# Columns starting with "c" first, followed by all remaining columns.
select(.data = mtcars, starts_with("c"), everything())

# filter the rows
filter(.data = mtcars, cyl != 8)
filter(.data = mtcars, mpg >= 20)
# Comma-separated conditions must all hold (AND) ...
filter(.data = mtcars, mpg >= 19, cyl == 8)
# ... while | keeps rows where at least one holds (OR).
filter(.data = mtcars, mpg >= 19 | cyl == 8)
# mutate columns
# A new name adds a column; an existing name overwrites that column.
mutate(.data = mtcars, mpg2 = mpg^2)
mutate(.data = mtcars, mpg = mpg^2)
# transmute() keeps only the columns named in the call.
t2 <- transmute(.data = mtcars, mpg = mpg^2, cyl)
select(.data = t2, cyl, mpg)

# summarise
# Collapses the table to one row; naming the arguments names the columns.
summarise(.data = mtcars, mean(mpg))
summarise(.data = mtcars, mean(mpg), sd(mpg))
summarise(.data = mtcars, mean_mpg = mean(mpg), Dave = sd(mpg))

# group_by
# Marks the table as grouped by cyl; the rows themselves are unchanged.
group_by(.data = mtcars, cyl)
# The pipe: %>%
# f(a) is the same as a %>% f
x <- c(1, 4, 2, 22, NA)
mean(x, na.rm = TRUE)
x %>% mean(na.rm = TRUE)

# A longer chain: each verb receives the result of the previous step as its
# first argument, and the final summary table becomes the data for ggplot().
# Note that plot layers are still added with +, not %>%.
# geom_col() draws bars whose heights are the y values in the data; it is the
# dedicated form of geom_bar(stat = "identity").
mtcars %>%
  mutate(mpg = mpg^2 + cyl) %>%
  filter(mpg > 400) %>%
  select(mpg, cyl) %>%
  group_by(cyl) %>%
  summarise(weird_mean = mean(mpg)) %>%
  ggplot(aes(x = cyl, y = weird_mean)) +
  geom_col()

# Mean mpg for each value of cyl, shown as bars.
mtcars %>%
  group_by(cyl) %>%
  summarise(mean_mpg = mean(mpg)) %>%
  ggplot(aes(x = cyl, y = mean_mpg)) +
  geom_col()
# Mean mpg and group size for every am/cyl combination, drawn as a heat map.
# summarise() drops only the last grouping variable, so its result would
# still be grouped by am; ungroup() passes a plain table to the next step.
# count is computed here but is not used by the plot.
mtcars %>%
  group_by(am, cyl) %>%
  summarise(mean_mpg = mean(mpg), count = n()) %>%
  ungroup() %>%
  ggplot(aes(x = factor(am), y = factor(cyl), fill = mean_mpg)) +
  geom_tile() +
  scale_fill_gradient(low = "pink", high = "darkorchid")

# Same grouping, listed from the highest mean mpg to the lowest.
# Ungrouping first makes it explicit that the sort runs over the whole table.
mtcars %>%
  group_by(am, cyl) %>%
  summarise(mymean = mean(mpg)) %>%
  ungroup() %>%
  arrange(desc(mymean))
# rpubs.com/dbecker7/PracticePlots
# Simple linear regression of mpg on disp.
mylm <- lm(mpg ~ disp, data = mtcars)
mylm
summary(mylm)

# Show the diagnostic plots together in a 2 x 2 grid.
# par() returns the previous settings, so they are saved and restored
# afterwards; otherwise every later base-graphics plot would stay 2 x 2.
old_par <- par(mfrow = c(2, 2)) # multi-figures, 2 rows and 2 cols
plot(mylm)
par(old_par)
# mpg against disp with two fits on top of the points: the default smoother
# with its confidence band, and a red straight-line fit without one.
ggplot(data = mtcars, mapping = aes(x = disp, y = mpg)) +
  geom_point() +
  geom_smooth(se = TRUE) +
  geom_smooth(method = "lm", se = FALSE, colour = "red")

# Colouring by am gives a separate straight-line fit for each am level;
# the points are drawn last so they sit on top of the bands.
ggplot(data = mtcars,
       mapping = aes(x = disp, y = mpg, colour = factor(am))) +
  geom_smooth(method = "lm") +
  geom_point()
# Regression of mpg on disp, am (as a factor), and their interaction
# (disp * factor(am) expands to both main effects plus the interaction).
mylm2 <- lm(formula = mpg ~ disp * factor(am), data = mtcars)
summary(object = mylm2)

# It's actually an ANCOVA
# anova() prints the analysis-of-variance table for the fitted model.
anova(object = mylm2)