# Script

# Below is the script file that I wrote. Use it to fill in anything you may have missed!

# Welcome to the InteRmediate R workshop!

# Please open a script file and load the following:
library(dplyr)
library(ggplot2)
# If they don't work, type 
# install.packages("dplyr") and
# install.packages("ggplot2") in the console

# Load the diamonds data set.
data(diamonds)

# Showcase plot: price against carat, coloured by cut, with one panel per
# clarity level, a square-root y axis, and a straight-line fit (no
# confidence band) drawn on top of the points.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, colour = cut)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_sqrt() +
  facet_wrap(~ clarity) +
  theme_bw()

# Motor Trend cars data set: load it and open its help page
data("mtcars")
help("mtcars")

# Two quick looks at the data: the first six rows, then a column-wise
# overview.
head(mtcars, n = 6)
glimpse(mtcars) # glimpse() comes from dplyr

# Scatterplot of fuel economy against weight with a relabelled y axis
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  labs(y = "Miles per Gallon") +
  theme_bw()

# Barplots: geom_bar() counts the rows that fall in each x category.
# cyl is wrapped in factor() so it is treated as a set of categories.
ggplot(mtcars, aes(x = factor(cyl))) +
  geom_bar()

# Stacked barplots: mapping fill to a second factor splits each bar into
# one coloured segment per level of that factor.
ggplot(mtcars, aes(x = factor(cyl),
  fill = factor(am))) +
  geom_bar() +
  labs(x = "Number of Cylinders",
    y = "Count", fill = "Phil") +
  # Both colours are spelled out by name. c("hotpink", 1) would be coerced
  # to c("hotpink", "1"), and "1" only means black while the default
  # palette() is in effect.
  scale_fill_manual(
    values = c("hotpink", "black"))

# inside aes(): the aesthetic is mapped to a column of our data
# outside aes(): the aesthetic is set to one fixed value for the whole layer
# Here fill varies with am (inside aes), while every bar gets the same
# blue outline (colour set outside aes, directly in geom_bar).
ggplot(mtcars, aes(x = factor(cyl),
  fill = factor(am))) +
  geom_bar(colour = "blue") +
  labs(x = "Number of Cylinders",
    y = "Count", fill = "Phil") +
  # Both colours are spelled out by name. c("hotpink", 1) would be coerced
  # to c("hotpink", "1"), and "1" only means black while the default
  # palette() is in effect.
  scale_fill_manual(
    values = c("hotpink", "black"))

# More barplots: bars for each am level placed side by side instead of
# stacked, with the x and y axes swapped.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), fill = factor(am))) +
  coord_flip() +
  geom_bar(position = "dodge")

# Histograms, Density Plots

# Histogram of mpg with five bins
ggplot(mtcars, aes(mpg)) +
  geom_histogram(bins = 5)

# Density curve of mpg
ggplot(mtcars, aes(mpg)) +
  geom_density()

# One semi-transparent density curve per cyl value, so overlapping curves
# remain visible
ggplot(mtcars, aes(mpg, fill = factor(cyl))) +
  geom_density(alpha = 0.4)

# Scatterplot with very transparent points: regions where many points
# overlap show up darker
ggplot(diamonds, aes(carat, price)) +
  geom_point(alpha = 0.05)

# Boxplots of mpg for every cyl / am combination; varwidth = TRUE lets the
# box width vary with the number of observations in each group.
ggplot(mtcars, aes(factor(cyl), mpg, fill = factor(am))) +
  geom_boxplot(varwidth = TRUE)

# The same comparison as violins; scale = "count" sizes each violin
# according to the number of observations behind it.
ggplot(mtcars, aes(factor(cyl), mpg, fill = factor(am))) +
  geom_violin(scale = "count")

# dplyr: a small vocabulary of data-manipulation verbs modelled on SQL

# select(): choose columns; they come back in the order you list them
select(mtcars, mpg)
select(mtcars, c(mpg, cyl))
select(mtcars, c(cyl, mpg))

# Helper functions choose columns by name pattern; everything() then adds
# all remaining columns, which moves the "c" columns to the front.
colnames(mtcars)
select(mtcars, starts_with("c"), everything())

# filter(): keep the rows that satisfy a condition
filter(mtcars, !(cyl == 8))
filter(mtcars, mpg >= 20)
# Both conditions must hold (separating conditions with a comma means AND)
filter(mtcars, mpg >= 19 & cyl == 8)
# At least one of the conditions must hold
filter(mtcars, mpg >= 19 | cyl == 8)

# mutate(): add a new column, or overwrite one by reusing its name
mutate(mtcars, mpg2 = mpg^2)
mutate(mtcars, mpg = mpg^2)
# transmute(): like mutate(), but keeps only the columns that are named
t2 <- transmute(mtcars, mpg = mpg^2, cyl)
select(t2, cyl, mpg)

# summarise(): collapse the data to one row of summary statistics.
# Unnamed summaries are labelled with the expression itself; named ones
# use whatever name you supply.
summarise(mtcars, mean(mpg))
summarise(mtcars, mean(mpg), sd(mpg))
summarise(
  mtcars,
  mean_mpg = mean(mpg),
  Dave = sd(mpg)
)

# group_by(): mark the groups that later verbs should work within
group_by(mtcars, cyl)

# The pipe: %>%
# a %>% f is the same as f(a), and a %>% f(b) is the same as f(a, b):
# the left-hand side becomes the first argument of the call on the right.
x <- c(1, 4, 2, 22, NA)
mean(x, na.rm = TRUE)
x %>% mean(na.rm = TRUE)

# A longer chain: transform, filter, pick columns, summarise per cyl, then
# hand the summary table straight to ggplot().
# Note the switch from %>% to + once ggplot() has been called.
mtcars %>%
  mutate(mpg = mpg^2 + cyl) %>%
  filter(mpg > 400) %>%
  select(mpg, cyl) %>%
  group_by(cyl) %>%
  summarise(weird_mean = mean(mpg)) %>%
  ggplot(aes(x = cyl, y = weird_mean)) +
    # The bar heights are values already present in the data, so use
    # geom_col() rather than geom_bar(stat = "identity").
    geom_col()

# Mean mpg for each cyl value, drawn as bars
mtcars %>%
  group_by(cyl) %>%
  summarise(mean_mpg = mean(mpg)) %>%
  ggplot(aes(x = cyl, y = mean_mpg)) +
    geom_col()

# Heatmap: one tile per am / cyl combination, shaded by the mean mpg of
# that group. The group size is computed alongside the mean.
mtcars %>%
  group_by(am, cyl) %>%
  summarise(
    mean_mpg = mean(mpg),
    count = n()
  ) %>%
  ggplot(aes(x = factor(am), y = factor(cyl), fill = mean_mpg)) +
  scale_fill_gradient(low = "pink", high = "darkorchid") +
  geom_tile()

# Mean mpg for every am / cyl combination, largest mean first.
# summarise() removes only the last grouping variable, so the result would
# still be grouped by am; ungroup() drops that leftover grouping before
# the table is sorted.
mtcars %>%
  group_by(am, cyl) %>%
  summarise(mymean = mean(mpg)) %>%
  ungroup() %>%
  arrange(desc(mymean))

# rpubs.com/dbecker7/PracticePlots

# Simple linear regression of mpg on disp
mylm <- lm(mpg ~ disp, data = mtcars)
mylm          # the call and the estimated coefficients
summary(mylm) # the full coefficient table and fit statistics

# Base-R diagnostic plots for the fitted model, arranged on one page.
# par() returns the previous settings, so keep them and put them back
# afterwards; otherwise every later base plot would stay in the 2 x 2 grid.
old_par <- par(mfrow = c(2, 2)) # multi-figures, 2 rows and 2 cols
plot(mylm)
par(old_par)

# The same relationship in ggplot2: points, the default smoother with its
# confidence band, and the straight-line fit in red without a band.
ggplot(data = mtcars, mapping = aes(x = disp, y = mpg)) +
  geom_point() +
  geom_smooth(se = TRUE) +
  geom_smooth(method = "lm", colour = "red", se = FALSE)

# Mapping colour to factor(am) gives one straight-line fit per am level;
# the points are added last so they sit on top of the confidence bands.
ggplot(
  data = mtcars,
  mapping = aes(x = disp, y = mpg, colour = factor(am))
) +
  geom_smooth(method = "lm") +
  geom_point()

# Refit with am in the model. In a formula, disp * factor(am) expands to
# disp + factor(am) + disp:factor(am), so each level of am gets its own
# intercept and its own slope for disp.
mylm2 <- lm(mpg ~ disp * factor(am), data = mtcars)
# Coefficient table, including the interaction term
summary(mylm2)

# It's actually an ANCOVA: one continuous predictor (disp) combined with
# one categorical predictor (am). anova() prints the analysis-of-variance
# table, with terms tested in the order they appear in the formula.
anova(mylm2)