Datasets

# Load package(s)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(lubridate)
library(scales)

# Load datasets ----
# Restores whatever objects were saved in the .rda file into the workspace
# (the Exercise 1 plot expects a `tech_stocks` object to come from here).
load(file = "data/tech_stocks.rda")

# Read the pipe-delimited cdc file, then turn `genhlth` into a factor whose
# levels run from best to worst health
cdc <- read_delim(file = "data/cdc.txt", delim = "|")
cdc <- mutate(
  cdc,
  genhlth = factor(
    genhlth,
    levels = c("excellent", "very good", "good", "fair", "poor")
  )
)

# Fix the RNG state so the subset and the simulated noise are reproducible
set.seed(8221984)

# Random subset of 100 rows
cdc_small <- sample_n(cdc, 100)

# Toy datasets for exercise 2 ----
# dat1: fine grid of angles on [0, 2 * pi] in steps of 0.01
dat1 <- tibble(theta = seq(0, 2 * pi, 0.01))

# dat2: 100 evenly spaced angles, a noisy observation of sin(theta) at each
# (normal noise, sd 0.1), and a "1"/"0" flag that is "1" when |obs| is
# smaller than |sin(theta)|
dat2 <- tibble(theta = seq(0, 2 * pi, length.out = 100)) %>%
  mutate(
    obs = rnorm(100, sin(theta), 0.1),
    larger_than = ifelse(abs(obs) < abs(sin(theta)), "1", "0")
  )

Above, the ggplot2, tidyverse, dplyr, lubridate, and scales packages are loaded. The tech_stocks and cdc datasets are also loaded, and the toy datasets are generated with the tibble function.

Exercise 1

# Exercise 1: indexed stock price over time, one coloured line per company.
ggplot(tech_stocks, aes(x = date, y = price_indexed)) +
  geom_line(aes(colour = company)) +
  # Colours are named by company so the pairing is explicit instead of
  # depending on how the installed ggplot2 matches unnamed values to breaks;
  # `breaks` fixes the legend order.
  scale_colour_manual(
    values = c(
      Facebook = "#7CAE00",
      Alphabet = "#C77CFF",
      Microsoft = "#F8766D",
      Apple = "#00BFC4"
    ),
    breaks = c("Facebook", "Alphabet", "Microsoft", "Apple")
  ) +
  # No padding on the date axis, so the lines start at the panel edge
  scale_x_date(expand = c(0, 0)) +
  # Price axis on the right-hand side, labelled as dollar amounts
  scale_y_continuous(
    position = "right",
    labels = dollar,
    breaks = c(0, 100, 200, 300, 400, 500)
  ) +
  # Draw thicker line samples in the legend keys.
  # NOTE(review): newer ggplot2 releases size lines via `linewidth`; confirm
  # this override still takes effect on the installed version.
  guides(colour = guide_legend(override.aes = list(size = 1.3))) +
  # NULL drops the axis titles entirely rather than reserving empty space
  labs(title = "Stock price, indexed", x = NULL, y = NULL) +
  theme_minimal() +
  # Legend sits inside the panel (upper right), without title or backgrounds
  theme(
    legend.position = c(0.75, 0.85),
    legend.justification = c(0.75, 0.85),
    legend.title = element_blank(),
    legend.background = element_blank(),
    legend.key = element_blank(),
    axis.ticks = element_blank()
  )

Above, I have created a line graph with the tech_stocks dataset, with colour differentiated by company.

Exercise 2

# Exercise 2: noisy observations around sin(theta), coloured by the
# `larger_than` flag, with the exact sine curve drawn on top.

# `larger_than` holds the strings "1"/"0", so compare against strings rather
# than relying on implicit number-to-character coercion.
largerthan1 <- dat2 %>%
  filter(larger_than == "1")

largerthan0 <- dat2 %>%
  filter(larger_than == "0")

ggplot(dat2, aes(x = theta, y = obs)) +
  # Points flagged "0" in dark green, drawn first
  geom_point(
    data = largerthan0,
    colour = "darkgreen", size = 2, alpha = 0.8
  ) +
  # Points flagged "1" in red; x/y mappings are inherited from ggplot()
  geom_point(
    data = largerthan1,
    colour = "red", size = 2, alpha = 0.8
  ) +
  # Reference curve evaluated on the finer dat1 grid.
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
  # lines; `size` is kept for compatibility with older installs - confirm.
  geom_line(
    data = dat1, aes(x = theta, y = sin(theta)),
    colour = "#56B4E9", size = 1.3
  ) +
  # theme_minimal() is a complete theme: it has to come before any theme()
  # tweak, otherwise it replaces the tweak.
  theme_minimal() +
  theme(axis.ticks = element_blank()) +
  labs(x = "θ", y = "sin(θ)")

Above, I have created a sine graph: the points come from dat2 (its theta and obs values), and the curve is sin(theta) evaluated on dat1. The scatterplot and line graph complement each other on the same panel.

Exercise 3

# Exercise 3: weight against height for the cdc subset on log10 axes, with
# general health shown by both colour and shape.

# Axis breaks are defined once; the tick labels are derived from them so the
# two can never drift apart.
height_breaks <- seq(55, 80, by = 5)
weight_breaks <- seq(100, 300, by = 25)

# The colour and shape scales must share the same title and labels for
# ggplot2 to merge them into a single legend.
health_title <- "General \n Health?"
health_labels <- c("Excellent", "Very good", "Good", "Fair", "Poor")

ggplot(cdc_small, aes(height, weight)) +
  geom_point(aes(colour = genhlth, shape = genhlth), size = 3) +
  # name = NULL removes the axis title; the unit is carried by the tick labels
  scale_x_continuous(
    name = NULL,
    trans = "log10",
    limits = range(height_breaks),
    breaks = height_breaks,
    labels = paste(height_breaks, "in")
  ) +
  scale_y_continuous(
    name = NULL,
    trans = "log10",
    limits = range(weight_breaks),
    breaks = weight_breaks,
    labels = paste(weight_breaks, "lbs")
  ) +
  scale_colour_manual(
    name = health_title,
    labels = health_labels,
    values = c("blue", "red", "purple", "green", "yellow")
  ) +
  scale_shape_manual(
    name = health_title,
    labels = health_labels,
    values = c(17, 19, 15, 9, 4)
  ) +
  ggtitle("CDC BRFSS: Weight by Height") +
  theme_minimal() +
  # Legend placed inside the panel, lower right
  theme(legend.position = c(0.92, 0.25))

Above, I have created a scatterplot of weight against height from the cdc dataset. Both shape and colour are mapped to the genhlth variable, so they share a single legend.

L07 Scales, Axes & Legends

Taehyung Kim

April 25, 2019

Datasets

Exercise 1

Exercise 2

Exercise 3