Datasets

# Load package(s)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(lubridate)
library(scales)

# Bring the objects stored in the tech_stocks .rda file into the workspace
load("data/tech_stocks.rda")

# Parse the pipe-delimited cdc file, then turn genhlth into a factor whose
# levels run from "excellent" down to "poor"
cdc <- read_delim("data/cdc.txt", delim = "|")
cdc <- mutate(
  cdc,
  genhlth = factor(
    genhlth,
    levels = c("excellent", "very good", "good", "fair", "poor")
  )
)

# Fix the RNG state so the sample and the simulated noise are reproducible
set.seed(8221984)

# Draw 100 random rows from cdc
cdc_small <- sample_n(cdc, 100)

# Toy data for exercise 2
# dat1: a fine grid of angles from 0 to 2*pi in steps of 0.01
dat1 <- tibble(theta = seq(from = 0, to = 2 * pi, by = 0.01))

# dat2: 100 evenly spaced angles with normally distributed observations
# centred on sin(theta) (sd 0.1); larger_than is "1" when |obs| is below
# |sin(theta)| and "0" otherwise
dat2 <- tibble(
  theta = seq(from = 0, to = 2 * pi, length.out = 100),
  obs = rnorm(n = 100, mean = sin(theta), sd = 0.1),
  larger_than = ifelse(abs(obs) < abs(sin(theta)), "1", "0")
)

Above, the ggplot2, tidyverse, dplyr, lubridate, and scales packages are loaded. The tech_stocks and cdc datasets are loaded as well, and the toy datasets are generated with the tibble function.

Exercise 1

# Exercise 1: line chart of price_indexed over date, one coloured line per
# company, with the legend placed inside the panel and the y axis on the right.
ggplot(tech_stocks, aes(x = date, y = price_indexed)) +
  geom_line(aes(colour = company)) +
  # Colours are named per company so the pairing does not depend on how the
  # installed ggplot2 version matches unnamed values against `breaks`.
  # Assumes `company` takes exactly these four values, as listed in `breaks`.
  scale_colour_manual(
    values = c(
      Facebook = "#7CAE00",
      Alphabet = "#C77CFF",
      Microsoft = "#F8766D",
      Apple = "#00BFC4"
    ),
    breaks = c("Facebook", "Alphabet", "Microsoft", "Apple")
  ) +
  # No padding on the date axis, so the lines start and end at the panel edges
  scale_x_date(expand = c(0, 0)) +
  scale_y_continuous(
    position = "right",
    labels = dollar,
    breaks = c(0, 100, 200, 300, 400, 500)
  ) +
  # Thicker lines in the legend keys only.
  # NOTE(review): ggplot2 >= 3.4.0 sizes lines via `linewidth`; confirm that
  # `size` still takes effect here on the installed version.
  guides(colour = guide_legend(override.aes = list(size = 1.3))) +
  theme_minimal() +
  theme(
    legend.position = c(0.75, 0.85),
    legend.justification = c(0.75, 0.85),
    legend.title = element_blank(),
    legend.background = element_blank(),
    legend.key = element_blank(),
    axis.ticks = element_blank()
  ) +
  # Empty strings (rather than NULL) blank the axis titles
  labs(title = "Stock price, indexed", x = "", y = "")

Above, I have created a line graph with the tech_stocks dataset, with colour differentiated by company.

Exercise 2

# Exercise 2: noisy observations around a sine curve.
# Split dat2 by its larger_than flag so each group gets its own point colour.
# The flag is stored as text, so compare against strings instead of relying
# on implicit numeric-to-character coercion.
largerthan1 <- dat2 %>%
  filter(larger_than == "1")

largerthan0 <- dat2 %>%
  filter(larger_than == "0")

# Layers inherit x = theta and y = obs from the plot-level mapping; only the
# reference curve overrides y.
ggplot(dat2, aes(x = theta, y = obs)) +
  geom_point(data = largerthan0, colour = "darkgreen", size = 2, alpha = 0.8) +
  geom_point(data = largerthan1, colour = "red", size = 2, alpha = 0.8) +
  # Sine curve drawn from the fine theta grid in dat1
  geom_line(
    data = dat1,
    aes(y = sin(theta)),
    colour = "#56B4E9",
    size = 1.3
  ) +
  # The complete theme goes first: adding theme_minimal() after theme() would
  # replace the tweak below instead of being modified by it.
  theme_minimal() +
  theme(axis.ticks = element_blank()) +
  xlab("θ") +
  ylab("sin(θ)")

Above, I have plotted the noisy observations in dat2 as a scatterplot and overlaid the sine curve computed from the theta values in dat1. The scatterplot and line graph complement each other on the same panel.

Exercise 3

# Exercise 3: weight against height for the cdc_small sample, on log10 axes,
# with point colour and shape both driven by genhlth.
ggplot(cdc_small, aes(x = height, y = weight)) +
  geom_point(aes(colour = genhlth, shape = genhlth), size = 3) +
  # The blank axis titles are set once, through `name`, on each position scale
  scale_x_continuous(
    name = "",
    trans = "log10",
    limits = c(55, 80),
    breaks = c(55, 60, 65, 70, 75, 80),
    labels = c("55 in", "60 in", "65 in", "70 in", "75 in", "80 in")
  ) +
  scale_y_continuous(
    name = "",
    trans = "log10",
    limits = c(100, 300),
    breaks = c(100, 125, 150, 175, 200, 225, 250, 275, 300),
    labels = c(
      "100 lbs", "125 lbs", "150 lbs",
      "175 lbs", "200 lbs", "225 lbs",
      "250 lbs", "275 lbs", "300 lbs"
    )
  ) +
  # The colour and shape scales carry the same name and labels so that ggplot2
  # merges them into a single legend; keep the two in sync when editing.
  scale_colour_manual(
    name = "General \n Health?",
    labels = c("Excellent", "Very good", "Good", "Fair", "Poor"),
    values = c("blue", "red", "purple", "green", "yellow")
  ) +
  scale_shape_manual(
    name = "General \n Health?",
    labels = c("Excellent", "Very good", "Good", "Fair", "Poor"),
    values = c(17, 19, 15, 9, 4)
  ) +
  theme_minimal() +
  # Legend placed inside the panel, towards the lower right
  theme(legend.position = c(0.92, 0.25)) +
  ggtitle("CDC BRFSS: Weight by Height")

Above, I have created a scatterplot of weight against height from the cdc dataset. Both the shape and colour aesthetics are mapped to the genhlth variable and share a single legend.