# Load package(s)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(lubridate)
library(scales)
# Data ----

# Restore the objects saved in the .rda file into the workspace
# (the first plot below expects this to provide `tech_stocks`).
load("data/tech_stocks.rda")

# Read the pipe-delimited CDC file and store `genhlth` as a factor whose
# levels run from best to worst self-reported health.
cdc <- mutate(
  read_delim("data/cdc.txt", delim = "|"),
  genhlth = factor(
    genhlth,
    levels = c("excellent", "very good", "good", "fair", "poor")
  )
)

# Fix the RNG state so the sample and the simulated noise are reproducible.
set.seed(8221984)

# Random subset of 100 rows from cdc
cdc_small <- sample_n(cdc, size = 100)

# Toy datasets for exercise 2 ----

# dat1: a fine grid of angles on [0, 2 * pi] in steps of 0.01
dat1 <- tibble(theta = seq(0, 2 * pi, by = 0.01))

# dat2: 100 evenly spaced angles, a noisy observation of sin(theta) at each
# (sd = 0.1), and a character flag that is "1" when |obs| < |sin(theta)| and
# "0" otherwise.
dat2 <- tibble(
  theta = seq(0, 2 * pi, length.out = 100),
  obs = rnorm(100, mean = sin(theta), sd = 0.1),
  larger_than = ifelse(abs(obs) < abs(sin(theta)), "1", "0")
)
Above, the ggplot2, tidyverse, dplyr, lubridate, and scales packages are loaded. The tech_stocks and cdc datasets are also loaded, and the toy datasets are generated with the tibble function.
# Line chart of the indexed stock price over time, one coloured line per
# company. The legend is placed inside the panel near the top right, and the
# y axis is drawn on the right with dollar-formatted labels.
ggplot(tech_stocks, aes(x = date, y = price_indexed)) +
  geom_line(aes(colour = company)) +
  # Colours are named by company so the pairing is explicit and does not rely
  # on unnamed values being matched to `breaks` by position. `breaks` still
  # controls the order of the legend entries.
  scale_colour_manual(
    values = c(
      "Facebook" = "#7CAE00",
      "Alphabet" = "#C77CFF",
      "Microsoft" = "#F8766D",
      "Apple" = "#00BFC4"
    ),
    breaks = c("Facebook", "Alphabet", "Microsoft", "Apple")
  ) +
  # No padding on the x axis: the lines run edge to edge.
  scale_x_date(expand = c(0, 0)) +
  scale_y_continuous(
    position = "right",
    labels = dollar,
    breaks = c(0, 100, 200, 300, 400, 500)
  ) +
  # Thicker legend keys. Only `colour` is mapped in this plot, so no other
  # guide needs overriding.
  # NOTE(review): newer ggplot2 releases use `linewidth` rather than `size`
  # for line thickness -- confirm against the installed version.
  guides(colour = guide_legend(override.aes = list(size = 1.3))) +
  theme_minimal() +
  theme(
    legend.position = c(0.75, 0.85),
    legend.justification = c(0.75, 0.85),
    legend.title = element_blank(),
    legend.background = element_blank(),
    legend.key = element_blank(),
    axis.ticks = element_blank()
  ) +
  labs(title = "Stock price, indexed", x = "", y = "")
Above, I have created a line graph with the tech_stocks dataset, with colour differentiated by company.
# Split dat2 by its flag so each group can be drawn in its own fixed colour.
# `larger_than` is a character column, so compare against strings rather than
# relying on implicit numeric-to-character coercion.
largerthan1 <- dat2 %>%
  filter(larger_than == "1")
largerthan0 <- dat2 %>%
  filter(larger_than == "0")

# Scatterplot of the noisy observations in dat2 with the sine curve evaluated
# on the dat1 grid drawn on top.
ggplot(dat2, aes(x = theta, y = obs)) +
  # Both point layers inherit x = theta, y = obs from the plot-level mapping.
  geom_point(data = largerthan0, color = "darkgreen", size = 2, alpha = 0.8) +
  geom_point(data = largerthan1, color = "red", size = 2, alpha = 0.8) +
  geom_line(
    data = dat1,
    aes(x = theta, y = sin(theta)),
    color = "#56B4E9",
    size = 1.3
  ) +
  # Apply the complete theme first: a complete theme replaces any theme()
  # settings added before it, so tweaks must come afterwards.
  theme_minimal() +
  theme(axis.ticks = element_blank()) +
  xlab("θ") +
  ylab("sin(θ)")
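Above, I have created a sine graph using the dat1 and dat2 datasets and their theta values: the noisy observations in dat2 are drawn as a scatterplot and the sine curve from dat1 is drawn as a line. The scatterplot and line graph complement each other on the same panel.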
# Legend title and labels are shared by the colour and shape scales. The two
# legends only merge into one when these are identical, so define them once.
health_legend_title <- "General \n Health?"
health_legend_labels <- c("Excellent", "Very good", "Good", "Fair", "Poor")

# Axis breaks; the tick labels below are derived from them so the two cannot
# drift apart.
height_breaks <- seq(55, 80, by = 5)
weight_breaks <- seq(100, 300, by = 25)

# Scatterplot of weight against height for the cdc_small sample on log10 axes,
# with both point colour and point shape mapped to genhlth.
ggplot(cdc_small, aes(x = height, y = weight)) +
  geom_point(aes(colour = genhlth, shape = genhlth), size = 3) +
  # name = "" leaves both axis titles blank; the units are in the tick labels.
  scale_x_continuous(
    name = "",
    trans = "log10",
    limits = c(55, 80),
    breaks = height_breaks,
    labels = paste(height_breaks, "in")
  ) +
  scale_y_continuous(
    name = "",
    trans = "log10",
    limits = c(100, 300),
    breaks = weight_breaks,
    labels = paste(weight_breaks, "lbs")
  ) +
  scale_colour_manual(
    name = health_legend_title,
    labels = health_legend_labels,
    values = c("blue", "red", "purple", "green", "yellow")
  ) +
  scale_shape_manual(
    name = health_legend_title,
    labels = health_legend_labels,
    values = c(17, 19, 15, 9, 4)
  ) +
  theme_minimal() +
  # Place the legend inside the panel, towards the lower right.
  theme(legend.position = c(0.92, 0.25)) +
  ggtitle("CDC BRFSS: Weight by Height")
Above, I have created a scatterplot that shows weight by height from the cdc dataset. Both the shape and colour aesthetics are mapped to the genhlth variable and share a single legend.