library(dplyr)
library(ggplot2)
# Work on a copy so the packaged `diamonds` data set itself is never modified.
Diamonds <- diamonds

# Re-declare each grading column as an ordered factor whose levels run from
# the worst grade to the best, so plots list the grades in quality order.
Diamonds$cut <- factor(
  Diamonds$cut,
  levels = c("Fair", "Good", "Very Good", "Premium", "Ideal"),
  ordered = TRUE
)

# Colour grades: J is the worst grade and D the best.
Diamonds$color <- factor(
  Diamonds$color,
  levels = c("J", "I", "H", "G", "F", "E", "D"),
  ordered = TRUE
)

# Clarity grades: within each pair the "2" grade ranks BELOW the "1" grade
# (SI2 < SI1, VS2 < VS1, VVS2 < VVS1), so it must come first in the levels.
Diamonds$clarity <- factor(
  Diamonds$clarity,
  levels = c("I1", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"),
  ordered = TRUE
)

Exercise 1

Produce a bar plot showing the frequency of each diamond cut quality.

# Exercise 1: bar chart counting the diamonds in each cut-quality grade.
# Columns are referenced by bare name inside aes(); ggplot2 looks them up in
# `data`, whereas `Diamonds$cut` would bypass that lookup.
ex1 <- ggplot(data = Diamonds, aes(x = cut)) +
  geom_bar() +
  theme_bw() +
  xlab("Cut Quality") +
  ylab("Frequency") +
  ggtitle("Frequency of Diamonds by Cut Quality.")
ex1

Exercise 2

Create a plot showing the relationship between diamond price and carat.

# Exercise 2: scatter plot of price against carat with a fitted smooth curve.
# Columns are referenced by bare name inside aes() so they are resolved
# against `Diamonds` by ggplot2 rather than captured as free-standing vectors.
ex2 <- ggplot(Diamonds, aes(x = carat, y = price)) +
  # Heavy transparency keeps the dense regions of the cloud readable.
  geom_point(alpha = 0.1) +
  stat_smooth() +
  # Zoom the view to 0-3 carats; coord_cartesian() changes only the visible
  # window, so the smoother is still fitted to every row.
  coord_cartesian(xlim = c(0, 3)) +
  ggtitle("Diamond Price vs. Carat. (3649252)") +
  xlab("Carat") +
  ylab("Price (USD)")
ex2
## `geom_smooth()` using method = 'gam'

Exercise 3

Create a plot showing the relationship between diamond price and carat across diamond colour.

# Exercise 3: price against carat, with points and smooth curves coloured by
# the diamond's colour grade.
# The colour aesthetic uses the bare column name so ggplot2 resolves it
# against `Diamonds`, and the legend title is set through labs() instead of
# writing into the plot object's internal `labels` list.
ex3 <- ggplot(Diamonds, aes(x = carat, y = price, color = color)) +
  geom_point(alpha = 0.05) +
  # level = 0 shrinks the confidence band to zero width, leaving only the
  # fitted curve for each colour grade.
  stat_smooth(level = 0) +
  # Zoom the view to 0-3 carats without dropping rows from the fit.
  coord_cartesian(xlim = c(0, 3)) +
  ggtitle("Diamond Price vs. Carat by Color") +
  xlab("Carat") +
  ylab("Price (USD)") +
  labs(colour = "Diamond Color")
ex3
## `geom_smooth()` using method = 'gam'

Exercise 4

Create a series of box plots comparing diamond price across cut quality, with the comparison subdivided further by clarity and then by colour.

# Exercise 4: box plots of price by cut quality, drawn in a grid of panels
# with one row per clarity grade and one column per colour grade.
ex4 <- ggplot(Diamonds, aes(x = cut, y = price)) +
  geom_boxplot() +
  facet_grid(clarity ~ color) +
  # Overlay the mean price of each box as a red triangle. `fun` replaces the
  # deprecated `fun.y` argument of stat_summary().
  stat_summary(
    fun = mean,
    color = "red",
    geom = "point",
    shape = "triangle"
  ) +
  ggtitle("Box Plots of Diamond Prices (s3649252)")
ex4

Wow! This is really hideous. I’m sure there must be a more appropriate way to do this.

Anscombe’s Quartet

In this exercise, visit the Wikipedia page for Anscombe’s Quartet. Use the data from the web page and ggplot2 to reproduce Anscombe’s famous visualisation.

# The `anscombe` data set ships with base R (package `datasets`) in wide
# form: columns x1..x4 and y1..y4, one x/y pair per chart of the quartet.
# Stack the four pairs into long form so each row is one point, labelled by
# the chart it belongs to.
x <- c(anscombe$x1, anscombe$x2, anscombe$x3, anscombe$x4)
y <- c(anscombe$y1, anscombe$y2, anscombe$y3, anscombe$y4)
# One label per stacked row; the group size comes from the data rather than
# being hard-coded.
chart <- factor(rep(1:4, each = nrow(anscombe)))
ansc <- data.frame(x, y, chart)

# Anscombe's Quartet: one panel per chart, each showing its points and a
# linear fit.
# x and y are referenced by bare column name so every panel uses the rows of
# `ansc` that belong to it; `ansc$x` / `ansc$y` would be free-standing
# vectors with no link to the faceted data.
ex5 <- ggplot(data = ansc, aes(x = x, y = y)) +
  geom_point(color = "orange") +
  # fullrange = TRUE extends the fitted line across the whole panel;
  # level = 0 shrinks the confidence band to zero width.
  # NOTE(review): `size` for lines is superseded by `linewidth` in newer
  # ggplot2 releases; kept as-is so the script still runs on older installs.
  stat_smooth(method = "lm", size = 0, fullrange = TRUE, level = 0) +
  facet_wrap(~chart) +
  ylab("Y") +
  # Print the y-axis title upright and vertically centred.
  theme(axis.title.y = element_text(angle = 0, vjust = 0.5)) +
  xlab("X") +
  ggtitle("Anscombe's Quartet. (3649252)")
ex5