library("yarrr")
## Loading required package: jpeg
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(beanplot)

Q1

You can use the rnorm() function to generate random data following a Normal distribution. Run the following commands to generate two vectors x and y:

# Draw 100 values of x from a Normal distribution with mean 100 and sd 10.
x <- rnorm(n = 100, mean = 100, sd = 10)
# y is x plus 100 further Normal draws (mean 20, sd 20), added element-wise.
y <- rnorm(n = 100, mean = 20, sd = 20) + x

Create a histogram for variable x

# Histogram of x with an explicit title and axis labels.
hist(x,
     main = "Histogram of x",
     xlab = "This is the x label",
     ylab = "Frequency"
     )

Create a histogram for variable y

# Histogram of y. The title names y, the variable actually being plotted.
hist(y,
     main = "Histogram of y",
     xlab = "This is the x label",
     ylab = "Frequency"
     )

Create a scatterplot with x on the x-axis and y on the y-axis.

# Scatterplot of y against x; each axis gets its own label rather than a copy of the title.
plot(x, y,
     main = "This is the title",
     xlab = "This is the x-label",
     ylab = "This is the y-label")

Using the abline() function, add a (dashed) horizontal and vertical line at the mean of each variable (like this)

# Scatterplot of y against x; each axis gets its own label rather than a copy of the title.
plot(x, y,
     main = "This is the title",
     xlab = "This is the x-label",
     ylab = "This is the y-label")
# Dashed (lty = 2) reference lines: vertical at the mean of x, horizontal at the mean of y.
abline(v = mean(x), lty = 2)
abline(h = mean(y), lty = 2)

Q2

All the ‘named’ colors in R are listed in the colors() vector. When you run the function without any arguments, it will return a vector of named colors. Run the following code to look at the first 10.

# Show the first 10 entries of the named-colour vector.
head(colors(), n = 10)
##  [1] "white"         "aliceblue"     "antiquewhite"  "antiquewhite1"
##  [5] "antiquewhite2" "antiquewhite3" "antiquewhite4" "aquamarine"   
##  [9] "aquamarine1"   "aquamarine2"

Let’s look at some random colors. To get a random sample of integers, we’ll use the sample() function (we’ll learn this function in more detail later). For now, try running the following code a few times to see how the function works

# Draw 10 integers at random from 1 to 100.
sample(x = seq_len(100), size = 10)
##  [1] 83 13 96 73 32 94  4 57 85 84
# Pick 10 random positions in the named-colour vector. The upper bound comes
# from colors() itself instead of a hard-coded length.
samp.numbers <- sample(seq_along(colors()), size = 10)
colors.to.use <- colors()[samp.numbers]

# Plot the values 1 to 10 against their index, one point per sampled colour.
plot(1:10,
     col = colors.to.use,
     pch = 16,
     cex = 2,
     xlim = c(0, 11),
     ylim = c(0, 11)
     )

# Label every point with the name of its colour, reusing the vector that
# coloured the points; pos = 3 places the text above each point.
text(1:10,
     1:10,
     colors.to.use,
     pos = 3
     )

Q3

Using the boxplot() function, create the following boxplots showing the relationship between a pirate’s sword type and his/her sword speed. Be sure to use the y ~ x formulation as the main argument.

# Boxplot of sword swing time for each sword type, using the y ~ x formula
# interface with the columns looked up in the pirates data frame.
boxplot(sword.time ~ sword.type,
        data = pirates,
        xlab = "Sword Type",
        ylab = "Sword Swing Time",
        main = "Sword swinging time by sword type")

Q4

Using the beanplot package, create the following beanplots showing the relationship between a pirate’s sword type and his/her sword speed. (you can make the beans white by adding the argument color = “white”)

# Beanplot of sword swing time for each sword type.
# NOTE(review): the beanplot reference manual names its colour argument `col`;
# confirm that `color = "white"` is actually honoured before relying on it.
beanplot(pirates$sword.time ~ pirates$sword.type,
         color = "white",
         xlab = "Sword Type",
         ylab = "Sword Swing Time",
         main = "Sword swinging time by sword type")

Q5

Create the following blank plot with gridlines

# Set up axes for age (10 to 50) and beard length (0 to 40). The single
# placeholder value lands at (1, 1), which lies to the left of xlim.
plot(x = 1,
     main = "Empty Plot",
     xlab = "Age",
     ylab = "Beard Length",
     xlim = c(10, 50),
     ylim = c(0, 40))
# Thin gray gridlines every 5 units, vertical and horizontal in a single call.
abline(v = seq(from = 10, to = 50, by = 5),
       h = seq(from = 0, to = 40, by = 5),
       lwd = 0.4,
       col = gray(0.5))

Now, using the points() command, add points showing the relationship between pirate’s age and beard length just for male pirates!

# Axes for age (10 to 50) and beard length (0 to 40); the placeholder value
# at (1, 1) lies to the left of xlim.
plot(x = 1,
     main = "Pirate age and beard length",
     xlab = "Age",
     ylab = "Beard Length",
     xlim = c(10, 50),
     ylim = c(0, 40))
# Thin gray gridlines every 5 units, vertical and horizontal in a single call.
abline(v = seq(from = 10, to = 50, by = 5),
       h = seq(from = 0, to = 40, by = 5),
       lwd = 0.4,
       col = gray(0.5))

# Overlay age against beard length for the rows whose sex is "male".
with(pirates[pirates$sex == "male", ],
     points(x = age,
            y = beard.length,
            pch = 16,
            col = "goldenrod1"))

Now, do the same with female pirates

# Axes for age (10 to 50) and beard length (0 to 40). The title and the
# y-axis label each carry their own text.
plot(x = 1,
     xlab = "Age",
     ylab = "Beard Length",
     main = "Pirate age and beard length",
     xlim = c(10,50),
     ylim = c(0,40)
     )

# Thin gray gridlines every 5 units.
abline(v = seq(10,50,5), lwd = .4, col = gray(.5))
abline(h = seq(0,40,5), lwd = .4, col = gray(.5))

# Male pirates.
points(x = pirates$age[pirates$sex == "male"],
       y = pirates$beard.length[pirates$sex == "male"],
       pch = 16,
       col = "goldenrod1")
# Female pirates, drawn on top in a second colour.
points(x = pirates$age[pirates$sex == "female"],
       y = pirates$beard.length[pirates$sex == "female"],
       pch = 16,
       col = "violetred")

Now add a legend using legend()

# Axes for age (10 to 50) and beard length (0 to 40). The title and the
# y-axis label each carry their own text.
plot(x = 1,
     xlab = "Age",
     ylab = "Beard Length",
     main = "Pirate age and beard length",
     xlim = c(10,50),
     ylim = c(0,40)
     )

# Thin gray gridlines every 5 units.
abline(v = seq(10,50,5), lwd = .4, col = gray(.5))
abline(h = seq(0,40,5), lwd = .4, col = gray(.5))

# Male pirates.
points(x = pirates$age[pirates$sex == "male"],
       y = pirates$beard.length[pirates$sex == "male"],
       pch = 16,
       col = "goldenrod1")
# Female pirates, drawn on top in a second colour.
points(x = pirates$age[pirates$sex == "female"],
       y = pirates$beard.length[pirates$sex == "female"],
       pch = 16,
       col = "violetred")

# Legend entries use the same colours and point symbol as the two point layers.
legend("topright",
       legend = c("Males","Females"),
       col = c("goldenrod1","violetred"),
       pch = c(16,16),
       #bg = "white",
       cex = 0.8
       )

Q6

Now let’s use the movies dataset. First, create a new dataframe called movies.sample that contains a sample of 10 random movies using the following code:

# Express budget and total box office in millions.
movies$budget.millions <- movies$budget / 1000000
movies$boxoffice.millions <- movies$boxoffice.total / 1000000
# Keep movies with a positive budget whose budget and box office are both below 100 million.
movies.2 <- subset(movies, budget > 0 & budget.millions < 100 & boxoffice.millions < 100)

# Draw 10 distinct row positions. seq_len() gives an empty range when
# movies.2 has no rows, so sample() fails loudly instead of drawing from c(1, 0).
random.index <- sample(seq_len(nrow(movies.2)), size = 10, replace = FALSE)
movies.sample <- movies.2[random.index, ]

## Here are the movies I got in movies.sample; yours will be different!

# Print the names of the sampled movies.
movies.sample[["name"]]
##  [1] "A Most Wanted Man"    "Appaloosa"            "Living Out Loud"     
##  [4] "Bad Santa"            "The Sweetest Thing"   "The Golden Child"    
##  [7] "Lone Star"            "A Bridge Too Far"     "Kiss Kiss, Bang Bang"
## [10] "The Wedding Date"

Now, create the following blank plot with gridlines and a diagonal line separating movies that made money from those that lost money.

# Blank canvas for budget against revenue. type = "n" sets up the axes without
# drawing the placeholder value, which would otherwise show up at (1, 1)
# inside the 0 to 100 limits.
plot(x = 1,
     type = "n",
     xlab = "Budget (in millions)",
     ylab = "Revenue (in millions)",
     #main = "Empty Plot",
     xlim = c(0,100),
     ylim = c(0,100)
     )

# Thin gray gridlines every 10 units.
abline(v = seq(0,100,10), lwd = .4, col = gray(.5))
abline(h = seq(0,100,10), lwd = .4, col = gray(.5))
# Diagonal with intercept 0 and slope 1: revenue equals budget along this line.
abline(a = 0, b = 1, lwd = 1, lty = 1)

Now, add light gray points for all movies using points(). Hint: make the colors transparent and light gray by setting the color using gray() and setting the point type to 21.

# Blank canvas for budget against revenue. type = "n" sets up the axes without
# drawing the placeholder value, which would otherwise show up at (1, 1)
# inside the 0 to 100 limits.
plot(x = 1,
     type = "n",
     xlab = "Budget (in millions)",
     ylab = "Revenue (in millions)",
     #main = "Empty Plot",
     xlim = c(0,100),
     ylim = c(0,100)
     )

# Thin gray gridlines every 10 units.
abline(v = seq(0,100,10), lwd = .4, col = gray(.5))
abline(h = seq(0,100,10), lwd = .4, col = gray(.5))
# Diagonal with intercept 0 and slope 1: revenue equals budget along this line.
abline(a = 0, b = 1, lwd = 1, lty = 1)
# All movies as transparent gray points (gray level 0.2, alpha 0.2).
points(x = movies$budget.millions, y = movies$boxoffice.millions,
       pch = 21,
       col = gray(.2, .2))

Now, add points for the movies in movies.sample using points()

# Blank canvas for budget against revenue. type = "n" sets up the axes without
# drawing the placeholder value, which would otherwise show up at (1, 1)
# inside the 0 to 100 limits.
plot(x = 1,
     type = "n",
     xlab = "Budget (in millions)",
     ylab = "Revenue (in millions)",
     #main = "Empty Plot",
     xlim = c(0,100),
     ylim = c(0,100)
     )

# Thin gray gridlines every 10 units.
abline(v = seq(0,100,10), lwd = .4, col = gray(.5))
abline(h = seq(0,100,10), lwd = .4, col = gray(.5))
# Diagonal with intercept 0 and slope 1: revenue equals budget along this line.
abline(a = 0, b = 1, lwd = 1, lty = 1)
# All movies as transparent gray points (gray level 0.2, alpha 0.2).
points(x = movies$budget.millions, y = movies$boxoffice.millions,
       pch = 21,
       col = gray(.2, .2))
# The sampled movies, highlighted in red on top of the gray background points.
points(x = movies.sample$budget.millions, y = movies.sample$boxoffice.millions,
       pch = 21,
       col = "red")

Finally, add the movie names above the points using text(). Use the pos = 3 argument to place the names above the points

# Blank canvas for budget against revenue. type = "n" sets up the axes without
# drawing the placeholder value, which would otherwise show up at (1, 1)
# inside the 0 to 100 limits.
plot(x = 1,
     type = "n",
     xlab = "Budget (in millions)",
     ylab = "Revenue (in millions)",
     #main = "Empty Plot",
     xlim = c(0,100),
     ylim = c(0,100)
     )

# Thin gray gridlines every 10 units.
abline(v = seq(0,100,10), lwd = .4, col = gray(.5))
abline(h = seq(0,100,10), lwd = .4, col = gray(.5))
# Diagonal with intercept 0 and slope 1: revenue equals budget along this line.
abline(a = 0, b = 1, lwd = 1, lty = 1)
# All movies as transparent gray points (gray level 0.2, alpha 0.2).
points(x = movies$budget.millions, y = movies$boxoffice.millions,
       pch = 21,
       col = gray(.2, .2))
# The sampled movies, highlighted in red on top of the gray background points.
points(x = movies.sample$budget.millions, y = movies.sample$boxoffice.millions,
       pch = 21,
       col = "red")

# Movie names for the sampled points; pos = 3 places each label above its point.
text(x = movies.sample$budget.millions, y = movies.sample$boxoffice.millions,
     labels = movies.sample$name,
     pos = 3)