library("yarrr")
## Loading required package: jpeg
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(beanplot)
Q1
You can use the rnorm() function to generate random data following a Normal distribution. Run the following commands to generate two vectors x and y:
# Simulate 100 normal draws for x (mean 100, sd 10); y is x plus
# independent normal noise (mean 20, sd 20), so the two are correlated.
x <- rnorm(n = 100, mean = 100, sd = 10)
y <- x + rnorm(n = 100, mean = 20, sd = 20)
Create a histogram for variable x
# Histogram of the simulated x values with an explicit title and axis labels.
hist(
  x,
  main = "Histogram of x",
  xlab = "This is the x label",
  ylab = "Frequency"
)
Create a histogram for variable y
# Histogram of the simulated y values. The title names y, the variable
# actually being plotted.
hist(
  y,
  main = "Histogram of y",
  xlab = "This is the x label",
  ylab = "Frequency"
)
Create a scatterplot with x on the x-axis and y on the y-axis.
# Scatterplot of y against x. Each of main / xlab / ylab gets its own text
# so the x-axis is not labelled with the title.
plot(
  x, y,
  main = "This is the title",
  xlab = "This is the x-label",
  ylab = "This is the y-label"
)
Using the abline() function, add a (dashed) horizontal and vertical line at the mean of each variable (like this)
# Scatterplot of y against x, with dashed reference lines at the sample means.
plot(
  x, y,
  main = "This is the title",
  xlab = "This is the x-label",
  ylab = "This is the y-label"
)
# lty = 2 draws dashed lines: vertical at mean(x), horizontal at mean(y).
abline(v = mean(x), lty = 2)
abline(h = mean(y), lty = 2)
Q2
All the ‘named’ colors in R are listed in the colors() vector. When you run the function without any arguments, it will return a vector of named colors. Run the following code to look at the first 10.
# Print the first ten entries of R's vector of named colours.
colors()[seq_len(10)]
## [1] "white" "aliceblue" "antiquewhite" "antiquewhite1"
## [5] "antiquewhite2" "antiquewhite3" "antiquewhite4" "aquamarine"
## [9] "aquamarine1" "aquamarine2"
Let’s look at some random colors. To get a random sample of integers, we’ll use the sample() function (we’ll learn this function in more detail later). For now, try running the following code a few times to see how the function works
# Draw 10 integers from 1..100 without replacement; the result changes on
# every run unless a seed is set.
sample(x = seq_len(100), size = 10)
## [1] 83 13 96 73 32 94 4 57 85 84
# Draw 10 distinct indices into the named-colour vector. Indexing over
# seq_along(colors()) avoids hard-coding the length of that vector.
samp.numbers <- sample(seq_along(colors()), size = 10)
colors.to.use <- colors()[samp.numbers]

# One large filled point per sampled colour, laid out along the diagonal.
# The limits leave a margin so the labels above the top point fit.
plot(
  1:10,
  col = colors.to.use,
  pch = 16,
  cex = 2,
  xlim = c(0, 11),
  ylim = c(0, 11)
)

# Write each colour's name just above its point (pos = 3 means "above").
text(
  x = 1:10,
  y = 1:10,
  labels = colors.to.use,
  pos = 3
)
Q3
Using the boxplot() function, create the following boxplots showing the relationship between a pirate’s sword type and his/her sword speed. Be sure to use the y ~ x formulation as the main argument.
# Boxplots of sword swing time, one box per sword type. The formula is
# evaluated inside `pirates` via `data =` rather than repeating `pirates$`.
boxplot(
  sword.time ~ sword.type,
  data = pirates,
  xlab = "Sword Type",
  ylab = "Sword Swing Time",
  main = "Sword swinging time by sword type"
)
Q4
Using the beanplot package, create the following beanplots showing the relationship between a pirate’s sword type and his/her sword speed. (you can make the beans white by adding the argument color = “white”)
# Beanplots of sword swing time, one bean per sword type.
# NOTE(review): the exercise text asks for `color = "white"`; beanplot's
# documented colour argument appears to be `col` -- confirm `color` is honoured.
beanplot(
  pirates$sword.time ~ pirates$sword.type,
  color = "white",
  xlab = "Sword Type",
  ylab = "Sword Swing Time",
  main = "Sword swinging time by sword type"
)
Q5
Create the following blank plot with gridlines
# Empty canvas for age vs. beard length. type = "n" sets up the axes without
# drawing the placeholder point, so the plot is blank by construction rather
# than because the point happens to fall outside the axis limits.
plot(
  x = 1,
  type = "n",
  xlab = "Age",
  ylab = "Beard Length",
  main = "Empty Plot",
  xlim = c(10, 50),
  ylim = c(0, 40)
)

# Thin mid-gray gridlines every 5 units on both axes.
abline(v = seq(10, 50, 5), lwd = 0.4, col = gray(0.5))
abline(h = seq(0, 40, 5), lwd = 0.4, col = gray(0.5))
Now, using the points() command, add points showing the relationship between pirate’s age and beard length just for male pirates!
# Empty canvas (type = "n" suppresses the placeholder point) with gridlines,
# then age vs. beard length for male pirates only.
plot(
  x = 1,
  type = "n",
  xlab = "Age",
  ylab = "Beard Length",
  main = "Pirate age and beard length",
  xlim = c(10, 50),
  ylim = c(0, 40)
)

# Thin mid-gray gridlines every 5 units on both axes.
abline(v = seq(10, 50, 5), lwd = 0.4, col = gray(0.5))
abline(h = seq(0, 40, 5), lwd = 0.4, col = gray(0.5))

# Male pirates as filled gold points.
points(
  x = pirates$age[pirates$sex == "male"],
  y = pirates$beard.length[pirates$sex == "male"],
  pch = 16,
  col = "goldenrod1"
)
Now, do the same with female pirates
# Empty canvas (type = "n" suppresses the placeholder point) with gridlines,
# then age vs. beard length for male and female pirates in separate colours.
# The y-axis is labelled with the variable and the title describes the plot.
plot(
  x = 1,
  type = "n",
  xlab = "Age",
  ylab = "Beard Length",
  main = "Pirate age and beard length",
  xlim = c(10, 50),
  ylim = c(0, 40)
)

# Thin mid-gray gridlines every 5 units on both axes.
abline(v = seq(10, 50, 5), lwd = 0.4, col = gray(0.5))
abline(h = seq(0, 40, 5), lwd = 0.4, col = gray(0.5))

# Male pirates as filled gold points.
points(
  x = pirates$age[pirates$sex == "male"],
  y = pirates$beard.length[pirates$sex == "male"],
  pch = 16,
  col = "goldenrod1"
)

# Female pirates as filled violet-red points, drawn on top.
points(
  x = pirates$age[pirates$sex == "female"],
  y = pirates$beard.length[pirates$sex == "female"],
  pch = 16,
  col = "violetred"
)
Now add a legend using legend()
# Empty canvas (type = "n" suppresses the placeholder point) with gridlines,
# age vs. beard length by sex, and a legend explaining the two colours.
# The y-axis is labelled with the variable and the title describes the plot.
plot(
  x = 1,
  type = "n",
  xlab = "Age",
  ylab = "Beard Length",
  main = "Pirate age and beard length",
  xlim = c(10, 50),
  ylim = c(0, 40)
)

# Thin mid-gray gridlines every 5 units on both axes.
abline(v = seq(10, 50, 5), lwd = 0.4, col = gray(0.5))
abline(h = seq(0, 40, 5), lwd = 0.4, col = gray(0.5))

# Male pirates as filled gold points.
points(
  x = pirates$age[pirates$sex == "male"],
  y = pirates$beard.length[pirates$sex == "male"],
  pch = 16,
  col = "goldenrod1"
)

# Female pirates as filled violet-red points, drawn on top.
points(
  x = pirates$age[pirates$sex == "female"],
  y = pirates$beard.length[pirates$sex == "female"],
  pch = 16,
  col = "violetred"
)

# Legend entries are listed in the same order as their colours; the single
# pch value is recycled across both entries.
legend(
  "topright",
  legend = c("Males", "Females"),
  col = c("goldenrod1", "violetred"),
  pch = 16,
  cex = 0.8
)
Q6
Now let’s use the movies dataset. First, create a new dataframe called movies.sample that contains a sample of 10 random movies using the following code:
# Express budget and total box office in millions for readable axes.
movies$budget.millions <- movies$budget / 1000000
movies$boxoffice.millions <- movies$boxoffice.total / 1000000

# Keep movies with a positive budget whose budget and box office are both
# under 100 million, so every sampled movie fits a 0-100 plotting window.
movies.2 <- subset(
  movies,
  budget > 0 & budget.millions < 100 & boxoffice.millions < 100
)

# Pick 10 distinct rows at random. seq_len() is used instead of 1:nrow()
# because 1:0 would yield c(1, 0) if the subset were empty.
random.index <- sample(seq_len(nrow(movies.2)), size = 10, replace = FALSE)
movies.sample <- movies.2[random.index, ]

## Here are the movies I got in movies.sample, yours will be different!
movies.sample$name
## [1] "A Most Wanted Man" "Appaloosa" "Living Out Loud"
## [4] "Bad Santa" "The Sweetest Thing" "The Golden Child"
## [7] "Lone Star" "A Bridge Too Far" "Kiss Kiss, Bang Bang"
## [10] "The Wedding Date"
Now, create the following blank plot with gridlines and a diagonal line separating movies that made money from those that lost money.
# Empty budget-vs-revenue canvas. type = "n" is needed here: without it the
# placeholder value is drawn as a visible point at (1, 1), which lies inside
# the 0-100 axis limits.
plot(
  x = 1,
  type = "n",
  xlab = "Budget (in millions)",
  ylab = "Revenue (in millions)",
  xlim = c(0, 100),
  ylim = c(0, 100)
)

# Thin mid-gray gridlines every 10 million on both axes.
abline(v = seq(0, 100, 10), lwd = 0.4, col = gray(0.5))
abline(h = seq(0, 100, 10), lwd = 0.4, col = gray(0.5))

# Break-even diagonal (revenue == budget): points above it made money.
abline(a = 0, b = 1, lwd = 1, lty = 1)
Now, add light gray points for all movies using points(). Hint: make the colors transparent and light gray by setting the color using gray() and setting the point type to 21.
# Empty budget-vs-revenue canvas. type = "n" is needed here: without it the
# placeholder value is drawn as a visible point at (1, 1), which lies inside
# the 0-100 axis limits.
plot(
  x = 1,
  type = "n",
  xlab = "Budget (in millions)",
  ylab = "Revenue (in millions)",
  xlim = c(0, 100),
  ylim = c(0, 100)
)

# Thin mid-gray gridlines every 10 million on both axes.
abline(v = seq(0, 100, 10), lwd = 0.4, col = gray(0.5))
abline(h = seq(0, 100, 10), lwd = 0.4, col = gray(0.5))

# Break-even diagonal (revenue == budget): points above it made money.
abline(a = 0, b = 1, lwd = 1, lty = 1)

# Every movie as an open circle in semi-transparent gray (gray level 0.2,
# alpha 0.2); movies beyond the axis limits are simply not shown.
points(
  x = movies$budget.millions,
  y = movies$boxoffice.millions,
  pch = 21,
  col = gray(0.2, 0.2)
)
Now, add points for the movies in movies.sample using points()
# Empty budget-vs-revenue canvas. type = "n" is needed here: without it the
# placeholder value is drawn as a visible point at (1, 1), which lies inside
# the 0-100 axis limits.
plot(
  x = 1,
  type = "n",
  xlab = "Budget (in millions)",
  ylab = "Revenue (in millions)",
  xlim = c(0, 100),
  ylim = c(0, 100)
)

# Thin mid-gray gridlines every 10 million on both axes.
abline(v = seq(0, 100, 10), lwd = 0.4, col = gray(0.5))
abline(h = seq(0, 100, 10), lwd = 0.4, col = gray(0.5))

# Break-even diagonal (revenue == budget): points above it made money.
abline(a = 0, b = 1, lwd = 1, lty = 1)

# Every movie as an open circle in semi-transparent gray (gray level 0.2,
# alpha 0.2); movies beyond the axis limits are simply not shown.
points(
  x = movies$budget.millions,
  y = movies$boxoffice.millions,
  pch = 21,
  col = gray(0.2, 0.2)
)

# The sampled movies as red open circles, drawn over the gray background.
points(
  x = movies.sample$budget.millions,
  y = movies.sample$boxoffice.millions,
  pch = 21,
  col = "red"
)
Finally, add the movie names above the points using text(). Use the pos = 3 argument to place the names above the points
# Empty budget-vs-revenue canvas. type = "n" is needed here: without it the
# placeholder value is drawn as a visible point at (1, 1), which lies inside
# the 0-100 axis limits.
plot(
  x = 1,
  type = "n",
  xlab = "Budget (in millions)",
  ylab = "Revenue (in millions)",
  xlim = c(0, 100),
  ylim = c(0, 100)
)

# Thin mid-gray gridlines every 10 million on both axes.
abline(v = seq(0, 100, 10), lwd = 0.4, col = gray(0.5))
abline(h = seq(0, 100, 10), lwd = 0.4, col = gray(0.5))

# Break-even diagonal (revenue == budget): points above it made money.
abline(a = 0, b = 1, lwd = 1, lty = 1)

# Every movie as an open circle in semi-transparent gray (gray level 0.2,
# alpha 0.2); movies beyond the axis limits are simply not shown.
points(
  x = movies$budget.millions,
  y = movies$boxoffice.millions,
  pch = 21,
  col = gray(0.2, 0.2)
)

# The sampled movies as red open circles, drawn over the gray background.
points(
  x = movies.sample$budget.millions,
  y = movies.sample$boxoffice.millions,
  pch = 21,
  col = "red"
)

# Name each sampled movie just above its point (pos = 3 means "above").
text(
  x = movies.sample$budget.millions,
  y = movies.sample$boxoffice.millions,
  labels = movies.sample$name,
  pos = 3
)