In this WPA, you’ll analyze data from a survey about happiness. In the survey, 500 people were asked the following questions:
age - How old are you
sex - What is your sex?
exercise - How many hours a week do you exercise on average?
relationship - Are you in a long-term romantic relationship?
drinks - How many alcoholic drinks d you consume a week on average
happiness - On a scale of 0 to 100, how happy are you with your life?
A. You’ll need to use the yarrr package in this WPA. It has been updated in the past few days so you should reinstall it.
library(devtools)
install_github("ndphillips/yarrr")
B. Now that you’ve installed the latest version you’re good to go. However, as always you need to load the package before you can use it. Load the package with the library() function:
library(yarrr)
C. Create a new project called wpa.Rproj. You’ll use this project for this and all future wpas.
D. Navigate to the folder containing wpa.Rproj, and create two new folders called data and scripts. You’ll save all future datasets and wpas in these two folders.
E. Now it’s time to download the data for this WPA. You can find the data stored as a tab-delimited text file at http://nathanieldphillips.com/wp-content/uploads/2016/04/hapsur.txt. Download the file and save it to the data folder in your wpa project.
hapsur <- read.table("http://nathanieldphillips.com/wp-content/uploads/2016/04/hapsur.txt", header = T, sep = "\t")
write.table(hapsur, file = "data/hapsur.txt", sep = "\t")
F. Open a new R script and save it the scripts folder under the name WPA5.
G. Using read.table(), load the happiness survey data into R as a new object called hapsur
hapsur <- read.table("data/hapsur.txt", header = T, sep = "\t")
?hist
hist(hapsur$exercise)
Now, create a more interesting histogram by making the following changes
hist(hapsur$exercise,
xlab = "Exercise (Hours / week",
main = "Happiness hapsur",
xlim = c(0, 20),
col = "coral",
border = "white",
yaxt = "n",
ylab = ""
)
Now, create two separate histograms of the exercise data on top of each other. One for men and one for women.
survey.men <- subset(hapsur, sex == "m")
survey.women <- subset(hapsur, sex == "f")
hist(survey.men$age,
col = transparent("red", .5),
border = "white",
xlim = c(15, 35),
ylim = c(0, 60)
)
hist(survey.women$age,
col = transparent("blue", .5),
border = "white",
add = T
)
legend("topright",
legend = c("Men", "Women"),
pch = c(22, 22),
pt.bg = c(transparent("red", .5), transparent("blue", .5)),
col = c("white", "white"),
bty = "n",
cex = 1
)
abline(v = 1) # Add a vertical line at x = 1
abline(h = 30) # Add a horizontal line at y = 30
Using abline(), add a line showing the mean of each of the two histograms. Feel free to change the color, width, and type of lines using additional arguments. As always, check the help menu for details.
survey.men <- subset(hapsur, sex == "m")
survey.women <- subset(hapsur, sex == "f")
hist(survey.men$age,
col = transparent("red", .5),
border = "white",
xlim = c(15, 35),
ylim = c(0, 60)
)
hist(survey.women$age,
col = transparent("blue", .5),
border = "white",
add = T
)
legend("topright",
legend = c("Men", "Women"),
pch = c(22, 22),
pt.bg = c(transparent("red", .5), transparent("blue", .5)),
col = c("white", "white"),
bty = "n",
cex = 1
)
abline(v = mean(survey.women$age), lwd = 2, lty = 2, col = "blue")
abline(v = mean(survey.men$age), lwd = 2, lty = 2, col = "red")
?par
?plot
plot(hapsur$exercise,
hapsur$happiness)
Now create a prettier scatterplot by making the following changes
plot(hapsur$exercise,
hapsur$happiness,
pch = 16,
ylim = c(0, 100),
col = transparent("blue", .9),
xlab = "Exercise (hours per week)",
ylab = "Happiness (0 - 100)",
main = "Happiness hapsur"
)
# Define the model
mod <- lm(happiness ~ exercise,
data = hapsur)
# Add model line to the plot
abline(reg = mod,
lty = 2)
plot(hapsur$exercise,
hapsur$happiness,
pch = 16,
ylim = c(0, 100),
col = transparent("blue", .9),
xlab = "Exercise (hours per week)",
ylab = "Happiness (0 - 100)",
main = "Happiness hapsur"
)
mod <- lm(happiness ~ exercise,
data = hapsur)
# Add model line to the plot
abline(reg = mod,
lty = 2)
Repeat the previous plot, but now create separate points for people in a relationship and those not in a relationship.
hapsur.yrelationship <- subset(hapsur, relationship == 1)
hapsur.nrelationship <- subset(hapsur, relationship == 0)
plot(hapsur.yrelationship$exercise,
hapsur.yrelationship$happiness,
pch = 16,
ylim = c(0, 100),
col = transparent("steelblue4", .8),
xlab = "Exercise (hours per week)",
ylab = "Happiness (0 - 100)",
main = "Happiness hapsur"
)
points(hapsur.nrelationship$exercise,
hapsur.nrelationship$happiness,
pch = 17,
col = transparent("olivedrab4", .8)
)
legend("bottomright",
legend = c("In a Relationship", "Not in a Relationship"),
pch = c(16, 17),
col = c(transparent("steelblue4", .8), transparent("olivedrab4", .8))
)
?barplot
drink.agg <- aggregate(drinks ~ sex,
data = hapsur,
FUN = mean)
barplot(height = drink.agg$drinks,
names.arg = drink.agg$sex,
main = "Happiness hapsur",
ylab = "Mean drinks per week",
xlab = "Sex"
)
?pirateplot
library(yarrr)
pirateplot(formula = drinks ~ sex,
data = hapsur,
xlab = "Sex",
ylab = "Drinks per Week",
main = "Happiness hapsur"
)
library(yarrr)
pirateplot(formula = drinks ~ sex,
data = hapsur,
xlab = "Sex",
ylab = "Drinks per Week",
main = "Happiness hapsur",
pal = "black",
point.pch = 16,
point.o = .05,
bean.o = .1
)
pirateplot(formula = happiness ~ sex + relationship,
data = hapsur,
ylim = c(0, 100),
pal = "google",
xlab = "Sex",
ylab = "Happiness",
main = "Happiness hapsur"
)
pirateplot(formula = happiness ~ drinks,
data = hapsur,
xlab = "Number of drinks per week",
ylab = "Happiness",
main = "Drinks and Happiness"
)
N.balloons <- 100 # Try 10, 100, 1000
my.palette <- "basel" # try "basel", "google", "nemo"
# x - locations of balloons
x.loc <- rnorm(N.balloons, mean = 100, sd = 15)
# y.loc - vertical locations of balloons
y.loc <- rnorm(N.balloons, mean = 100, sd = 10)
# size - size of the balloons
size <- runif(N.balloons, min = 0, max = 3)
# Set up the plotting space
# Remove margins
par(mar = c(0, 0, 0, 0))
plot(1,
xaxt = "n",
yaxt = "n",
bty = "n",
xlab = "",
ylab = "",
xlim = c(70, 130),
ylim = c(70, 130),
type = "n"
)
# Add Strings
segments(x0 = x.loc + rnorm(N.balloons, mean = 0, sd = .3),
y0 = y.loc - (size * 1.5),
x1 = x.loc,
y1 = y.loc,
col = transparent("black", 1 - size / 10),
lwd = size / 3
)
# Add balloons
points(x.loc,
y.loc,
cex = size, # Size of the balloons
pch = 21,
col = "white", # white border
bg = piratepal(palette = my.palette, trans = .5))
class.initials <- c("AA", "RA", "CB", "GB", "TB", "GaBr", "EE", "VF", "SF", "SH",
"WH", "SeHu", "LK", "TK", "LL", "SM", "JM", "LN", "KO", "NP",
"SR", "AS", "SS", "CS", "GS", "LS", "ShSt", "KS", "SaSt", "Bv",
"LW")
N.balloons <- length(class.initials)
my.palette <- "basel" # try "basel", "google", "nemo"
# x - locations of balloons
x.loc <- rnorm(N.balloons, mean = 100, sd = 15)
# y.loc - vertical locations of balloons
y.loc <- x.loc + rnorm(N.balloons, mean = 0, sd = 15)
# size - size of the balloons
size <- rexp(N.balloons, rate = .7)
# Set up the plotting space
# Regular margins
par(mar = c(5, 4, 4, 1))
plot(1,
xlab = "Drinks",
ylab = "Happiness",
main = "R Pirate drinks per day and Happiness",
xlim = c(60, 140),
ylim = c(60, 140),
type = "n")
mtext("Point size indicates love of R", line = .5)
# Add gray background and gridlines
rect(-1000, -1000, 1000, 1000, col = gray(.96))
abline(h = seq(60, 140, 5), lwd = c(.5, 1), col = "white",
v = seq(60, 140, 5))
# Add regression line
abline(lm(y.loc ~ x.loc,
data = as.data.frame(cbind(y.loc, x.loc))),
lty = 2)
# Add initials below each balloon string
text(x = x.loc,
y = y.loc - (size * 2),
labels = class.initials,
cex = size / 3,
pos = 1
)
# Add Strings
segments(x0 = x.loc + rnorm(N.balloons, mean = 0, sd = .3),
y0 = y.loc - (size * 1.5),
x1 = x.loc,
y1 = y.loc,
col = transparent("black", 1 - size / max(size)),
lwd = size / 5
)
# Add balloons
points(x.loc,
y.loc,
cex = size, # Size of the balloons
pch = 21,
col = "white", # white border
bg = piratepal(palette = my.palette, trans = .5))