# Example data: a numeric vector mixing positive and negative values.
x <- c(1, 4, -2, 1, 4, -5, 10)
# Logical mask that is TRUE wherever the element is strictly positive.
pos.log <- 0 < x
pos.log
## [1] TRUE TRUE FALSE TRUE TRUE FALSE TRUE
# Keep only the elements selected by the mask.
x.pos <- x[pos.log]
x.pos
## [1] 1 4 1 4 10
# Compute the arithmetic mean of a numeric vector.
#
# x: numeric vector (single input).
# Returns: the sum of the elements divided by their count
#          (NaN for an empty vector, since that is 0 / 0).
my.mean <- function(x) {
  output <- sum(x) / length(x) # Calculate output
  return(output) # Return output to the user after running the function
}
Question 1. Captain Jack is convinced that he can predict how much gold he will find on an island with the following equation: (a * b) - c * 324 + log(a), where a is the area of the island in square meters, b is the number of trees on the island, and c is how drunk he is on a scale of 1 to 10. Create a function called Jack.Equation that takes a, b, and c as arguments and returns Captain Jack’s predictions. Test your function for an island with an area of 1,000 square meters that contains 30 trees when Jack is at a 7 on a drunkenness scale.
# Captain Jack's gold prediction: (a * b) - c * 324 + log(a).
#
# a: area of the island in square meters.
# b: number of trees on the island.
# c: Jack's drunkenness on a scale of 1 to 10.
# Returns: the predicted amount of gold.
Jack.Equation <- function(a, b, c) {
  area.by.trees <- a * b
  drunk.penalty <- c * 324
  area.by.trees - drunk.penalty + log(a)
}
# Island of 1,000 square meters with 30 trees, Jack at drunkenness level 7.
Jack.Equation(a = 1000, b = 30, c = 7)
## [1] 27738.91
Question 2. Write a function called standardize.me that takes a vector x as an argument, and returns a vector that standardizes the values of x (standardization means subtracting the mean and dividing by the standard deviation).
# Standardize a numeric vector: subtract its mean, then divide by its
# standard deviation.
#
# x: numeric vector.
# Returns: a vector of the same length as x holding the standardized values.
standardize.me <- function(x) {
  centered <- x - mean(x)
  centered / sd(x)
}
# Sample values including one very large entry.
data <- c(6, 3, 8, 6, 3, 2, 3, 2, 100)
standardize.me(x = data)
## [1] -0.2740789 -0.3677514 -0.2116305 -0.2740789 -0.3677514 -0.3989756
## [7] -0.3677514 -0.3989756 2.6609937
Question 3. Write a function called how.many that takes two arguments (data and value). The function should return a value indicating how many times the element value occurred in the vector data
# Count how often a value occurs in a vector.
#
# data:  vector to search.
# value: element to count.
# Returns: the number of elements of data that are equal to value.
how.many <- function(data, value) {
  is.match <- data == value
  sum(is.match)
}
# A value that is present four times ...
how.many(c(1, 1, 9, 3, 2, 1, 1), 1)
## [1] 4
# ... and a value that never occurs.
how.many(c(1, 1, 9, 3, 2, 1, 1), -100)
## [1] 0
Question 4. Oftentimes you will need to recode values of a dataset. For example, if you have a survey of age data, you may want to convert any crazy values (like anything below 0 or above 100) to NA. Let’s create a function to do this in R. Write a function called recode.numeric() with 3 arguments: x, lb, and ub. We’ll assume that x is a numeric vector. The function should look at the values of x, convert any values below lb and above ub to NA, and then return the resulting vector.
# Replace out-of-range values with NA.
#
# x:  numeric values to clean.
# lb: lower bound; values strictly below it become NA.
# ub: upper bound; values strictly above it become NA.
# Returns: x with every value outside [lb, ub] set to NA.
recode.numeric <- function(x, lb, ub) {
  out.of.range <- x < lb | x > ub
  x[out.of.range] <- NA
  x
}
# -10 and 1000 fall outside [0, 100] and are turned into NA.
recode.numeric(c(5, 6, -10, 2, 1000, 2), lb = 0, ub = 100)
## [1] 5 6 NA 2 NA 2
Here is a dataset containing results from a survey containing 3 questions. The answer to every question should be between 1 and 10. Create a new dataframe called “survey.fixed” that contains the original data but with all invalid values converted to NA.
# Raw survey data: an id column plus the answers to three questions
# (q1, q2, q3), some of which are invalid.
survey <- data.frame(
  id = seq_len(6),
  q1 = c(6, 2, 5, -1, 11, 100),
  q2 = c(-5, 4, 65, 3, 7, 6),
  q3 = c(2, 1, 2, 45, 5, -5)
)
survey
## id q1 q2 q3
## 1 1 6 -5 2
## 2 2 2 4 1
## 3 3 5 65 2
## 4 4 -1 3 45
## 5 5 11 7 5
## 6 6 100 6 -5
# Build survey.fixed: a cleaned copy of survey in which every invalid answer
# is NA. Valid answers lie between 1 and 10, so the lower bound is 1 (not 0).
# Only the copy is modified, so the original survey data stays intact.
survey.fixed <- survey
# Every column except the respondent id holds question answers.
question.cols <- setdiff(names(survey), "id")
for (col in question.cols) {
  survey.fixed[[col]] <- recode.numeric(x = survey[[col]], lb = 1, ub = 10)
}
survey.fixed
## id q1 q2 q3
## 1 1 6 NA 2
## 2 2 2 4 1
## 3 3 5 NA 2
## 4 4 NA 3 NA
## 5 5 NA 7 5
## 6 6 NA 6 NA
Question 5. Now, write recode.factor() which does the same thing as recode.numeric(), except with categorical (e.g., factor) values. recode.factor() should have three arguments: x, old, and new. old is the vector of old values that will be replaced, and new is the vector of replacement values. For now, assume that the lengths of both old and new are always 3.
# Recode categorical values: every element of x equal to old[i] is replaced
# by vnew[i].
#
# x:    character vector or factor to recode.
# old:  vector of values to be replaced.
# vnew: vector of replacement values, parallel to old. A single replacement
#       value is reused for every entry of old.
# Returns: x with the replacements applied. Values not listed in old, and NA
#          values, are left unchanged. A factor stays a factor.
recode.factor <- function(x, old, vnew) {
  if (length(vnew) == 1L) {
    vnew <- rep(vnew, length(old))
  }
  if (length(old) != length(vnew)) {
    stop("`old` and `vnew` must have the same length", call. = FALSE)
  }

  if (is.factor(x)) {
    # Recode the levels rather than the values so that replacement labels
    # which are not yet levels do not turn into NA.
    lv <- levels(x)
    pos <- match(lv, old, incomparables = NA)
    hit <- !is.na(pos)
    lv[hit] <- as.character(vnew[pos[hit]])
    levels(x) <- lv
    return(x)
  }

  # match() looks up each element of x in old, so all pairs are applied in a
  # single pass; `==` would recycle old against x position by position.
  pos <- match(x, old, incomparables = NA)
  hit <- !is.na(pos)
  x[hit] <- vnew[pos[hit]]
  x
}
# Drink orders to recode.
orders <- c(
  "coke light", "coke", "pepsi", "coke", "coke light",
  "water", "pepsi", "pepsi light", "water", "water"
)
# First turn every plain "coke" into "pepsi", then every "water" into
# "pepsi max".
neworders <- recode.factor(x = orders, old = "coke", vnew = "pepsi")
neworders <- recode.factor(x = neworders, old = "water", vnew = "pepsi max")
neworders
## [1] "coke light" "pepsi" "pepsi" "pepsi" "coke light"
## [6] "pepsi max" "pepsi" "pepsi light" "pepsi max" "pepsi max"
Question 6. Write a function called madlib that takes three strings as arguments, and returns the following sentence with the string arguments inserted into the following text:
“If you talk to an ADJECTIVE pirate like NAME you may find that he/she spends more time talking about PLURALNOUN than the pirate arts.”
Your three arguments to the function should be:
adjective, a string indicating an adjective; name, a string of a person’s name; plural.noun, a string indicating a plural noun.
# Fill the pirate mad-lib sentence with the supplied words.
#
# ADJECTIVE:  string holding an adjective.
# NAME:       string holding a person's name.
# PLURALNOUN: string holding a plural noun.
# Returns: the completed sentence, with the pieces joined by single spaces.
madlib <- function(ADJECTIVE, NAME, PLURALNOUN) {
  paste(
    "If you talk to an",
    ADJECTIVE,
    "pirate like",
    NAME,
    "you may find that he/she spends more time talking about",
    PLURALNOUN,
    "than the pirate arts."
  )
}
# Fill in the sentence with sample words.
madlib(ADJECTIVE = "hipster", NAME = "Bruce", PLURALNOUN = "kale")
## [1] "If you talk to an hipster pirate like Bruce you may find that he/she spends more time talking about kale than the pirate arts."
Question 7. Write a function called remove.outliers that takes a vector as an argument, determines which values of the vector are outliers, and returns a vector with the outliers removed. Define an outlier as any value that is less than 2 standard deviations below the mean, or more than 2 standard deviations above the mean.
# Drop outliers from a numeric vector.
#
# An outlier is any value more than 2 standard deviations below the mean or
# more than 2 standard deviations above it. Values lying exactly on either
# bound are not outliers and are kept.
#
# vector: numeric vector to filter.
# Returns: the elements of vector within [mean - 2 * sd, mean + 2 * sd],
#          in their original order.
remove.outliers <- function(vector) {
  # Compute the bounds once instead of re-evaluating mean() and sd().
  center <- mean(vector)
  spread <- sd(vector)
  lower <- center - 2 * spread
  upper <- center + 2 * spread
  # Inclusive comparisons: with a strict upper bound a constant vector
  # (sd = 0, so lower == upper == mean) would lose every element.
  vector[vector >= lower & vector <= upper]
}
# Fifty ones plus a single extreme negative value that should be dropped.
data <- c(rep(1, times = 50), -529484903)
remove.outliers(vector = data)
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1