## Complete the following assignment and upload your R script into Blackboard. We will be grading your script, not the output.
## 1. Enter the following dataframe into R and assign it to the variable dat. Be sure that Name is recognized as character and that Age and BMI are recognized as numbers.
##    a. Without manually entering the data, create a new column Factor AB that is the product of Factor A and Factor B.
##    b. Create a new column Factor C that looks as follows:
##       (-1,-1,1,1,-1,-1,1,1,-1,-1)
##    c. Without manually entering the data, create a new column Factor ABC that is the product of Factor A, Factor B, and Factor C.
##    d. Make sure that all Factor columns are recognized as factors in R.
# ---- Question 1: build `dat` and its factor columns ----
# c() on quoted strings yields a character vector and c() on number literals
# yields a double vector, so no extra coercion is needed; str() shows the type.
Name <- c("Frank", "Bob", "Sally", "Susan", "Joan",
          "Bill", "Richard", "Jane", "Jill", "John")
str(Name)
## chr [1:10] "Frank" "Bob" "Sally" "Susan" "Joan" "Bill" "Richard" "Jane" ...
Age <- c(34, 28, 19, 28, 30, 47, 24, 34, 32, 64)
str(Age)
## num [1:10] 34 28 19 28 30 47 24 34 32 64
BMI <- c(24.2, 18.3, 15.4, 22.7, 29.2, 32.4, 21.0, 40.4, 24.8, 34.4)
str(BMI)
## num [1:10] 24.2 18.3 15.4 22.7 29.2 32.4 21 40.4 24.8 34.4
FactorA <- c(-1, -1, -1, -1, -1, 1, 1, 1, 1, 1)
str(FactorA)
## num [1:10] -1 -1 -1 -1 -1 1 1 1 1 1
FactorB <- c(1, -1, 1, -1, 1, -1, 1, -1, 1, -1)
str(FactorB)
## num [1:10] 1 -1 1 -1 1 -1 1 -1 1 -1
# stringsAsFactors = FALSE keeps Name as character on every R version
# (before R 4.0 the default converted strings to factors).
dat <- data.frame(Name, Age, BMI, FactorA, FactorB, stringsAsFactors = FALSE)
print(dat)
## Name Age BMI FactorA FactorB
## 1 Frank 34 24.2 -1 1
## 2 Bob 28 18.3 -1 -1
## 3 Sally 19 15.4 -1 1
## 4 Susan 28 22.7 -1 -1
## 5 Joan 30 29.2 -1 1
## 6 Bill 47 32.4 1 -1
## 7 Richard 24 21.0 1 1
## 8 Jane 34 40.4 1 -1
## 9 Jill 32 24.8 1 1
## 10 John 64 34.4 1 -1

# Question 1A: product of FactorA and FactorB, computed from dat's own columns
# so the result always lines up with the rows of dat.
dat$FactorAB <- dat$FactorA * dat$FactorB
print(dat)
## Name Age BMI FactorA FactorB FactorAB
## 1 Frank 34 24.2 -1 1 -1
## 2 Bob 28 18.3 -1 -1 1
## 3 Sally 19 15.4 -1 1 -1
## 4 Susan 28 22.7 -1 -1 1
## 5 Joan 30 29.2 -1 1 -1
## 6 Bill 47 32.4 1 -1 -1
## 7 Richard 24 21.0 1 1 1
## 8 Jane 34 40.4 1 -1 -1
## 9 Jill 32 24.8 1 1 1
## 10 John 64 34.4 1 -1 -1

# Question 1B: FactorC is entered by hand, as given in the assignment.
dat$FactorC <- c(-1, -1, 1, 1, -1, -1, 1, 1, -1, -1)
print(dat)
## Name Age BMI FactorA FactorB FactorAB FactorC
## 1 Frank 34 24.2 -1 1 -1 -1
## 2 Bob 28 18.3 -1 -1 1 -1
## 3 Sally 19 15.4 -1 1 -1 1
## 4 Susan 28 22.7 -1 -1 1 1
## 5 Joan 30 29.2 -1 1 -1 -1
## 6 Bill 47 32.4 1 -1 -1 -1
## 7 Richard 24 21.0 1 1 1 1
## 8 Jane 34 40.4 1 -1 -1 1
## 9 Jill 32 24.8 1 1 1 -1
## 10 John 64 34.4 1 -1 -1 -1

# Question 1C: three-way product. This must run while the Factor columns are
# still numeric, i.e. before the as.factor() conversions below.
dat$FactorABC <- dat$FactorA * dat$FactorB * dat$FactorC
print(dat)
## Name Age BMI FactorA FactorB FactorAB FactorC FactorABC
## 1 Frank 34 24.2 -1 1 -1 -1 1
## 2 Bob 28 18.3 -1 -1 1 -1 -1
## 3 Sally 19 15.4 -1 1 -1 1 -1
## 4 Susan 28 22.7 -1 -1 1 1 1
## 5 Joan 30 29.2 -1 1 -1 -1 1
## 6 Bill 47 32.4 1 -1 -1 -1 1
## 7 Richard 24 21.0 1 1 1 1 1
## 8 Jane 34 40.4 1 -1 -1 1 -1
## 9 Jill 32 24.8 1 1 1 -1 -1
## 10 John 64 34.4 1 -1 -1 -1 1

# Question 1D: convert every Factor column to a factor and check each one.
dat$FactorA <- as.factor(dat$FactorA)
str(dat$FactorA)
## Factor w/ 2 levels "-1","1": 1 1 1 1 1 2 2 2 2 2
dat$FactorB <- as.factor(dat$FactorB)
str(dat$FactorB)
## Factor w/ 2 levels "-1","1": 2 1 2 1 2 1 2 1 2 1
dat$FactorC <- as.factor(dat$FactorC)
str(dat$FactorC)
## Factor w/ 2 levels "-1","1": 1 1 2 2 1 1 2 2 1 1
dat$FactorAB <- as.factor(dat$FactorAB)
str(dat$FactorAB)
## Factor w/ 2 levels "-1","1": 1 2 1 2 1 1 2 1 2 1
dat$FactorABC <- as.factor(dat$FactorABC)
str(dat$FactorABC)
## Factor w/ 2 levels "-1","1": 2 1 1 2 2 2 2 1 1 2
# 2. Enter this additional column into the dataframe dat, making sure it is recognized as a factor in R.
# Add the smoking status as a factor column in one step; factor() on a
# character vector gives the alphabetically sorted levels "No" and "Yes".
dat$Smoking <- factor(
  c("Yes", "No", "No", "Yes", "Yes", "No", "Yes", "Yes", "No", "Yes")
)
str(dat$Smoking)
## Factor w/ 2 levels "No","Yes": 2 1 1 2 2 1 2 2 1 2
print(dat)
## Name Age BMI FactorA FactorB FactorAB FactorC FactorABC Smoking
## 1 Frank 34 24.2 -1 1 -1 -1 1 Yes
## 2 Bob 28 18.3 -1 -1 1 -1 -1 No
## 3 Sally 19 15.4 -1 1 -1 1 -1 No
## 4 Susan 28 22.7 -1 -1 1 1 1 Yes
## 5 Joan 30 29.2 -1 1 -1 -1 1 Yes
## 6 Bill 47 32.4 1 -1 -1 -1 1 No
## 7 Richard 24 21.0 1 1 1 1 1 Yes
## 8 Jane 34 40.4 1 -1 -1 1 -1 Yes
## 9 Jill 32 24.8 1 1 1 -1 -1 No
## 10 John 64 34.4 1 -1 -1 -1 1 Yes
# 3. Replace the BMI of Richard with NA.
# Set Richard's BMI to NA. The row is located by name and the column by label,
# so the statement stays correct if rows or columns of dat are reordered
# (the hard-coded position [7, 3] would not). BMI remains a numeric column.
dat$BMI[dat$Name == "Richard"] <- NA
print(dat)
## Name Age BMI FactorA FactorB FactorAB FactorC FactorABC Smoking
## 1 Frank 34 24.2 -1 1 -1 -1 1 Yes
## 2 Bob 28 18.3 -1 -1 1 -1 -1 No
## 3 Sally 19 15.4 -1 1 -1 1 -1 No
## 4 Susan 28 22.7 -1 -1 1 1 1 Yes
## 5 Joan 30 29.2 -1 1 -1 -1 1 Yes
## 6 Bill 47 32.4 1 -1 -1 -1 1 No
## 7 Richard 24 NA 1 1 1 1 1 Yes
## 8 Jane 34 40.4 1 -1 -1 1 -1 Yes
## 9 Jill 32 24.8 1 1 1 -1 -1 No
## 10 John 64 34.4 1 -1 -1 -1 1 Yes
# 4. Without manually entering the data, and using the dataframe after Richard's BMI has been set to NA, create a new column in dat with the logarithm of BMI.
# Natural logarithm of BMI, read by column name rather than by position 3.
# log(NA) is NA, so Richard's missing BMI carries over to log_BMI.
log_BMI <- log(dat$BMI)
dat$log_BMI <- log_BMI
print(dat)
## Name Age BMI FactorA FactorB FactorAB FactorC FactorABC Smoking log_BMI
## 1 Frank 34 24.2 -1 1 -1 -1 1 Yes 3.186353
## 2 Bob 28 18.3 -1 -1 1 -1 -1 No 2.906901
## 3 Sally 19 15.4 -1 1 -1 1 -1 No 2.734368
## 4 Susan 28 22.7 -1 -1 1 1 1 Yes 3.122365
## 5 Joan 30 29.2 -1 1 -1 -1 1 Yes 3.374169
## 6 Bill 47 32.4 1 -1 -1 -1 1 No 3.478158
## 7 Richard 24 NA 1 1 1 1 1 Yes NA
## 8 Jane 34 40.4 1 -1 -1 1 -1 Yes 3.698830
## 9 Jill 32 24.8 1 1 1 -1 -1 No 3.210844
## 10 John 64 34.4 1 -1 -1 -1 1 Yes 3.538057
# 5. Create a new dataframe dat2 selecting only the columns from dat corresponding to logarithm of BMI, Factor A, Factor B, and Factor AB.
# Select the wanted columns by name instead of dropping the others by
# position; the negative index silently picks the wrong columns as soon as a
# column is added to or reordered in dat. Column order matches the original.
dat2 <- dat[, c("FactorA", "FactorB", "FactorAB", "log_BMI")]
print(dat2)
## FactorA FactorB FactorAB log_BMI
## 1 -1 1 -1 3.186353
## 2 -1 -1 1 2.906901
## 3 -1 1 -1 2.734368
## 4 -1 -1 1 3.122365
## 5 -1 1 -1 3.374169
## 6 1 -1 -1 3.478158
## 7 1 1 1 NA
## 8 1 -1 -1 3.698830
## 9 1 1 1 3.210844
## 10 1 -1 -1 3.538057
# dat3 holds rows 1 through 5 of dat2, with all columns kept.
dat3 <- dat2[seq_len(5), ]
print(dat3)
## FactorA FactorB FactorAB log_BMI
## 1 -1 1 -1 3.186353
## 2 -1 -1 1 2.906901
## 3 -1 1 -1 2.734368
## 4 -1 -1 1 3.122365
## 5 -1 1 -1 3.374169
# ---- Solution 1: build the data frame `dat` ----
# c() on quoted strings yields a character vector and c() on number literals
# yields a double vector, so no extra coercion is needed; str() shows the type.
Name <- c("Frank", "Bob", "Sally", "Susan", "Joan",
          "Bill", "Richard", "Jane", "Jill", "John")
str(Name)
Age <- c(34, 28, 19, 28, 30, 47, 24, 34, 32, 64)
str(Age)
BMI <- c(24.2, 18.3, 15.4, 22.7, 29.2, 32.4, 21.0, 40.4, 24.8, 34.4)
str(BMI)
FactorA <- c(-1, -1, -1, -1, -1, 1, 1, 1, 1, 1)
str(FactorA)
FactorB <- c(1, -1, 1, -1, 1, -1, 1, -1, 1, -1)
str(FactorB)
# stringsAsFactors = FALSE keeps Name as character on every R version
# (before R 4.0 the default converted strings to factors).
dat <- data.frame(Name, Age, BMI, FactorA, FactorB, stringsAsFactors = FALSE)
print(dat)

# ---- Solution 1a ----
# Product taken from dat's own columns so it always lines up with dat's rows.
dat$FactorAB <- dat$FactorA * dat$FactorB
print(dat)

# ---- Solution 1b ----
dat$FactorC <- c(-1, -1, 1, 1, -1, -1, 1, 1, -1, -1)
print(dat)

# ---- Solution 1c ----
# Must run while the Factor columns are still numeric (before Solution 1d).
dat$FactorABC <- dat$FactorA * dat$FactorB * dat$FactorC
print(dat)

# ---- Solution 1d ----
# Convert every Factor column to a factor and check each one with str().
dat$FactorA <- as.factor(dat$FactorA)
str(dat$FactorA)
dat$FactorB <- as.factor(dat$FactorB)
str(dat$FactorB)
dat$FactorC <- as.factor(dat$FactorC)
str(dat$FactorC)
dat$FactorAB <- as.factor(dat$FactorAB)
str(dat$FactorAB)
dat$FactorABC <- as.factor(dat$FactorABC)
str(dat$FactorABC)

# ---- Solution 2 ----
dat$Smoking <- c("Yes", "No", "No", "Yes", "Yes", "No", "Yes", "Yes", "No", "Yes")
dat$Smoking <- as.factor(dat$Smoking)
str(dat$Smoking)

# ---- Solution 3 ----
# Locate Richard by name and BMI by column label rather than by position.
dat$BMI[dat$Name == "Richard"] <- NA
print(dat)

# ---- Solution 4 ----
# log(NA) is NA, so Richard's missing BMI carries over to log_BMI.
log_BMI <- log(dat$BMI)
dat$log_BMI <- log_BMI

# ---- Solution 5 ----
# Select columns by name; dropping by position breaks if columns change.
dat2 <- dat[, c("FactorA", "FactorB", "FactorAB", "log_BMI")]
print(dat2)

# ---- Solution 6 ----
# Keep the first five rows of dat2.
dat3 <- dat2[1:5, ]
print(dat3)