1. Enter the following data frame into R and assign it to the variable dat. Be sure that Name is recognized as a character and Age/BMI are recognized as numbers.
a. Without manually entering the data, create a new column Factor AB that is the product of Factor A and Factor B.
b. Using the rep() command, create a new column Factor C that looks as follows:
(-1,-1,1,1,-1,-1,1,1,-1,-1)
c. Without manually entering the data, create a new column Factor ABC that is the product of Factor A, Factor B, and Factor C
d. Make sure that all Factor columns are recognized as factors in R
# Problem 1 setup: enter each column as its own vector, check its type with
# str(), then assemble the vectors into the data frame `dat`.
Name <- c("Frank", "Bob", "Sally", "Susan", "Joan",
          "Bill", "Richard", "Jane", "Jill", "John")
str(Name)
## chr [1:10] "Frank" "Bob" "Sally" "Susan" "Joan" "Bill" "Richard" "Jane" ...
# Numeric literals are already doubles, so no as.numeric() coercion is needed;
# str() confirms the type.
Age <- c(34, 28, 19, 28, 30, 47, 24, 34, 32, 64)
str(Age)
## num [1:10] 34 28 19 28 30 47 24 34 32 64
BMI <- c(24.2, 18.3, 15.4, 22.7, 29.2, 32.4, 21.0, 40.4, 24.8, 34.4)
str(BMI)
## num [1:10] 24.2 18.3 15.4 22.7 29.2 32.4 21 40.4 24.8 34.4
FactorA <- c(-1, -1, -1, -1, -1, 1, 1, 1, 1, 1)
str(FactorA)
## num [1:10] -1 -1 -1 -1 -1 1 1 1 1 1
FactorB <- c(1, -1, 1, -1, 1, -1, 1, -1, 1, -1)
str(FactorB)
## num [1:10] 1 -1 1 -1 1 -1 1 -1 1 -1
# stringsAsFactors = FALSE keeps Name a character column even on R < 4.0.0,
# where data.frame() converted strings to factors by default.
dat <- data.frame(Name, Age, BMI, FactorA, FactorB, stringsAsFactors = FALSE)
print(dat)
## Name Age BMI FactorA FactorB
## 1 Frank 34 24.2 -1 1
## 2 Bob 28 18.3 -1 -1
## 3 Sally 19 15.4 -1 1
## 4 Susan 28 22.7 -1 -1
## 5 Joan 30 29.2 -1 1
## 6 Bill 47 32.4 1 -1
## 7 Richard 24 21.0 1 1
## 8 Jane 34 40.4 1 -1
## 9 Jill 32 24.8 1 1
## 10 John 64 34.4 1 -1
Solution 1 a:
# Problem 1.a: FactorAB is the element-wise product of the FactorA and FactorB
# columns. Reading them from `dat` (not from the free-standing vectors) keeps
# the result tied to the rows of the data frame.
dat$FactorAB <- dat$FactorA * dat$FactorB
print(dat)
## Name Age BMI FactorA FactorB FactorAB
## 1 Frank 34 24.2 -1 1 -1
## 2 Bob 28 18.3 -1 -1 1
## 3 Sally 19 15.4 -1 1 -1
## 4 Susan 28 22.7 -1 -1 1
## 5 Joan 30 29.2 -1 1 -1
## 6 Bill 47 32.4 1 -1 -1
## 7 Richard 24 21.0 1 1 1
## 8 Jane 34 40.4 1 -1 -1
## 9 Jill 32 24.8 1 1 1
## 10 John 64 34.4 1 -1 -1
# Problem 1.b: build FactorC with rep(), as the exercise requires, instead of
# typing the values by hand. `each = 2` yields -1, -1, 1, 1 and `length.out`
# recycles that pattern to one value per row of `dat`.
dat$FactorC <- rep(c(-1, 1), each = 2, length.out = nrow(dat))
print(dat)
## Name Age BMI FactorA FactorB FactorAB FactorC
## 1 Frank 34 24.2 -1 1 -1 -1
## 2 Bob 28 18.3 -1 -1 1 -1
## 3 Sally 19 15.4 -1 1 -1 1
## 4 Susan 28 22.7 -1 -1 1 1
## 5 Joan 30 29.2 -1 1 -1 -1
## 6 Bill 47 32.4 1 -1 -1 -1
## 7 Richard 24 21.0 1 1 1 1
## 8 Jane 34 40.4 1 -1 -1 1
## 9 Jill 32 24.8 1 1 1 -1
## 10 John 64 34.4 1 -1 -1 -1
# Problem 1.c: FactorABC is the element-wise product of the three factor
# columns. All three are read from `dat` so the computation does not depend on
# the free-standing FactorA/FactorB vectors still matching the data frame.
dat$FactorABC <- dat$FactorA * dat$FactorB * dat$FactorC
print(dat)
## Name Age BMI FactorA FactorB FactorAB FactorC FactorABC
## 1 Frank 34 24.2 -1 1 -1 -1 1
## 2 Bob 28 18.3 -1 -1 1 -1 -1
## 3 Sally 19 15.4 -1 1 -1 1 -1
## 4 Susan 28 22.7 -1 -1 1 1 1
## 5 Joan 30 29.2 -1 1 -1 -1 1
## 6 Bill 47 32.4 1 -1 -1 -1 1
## 7 Richard 24 21.0 1 1 1 1 1
## 8 Jane 34 40.4 1 -1 -1 1 -1
## 9 Jill 32 24.8 1 1 1 -1 -1
## 10 John 64 34.4 1 -1 -1 -1 1
# Problem 1.d: convert each Factor column from numeric to an R factor, and
# confirm every conversion with str().
dat[["FactorA"]] <- as.factor(dat[["FactorA"]])
str(dat[["FactorA"]])
## Factor w/ 2 levels "-1","1": 1 1 1 1 1 2 2 2 2 2
dat[["FactorB"]] <- as.factor(dat[["FactorB"]])
str(dat[["FactorB"]])
## Factor w/ 2 levels "-1","1": 2 1 2 1 2 1 2 1 2 1
dat[["FactorAB"]] <- as.factor(dat[["FactorAB"]])
str(dat[["FactorAB"]])
## Factor w/ 2 levels "-1","1": 1 2 1 2 1 1 2 1 2 1
dat[["FactorC"]] <- as.factor(dat[["FactorC"]])
str(dat[["FactorC"]])
## Factor w/ 2 levels "-1","1": 1 1 2 2 1 1 2 2 1 1
dat[["FactorABC"]] <- as.factor(dat[["FactorABC"]])
str(dat[["FactorABC"]])
## Factor w/ 2 levels "-1","1": 2 1 1 2 2 2 2 1 1 2
# Problem 2: add a Smoking column and store it as a factor; the str() output
# below shows the two levels, "No" and "Yes".
dat$Smoking <- as.factor(
  c("Yes", "No", "No", "Yes", "Yes", "No", "Yes", "Yes", "No", "Yes")
)
str(dat$Smoking)
## Factor w/ 2 levels "No","Yes": 2 1 1 2 2 1 2 2 1 2
print(dat)
## Name Age BMI FactorA FactorB FactorAB FactorC FactorABC Smoking
## 1 Frank 34 24.2 -1 1 -1 -1 1 Yes
## 2 Bob 28 18.3 -1 -1 1 -1 -1 No
## 3 Sally 19 15.4 -1 1 -1 1 -1 No
## 4 Susan 28 22.7 -1 -1 1 1 1 Yes
## 5 Joan 30 29.2 -1 1 -1 -1 1 Yes
## 6 Bill 47 32.4 1 -1 -1 -1 1 No
## 7 Richard 24 21.0 1 1 1 1 1 Yes
## 8 Jane 34 40.4 1 -1 -1 1 -1 Yes
## 9 Jill 32 24.8 1 1 1 -1 -1 No
## 10 John 64 34.4 1 -1 -1 -1 1 Yes
3. Replace the BMI of Richard with NA.
# Problem 3: set Richard's BMI to NA. The row is located by Name and the column
# by `$BMI`, so the edit does not depend on row or column positions.
dat$BMI[dat$Name == "Richard"] <- NA
print(dat)
## Name Age BMI FactorA FactorB FactorAB FactorC FactorABC Smoking
## 1 Frank 34 24.2 -1 1 -1 -1 1 Yes
## 2 Bob 28 18.3 -1 -1 1 -1 -1 No
## 3 Sally 19 15.4 -1 1 -1 1 -1 No
## 4 Susan 28 22.7 -1 -1 1 1 1 Yes
## 5 Joan 30 29.2 -1 1 -1 -1 1 Yes
## 6 Bill 47 32.4 1 -1 -1 -1 1 No
## 7 Richard 24 NA 1 1 1 1 1 Yes
## 8 Jane 34 40.4 1 -1 -1 1 -1 Yes
## 9 Jill 32 24.8 1 1 1 -1 -1 No
## 10 John 64 34.4 1 -1 -1 -1 1 Yes
# Add the natural log of BMI as a new column. The BMI column is referenced by
# name rather than by position; log(NA) is NA, so Richard's missing BMI stays
# missing in log_BMI.
log_BMI <- log(dat$BMI)
dat$log_BMI <- log_BMI
print(dat)
## Name Age BMI FactorA FactorB FactorAB FactorC FactorABC Smoking log_BMI
## 1 Frank 34 24.2 -1 1 -1 -1 1 Yes 3.186353
## 2 Bob 28 18.3 -1 -1 1 -1 -1 No 2.906901
## 3 Sally 19 15.4 -1 1 -1 1 -1 No 2.734368
## 4 Susan 28 22.7 -1 -1 1 1 1 Yes 3.122365
## 5 Joan 30 29.2 -1 1 -1 -1 1 Yes 3.374169
## 6 Bill 47 32.4 1 -1 -1 -1 1 No 3.478158
## 7 Richard 24 NA 1 1 1 1 1 Yes NA
## 8 Jane 34 40.4 1 -1 -1 1 -1 Yes 3.698830
## 9 Jill 32 24.8 1 1 1 -1 -1 No 3.210844
## 10 John 64 34.4 1 -1 -1 -1 1 Yes 3.538057
# Build dat2 from FactorA, FactorB, FactorAB and log_BMI only. Selecting the
# columns by name (instead of dropping positions 1:3 and 7:9) keeps the result
# correct even if columns are later added to or reordered in `dat`.
dat2 <- dat[, c("FactorA", "FactorB", "FactorAB", "log_BMI")]
print(dat2)
## FactorA FactorB FactorAB log_BMI
## 1 -1 1 -1 3.186353
## 2 -1 -1 1 2.906901
## 3 -1 1 -1 2.734368
## 4 -1 -1 1 3.122365
## 5 -1 1 -1 3.374169
## 6 1 -1 -1 3.478158
## 7 1 1 1 NA
## 8 1 -1 -1 3.698830
## 9 1 1 1 3.210844
## 10 1 -1 -1 3.538057
# dat3 holds rows 1 through 5 of dat2, with every column kept.
dat3 <- dat2[seq_len(5), ]
print(dat3)
## FactorA FactorB FactorAB log_BMI
## 1 -1 1 -1 3.186353
## 2 -1 -1 1 2.906901
## 3 -1 1 -1 2.734368
## 4 -1 -1 1 3.122365
## 5 -1 1 -1 3.374169
# Show the current working directory (informational only; nothing below reads
# or writes files).
getwd()

# Problem 1 setup: enter each column as a vector and check its type with str().
# Character and numeric literals already have the required types, so no
# as.character()/as.numeric() coercion is needed.
Name <- c("Frank", "Bob", "Sally", "Susan", "Joan",
          "Bill", "Richard", "Jane", "Jill", "John")
str(Name)
Age <- c(34, 28, 19, 28, 30, 47, 24, 34, 32, 64)
str(Age)
BMI <- c(24.2, 18.3, 15.4, 22.7, 29.2, 32.4, 21.0, 40.4, 24.8, 34.4)
str(BMI)
FactorA <- c(-1, -1, -1, -1, -1, 1, 1, 1, 1, 1)
str(FactorA)
FactorB <- c(1, -1, 1, -1, 1, -1, 1, -1, 1, -1)
str(FactorB)
# stringsAsFactors = FALSE keeps Name a character column even on R < 4.0.0,
# where data.frame() converted strings to factors by default.
dat <- data.frame(Name, Age, BMI, FactorA, FactorB, stringsAsFactors = FALSE)
print(dat)

# Answer to question 1.a: product of the FactorA and FactorB columns, read
# from `dat` so the result stays tied to the rows of the data frame.
dat$FactorAB <- dat$FactorA * dat$FactorB
print(dat)

# Answer to question 1.b: rep() builds the pattern -1, -1, 1, 1 (`each = 2`)
# and recycles it to one value per row (`length.out`).
dat$FactorC <- rep(c(-1, 1), each = 2, length.out = nrow(dat))
print(dat)

# Answer to question 1.c: product of all three factor columns.
dat$FactorABC <- dat$FactorA * dat$FactorB * dat$FactorC
print(dat)

# Answer to question 1.d: convert every Factor column to an R factor in one
# step, then print the structure of each converted column.
factor_cols <- c("FactorA", "FactorB", "FactorAB", "FactorC", "FactorABC")
dat[factor_cols] <- lapply(dat[factor_cols], as.factor)
invisible(lapply(dat[factor_cols], str))

# Answer to problem 2: add Smoking as a factor column.
dat$Smoking <- as.factor(
  c("Yes", "No", "No", "Yes", "Yes", "No", "Yes", "Yes", "No", "Yes")
)
str(dat$Smoking)

# Answer to problem 3: set Richard's BMI to NA, locating the cell by name
# rather than by hard-coded row/column positions.
dat$BMI[dat$Name == "Richard"] <- NA
print(dat)

# Answer to problem 4: add the natural log of BMI; log(NA) is NA, so the
# missing BMI stays missing.
log_BMI <- log(dat$BMI)
dat$log_BMI <- log_BMI
print(dat)

# Answer to problem 5: keep only the named columns (previously done by
# dropping column positions 1:3 and 7:9).
dat2 <- dat[, c("FactorA", "FactorB", "FactorAB", "log_BMI")]
print(dat2)

# Answer to problem 6: first five rows of dat2.
dat3 <- dat2[seq_len(5), ]
print(dat3)