1 Problem # 1:

  1. Enter the following dataframe into R and assign it to the variable dat. Be sure that Name is recognized as a character and that Age and BMI are recognized as numbers.

    a. Without manually entering the data, create a new column Factor AB that is the product of Factor A and Factor B.

    b. Using the rep() command, create a new column Factor C that looks as follows

    (-1,-1,1,1,-1,-1,1,1,-1,-1)

    c. Without manually entering the data, create a new column Factor ABC that is the product of Factor A, Factor B, and Factor C

    d. Make sure that all Factor columns are recognized as factors in R

2 Solution # 1:

# Build each column as its own vector and inspect its type with str() before
# assembling the data frame.
Name <- c("Frank", "Bob", "Sally", "Susan", "Joan",
          "Bill", "Richard", "Jane", "Jill", "John")
str(Name)
##  chr [1:10] "Frank" "Bob" "Sally" "Susan" "Joan" "Bill" "Richard" "Jane" ...
Age <- as.numeric(c(34, 28, 19, 28, 30, 47, 24, 34, 32, 64))
str(Age)
##  num [1:10] 34 28 19 28 30 47 24 34 32 64
BMI <- as.numeric(c(24.2, 18.3, 15.4, 22.7, 29.2, 32.4, 21.0, 40.4, 24.8, 34.4))
str(BMI)
##  num [1:10] 24.2 18.3 15.4 22.7 29.2 32.4 21 40.4 24.8 34.4
FactorA <- c(-1, -1, -1, -1, -1, 1, 1, 1, 1, 1)
str(FactorA)
##  num [1:10] -1 -1 -1 -1 -1 1 1 1 1 1
FactorB <- c(1, -1, 1, -1, 1, -1, 1, -1, 1, -1)
str(FactorB)
##  num [1:10] 1 -1 1 -1 1 -1 1 -1 1 -1
# stringsAsFactors = FALSE keeps Name a character column on R < 4.0, where
# data.frame() converts strings to factors by default.
dat <- data.frame(Name, Age, BMI, FactorA, FactorB, stringsAsFactors = FALSE)
print(dat)
##       Name Age  BMI FactorA FactorB
## 1    Frank  34 24.2      -1       1
## 2      Bob  28 18.3      -1      -1
## 3    Sally  19 15.4      -1       1
## 4    Susan  28 22.7      -1      -1
## 5     Joan  30 29.2      -1       1
## 6     Bill  47 32.4       1      -1
## 7  Richard  24 21.0       1       1
## 8     Jane  34 40.4       1      -1
## 9     Jill  32 24.8       1       1
## 10    John  64 34.4       1      -1

2.1 Solution 1 a:

# Take the product from the data frame's own columns rather than from the
# workspace vectors, so the result always reflects what is stored in dat.
dat$FactorAB <- dat$FactorA * dat$FactorB
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB
## 1    Frank  34 24.2      -1       1       -1
## 2      Bob  28 18.3      -1      -1        1
## 3    Sally  19 15.4      -1       1       -1
## 4    Susan  28 22.7      -1      -1        1
## 5     Joan  30 29.2      -1       1       -1
## 6     Bill  47 32.4       1      -1       -1
## 7  Richard  24 21.0       1       1        1
## 8     Jane  34 40.4       1      -1       -1
## 9     Jill  32 24.8       1       1        1
## 10    John  64 34.4       1      -1       -1

2.2 Solution 1 b:

# The task asks for rep(): each = 2 expands c(-1, 1) to (-1, -1, 1, 1), and
# length.out recycles that pattern until there is one value per row of dat.
dat$FactorC <- rep(c(-1, 1), each = 2, length.out = nrow(dat))
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB FactorC
## 1    Frank  34 24.2      -1       1       -1      -1
## 2      Bob  28 18.3      -1      -1        1      -1
## 3    Sally  19 15.4      -1       1       -1       1
## 4    Susan  28 22.7      -1      -1        1       1
## 5     Joan  30 29.2      -1       1       -1      -1
## 6     Bill  47 32.4       1      -1       -1      -1
## 7  Richard  24 21.0       1       1        1       1
## 8     Jane  34 40.4       1      -1       -1       1
## 9     Jill  32 24.8       1       1        1      -1
## 10    John  64 34.4       1      -1       -1      -1

2.3 Solution 1 c:

# Read all three factors from dat (not a mix of workspace vectors and dat
# columns) so the three-way product is computed from one consistent source.
dat$FactorABC <- dat$FactorA * dat$FactorB * dat$FactorC
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB FactorC FactorABC
## 1    Frank  34 24.2      -1       1       -1      -1         1
## 2      Bob  28 18.3      -1      -1        1      -1        -1
## 3    Sally  19 15.4      -1       1       -1       1        -1
## 4    Susan  28 22.7      -1      -1        1       1         1
## 5     Joan  30 29.2      -1       1       -1      -1         1
## 6     Bill  47 32.4       1      -1       -1      -1         1
## 7  Richard  24 21.0       1       1        1       1         1
## 8     Jane  34 40.4       1      -1       -1       1        -1
## 9     Jill  32 24.8       1       1        1      -1        -1
## 10    John  64 34.4       1      -1       -1      -1         1

2.4 Solution 1 d:

# Convert every design column to a factor in a single pass, then inspect each
# converted column with str().
factor_cols <- c("FactorA", "FactorB", "FactorAB", "FactorC", "FactorABC")
dat[factor_cols] <- lapply(dat[factor_cols], as.factor)
str(dat$FactorA)
##  Factor w/ 2 levels "-1","1": 1 1 1 1 1 2 2 2 2 2
str(dat$FactorB)
##  Factor w/ 2 levels "-1","1": 2 1 2 1 2 1 2 1 2 1
str(dat$FactorAB)
##  Factor w/ 2 levels "-1","1": 1 2 1 2 1 1 2 1 2 1
str(dat$FactorC)
##  Factor w/ 2 levels "-1","1": 1 1 2 2 1 1 2 2 1 1
str(dat$FactorABC)
##  Factor w/ 2 levels "-1","1": 2 1 1 2 2 2 2 1 1 2

3 Problem # 2:

  1. Enter this additional column (Smoking, with the Yes/No values listed in the solution below) into the data frame dat, making sure it is recognized as a factor in R.

3.1 Solution 2:

# Create the Smoking column directly as a factor instead of storing the
# strings first and converting afterwards.
dat$Smoking <- factor(
  c("Yes", "No", "No", "Yes", "Yes", "No", "Yes", "Yes", "No", "Yes")
)
str(dat$Smoking)
##  Factor w/ 2 levels "No","Yes": 2 1 1 2 2 1 2 2 1 2
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB FactorC FactorABC Smoking
## 1    Frank  34 24.2      -1       1       -1      -1         1     Yes
## 2      Bob  28 18.3      -1      -1        1      -1        -1      No
## 3    Sally  19 15.4      -1       1       -1       1        -1      No
## 4    Susan  28 22.7      -1      -1        1       1         1     Yes
## 5     Joan  30 29.2      -1       1       -1      -1         1     Yes
## 6     Bill  47 32.4       1      -1       -1      -1         1      No
## 7  Richard  24 21.0       1       1        1       1         1     Yes
## 8     Jane  34 40.4       1      -1       -1       1        -1     Yes
## 9     Jill  32 24.8       1       1        1      -1        -1      No
## 10    John  64 34.4       1      -1       -1      -1         1     Yes

4 Problem # 3:

3. Replace Richard's BMI with NA.

4.1 Solution 3:

# Find Richard's row by name and the BMI column by name instead of the
# hard-coded position [7, 3], which breaks if rows or columns are reordered.
dat$BMI[dat$Name == "Richard"] <- NA
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB FactorC FactorABC Smoking
## 1    Frank  34 24.2      -1       1       -1      -1         1     Yes
## 2      Bob  28 18.3      -1      -1        1      -1        -1      No
## 3    Sally  19 15.4      -1       1       -1       1        -1      No
## 4    Susan  28 22.7      -1      -1        1       1         1     Yes
## 5     Joan  30 29.2      -1       1       -1      -1         1     Yes
## 6     Bill  47 32.4       1      -1       -1      -1         1      No
## 7  Richard  24   NA       1       1        1       1         1     Yes
## 8     Jane  34 40.4       1      -1       -1       1        -1     Yes
## 9     Jill  32 24.8       1       1        1      -1        -1      No
## 10    John  64 34.4       1      -1       -1      -1         1     Yes

5 Problem # 4:

  1. Without manually entering the data, and using the dataframe after entering Richard’s BMI as NA, create a new column in dat with the logarithm of BMI.

5.1 Solution 4:

# Refer to BMI by name rather than by column position 3, and add the result as
# a new column in place instead of rebuilding the whole data frame.
# Richard's missing BMI yields NA in log_BMI as well.
log_BMI <- log(dat$BMI)
dat$log_BMI <- log_BMI
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB FactorC FactorABC Smoking  log_BMI
## 1    Frank  34 24.2      -1       1       -1      -1         1     Yes 3.186353
## 2      Bob  28 18.3      -1      -1        1      -1        -1      No 2.906901
## 3    Sally  19 15.4      -1       1       -1       1        -1      No 2.734368
## 4    Susan  28 22.7      -1      -1        1       1         1     Yes 3.122365
## 5     Joan  30 29.2      -1       1       -1      -1         1     Yes 3.374169
## 6     Bill  47 32.4       1      -1       -1      -1         1      No 3.478158
## 7  Richard  24   NA       1       1        1       1         1     Yes       NA
## 8     Jane  34 40.4       1      -1       -1       1        -1     Yes 3.698830
## 9     Jill  32 24.8       1       1        1      -1        -1      No 3.210844
## 10    John  64 34.4       1      -1       -1      -1         1     Yes 3.538057

6 Problem # 5:

  1. Create a new dataframe dat2 selecting only the columns from dat corresponding to logarithm of BMI, Factor A, Factor B, and Factor AB

6.1 Solution 5:

# Select the wanted columns by name; dropping columns by negative position
# silently picks the wrong ones as soon as dat gains or reorders a column.
dat2 <- dat[, c("FactorA", "FactorB", "FactorAB", "log_BMI")]
print(dat2)
##    FactorA FactorB FactorAB  log_BMI
## 1       -1       1       -1 3.186353
## 2       -1      -1        1 2.906901
## 3       -1       1       -1 2.734368
## 4       -1      -1        1 3.122365
## 5       -1       1       -1 3.374169
## 6        1      -1       -1 3.478158
## 7        1       1        1       NA
## 8        1      -1       -1 3.698830
## 9        1       1        1 3.210844
## 10       1      -1       -1 3.538057

7 Problem # 6:

  1. Create a new dataframe dat3 selecting only the first 5 rows of dat2

7.1 Solution 6:

# Keep rows 1 through 5 of dat2, with all of its columns.
dat3 <- dat2[seq_len(5), ]
print(dat3)
##   FactorA FactorB FactorAB  log_BMI
## 1      -1       1       -1 3.186353
## 2      -1      -1        1 2.906901
## 3      -1       1       -1 2.734368
## 4      -1      -1        1 3.122365
## 5      -1       1       -1 3.374169

8 Complete R-Code:

# Complete solution script for Problems 1-6.
# Builds the data frame `dat`, adds the derived factor columns, a Smoking
# factor and log(BMI), then derives `dat2` (column subset) and `dat3` (first
# five rows of dat2).

# Problem 1: enter the data ----
Name <- c("Frank", "Bob", "Sally", "Susan", "Joan",
          "Bill", "Richard", "Jane", "Jill", "John")
Name <- as.character(Name)
str(Name)
Age <- as.numeric(c(34, 28, 19, 28, 30, 47, 24, 34, 32, 64))
str(Age)
BMI <- as.numeric(c(24.2, 18.3, 15.4, 22.7, 29.2, 32.4, 21.0, 40.4, 24.8, 34.4))
str(BMI)
FactorA <- c(-1, -1, -1, -1, -1, 1, 1, 1, 1, 1)
str(FactorA)
FactorB <- c(1, -1, 1, -1, 1, -1, 1, -1, 1, -1)
str(FactorB)
# stringsAsFactors = FALSE keeps Name a character column on R < 4.0, where
# data.frame() converts strings to factors by default.
dat <- data.frame(Name, Age, BMI, FactorA, FactorB, stringsAsFactors = FALSE)
print(dat)

# Problem 1.a: product of Factor A and Factor B ----
# Computed from dat's own columns so it always reflects the stored data.
dat$FactorAB <- dat$FactorA * dat$FactorB
print(dat)

# Problem 1.b: Factor C via rep() ----
# each = 2 expands c(-1, 1) to (-1, -1, 1, 1); length.out recycles that
# pattern until there is one value per row.
dat$FactorC <- rep(c(-1, 1), each = 2, length.out = nrow(dat))
print(dat)

# Problem 1.c: product of Factor A, Factor B and Factor C ----
dat$FactorABC <- dat$FactorA * dat$FactorB * dat$FactorC
print(dat)

# Problem 1.d: store every Factor column as a factor ----
# Done after the products above, because factors cannot be multiplied.
factor_cols <- c("FactorA", "FactorB", "FactorAB", "FactorC", "FactorABC")
dat[factor_cols] <- lapply(dat[factor_cols], as.factor)
for (col in factor_cols) {
  str(dat[[col]])
}

# Problem 2: add the Smoking column as a factor ----
dat$Smoking <- factor(
  c("Yes", "No", "No", "Yes", "Yes", "No", "Yes", "Yes", "No", "Yes")
)
str(dat$Smoking)

# Problem 3: replace Richard's BMI with NA ----
# Looked up by name and column name rather than the fixed position [7, 3].
dat$BMI[dat$Name == "Richard"] <- NA
print(dat)

# Problem 4: add log(BMI) as a new column ----
# The missing BMI carries through as NA in log_BMI.
log_BMI <- log(dat$BMI)
dat$log_BMI <- log_BMI
print(dat)

# Problem 5: keep only Factor A, Factor B, Factor AB and log(BMI) ----
# Selected by name so the result does not depend on column positions.
dat2 <- dat[, c("FactorA", "FactorB", "FactorAB", "log_BMI")]
print(dat2)

# Problem 6: first five rows of dat2 ----
dat3 <- dat2[seq_len(5), ]
print(dat3)