##Complete the following assignment and upload your R script into Blackboard.  We will be grading your script, not the output. 

Question 1

1.      Enter the following dataframe into R and assign to the variable dat.  Be sure that Name is recognized as a character and Age/BMI are recognized as numbers

a.      Without manually entering the data, create a new column Factor AB that is the product of Factor A and Factor B

b.      Create a new column Factor C that looks as follows

(-1,-1,1,1,-1,-1,1,1,-1,-1)

c.       Without manually entering the data, create a new column Factor ABC that is the product of Factor A, Factor B, and Factor C

d.      Make sure that all Factor columns are recognized as factors in R

Solution #1

# Raw columns for the data frame. str() after each vector confirms the type
# R assigned: Name must be character, Age and BMI must be numeric.
Name <- c(
  "Frank", "Bob", "Sally", "Susan", "Joan",
  "Bill", "Richard", "Jane", "Jill", "John"
)
str(Name)
##  chr [1:10] "Frank" "Bob" "Sally" "Susan" "Joan" "Bill" "Richard" "Jane" ...
# Numeric literals are already stored as double, so no as.numeric() is needed.
Age <- c(34, 28, 19, 28, 30, 47, 24, 34, 32, 64)
str(Age)
##  num [1:10] 34 28 19 28 30 47 24 34 32 64
BMI <- c(24.2, 18.3, 15.4, 22.7, 29.2, 32.4, 21.0, 40.4, 24.8, 34.4)
str(BMI)
##  num [1:10] 24.2 18.3 15.4 22.7 29.2 32.4 21 40.4 24.8 34.4
FactorA <- c(-1, -1, -1, -1, -1, 1, 1, 1, 1, 1)
str(FactorA)
##  num [1:10] -1 -1 -1 -1 -1 1 1 1 1 1
FactorB <- c(1, -1, 1, -1, 1, -1, 1, -1, 1, -1)
str(FactorB)
##  num [1:10] 1 -1 1 -1 1 -1 1 -1 1 -1
# stringsAsFactors = FALSE keeps Name as character on every R version
# (before R 4.0.0 the default would silently turn it into a factor).
dat <- data.frame(Name, Age, BMI, FactorA, FactorB, stringsAsFactors = FALSE)
print(dat)
##       Name Age  BMI FactorA FactorB
## 1    Frank  34 24.2      -1       1
## 2      Bob  28 18.3      -1      -1
## 3    Sally  19 15.4      -1       1
## 4    Susan  28 22.7      -1      -1
## 5     Joan  30 29.2      -1       1
## 6     Bill  47 32.4       1      -1
## 7  Richard  24 21.0       1       1
## 8     Jane  34 40.4       1      -1
## 9     Jill  32 24.8       1       1
## 10    John  64 34.4       1      -1

#Question 1A

# Interaction column: element-wise product of the FactorA and FactorB vectors,
# computed rather than typed in by hand.
dat[["FactorAB"]] <- FactorA * FactorB
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB
## 1    Frank  34 24.2      -1       1       -1
## 2      Bob  28 18.3      -1      -1        1
## 3    Sally  19 15.4      -1       1       -1
## 4    Susan  28 22.7      -1      -1        1
## 5     Joan  30 29.2      -1       1       -1
## 6     Bill  47 32.4       1      -1       -1
## 7  Richard  24 21.0       1       1        1
## 8     Jane  34 40.4       1      -1       -1
## 9     Jill  32 24.8       1       1        1
## 10    John  64 34.4       1      -1       -1

#Question 1B

# FactorC follows the repeating pattern -1, -1, 1, 1; recycling it to the
# ten rows yields (-1,-1,1,1,-1,-1,1,1,-1,-1).
dat[["FactorC"]] <- rep(c(-1, -1, 1, 1), length.out = 10)
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB FactorC
## 1    Frank  34 24.2      -1       1       -1      -1
## 2      Bob  28 18.3      -1      -1        1      -1
## 3    Sally  19 15.4      -1       1       -1       1
## 4    Susan  28 22.7      -1      -1        1       1
## 5     Joan  30 29.2      -1       1       -1      -1
## 6     Bill  47 32.4       1      -1       -1      -1
## 7  Richard  24 21.0       1       1        1       1
## 8     Jane  34 40.4       1      -1       -1       1
## 9     Jill  32 24.8       1       1        1      -1
## 10    John  64 34.4       1      -1       -1      -1

#Question 1C

# Three-way interaction column: element-wise product of FactorA, FactorB and
# the FactorC column already stored in dat.
dat[["FactorABC"]] <- FactorA * FactorB * dat[["FactorC"]]
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB FactorC FactorABC
## 1    Frank  34 24.2      -1       1       -1      -1         1
## 2      Bob  28 18.3      -1      -1        1      -1        -1
## 3    Sally  19 15.4      -1       1       -1       1        -1
## 4    Susan  28 22.7      -1      -1        1       1         1
## 5     Joan  30 29.2      -1       1       -1      -1         1
## 6     Bill  47 32.4       1      -1       -1      -1         1
## 7  Richard  24 21.0       1       1        1       1         1
## 8     Jane  34 40.4       1      -1       -1       1        -1
## 9     Jill  32 24.8       1       1        1      -1        -1
## 10    John  64 34.4       1      -1       -1      -1         1

#Question 1D

# Convert every Factor* column to an R factor in one pass, then inspect each
# converted column with str() so all five conversions are actually verified
# (including FactorAB).
factor_cols <- c("FactorA", "FactorB", "FactorC", "FactorAB", "FactorABC")
dat[factor_cols] <- lapply(dat[factor_cols], as.factor)
for (col in factor_cols) {
  str(dat[[col]])
}
##  Factor w/ 2 levels "-1","1": 1 1 1 1 1 2 2 2 2 2
##  Factor w/ 2 levels "-1","1": 2 1 2 1 2 1 2 1 2 1
##  Factor w/ 2 levels "-1","1": 1 1 2 2 1 1 2 2 1 1
##  Factor w/ 2 levels "-1","1": 1 2 1 2 1 1 2 1 2 1
##  Factor w/ 2 levels "-1","1": 2 1 1 2 2 2 2 1 1 2

#Question 2

1.      Enter this additional column into the dataframe dat, making sure it is recognized as a factor in R.

# Add the smoking status directly as a factor; levels are sorted
# alphabetically ("No", "Yes"), as the str() output shows.
dat$Smoking <- factor(
  c("Yes", "No", "No", "Yes", "Yes", "No", "Yes", "Yes", "No", "Yes")
)
str(dat$Smoking)
##  Factor w/ 2 levels "No","Yes": 2 1 1 2 2 1 2 2 1 2
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB FactorC FactorABC Smoking
## 1    Frank  34 24.2      -1       1       -1      -1         1     Yes
## 2      Bob  28 18.3      -1      -1        1      -1        -1      No
## 3    Sally  19 15.4      -1       1       -1       1        -1      No
## 4    Susan  28 22.7      -1      -1        1       1         1     Yes
## 5     Joan  30 29.2      -1       1       -1      -1         1     Yes
## 6     Bill  47 32.4       1      -1       -1      -1         1      No
## 7  Richard  24 21.0       1       1        1       1         1     Yes
## 8     Jane  34 40.4       1      -1       -1       1        -1     Yes
## 9     Jill  32 24.8       1       1        1      -1        -1      No
## 10    John  64 34.4       1      -1       -1      -1         1     Yes

#Question 3

Replace Richard's BMI with NA.

# Blank out Richard's BMI. The cell is located by name (row where Name is
# "Richard", column BMI) rather than by position, so the assignment still
# hits the right cell if rows or columns are ever reordered.
dat$BMI[dat$Name == "Richard"] <- NA
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB FactorC FactorABC Smoking
## 1    Frank  34 24.2      -1       1       -1      -1         1     Yes
## 2      Bob  28 18.3      -1      -1        1      -1        -1      No
## 3    Sally  19 15.4      -1       1       -1       1        -1      No
## 4    Susan  28 22.7      -1      -1        1       1         1     Yes
## 5     Joan  30 29.2      -1       1       -1      -1         1     Yes
## 6     Bill  47 32.4       1      -1       -1      -1         1      No
## 7  Richard  24   NA       1       1        1       1         1     Yes
## 8     Jane  34 40.4       1      -1       -1       1        -1     Yes
## 9     Jill  32 24.8       1       1        1      -1        -1      No
## 10    John  64 34.4       1      -1       -1      -1         1     Yes

#Question 4

#Without manually entering the data, and using the dataframe after Richard’s BMI has been set to NA, create a new column in dat with the logarithm of BMI

# Natural log of BMI, taken from the named column instead of position 3.
# log(NA) is NA, so Richard's missing BMI carries through to log_BMI.
log_BMI <- log(dat$BMI)
# Append the new column in place rather than rebuilding the data frame.
dat$log_BMI <- log_BMI
print(dat)
##       Name Age  BMI FactorA FactorB FactorAB FactorC FactorABC Smoking  log_BMI
## 1    Frank  34 24.2      -1       1       -1      -1         1     Yes 3.186353
## 2      Bob  28 18.3      -1      -1        1      -1        -1      No 2.906901
## 3    Sally  19 15.4      -1       1       -1       1        -1      No 2.734368
## 4    Susan  28 22.7      -1      -1        1       1         1     Yes 3.122365
## 5     Joan  30 29.2      -1       1       -1      -1         1     Yes 3.374169
## 6     Bill  47 32.4       1      -1       -1      -1         1      No 3.478158
## 7  Richard  24   NA       1       1        1       1         1     Yes       NA
## 8     Jane  34 40.4       1      -1       -1       1        -1     Yes 3.698830
## 9     Jill  32 24.8       1       1        1      -1        -1      No 3.210844
## 10    John  64 34.4       1      -1       -1      -1         1     Yes 3.538057

#Question 5

Create a new dataframe dat2 selecting only the columns from dat corresponding to logarithm of BMI, Factor A, Factor B, and Factor AB

# Keep only the requested columns, selected by name so the subset does not
# depend on the current column positions in dat.
dat2 <- dat[, c("FactorA", "FactorB", "FactorAB", "log_BMI")]
print(dat2)
##    FactorA FactorB FactorAB  log_BMI
## 1       -1       1       -1 3.186353
## 2       -1      -1        1 2.906901
## 3       -1       1       -1 2.734368
## 4       -1      -1        1 3.122365
## 5       -1       1       -1 3.374169
## 6        1      -1       -1 3.478158
## 7        1       1        1       NA
## 8        1      -1       -1 3.698830
## 9        1       1        1 3.210844
## 10       1      -1       -1 3.538057

#Question 6

  1. Create a new dataframe dat3 selecting only the first 5 rows of dat2
# dat3 holds rows 1 through 5 of dat2, all columns kept.
first_five <- seq_len(5)
dat3 <- dat2[first_five, ]
print(dat3)
##   FactorA FactorB FactorAB  log_BMI
## 1      -1       1       -1 3.186353
## 2      -1      -1        1 2.906901
## 3      -1       1       -1 2.734368
## 4      -1      -1        1 3.122365
## 5      -1       1       -1 3.374169

#Complete R Code

# Solution 1: build the data frame ----
# str() after each vector confirms the type R assigned to it.
Name <- c(
  "Frank", "Bob", "Sally", "Susan", "Joan",
  "Bill", "Richard", "Jane", "Jill", "John"
)
str(Name)
# Numeric literals are already stored as double, so no as.numeric() is needed.
Age <- c(34, 28, 19, 28, 30, 47, 24, 34, 32, 64)
str(Age)
BMI <- c(24.2, 18.3, 15.4, 22.7, 29.2, 32.4, 21.0, 40.4, 24.8, 34.4)
str(BMI)
FactorA <- c(-1, -1, -1, -1, -1, 1, 1, 1, 1, 1)
str(FactorA)
FactorB <- c(1, -1, 1, -1, 1, -1, 1, -1, 1, -1)
str(FactorB)
# stringsAsFactors = FALSE keeps Name as character on every R version.
dat <- data.frame(Name, Age, BMI, FactorA, FactorB, stringsAsFactors = FALSE)
print(dat)

# Solution 1a: interaction of A and B, computed from the stored columns
dat$FactorAB <- dat$FactorA * dat$FactorB
print(dat)

# Solution 1b: FactorC as given in the assignment
dat$FactorC <- c(-1, -1, 1, 1, -1, -1, 1, 1, -1, -1)
print(dat)

# Solution 1c: three-way interaction (still numeric at this point)
dat$FactorABC <- dat$FactorA * dat$FactorB * dat$FactorC
print(dat)

# Solution 1d: convert every Factor* column to a factor and verify each one
factor_cols <- c("FactorA", "FactorB", "FactorC", "FactorAB", "FactorABC")
dat[factor_cols] <- lapply(dat[factor_cols], as.factor)
for (col in factor_cols) {
  str(dat[[col]])
}

# Solution 2: smoking status stored as a factor
dat$Smoking <- c("Yes", "No", "No", "Yes", "Yes", "No", "Yes", "Yes", "No", "Yes")
dat$Smoking <- as.factor(dat$Smoking)
str(dat$Smoking)

# Solution 3: set Richard's BMI to NA, located by name instead of position
dat$BMI[dat$Name == "Richard"] <- NA
print(dat)

# Solution 4: log of BMI (log(NA) is NA, so Richard's row stays missing)
log_BMI <- log(dat$BMI)
dat$log_BMI <- log_BMI
print(dat)

# Solution 5: keep only the requested columns, selected by name
dat2 <- dat[, c("FactorA", "FactorB", "FactorAB", "log_BMI")]
print(dat2)

# Solution 6: first five rows of dat2
dat3 <- dat2[seq_len(5), ]
print(dat3)