3.1 Factors

3.1.1 If x = c(1, 2, 3, 3, 5, 3, 2, 4, NA), what are the levels of factor(x)?

# Numeric vector whose last element is missing.
x <- c(1, 2, 3, 3, 5, 3, 2, 4, NA)

# factor() takes its levels from the distinct non-missing values of x.
xf <- factor(x)

# Levels are 1, 2, 3, 4, 5. The NA prints as <NA> but is not a level.
xf

## [1] 1    2    3    3    5    3    2    4    <NA>
## Levels: 1 2 3 4 5

3.1.2 Let x <- c(11, 22, 47, 47, 11, 47, 11). If an R expression factor(x, levels=c(11, 22, 47), ordered=TRUE) is executed, what will be the 4th element in the output?

x <- c(11, 22, 47, 47, 11, 47, 11)

# Ordered factor with levels ranked 11 < 22 < 47.
# The 4th element of the result is 47.
factor(
  x,
  levels = c(11, 22, 47),
  ordered = TRUE
)

## [1] 11 22 47 47 11 47 11
## Levels: 11 < 22 < 47

3.1.3 If z <- c(“p”, “a”, “g”, “t”, “b”), which of the following R expressions will replace the third element in z with “b”?

z <- c("p", "a", "g", "t", "b")

# Viewed as a factor, the levels sort alphabetically, so "g" is both the
# 3rd element of z and the 3rd level.
factor(z)

## [1] p a g t b
## Levels: a b g p t

# Index assignment overwrites the 3rd element of z with "b".
z[3] <- "b"

3.1.4 If z <- factor(c(“p”, “q”, “p”, “r”, “q”)) and levels of z are “p”, “q” ,“r”, write an R expression that will change the level “p” to “w” so that z is equal to: “w”, “q” , “w”, “r” , “q”.

# z is a factor with levels "p", "q", "r".
z <- factor(c("p", "q", "p", "r", "q"))

# Rename the level by name rather than by position, so the statement does not
# depend on "p" happening to be the first level.
levels(z)[levels(z) == "p"] <- "w"

# Every former "p" element now reads "w".
z

## [1] w q w r q
## Levels: w q r

3.1.5 If:
s1 <- factor(sample(letters, size=5, replace=TRUE)) and
s2 <- factor(sample(letters, size=5, replace=TRUE)),
write an R expression that will concatenate s1 and s2 in a single factor with 10 elements.

s1 <- factor(sample(letters, size = 5, replace = TRUE))

s2 <- factor(sample(letters, size = 5, replace = TRUE))

# Turn each factor back into its labels, join the two character vectors, and
# re-factor the result. Letters that occur in both samples (or more than once)
# collapse into a single level, so the number of levels depends on the draw.
factor(c(as.character(s1), as.character(s2)))

##  [1] g q o v j y q k t v
## Levels: g j k o q t v y

3.1.6 Consider the iris data set in R. Write an R expression that will ‘cut’ the Sepal.Length variable and create the following factor with five levels.

(4.3, 5.02] (5.02, 5.74] (5.74, 6.46] (6.46, 7.18] (7.18, 7.9]

# Look at the raw sepal lengths before binning them.
iris[["Sepal.Length"]]

##   [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4
##  [18] 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2 5.2 4.7 4.8 5.4 5.2 5.5
##  [35] 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0
##  [52] 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8
##  [69] 6.2 5.6 5.9 6.1 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5 5.5 5.8 6.0 5.4
##  [86] 6.0 6.7 6.3 5.6 5.5 5.5 6.1 5.8 5.0 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8
## [103] 7.1 6.3 6.5 7.6 4.9 7.3 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7
## [120] 6.0 6.9 5.6 7.7 6.3 6.7 7.2 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7
## [137] 6.3 6.4 6.0 6.9 6.7 6.9 5.8 6.8 6.7 6.7 6.3 6.5 6.2 5.9

# A single number for `breaks` asks cut() for that many intervals over the
# range of the data. To count the observations per bin, wrap the call in
# table(): table(cut(iris$Sepal.Length, 5)).
cut(iris$Sepal.Length, breaks = 5)

##   [1] (5.02,5.74] (4.3,5.02]  (4.3,5.02]  (4.3,5.02]  (4.3,5.02] 
##   [6] (5.02,5.74] (4.3,5.02]  (4.3,5.02]  (4.3,5.02]  (4.3,5.02] 
##  [11] (5.02,5.74] (4.3,5.02]  (4.3,5.02]  (4.3,5.02]  (5.74,6.46]
##  [16] (5.02,5.74] (5.02,5.74] (5.02,5.74] (5.02,5.74] (5.02,5.74]
##  [21] (5.02,5.74] (5.02,5.74] (4.3,5.02]  (5.02,5.74] (4.3,5.02] 
##  [26] (4.3,5.02]  (4.3,5.02]  (5.02,5.74] (5.02,5.74] (4.3,5.02] 
##  [31] (4.3,5.02]  (5.02,5.74] (5.02,5.74] (5.02,5.74] (4.3,5.02] 
##  [36] (4.3,5.02]  (5.02,5.74] (4.3,5.02]  (4.3,5.02]  (5.02,5.74]
##  [41] (4.3,5.02]  (4.3,5.02]  (4.3,5.02]  (4.3,5.02]  (5.02,5.74]
##  [46] (4.3,5.02]  (5.02,5.74] (4.3,5.02]  (5.02,5.74] (4.3,5.02] 
##  [51] (6.46,7.18] (5.74,6.46] (6.46,7.18] (5.02,5.74] (6.46,7.18]
##  [56] (5.02,5.74] (5.74,6.46] (4.3,5.02]  (6.46,7.18] (5.02,5.74]
##  [61] (4.3,5.02]  (5.74,6.46] (5.74,6.46] (5.74,6.46] (5.02,5.74]
##  [66] (6.46,7.18] (5.02,5.74] (5.74,6.46] (5.74,6.46] (5.02,5.74]
##  [71] (5.74,6.46] (5.74,6.46] (5.74,6.46] (5.74,6.46] (5.74,6.46]
##  [76] (6.46,7.18] (6.46,7.18] (6.46,7.18] (5.74,6.46] (5.02,5.74]
##  [81] (5.02,5.74] (5.02,5.74] (5.74,6.46] (5.74,6.46] (5.02,5.74]
##  [86] (5.74,6.46] (6.46,7.18] (5.74,6.46] (5.02,5.74] (5.02,5.74]
##  [91] (5.02,5.74] (5.74,6.46] (5.74,6.46] (4.3,5.02]  (5.02,5.74]
##  [96] (5.02,5.74] (5.02,5.74] (5.74,6.46] (5.02,5.74] (5.02,5.74]
## [101] (5.74,6.46] (5.74,6.46] (6.46,7.18] (5.74,6.46] (6.46,7.18]
## [106] (7.18,7.9]  (4.3,5.02]  (7.18,7.9]  (6.46,7.18] (7.18,7.9] 
## [111] (6.46,7.18] (5.74,6.46] (6.46,7.18] (5.02,5.74] (5.74,6.46]
## [116] (5.74,6.46] (6.46,7.18] (7.18,7.9]  (7.18,7.9]  (5.74,6.46]
## [121] (6.46,7.18] (5.02,5.74] (7.18,7.9]  (5.74,6.46] (6.46,7.18]
## [126] (7.18,7.9]  (5.74,6.46] (5.74,6.46] (5.74,6.46] (7.18,7.9] 
## [131] (7.18,7.9]  (7.18,7.9]  (5.74,6.46] (5.74,6.46] (5.74,6.46]
## [136] (7.18,7.9]  (5.74,6.46] (5.74,6.46] (5.74,6.46] (6.46,7.18]
## [141] (6.46,7.18] (6.46,7.18] (5.74,6.46] (6.46,7.18] (6.46,7.18]
## [146] (6.46,7.18] (5.74,6.46] (6.46,7.18] (5.74,6.46] (5.74,6.46]
## Levels: (4.3,5.02] (5.02,5.74] (5.74,6.46] (6.46,7.18] (7.18,7.9]

3.1.7 Consider again the iris data set. Write an R expression that will generate a two-way frequency table with two rows and three columns. The rows should relate to Sepal.Length (less than 5: TRUE or FALSE) and the columns to Species, with the following output:

#      setosa versicolor virginica  
#FALSE   30       49        49  
#TRUE    20        1         1

# Two-way frequency table: the logical condition "sepal length below 5"
# supplies the rows (FALSE/TRUE) and the species supplies the columns.
table(iris[["Sepal.Length"]] < 5, iris[["Species"]])

##        
##         setosa versicolor virginica
##   FALSE     30         49        49
##   TRUE      20          1         1

3.1.8 Consider the factor responses <- factor(c(“Agree”, “Agree”, “Strongly Agree”, “Disagree”, “Agree”)), with the following output:

#[1] Agree Agree Strongly Agree Disagree Agree
#Levels: Agree Disagree Strongly Agree

# Survey answers as a factor; the default levels are sorted alphabetically.
responses <- factor(
  c("Agree", "Agree", "Strongly Agree", "Disagree", "Agree")
)

Later it was found that a new level, “Strongly Disagree”, exists. Write an R expression that will include “Strongly Disagree” as a new level attribute of the factor and return the following output:

#[1] Agree Agree Strongly Agree Disagree Agree
#Levels: Strongly Agree Agree Disagree Strongly Disagree

# Rebuild the factor with an explicit level set. The three existing levels are
# listed in the requested order and "Strongly Disagree" is appended as a new,
# currently unused level.
factor(
  responses,
  levels = c("Strongly Agree", "Agree", "Disagree", "Strongly Disagree")
)

## [1] Agree          Agree          Strongly Agree Disagree      
## [5] Agree         
## Levels: Strongly Agree Agree Disagree Strongly Disagree

3.1.9 Let x <- data.frame(q=c(2, 4, 6), p=c(“a”, “b”, “c”)). Write an R statement that will replace levels a, b, c with labels “fertiliser1”, “fertiliser2”, “fertiliser3”.

x <- data.frame(q = c(2, 4, 6), p = c("a", "b", "c"))

# `levels` lists the values currently in p; `labels` gives the name each of
# those values should carry in the resulting factor, matched by position.
x[["p"]] <- factor(
  x[["p"]],
  levels = c("a", "b", "c"),
  labels = c("fertiliser1", "fertiliser2", "fertiliser3")
)

# Confirm the new level names.
levels(x[["p"]])

## [1] "fertiliser1" "fertiliser2" "fertiliser3"

3.1.10 If x <- factor(c(“high”, “low”, “medium”, “high”, “high”, “low”, “medium”)), write an R expression that will provide unique numeric values for various levels of x with the following output:

#  levels value
#1 high   1
#2 low    2
#3 medium 3

x <- factor(c("high", "low", "medium", "high", "high", "low", "medium"))

# Distinct values of x; the result is still a factor.
unique(x)

## [1] high   low    medium
## Levels: high low medium

# Pair each distinct value with the numeric code that as.numeric() reports
# for it. local() keeps the helper variable out of the workspace.
local({
  distinct <- unique(x)
  data.frame(levels = distinct, value = as.numeric(distinct))
})

##   levels value
## 1   high     1
## 2    low     2
## 3 medium     3

3.1 Factors

source: http://www.r-exercises.com/2015/12/28/factor-exercises/