# Imagine that you have a variable that records month:

# Create a character vector that records months
month <- c("Dec", "Apr", "Jan", "Mar")
class(month)
## [1] "character"
# Using a string to record this variable has two problems:

# (1) Sorting order
# It doesn't sort in a useful way (alphabetical, not calendar order):
sort(month)
## [1] "Apr" "Dec" "Jan" "Mar"
# (2) Typos
# There are only twelve possible months,
# and there's nothing saving you from typos:
monthwrong <- c("Dec", "Apr", "Jam", "Mar")
# Can you identify the mistake? ("Jam" is not a month.)


# You can fix both of these problems with a factor.

# To create a factor, start by creating a vector of the valid levels:
monthlevels <- c(
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
)


# Now you can create a factor.
# Without a levels argument, the levels are the sorted unique values:
monthf <- factor(month)
print(monthf)
## [1] Dec Apr Jan Mar
## Levels: Apr Dec Jan Mar
# as.factor() gives the same result as factor(month):
monthfak <- as.factor(month)
print(monthfak)
## [1] Dec Apr Jan Mar
## Levels: Apr Dec Jan Mar
# Supplying the valid levels fixes both the set of levels and their order:
monthfac <- factor(month, levels = monthlevels)
print(monthfac)
## [1] Dec Apr Jan Mar
## Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
class(monthfac)
## [1] "factor"
# Summary of the factor: one count per level, including unused levels
summary(monthfac)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec 
##   1   0   1   1   0   0   0   0   0   0   0   1
# For comparison, summary() of a numeric vector reports quantiles and the mean
summary(1:9)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       3       5       5       7       9
# Now let us sort the data: a factor sorts by level order
sort(monthfac)
## [1] Jan Mar Apr Dec
## Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
# Factors thus let you sort and display character data
# in a non-alphabetical order.

# Now let us see how a factor handles the typo
monthwrong <- c("Dec", "Apr", "Jam", "Mar")
monthfacw <- factor(monthwrong, levels = monthlevels)
print(monthfacw)
## [1] Dec  Apr  <NA> Mar 
## Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
any(is.na(monthfacw))
## [1] TRUE
anyNA(monthfacw)
## [1] TRUE
is.na(monthfacw)
## [1] FALSE FALSE  TRUE FALSE
# Any values not in the set of levels are silently converted to NA.
# A factor's values can only be one of the predefined levels or NA.

# Create a gender character vector
gender <- c("Male", "Female", "Female", "Male", "Female")
unique(gender)
## [1] "Male"   "Female"
# Create a gender factor without the levels argument:
# the levels default to the sorted unique values
genderfact <- factor(gender)
print(genderfact)
## [1] Male   Female Female Male   Female
## Levels: Female Male
# The same happens for the month vector: levels come out alphabetically
factor(month)
## [1] Dec Apr Jan Mar
## Levels: Apr Dec Jan Mar
# Creating a factor with levels defined
# (a level may be declared even if it does not occur in the data)
genderf <- factor(
  c("female", "male", "male", "female"),
  levels = c("female", "transgender", "male")
)
print(genderf)
## [1] female male   male   female
## Levels: female transgender male
# Check if a variable is a factor
class(genderf)
## [1] "factor"
is.factor(genderf)
## [1] TRUE
str(genderf)
##  Factor w/ 3 levels "female","transgender",..: 1 3 3 1
# Advantage of storage: a factor is stored as integer codes
mode(monthfac)
## [1] "numeric"
storage.mode(monthfac)
## [1] "integer"
# Signature of factor(), for reference:
# factor(x = character(), levels, labels = levels, exclude = NA, ordered = is.ordered(x), nmax = NA)

# Properties of factors: they print, sort and subset much like ordinary vectors
tsize <- c(
  "XL", "L", "XL", "S",
  "XXL", "L", "XL", "M"
)
# Default levels (sorted unique values)
tsizef <- factor(x = tsize)
# Explicit levels, listed from smallest to largest size
tsizefl <- factor(x = tsize, levels = c("S", "M", "L", "XL", "XXL"))
print(tsize)
## [1] "XL"  "L"   "XL"  "S"   "XXL" "L"   "XL"  "M"
print(tsizef)
## [1] XL  L   XL  S   XXL L   XL  M  
## Levels: L M S XL XXL
print(tsizefl)
## [1] XL  L   XL  S   XXL L   XL  M  
## Levels: S M L XL XXL
# Sorting follows the level order, not the spelling of the values
sort(tsize)
## [1] "L"   "L"   "M"   "S"   "XL"  "XL"  "XL"  "XXL"
sort(tsizef)
## [1] L   L   M   S   XL  XL  XL  XXL
## Levels: L M S XL XXL
sort(tsizefl)
## [1] S   M   L   L   XL  XL  XL  XXL
## Levels: S M L XL XXL
# Subsetting a factor keeps the full set of levels
print(tsizefl[3])
## [1] XL
## Levels: S M L XL XXL
print(tsizefl[5])
## [1] XXL
## Levels: S M L XL XXL
# An unordered factor cannot be compared with > : the result is NA plus a warning
tsizefl[5] > tsizefl[3]
## Warning in Ops.factor(tsizefl[5], tsizefl[3]): '>' not meaningful for factors
## [1] NA
# Properties of factors: character values are stored as integer codes
print(tsizefl)
## [1] XL  L   XL  S   XXL L   XL  M  
## Levels: S M L XL XXL
length(tsizefl)
## [1] 8
table(tsizefl)
## tsizefl
##   S   M   L  XL XXL 
##   1   1   2   3   1
# unclass() exposes the underlying integer codes and the levels attribute
unclass(tsizefl)
## [1] 4 3 4 1 5 3 4 2
## attr(,"levels")
## [1] "S"   "M"   "L"   "XL"  "XXL"
# Properties of factors: levels
attributes(tsizefl)
## $levels
## [1] "S"   "M"   "L"   "XL"  "XXL"
## 
## $class
## [1] "factor"
levels(tsizefl)
## [1] "S"   "M"   "L"   "XL"  "XXL"
class(tsizefl)
## [1] "factor"
# Properties of factors: they can be ordered
print(tsizefl)
## [1] XL  L   XL  S   XXL L   XL  M  
## Levels: S M L XL XXL
is.ordered(tsizefl)
## [1] FALSE
tsizeflo <- factor(
  x = tsize,
  levels = c("S", "M", "L", "XL", "XXL"),
  ordered = TRUE
)
print(tsizeflo)
## [1] XL  L   XL  S   XXL L   XL  M  
## Levels: S < M < L < XL < XXL
is.ordered(tsizeflo)
## [1] TRUE
sort(tsizeflo)
## [1] S   M   L   L   XL  XL  XL  XXL
## Levels: S < M < L < XL < XXL
print(tsizeflo[3])
## [1] XL
## Levels: S < M < L < XL < XXL
print(tsizeflo[5])
## [1] XXL
## Levels: S < M < L < XL < XXL
# With an ordered factor the comparison is meaningful
tsizeflo[5] > tsizeflo[3]
## [1] TRUE
# Convert unordered factors to ordered factors (the existing level order is kept)
as.ordered(tsizefl)
## [1] XL  L   XL  S   XXL L   XL  M  
## Levels: S < M < L < XL < XXL
as.ordered(tsizef)
## [1] XL  L   XL  S   XXL L   XL  M  
## Levels: L < M < S < XL < XXL
# Advantage of factors in plotting
# plot(tsize)   # cannot plot a character vector
plot(tsizef)    # can plot, but the sequence is alphabetical

plot(tsizefl)   # can plot with the desired sequence

plot(tsizeflo)  # can plot with the desired sequence

# Creating a factor with levels defined
genderflx <- factor(
  c("F", "M", "M", "F"),
  levels = c("F", "T", "M")
)
print(genderflx)
## [1] F M M F
## Levels: F T M
levels(genderflx)
## [1] "F" "T" "M"
# Creating the same factor with labels: each level is renamed to the label
# at the same position, so the data codes "F"/"T"/"M" display as full words
genderflab <- factor(
  c("F", "M", "M", "F"),
  levels = c("F", "T", "M"),
  labels = c("Female", "Transgender", "Male")
)

# Compare the two factors side by side
print(genderflx)
## [1] F M M F
## Levels: F T M
print(genderflab)
## [1] Female Male   Male   Female
## Levels: Female Transgender Male
# The underlying integer codes are the same; only the level names differ
unclass(genderflx)
## [1] 1 3 3 1
## attr(,"levels")
## [1] "F" "T" "M"
unclass(genderflab)
## [1] 1 3 3 1
## attr(,"levels")
## [1] "Female"      "Transgender" "Male"
levels(genderflx)
## [1] "F" "T" "M"
levels(genderflab)
## [1] "Female"      "Transgender" "Male"